Generate Features

The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.

All Generate methods are async: call them with await from inside an async function.

Overview

import asyncio
import os
from tabstack import TABStack

async def main():
    """Show the call shape of ``generate.json``.

    ``url``, ``schema`` and ``instructions`` are placeholders that this
    snippet does not define; supply your own values before running it.
    """
    api_key = os.getenv('TABSTACK_API_KEY')
    async with TABStack(api_key=api_key) as tabs:
        # Generate transformed JSON from a URL
        await tabs.generate.json(
            url,
            schema,
            instructions,
            nocache=False,
        )

asyncio.run(main())

Generate JSON

Transform web content into structured data using AI with custom instructions.

Basic Usage

import asyncio
import os
from tabstack import TABStack

async def main():
    """Categorize and summarize the stories at news.ycombinator.com and print them."""
    # Shape of one generated entry; all three fields are required.
    story_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "summary": {"type": "string"},
        },
        "required": ["title", "category", "summary"],
    }
    # Top-level result: an object wrapping the list of entries.
    schema = {
        "type": "object",
        "properties": {
            "summaries": {"type": "array", "items": story_schema},
        },
        "required": ["summaries"],
    }
    instructions = 'For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary'

    api_key = os.getenv('TABSTACK_API_KEY')
    async with TABStack(api_key=api_key) as tabs:
        result = await tabs.generate.json(
            url='https://news.ycombinator.com',
            schema=schema,
            instructions=instructions,
        )

        # One headline line plus an indented summary line per story.
        for story in result.data['summaries']:
            title, category = story['title'], story['category']
            print(f"{title} [{category}]")
            print(f"  {story['summary']}\n")

asyncio.run(main())

Real-World Examples

Example 1: Sentiment Analysis

async def analyze_sentiment():
    """Generate a sentiment analysis of a product-review page and print it.

    Only ``overall_sentiment`` and ``confidence`` are declared as required
    in the schema, so they are indexed directly; every other field is read
    with ``.get`` and a fallback.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"]
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1"
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment"
                },
                "emotional_tone": {
                    "type": "string"
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["overall_sentiment", "confidence"]
        }

        result = await tabs.generate.json(
            url='https://reviews.example.com/product/123',
            schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed'
        )

        # The ".0%" format spec scales the 0-1 score to a whole percentage;
        # multiplying by 100 by hand can print float noise such as
        # 56.99999999999999.
        print(f"Sentiment: {result.data['overall_sentiment']} ({result.data['confidence']:.0%} confident)")
        print(f"Tone: {result.data.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(result.data.get('key_phrases', []))}")
        print(f"Topics: {', '.join(result.data.get('main_topics', []))}")

asyncio.run(analyze_sentiment())

Example 2: Content Categorization

async def categorize_article():
    """Generate a categorization of a blog article and print the result.

    ``title`` and ``primary_category`` are declared as required in the
    schema and indexed directly; the remaining fields are optional and are
    read with ``.get`` so a missing field does not raise ``KeyError``.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"]
                },
                "estimated_reading_time": {
                    "type": "number",
                    # State the unit so the value matches the "minutes" label below.
                    "description": "Estimated reading time in minutes"
                }
            },
            "required": ["title", "primary_category"]
        }

        result = await tabs.generate.json(
            url='https://blog.example.com/article',
            schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time'
        )

        data = result.data
        print(f"Title: {data['title']}")
        print(f"Category: {data['primary_category']}")
        print(f"Level: {data.get('reading_level', 'N/A')}")

        # Only append the unit when a value was actually generated.
        reading_time = data.get('estimated_reading_time')
        reading_time_text = f"{reading_time} minutes" if reading_time is not None else 'N/A'
        print(f"Reading Time: {reading_time_text}")

        print(f"Tags: {', '.join(data.get('tags', []))}")

asyncio.run(categorize_article())

Example 3: Extract Key Insights

async def extract_insights():
    """Generate the key insights of a research paper and print them.

    ``main_thesis`` and ``key_points`` are declared as required in the
    schema and indexed directly; ``action_items`` is optional and read with
    ``.get`` so a missing field does not raise ``KeyError``.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"}
                        }
                    }
                }
            },
            "required": ["main_thesis", "key_points"]
        }

        result = await tabs.generate.json(
            url='https://research.example.com/paper',
            schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.'
        )

        data = result.data
        print('Main Thesis:')
        print(data['main_thesis'])
        print('\nKey Points:')
        for i, point in enumerate(data['key_points'], 1):
            print(f"{i}. {point}")
        print('\nAction Items:')
        # Not every source contains recommendations, so fall back to an empty list.
        for i, item in enumerate(data.get('action_items', []), 1):
            print(f"{i}. {item}")

asyncio.run(extract_insights())

Writing Effective Instructions

Be Specific and Clear

# ❌ Vague
instructions = 'Analyze this content'

# ✅ Specific
instructions = 'Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes'

Include Context

# ✅ Good: With context
instructions = '''
You are analyzing product reviews for a purchasing decision.
Extract the most mentioned pros and cons, and identify any dealbreaker issues.
'''

Options Reference

generate.json()

Parameter	Type	Default	Description
`url`	`str`	required	URL to analyze and transform
`schema`	`dict`	required	JSON Schema for output structure
`instructions`	`str`	required	AI instructions for transformation
`nocache`	`bool`	`False`	Bypass cache and force fresh generation

Best Practices

1. Use Descriptive Schema Properties

schema = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
            "description": "Overall sentiment based on tone and language used"
        }
    }
}

2. Handle Errors

# Fragment: run this inside an async function, with `tabs` bound to an open
# TABStack client and `url`, `schema`, `instructions` already defined.
try:
    result = await tabs.generate.json(url, schema, instructions)
except Exception as error:
    # Broad catch, for illustration only.
    # NOTE(review): the SDK presumably raises more specific exception types;
    # confirm against the Error Handling guide and catch those instead.
    print(f"Generation failed: {error}")

Next Steps

Automate Features: Execute complex browser automation tasks
Error Handling: Build robust applications
REST API Reference: See the underlying REST API endpoint

Overview

Generate JSON

Basic Usage

Real-World Examples

Example 1: Sentiment Analysis

Example 2: Content Categorization

Example 3: Extract Key Insights

Writing Effective Instructions

Be Specific and Clear

Include Context

Options Reference

generate.json()

Best Practices

1. Use Descriptive Schema Properties

2. Handle Errors

Next Steps