Generate Features

SDKs

Python

Use AI to transform and analyze web content with the Tabstack Python SDK Generate operator.

The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.

Overview

import os
from tabstack import Tabstack

with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # JSON Schema describing the shape of the output you want back.
    # Placeholder: replace it with the structure your application needs.
    schema = {
        "type": "object",
        "properties": {
            "summary": {"type": "string"}
        },
        "required": ["summary"]
    }

    # Generate transformed JSON from a URL
    result = client.generate.json(
        url='https://example.com',
        json_schema=schema,
        instructions='Your transformation instructions',
        geo_target=None,  # Optional: geotarget requests (e.g., {'country': 'GB'})
        nocache=False     # Optional: bypass cache
    )

Generate JSON

Transform web content into structured data using AI with custom instructions.

Basic Usage

import os
from tabstack import Tabstack

with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Shape of one categorized story; all three fields are mandatory.
    story_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "summary": {"type": "string"}
        },
        "required": ["title", "category", "summary"]
    }

    # Top-level output: an object wrapping the list of stories.
    schema = {
        "type": "object",
        "properties": {
            "summaries": {"type": "array", "items": story_schema}
        },
        "required": ["summaries"]
    }

    result = client.generate.json(
        url='https://news.ycombinator.com',
        json_schema=schema,
        instructions='For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary'
    )

    # One heading line per story, followed by its indented summary.
    for story in result['summaries']:
        heading = f"{story['title']} [{story['category']}]"
        print(heading)
        print(f"  {story['summary']}\n")

Async Usage

import asyncio
import os
from tabstack import AsyncTabstack

async def generate_summaries():
    """Categorize and summarize the stories at news.ycombinator.com.

    Sends the page URL, an output schema, and instructions to
    ``client.generate.json`` through the async client, then prints one
    ``title [category]`` line per returned story.
    """
    async with AsyncTabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "summaries": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "category": {"type": "string"},
                            "summary": {"type": "string"}
                        },
                        # The loop below indexes these keys directly, so
                        # declare them required (as the sync example does).
                        "required": ["title", "category", "summary"]
                    }
                }
            },
            "required": ["summaries"]
        }

        result = await client.generate.json(
            url='https://news.ycombinator.com',
            json_schema=schema,
            instructions='For each story, categorize it and write a one-sentence summary'
        )

        for summary in result['summaries']:
            print(f"{summary['title']} [{summary['category']}]")

asyncio.run(generate_summaries())

Real-World Examples

Example 1: Sentiment Analysis

import os
from tabstack import Tabstack

def analyze_sentiment():
    """Analyze the sentiment of a product-review page and print a report.

    Requests ``overall_sentiment`` and ``confidence`` as required fields;
    the remaining fields are optional and are read with ``.get`` so a
    missing one prints a fallback instead of raising ``KeyError``.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"]
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1"
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment"
                },
                "emotional_tone": {
                    "type": "string"
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["overall_sentiment", "confidence"]
        }

        result = client.generate.json(
            url='https://reviews.example.com/product/123',
            json_schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed'
        )

        # Use the percent format spec rather than multiplying by 100:
        # float multiplication can print artifacts like 56.99999999999999%.
        print(f"Sentiment: {result['overall_sentiment']} ({result['confidence']:.0%} confident)")
        print(f"Tone: {result.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(result.get('key_phrases', []))}")
        print(f"Topics: {', '.join(result.get('main_topics', []))}")

analyze_sentiment()

Example 2: Content Categorization

import os
from tabstack import Tabstack

def categorize_article():
    """Categorize a blog article and print its classification.

    Requests category, tags, audience, reading level, and reading time
    for the article URL, then prints the title, primary category, level,
    reading time, and tags.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"]
                },
                "estimated_reading_time": {
                    "type": "number",
                    # Printed below as minutes, so state the unit explicitly.
                    "description": "Estimated reading time in minutes"
                }
            },
            # Every key indexed directly below is declared required so a
            # missing field cannot surface as a KeyError while printing.
            "required": [
                "title",
                "primary_category",
                "tags",
                "reading_level",
                "estimated_reading_time"
            ]
        }

        result = client.generate.json(
            url='https://blog.example.com/article',
            json_schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time'
        )

        print(f"Title: {result['title']}")
        print(f"Category: {result['primary_category']}")
        print(f"Level: {result['reading_level']}")
        print(f"Reading Time: {result['estimated_reading_time']} minutes")
        print(f"Tags: {', '.join(result['tags'])}")

categorize_article()

Example 3: Extract Key Insights

import os
from tabstack import Tabstack

def extract_insights():
    """Extract the key insights from a research paper and print them.

    Requests the main thesis, key points, conclusions, action items, and
    quotes for the paper URL, then prints the thesis followed by numbered
    key points and action items.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"}
                        }
                    }
                }
            },
            # The three keys indexed directly below are declared required
            # so a missing field cannot surface as a KeyError while printing.
            "required": ["main_thesis", "key_points", "action_items"]
        }

        result = client.generate.json(
            url='https://research.example.com/paper',
            json_schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.'
        )

        print('Main Thesis:')
        print(result['main_thesis'])
        print('\nKey Points:')
        for i, point in enumerate(result['key_points'], 1):
            print(f"{i}. {point}")
        print('\nAction Items:')
        for i, item in enumerate(result['action_items'], 1):
            print(f"{i}. {item}")

extract_insights()

Writing Effective Instructions

Be Specific and Clear

# Vague: names no subject, no labels to assign, and no expected output
instructions = 'Analyze this content'

# Specific: names the subject (customer reviews), the labels to assign,
# and the extra output wanted (common themes)
instructions = 'Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes'

Include Context

# Good: With context — the first sentence states why the analysis is being
# done, the second states what to pull out
instructions = '''
You are analyzing product reviews for a purchasing decision.
Extract the most mentioned pros and cons, and identify any dealbreaker issues.
'''

Options Reference

generate.json()

Parameter	Type	Required	Description
`url`	`str`	Yes	URL to analyze and transform
`json_schema`	`object`	Yes	JSON Schema for output structure
`instructions`	`str`	Yes	AI instructions for transformation
`geo_target`	`dict`	No	Geotargeting parameters (e.g., `{'country': 'GB'}`) for region-specific content
`nocache`	`bool`	No	Bypass cache and force fresh generation (default: `False`)

Best Practices

1. Use Descriptive Schema Properties

# Each property carries a description that spells out what it should contain.
sentiment_property = {
    "type": "string",
    "enum": ["positive", "negative", "neutral"],
    "description": "Overall sentiment based on tone and language used"
}

schema = {
    "type": "object",
    "properties": {"sentiment": sentiment_property}
}

2. Handle Errors

import tabstack
from tabstack import Tabstack

# NOTE(review): no api_key is passed here, unlike the other examples on this
# page — presumably the client reads it from the environment; confirm.
with Tabstack() as client:
    try:
        # url, schema, and instructions are placeholders for your own values
        result = client.generate.json(
            url=url,
            json_schema=schema,
            instructions=instructions
        )
    except tabstack.APIStatusError as error:
        # Report the status code alongside the error itself
        print(f"Generation failed: {error.status_code} - {error}")

3. Use Per-Request Options

# Override timeout for long-running generations
result = client.with_options(timeout=120.0).generate.json(
    url=url,
    json_schema=schema,
    instructions=instructions
)

Next Steps

Automate Features: Execute complex browser automation tasks
Error Handling: Build robust applications
REST API Reference: See the underlying REST API endpoint