--- title: Generate Features | Tabstack description: Use AI to transform and analyze web content with the Tabstack Python SDK Generate operator. --- The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content. ## Overview ``` import os from tabstack import Tabstack with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client: # Generate transformed JSON from a URL result = client.generate.json( url='https://example.com', json_schema=schema, instructions='Your transformation instructions', geo_target=None, # Optional: geotarget requests (e.g., {'country': 'GB'}) nocache=False # Optional: bypass cache ) ``` ## Generate JSON Transform web content into structured data using AI with custom instructions. ### Basic Usage ``` import os from tabstack import Tabstack with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client: schema = { "type": "object", "properties": { "summaries": { "type": "array", "items": { "type": "object", "properties": { "title": {"type": "string"}, "category": {"type": "string"}, "summary": {"type": "string"} }, "required": ["title", "category", "summary"] } } }, "required": ["summaries"] } result = client.generate.json( url='https://news.ycombinator.com', json_schema=schema, instructions='For each story, categorize it (tech, business, science, etc.) 
and write a one-sentence summary' ) for summary in result['summaries']: print(f"{summary['title']} [{summary['category']}]") print(f" {summary['summary']}\n") ``` ### Async Usage ``` import asyncio import os from tabstack import AsyncTabstack async def generate_summaries(): async with AsyncTabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client: schema = { "type": "object", "properties": { "summaries": { "type": "array", "items": { "type": "object", "properties": { "title": {"type": "string"}, "category": {"type": "string"}, "summary": {"type": "string"} } } } } } result = await client.generate.json( url='https://news.ycombinator.com', json_schema=schema, instructions='For each story, categorize it and write a one-sentence summary' ) for summary in result['summaries']: print(f"{summary['title']} [{summary['category']}]") asyncio.run(generate_summaries()) ``` ## Real-World Examples ### Example 1: Sentiment Analysis ``` import os from tabstack import Tabstack def analyze_sentiment(): with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client: schema = { "type": "object", "properties": { "overall_sentiment": { "type": "string", "enum": ["positive", "negative", "neutral"] }, "confidence": { "type": "number", "description": "Confidence score from 0 to 1" }, "key_phrases": { "type": "array", "items": {"type": "string"}, "description": "Important phrases that indicate sentiment" }, "emotional_tone": { "type": "string" }, "main_topics": { "type": "array", "items": {"type": "string"} } }, "required": ["overall_sentiment", "confidence"] } result = client.generate.json( url='https://reviews.example.com/product/123', json_schema=schema, instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed' ) print(f"Sentiment: {result['overall_sentiment']} ({result['confidence']*100}% confident)") print(f"Tone: {result.get('emotional_tone', 'N/A')}") print(f"Key phrases: {', '.join(result.get('key_phrases', []))}") 
print(f"Topics: {', '.join(result.get('main_topics', []))}") analyze_sentiment() ``` ### Example 2: Content Categorization ``` import os from tabstack import Tabstack def categorize_article(): with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client: schema = { "type": "object", "properties": { "title": {"type": "string"}, "primary_category": {"type": "string"}, "secondary_categories": { "type": "array", "items": {"type": "string"} }, "tags": { "type": "array", "items": {"type": "string"} }, "target_audience": {"type": "string"}, "reading_level": { "type": "string", "enum": ["beginner", "intermediate", "advanced"] }, "estimated_reading_time": {"type": "number"} } } result = client.generate.json( url='https://blog.example.com/article', json_schema=schema, instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time' ) print(f"Title: {result['title']}") print(f"Category: {result['primary_category']}") print(f"Level: {result['reading_level']}") print(f"Reading Time: {result['estimated_reading_time']} minutes") print(f"Tags: {', '.join(result['tags'])}") categorize_article() ``` ### Example 3: Extract Key Insights ``` import os from tabstack import Tabstack def extract_insights(): with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client: schema = { "type": "object", "properties": { "main_thesis": {"type": "string"}, "key_points": { "type": "array", "items": {"type": "string"} }, "conclusions": { "type": "array", "items": {"type": "string"} }, "action_items": { "type": "array", "items": {"type": "string"} }, "relevant_quotes": { "type": "array", "items": { "type": "object", "properties": { "quote": {"type": "string"}, "context": {"type": "string"} } } } } } result = client.generate.json( url='https://research.example.com/paper', json_schema=schema, instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. 
Include 2-3 relevant quotes with context.' ) print('Main Thesis:') print(result['main_thesis']) print('\nKey Points:') for i, point in enumerate(result['key_points'], 1): print(f"{i}. {point}") print('\nAction Items:') for i, item in enumerate(result['action_items'], 1): print(f"{i}. {item}") extract_insights() ``` ## Writing Effective Instructions ### Be Specific and Clear ``` # Vague instructions = 'Analyze this content' # Specific instructions = 'Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes' ``` ### Include Context ``` # Good: With context instructions = ''' You are analyzing product reviews for a purchasing decision. Extract the most mentioned pros and cons, and identify any dealbreaker issues. ''' ``` ## Options Reference ### generate.json() | Parameter | Type | Required | Description | | -------------- | -------- | -------- | ------------------------------------------------------------------------------- | | `url` | `str` | Yes | URL to analyze and transform | | `json_schema` | `object` | Yes | JSON Schema for output structure | | `instructions` | `str` | Yes | AI instructions for transformation | | `geo_target` | `dict` | No | Geotargeting parameters (e.g., `{'country': 'GB'}`) for region-specific content | | `nocache` | `bool` | No | Bypass cache and force fresh generation (default: `False`) | ## Best Practices ### 1. Use Descriptive Schema Properties ``` schema = { "type": "object", "properties": { "sentiment": { "type": "string", "enum": ["positive", "negative", "neutral"], "description": "Overall sentiment based on tone and language used" } } } ``` ### 2. Handle Errors ``` import tabstack from tabstack import Tabstack with Tabstack() as client: try: result = client.generate.json( url=url, json_schema=schema, instructions=instructions ) except tabstack.APIStatusError as error: print(f"Generation failed: {error.status_code} - {error}") ``` ### 3. 
Use Per-Request Options ``` # Override timeout for long-running generations result = client.with_options(timeout=120.0).generate.json( url=url, json_schema=schema, instructions=instructions ) ``` ## Next Steps - **[Automate Features](./automate)**: Execute complex browser automation tasks - **[Error Handling](./error-handling)**: Build robust applications - **[REST API Reference](/api/index.md)**: See the underlying REST API endpoint