Generate Features
Use AI to transform and analyze web content with the Tabstack Python SDK Generate operator.
The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.
Overview
Section titled “Overview”
import os
from tabstack import Tabstack

with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Generate transformed JSON from a URL
    result = client.generate.json(
        url='https://example.com',
        json_schema=schema,
        instructions='Your transformation instructions',
        geo_target=None,  # Optional: geotarget requests (e.g., {'country': 'GB'})
        nocache=False  # Optional: bypass cache
    )

Generate JSON
Section titled “Generate JSON”
Transform web content into structured data using AI with custom instructions.
Basic Usage
Section titled “Basic Usage”
import os
from tabstack import Tabstack

with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    schema = {
        "type": "object",
        "properties": {
            "summaries": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "category": {"type": "string"},
                        "summary": {"type": "string"}
                    },
                    "required": ["title", "category", "summary"]
                }
            }
        },
        "required": ["summaries"]
    }

    result = client.generate.json(
        url='https://news.ycombinator.com',
        json_schema=schema,
        instructions='For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary'
    )

    for summary in result['summaries']:
        print(f"{summary['title']} [{summary['category']}]")
        print(f" {summary['summary']}\n")

Async Usage
Section titled “Async Usage”
import asyncio
import os
from tabstack import AsyncTabstack
async def generate_summaries():
    """Categorize Hacker News stories asynchronously and print the result.

    Opens an ``AsyncTabstack`` client using the ``TABSTACK_API_KEY``
    environment variable, awaits ``client.generate.json`` for the Hacker News
    front page, and prints one ``title [category]`` line per returned summary.
    """
    async with AsyncTabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        # The loop below indexes 'summaries', 'title' and 'category' directly,
        # so the schema marks them as required instead of leaving every
        # property optional (an omitted key would raise KeyError).
        schema = {
            "type": "object",
            "properties": {
                "summaries": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "category": {"type": "string"},
                            "summary": {"type": "string"},
                        },
                        "required": ["title", "category", "summary"],
                    },
                },
            },
            "required": ["summaries"],
        }

        result = await client.generate.json(
            url='https://news.ycombinator.com',
            json_schema=schema,
            instructions='For each story, categorize it and write a one-sentence summary',
        )

        for summary in result['summaries']:
            print(f"{summary['title']} [{summary['category']}]")
asyncio.run(generate_summaries())

Real-World Examples
Section titled “Real-World Examples”
Example 1: Sentiment Analysis
Section titled “Example 1: Sentiment Analysis”
import os
from tabstack import Tabstack
def analyze_sentiment():
    """Analyze the sentiment of a product-review page and print a report.

    Opens a ``Tabstack`` client using the ``TABSTACK_API_KEY`` environment
    variable, asks ``client.generate.json`` for a sentiment analysis of the
    review URL, and prints the sentiment, confidence, tone, key phrases and
    topics. Only ``overall_sentiment`` and ``confidence`` are required by the
    schema; the remaining fields are read defensively.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"],
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1",
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment",
                },
                "emotional_tone": {
                    "type": "string",
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["overall_sentiment", "confidence"],
        }

        result = client.generate.json(
            url='https://reviews.example.com/product/123',
            json_schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed',
        )

        # Use the '%' presentation type instead of multiplying by 100:
        # 0.57 * 100 evaluates to 56.99999999999999 and would be printed as-is.
        print(f"Sentiment: {result['overall_sentiment']} ({result['confidence']:.0%} confident)")
        print(f"Tone: {result.get('emotional_tone', 'N/A')}")
        # 'or []' covers a key that is present but null as well as a missing
        # key, so join() never receives None.
        print(f"Key phrases: {', '.join(result.get('key_phrases') or [])}")
        print(f"Topics: {', '.join(result.get('main_topics') or [])}")
analyze_sentiment()

Example 2: Content Categorization
Section titled “Example 2: Content Categorization”
import os
from tabstack import Tabstack
def categorize_article():
    """Categorize a blog article and print its title, category and tags.

    Opens a ``Tabstack`` client using the ``TABSTACK_API_KEY`` environment
    variable, asks ``client.generate.json`` to categorize the article URL,
    and prints the title, primary category, reading level, estimated reading
    time and tags from the result.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"],
                },
                "estimated_reading_time": {"type": "number"},
            },
            # Every key printed below is indexed with [], so the schema must
            # require it; otherwise an omitted field raises KeyError.
            "required": [
                "title",
                "primary_category",
                "tags",
                "reading_level",
                "estimated_reading_time",
            ],
        }

        result = client.generate.json(
            url='https://blog.example.com/article',
            json_schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time',
        )

        print(f"Title: {result['title']}")
        print(f"Category: {result['primary_category']}")
        print(f"Level: {result['reading_level']}")
        print(f"Reading Time: {result['estimated_reading_time']} minutes")
        print(f"Tags: {', '.join(result['tags'])}")
categorize_article()

Example 3: Extract Key Insights
Section titled “Example 3: Extract Key Insights”
import os
from tabstack import Tabstack
def extract_insights():
    """Extract the thesis, key points and action items from a research page.

    Opens a ``Tabstack`` client using the ``TABSTACK_API_KEY`` environment
    variable, asks ``client.generate.json`` for structured insights about the
    paper URL, and prints the main thesis followed by numbered key points and
    numbered action items. ``conclusions`` and ``relevant_quotes`` are
    requested by the schema but not printed here.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"},
                        },
                    },
                },
            },
            # The three keys printed below are indexed with [], so they are
            # declared required; an omitted field would raise KeyError.
            "required": ["main_thesis", "key_points", "action_items"],
        }

        result = client.generate.json(
            url='https://research.example.com/paper',
            json_schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.',
        )

        print('Main Thesis:')
        print(result['main_thesis'])
        print('\nKey Points:')
        for i, point in enumerate(result['key_points'], 1):
            print(f"{i}. {point}")
        print('\nAction Items:')
        for i, item in enumerate(result['action_items'], 1):
            print(f"{i}. {item}")
extract_insights()

Writing Effective Instructions
Section titled “Writing Effective Instructions”
Be Specific and Clear
Section titled “Be Specific and Clear”
# Vague
instructions = 'Analyze this content'

# Specific
instructions = 'Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes'

Include Context
Section titled “Include Context”
# Good: With context
instructions = '''You are analyzing product reviews for a purchasing decision.
Extract the most mentioned pros and cons, and identify any dealbreaker issues.'''

Options Reference
Section titled “Options Reference”
generate.json()
Section titled “generate.json()”

| Parameter | Type | Required | Description |
|---|---|---|---|
| url | str | Yes | URL to analyze and transform |
| json_schema | object | Yes | JSON Schema for output structure |
| instructions | str | Yes | AI instructions for transformation |
| geo_target | dict | No | Geotargeting parameters (e.g., {'country': 'GB'}) for region-specific content |
| nocache | bool | No | Bypass cache and force fresh generation (default: False) |
Best Practices
Section titled “Best Practices”
1. Use Descriptive Schema Properties
Section titled “1. Use Descriptive Schema Properties”
schema = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
            "description": "Overall sentiment based on tone and language used"
        }
    }
}

2. Handle Errors
Section titled “2. Handle Errors”
import tabstack
from tabstack import Tabstack

with Tabstack() as client:
    try:
        result = client.generate.json(
            url=url,
            json_schema=schema,
            instructions=instructions
        )
    except tabstack.APIStatusError as error:
        print(f"Generation failed: {error.status_code} - {error}")

3. Use Per-Request Options
Section titled “3. Use Per-Request Options”
# Override timeout for long-running generations
result = client.with_options(timeout=120.0).generate.json(
    url=url,
    json_schema=schema,
    instructions=instructions
)

Next Steps
Section titled “Next Steps”
- Automate Features: Execute complex browser automation tasks
- Error Handling: Build robust applications
- REST API Reference: See the underlying REST API endpoint