Generate Features
The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.
All Generate methods are async and must be called with await.
Overview
import asyncio
import os
from tabstack import TABStack
async def main():
    """Show the call shape of ``generate.json`` inside a TABStack session."""
    api_key = os.getenv('TABSTACK_API_KEY')
    async with TABStack(api_key=api_key) as tabs:
        # Generate transformed JSON from a URL.
        # url, schema and instructions are placeholders that this snippet
        # does not define; substitute your own values.
        await tabs.generate.json(
            url,
            schema,
            instructions,
            nocache=False,
        )


asyncio.run(main())
Generate JSON
Transform web content into structured data using AI with custom instructions.
Basic Usage
import asyncio
import os
from tabstack import TABStack
async def main():
    """Categorize and summarize the stories on a page, then print them."""
    # Shape of one generated entry; all three fields are required.
    story_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "summary": {"type": "string"},
        },
        "required": ["title", "category", "summary"],
    }
    # Top-level result: an object holding the array of entries.
    schema = {
        "type": "object",
        "properties": {
            "summaries": {"type": "array", "items": story_schema},
        },
        "required": ["summaries"],
    }
    prompt = (
        'For each story, categorize it (tech, business, science, etc.) '
        'and write a one-sentence summary'
    )

    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        response = await tabs.generate.json(
            url='https://news.ycombinator.com',
            schema=schema,
            instructions=prompt,
        )

        for story in response.data['summaries']:
            print(f"{story['title']} [{story['category']}]")
            print(f" {story['summary']}\n")


asyncio.run(main())
Real-World Examples
Example 1: Sentiment Analysis
async def analyze_sentiment():
    """Analyze the sentiment of a product-review page and print a report.

    The schema requires ``overall_sentiment`` and ``confidence``; the
    remaining fields (``key_phrases``, ``emotional_tone``, ``main_topics``)
    are optional, so they are read with ``.get`` and a fallback.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"]
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1"
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment"
                },
                "emotional_tone": {
                    "type": "string"
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["overall_sentiment", "confidence"]
        }

        result = await tabs.generate.json(
            url='https://reviews.example.com/product/123',
            schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed'
        )

        data = result.data
        # Use the "%" format spec instead of multiplying by 100 by hand:
        # float multiplication leaks artifacts into the output
        # (0.57 * 100 == 56.99999999999999, and 0.85 * 100 prints "85.0").
        print(f"Sentiment: {data['overall_sentiment']} ({data['confidence']:.0%} confident)")
        print(f"Tone: {data.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(data.get('key_phrases', []))}")
        print(f"Topics: {', '.join(data.get('main_topics', []))}")


asyncio.run(analyze_sentiment())
Example 2: Content Categorization
async def categorize_article():
    """Categorize an article and print its category, level, time and tags.

    The schema declares no ``required`` fields, so every key in the result
    is treated as optional and read with ``.get`` and a fallback instead of
    direct indexing, which would raise ``KeyError`` on a missing field.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"]
                },
                "estimated_reading_time": {"type": "number"}
            }
        }

        result = await tabs.generate.json(
            url='https://blog.example.com/article',
            schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time'
        )

        data = result.data
        print(f"Title: {data.get('title', 'N/A')}")
        print(f"Category: {data.get('primary_category', 'N/A')}")
        print(f"Level: {data.get('reading_level', 'N/A')}")
        # Only append the unit when a reading time was actually returned.
        reading_time = data.get('estimated_reading_time')
        if reading_time is not None:
            print(f"Reading Time: {reading_time} minutes")
        else:
            print("Reading Time: N/A")
        print(f"Tags: {', '.join(data.get('tags', []))}")


asyncio.run(categorize_article())
Example 3: Extract Key Insights
async def extract_insights():
    """Pull the thesis, key points and action items from a paper and print them.

    The schema declares no ``required`` fields, so the printed keys are read
    with ``.get`` and a fallback; a missing field prints as ``N/A`` or an
    empty list instead of raising ``KeyError``.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"}
                        }
                    }
                }
            }
        }

        result = await tabs.generate.json(
            url='https://research.example.com/paper',
            schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.'
        )

        data = result.data
        print('Main Thesis:')
        print(data.get('main_thesis', 'N/A'))

        print('\nKey Points:')
        for i, point in enumerate(data.get('key_points', []), 1):
            print(f"{i}. {point}")

        print('\nAction Items:')
        for i, item in enumerate(data.get('action_items', []), 1):
            print(f"{i}. {item}")


asyncio.run(extract_insights())
Writing Effective Instructions
Be Specific and Clear
# ❌ Vague
instructions = "Analyze this content"
# ✅ Specific
instructions = (
    'Analyze the sentiment of customer reviews, '
    'categorize each as positive/negative/neutral, '
    'and extract common themes'
)
Include Context
# ✅ Good: With context
instructions = (
    '\n'
    'You are analyzing product reviews for a purchasing decision.\n'
    'Extract the most mentioned pros and cons, and identify any dealbreaker issues.\n'
)
Options Reference
generate.json()
| Parameter | Type | Default | Description |
|---|---|---|---|
| url | str | required | URL to analyze and transform |
| schema | dict | required | JSON Schema for output structure |
| instructions | str | required | AI instructions for transformation |
| nocache | bool | False | Bypass cache and force fresh generation |
Best Practices
1. Use Descriptive Schema Properties
# The "description" tells the model how to fill the field.
sentiment_property = {
    "type": "string",
    "enum": ["positive", "negative", "neutral"],
    "description": "Overall sentiment based on tone and language used",
}
schema = {
    "type": "object",
    "properties": {"sentiment": sentiment_property},
}
2. Handle Errors
try:
result = await tabs.generate.json(url, schema, instructions)
except Exception as error:
print(f"Generation failed: {error}")
Next Steps
- Automate Features: Execute complex browser automation tasks
- Error Handling: Build robust applications
- REST API Reference: See the underlying REST API endpoint