Generate Features
The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.
Overview
import os
from tabstack import Tabstack
with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Generate transformed JSON from a URL.
    # `schema` is a JSON Schema dict describing the desired output; it is not
    # defined in this overview snippet and must be supplied by the caller.
    result = client.generate.json(
        url='https://example.com',
        json_schema=schema,
        instructions='Your transformation instructions',
        geo_target=None,  # Optional: geotarget requests (e.g., {'country': 'GB'})
        nocache=False,  # Optional: bypass cache
    )
Generate JSON
Transform web content into structured data using AI with custom instructions.
Basic Usage
import os
from tabstack import Tabstack
with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Output shape: a "summaries" list of per-story records. Every field is
    # listed under "required" because the loop below indexes each key directly.
    schema = {
        "type": "object",
        "properties": {
            "summaries": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "category": {"type": "string"},
                        "summary": {"type": "string"},
                    },
                    "required": ["title", "category", "summary"],
                },
            },
        },
        "required": ["summaries"],
    }

    result = client.generate.json(
        url='https://news.ycombinator.com',
        json_schema=schema,
        instructions='For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary',
    )

    # Print a "title [category]" line followed by the summary for each story.
    for summary in result['summaries']:
        print(f"{summary['title']} [{summary['category']}]")
        print(f" {summary['summary']}\n")
Async Usage
import asyncio
import os
from tabstack import AsyncTabstack
async def generate_summaries():
    """Categorize and summarize each story on a page using the async client.

    Prints one "title [category]" line per generated summary.
    """
    async with AsyncTabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        # The fields are marked required (matching the Basic Usage schema)
        # because the loop below indexes 'summaries', 'title' and 'category'
        # directly and would raise KeyError if one were omitted.
        schema = {
            "type": "object",
            "properties": {
                "summaries": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "category": {"type": "string"},
                            "summary": {"type": "string"},
                        },
                        "required": ["title", "category", "summary"],
                    },
                },
            },
            "required": ["summaries"],
        }

        result = await client.generate.json(
            url='https://news.ycombinator.com',
            json_schema=schema,
            instructions='For each story, categorize it and write a one-sentence summary',
        )

        for summary in result['summaries']:
            print(f"{summary['title']} [{summary['category']}]")


asyncio.run(generate_summaries())
Real-World Examples
Example 1: Sentiment Analysis
import os
from tabstack import Tabstack
def analyze_sentiment():
    """Analyze the sentiment of a product-review page and print the findings.

    Prints the overall sentiment with its confidence, then the emotional tone,
    key phrases and main topics when the response includes them.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        # Only 'overall_sentiment' and 'confidence' are required; the other
        # fields are optional and are read with .get() below.
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"],
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1",
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment",
                },
                "emotional_tone": {
                    "type": "string",
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["overall_sentiment", "confidence"],
        }

        result = client.generate.json(
            url='https://reviews.example.com/product/123',
            json_schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed',
        )

        # Use the '%' format type instead of multiplying by 100: 0.57 * 100
        # would print as 56.99999999999999, whereas ':.0%' prints 57%.
        print(f"Sentiment: {result['overall_sentiment']} ({result['confidence']:.0%} confident)")
        print(f"Tone: {result.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(result.get('key_phrases', []))}")
        print(f"Topics: {', '.join(result.get('main_topics', []))}")


analyze_sentiment()
Example 2: Content Categorization
import os
from tabstack import Tabstack
def categorize_article():
    """Categorize a blog article and print its title, category, level, time and tags."""
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"],
                },
                "estimated_reading_time": {"type": "number"},
            },
            # Every key indexed directly in the prints below is required, so a
            # response that omits one does not surface as a KeyError here.
            "required": [
                "title",
                "primary_category",
                "reading_level",
                "estimated_reading_time",
                "tags",
            ],
        }

        result = client.generate.json(
            url='https://blog.example.com/article',
            json_schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time',
        )

        print(f"Title: {result['title']}")
        print(f"Category: {result['primary_category']}")
        print(f"Level: {result['reading_level']}")
        print(f"Reading Time: {result['estimated_reading_time']} minutes")
        print(f"Tags: {', '.join(result['tags'])}")


categorize_article()
Example 3: Extract Key Insights
import os
from tabstack import Tabstack
def extract_insights():
    """Extract the thesis, key points and action items from a paper and print them."""
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"},
                        },
                    },
                },
            },
            # The three fields printed below are indexed directly, so they are
            # marked required; 'conclusions' and 'relevant_quotes' stay optional.
            "required": ["main_thesis", "key_points", "action_items"],
        }

        result = client.generate.json(
            url='https://research.example.com/paper',
            json_schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.',
        )

        print('Main Thesis:')
        print(result['main_thesis'])

        # Numbered lists start at 1 for display.
        print('\nKey Points:')
        for i, point in enumerate(result['key_points'], 1):
            print(f"{i}. {point}")

        print('\nAction Items:')
        for i, item in enumerate(result['action_items'], 1):
            print(f"{i}. {item}")


extract_insights()
Writing Effective Instructions
Be Specific and Clear
# Vague: does not say what to analyze or what the output should contain
instructions = 'Analyze this content'
# Specific: names the task (sentiment), the labels to use, and what else to extract
instructions = 'Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes'
Include Context
# Good: With context — the first sentence states why the analysis is being
# done, the second states what to extract
instructions = '''
You are analyzing product reviews for a purchasing decision.
Extract the most mentioned pros and cons, and identify any dealbreaker issues.
'''
Options Reference
generate.json()
| Parameter | Type | Required | Description |
|---|---|---|---|
| url | str | Yes | URL to analyze and transform |
| json_schema | object | Yes | JSON Schema for output structure |
| instructions | str | Yes | AI instructions for transformation |
| geo_target | dict | No | Geotargeting parameters (e.g., {'country': 'GB'}) for region-specific content |
| nocache | bool | No | Bypass cache and force fresh generation (default: False) |
Best Practices
1. Use Descriptive Schema Properties
# The "enum" restricts the allowed values; the "description" says how the
# value should be chosen.
schema = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
            "description": "Overall sentiment based on tone and language used",
        },
    },
}
2. Handle Errors
import tabstack
from tabstack import Tabstack
# NOTE(review): no api_key is passed here, unlike the other examples on this
# page — presumably the client reads it from the environment; confirm.
# `url`, `schema` and `instructions` are not defined in this snippet and must
# be supplied by the caller.
with Tabstack() as client:
    try:
        result = client.generate.json(
            url=url,
            json_schema=schema,
            instructions=instructions,
        )
    except tabstack.APIStatusError as error:
        # Only APIStatusError is handled; any other exception propagates.
        print(f"Generation failed: {error.status_code} - {error}")
3. Use Per-Request Options
# Override timeout for long-running generations.
# The object returned by with_options() is used for this one call only;
# `client` itself is not rebound.
result = (
    client
    .with_options(timeout=120.0)
    .generate
    .json(url=url, json_schema=schema, instructions=instructions)
)
Next Steps
- Automate Features: Execute complex browser automation tasks
- Error Handling: Build robust applications
- REST API Reference: See the underlying REST API endpoint