Skip to content
Get started

Generate Features

Use AI to transform and analyze web content with the Tabstack Python SDK Generate operator.

The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.

import os
from tabstack import Tabstack
# NOTE: `schema` is not defined in this snippet; assign a JSON Schema dict
# describing the desired output to `schema` before running it.
with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Generate transformed JSON from a URL
    result = client.generate.json(
        url='https://example.com',
        json_schema=schema,
        instructions='Your transformation instructions',
        geo_target=None,  # Optional: geotarget requests (e.g., {'country': 'GB'})
        nocache=False,  # Optional: bypass cache
    )

Transform web content into structured data using AI with custom instructions.

import os
from tabstack import Tabstack
with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Desired output: an object holding a "summaries" array whose items each
    # carry a title, a category and a summary (all marked required, so the
    # loop below can index them directly).
    schema = {
        "type": "object",
        "properties": {
            "summaries": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "category": {"type": "string"},
                        "summary": {"type": "string"},
                    },
                    "required": ["title", "category", "summary"],
                },
            },
        },
        "required": ["summaries"],
    }

    result = client.generate.json(
        url='https://news.ycombinator.com',
        json_schema=schema,
        instructions='For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary',
    )

    # Print each story as "title [category]" followed by its summary.
    for summary in result['summaries']:
        print(f"{summary['title']} [{summary['category']}]")
        print(f" {summary['summary']}\n")
import asyncio
import os
from tabstack import AsyncTabstack
async def generate_summaries():
    """Generate categorized one-sentence story summaries and print them.

    Async counterpart of the synchronous example: it awaits
    ``client.generate.json`` on an ``AsyncTabstack`` client and prints one
    ``title [category]`` line per returned summary.
    """
    async with AsyncTabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        # The "required" lists mirror the synchronous example; the loop below
        # indexes these keys directly, so the schema should demand them.
        schema = {
            "type": "object",
            "properties": {
                "summaries": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "category": {"type": "string"},
                            "summary": {"type": "string"},
                        },
                        "required": ["title", "category", "summary"],
                    },
                },
            },
            "required": ["summaries"],
        }

        result = await client.generate.json(
            url='https://news.ycombinator.com',
            json_schema=schema,
            instructions='For each story, categorize it and write a one-sentence summary',
        )

        for summary in result['summaries']:
            print(f"{summary['title']} [{summary['category']}]")


asyncio.run(generate_summaries())
import os
from tabstack import Tabstack
def analyze_sentiment():
    """Analyze the sentiment of a product-review page and print a report.

    Requests a structured result (overall sentiment, confidence, key phrases,
    emotional tone, main topics) and prints it. Only ``overall_sentiment`` and
    ``confidence`` are required by the schema, so the remaining fields are
    read with ``.get`` and a fallback.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"],
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1",
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment",
                },
                "emotional_tone": {
                    "type": "string",
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["overall_sentiment", "confidence"],
        }

        result = client.generate.json(
            url='https://reviews.example.com/product/123',
            json_schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed',
        )

        # Use the percent format spec instead of multiplying by 100, which
        # can print float artefacts such as 56.99999999999999%.
        print(f"Sentiment: {result['overall_sentiment']} ({result['confidence']:.0%} confident)")
        print(f"Tone: {result.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(result.get('key_phrases', []))}")
        print(f"Topics: {', '.join(result.get('main_topics', []))}")


analyze_sentiment()
import os
from tabstack import Tabstack
def categorize_article():
    """Categorize a blog article and print its title, category, level, time and tags.

    The schema marks every field that is printed below as required, because
    those fields are read with ``[]`` and would raise ``KeyError`` if absent.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"],
                },
                "estimated_reading_time": {"type": "number"},
            },
            # Exactly the keys indexed directly in the print statements below.
            "required": [
                "title",
                "primary_category",
                "tags",
                "reading_level",
                "estimated_reading_time",
            ],
        }

        result = client.generate.json(
            url='https://blog.example.com/article',
            json_schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time',
        )

        print(f"Title: {result['title']}")
        print(f"Category: {result['primary_category']}")
        print(f"Level: {result['reading_level']}")
        print(f"Reading Time: {result['estimated_reading_time']} minutes")
        print(f"Tags: {', '.join(result['tags'])}")


categorize_article()
import os
from tabstack import Tabstack
def extract_insights():
    """Extract the thesis, key points and action items from a paper and print them.

    The schema also requests ``conclusions`` and ``relevant_quotes``; they are
    part of the generated result but are not printed by this example.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"},
                        },
                    },
                },
            },
            # The three keys indexed directly below; without this a missing
            # key would surface as a KeyError while printing.
            "required": ["main_thesis", "key_points", "action_items"],
        }

        result = client.generate.json(
            url='https://research.example.com/paper',
            json_schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.',
        )

        print('Main Thesis:')
        print(result['main_thesis'])

        print('\nKey Points:')
        for i, point in enumerate(result['key_points'], 1):
            print(f"{i}. {point}")

        print('\nAction Items:')
        for i, item in enumerate(result['action_items'], 1):
            print(f"{i}. {item}")


extract_insights()
# Bad: vague -- gives no indication of what to look for
instructions = "Analyze this content"

# Better: specific -- names the task, the labels, and the expected output
instructions = "Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes"

# Good: with context -- also explains the purpose of the analysis
instructions = (
    "\n"
    "You are analyzing product reviews for a purchasing decision.\n"
    "Extract the most mentioned pros and cons, and identify any dealbreaker issues.\n"
)
| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| `url` | `str` | Yes | URL to analyze and transform |
| `json_schema` | `object` | Yes | JSON Schema for output structure |
| `instructions` | `str` | Yes | AI instructions for transformation |
| `geo_target` | `dict` | No | Geotargeting parameters (e.g., `{'country': 'GB'}`) for region-specific content |
| `nocache` | `bool` | No | Bypass cache and force fresh generation (default: `False`) |
# A property-level "description" is included alongside the type and the
# allowed enum values.
sentiment_property = {
    "type": "string",
    "enum": ["positive", "negative", "neutral"],
    "description": "Overall sentiment based on tone and language used",
}

schema = {
    "type": "object",
    "properties": {"sentiment": sentiment_property},
}
import tabstack
from tabstack import Tabstack
# `url`, `schema` and `instructions` are not defined in this snippet; assign
# them before running it.
# NOTE(review): no api_key is passed here, unlike the other examples --
# presumably the client falls back to an environment variable; confirm.
with Tabstack() as client:
    try:
        result = client.generate.json(
            url=url,
            json_schema=schema,
            instructions=instructions,
        )
    except tabstack.APIStatusError as error:
        # Report the status code carried by the error together with its message.
        print(f"Generation failed: {error.status_code} - {error}")
# Override timeout for long-running generations.
# `client`, `url`, `schema` and `instructions` come from the surrounding code.
# NOTE(review): 120.0 is presumably in seconds -- confirm against the SDK.
patient_client = client.with_options(timeout=120.0)
result = patient_client.generate.json(
    url=url,
    json_schema=schema,
    instructions=instructions,
)