Skip to main content

Generate Features

The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.

Overview

import os
from tabstack import Tabstack

# `schema` is a placeholder for the JSON Schema dict describing the output
# you want back; the Basic Usage example builds a complete one.
with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Generate transformed JSON from a URL
    result = client.generate.json(
        url='https://example.com',
        json_schema=schema,
        instructions='Your transformation instructions',
        geo_target=None,  # Optional: geotarget requests (e.g., {'country': 'GB'})
        nocache=False,  # Optional: bypass cache
    )

Generate JSON

Transform web content into structured data using AI with custom instructions.

Basic Usage

import os
from tabstack import Tabstack

with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
    # Output shape: an object holding a "summaries" array whose items each
    # carry a title, a category and a one-sentence summary.
    schema = {
        "type": "object",
        "properties": {
            "summaries": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "category": {"type": "string"},
                        "summary": {"type": "string"},
                    },
                    "required": ["title", "category", "summary"],
                },
            },
        },
        "required": ["summaries"],
    }

    result = client.generate.json(
        url='https://news.ycombinator.com',
        json_schema=schema,
        instructions='For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary',
    )

    # Every key indexed here is listed as required in the schema above.
    for summary in result['summaries']:
        print(f"{summary['title']} [{summary['category']}]")
        print(f" {summary['summary']}\n")

Async Usage

import asyncio
import os
from tabstack import AsyncTabstack

async def generate_summaries():
    """Generate categorized story summaries with the async client and print them.

    Opens an AsyncTabstack client, awaits generate.json() for the Hacker News
    front page, then prints each story's title followed by its category.
    """
    async with AsyncTabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        # The "required" lists mirror the Basic Usage schema: the loop below
        # indexes these keys directly, so the schema states that expectation.
        schema = {
            "type": "object",
            "properties": {
                "summaries": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "category": {"type": "string"},
                            "summary": {"type": "string"},
                        },
                        "required": ["title", "category", "summary"],
                    },
                },
            },
            "required": ["summaries"],
        }

        result = await client.generate.json(
            url='https://news.ycombinator.com',
            json_schema=schema,
            instructions='For each story, categorize it and write a one-sentence summary',
        )

        for summary in result['summaries']:
            print(f"{summary['title']} [{summary['category']}]")

asyncio.run(generate_summaries())

Real-World Examples

Example 1: Sentiment Analysis

import os
from tabstack import Tabstack

def analyze_sentiment():
    """Generate a sentiment report for a product-review page and print it.

    Requests overall sentiment, a confidence score, key phrases, emotional
    tone and main topics, then prints a four-line report. Only
    "overall_sentiment" and "confidence" are required by the schema, so the
    remaining fields are read with fallbacks.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"],
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1",
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment",
                },
                "emotional_tone": {
                    "type": "string",
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["overall_sentiment", "confidence"],
        }

        result = client.generate.json(
            url='https://reviews.example.com/product/123',
            json_schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed',
        )

        # ':.0%' scales the 0-1 score by 100 and rounds it; multiplying by
        # hand prints float artifacts such as 56.99999999999999%.
        print(f"Sentiment: {result['overall_sentiment']} ({result['confidence']:.0%} confident)")
        # Optional fields may be absent, so fall back to a placeholder/empty list.
        print(f"Tone: {result.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(result.get('key_phrases', []))}")
        print(f"Topics: {', '.join(result.get('main_topics', []))}")

analyze_sentiment()

Example 2: Content Categorization

import os
from tabstack import Tabstack

def categorize_article():
    """Generate categorization metadata for a blog article and print it.

    Requests a title, primary and secondary categories, tags, target
    audience, reading level and estimated reading time, then prints the
    title, category, level, reading time and tags.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"],
                },
                "estimated_reading_time": {"type": "number"},
            },
            # Each field printed with result[...] below is listed as required,
            # so the schema states what the printing code depends on.
            "required": [
                "title",
                "primary_category",
                "tags",
                "reading_level",
                "estimated_reading_time",
            ],
        }

        result = client.generate.json(
            url='https://blog.example.com/article',
            json_schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time',
        )

        print(f"Title: {result['title']}")
        print(f"Category: {result['primary_category']}")
        print(f"Level: {result['reading_level']}")
        print(f"Reading Time: {result['estimated_reading_time']} minutes")
        print(f"Tags: {', '.join(result['tags'])}")

categorize_article()

Example 3: Extract Key Insights

import os
from tabstack import Tabstack

def extract_insights():
    """Generate key insights from a research paper page and print them.

    Requests the main thesis, key points, conclusions, action items and
    quotes with context, then prints the thesis followed by numbered lists
    of key points and action items.
    """
    with Tabstack(api_key=os.getenv('TABSTACK_API_KEY')) as client:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"},
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"},
                        },
                    },
                },
            },
            # The three fields printed with result[...] below are listed as
            # required; "conclusions" and "relevant_quotes" stay optional.
            "required": ["main_thesis", "key_points", "action_items"],
        }

        result = client.generate.json(
            url='https://research.example.com/paper',
            json_schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.',
        )

        print('Main Thesis:')
        print(result['main_thesis'])
        print('\nKey Points:')
        for i, point in enumerate(result['key_points'], 1):
            print(f"{i}. {point}")
        print('\nAction Items:')
        for i, item in enumerate(result['action_items'], 1):
            print(f"{i}. {item}")

extract_insights()

Writing Effective Instructions

Be Specific and Clear

# Vague: names no task, no categories, and no expected output
instructions = 'Analyze this content'

# Specific: states the task, the allowed labels, and what to extract
instructions = (
    'Analyze the sentiment of customer reviews, '
    'categorize each as positive/negative/neutral, '
    'and extract common themes'
)

Include Context

# Good: With context (the role/goal sentence comes first, the task second)
instructions = (
    '\n'
    'You are analyzing product reviews for a purchasing decision.\n'
    'Extract the most mentioned pros and cons, and identify any dealbreaker issues.\n'
)

Options Reference

generate.json()

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| url | str | Yes | URL to analyze and transform |
| json_schema | object | Yes | JSON Schema for output structure |
| instructions | str | Yes | AI instructions for transformation |
| geo_target | dict | No | Geotargeting parameters (e.g., {'country': 'GB'}) for region-specific content |
| nocache | bool | No | Bypass cache and force fresh generation (default: False) |

Best Practices

1. Use Descriptive Schema Properties

# A single string property constrained to three labels; the "description"
# spells out what the value should be based on.
schema = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
            "description": "Overall sentiment based on tone and language used",
        },
    },
}

2. Handle Errors

import tabstack
from tabstack import Tabstack

# `url`, `schema` and `instructions` are placeholders for your own values.
# NOTE(review): no api_key is passed here, unlike the other examples;
# presumably the client falls back to the TABSTACK_API_KEY environment
# variable. Confirm against the SDK before relying on it.
with Tabstack() as client:
    try:
        result = client.generate.json(
            url=url,
            json_schema=schema,
            instructions=instructions,
        )
    except tabstack.APIStatusError as error:
        # Report the status code carried by the error alongside its message.
        print(f"Generation failed: {error.status_code} - {error}")

3. Use Per-Request Options

# Override timeout for long-running generations
patient_client = client.with_options(timeout=120.0)
result = patient_client.generate.json(
    url=url,
    json_schema=schema,
    instructions=instructions,
)

Next Steps