Skip to main content

Generate Features

The Generate operator uses AI to transform and analyze web content according to your instructions. Unlike Extract, which pulls data as-is, Generate creates new insights, summaries, and transformations of the original content.

All Generate methods are async: call them with await from inside an async function.

Overview

import asyncio
import os
from tabstack import TABStack

async def main():
    """Show the call shape of generate.json() inside an async client session."""
    # The client is used as an async context manager; the API key is read
    # from the TABSTACK_API_KEY environment variable.
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        # Generate transformed JSON from a URL
        # NOTE: url, schema and instructions are placeholders in this synopsis;
        # define them before running this snippet.
        await tabs.generate.json(url, schema, instructions, nocache=False)

asyncio.run(main())

Generate JSON

Transform web content into structured data using AI with custom instructions.

Basic Usage

import asyncio
import os
from tabstack import TABStack

async def main():
    """Categorize and summarize the stories on a page, then print each one."""
    # Shape of a single generated entry; every field is mandatory.
    story_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "summary": {"type": "string"},
        },
        "required": ["title", "category", "summary"],
    }
    # Top-level result: an object holding the list of entries.
    schema = {
        "type": "object",
        "properties": {
            "summaries": {"type": "array", "items": story_schema},
        },
        "required": ["summaries"],
    }
    prompt = 'For each story, categorize it (tech, business, science, etc.) and write a one-sentence summary'

    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        result = await tabs.generate.json(
            url='https://news.ycombinator.com',
            schema=schema,
            instructions=prompt,
        )

        # One header line and one summary line per story.
        summaries = result.data['summaries']
        for summary in summaries:
            print(f"{summary['title']} [{summary['category']}]")
            print(f" {summary['summary']}\n")


asyncio.run(main())

Real-World Examples

Example 1: Sentiment Analysis

async def analyze_sentiment():
    """Analyze the sentiment of a product-review page and print a short report.

    Only ``overall_sentiment`` and ``confidence`` are declared required in the
    schema, so they are read with subscripts; the optional fields are read with
    ``.get()`` and a fallback.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "overall_sentiment": {
                    "type": "string",
                    "enum": ["positive", "negative", "neutral"]
                },
                "confidence": {
                    "type": "number",
                    "description": "Confidence score from 0 to 1"
                },
                "key_phrases": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Important phrases that indicate sentiment"
                },
                "emotional_tone": {
                    "type": "string"
                },
                "main_topics": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["overall_sentiment", "confidence"]
        }

        result = await tabs.generate.json(
            url='https://reviews.example.com/product/123',
            schema=schema,
            instructions='Analyze the overall sentiment of the product reviews, extract key phrases, and identify main topics discussed'
        )

        data = result.data
        # Use the percent format instead of multiplying by 100: it avoids float
        # artifacts such as 56.99999999999999% and prints a whole percentage.
        print(f"Sentiment: {data['overall_sentiment']} ({data['confidence']:.0%} confident)")
        print(f"Tone: {data.get('emotional_tone', 'N/A')}")
        print(f"Key phrases: {', '.join(data.get('key_phrases', []))}")
        print(f"Topics: {', '.join(data.get('main_topics', []))}")


asyncio.run(analyze_sentiment())

Example 2: Content Categorization

async def categorize_article():
    """Categorize an article and print its title, category, level, time and tags.

    Every field that is read with a subscript below is listed under
    ``required`` in the schema, so the printing code cannot hit a missing key
    when the result conforms to the schema.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "primary_category": {"type": "string"},
                "secondary_categories": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "target_audience": {"type": "string"},
                "reading_level": {
                    "type": "string",
                    "enum": ["beginner", "intermediate", "advanced"]
                },
                "estimated_reading_time": {
                    "type": "number",
                    # The report below prints this value as minutes, so state the unit.
                    "description": "Estimated reading time in minutes"
                }
            },
            # Fields the report below depends on.
            "required": [
                "title",
                "primary_category",
                "tags",
                "reading_level",
                "estimated_reading_time"
            ]
        }

        result = await tabs.generate.json(
            url='https://blog.example.com/article',
            schema=schema,
            instructions='Categorize this article, identify relevant tags, determine the target audience, assess reading difficulty, and estimate reading time'
        )

        data = result.data
        print(f"Title: {data['title']}")
        print(f"Category: {data['primary_category']}")
        print(f"Level: {data['reading_level']}")
        print(f"Reading Time: {data['estimated_reading_time']} minutes")
        print(f"Tags: {', '.join(data['tags'])}")


asyncio.run(categorize_article())

Example 3: Extract Key Insights

async def extract_insights():
    """Extract thesis, key points and action items from a paper and print them.

    The schema also requests conclusions and quotes; only the fields printed
    below are declared ``required``, because they are read with subscripts.
    """
    async with TABStack(api_key=os.getenv('TABSTACK_API_KEY')) as tabs:
        schema = {
            "type": "object",
            "properties": {
                "main_thesis": {"type": "string"},
                "key_points": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "conclusions": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "action_items": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "relevant_quotes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "quote": {"type": "string"},
                            "context": {"type": "string"}
                        }
                    }
                }
            },
            # Fields the report below depends on.
            "required": ["main_thesis", "key_points", "action_items"]
        }

        result = await tabs.generate.json(
            url='https://research.example.com/paper',
            schema=schema,
            instructions='Extract the main thesis, key supporting points, conclusions, and actionable recommendations. Include 2-3 relevant quotes with context.'
        )

        data = result.data
        print('Main Thesis:')
        print(data['main_thesis'])
        print('\nKey Points:')
        # Number the list items starting from 1.
        for i, point in enumerate(data['key_points'], 1):
            print(f"{i}. {point}")
        print('\nAction Items:')
        for i, item in enumerate(data['action_items'], 1):
            print(f"{i}. {item}")


asyncio.run(extract_insights())

Writing Effective Instructions

Be Specific and Clear

# ❌ Vague: names no task, no labels, and no desired output
instructions = 'Analyze this content'

# ✅ Specific: states the task, the allowed labels, and what to extract
instructions = 'Analyze the sentiment of customer reviews, categorize each as positive/negative/neutral, and extract common themes'

Include Context

# ✅ Good: With context
# The first sentence states the purpose of the analysis; the second states the task.
instructions = '''
You are analyzing product reviews for a purchasing decision.
Extract the most mentioned pros and cons, and identify any dealbreaker issues.
'''

Options Reference

generate.json()

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| url | str | required | URL to analyze and transform |
| schema | dict | required | JSON Schema for output structure |
| instructions | str | required | AI instructions for transformation |
| nocache | bool | False | Bypass cache and force fresh generation |

Best Practices

1. Use Descriptive Schema Properties

# A property can carry an "enum" restricting the allowed values and a
# "description" explaining how the value should be determined.
schema = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
            "description": "Overall sentiment based on tone and language used"
        }
    }
}

2. Handle Errors

# Fragment: runs inside an async function where `tabs`, `url`, `schema` and
# `instructions` are already defined.
# NOTE(review): `Exception` is a catch-all; if the SDK exposes specific error
# classes, catching those would be more precise — confirm against the SDK.
try:
    result = await tabs.generate.json(url, schema, instructions)
except Exception as error:
    # `result` is not assigned on this path.
    print(f"Generation failed: {error}")

Next Steps