
Job Listings Aggregator

Extract structured job listings from multiple company career pages. Shows how to apply a consistent schema across a list of URLs using /extract/json.

Given a list of company career page URLs, this example extracts structured job postings from each using client.extract.json(). The same schema applies to every URL — useful for hiring intent monitoring, prospect research signals, or any workflow where you need normalized job data across a set of employers.

import Tabstack, { RateLimitError, AuthenticationError } from "@tabstack/sdk";

const client = new Tabstack();

const careerPages = [
  "https://stripe.com/jobs",
  "https://vercel.com/careers",
  "https://planetscale.com/careers",
];

type Job = {
  title: string;
  department: string;
  location: string;
  remote_ok: boolean;
  employment_type: "full-time" | "part-time" | "contract" | "internship";
};

const jobSchema = {
  type: "object",
  properties: {
    jobs: {
      type: "array",
      items: {
        type: "object",
        properties: {
          title: { type: "string" },
          department: { type: "string" },
          location: { type: "string" },
          remote_ok: { type: "boolean" },
          employment_type: {
            type: "string",
            enum: ["full-time", "part-time", "contract", "internship"],
          },
        },
        required: ["title", "department", "location", "remote_ok", "employment_type"],
        additionalProperties: false,
      },
    },
  },
  required: ["jobs"],
  additionalProperties: false,
};

async function extractJobs(url: string): Promise<{ url: string; jobs: Job[] }> {
  try {
    // The SDK returns `Record<string, unknown>` for extract.json -- cast to
    // the shape declared by your schema so downstream code is typed.
    const result = (await client.extract.json({
      url,
      json_schema: jobSchema,
      effort: "standard",
    })) as { jobs?: Job[] };
    return { url, jobs: result.jobs ?? [] };
  } catch (error) {
    if (error instanceof RateLimitError) {
      console.error(`Rate limit hit for ${url} -- retry after backoff`);
    } else if (error instanceof AuthenticationError) {
      throw error; // fatal, no point continuing
    } else {
      console.error(`Extraction failed for ${url}:`, error);
    }
    return { url, jobs: [] };
  }
}

const results = await Promise.all(careerPages.map(extractJobs));

for (const { url, jobs } of results) {
  console.log(`\n${url} -- ${jobs.length} listing(s)`);
  for (const job of jobs) {
    const remote = job.remote_ok ? "remote ok" : "on-site";
    console.log(`  ${job.title} | ${job.department} | ${job.location} | ${remote} | ${job.employment_type}`);
  }
}

client.extract.json() renders the page and maps its content to the provided JSON schema. Every URL in careerPages gets the same schema, so the output is normalized regardless of how each company structures its career page.
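The `as { jobs?: Job[] }` cast trusts that the API honored the schema. If you want a runtime check as well, a type guard works; this is a sketch (isJob and parseJobs are illustrative helpers, not part of the SDK):

```typescript
type Job = {
  title: string;
  department: string;
  location: string;
  remote_ok: boolean;
  employment_type: "full-time" | "part-time" | "contract" | "internship";
};

const EMPLOYMENT_TYPES = ["full-time", "part-time", "contract", "internship"];

// Narrow an unknown value to Job by checking every schema-required field.
function isJob(value: unknown): value is Job {
  if (typeof value !== "object" || value === null) return false;
  const v = value as Record<string, unknown>;
  return (
    typeof v.title === "string" &&
    typeof v.department === "string" &&
    typeof v.location === "string" &&
    typeof v.remote_ok === "boolean" &&
    typeof v.employment_type === "string" &&
    EMPLOYMENT_TYPES.includes(v.employment_type)
  );
}

// Keep only items that match the schema; drop anything malformed.
function parseJobs(result: Record<string, unknown>): Job[] {
  const jobs = Array.isArray(result.jobs) ? result.jobs : [];
  return jobs.filter(isJob);
}
```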

The jobs array in the schema wraps all listings from a single page. If a page has 40 postings, you get 40 items back. If the page is inaccessible or returns no structured data, the result is an empty array — the error handler catches the failure and continues to the next URL.
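Because every page maps to the same schema, cross-company queries reduce to pure functions over the results. For example, pulling all remote-friendly roles across every employer might look like this (remoteRoles is a hypothetical helper, not part of the example above):

```typescript
type Job = {
  title: string;
  department: string;
  location: string;
  remote_ok: boolean;
  employment_type: "full-time" | "part-time" | "contract" | "internship";
};

type PageResult = { url: string; jobs: Job[] };

// Flatten all pages into one list of remote roles, tagging each with its source URL.
function remoteRoles(results: PageResult[]): Array<Job & { source: string }> {
  return results.flatMap(({ url, jobs }) =>
    jobs.filter((job) => job.remote_ok).map((job) => ({ ...job, source: url })),
  );
}
```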

The TypeScript version runs all URLs in parallel with Promise.all. The Python version runs sequentially — swap in asyncio and async/await if throughput matters at scale.
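Note that Promise.all fires every request at once, which can trip rate limits on longer URL lists. A middle ground is a concurrency-limited map; here is a generic sketch (mapWithConcurrency is not part of the SDK -- substitute your preferred pooling library if you have one):

```typescript
// Run `fn` over `items` with at most `limit` calls in flight at a time.
async function mapWithConcurrency<T, R>(
  items: T[],
  limit: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  const results: R[] = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker(): Promise<void> {
    while (next < items.length) {
      const i = next++;
      results[i] = await fn(items[i]);
    }
  }
  await Promise.all(
    Array.from({ length: Math.min(limit, items.length) }, worker),
  );
  return results;
}
```

With this helper, the fan-out line would become `const results = await mapWithConcurrency(careerPages, 2, extractJobs);`.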

effort: "standard" handles most career pages. Use effort: "max" for JS-heavy SPAs that render listings client-side.

Install the SDK:

npm install @tabstack/sdk

Set your API key before running:

export TABSTACK_API_KEY=your_api_key