---
title: Job Listings Aggregator | Tabstack
description: Extract structured job listings from multiple company career pages. Shows how to apply a consistent schema across a list of URLs using /extract/json.
---

Given a list of company career page URLs, this example extracts structured job postings from each using `client.extract.json()`. The same schema applies to every URL — useful for hiring intent monitoring, prospect research signals, or any workflow where you need normalized job data across a set of employers.

## Complete code

- [TypeScript](#tab-panel-2)
- [Python](#tab-panel-3)

```typescript
import Tabstack, { RateLimitError, AuthenticationError } from "@tabstack/sdk";


// Shared SDK client used by every extraction call below. No API key is passed
// explicitly; presumably the SDK reads it from the TABSTACK_API_KEY environment
// variable -- confirm against the SDK documentation.
const client = new Tabstack();


/** Career pages to scan; each URL is run through the same extraction schema. */
const careerPages: string[] = [
  "https://stripe.com/jobs",
  "https://vercel.com/careers",
  "https://planetscale.com/careers",
];


/** Employment arrangements a posting can declare. */
type EmploymentType = "full-time" | "part-time" | "contract" | "internship";

/** A single job posting, typed field-for-field like the JSON schema's items. */
type Job = {
  title: string;
  department: string;
  location: string;
  remote_ok: boolean;
  employment_type: EmploymentType;
};


/**
 * JSON Schema for one job posting. Every field is required and unknown
 * properties are rejected.
 */
const jobItemSchema = {
  type: "object",
  properties: {
    title: { type: "string" },
    department: { type: "string" },
    location: { type: "string" },
    remote_ok: { type: "boolean" },
    employment_type: {
      type: "string",
      enum: ["full-time", "part-time", "contract", "internship"],
    },
  },
  required: ["title", "department", "location", "remote_ok", "employment_type"],
  additionalProperties: false,
};

/**
 * Top-level extraction schema: an object whose required `jobs` array holds
 * the postings found on a single page.
 */
const jobSchema = {
  type: "object",
  properties: {
    jobs: { type: "array", items: jobItemSchema },
  },
  required: ["jobs"],
  additionalProperties: false,
};


/**
 * Extracts the job postings from a single career page.
 *
 * @param url - Career page to extract from.
 * @returns The URL paired with its postings. The list is empty when the
 *   result has no `jobs` field or when extraction fails with a recoverable
 *   error (which is logged).
 * @throws AuthenticationError - rethrown as-is, since it is treated as fatal.
 */
async function extractJobs(url: string): Promise<{ url: string; jobs: Job[] }> {
  try {
    // extract.json is typed as `Record<string, unknown>` by the SDK, so the
    // response is cast to the shape the schema declares.
    const response = await client.extract.json({
      url,
      json_schema: jobSchema,
      effort: "standard",
    });
    const { jobs } = response as { jobs?: Job[] };
    return { url, jobs: jobs ?? [] };
  } catch (error) {
    if (error instanceof RateLimitError) {
      // Only logged here; no retry is attempted by this function.
      console.error(`Rate limit hit for ${url} -- retry after backoff`);
      return { url, jobs: [] };
    }
    if (error instanceof AuthenticationError) {
      // Fatal: propagate to the caller instead of recording an empty result.
      throw error;
    }
    console.error(`Extraction failed for ${url}:`, error);
    return { url, jobs: [] };
  }
}


// Start every extraction at once and wait for all of them. An
// AuthenticationError rethrown by extractJobs rejects the whole batch.
const results = await Promise.all(careerPages.map((pageUrl) => extractJobs(pageUrl)));


// Print a header line per career page, then one pipe-delimited line per posting.
results.forEach(({ url, jobs }) => {
  console.log(`\n${url} -- ${jobs.length} listing(s)`);
  jobs.forEach((posting) => {
    const remoteLabel = posting.remote_ok ? "remote ok" : "on-site";
    console.log(
      `  ${posting.title} | ${posting.department} | ${posting.location} | ${remoteLabel} | ${posting.employment_type}`,
    );
  });
});
```

```python
import os
from tabstack import Tabstack, RateLimitError, AuthenticationError


# Shared API client. The key comes from the TABSTACK_API_KEY environment
# variable; the os.environ[...] lookup raises KeyError if it is not set.
client = Tabstack(api_key=os.environ["TABSTACK_API_KEY"])


# Career pages to scan; each URL is extracted with the same job_schema.
career_pages = [
    "https://stripe.com/jobs",
    "https://vercel.com/careers",
    "https://planetscale.com/careers",
]


# JSON Schema for one job posting: every field is required and unknown
# properties are rejected.
_job_item_schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "department": {"type": "string"},
        "location": {"type": "string"},
        "remote_ok": {"type": "boolean"},
        "employment_type": {
            "type": "string",
            "enum": ["full-time", "part-time", "contract", "internship"],
        },
    },
    "required": ["title", "department", "location", "remote_ok", "employment_type"],
    "additionalProperties": False,
}

# Top-level extraction schema: an object whose required "jobs" array holds the
# postings found on a single page.
job_schema = {
    "type": "object",
    "properties": {
        "jobs": {"type": "array", "items": _job_item_schema},
    },
    "required": ["jobs"],
    "additionalProperties": False,
}


# Process the career pages one at a time: extract, print a header with the
# listing count, then one pipe-delimited line per posting. Recoverable errors
# are reported and the loop moves on; authentication errors abort the run.
for url in career_pages:
    try:
        response = client.extract.json(
            url=url,
            json_schema=job_schema,
            effort="standard",
        )
        listings = response.get("jobs", [])
        print(f"\n{url} -- {len(listings)} listing(s)")
        for listing in listings:
            remote_label = "remote ok" if listing["remote_ok"] else "on-site"
            columns = (
                listing["title"],
                listing["department"],
                listing["location"],
                remote_label,
                listing["employment_type"],
            )
            print("  " + " | ".join(str(column) for column in columns))
    except RateLimitError:
        # Only reported here; no retry is attempted by this loop.
        print(f"Rate limit hit for {url} -- retry after backoff")
    except AuthenticationError:
        # Fatal: re-raise so the script stops instead of trying more URLs.
        raise
    except Exception as exc:
        print(f"Extraction failed for {url}: {exc}")
```

## How it works

`client.extract.json()` renders the page and maps its content to the provided JSON schema. Every URL in `careerPages` (`career_pages` in the Python version) gets the same schema, so the output is normalized regardless of how each company structures its career page.

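The `jobs` array in the schema wraps all listings from a single page. If a page has 40 postings, you get 40 items back. If a page returns no structured data, the result is an empty array. If extraction fails — for example because the page is inaccessible or a rate limit is hit — the error handler logs the failure and continues to the next URL; the TypeScript version records an empty `jobs` array for that URL. Rate-limit errors are only logged, so add your own retry with backoff if you need one. Authentication errors are the exception: they are re-raised, because no subsequent request can succeed without a valid API key.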

The TypeScript version runs all URLs in parallel with `Promise.all`. The Python version runs sequentially — swap in `asyncio` and `async`/`await` if throughput matters at scale.

`effort: "standard"` handles most career pages. Use `effort: "max"` for JS-heavy SPAs that render listings client-side.

## Installation

- [TypeScript](#tab-panel-4)
- [Python](#tab-panel-5)

Terminal window

```
npm install @tabstack/sdk
```

Terminal window

```
pip install tabstack
```

Set your API key before running:

Terminal window

```
export TABSTACK_API_KEY=your_api_key
```