# OpenAPI 3.1 description of the Bright Data Web Scraper API.
openapi: 3.1.0
info:
  title: Bright Data Web Scraper API
  # Literal block scalar (`|`): the line breaks in the text below are kept
  # as-is in the description value.
  description: |
    Bright Data's Web Scraper API exposes asynchronous scraping jobs (snapshots) backed by 660+
    pre-built dataset endpoints plus custom collectors. Trigger a scrape, poll progress, list snapshots,
    cancel or rerun, and download results in JSON, NDJSON, CSV, or JSONL — optionally compressed.
    Snapshots can be pushed to S3, Azure Blob, GCS, Snowflake, or a webhook.
    Authentication uses a Bearer API token issued from the Bright Data control panel.
  # Version of this API description; matches the `/v3/` segment used in the paths.
  version: v3
  contact:
    name: Bright Data
    url: https://docs.brightdata.com
  license:
    name: Bright Data API Terms of Service
    url: https://brightdata.com/legal/tos
# Base URL against which every entry under `paths` is resolved.
servers:
  - url: https://api.brightdata.com
    description: Production
# Default security requirement: applies to every operation that does not
# declare its own `security`. The empty list means no scopes are requested.
security:
  - BearerAuth: []
# Tags that operations reference (via their `tags` list) for grouping.
tags:
  - name: Scrape
    description: Trigger and monitor asynchronous scraping jobs.
  - name: Snapshots
    description: List, download, cancel, and rerun snapshots produced by scraping jobs.
  - name: Delivery
    description: Deliver snapshots to cloud storage destinations or webhooks.
# All operations of the API, keyed by URL path (relative to the server URL).
# Every path containing `{snapshot_id}` declares that parameter at path level,
# so it applies to each operation defined on that path.
paths:
  # Submit a job. The response carries the `snapshot_id` that the
  # snapshot-scoped paths below take as a path parameter.
  /datasets/v3/scrape:
    post:
      summary: Trigger a Web Scraper Job
      description: |
        Trigger an asynchronous scraping job against a Bright Data dataset. Pass the `dataset_id` of
        the target collector and the per-record input payload as a JSON array. Returns a `snapshot_id`
        used to poll progress and download results.
      operationId: triggerScrape
      tags: [Scrape]
      parameters:
        - name: dataset_id
          in: query
          required: true
          schema: { type: string }
          description: Bright Data dataset identifier (e.g. `gd_l1viktl72bvl7bjuj0`).
        # NOTE(review): include_errors, format, limit_per_input and
        # limit_multiple_results have no description in this spec; add one
        # for each after confirming their semantics with the vendor docs.
        - name: include_errors
          in: query
          schema: { type: boolean }
        - name: notify
          in: query
          schema: { type: string, format: uri }
          description: Webhook URL Bright Data calls when the snapshot completes.
        - name: format
          in: query
          schema:
            type: string
            enum: [json, ndjson, csv, jsonl]
        - name: limit_per_input
          in: query
          schema: { type: integer }
        - name: limit_multiple_results
          in: query
          schema: { type: integer }
      requestBody:
        required: true
        content:
          application/json:
            # JSON array of free-form objects (any properties allowed).
            schema:
              type: array
              items:
                type: object
                additionalProperties: true
      responses:
        "200":
          description: Job submitted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  snapshot_id: { type: string }

  # Poll the state of a job.
  /datasets/v3/progress/{snapshot_id}:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        schema: { type: string }
    get:
      summary: Get Scrape Progress
      operationId: getScrapeProgress
      tags: [Scrape]
      responses:
        "200":
          description: Progress.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    enum: [running, ready, failed, building, collecting, cancelled]
                  records: { type: integer }
                  errors: { type: integer }

  # Fetch the log of a snapshot; entries are untyped objects.
  /datasets/v3/log/{snapshot_id}:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        schema: { type: string }
    get:
      summary: Get Snapshot Log
      operationId: getSnapshotLog
      tags: [Snapshots]
      responses:
        "200":
          description: Snapshot log.
          content:
            application/json:
              schema:
                type: array
                items: { type: object }

  # List snapshots; all four query filters are optional.
  /datasets/v3/snapshots:
    get:
      summary: List Snapshots
      operationId: listSnapshots
      tags: [Snapshots]
      parameters:
        - name: dataset_id
          in: query
          schema: { type: string }
        - name: from_date
          in: query
          schema: { type: string, format: date }
        - name: to_date
          in: query
          schema: { type: string, format: date }
        # NOTE(review): unconstrained string; presumably takes the status
        # values enumerated by the progress response - confirm.
        - name: status
          in: query
          schema: { type: string }
      responses:
        "200":
          description: Snapshots.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Snapshot'

  # Cancel a snapshot; the response body is an untyped object.
  /datasets/v3/snapshot/{snapshot_id}/cancel:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        schema: { type: string }
    post:
      summary: Cancel a Snapshot
      operationId: cancelSnapshot
      tags: [Snapshots]
      responses:
        "200":
          description: Cancelled.
          content:
            application/json:
              schema: { type: object }

  # Rerun a snapshot.
  /datasets/v3/snapshot/{snapshot_id}/rerun:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        schema: { type: string }
    post:
      summary: Rerun a Snapshot
      operationId: rerunSnapshot
      tags: [Snapshots]
      responses:
        "200":
          description: Snapshot rerun started.
          content:
            application/json:
              schema:
                type: object
                properties:
                  # NOTE(review): presumably the id of the new run rather
                  # than the id passed in the path - confirm.
                  snapshot_id: { type: string }

  # Download the collected data of a snapshot.
  /datasets/v3/snapshot/{snapshot_id}:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        schema: { type: string }
    get:
      summary: Download Snapshot Results
      operationId: downloadSnapshot
      tags: [Snapshots]
      parameters:
        - name: format
          in: query
          schema:
            type: string
            enum: [json, ndjson, csv, jsonl]
        - name: compress
          in: query
          schema: { type: boolean }
        # NOTE(review): batch_size and part look like a paging pair for
        # downloading the data in chunks - confirm and describe.
        - name: batch_size
          in: query
          schema: { type: integer }
        - name: part
          in: query
          schema: { type: integer }
      responses:
        "200":
          description: Snapshot data (up to 5 GB per request).
          # One media type per representation; CSV and NDJSON bodies are
          # modelled as plain strings.
          content:
            application/json:
              schema:
                type: array
                items: { type: object }
            text/csv:
              schema: { type: string }
            application/x-ndjson:
              schema: { type: string }

  # Push a snapshot to an external destination.
  /datasets/v3/snapshot/{snapshot_id}/deliver:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        schema: { type: string }
    post:
      summary: Deliver Snapshot to Cloud Storage
      operationId: deliverSnapshot
      tags: [Delivery]
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                # Destination settings; `type` selects the destination kind.
                deliver:
                  type: object
                  properties:
                    type:
                      type: string
                      enum: [s3, gcs, azure, snowflake, webhook]
                    filename: { type: object }
                    bucket: { type: string }
                    credentials:
                      type: object
                      additionalProperties: true
                # NOTE(review): `format` and `compress` are placed beside
                # `deliver` rather than inside it - confirm this nesting.
                # This enum offers parquet and omits jsonl, unlike the
                # `format` enum of the trigger and download operations.
                format:
                  type: string
                  enum: [json, ndjson, csv, parquet]
                compress: { type: boolean }
      responses:
        "200":
          description: Delivery scheduled.
          content:
            application/json:
              schema: { type: object }

  # Catalogue of datasets; takes no parameters.
  /datasets:
    get:
      summary: List Available Datasets
      operationId: listDatasets
      tags: [Scrape]
      responses:
        "200":
          description: List of datasets the caller is entitled to query.
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    id: { type: string }
                    name: { type: string }
                    description: { type: string }
# Reusable definitions referenced from elsewhere in this document.
components:
  securitySchemes:
    # HTTP bearer-token scheme; named by the top-level `security` requirement.
    BearerAuth:
      type: http
      scheme: bearer
  schemas:
    # Item type of the array returned by the listSnapshots operation.
    Snapshot:
      type: object
      properties:
        id: { type: string }
        dataset_id: { type: string }
        # NOTE(review): unconstrained string here, while the progress
        # response enumerates its status values - confirm whether the same
        # set applies and, if so, share one enum.
        status: { type: string }
        created: { type: string, format: date-time }
        # NOTE(review): unit of dataset_size is not stated - confirm.
        dataset_size: { type: integer }
        records: { type: integer }
        errors: { type: integer }