Bright Data Web Scraper API

Asynchronous snapshot-based scraping over 660+ pre-built dataset endpoints (Amazon, Walmart, LinkedIn, Instagram, TikTok, YouTube, Reddit, Google Maps, Airbnb, Booking, Zillow, and many more) plus custom collectors. Trigger via `POST /datasets/v3/scrape`, poll `/datasets/v3/progress/{snapshot_id}`, and download via `/datasets/v3/snapshot/{snapshot_id}`. Returns JSON, NDJSON, CSV, or Parquet; supports direct delivery to S3, Azure Blob, GCS, Snowflake, and webhooks.

OpenAPI Specification

bright-data-web-scraper-api-openapi.yml Raw ↑
# OpenAPI document version; quoted so no parser can mistake it for a number.
openapi: '3.1.0'
# API-level metadata: title, long description, version label, contact, and license.
info:
  title: Bright Data Web Scraper API
  # The format lists below mirror the `format` enums of the download and deliver operations.
  description: |
    Bright Data's Web Scraper API exposes asynchronous scraping jobs (snapshots) backed by 660+
    pre-built dataset endpoints plus custom collectors. Trigger a scrape, poll progress, list snapshots,
    cancel or rerun, and download results in JSON, NDJSON, JSONL, or CSV — optionally compressed.
    Snapshots can be pushed to S3, Azure Blob, GCS, Snowflake, or a webhook as JSON, NDJSON, CSV,
    or Parquet.

    Authentication uses a Bearer API token issued from the Bright Data control panel.
  # Matches the `/datasets/v3/...` path prefix used by the snapshot endpoints.
  version: v3
  contact:
    name: Bright Data
    url: https://docs.brightdata.com
  license:
    name: Bright Data API Terms of Service
    url: https://brightdata.com/legal/tos
# Base URL that the paths in this document are resolved against.
servers:
  - description: Production
    url: 'https://api.brightdata.com'
# Top-level security requirement: the BearerAuth scheme declared under
# components.securitySchemes, with no scopes.
security:
  - BearerAuth: []
# Tag catalogue; each operation below references one of these names.
tags:
  # Job submission, progress polling, and dataset discovery.
  - name: Scrape
    description: >-
      Trigger and monitor asynchronous scraping jobs.
  # Listing, log, download, cancel, and rerun operations.
  - name: Snapshots
    description: >-
      List, download, cancel, and rerun snapshots produced by scraping jobs.
  # Push-style delivery of a finished snapshot.
  - name: Delivery
    description: >-
      Deliver snapshots to cloud storage destinations or webhooks.
paths:
  # Job submission. The snapshot_id in the response is the key used by the
  # progress, log, download, cancel, rerun, and deliver paths.
  /datasets/v3/scrape:
    post:
      summary: Trigger a Web Scraper Job
      description: |
        Trigger an asynchronous scraping job against a Bright Data dataset. Pass the `dataset_id` of
        the target collector and the per-record input payload as a JSON array. Returns a `snapshot_id`
        used to poll progress and download results.
      operationId: triggerScrape
      tags: [Scrape]
      parameters:
        - name: dataset_id
          in: query
          required: true
          schema: { type: string }
          description: Bright Data dataset identifier (e.g. `gd_l1viktl72bvl7bjuj0`).
        - name: include_errors
          in: query
          schema: { type: boolean }
          description: Include an errors report with the results.
        - name: notify
          in: query
          schema: { type: string, format: uri }
          description: Webhook URL Bright Data calls when the snapshot completes.
        - name: format
          in: query
          schema: { type: string, enum: [json, ndjson, csv, jsonl] }
          description: Format of the collected data.
        - name: limit_per_input
          in: query
          schema: { type: integer }
          description: Limit on the number of results collected per input record.
        - name: limit_multiple_results
          in: query
          schema: { type: integer }
          description: Limit on the total number of results collected.
      requestBody:
        required: true
        description: JSON array holding one free-form object per input record.
        content:
          application/json:
            schema:
              type: array
              items:
                # Input keys are left unconstrained by this specification.
                type: object
                additionalProperties: true
      responses:
        "200":
          description: Job submitted.
          content:
            application/json:
              schema:
                type: object
                # The operation description promises a snapshot_id on success.
                required: [snapshot_id]
                properties:
                  snapshot_id:
                    type: string
                    description: Identifier used to poll progress and download results.
  # Polling endpoint for a job started with POST /datasets/v3/scrape.
  /datasets/v3/progress/{snapshot_id}:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        description: Snapshot identifier returned when the scraping job was triggered.
        schema: { type: string }
    get:
      summary: Get Scrape Progress
      description: |
        Report the current status of a snapshot together with its record and error counts.
        Use it to poll a job after triggering it.
      operationId: getScrapeProgress
      tags: [Scrape]
      responses:
        "200":
          description: Progress.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    description: Current state of the snapshot.
                    enum: [running, ready, failed, building, collecting, cancelled]
                  records: { type: integer }
                  errors: { type: integer }
  # Log retrieval for a single snapshot.
  /datasets/v3/log/{snapshot_id}:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        description: Identifier of the snapshot whose log is requested.
        schema: { type: string }
    get:
      summary: Get Snapshot Log
      description: |
        Retrieve the log of a snapshot. The response is an array of entry objects whose
        fields are not constrained by this specification.
      operationId: getSnapshotLog
      tags: [Snapshots]
      responses:
        "200":
          description: Snapshot log.
          content:
            application/json:
              schema:
                type: array
                items: { type: object }
  # Snapshot listing; every filter below is an optional query parameter.
  /datasets/v3/snapshots:
    get:
      summary: List Snapshots
      description: |
        List snapshots, optionally filtered by dataset, date range, and status. Each item
        follows the `Snapshot` schema.
      operationId: listSnapshots
      tags: [Snapshots]
      parameters:
        - name: dataset_id
          in: query
          description: Restrict the listing to snapshots of this dataset.
          schema: { type: string }
        - name: from_date
          in: query
          description: Lower bound of the date range filter (full date, e.g. `2024-01-31`).
          schema:
            type: string
            format: date
        - name: to_date
          in: query
          description: Upper bound of the date range filter (full date, e.g. `2024-01-31`).
          schema:
            type: string
            format: date
        - name: status
          in: query
          description: Restrict the listing to snapshots with this status.
          schema: { type: string }
      responses:
        "200":
          description: Snapshots.
          content:
            application/json:
              schema:
                type: array
                items: { $ref: '#/components/schemas/Snapshot' }
  # Cancellation of a single snapshot.
  /datasets/v3/snapshot/{snapshot_id}/cancel:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        description: Identifier of the snapshot to cancel.
        schema: { type: string }
    post:
      summary: Cancel a Snapshot
      description: |
        Cancel a snapshot. The response body is a JSON object whose fields are not
        constrained by this specification.
      operationId: cancelSnapshot
      tags: [Snapshots]
      responses:
        "200":
          description: Cancelled.
          content:
            application/json:
              schema:
                type: object
  # Re-execution of an existing snapshot.
  /datasets/v3/snapshot/{snapshot_id}/rerun:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        description: Identifier of the snapshot to rerun.
        schema: { type: string }
    post:
      summary: Rerun a Snapshot
      description: |
        Start a rerun of an existing snapshot. The response carries a `snapshot_id`.
      operationId: rerunSnapshot
      tags: [Snapshots]
      responses:
        "200":
          description: Snapshot rerun started.
          content:
            application/json:
              schema:
                type: object
                properties:
                  snapshot_id: { type: string }
  # Result download for a single snapshot.
  /datasets/v3/snapshot/{snapshot_id}:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        description: Identifier of the snapshot to download.
        schema: { type: string }
    get:
      summary: Download Snapshot Results
      description: |
        Download the data collected for a snapshot in the requested format, optionally
        compressed and optionally split into batches.
      operationId: downloadSnapshot
      tags: [Snapshots]
      parameters:
        - name: format
          in: query
          description: Format of the returned data.
          schema:
            type: string
            enum: [json, ndjson, csv, jsonl]
        - name: compress
          in: query
          description: Whether to return the result compressed.
          schema: { type: boolean }
        - name: batch_size
          in: query
          description: Number of records to include in each batch of the download.
          schema: { type: integer }
        - name: part
          in: query
          description: Number of the batch to download (used together with `batch_size`).
          schema: { type: integer }
      responses:
        "200":
          description: Snapshot data (up to 5 GB per request).
          content:
            application/json:
              schema:
                type: array
                items: { type: object }
            text/csv:
              schema: { type: string }
            application/x-ndjson:
              schema: { type: string }
  # Push delivery of a snapshot to an external destination.
  /datasets/v3/snapshot/{snapshot_id}/deliver:
    parameters:
      - name: snapshot_id
        in: path
        required: true
        description: Identifier of the snapshot to deliver.
        schema: { type: string }
    post:
      summary: Deliver Snapshot to Cloud Storage
      description: |
        Schedule delivery of a snapshot to the destination described by the `deliver` object,
        in the requested `format` and optionally compressed.
      operationId: deliverSnapshot
      tags: [Delivery]
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                deliver:
                  type: object
                  description: Delivery destination settings.
                  properties:
                    type:
                      type: string
                      description: Kind of destination to deliver to.
                      enum: [s3, gcs, azure, snowflake, webhook]
                    filename:
                      type: object
                      description: Settings for the delivered file name; fields are not constrained here.
                    bucket:
                      type: string
                      description: Name of the target bucket.
                    credentials:
                      type: object
                      description: Destination credentials; accepted keys are not constrained here.
                      additionalProperties: true
                format:
                  type: string
                  description: Format of the delivered data.
                  enum: [json, ndjson, csv, parquet]
                compress:
                  type: boolean
                  description: Whether to compress the delivered data.
      responses:
        "200":
          description: Delivery scheduled.
          content:
            application/json:
              schema:
                type: object
  # Dataset discovery; the returned ids are the values accepted as `dataset_id`.
  /datasets:
    get:
      summary: List Available Datasets
      description: |
        List the datasets the caller is entitled to query, with the identifier, name, and
        description of each.
      operationId: listDatasets
      tags: [Scrape]
      responses:
        "200":
          description: List of datasets the caller is entitled to query.
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    id:
                      type: string
                      description: Dataset identifier.
                    name:
                      type: string
                      description: Dataset name.
                    description:
                      type: string
                      description: Dataset description.
# Reusable definitions referenced from elsewhere in this document.
components:
  securitySchemes:
    # HTTP bearer-token scheme named by the top-level `security` requirement.
    BearerAuth:
      scheme: bearer
      type: http
  schemas:
    # Item shape of the array returned by the snapshot listing operation.
    Snapshot:
      type: object
      properties:
        id:
          type: string
        dataset_id:
          type: string
        status:
          type: string
        created:
          type: string
          format: date-time
        dataset_size:
          type: integer
        records:
          type: integer
        errors:
          type: integer