Bright Data Dataset Marketplace API

Programmatic access to 350+ ready-to-use datasets across 250+ domains (eCommerce, social, real estate, travel, business). List available datasets via `GET /datasets`, inspect schemas via `GET /datasets/{dataset_id}/metadata`, retrieve snapshots via `GET /datasets/snapshots/{snapshot_id}`, and push deliveries to S3, Azure, GCS, Snowflake, or webhooks via `POST /datasets/snapshots/{snapshot_id}/deliver`.

Bright Data Dataset Marketplace API is one of 11 APIs that Bright Data publishes on the APIs.io network. It is described by a machine-readable OpenAPI specification.

This API exposes 1 machine-runnable capability that can be deployed as REST, MCP, or Agent Skill surfaces via Naftiko.

Tagged areas include Datasets, Marketplace, and Bulk Data. The published artifact set on APIs.io includes API documentation, an OpenAPI specification, and 1 Naftiko capability spec.

OpenAPI Specification

bright-data-dataset-marketplace-api-openapi.yml Raw ↑
# OpenAPI description of the Bright Data Dataset Marketplace API.
openapi: '3.1.0'

# Document metadata: title, API version, contact, and a prose description.
info:
  title: 'Bright Data Dataset Marketplace API'
  version: 'v3'
  contact:
    name: 'Bright Data'
    url: 'https://docs.brightdata.com'
  description: |
    Programmatic access to Bright Data's 350+ ready-to-use datasets across 250+ domains (eCommerce,
    social, real estate, travel, business). List entitled datasets, inspect metadata, retrieve snapshots,
    and deliver to S3, Azure Blob, GCS, Snowflake, or a webhook.

# The single declared server; paths in this document are relative to its URL.
servers:
  - description: 'Production'
    url: 'https://api.brightdata.com'

# Document-level security requirement: the BearerAuth scheme with an empty scope list.
security:
  - BearerAuth: []

# Tag names that operations use for grouping.
tags:
  - name: 'Datasets'
  - name: 'Snapshots'
  - name: 'Delivery'
paths:
  # Collection endpoint: GET returns the datasets the caller is entitled to.
  /datasets:
    get:
      operationId: listMarketplaceDatasets
      summary: List Marketplace Datasets
      tags:
        - Datasets
      responses:
        '200':
          description: Datasets entitled to the caller.
          content:
            application/json:
              schema:
                # A JSON array in which each element describes one dataset.
                type: array
                items:
                  type: object
                  properties:
                    id:
                      type: string
                    name:
                      type: string
                    description:
                      type: string
                    schema_url:
                      type: string
                      format: uri
                    record_count:
                      type: integer
                    refresh_cadence:
                      type: string
  # Metadata for a single dataset, addressed by the identifier in the path.
  /datasets/{dataset_id}/metadata:
    parameters:
      # Declared at path level, so it applies to every operation on this path.
      - name: dataset_id
        in: path
        required: true
        schema:
          type: string
    get:
      operationId: getDatasetMetadata
      summary: Get Dataset Metadata
      tags:
        - Datasets
      responses:
        '200':
          description: Dataset metadata.
          content:
            application/json:
              schema:
                # Free-form object: no properties are declared and extra keys are allowed.
                type: object
                additionalProperties: true
  # Retrieve the rows of one snapshot, addressed by the identifier in the path.
  /datasets/snapshots/{snapshot_id}:
    parameters:
      # Declared at path level, so it applies to every operation on this path.
      - name: snapshot_id
        in: path
        required: true
        schema:
          type: string
    get:
      summary: Get Dataset Snapshot
      operationId: getDatasetSnapshot
      tags: [Snapshots]
      parameters:
        # Optional query parameter (no `required` flag) restricted to four values.
        - name: format
          in: query
          description: Output format for the snapshot rows.
          schema:
            type: string
            enum: [json, ndjson, csv, parquet]
      responses:
        "200":
          description: Snapshot rows.
          # One media type per `format` value, so the response definition covers
          # every serialization the query parameter can request, not JSON alone.
          # NOTE(review): the media type names follow the common registrations for
          # each format; confirm them against the Content-Type the API really sends.
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  additionalProperties: true
            application/x-ndjson:
              schema:
                type: string
            text/csv:
              schema:
                type: string
            # Binary payload: the schema is omitted, as OpenAPI 3.1 allows for
            # raw binary bodies.
            application/vnd.apache.parquet: {}
  # Deliver one snapshot, addressed by the identifier in the path, to a destination.
  /datasets/snapshots/{snapshot_id}/deliver:
    parameters:
      # Declared at path level, so it applies to every operation on this path.
      - name: snapshot_id
        in: path
        required: true
        schema:
          type: string
    post:
      operationId: deliverDatasetSnapshot
      summary: Deliver Snapshot to Cloud
      tags:
        - Delivery
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # Only `destination` is mandatory; `format` and `compress` may be omitted.
              required:
                - destination
              properties:
                destination:
                  # NOTE(review): no key inside `destination` is marked required, so an
                  # empty object passes validation; confirm whether `type` should be.
                  type: object
                  properties:
                    type:
                      type: string
                      enum:
                        - s3
                        - azure
                        - gcs
                        - snowflake
                        - webhook
                    bucket:
                      type: string
                    credentials:
                      # Free-form object; its keys are not constrained by this schema.
                      type: object
                      additionalProperties: true
                format:
                  type: string
                  enum:
                    - json
                    - ndjson
                    - csv
                    - parquet
                compress:
                  type: boolean
      responses:
        '200':
          description: Delivery scheduled.
          content:
            application/json:
              schema:
                type: object
# Reusable definitions, referenced by name from other parts of the document.
components:
  securitySchemes:
    # HTTP authentication using the `bearer` scheme.
    BearerAuth:
      scheme: 'bearer'
      type: 'http'