Bright Data Web Archive API

Search and deliver petabyte-scale historical web snapshots across 250+ domains. Submit a query via `POST /webarchive/search`, monitor with `GET /webarchive/search/{search_id}`, list all searches via `GET /webarchive/searches`, and deliver matching corpora to S3/Azure/GCS via `POST /webarchive/deliver-to-cloud`.

Bright Data Web Archive API is one of 11 APIs that Bright Data publishes on the APIs.io network, described by a machine-readable OpenAPI specification.

This API exposes 1 machine-runnable capability that can be deployed as REST, MCP, or Agent Skill surfaces via Naftiko.

Tagged areas include Historical Data, Web Archive, and Time Series. The published artifact set on APIs.io includes API documentation, an OpenAPI specification, and 1 Naftiko capability spec.

OpenAPI Specification

bright-data-web-archive-api-openapi.yml Raw ↑
# OpenAPI 3.1 description of the Bright Data Web Archive API.
openapi: 3.1.0
info:
  title: Bright Data Web Archive API
  # Literal block scalar (`|`): the line breaks below are kept as written.
  description: |
    The Web Archive API exposes Bright Data's petabyte-scale historical web index across 250+ domains.
    Submit a search via `POST /webarchive/search`, monitor with `GET /webarchive/search/{search_id}`,
    list all searches via `GET /webarchive/searches`, and deliver matching corpora to S3/Azure/GCS
    via `POST /webarchive/deliver-to-cloud`.
  # Quoted so the value stays the string "1.0" rather than a float.
  version: '1.0'
  contact:
    name: Bright Data
    url: https://docs.brightdata.com
# Base URL that every entry under `paths` is resolved against.
servers:
  - url: https://api.brightdata.com
    description: Production
# Document-wide default security requirement: operations that do not declare
# their own `security` use the `BearerAuth` scheme from
# `components.securitySchemes`.
security:
  - BearerAuth: []
# The only tag declared in this document; operations use it for grouping.
tags:
  - name: Archive
paths:
  /webarchive/search:
    # Submits a new archive search. The response carries the `search_id`
    # that the status and delivery operations accept as input.
    post:
      summary: Submit a Web Archive Search
      operationId: submitArchiveSearch
      tags:
        - Archive
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # Only `domain` is mandatory; every other field is optional.
              required:
                - domain
              properties:
                domain:
                  type: string
                query:
                  type: string
                # Both date fields are strings declared with `format: date`.
                from_date:
                  type: string
                  format: date
                to_date:
                  type: string
                  format: date
                limit:
                  type: integer
      responses:
        '200':
          description: Search submitted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  search_id:
                    type: string
  /webarchive/search/{search_id}:
    # Declared at path level, so the parameter applies to every operation
    # defined under this path item.
    parameters:
      - name: search_id
        in: path
        required: true
        schema:
          type: string
    get:
      summary: Get Web Archive Search
      operationId: getArchiveSearch
      tags:
        - Archive
      responses:
        '200':
          description: Search status and results pointer.
          content:
            application/json:
              schema:
                type: object
                properties:
                  search_id:
                    type: string
                  # Closed set of status values declared for a search.
                  status:
                    type: string
                    enum:
                      - pending
                      - running
                      - ready
                      - failed
                  records:
                    type: integer
                  download_url:
                    type: string
                    format: uri
  /webarchive/searches:
    # Returns an array of search summaries (id, domain, status, creation time).
    get:
      summary: List Web Archive Searches
      operationId: listArchiveSearches
      tags: [Archive]
      responses:
        "200":
          description: List of searches.
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    search_id: { type: string }
                    domain: { type: string }
                    # Constrained to the same values as the `status` field of
                    # `GET /webarchive/search/{search_id}`, so the two
                    # responses describe a search's state consistently.
                    status: { type: string, enum: [pending, running, ready, failed] }
                    created: { type: string, format: date-time }
  /webarchive/deliver-to-cloud:
    # Requests delivery of an existing search (identified by `search_id`)
    # to a cloud storage destination.
    post:
      summary: Deliver Archive Search to Cloud Storage
      operationId: deliverArchiveToCloud
      tags:
        - Archive
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # `format` is the only top-level field that may be omitted.
              required:
                - search_id
                - destination
              properties:
                search_id:
                  type: string
                destination:
                  type: object
                  properties:
                    type:
                      type: string
                      enum:
                        - s3
                        - azure
                        - gcs
                    bucket:
                      type: string
                    # Free-form object: no credential fields are declared
                    # and any additional properties are accepted.
                    credentials:
                      type: object
                      additionalProperties: true
                format:
                  type: string
                  enum:
                    - json
                    - ndjson
                    - parquet
      responses:
        '200':
          description: Delivery scheduled.
          content:
            application/json:
              # The response body is an object with no declared properties.
              schema:
                type: object
components:
  securitySchemes:
    # HTTP bearer authentication: the token is sent in the
    # `Authorization: Bearer <token>` request header. This scheme is the one
    # named by the top-level `security` requirement.
    BearerAuth:
      type: http
      scheme: bearer