SimilarWeb Batch API

The SimilarWeb Batch API is optimized for large-scale bulk data extraction, supporting jobs of up to one million domains per request. It delivers data asynchronously to cloud storage destinations including Amazon S3, Google Cloud Storage, and Snowflake. It is ideal for historical analysis, data warehouse ingestion, and large-scale competitive research workflows.

OpenAPI Specification

similarweb-batch-api-openapi.yml
# OpenAPI document header: spec version, then API identity and legal metadata.
openapi: '3.0.3'
info:
  title: SimilarWeb Batch API
  version: '4.0.0'
  description: >-
    The SimilarWeb Batch API is optimized for large-scale bulk data
    extraction, supporting asynchronous jobs of up to one million domains
    per request. It delivers data to cloud storage destinations including
    Amazon S3, Google Cloud Storage, and Snowflake. The API covers website
    traffic, keywords, referrals, geography, segments, apps, companies,
    e-commerce, and technographics datasets.
  # The terms-of-use page is also what the license entry links to.
  termsOfService: https://www.similarweb.com/corp/legal/terms-of-use/
  license:
    url: https://www.similarweb.com/corp/legal/terms-of-use/
    name: Proprietary
  contact:
    url: https://support.similarweb.com/hc/en-us/articles/22089555897373-REST-API
    name: SimilarWeb Support
# Link to the externally hosted Batch API documentation.
externalDocs:
  url: https://developers.similarweb.com/docs/intro-to-the-batch-api-datasets.md
  description: SimilarWeb Batch API Documentation
# Single server entry; the paths in this document are relative to its URL.
servers:
  - description: SimilarWeb API server
    url: https://api.similarweb.com
# Tag catalogue used to group the operations declared under `paths`.
tags:
  - description: Submit, track, and retrieve bulk data report requests
    name: Reports
  - description: Manage cloud storage integrations (S3, GCS, Snowflake)
    name: Integrations
  - description: Batch API credit management
    name: Credits
  - description: Webhook subscription management for data-ready notifications
    name: Webhooks

paths:

  # -----------------------------------------------------------------------
  # Credits
  # -----------------------------------------------------------------------
  # Parameterless GET that reports the account's remaining data credits.
  # NOTE(review): this is the only visible path not under /batch/v4 -
  # confirm the /v3/batch prefix is intentional.
  /v3/batch/credits:
    get:
      tags:
        - Credits
      operationId: getBatchCredits
      summary: Get Remaining Credits
      description: >-
        Retrieve the current remaining data credits on the account.
      security:
        - apiKeyHeader: []
      responses:
        '200':
          description: Remaining credits
          content:
            application/json:
              schema:
                type: object
                properties:
                  credits:
                    # Sample value only.
                    example: 1234
                    type: integer
        '400':
          $ref: '#/components/responses/BadRequest'

  # -----------------------------------------------------------------------
  # Reports
  # -----------------------------------------------------------------------
  # Submits a batch extraction job. The 200 response carries the report_id
  # that request-status and retry-request accept.
  /batch/v4/request-report:
    post:
      tags:
        - Reports
      operationId: requestReport
      summary: Request Report
      description: >-
        Submit a batch data extraction request specifying tables, filters,
        date ranges, metrics, and a delivery method (download link, Amazon
        S3, Google Cloud Storage, or Snowflake). Returns a report_id for
        tracking.
      security:
        - apiKeyHeader: []
      requestBody:
        required: true
        content:
          application/json:
            # Sample body: two monthly traffic metrics for 2024-01 through
            # 2024-06, delivered as a JSON download link.
            example:
              report_query:
                tables:
                  - vtable: similarweb_websites_traffic
                    granularity: monthly
                    start_date: '2024-01'
                    end_date: '2024-06'
                    metrics: [visits, bounce_rate]
              delivery_information:
                delivery_method: download_link
                response_format: json
            schema:
              $ref: '#/components/schemas/ReportRequest'
      responses:
        '200':
          description: Report request submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ReportSubmitResponse'
        '400':
          $ref: '#/components/responses/BadRequest'

  # Status lookup for a single report, identified by the required report_id
  # query parameter.
  /batch/v4/request-status:
    get:
      tags:
        - Reports
      operationId: getRequestStatus
      summary: Request Status
      description: >-
        Retrieve the current status of a previously submitted batch
        report request.
      security:
        - apiKeyHeader: []
      parameters:
        - in: query
          name: report_id
          description: Report ID returned by the request-report endpoint
          required: true
          schema:
            format: uuid
            type: string
      responses:
        '200':
          description: Report status
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ReportStatusResponse'
        '400':
          $ref: '#/components/responses/BadRequest'

  # Companion to request-report: takes the same ReportRequest body and
  # answers with a ValidateResponse (validity plus estimated credit cost).
  /batch/v4/request-validate:
    post:
      tags:
        - Reports
      operationId: validateRequest
      summary: Request Validate
      description: >-
        Validate a batch request and estimate its data credit cost
        before submission. Uses the same request body as request-report.
      security:
        - apiKeyHeader: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ReportRequest'
        required: true
      responses:
        '200':
          description: Validation result with estimated cost
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ValidateResponse'
        '400':
          $ref: '#/components/responses/BadRequest'

  # Lists earlier report requests. Paging uses the optional limit and
  # offset query parameters (defaults 20 and 0).
  /batch/v4/report-history:
    get:
      tags:
        - Reports
      operationId: getReportHistory
      summary: Report History
      description: >-
        Retrieve a paginated history of past batch report requests.
      security:
        - apiKeyHeader: []
      parameters:
        - in: query
          name: limit
          description: Number of records to return
          required: false
          schema:
            default: 20
            type: integer
        - in: query
          name: offset
          description: Starting position
          required: false
          schema:
            default: 0
            type: integer
      responses:
        '200':
          description: Report history list
          content:
            application/json:
              schema:
                type: object
                properties:
                  # Each entry has the same shape as a request-status result.
                  reports:
                    items:
                      $ref: '#/components/schemas/ReportStatusResponse'
                    type: array
        '400':
          $ref: '#/components/responses/BadRequest'

  # Re-submits a failed report by ID; responds with the same
  # ReportSubmitResponse shape as request-report.
  /batch/v4/retry-request:
    post:
      tags:
        - Reports
      operationId: retryRequest
      summary: Retry Request
      description: >-
        Retry a failed batch report request. Requests can be retried up
        to 3 times.
      security:
        - apiKeyHeader: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                report_id:
                  description: ID of the failed report to retry
                  type: string
                  format: uuid
              required:
                - report_id
      responses:
        '200':
          description: Retry submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ReportSubmitResponse'
        '400':
          $ref: '#/components/responses/BadRequest'

  # Catalogue endpoint: returns a `tables` array of TableDescription entries.
  /batch/v4/tables-describe:
    get:
      tags:
        - Reports
      operationId: describeTables
      summary: Describe Available Tables
      description: >-
        List all queryable Batch API tables with their available
        metrics, filters, and date range constraints.
      security:
        - apiKeyHeader: []
      responses:
        '200':
          description: Table descriptions
          content:
            application/json:
              schema:
                type: object
                properties:
                  tables:
                    items:
                      $ref: '#/components/schemas/TableDescription'
                    type: array
        '400':
          $ref: '#/components/responses/BadRequest'

  # -----------------------------------------------------------------------
  # Integrations - S3
  # -----------------------------------------------------------------------
  # Registers an Amazon S3 delivery target from an S3IntegrationRequest body.
  /batch/v4/s3-integration:
    post:
      tags:
        - Integrations
      operationId: createS3Integration
      summary: S3 Integration
      description: >-
        Create a new Amazon S3 integration for batch report delivery.
      security:
        - apiKeyHeader: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/S3IntegrationRequest'
        required: true
      responses:
        '200':
          description: Integration created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/IntegrationResponse'
        '400':
          $ref: '#/components/responses/BadRequest'

  # -----------------------------------------------------------------------
  # Integrations - GCS
  # -----------------------------------------------------------------------
  # Registers a Google Cloud Storage delivery target from a
  # GcsIntegrationRequest body.
  /batch/v4/gcs-integration:
    post:
      tags:
        - Integrations
      operationId: createGcsIntegration
      summary: Google Cloud Storage (GCS) Integration
      description: >-
        Create a new Google Cloud Storage integration for batch report
        delivery.
      security:
        - apiKeyHeader: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GcsIntegrationRequest'
        required: true
      responses:
        '200':
          description: GCS integration created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/IntegrationResponse'
        '400':
          $ref: '#/components/responses/BadRequest'

  # Lists the account's integrations as an `integrations` array of
  # IntegrationResponse entries.
  /batch/v4/integrations:
    get:
      tags:
        - Integrations
      operationId: getAllIntegrations
      summary: Get All Account Integrations
      description: >-
        Retrieve all cloud storage integrations configured for the account.
      security:
        - apiKeyHeader: []
      responses:
        '200':
          description: List of integrations
          content:
            application/json:
              schema:
                type: object
                properties:
                  integrations:
                    items:
                      $ref: '#/components/schemas/IntegrationResponse'
                    type: array
        '400':
          $ref: '#/components/responses/BadRequest'

  # -----------------------------------------------------------------------
  # Webhooks
  # -----------------------------------------------------------------------
  # Creates a webhook subscription from a WebhookSubscribeRequest body and
  # returns the resulting WebhookSubscription.
  /batch/v4/webhooks/subscribe:
    post:
      tags:
        - Webhooks
      operationId: subscribeWebhook
      summary: Subscribe to Webhook Events
      description: >-
        Subscribe to event notifications to stay updated on data
        releases and report completions.
      security:
        - apiKeyHeader: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/WebhookSubscribeRequest'
        required: true
      responses:
        '200':
          description: Subscription created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/WebhookSubscription'
        '400':
          $ref: '#/components/responses/BadRequest'

  # Returns a `subscriptions` array of WebhookSubscription entries.
  /batch/v4/webhooks/list:
    get:
      tags:
        - Webhooks
      operationId: listWebhookSubscriptions
      summary: List Webhook Subscriptions
      description: >-
        Retrieve all active webhook subscriptions for the account.
      security:
        - apiKeyHeader: []
      responses:
        '200':
          description: List of webhook subscriptions
          content:
            application/json:
              schema:
                type: object
                properties:
                  subscriptions:
                    items:
                      $ref: '#/components/schemas/WebhookSubscription'
                    type: array
        '400':
          $ref: '#/components/responses/BadRequest'

  # Removes one subscription, selected by the required webhook_id query
  # parameter. The 200 response declares no body.
  /batch/v4/webhooks/unsubscribe:
    delete:
      tags:
        - Webhooks
      operationId: unsubscribeWebhook
      summary: Unsubscribe from Webhook Events
      description: >-
        Remove a webhook subscription.
      security:
        - apiKeyHeader: []
      parameters:
        - in: query
          name: webhook_id
          description: Webhook subscription ID to remove
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Subscription removed
        '400':
          $ref: '#/components/responses/BadRequest'

  # Sends a test notification to the webhook_url given in the request body.
  # The 200 response declares no body.
  /batch/v4/webhooks/test:
    post:
      tags:
        - Webhooks
      operationId: testWebhook
      summary: Test Your Webhooks
      description: >-
        Send a test notification to a webhook URL to verify configuration.
      security:
        - apiKeyHeader: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                webhook_url:
                  description: The webhook URL to test
                  type: string
                  format: uri
              required:
                - webhook_url
      responses:
        '200':
          description: Test notification sent
        '400':
          $ref: '#/components/responses/BadRequest'

components:
  # API-key authentication: the key travels in the `api-key` request header.
  # Operations opt in by listing `apiKeyHeader` under `security`.
  securitySchemes:
    apiKeyHeader:
      description: SimilarWeb API key passed as a request header
      type: apiKey
      name: api-key
      in: header

  schemas:
    # Request body shared by request-report and request-validate: what to
    # extract (report_query) and where to deliver it (delivery_information).
    ReportRequest:
      type: object
      properties:
        report_query:
          $ref: '#/components/schemas/ReportQuery'
        delivery_information:
          $ref: '#/components/schemas/DeliveryInformation'
      required:
        - report_query
        - delivery_information

    # Wrapper around the list of table queries that make up one report.
    ReportQuery:
      type: object
      properties:
        tables:
          description: Array of table queries to execute
          type: array
          items:
            $ref: '#/components/schemas/TableQuery'
      required:
        - tables

    # One entry of ReportQuery.tables: which table to read, at what
    # granularity, over which dates, and which columns to include.
    TableQuery:
      type: object
      properties:
        vtable:
          description: Table identifier (e.g., similarweb_websites_traffic)
          type: string
        granularity:
          description: Temporal resolution for the data
          type: string
          enum:
            - daily
            - weekly
            - monthly
        # Date selection: explicit start/end, or the latest / all_history /
        # window_size alternatives described below.
        start_date:
          description: Start date in YYYY-MM-DD or YYYY-MM format
          type: string
        end_date:
          description: End date in YYYY-MM-DD or YYYY-MM format
          type: string
        latest:
          description: Override end_date to the latest available date
          type: boolean
        all_history:
          description: Use the complete available date range
          type: boolean
        window_size:
          description: Relative period (e.g., "3m" for 3 months, "1y" for 1 year)
          type: string
        filters:
          description: Key-value filter criteria (e.g., country, domain list)
          type: object
        metrics:
          description: Specific column names to include in the output
          type: array
          items:
            type: string
        paging:
          $ref: '#/components/schemas/PagingConfig'
      required:
        - vtable
        - granularity

    # Pagination and ordering options for a single table query
    # (referenced from TableQuery.paging).
    PagingConfig:
      type: object
      properties:
        limit:
          type: integer
          description: Results per page
        offset:
          type: integer
          default: 0
          description: Starting position
        sort:
          type: string
          description: Metric to sort by
        sort_asc:
          # A true/false flag, so it is modelled as a JSON boolean rather
          # than the string 'false'; the default remains descending order.
          type: boolean
          default: false
          description: Sort ascending (default false - descending)

    # Describes where a finished report goes and in which file format.
    DeliveryInformation:
      type: object
      properties:
        delivery_method:
          description: Output destination type
          type: string
          enum:
            - download_link
            - bucket_access
            - snowflake
        response_format:
          description: Output file format (required for bucket_access)
          type: string
          enum:
            - json
            - csv
            - parquet
            - orc
        webhook_url:
          description: Optional webhook URL to notify when report is ready
          type: string
          format: uri
        delivery_method_params:
          $ref: '#/components/schemas/DeliveryMethodParams'
      required:
        - delivery_method

    # Optional per-delivery settings, nested under
    # DeliveryInformation.delivery_method_params.
    DeliveryMethodParams:
      type: object
      properties:
        integration_name:
          type: string
          description: Integration name when multiple integrations exist
        table_name:
          type: string
          description: Target table name for Snowflake delivery
        retention_days:
          type: integer
          enum: [1, 30, 60, 180, 365]
          description: File retention period in days
        num_of_files:
          # A file count is numeric, so it is typed as an integer (like
          # retention_days) instead of a free-form string.
          type: integer
          description: Number of output files for bucket_access delivery
        write_mode:
          type: string
          # Only a single mode is enumerated.
          enum: [overwrite]
          description: Write mode for S3 operations

    # Acknowledgement body returned by request-report and retry-request.
    ReportSubmitResponse:
      type: object
      properties:
        report_id:
          description: Unique identifier for the submitted report
          type: string
          format: uuid
        status:
          description: Current status of the report
          type: string
          example: pending

    # Status record for one report: returned by request-status and used as
    # the item type of the report-history list.
    ReportStatusResponse:
      type: object
      properties:
        report_id:
          format: uuid
          type: string
        status:
          description: Current processing status
          type: string
          enum:
            - pending
            - in_progress
            - completed
            - failed
        created_at:
          format: date-time
          type: string
        completed_at:
          format: date-time
          type: string
        download_url:
          description: Download URL (populated when delivery_method is download_link)
          type: string
          format: uri
        error_message:
          description: Error details if status is failed
          type: string

    # Result of request-validate: a validity flag, the estimated credit
    # cost, and any validation error messages.
    ValidateResponse:
      type: object
      properties:
        valid:
          description: Whether the request is valid
          type: boolean
        estimated_cost:
          description: Estimated data credit cost
          type: integer
        errors:
          description: Validation errors if request is invalid
          type: array
          items:
            type: string

    # Metadata for one queryable table, as listed by tables-describe.
    TableDescription:
      type: object
      properties:
        vtable:
          description: Table identifier
          type: string
        description:
          description: Human-readable description of the table
          type: string
        metrics:
          description: Available metric columns
          type: array
          items:
            type: string
        filters:
          description: Supported filter dimensions
          type: array
          items:
            type: string
        min_date:
          description: Earliest available data date
          type: string
        granularities:
          description: Supported granularity levels
          type: array
          items:
            type: string

    # Body for s3-integration; only the bucket and integration names are
    # mandatory.
    S3IntegrationRequest:
      type: object
      properties:
        integration_name:
          description: Name for this integration
          type: string
        bucket_name:
          description: Target Amazon S3 bucket name
          type: string
        region:
          description: AWS region of the bucket
          type: string
        prefix:
          description: Path prefix within the bucket
          type: string
      required:
        - bucket_name
        - integration_name

    # Body for gcs-integration; only the bucket and integration names are
    # mandatory.
    GcsIntegrationRequest:
      type: object
      properties:
        integration_name:
          description: Name for this integration
          type: string
        bucket_name:
          description: Target Google Cloud Storage bucket name
          type: string
        prefix:
          description: Path prefix within the bucket
          type: string
      required:
        - bucket_name
        - integration_name

    # Integration record returned by the S3/GCS create endpoints and used
    # as the item type of the integrations list.
    IntegrationResponse:
      type: object
      properties:
        integration_name:
          type: string
        integration_type:
          type: string
          enum:
            - s3
            - gcs
            - snowflake
        status:
          type: string
        created_at:
          format: date-time
          type: string

    # Body for webhooks/subscribe: target URL and event list are mandatory,
    # the signing secret is optional.
    WebhookSubscribeRequest:
      type: object
      properties:
        webhook_url:
          description: URL to receive event notifications
          type: string
          format: uri
        events:
          description: Event types to subscribe to (e.g., data_release, report_complete)
          type: array
          items:
            type: string
        secret:
          description: Optional secret for webhook signature verification
          type: string
      required:
        - webhook_url
        - events

    # Stored subscription record: returned by webhooks/subscribe and used as
    # the item type of the webhooks/list response.
    WebhookSubscription:
      type: object
      properties:
        webhook_id:
          description: Unique identifier for the subscription
          type: string
        webhook_url:
          format: uri
          type: string
        events:
          items:
            type: string
          type: array
        created_at:
          format: date-time
          type: string
        status:
          type: string
          enum:
            - active
            - inactive

  # Reusable responses. BadRequest is the shared 400 response that the
  # operations under `paths` pull in via $ref.
  responses:
    BadRequest:
      description: Bad request - invalid parameters or missing required fields
      content:
        application/json:
          schema:
            # NOTE(review): only a generic object is declared in the visible
            # spec - confirm the actual error payload shape.
            type: object