Reducto Pipeline API

Compose Parse, Split, Extract, Edit, and Classify into a single multi-step workflow with chained outputs. Priority requests are supported on the Growth plan, and on-premise / VPC deployments are supported on the Enterprise plan.

OpenAPI Specification

reducto-pipeline-api-openapi.yml Raw ↑
# Version of the OpenAPI Specification this document is written against.
openapi: 3.1.0
# API-level metadata: title, version, summary text, support contact and license.
info:
  title: Reducto Pipeline API
  version: 1.0.0
  description: Compose Parse, Split, Extract, and Edit into a single multi-step workflow with chained outputs.
  contact:
    name: Reducto Support
    # NOTE(review): the previous value was the e-mail obfuscation placeholder `[email protected]`, which YAML
    # parses as a one-element flow sequence instead of a string. The address below is presumed -- confirm
    # it against the published support contact before release.
    email: support@reducto.ai
    url: https://reducto.ai/contact
  license:
    name: Reducto Terms of Service
    url: https://reducto.ai/terms
# Base URL(s) that the paths in this document are resolved against.
servers:
- description: Reducto production platform
  url: https://platform.reducto.ai
# Document-wide security requirement: the SkippableHTTPBearer scheme with no scopes.
security:
- SkippableHTTPBearer: []
# Tag used to group the operations of this API.
tags:
- name: Pipeline
paths:
  # POST /pipeline: takes a V3PipelineConfig JSON body and answers 200 with a PipelineResponse.
  /pipeline:
    post:
      tags:
      - Pipeline
      summary: Pipeline
      operationId: pipeline_pipeline_post
      security:
      - SkippableHTTPBearer: []
      parameters:
      # Optional request header; its schema accepts a string or null.
      - name: user-id
        in: header
        required: false
        schema:
          title: User-Id
          anyOf:
          - type: string
          - type: 'null'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3PipelineConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PipelineResponse'
        # Validation failures are reported with an HTTPValidationError body.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /pipeline_async: takes a V3AsyncPipelineConfig JSON body and answers 200 with an
  # AsyncPipelineResponse.
  /pipeline_async:
    post:
      tags:
      - Pipeline
      summary: Pipeline Async
      operationId: pipeline_async_pipeline_async_post
      security:
      - SkippableHTTPBearer: []
      parameters:
      # Optional request header; its schema accepts a string or null.
      - name: user-id
        in: header
        required: false
        schema:
          title: User-Id
          anyOf:
          - type: string
          - type: 'null'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3AsyncPipelineConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncPipelineResponse'
        # Validation failures are reported with an HTTPValidationError body.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
components:
  schemas:
    # Result variant whose `type` is the constant 'url'; carries a URL and a result ID.
    UrlResult:
      title: UrlResult
      type: object
      required:
      - type
      - url
      - result_id
      properties:
        type:
          title: Type
          description: type = 'url'
          type: string
          const: url
        url:
          title: Url
          type: string
        result_id:
          title: Result Id
          type: string
    # One block extracted from a document: its type label, bounding box and content, plus optional
    # image, chart, confidence and extra metadata. Published under the title ParseBlock.
    ParseBlock-Output:
      title: ParseBlock
      type: object
      required:
      - type
      - bbox
      - content
      properties:
        type:
          title: Type
          description: The type of block extracted from the document.
          type: string
          enum:
          - Header
          - Footer
          - Title
          - Section Header
          - Page Number
          - List Item
          - Figure
          - Table
          - Key Value
          - Text
          - Comment
          - Signature
        bbox:
          description: The bounding box of the block extracted from the document.
          $ref: '#/components/schemas/BoundingBox'
        content:
          title: Content
          description: The content of the block extracted from the document.
          type: string
        image_url:
          title: Image Url
          description: (Experimental) The URL of the image associated with the block.
          anyOf:
          - type: string
          - type: 'null'
        chart_data:
          title: Chart Data
          description: (Experimental) The URL/link to chart data JSON for figure blocks processed by chart agent.
          anyOf:
          - type: array
            items:
              type: string
          - type: 'null'
        confidence:
          title: Confidence
          description: >-
            The confidence for the block. It is either low or high and takes into account factors like OCR and
            table structure
          anyOf:
          - type: string
          - type: 'null'
          default: low
        granular_confidence:
          description: >-
            Granular confidence scores for the block. It is a dictionary of confidence scores for the block. The
            confidence scores will not be None if the user has enabled numeric confidence scores.
          anyOf:
          - $ref: '#/components/schemas/GranularConfidence'
          - type: 'null'
        extra:
          title: Extra
          description: Extra metadata fields for the block. Fields like 'is_chart' will only appear when set to True.
          anyOf:
          - type: object
            additionalProperties: true
          - type: 'null'
    # Request body of POST /pipeline: the input to process, the pipeline to run, and optional
    # per-request settings. `input` and `pipeline_id` are required.
    V3PipelineConfig:
      properties:
        input:
          # Accepts a single string, a list of strings, or an UploadResponse object.
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar so the options render as a Markdown numbered list. The previous value
          # embedded source-code indentation before every line, which CommonMark renderers fold into
          # the paragraph (list items) or show as an indented code block (closing sentence).
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        pipeline_id:
          type: string
          title: Pipeline Id
          description: The ID of the pipeline to use for the document.
        settings:
          $ref: '#/components/schemas/PipelineSettings'
          default: {}
      type: object
      required:
      - input
      - pipeline_id
      title: V3PipelineConfig
    # A chunk of parsed document content together with the blocks it is made of.
    # Published under the title ParseChunk.
    ParseChunk-Output:
      title: ParseChunk
      type: object
      required:
      - content
      - embed
      - enriched
      - blocks
      properties:
        content:
          title: Content
          description: The content of the chunk extracted from the document.
          type: string
        embed:
          title: Embed
          description: Chunk content optimized for embedding and retrieval.
          type: string
        enriched:
          title: Enriched
          description: The enriched content of the chunk extracted from the document.
          anyOf:
          - type: string
          - type: 'null'
        enrichment_success:
          title: Enrichment Success
          description: Whether the enrichment was successful.
          type: boolean
          default: false
        blocks:
          title: Blocks
          type: array
          items:
            $ref: '#/components/schemas/ParseBlock-Output'
    # A single validation problem: where it occurred (`loc`), a message and an error type,
    # with optional `input` and `ctx` details.
    ValidationError:
      title: ValidationError
      type: object
      required:
      - loc
      - msg
      - type
      properties:
        loc:
          title: Location
          type: array
          # Each path element is either a string or an integer.
          items:
            anyOf:
            - type: string
            - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
        # No type constraint is declared for `input`.
        input:
          title: Input
        ctx:
          title: Context
          type: object
    # A named split: the pages assigned to it, a high/low confidence label, and optional partitions.
    Split:
      title: Split
      type: object
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            type: integer
        conf:
          title: Conf
          type: string
          enum:
          - high
          - low
          default: low
        partitions:
          title: Partitions
          anyOf:
          - type: array
            items:
              $ref: '#/components/schemas/SplitPartition'
          - type: 'null'
    # Usage accounting: the page count (required) plus optional credit total and per-feature breakdowns.
    ParseUsage:
      title: ParseUsage
      type: object
      required:
      - num_pages
      properties:
        num_pages:
          title: Num Pages
          type: integer
        credits:
          title: Credits
          anyOf:
          - type: number
          - type: 'null'
        credit_breakdown:
          title: Credit Breakdown
          anyOf:
          # Object whose keys are limited to the listed feature names and whose values are numbers.
          - type: object
            propertyNames:
              enum:
              - page
              - html_page
              - docx_native_page
              - chart_agent
              - spreadsheet_cells
              - billable_spreadsheet_pages
              - agentic
              - complex
              - enrich_table
              - figure_summary
              - table_summary
              - key_value
              - agentic_text
              - promptable_agentic_text
            additionalProperties:
              type: number
          - type: 'null'
        page_billing_breakdown:
          title: Page Billing Breakdown
          description: >-
            Per-page breakdown of features used. Maps 1-indexed page numbers (as strings) to the list of billing
            features applied on that page (e.g. 'page', 'complex', 'chart_agent').
          anyOf:
          - type: object
            additionalProperties:
              type: array
              items:
                type: string
                enum:
                - page
                - html_page
                - docx_native_page
                - agentic
                - complex
                - chart_agent
                - spreadsheet_cells
                - billable_spreadsheet_pages
                - enrich_table
                - figure_summary
                - table_summary
                - key_value
                - agentic_text
                - promptable_agentic_text
          - type: 'null'
    # OCR output as two required lists: words and lines. Published under the title OCRResult.
    OCRResult-Output:
      title: OCRResult
      type: object
      required:
      - words
      - lines
      properties:
        words:
          title: Words
          type: array
          items:
            $ref: '#/components/schemas/OCRWord'
        lines:
          title: Lines
          type: array
          items:
            $ref: '#/components/schemas/OCRLine'
    # Options for asynchronous processing: free-form metadata, a priority flag and an optional
    # webhook configuration. Published under the title AsyncConfig; no property is required.
    config__v3__AsyncConfig:
      title: AsyncConfig
      type: object
      properties:
        # No type constraint is declared for `metadata`.
        metadata:
          title: Metadata
          description: JSON metadata included in webhook request body. Defaults to None.
        priority:
          title: Priority
          description: >-
            If True, attempts to process the job with priority if the user has priority processing budget available;
            by default, sync jobs are prioritized above async jobs.
          type: boolean
          default: false
        webhook:
          title: Webhook
          description: The webhook configuration for the asynchronous processing.
          anyOf:
          - $ref: '#/components/schemas/SvixWebhookConfig'
          - $ref: '#/components/schemas/DirectWebhookConfig'
          - type: 'null'
    # Webhook configuration whose `mode` is the constant 'svix', with an optional channel list.
    SvixWebhookConfig:
      title: SvixWebhookConfig
      type: object
      properties:
        mode:
          title: Mode
          type: string
          const: svix
          default: svix
        channels:
          title: Channels
          description: A list of Svix channels the message will be delivered down, omit to send to all channels.
          type: array
          items:
            type: string
    # Closed object (no additional properties) holding per-request pipeline overrides.
    PipelineSettings:
      title: PipelineSettings
      type: object
      description: Settings for pipeline execution that override pipeline defaults.
      additionalProperties: false
      properties:
        document_password:
          title: Document Password
          description: Password to decrypt password-protected documents.
          anyOf:
          - type: string
          - type: 'null'
    # Rectangle given by left/top/width/height numbers and a page number;
    # `original_page` is the only optional field.
    BoundingBox:
      title: BoundingBox
      type: object
      required:
      - left
      - top
      - width
      - height
      - page
      properties:
        left:
          title: Left
          type: number
        top:
          title: Top
          type: number
        width:
          title: Width
          type: number
        height:
          title: Height
          type: number
        page:
          title: Page
          description: The page number of the bounding box (1-indexed).
          type: integer
        original_page:
          title: Original Page
          description: The page number in the original document of the bounding box (1-indexed).
          type: integer
    # A named partition inside a split: its pages and a high/low confidence label.
    SplitPartition:
      title: SplitPartition
      type: object
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            type: integer
        conf:
          title: Conf
          type: string
          enum:
          - high
          - low
          default: low
    # Split output: a nullable mapping from string keys to lists of integers, plus the list of splits.
    # Both keys are required.
    SplitResult:
      title: SplitResult
      type: object
      required:
      - section_mapping
      - splits
      properties:
        section_mapping:
          title: Section Mapping
          anyOf:
          - type: object
            additionalProperties:
              type: array
              items:
                type: integer
          - type: 'null'
        splits:
          title: Splits
          type: array
          items:
            $ref: '#/components/schemas/Split'
    # Open-ended object: any properties are accepted and none are declared.
    ExtractResponse:
      type: object
      additionalProperties: true
    # Extraction result for one split: the split name, its page range, an optional partition
    # name, and the extract result itself.
    ExtractSplitResponse:
      title: ExtractSplitResponse
      type: object
      description: This is the response format for Extract -> Split Pipelines
      required:
      - split_name
      - page_range
      - result
      properties:
        split_name:
          title: Split Name
          type: string
        page_range:
          title: Page Range
          type: array
          items:
            type: integer
        partition:
          title: Partition
          anyOf:
          - type: string
          - type: 'null'
        result:
          title: Result
          anyOf:
          - $ref: '#/components/schemas/ExtractResponse'
          - $ref: '#/components/schemas/V3ExtractResponse'
    # 200 body of POST /pipeline: job ID, usage and the per-step result.
    # `response_type` is the constant 'pipeline'.
    PipelineResponse:
      title: PipelineResponse
      type: object
      required:
      - job_id
      - usage
      - result
      properties:
        response_type:
          title: Response Type
          type: string
          const: pipeline
          default: pipeline
        job_id:
          title: Job Id
          type: string
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          $ref: '#/components/schemas/PipelineResult'
    # Webhook configuration whose `mode` is the constant 'direct'; `url` is required.
    DirectWebhookConfig:
      title: DirectWebhookConfig
      type: object
      required:
      - url
      properties:
        mode:
          title: Mode
          type: string
          const: direct
          default: direct
        url:
          title: Url
          type: string
    # Open-ended object: any properties are accepted and none are declared.
    V3ExtractResponse:
      type: object
      additionalProperties: true
    # 200 body of POST /pipeline_async: only the job ID.
    AsyncPipelineResponse:
      title: AsyncPipelineResponse
      type: object
      required:
      - job_id
      properties:
        job_id:
          title: Job Id
          type: string
    # Two optional, nullable numeric confidence scores: one for extract and one for parse.
    GranularConfidence:
      title: GranularConfidence
      type: object
      properties:
        extract_confidence:
          title: Extract Confidence
          anyOf:
          - type: number
          - type: 'null'
        parse_confidence:
          title: Parse Confidence
          anyOf:
          - type: number
          - type: 'null'
    # Response of a parse step: job ID, duration, usage and the result (inline or by URL),
    # plus optional links and the parse mode. `response_type` is the constant 'parse'.
    ParseResponse:
      title: ParseResponse
      type: object
      required:
      - job_id
      - duration
      - usage
      - result
      properties:
        response_type:
          title: Response Type
          type: string
          const: parse
          default: parse
        job_id:
          title: Job Id
          type: string
        duration:
          title: Duration
          description: The duration of the parse request in seconds.
          type: number
        pdf_url:
          title: Pdf Url
          description: The storage URL of the converted PDF file.
          anyOf:
          - type: string
          - type: 'null'
        studio_link:
          title: Studio Link
          description: The link to the studio pipeline for the document.
          anyOf:
          - type: string
          - type: 'null'
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          title: Result
          description: >-
            The response from the document processing service. Note that there can be two types of responses, Full
            Result and URL Result. This is due to limitations on the max return size on HTTPS. If the response is too large,
            it will be returned as a presigned URL in the URL response. You should handle this in your application.
          anyOf:
          - $ref: '#/components/schemas/FullResult-Output'
          - $ref: '#/components/schemas/UrlResult'
        parse_mode:
          title: Parse Mode
          description: >-
            Which pipeline produced this response. ``lite`` means Reducto Flash Lite served the request; ``base``
            is the standard pipeline. Optional / nullable for forward compatibility — older API instances or persisted
            responses written before this field existed will leave it ``None``; treat ``None`` as ``base``.
          anyOf:
          - type: string
            enum:
            - base
            - lite
          - type: 'null'
    # A form widget: bounding box, description and widget type are required; fill flag,
    # explicit value and per-field font size are optional.
    EditWidget:
      title: EditWidget
      type: object
      required:
      - bbox
      - description
      - type
      properties:
        bbox:
          description: Bounding box coordinates of the widget
          $ref: '#/components/schemas/BoundingBox'
        description:
          title: Description
          description: Description of the widget extracted from the document
          type: string
        type:
          title: Type
          description: Type of the form widget
          type: string
          enum:
          - text
          - checkbox
          - radio
          - dropdown
          - barcode
        fill:
          title: Fill
          description: >-
            If True (default), the system will attempt to fill this widget. If False, the widget will be created
            but intentionally left unfilled.
          type: boolean
          default: true
        value:
          title: Value
          description: >-
            If provided, this value will be used directly instead of attempting to intelligently determine the
            field value.
          anyOf:
          - type: string
          - type: 'null'
        font_size:
          title: Font Size
          description: >-
            Font size in points for this specific field. Takes priority over the global font_size in EditOptions.
            If not set, falls back to the global font_size, then to auto-calculated sizing.
          anyOf:
          # A numeric size is bounded to the range 1..72.
          - type: number
            minimum: 1
            maximum: 72
          - type: 'null'
    # A named partition of a deep split; each page entry is a DeepSplitPageEvidence.
    DeepSplitPartition:
      title: DeepSplitPartition
      type: object
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
    # One OCR word: text and bounding box are required; confidence, chunk index and
    # rotation are optional and nullable.
    OCRWord:
      title: OCRWord
      type: object
      required:
      - text
      - bbox
      properties:
        text:
          title: Text
          type: string
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          title: Confidence
          description: OCR confidence score between 0 and 1, where 1 indicates highest confidence
          anyOf:
          - type: number
          - type: 'null'
        chunk_index:
          title: Chunk Index
          description: The index of the chunk that the word belongs to.
          anyOf:
          - type: integer
          - type: 'null'
        rotation:
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
          anyOf:
          - type: integer
          - type: 'null'
    # Response of a split step: usage plus either a SplitResult or a DeepSplitResult.
    # `response_type` is the constant 'split'.
    SplitResponse:
      title: SplitResponse
      type: object
      required:
      - usage
      - result
      properties:
        response_type:
          title: Response Type
          type: string
          const: split
          default: split
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          title: Result
          description: The split result.
          anyOf:
          - $ref: '#/components/schemas/SplitResult'
          - $ref: '#/components/schemas/DeepSplitResult'
    # Deep-split output: a required list of DeepSplit entries.
    DeepSplitResult:
      title: DeepSplitResult
      type: object
      required:
      - splits
      properties:
        splits:
          title: Splits
          type: array
          items:
            $ref: '#/components/schemas/DeepSplit'
    # Result variant whose `type` is the constant 'full'; carries the parsed chunks inline, with
    # optional OCR output and a `custom` value. Published under the title FullResult.
    FullResult-Output:
      title: FullResult
      type: object
      required:
      - type
      - chunks
      properties:
        type:
          title: Type
          description: type = 'full'
          type: string
          const: full
        chunks:
          title: Chunks
          type: array
          items:
            $ref: '#/components/schemas/ParseChunk-Output'
        ocr:
          anyOf:
          - $ref: '#/components/schemas/OCRResult-Output'
          - type: 'null'
        custom:
          title: Custom
          anyOf:
          # The empty schema places no constraint on the value.
          - {}
          - type: 'null'
    # Upload descriptor: a required file ID and an optional, nullable presigned URL.
    UploadResponse:
      title: UploadResponse
      type: object
      required:
      - file_id
      properties:
        file_id:
          title: File Id
          type: string
        presigned_url:
          title: Presigned Url
          anyOf:
          - type: string
          - type: 'null'
    # Request body of POST /pipeline_async: asynchronous options, the input to process, the pipeline
    # to run, and optional per-request settings. `input` and `pipeline_id` are required.
    V3AsyncPipelineConfig:
      properties:
        async:
          $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        input:
          # Accepts a single string, a list of strings, or an UploadResponse object.
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar so the options render as a Markdown numbered list. The previous value
          # embedded source-code indentation before every line, which CommonMark renderers fold into
          # the paragraph (list items) or show as an indented code block (closing sentence).
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        pipeline_id:
          type: string
          title: Pipeline Id
          description: The ID of the pipeline to use for the document.
        settings:
          $ref: '#/components/schemas/PipelineSettings'
          default: {}
      type: object
      required:
      - input
      - pipeline_id
      title: V3AsyncPipelineConfig
    # Per-step outputs of a pipeline run. `parse`, `extract` and `split` must be present but may
    # be null; `edit` is optional.
    PipelineResult:
      title: PipelineResult
      type: object
      required:
      - parse
      - extract
      - split
      properties:
        parse:
          title: Parse
          # A single ParseResponse, a list of them, or null.
          anyOf:
          - $ref: '#/components/schemas/ParseResponse'
          - type: array
            items:
              $ref: '#/components/schemas/ParseResponse'
          - type: 'null'
        extract:
          title: Extract
          anyOf:
          - type: array
            items:
              $ref: '#/components/schemas/ExtractSplitResponse'
          - $ref: '#/components/schemas/ExtractResponse'
          - $ref: '#/components/schemas/V3ExtractResponse'
          - type: 'null'
        split:
          anyOf:
          - $ref: '#/components/schemas/SplitResponse'
          - type: 'null'
        edit:
          anyOf:
          - $ref: '#/components/schemas/EditResponse'
          - type: 'null'
    # Response of an edit step: a required document URL, with optional form schema and usage.
    # `response_type` is the constant 'edit'.
    EditResponse:
      title: EditResponse
      type: object
      required:
      - document_url
      properties:
        response_type:
          title: Response Type
          type: string
          const: edit
          default: edit
        document_url:
          title: Document Url
          description: Presigned URL to download the edited document.
          type: string
        form_schema:
          title: Form Schema
          description: Form schema for PDF forms. List of widgets with their types, descriptions, and bounding boxes.
          anyOf:
          - type: array
            items:
              $ref: '#/components/schemas/EditWidget'
          - type: 'null'
        usage:
          description: Usage information for the edit operation, including number of pages and credits charged.
          anyOf:
          - $ref: '#/components/schemas/ParseUsage'
          - type: 'null'
    # Evidence for one page of a deep split: page number and evidence text are required;
    # the high/medium/low confidence label is optional and nullable.
    DeepSplitPageEvidence:
      title: DeepSplitPageEvidence
      type: object
      required:
      - page_number
      - evidence
      properties:
        page_number:
          title: Page Number
          type: integer
        evidence:
          title: Evidence
          type: string
        confidence:
          title: Confidence
          anyOf:
          - type: string
            enum:
            - high
            - medium
            - low
          - type: 'null'
    # A named deep split: per-page evidence entries and an optional, nullable list of partitions.
    DeepSplit:
      title: DeepSplit
      type: object
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
        partitions:
          title: Partitions
          anyOf:
          - type: array
            items:
              $ref: '#/components/schemas/DeepSplitPartition'
          - type: 'null'
    # One OCR line: text and bounding box are required; confidence, chunk index and
    # rotation are optional and nullable.
    OCRLine:
      title: OCRLine
      type: object
      required:
      - text
      - bbox
      properties:
        text:
          title: Text
          type: string
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          title: Confidence
          description: OCR confidence score between 0 and 1, where 1 indicates highest confidence
          anyOf:
          - type: number
          - type: 'null'
        chunk_index:
          title: Chunk Index
          description: The index of the chunk that the line belongs to.
          anyOf:
          - type: integer
          - type: 'null'
        rotation:
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
          anyOf:
          - type: integer
          - type: 'null'
    # Body of the 422 responses: an optional list of ValidationError entries under `detail`.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
  # Authentication schemes referenced by the `security` requirements in this document.
  securitySchemes:
    # HTTP authentication using the bearer scheme.
    SkippableHTTPBearer:
      scheme: bearer
      type: http