# Xceptor Document Upload API
#
# API for uploading and processing documents through Xceptor's data
# extraction engine. Supports intelligent document processing using NLP, OCR,
# and generative AI to transform unstructured documents including PDFs,
# emails, and spreadsheets into structured, trusted data.
#
# OpenAPI Specification
#
# Source file: xceptor-document-upload-api-openapi.yml
# Version of the OpenAPI Specification this document is written against.
openapi: 3.1.0
# Descriptive metadata about the API: title, summary text, version, and the
# support and legal contact points.
info:
  title: Xceptor Document Upload API
  description: >-
    API for uploading and processing documents through Xceptor's data
    extraction engine. Supports intelligent document processing using NLP,
    OCR, and generative AI to transform unstructured documents including
    PDFs, emails, spreadsheets, and handwritten forms into structured,
    trusted data. The API handles document ingestion, classification,
    field and table extraction, and confidence-scored output for financial
    services use cases such as trade confirmations, tax documents, loan
    notices, and client onboarding materials.
  # Quoted so the version stays a string instead of becoming the float 1.0.
  version: '1.0'
  contact:
    name: Xceptor API Support
    url: https://www.xceptor.com/support
    # NOTE(review): the source carried the bare text `[email protected]` here,
    # an e-mail obfuscation placeholder. Unquoted, YAML reads it as a flow
    # sequence rather than a string, and it is not a valid address either.
    # The optional `email` key is omitted until the real support address is
    # confirmed - TODO restore it as a quoted string.
  termsOfService: https://www.xceptor.com/legal-tcs
# Link to the human-readable documentation for this API.
externalDocs:
  url: https://docs.xceptor.com/api/documents
  description: Xceptor Document Upload API Documentation
# Base URLs against which the paths in this document are resolved.
servers:
  - description: Production Server
    url: https://api.xceptor.com/v1
# Tag groups that the operations below are filed under.
tags:
  - name: Documents
    description: >-
      Operations for uploading, listing, and managing documents in the Xceptor
      platform.
  - name: Extraction
    description: >-
      Operations for triggering and monitoring document data extraction using
      Xceptor's AI-powered processing engine.
  - name: Templates
    description: >-
      Operations for managing extraction templates that define the fields and
      tables to extract from specific document types.
# Default security requirement for operations that do not override it: the
# bearerAuth scheme, with an empty scope list.
security:
  - bearerAuth: []
paths:
  /documents:
    # GET /documents: paginated document listing with optional filters.
    get:
      tags:
        - Documents
      operationId: listDocuments
      summary: Xceptor List documents
      description: >-
        Retrieves a paginated list of documents that have been uploaded to the
        Xceptor platform. Documents can be filtered by status, classification,
        and upload date. Each document includes metadata about its processing
        state and extracted data availability.
      parameters:
        # Shared pagination controls defined under components/parameters.
        - $ref: '#/components/parameters/PageParam'
        - $ref: '#/components/parameters/PageSizeParam'
        # Allowed values are the same list as the Document schema's status
        # enum.
        - in: query
          name: status
          description: Filter documents by processing status
          schema:
            type: string
            enum:
              - uploaded
              - classifying
              - classified
              - extracting
              - extracted
              - validated
              - failed
        - in: query
          name: classification
          description: Filter documents by their classified document type
          schema:
            type: string
        # Time-window filters on the upload timestamp.
        - in: query
          name: uploaded_after
          description: Filter documents uploaded after this date-time
          schema:
            format: date-time
            type: string
        - in: query
          name: uploaded_before
          description: Filter documents uploaded before this date-time
          schema:
            format: date-time
            type: string
      responses:
        # Success: one page of documents plus pagination information.
        '200':
          description: A paginated list of documents
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocumentList'
        # Authentication failure; the body follows the shared Error schema.
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
    # POST /documents: upload one file as multipart/form-data.
    post:
      tags:
        - Documents
      operationId: uploadDocument
      summary: Xceptor Upload a document
      description: >-
        Uploads a document to the Xceptor platform for processing. The document
        is accepted in its original format and queued for classification and
        extraction. Supported formats include PDF, Microsoft Excel, CSV, XML,
        JSON, email (EML/MSG), and image files (PNG, JPG, TIFF). Documents can
        optionally be assigned to a specific extraction template.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                # The only mandatory form part (see `required` below).
                file:
                  description: The document file to upload
                  type: string
                  format: binary
                template_id:
                  description: >-
                    The identifier of the extraction template to use.
                    If omitted, automatic classification determines the
                    template.
                  type: string
                  format: uuid
                workflow_id:
                  description: >-
                    The identifier of a workflow to trigger after
                    extraction completes
                  type: string
                  format: uuid
                # Sent as a string form part whose content is JSON text.
                metadata:
                  description: >-
                    JSON-encoded metadata to associate with the document, such
                    as source system identifiers or business context
                  type: string
              required:
                - file
      responses:
        # Success: the newly created document record.
        '201':
          description: Document uploaded successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Document'
        # All error responses share the Error schema.
        '400':
          description: Invalid file or request parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '413':
          description: File size exceeds maximum allowed limit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /documents/batch:
    # POST /documents/batch: upload several files in one multipart request.
    # The optional template_id and workflow_id apply to every file in the
    # batch.
    post:
      operationId: uploadDocumentBatch
      summary: Xceptor Upload a batch of documents
      description: >-
        Uploads multiple documents in a single request for batch processing.
        All documents in the batch are processed using the same extraction
        template and workflow configuration. This is efficient for processing
        large volumes of similar documents such as trade confirmations or
        tax forms.
      tags:
        - Documents
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required:
                - files
              properties:
                # Repeated binary form parts, one per document.
                files:
                  type: array
                  items:
                    type: string
                    format: binary
                  description: The document files to upload
                template_id:
                  type: string
                  format: uuid
                  description: The extraction template to apply to all documents
                workflow_id:
                  type: string
                  format: uuid
                  description: The workflow to trigger after extraction
      responses:
        # Success: per-batch counts plus accepted documents and per-file
        # errors.
        '201':
          description: Batch upload accepted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchUploadResult'
        '400':
          description: Invalid files or request parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        # Documented for parity with the single-file upload, which already
        # declares a size-limit response.
        # NOTE(review): confirm the service enforces a size limit on batches.
        '413':
          description: File size exceeds maximum allowed limit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /documents/{documentId}:
    # GET /documents/{documentId}: fetch one document record by identifier.
    get:
      tags:
        - Documents
      operationId: getDocument
      summary: Xceptor Get document details
      description: >-
        Retrieves the details and processing status of a specific
        document, including its classification, extraction status, and links
        to extracted data when available.
      parameters:
        # Path parameter carrying the document UUID.
        - $ref: '#/components/parameters/DocumentIdParam'
      responses:
        '200':
          description: Document details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Document'
        # Both error responses share the Error schema.
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '404':
          description: Document not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
    # DELETE /documents/{documentId}: remove a document and its extracted
    # data. Succeeds with an empty 204 response.
    delete:
      operationId: deleteDocument
      summary: Xceptor Delete a document
      description: >-
        Deletes a document and its associated extracted data from the Xceptor
        platform. Documents that are currently being processed cannot be
        deleted until processing completes or is cancelled.
      tags:
        - Documents
      parameters:
        - $ref: '#/components/parameters/DocumentIdParam'
      responses:
        # No response body on success.
        '204':
          description: Document deleted successfully
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '404':
          description: Document not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        # The description above states that in-progress documents cannot be
        # deleted; this entry documents that refusal for clients.
        # NOTE(review): confirm the status code the service actually returns.
        '409':
          description: Document is currently being processed
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /documents/{documentId}/extraction:
    # GET /documents/{documentId}/extraction: read the extraction output.
    get:
      tags:
        - Extraction
      operationId: getDocumentExtraction
      summary: Xceptor Get extraction results
      description: >-
        Retrieves the data extraction results for a processed document.
        The results include all extracted fields and tables with their
        values, locations in the source document, and AI confidence scores.
        Fields with confidence scores below the configured threshold are
        flagged for manual review.
      parameters:
        - $ref: '#/components/parameters/DocumentIdParam'
      responses:
        '200':
          description: Extraction results
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionResult'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        # One status covers both an unknown document and results that do not
        # exist yet.
        '404':
          description: Document not found or extraction not yet available
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
    # POST /documents/{documentId}/extraction: start or re-run extraction.
    post:
      tags:
        - Extraction
      operationId: triggerExtraction
      summary: Xceptor Trigger document extraction
      description: >-
        Manually triggers the data extraction process for a document that
        has been uploaded but not yet processed, or re-triggers extraction
        with a different template. The extraction runs asynchronously and
        results can be retrieved once the document status transitions to
        extracted.
      parameters:
        - $ref: '#/components/parameters/DocumentIdParam'
      # The body is not marked `required`, so it may be omitted.
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                template_id:
                  description: >-
                    The extraction template to use. Overrides the
                    previously assigned template if one exists.
                  type: string
                  format: uuid
      responses:
        # Accepted for asynchronous processing; returns the document record.
        '202':
          description: Extraction triggered
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Document'
        '400':
          description: Document is not in a valid state for extraction
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '404':
          description: Document not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /documents/{documentId}/download:
    # GET /documents/{documentId}/download: fetch the uploaded file itself.
    get:
      tags:
        - Documents
      operationId: downloadDocument
      summary: Xceptor Download original document
      description: >-
        Downloads the original uploaded document file. Returns the file in its
        original format with appropriate content type headers.
      parameters:
        - $ref: '#/components/parameters/DocumentIdParam'
      responses:
        # Success is raw bytes, declared here as application/octet-stream.
        '200':
          description: The original document file
          content:
            application/octet-stream:
              schema:
                format: binary
                type: string
        # Errors are JSON bodies that follow the Error schema.
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '404':
          description: Document not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /templates:
    # GET /templates: paginated template listing, filterable by document
    # type.
    get:
      tags:
        - Templates
      operationId: listTemplates
      summary: Xceptor List extraction templates
      description: >-
        Retrieves a list of available extraction templates. Templates
        define the fields and tables to extract from specific document types,
        along with validation rules and confidence thresholds.
      parameters:
        # Shared pagination controls defined under components/parameters.
        - $ref: '#/components/parameters/PageParam'
        - $ref: '#/components/parameters/PageSizeParam'
        - in: query
          name: document_type
          description: Filter templates by document type
          schema:
            type: string
      responses:
        '200':
          description: A list of extraction templates
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TemplateList'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /templates/{templateId}:
    # GET /templates/{templateId}: fetch one template definition.
    get:
      tags:
        - Templates
      operationId: getTemplate
      summary: Xceptor Get template details
      description: >-
        Retrieves the full definition of an extraction template, including its
        field definitions, table definitions, validation rules, and confidence
        thresholds.
      parameters:
        # Path parameter carrying the template UUID.
        - $ref: '#/components/parameters/TemplateIdParam'
      responses:
        '200':
          description: Template details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Template'
        # Both error responses share the Error schema.
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '404':
          description: Template not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
components:
  # Authentication schemes; bearerAuth is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      description: >-
        OAuth2 access token obtained via the client credentials flow. Include
        as a Bearer token in the Authorization header.
      # Declared as plain HTTP bearer authentication carrying a JWT.
      type: http
      scheme: bearer
      bearerFormat: JWT
  # Reusable parameters that operations pull in through $ref.
  parameters:
    # Path identifier of a single document (UUID).
    DocumentIdParam:
      in: path
      name: documentId
      required: true
      description: The unique identifier of the document
      schema:
        format: uuid
        type: string
    # Path identifier of a single extraction template (UUID).
    TemplateIdParam:
      in: path
      name: templateId
      required: true
      description: The unique identifier of the extraction template
      schema:
        format: uuid
        type: string
    # Optional page selector; starts at 1 and defaults to the first page.
    PageParam:
      in: query
      name: page
      description: The page number for pagination (1-based)
      schema:
        type: integer
        default: 1
        minimum: 1
    # Optional page size; between 1 and 100 items, 25 when not supplied.
    PageSizeParam:
      in: query
      name: page_size
      description: The number of items per page
      schema:
        type: integer
        default: 25
        minimum: 1
        maximum: 100
  schemas:
    # Document record. Returned directly by the upload, get-details and
    # trigger-extraction operations, and embedded in list and batch results.
    Document:
      description: >-
        A document that has been uploaded to the Xceptor platform
        for intelligent data extraction and processing.
      type: object
      properties:
        id:
          description: The unique identifier of the document
          type: string
          format: uuid
        filename:
          description: The original filename of the uploaded document
          type: string
        content_type:
          description: The MIME type of the uploaded document
          type: string
        file_size:
          description: The file size in bytes
          type: integer
          minimum: 0
        # Same value list as the `status` filter on the list operation.
        status:
          description: The current processing status of the document
          type: string
          enum:
            - uploaded
            - classifying
            - classified
            - extracting
            - extracted
            - validated
            - failed
        classification:
          description: >-
            The document type classification determined by AI or
            assigned by template
          type: string
        template_id:
          description: The extraction template used for this document
          type: string
          format: uuid
        confidence_score:
          description: >-
            The overall confidence score for the classification
            (0.0 to 1.0)
          type: number
          minimum: 0.0
          maximum: 1.0
        page_count:
          description: The number of pages in the document
          type: integer
          minimum: 1
        uploaded_at:
          description: The date and time the document was uploaded
          type: string
          format: date-time
        processed_at:
          description: The date and time extraction was completed
          type: string
          format: date-time
        # Free-form object: any additional properties are allowed.
        metadata:
          description: Custom metadata associated with the document
          type: object
          additionalProperties: true
    # Response envelope for the document listing: items plus paging
    # information.
    DocumentList:
      description: A paginated list of documents
      type: object
      properties:
        data:
          description: The list of documents for the current page
          type: array
          items:
            $ref: '#/components/schemas/Document'
        pagination:
          $ref: '#/components/schemas/Pagination'
    # Outcome of a batch upload: file counts, the accepted documents, and one
    # error entry per rejected file.
    BatchUploadResult:
      type: object
      description: The result of a batch document upload operation
      properties:
        batch_id:
          type: string
          format: uuid
          description: The unique identifier of the batch
        # The three counts below are file tallies, so they are bounded at
        # zero, matching the lower bound on Document.file_size.
        total_files:
          type: integer
          description: The total number of files in the batch
          minimum: 0
        accepted:
          type: integer
          description: The number of files accepted for processing
          minimum: 0
        rejected:
          type: integer
          description: The number of files rejected due to errors
          minimum: 0
        documents:
          type: array
          items:
            $ref: '#/components/schemas/Document'
          description: The list of accepted documents
        errors:
          type: array
          items:
            $ref: '#/components/schemas/BatchFileError'
          description: Errors for rejected files
    # Pairs a rejected file's name with the shared Error payload.
    BatchFileError:
      description: An error associated with a specific file in a batch upload
      type: object
      properties:
        filename:
          description: The name of the file that was rejected
          type: string
        error:
          $ref: '#/components/schemas/Error'
    # Payload of the get-extraction operation: document-level summary plus the
    # extracted fields and tables.
    ExtractionResult:
      description: >-
        The data extraction results for a processed document, including all
        extracted fields, tables, and their confidence scores.
      type: object
      properties:
        document_id:
          description: The identifier of the source document
          type: string
          format: uuid
        template_id:
          description: The extraction template that was applied
          type: string
          format: uuid
        classification:
          description: The classified document type
          type: string
        extraction_method:
          description: The AI method used for extraction
          type: string
          enum:
            - ocr
            - nlp
            - generative_ai
            - template_based
            - hybrid
        overall_confidence:
          description: >-
            The overall confidence score for the extraction
            (0.0 to 1.0)
          type: number
          minimum: 0.0
          maximum: 1.0
        fields:
          description: >-
            The list of extracted fields with values and confidence
            scores
          type: array
          items:
            $ref: '#/components/schemas/ExtractedField'
        tables:
          description: The list of extracted tables
          type: array
          items:
            $ref: '#/components/schemas/ExtractedTable'
        # Document-level flag; each ExtractedField has its own
        # requires_review.
        requires_review:
          description: >-
            Whether any extracted values fall below the confidence
            threshold and require manual review
          type: boolean
        extracted_at:
          description: The date and time the extraction was performed
          type: string
          format: date-time
    # One extracted field: value, declared data type, confidence, and where it
    # was found on the page.
    ExtractedField:
      description: >-
        A single field extracted from a document with its value and
        metadata
      type: object
      properties:
        name:
          description: The name of the extracted field
          type: string
        # No `type` keyword, so the schema places no restriction on the
        # value.
        value:
          description: The extracted value of the field
        data_type:
          description: The data type of the extracted value
          type: string
          enum:
            - string
            - number
            - date
            - currency
            - boolean
        confidence:
          description: The confidence score for this extraction (0.0 to 1.0)
          type: number
          minimum: 0.0
          maximum: 1.0
        page_number:
          description: The page number where the field was found
          type: integer
          minimum: 1
        bounding_box:
          $ref: '#/components/schemas/BoundingBox'
        requires_review:
          description: >-
            Whether this field requires manual review due to low
            confidence
          type: boolean
    # One extracted table: header names plus rows, each row an array of
    # cells.
    ExtractedTable:
      description: >-
        A table extracted from a document with its rows and column
        headers
      type: object
      properties:
        name:
          description: The name or label of the extracted table
          type: string
        headers:
          description: The column headers of the extracted table
          type: array
          items:
            type: string
        # Array of arrays; cells carry no `type`, so any value is accepted.
        rows:
          description: The data rows of the extracted table
          type: array
          items:
            type: array
            items:
              description: A cell value in the table row
        confidence:
          description: >-
            The confidence score for the table extraction
            (0.0 to 1.0)
          type: number
          minimum: 0.0
          maximum: 1.0
        page_number:
          description: The page number where the table was found
          type: integer
          minimum: 1
    # Rectangle locating an extracted element; every component is limited to
    # the 0.0-1.0 range.
    BoundingBox:
      description: >-
        The bounding box coordinates of an extracted element on the document
        page, in normalized coordinates (0.0 to 1.0).
      type: object
      properties:
        x:
          description: The x-coordinate of the top-left corner
          type: number
          minimum: 0.0
          maximum: 1.0
        # Quoted on purpose: YAML 1.1 parsers read a bare `y` as a boolean.
        'y':
          description: The y-coordinate of the top-left corner
          type: number
          minimum: 0.0
          maximum: 1.0
        width:
          description: The width of the bounding box
          type: number
          minimum: 0.0
          maximum: 1.0
        height:
          description: The height of the bounding box
          type: number
          minimum: 0.0
          maximum: 1.0
    # Extraction template: what to extract for a document type and the
    # confidence level below which values need review.
    Template:
      description: >-
        An extraction template that defines the fields and tables to extract
        from a specific document type, along with validation rules and
        confidence thresholds.
      type: object
      properties:
        id:
          description: The unique identifier of the template
          type: string
          format: uuid
        name:
          description: The display name of the template
          type: string
        description:
          description: A description of the document type this template handles
          type: string
        document_type:
          description: >-
            The document type classification this template is designed
            for
          type: string
        field_definitions:
          description: The fields to extract from documents
          type: array
          items:
            $ref: '#/components/schemas/FieldDefinition'
        table_definitions:
          description: The tables to extract from documents
          type: array
          items:
            $ref: '#/components/schemas/TableDefinition'
        # Defaults to 0.85 when a template does not set its own threshold.
        confidence_threshold:
          description: >-
            The minimum confidence score for accepting extracted values without
            manual review (0.0 to 1.0)
          type: number
          default: 0.85
          minimum: 0.0
          maximum: 1.0
        created_at:
          description: The date and time the template was created
          type: string
          format: date-time
        updated_at:
          description: The date and time the template was last updated
          type: string
          format: date-time
    # Template entry describing one field to extract.
    FieldDefinition:
      description: A field definition within an extraction template
      type: object
      properties:
        name:
          description: The name of the field to extract
          type: string
        # Same five data types as ExtractedField.data_type.
        data_type:
          description: The expected data type of the field
          type: string
          enum:
            - string
            - number
            - date
            - currency
            - boolean
        # A boolean property named `required`, not the schema keyword.
        required:
          description: Whether the field is required in the extraction output
          type: boolean
        validation_pattern:
          description: >-
            A regular expression pattern for validating extracted
            values
          type: string
    # Template entry describing one table to extract and its columns.
    TableDefinition:
      description: A table definition within an extraction template
      type: object
      properties:
        name:
          description: The name of the table to extract
          type: string
        columns:
          description: The expected columns in the table
          type: array
          # Column objects are declared inline rather than as a named schema.
          items:
            type: object
            properties:
              name:
                description: The column name
                type: string
              data_type:
                description: The expected data type of the column
                type: string
                enum:
                  - string
                  - number
                  - date
                  - currency
                  - boolean
    # Response envelope for the template listing: items plus paging
    # information.
    TemplateList:
      description: A paginated list of extraction templates
      type: object
      properties:
        data:
          description: The list of templates for the current page
          type: array
          items:
            $ref: '#/components/schemas/Template'
        pagination:
          $ref: '#/components/schemas/Pagination'
    # Paging block shared by the list responses. Bounds are aligned with the
    # `page` and `page_size` query parameters declared in this document.
    Pagination:
      type: object
      description: Pagination information for list responses
      properties:
        # 1-based, like the `page` query parameter.
        page:
          type: integer
          description: The current page number
          minimum: 1
        # Same 1-100 range as the `page_size` query parameter.
        # NOTE(review): confirm responses never report a larger page size.
        page_size:
          type: integer
          description: The number of items per page
          minimum: 1
          maximum: 100
        # Totals are counts and can be zero for an empty result set.
        total_items:
          type: integer
          description: The total number of items across all pages
          minimum: 0
        total_pages:
          type: integer
          description: The total number of pages
          minimum: 0
    # Error body referenced by every non-success JSON response in this
    # document.
    Error:
      description: An error response from the Xceptor API
      type: object
      properties:
        code:
          description: A machine-readable error code
          type: string
        message:
          description: A human-readable description of the error
          type: string
        # Free-form object: any additional properties are allowed.
        details:
          description: Additional error details when available
          type: object
          additionalProperties: true