# Google Cloud Document AI API
#
# Processes documents using machine learning to extract structured data from
# unstructured and semi-structured content, including forms, invoices, and
# receipts.
#
# OpenAPI Specification (openapi.yml)
# Document header: OpenAPI spec version, API metadata, and the base URL.
openapi: '3.1.0'
info:
  title: 'Google Cloud Document AI API'
  version: 'v1'
  # Folded scalar (`>-`): the lines join into one sentence, no trailing
  # newline.
  description: >-
    Processes documents to extract structured data
    from unstructured and semi-structured content
    using machine learning.
  contact:
    name: 'Google Cloud'
    url: 'https://cloud.google.com/document-ai'
servers:
  # Base URL; the path templates under `paths` are appended to it.
  - url: 'https://documentai.googleapis.com/v1'
paths:
  /projects/{projectId}/locations/{location}/processors:
    # Paginated listing of the processors under one project/location.
    get:
      operationId: listProcessors
      summary: Google Cloud Document AI List processors
      description: >-
        Lists all processors in the specified project and location. Results
        are paginated; when the response carries a `nextPageToken`, send it
        back as `pageToken` to retrieve the next page.
      parameters:
        - name: projectId
          in: path
          required: true
          description: Identifier of the project that owns the processors.
          schema:
            type: string
        - name: location
          in: path
          required: true
          description: Location of the processors.
          schema:
            type: string
        # Standard list pagination controls. Without them a client generated
        # from this spec can only ever read the first page of results.
        - name: pageSize
          in: query
          required: false
          description: Maximum number of processors to return in one page.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: >-
            Page token taken from `nextPageToken` of a previous call; omit it
            to request the first page.
          schema:
            type: string
      responses:
        '200':
          description: Successful response with list of processors.
          content:
            application/json:
              schema:
                type: object
                properties:
                  processors:
                    type: array
                    items:
                      $ref: '#/components/schemas/Processor'
                  nextPageToken:
                    type: string
                    description: >-
                      Token for the next page of results; absent or empty when
                      there are no further pages.
      tags:
        - Projects
    # Creates a processor under the project/location named in the path. The
    # Processor schema serves as both the request body and the 200 response.
    post:
      tags:
        - Projects
      summary: 'Google Cloud Document AI Create a processor'
      description: 'Creates a new document processor.'
      operationId: createProcessor
      parameters:
        # One required string parameter per path template variable.
        - name: projectId
          in: path
          schema:
            type: string
          required: true
        - name: location
          in: path
          schema:
            type: string
          required: true
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/Processor"
      responses:
        '200':
          description: 'Successful processor creation.'
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/Processor"
  # Synchronous processing of one document by the processor named in the path.
  # The key is quoted because it contains a literal colon (`:process`).
  '/projects/{projectId}/locations/{location}/processors/{processorId}:process':
    post:
      tags:
        - Projects
      summary: 'Google Cloud Document AI Process a document'
      description: 'Processes a single document synchronously.'
      operationId: processDocument
      parameters:
        # One required string parameter per path template variable.
        - name: projectId
          in: path
          schema:
            type: string
          required: true
        - name: location
          in: path
          schema:
            type: string
          required: true
        - name: processorId
          in: path
          schema:
            type: string
          required: true
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ProcessRequest"
      responses:
        '200':
          description: 'Successful document processing response.'
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ProcessResponse"
  # Asynchronous batch processing. The 200 response is a long-running
  # operation handle rather than the processed documents themselves.
  /projects/{projectId}/locations/{location}/processors/{processorId}:batchProcess:
    post:
      operationId: batchProcessDocuments
      summary: Google Cloud Document AI Batch process documents
      description: >-
        Processes multiple documents asynchronously. The call returns a
        long-running operation; once `done` is true, read `response` on
        success or `error` on failure.
      parameters:
        - name: projectId
          in: path
          required: true
          schema:
            type: string
        - name: location
          in: path
          required: true
          schema:
            type: string
        - name: processorId
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/BatchProcessRequest'
      responses:
        '200':
          description: Long-running operation for batch processing.
          content:
            application/json:
              schema:
                # Shape follows google.longrunning.Operation. `metadata`,
                # `error` and `response` are declared so that generated
                # clients expose the outcome instead of only name/done.
                type: object
                properties:
                  name:
                    type: string
                    description: Server-assigned name of the operation.
                  metadata:
                    type: object
                    description: >-
                      Service-specific metadata associated with the operation,
                      such as progress information.
                  done:
                    type: boolean
                    description: >-
                      False while the operation is still running; true once
                      either `error` or `response` is available.
                  error:
                    type: object
                    description: >-
                      Error result, set when the operation failed or was
                      cancelled.
                    properties:
                      code:
                        type: integer
                        format: int32
                      message:
                        type: string
                      details:
                        type: array
                        items:
                          type: object
                  response:
                    type: object
                    description: >-
                      Result payload, set when the operation completed
                      successfully.
      tags:
        - Projects
  # Paginated listing of the processor types available in a project/location.
  /projects/{projectId}/locations/{location}/processorTypes:
    get:
      operationId: listProcessorTypes
      summary: Google Cloud Document AI List processor types
      description: >-
        Lists available processor types. Results are paginated; when the
        response carries a `nextPageToken`, send it back as `pageToken` to
        retrieve the next page.
      parameters:
        - name: projectId
          in: path
          required: true
          description: Identifier of the project to list processor types for.
          schema:
            type: string
        - name: location
          in: path
          required: true
          description: Location to list processor types for.
          schema:
            type: string
        # Standard list pagination controls. Without them a client generated
        # from this spec can only ever read the first page of results.
        - name: pageSize
          in: query
          required: false
          description: Maximum number of processor types to return in one page.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: >-
            Page token taken from `nextPageToken` of a previous call; omit it
            to request the first page.
          schema:
            type: string
      responses:
        '200':
          description: Successful response with processor types.
          content:
            application/json:
              schema:
                type: object
                properties:
                  processorTypes:
                    type: array
                    items:
                      type: object
                      properties:
                        name:
                          type: string
                        type:
                          type: string
                        category:
                          type: string
                  nextPageToken:
                    type: string
                    description: >-
                      Token for the next page of results; absent or empty when
                      there are no further pages.
      tags:
        - Projects
components:
  schemas:
    # Document processor resource. The same schema is used as the
    # createProcessor request body, so server-populated fields are flagged
    # readOnly to tell clients not to send them.
    Processor:
      type: object
      properties:
        name:
          type: string
          readOnly: true
          description: >-
            Resource name of the processor, in the form
            `projects/{project}/locations/{location}/processors/{processor}`.
            Assigned by the server.
        type:
          type: string
          description: >-
            Processor type, such as `OCR_PROCESSOR` or `INVOICE_PROCESSOR`.
        displayName:
          type: string
          description: Display name of the processor.
        state:
          type: string
          readOnly: true
          description: Current lifecycle state of the processor.
          enum:
            - STATE_UNSPECIFIED
            - ENABLED
            - DISABLED
            - CREATING
            - FAILED
            - DELETING
        defaultProcessorVersion:
          type: string
          description: Resource name of the default processor version.
        createTime:
          type: string
          # Timestamps are serialised as RFC 3339 strings.
          format: date-time
          readOnly: true
          description: Time at which the processor was created.
    # Request body of the synchronous process call.
    ProcessRequest:
      type: object
      description: >-
        Request to process a single document. `inlineDocument` and
        `rawDocument` are alternative ways to supply the document; set only
        one of them.
      properties:
        inlineDocument:
          $ref: '#/components/schemas/Document'
        rawDocument:
          type: object
          description: Document supplied as raw bytes plus its media type.
          properties:
            content:
              type: string
              # The JSON value carries binary data, so declare the encoding
              # (OpenAPI 3.1 / JSON Schema `contentEncoding`).
              contentEncoding: base64
              description: Inline document content as base64-encoded bytes.
            mimeType:
              type: string
              description: >-
                IANA media type of `content`, for example `application/pdf`.
        skipHumanReview:
          type: boolean
          description: Whether human review should be skipped for this request.
    # Response envelope pairing a Document with its human-review status.
    ProcessResponse:
      type: object
      properties:
        # Reuses the shared Document schema.
        document:
          $ref: "#/components/schemas/Document"
        # Declared as a free-form object; no sub-properties are modelled here.
        humanReviewStatus: { type: object }
    # Request body of the batch process call: where to read the input
    # documents, where to write the output, and the human-review switch.
    # Leaf string/boolean schemas use flow style to keep the nesting shallow.
    BatchProcessRequest:
      type: object
      properties:
        # Input side: a list of Cloud Storage objects with their media types.
        inputDocuments:
          type: object
          properties:
            gcsDocuments:
              type: object
              properties:
                documents:
                  type: array
                  items:
                    type: object
                    properties:
                      gcsUri: { type: string }
                      mimeType: { type: string }
        # Output side: a single Cloud Storage URI.
        documentOutputConfig:
          type: object
          properties:
            gcsOutputConfig:
              type: object
              properties:
                gcsUri: { type: string }
        skipHumanReview: { type: boolean }
    # Document payload shared by ProcessRequest.inlineDocument and
    # ProcessResponse.document.
    Document:
      type: object
      properties:
        mimeType:
          type: string
          description: IANA media type of the document.
        text:
          type: string
          description: Text content of the document.
        pages:
          type: array
          description: Per-page layout information.
          items:
            # Page structure is not modelled here; any object is accepted.
            type: object
        entities:
          type: array
          description: Entities detected in the document.
          items:
            type: object
            properties:
              type:
                type: string
                description: Entity type.
              mentionText:
                type: string
                description: Text value of the entity as found in the document.
              confidence:
                type: number
                # Declare the numeric width so generated clients pick a
                # floating-point type rather than a generic number.
                format: float
                description: Confidence of the detection, in the range 0 to 1.
# Tag declarations; operations refer to these names in their own `tags` list.
tags:
  - name: 'Projects'