# LlamaIndex LlamaExtract API
#
# LlamaExtract is a prebuilt agentic data extraction API that transforms
# unstructured document data into structured JSON representations. The REST
# API allows developers to create extraction agents, upload documents, and run
# extraction jobs programmatically. Jobs are processed asynchronously, and
# developers can poll for job status and retrieve structured results. It is
# designed for use cases where documents need to be converted into
# well-defined schemas for downstream processing.
#
# OpenAPI Specification
#
# Source file: llamaindex-llamaextract-api-openapi.yml
# Specification version, quoted so it is always read as a string.
openapi: '3.1.0'
# API-level metadata: title, long description, version and contact details.
info:
  title: LlamaIndex LlamaExtract API
  description: >-
    LlamaExtract is a prebuilt agentic data extraction API that transforms
    unstructured document data into structured JSON representations.
    The REST API allows developers to create extraction agents configured
    with specific schemas, upload documents, and run extraction jobs
    programmatically.
    Jobs are processed asynchronously, and developers can poll for job
    status and retrieve structured results.
    It supports both human-defined and AI-inferred schemas for flexible
    data extraction workflows.
  version: '1.0'
  contact:
    name: LlamaIndex Support
    url: https://www.llamaindex.ai/contact
  termsOfService: https://www.llamaindex.ai/terms-of-service
# Pointer to the human-readable reference documentation for this API.
externalDocs:
  description: LlamaExtract REST API Documentation
  url: https://developers.llamaindex.ai/python/cloud/llamaextract/getting_started/api/
# Base URLs for the two listed deployments (US and EU); the entries under
# `paths` are appended to whichever base URL the client selects.
servers:
  - url: https://api.cloud.llamaindex.ai/api/v1
    description: US Production Server
  - url: https://api.cloud.llamaindex.eu/api/v1
    description: EU Production Server
# Tag catalogue; each operation below references one of these names.
tags:
  - name: Extraction Agents
    description: >-
      Create and manage extraction agents that are configured with
      specific schemas and extraction settings for processing documents.
  - name: Extraction Jobs
    description: >-
      Run and monitor asynchronous extraction jobs that process
      documents through extraction agents.
  - name: Files
    description: Upload files for use in extraction workflows.
# Default security requirement applied to operations that do not declare
# their own `security` list.
security:
  - bearerAuth: []
paths:
  # Agent collection: GET lists the agents of a project, POST creates one.
  /extraction/extraction-agents:
    get:
      operationId: listExtractionAgents
      summary: List extraction agents
      description: >-
        Retrieve a list of extraction agents for the specified project.
        Each agent is configured with a specific data schema and extraction
        settings for processing documents.
      tags:
        - Extraction Agents
      parameters:
        - name: project_id
          in: query
          description: Filter extraction agents by project identifier.
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Successfully retrieved list of extraction agents
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/ExtractionAgent'
        '401':
          description: Unauthorized - invalid or missing API key
    post:
      operationId: createExtractionAgent
      summary: Create an extraction agent
      description: >-
        Create a new extraction agent with a specified data schema and
        extraction configuration.
        The agent can then be used to run extraction jobs on uploaded
        documents.
        Schemas can be provided manually or inferred automatically by
        providing a prompt and example files.
      tags:
        - Extraction Agents
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateExtractionAgentRequest'
      responses:
        '201':
          description: Extraction agent created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionAgent'
        '400':
          description: Bad request - invalid agent configuration
        '401':
          description: Unauthorized - invalid or missing API key
  # Single-agent resource: read, replace or delete the agent named by the
  # `agentId` path parameter.
  /extraction/extraction-agents/{agentId}:
    get:
      operationId: getExtractionAgent
      summary: Get an extraction agent
      description: >-
        Retrieve details of a specific extraction agent including its
        schema, configuration, and status.
      tags:
        - Extraction Agents
      parameters:
        - $ref: '#/components/parameters/agentId'
      responses:
        '200':
          description: Successfully retrieved extraction agent details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionAgent'
        '401':
          description: Unauthorized - invalid or missing API key
        '404':
          description: Extraction agent not found
    put:
      operationId: updateExtractionAgent
      summary: Update an extraction agent
      description: >-
        Update the configuration of an existing extraction agent,
        including its schema and extraction settings.
      tags:
        - Extraction Agents
      parameters:
        - $ref: '#/components/parameters/agentId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateExtractionAgentRequest'
      responses:
        '200':
          description: Extraction agent updated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionAgent'
        '400':
          description: Bad request - invalid agent configuration
        '401':
          description: Unauthorized - invalid or missing API key
        '404':
          description: Extraction agent not found
    delete:
      operationId: deleteExtractionAgent
      summary: Delete an extraction agent
      description: Delete an extraction agent and its associated configuration.
      tags:
        - Extraction Agents
      parameters:
        - $ref: '#/components/parameters/agentId'
      responses:
        # No response body is declared for a successful delete.
        '200':
          description: Extraction agent deleted successfully
        '401':
          description: Unauthorized - invalid or missing API key
        '404':
          description: Extraction agent not found
  # Job collection: POST starts an asynchronous extraction run.
  /extraction/jobs:
    post:
      operationId: createExtractionJob
      summary: Create an extraction job
      description: >-
        Run an extraction job by specifying an extraction agent and one or
        more files to process.
        The job runs asynchronously and can be polled for status and
        results.
      tags:
        - Extraction Jobs
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateExtractionJobRequest'
      responses:
        '200':
          description: Extraction job created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionJob'
        '400':
          description: Bad request - invalid job parameters
        '401':
          description: Unauthorized - invalid or missing API key
  # Status endpoint for the job named by the `jobId` path parameter.
  /extraction/jobs/{jobId}:
    get:
      operationId: getExtractionJob
      summary: Get extraction job status
      description: >-
        Retrieve the current status of an extraction job.
        Poll this endpoint to check whether the job has completed
        processing.
      tags:
        - Extraction Jobs
      parameters:
        - $ref: '#/components/parameters/jobId'
      responses:
        '200':
          description: Successfully retrieved extraction job status
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionJob'
        '401':
          description: Unauthorized - invalid or missing API key
        '404':
          description: Extraction job not found
  # Result endpoint: structured output of the job named by `jobId`.
  /extraction/jobs/{jobId}/result:
    get:
      operationId: getExtractionJobResult
      summary: Get extraction job result
      description: >-
        Retrieve the structured data extracted from the documents processed
        by a completed extraction job.
        Results conform to the schema defined in the extraction agent.
      tags:
        - Extraction Jobs
      parameters:
        - $ref: '#/components/parameters/jobId'
      responses:
        '200':
          description: Successfully retrieved extraction results
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionResult'
        '401':
          description: Unauthorized - invalid or missing API key
        # The same status covers both an unknown job and an unfinished one.
        '404':
          description: Extraction job not found or not completed
  # File upload: accepts one multipart part named `file` and returns the
  # stored file record.
  /files:
    post:
      operationId: uploadFile
      summary: Upload a file
      description: >-
        Upload a file for use in extraction workflows using multipart form
        data. The returned file identifier can be used when creating
        extraction jobs.
      tags:
        - Files
      parameters:
        - name: project_id
          in: query
          description: The project to associate the file with.
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required:
                - file
              properties:
                file:
                  type: string
                  # `format: binary` is the OpenAPI 3.0 spelling and is not a
                  # format defined by JSON Schema 2020-12, which OpenAPI 3.1
                  # uses. `contentMediaType` is the 3.1 way to mark a raw
                  # binary part; `format` is kept so tooling that still keys
                  # off the 3.0 form keeps working.
                  format: binary
                  contentMediaType: application/octet-stream
                  description: The file to upload.
      responses:
        '200':
          description: File uploaded successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/File'
        '400':
          description: Bad request - invalid file or missing parameters
        '401':
          description: Unauthorized - invalid or missing API key
components:
  # Authentication schemes; `bearerAuth` is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: API Key
      description: >-
        LlamaCloud API key obtained from the LlamaCloud dashboard.
        Include as a Bearer token in the Authorization header.
  # Reusable path parameters referenced via `$ref` from the agent and job
  # operations.
  parameters:
    agentId:
      name: agentId
      in: path
      description: Unique identifier of the extraction agent.
      required: true
      schema:
        type: string
    jobId:
      name: jobId
      in: path
      description: Unique identifier of the extraction job.
      required: true
      schema:
        type: string
  schemas:
    # Agent representation returned by the list, create, get and update
    # agent operations.
    ExtractionAgent:
      type: object
      description: >-
        An extraction agent configured with a specific data schema
        and extraction settings for processing documents.
      properties:
        id:
          type: string
          description: Unique identifier of the extraction agent.
        name:
          type: string
          description: Human-readable name of the extraction agent.
        project_id:
          type: string
          description: Identifier of the project the agent belongs to.
        # Free-form object: any keys are allowed.
        data_schema:
          type: object
          additionalProperties: true
          description: >-
            JSON Schema defining the structure of the data to extract.
        description:
          type: string
          description: >-
            Optional description of the extraction agent and its purpose.
        created_at:
          type: string
          format: date-time
          description: Timestamp when the extraction agent was created.
        updated_at:
          type: string
          format: date-time
          description: Timestamp when the extraction agent was last updated.
    # Body of the create-agent request; only `name` and `project_id` are
    # mandatory.
    # NOTE(review): the descriptions below mention inferring a schema from
    # example files, but no property here carries those files - confirm how
    # they are supplied.
    CreateExtractionAgentRequest:
      type: object
      description: Request body for creating a new extraction agent.
      required:
        - name
        - project_id
      properties:
        name:
          type: string
          description: Human-readable name for the new extraction agent.
        project_id:
          type: string
          description: Identifier of the project to create the agent in.
        data_schema:
          type: object
          additionalProperties: true
          description: >-
            JSON Schema defining the structure of the data to extract.
            If not provided, the schema can be inferred automatically
            from example files.
        description:
          type: string
          description: Optional description of the extraction agent.
        prompt:
          type: string
          description: >-
            Optional prompt to guide automatic schema inference from
            example documents.
    # Body of the update-agent request; no property is marked required.
    UpdateExtractionAgentRequest:
      type: object
      description: Request body for updating an existing extraction agent.
      properties:
        name:
          type: string
          description: Updated name for the extraction agent.
        data_schema:
          type: object
          additionalProperties: true
          description: Updated JSON Schema for data extraction.
        description:
          type: string
          description: Updated description of the extraction agent.
    # Body of the create-job request: the agent to run and the files to feed
    # it. Both properties are mandatory.
    CreateExtractionJobRequest:
      type: object
      description: >-
        Request body for creating an extraction job.
      required:
        - extraction_agent_id
        - file_ids
      properties:
        extraction_agent_id:
          type: string
          description: >-
            Identifier of the extraction agent to use for processing.
        file_ids:
          type: array
          # The createExtractionJob operation is documented as taking "one or
          # more files", so an empty list is rejected at validation time
          # instead of producing a job with nothing to process.
          minItems: 1
          items:
            type: string
          description: >-
            List of file identifiers to process in this extraction job.
    # Job representation returned by the create-job and get-job operations.
    ExtractionJob:
      type: object
      description: >-
        An asynchronous extraction job that processes documents
        through an extraction agent.
      properties:
        id:
          type: string
          description: Unique identifier of the extraction job.
        extraction_agent_id:
          type: string
          description: Identifier of the extraction agent used for this job.
        # Closed set of four status values.
        status:
          type: string
          enum:
            - pending
            - processing
            - completed
            - failed
          description: Current status of the extraction job.
        file_ids:
          type: array
          items:
            type: string
          description: Identifiers of the files being processed.
        created_at:
          type: string
          format: date-time
          description: Timestamp when the extraction job was created.
        completed_at:
          type: string
          format: date-time
          description: >-
            Timestamp when the extraction job completed, if applicable.
    # Envelope returned by the job-result operation: the job id plus a list
    # of per-file results.
    ExtractionResult:
      type: object
      description: >-
        Structured data extracted from documents by a completed
        extraction job.
      properties:
        job_id:
          type: string
          description: >-
            Identifier of the extraction job that produced this result.
        results:
          type: array
          description: List of extraction results, one per processed file.
          items:
            $ref: '#/components/schemas/FileExtractionResult'
    # One element of `ExtractionResult.results`: outcome for a single file.
    FileExtractionResult:
      type: object
      description: Extraction result for a single file.
      properties:
        file_id:
          type: string
          description: Identifier of the processed file.
        file_name:
          type: string
          description: Name of the processed file.
        # Free-form object: any keys are allowed.
        data:
          type: object
          additionalProperties: true
          description: >-
            Structured data extracted from the file, conforming to
            the extraction agent schema.
        status:
          type: string
          enum:
            - success
            - error
          description: Status of the extraction for this specific file.
        error:
          type: string
          description: Error message if extraction failed for this file.
    # File record returned by the upload operation.
    File:
      type: object
      description: A file uploaded to the LlamaCloud platform.
      properties:
        id:
          type: string
          description: Unique identifier of the file.
        name:
          type: string
          description: Original file name.
        project_id:
          type: string
          description: Identifier of the project the file belongs to.
        file_size:
          type: integer
          description: Size of the file in bytes.
        content_type:
          type: string
          description: MIME type of the file.
        created_at:
          type: string
          format: date-time
          description: Timestamp when the file was uploaded.