Cloudflare Workers AI API

The Cloudflare Workers AI API enables developers to run machine learning models on Cloudflare's global network via a REST API. The catalog offers 78+ open-source models spanning text generation (Llama 3.1, Mistral, Qwen3, Kimi K2.6), embeddings (BGE, EmbeddingGemma), text-to-image (Flux 2, Stable Diffusion XL), automatic speech recognition (Whisper, Deepgram Nova 3, Flux), text-to-speech (Aura 2, MeloTTS), image-to-text (LLaVA), translation (Indic Trans2, M2M100), classification, object detection (DETR), and voice activity detection. OpenAI-compatible endpoints are available for chat completions, text completions, embeddings, and responses.

OpenAPI Specification

cloudflare-workers-ai-openapi.yml Raw ↑
# OpenAPI document version and top-level API metadata.
openapi: 3.1.0
info:
  title: Cloudflare Workers AI API
  # Folded scalar: the wrapped lines below are joined into one paragraph.
  # The endpoint list matches the "OpenAI Compatible" tag description and the
  # paths defined in this document (chat completions, completions,
  # embeddings, responses).
  description: >-
    The Cloudflare Workers AI API enables developers to run machine learning
    models on Cloudflare's global network via a REST API. It supports text
    generation, embeddings, image classification, speech recognition, and other
    AI tasks with OpenAI-compatible endpoints for chat completions, text
    completions, embeddings, and responses.
  # Quoted so the value stays the string "4.0" rather than the float 4.0.
  version: '4.0'
  contact:
    name: Cloudflare Support
    url: https://support.cloudflare.com/
  termsOfService: https://www.cloudflare.com/terms/
# Link to the human-readable product documentation.
externalDocs:
  description: Cloudflare Workers AI Documentation
  url: https://developers.cloudflare.com/workers-ai/
# Base URL for the API; the entries under `paths` are relative to it.
servers:
- url: https://api.cloudflare.com/client/v4
  description: Cloudflare API v4 Production Server
# Tags group the operations; each operation under `paths` names one of them.
tags:
- name: AI Inference
  description: >-
    Execute AI models for text generation, embeddings,
    image classification, and other machine learning tasks.
- name: OpenAI Compatible
  description: >-
    OpenAI-compatible endpoints for chat completions,
    text completions, embeddings, and responses.
# Document-wide security requirement: the bearerAuth scheme declared under
# components.securitySchemes, with an empty scope list.
security:
- bearerAuth: []
paths:
  # Native inference endpoint: runs the model named by the {model} path segment.
  /accounts/{account_id}/ai/run/{model}:
    post:
      operationId: executeAiModel
      summary: Cloudflare Execute Ai Model
      description: >-
        Run an AI model on Cloudflare's global network. The model parameter
        specifies which model to invoke from the Workers AI catalog. Supports
        text generation, text-to-image, image classification, speech-to-text,
        translation, summarization, and embedding models.
      tags:
      - AI Inference
      parameters:
      - $ref: '#/components/parameters/AccountId'
      - name: model
        in: path
        required: true
        description: >-
          The model identifier to execute, e.g. @cf/meta/llama-3.1-8b-instruct.
        schema:
          type: string
        # Uses the identifier given in the description instead of a
        # placeholder; quoted because a plain YAML scalar cannot start with `@`.
        example: '@cf/meta/llama-3.1-8b-instruct'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AiRunRequest'
            examples:
              ExecuteaimodelRequestExample:
                summary: Default executeAiModel request
                x-microcks-default: true
                # Illustrative only: the example populates every AiRunRequest
                # property at once.
                value:
                  prompt: example_value
                  messages:
                  - role: system
                    content: example_value
                  image:
                  - 10
                  text: example_value
                  source_lang: example_value
                  target_lang: example_value
                  max_tokens: 10
                  temperature: 0.7
                  # Non-streaming, so the request pairs with the
                  # application/json 200 response documented below.
                  stream: false
      responses:
        '200':
          description: Model executed successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AiRunResponse'
              examples:
                Executeaimodel200Example:
                  summary: Default executeAiModel 200 response
                  x-microcks-default: true
                  value:
                    result:
                      response: example_value
                    success: true
                    # Empty lists, matching the schema-level examples on
                    # AiRunResponse.errors and AiRunResponse.messages.
                    errors: []
                    messages: []
        '400':
          description: Bad request due to invalid input.
        '401':
          description: Unauthorized.
        '404':
          description: Model not found.
      # Microcks mock-server extension for this operation.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # OpenAI-compatible chat completions endpoint.
  /accounts/{account_id}/ai/v1/chat/completions:
    post:
      operationId: createChatCompletion
      summary: Cloudflare Create Chat Completion
      description: >-
        OpenAI-compatible endpoint for chat completions. Accepts a messages
        array and returns a model-generated response. Supports streaming via
        server-sent events.
      tags:
      - OpenAI Compatible
      parameters:
      - $ref: '#/components/parameters/AccountId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
            examples:
              CreatechatcompletionRequestExample:
                summary: Default createChatCompletion request
                x-microcks-default: true
                # Values stay within the bounds declared on
                # ChatCompletionRequest (temperature 0-2, top_p 0-1).
                value:
                  model: '@cf/meta/llama-3.1-8b-instruct'
                  messages:
                  - role: system
                    content: example_value
                  max_tokens: 10
                  temperature: 0.7
                  top_p: 0.9
                  # Non-streaming, so the request pairs with the
                  # application/json 200 response documented below.
                  stream: false
      responses:
        '200':
          description: Chat completion generated successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
              examples:
                Createchatcompletion200Example:
                  summary: Default createChatCompletion 200 response
                  x-microcks-default: true
                  value:
                    id: abc123
                    # Must equal the `const` on ChatCompletionResponse.object.
                    object: chat.completion
                    created: 10
                    model: '@cf/meta/llama-3.1-8b-instruct'
                    choices:
                    - index: 0
                      message:
                        role: assistant
                        content: example_value
                      finish_reason: stop
                    usage:
                      prompt_tokens: 10
                      completion_tokens: 10
                      # Kept consistent with the two counts above
                      # (presumably their sum - confirm against the API).
                      total_tokens: 20
        '400':
          description: Bad request.
        '401':
          description: Unauthorized.
      # Microcks mock-server extension for this operation.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # OpenAI-compatible text completions endpoint. The request schema is declared
  # inline; no response body schema is documented for the 200 response.
  /accounts/{account_id}/ai/v1/completions:
    post:
      operationId: createTextCompletion
      summary: Cloudflare Create Text Completion
      description: >-
        OpenAI-compatible endpoint for text completions. Accepts a prompt
        string and returns a model-generated continuation.
      tags:
      - OpenAI Compatible
      parameters:
      - $ref: '#/components/parameters/AccountId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
              - model
              - prompt
              properties:
                model:
                  type: string
                  description: The model identifier to use.
                prompt:
                  type: string
                  description: The text prompt to complete.
                max_tokens:
                  type: integer
                  description: Maximum number of tokens to generate.
                temperature:
                  type: number
                  description: Sampling temperature between 0 and 2.
                  minimum: 0
                  maximum: 2
                stream:
                  type: boolean
                  description: Whether to stream the response.
            examples:
              CreatetextcompletionRequestExample:
                summary: Default createTextCompletion request
                x-microcks-default: true
                value:
                  model: '@cf/meta/llama-3.1-8b-instruct'
                  prompt: example_value
                  max_tokens: 10
                  # Within the 0-2 range declared on the temperature property.
                  temperature: 0.7
                  stream: true
      responses:
        '200':
          description: Text completion generated successfully.
        '400':
          description: Bad request.
        '401':
          description: Unauthorized.
      # Microcks mock-server extension for this operation.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # OpenAI-compatible embeddings endpoint. `input` accepts either one string or
  # an array of strings.
  /accounts/{account_id}/ai/v1/embeddings:
    post:
      operationId: createEmbeddings
      summary: Cloudflare Create Embeddings
      description: >-
        OpenAI-compatible endpoint for generating text embeddings. Converts
        text into numerical vector representations for semantic search,
        similarity analysis, and classification.
      tags:
      - OpenAI Compatible
      parameters:
      - $ref: '#/components/parameters/AccountId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
              - model
              - input
              properties:
                model:
                  type: string
                  description: The embedding model identifier.
                input:
                  oneOf:
                  - type: string
                  - type: array
                    items:
                      type: string
                  description: The text or array of texts to embed.
            examples:
              CreateembeddingsRequestExample:
                summary: Default createEmbeddings request
                x-microcks-default: true
                value:
                  # Quoted because a plain YAML scalar cannot start with `@`.
                  model: '@cf/baai/bge-base-en-v1.5'
                  input: example_value
      responses:
        '200':
          description: Embeddings generated successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
              examples:
                Createembeddings200Example:
                  summary: Default createEmbeddings 200 response
                  x-microcks-default: true
                  value:
                    # Must equal the `const` on EmbeddingResponse.object.
                    object: list
                    data:
                    # Item `object` must equal its `const` (embedding), and the
                    # vector entries must be numbers per the item schema.
                    - object: embedding
                      index: 0
                      embedding: [0.0123, -0.0456, 0.0789]
                    model: '@cf/baai/bge-base-en-v1.5'
                    usage:
                      prompt_tokens: 10
                      total_tokens: 10
        '400':
          description: Bad request.
        '401':
          description: Unauthorized.
      # Microcks mock-server extension for this operation.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
  # OpenAI-compatible responses endpoint. The request schema is declared
  # inline; no response body schema is documented for the 200 response.
  /accounts/{account_id}/ai/v1/responses:
    post:
      operationId: createResponse
      summary: Cloudflare Create Response
      description: >-
        OpenAI-compatible responses endpoint for generating model
        responses with additional tool use and structured output
        capabilities.
      tags:
      - OpenAI Compatible
      parameters:
      - $ref: '#/components/parameters/AccountId'
      requestBody:
        required: true
        content:
          application/json:
            # Both properties are mandatory and both are plain strings.
            schema:
              type: object
              required:
              - model
              - input
              properties:
                model:
                  type: string
                  description: The model identifier.
                input:
                  type: string
                  description: The input text for the model.
            examples:
              CreateresponseRequestExample:
                summary: Default createResponse request
                x-microcks-default: true
                value:
                  model: example_value
                  input: example_value
      responses:
        '200':
          description: Response generated successfully.
        '400':
          description: Bad request.
        '401':
          description: Unauthorized.
      # Microcks mock-server extension for this operation.
      x-microcks-operation:
        delay: 0
        dispatcher: FALLBACK
components:
  # Authentication schemes; `bearerAuth` is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      description: API token with Workers AI Read and Edit permissions.
  # Reusable parameters, pulled into operations via $ref.
  parameters:
    # Fills the {account_id} segment of the paths in this document; it is
    # referenced by the operations under `paths`.
    AccountId:
      name: account_id
      in: path
      required: true
      description: The unique identifier of the Cloudflare account.
      schema:
        type: string
  schemas:
    # Request body for the native run endpoint. No property is marked as
    # required; per the descriptions, different properties serve different
    # model types (text generation, chat, image classification, embedding,
    # translation).
    AiRunRequest:
      type: object
      properties:
        prompt:
          type: string
          description: The input prompt for text generation models.
          example: example_value
        messages:
          type: array
          description: >-
            Array of messages for chat-style models.
          items:
            type: object
            properties:
              role:
                type: string
                enum:
                - system
                - user
                - assistant
              content:
                type: string
          example: []
        image:
          type: array
          items:
            type: integer
          description: Raw image bytes for image classification models.
          example: []
        text:
          type: string
          description: Text input for embedding or translation models.
          example: example_value
        source_lang:
          type: string
          description: Source language code for translation.
          example: example_value
        target_lang:
          type: string
          description: Target language code for translation.
          example: example_value
        max_tokens:
          type: integer
          description: Maximum number of tokens to generate.
          example: 10
        temperature:
          type: number
          description: Sampling temperature.
          # No bounds are declared here; the example is chosen to fall inside
          # the 0-2 range that ChatCompletionRequest declares for temperature.
          example: 0.7
        stream:
          type: boolean
          description: Whether to stream the response using server-sent events.
          example: true
    # Response envelope for the native run endpoint: the model output under
    # `result`, plus a success flag and errors/messages arrays.
    AiRunResponse:
      type: object
      properties:
        result:
          type: object
          properties:
            response:
              type: string
              description: The generated text response.
          # The example is an object, matching the declared type of `result`.
          example:
            response: example_value
        success:
          type: boolean
          example: true
        errors:
          type: array
          items:
            type: object
          example: []
        messages:
          type: array
          items:
            type: object
          example: []
    # Request body for the OpenAI-compatible chat completions endpoint.
    # `model` and `messages` are mandatory; each message needs a role and
    # content.
    ChatCompletionRequest:
      type: object
      required:
      - model
      - messages
      properties:
        model:
          type: string
          description: >-
            The model identifier, e.g. @cf/meta/llama-3.1-8b-instruct.
          # Uses the identifier given in the description; quoted because a
          # plain YAML scalar cannot start with `@`.
          example: '@cf/meta/llama-3.1-8b-instruct'
        messages:
          type: array
          items:
            type: object
            required:
            - role
            - content
            properties:
              role:
                type: string
                enum:
                - system
                - user
                - assistant
              content:
                type: string
          example: []
        max_tokens:
          type: integer
          description: Maximum number of tokens to generate.
          example: 10
        temperature:
          type: number
          description: Sampling temperature between 0 and 2.
          minimum: 0
          maximum: 2
          # Example kept within the minimum/maximum declared above.
          example: 0.7
        top_p:
          type: number
          description: Nucleus sampling parameter.
          minimum: 0
          maximum: 1
          # Example kept within the minimum/maximum declared above.
          example: 0.9
        stream:
          type: boolean
          description: Whether to stream the response via server-sent events.
          example: true
    # Response body for the OpenAI-compatible chat completions endpoint.
    ChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
          description: Unique identifier for the completion.
          example: abc123
        object:
          type: string
          const: chat.completion
          # The example must equal the const, or it fails validation.
          example: chat.completion
        created:
          type: integer
          description: Unix timestamp of when the completion was created.
          example: 10
        model:
          type: string
          description: The model used for the completion.
          example: example_value
        choices:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              message:
                type: object
                properties:
                  role:
                    type: string
                  content:
                    type: string
              finish_reason:
                type: string
                enum:
                - stop
                - length
          example: []
        usage:
          type: object
          properties:
            prompt_tokens:
              type: integer
            completion_tokens:
              type: integer
            total_tokens:
              type: integer
          # The example is an object with integer counts, matching the
          # declared type of `usage`.
          example:
            prompt_tokens: 10
            completion_tokens: 10
            total_tokens: 20
    # Response body for the OpenAI-compatible embeddings endpoint: a list
    # wrapper whose `data` items each carry one numeric embedding vector.
    EmbeddingResponse:
      type: object
      properties:
        object:
          type: string
          const: list
          # The example must equal the const, or it fails validation.
          example: list
        data:
          type: array
          items:
            type: object
            properties:
              object:
                type: string
                const: embedding
              index:
                type: integer
              embedding:
                type: array
                items:
                  type: number
          example: []
        model:
          type: string
          example: example_value
        usage:
          type: object
          properties:
            prompt_tokens:
              type: integer
            total_tokens:
              type: integer
          # The example is an object with integer counts, matching the
          # declared type of `usage`.
          example:
            prompt_tokens: 10
            total_tokens: 10