DeepSeek Fill-In-The-Middle (FIM) Completion API

Beta endpoint that performs fill-in-the-middle completions where a prompt and an optional suffix are provided and the model fills the gap. Useful for code completion and inline code generation tasks.

OpenAPI Specification

File: deepseek-fim-completion-openapi.yml
openapi: 3.1.0
info:
  title: DeepSeek FIM Completion
  description: The DeepSeek API uses an API format compatible with OpenAI.
  version: 1.0.0
servers:
  # FIM completion is a beta feature served from the /beta base path.
  - url: https://api.deepseek.com/beta
paths:
  /completions:
    post:
      summary: DeepSeek Create FIM Completion
      description: >-
        Generates a fill-in-the-middle completion for a given prompt using
        DeepSeek models.
      operationId: createFIMCompletion
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - model
                - prompt
              properties:
                model:
                  type: string
                  enum:
                    - deepseek-chat
                  description: ID of the model to use.
                prompt:
                  type: string
                  # Quoted: the trailing comma is part of the default value.
                  default: 'Once upon a time,'
                  description: The prompt to generate completions for.
                echo:
                  # OpenAPI 3.1 removed `nullable`; nullability is expressed
                  # as a JSON Schema type union throughout this document.
                  type: [boolean, 'null']
                  description: Echo back the prompt in addition to the completion.
                frequency_penalty:
                  type: [number, 'null']
                  minimum: -2
                  maximum: 2
                  default: 0
                  description: >-
                    Positive values penalize new tokens based on their
                    frequency in the text, reducing repetition.
                logprobs:
                  type: [integer, 'null']
                  # A token count cannot be negative.
                  minimum: 0
                  maximum: 20
                  description: >-
                    Number of most likely tokens to return with log
                    probabilities. The API will always return the log
                    probability of the sampled token.
                max_tokens:
                  type: [integer, 'null']
                  # At least one token must be generated when a limit is set.
                  minimum: 1
                  description: >-
                    The maximum number of tokens that can be generated in the
                    completion.
                presence_penalty:
                  type: [number, 'null']
                  minimum: -2
                  maximum: 2
                  default: 0
                  description: >-
                    Positive values penalize new tokens that appear in the text,
                    encouraging discussion of new topics.
                stop:
                  # OpenAI-compatible: a single stop string or a list of stop
                  # strings, not an arbitrary object.
                  oneOf:
                    - type: string
                    - type: array
                      items:
                        type: string
                    - type: 'null'
                  description: Sequence(s) where the model stops generating tokens.
                stream:
                  type: [boolean, 'null']
                  description: >-
                    Whether to stream back partial progress. If set, tokens are
                    sent as server-sent events until completion.
                stream_options:
                  type: [object, 'null']
                  description: Options for streaming responses.
                suffix:
                  type: [string, 'null']
                  description: Text that comes after the completion of the inserted text.
                temperature:
                  type: [number, 'null']
                  # Sampling temperature ranges from 0 to 2.
                  minimum: 0
                  maximum: 2
                  default: 1
                  description: >-
                    Controls randomness in generation (higher values = more
                    random).
                top_p:
                  type: [number, 'null']
                  # A probability mass is bounded to [0, 1].
                  minimum: 0
                  maximum: 1
                  default: 1
                  description: >-
                    Nucleus sampling parameter. Tokens are selected from the
                    top_p probability mass.
      responses:
        '200':
          description: Successful completion.
          content:
            application/json:
              schema:
                type: object
                required:
                  - id
                  - choices
                  - created
                  - model
                  - object
                properties:
                  id:
                    type: string
                    description: Unique identifier for the completion.
                  choices:
                    type: array
                    description: Array of generated responses.
                    items:
                      type: object
                  created:
                    type: integer
                    format: int64
                    description: Unix timestamp of the response creation.
                  model:
                    type: string
                    description: The model used for the completion.
                  system_fingerprint:
                    type: string
                    description: Backend configuration fingerprint (optional).
                  object:
                    type: string
                    enum:
                      - text_completion
                    description: Object type of the response.
                  usage:
                    type: object
                    description: Details of token usage.
      tags:
        - Completions
tags:
  - name: Completions