# Google Gemini API
#
# OpenAPI Specification
#
# Source file: google-gemini-api-openapi.yml
# OpenAPI 3.1 description of the Google Gemini API.
openapi: 3.1.0
info:
  title: Google Gemini API
  version: 1.0.0
  description: |
    The Gemini API can generate text output from various inputs, including text, images, video, and audio, leveraging Gemini models.
  contact:
    name: Google AI
    url: https://ai.google.dev
# Default base URL that the paths below are resolved against.
servers:
  - description: Gemini API Server
    url: https://generativelanguage.googleapis.com/v1beta
# Document-wide security requirement.
security:
  - ApiKeyHeader: []
# Tag catalogue; operations reference these names to group themselves.
tags:
  - description: Operations related to Audio Understanding
    name: Audio Understanding
  - description: Operations related to Content Generation
    name: Content Generation
  - description: Operations related to Document Understanding
    name: Document Understanding
  - description: Operations related to File Api
    name: File Api
  - description: Operations related to Image Generation
    name: Image Generation
  - description: Operations related to Speech Generation
    name: Speech Generation
  - description: Operations related to Streaming
    name: Streaming
  - description: Operations related to Video Understanding
    name: Video Understanding
paths:
  # Resumable media upload for the File API.
  # The X-Goog-Upload-Command header selects the phase: "start" initiates the
  # upload (the 200 response then carries x-goog-upload-url) and
  # "upload, finalize" sends the file data.
  /files:
    post:
      tags:
        - Audio Understanding
        - Document Understanding
        - File Api
        - Video Understanding
      summary: Google Upload File (Resumable)
      description: "Upload a video, PDF, audio, or other media file using resumable upload protocol. Use this for files larger than 20MB, \nvideos longer than approximately 1 minute, or when you want to reuse the file across multiple requests.\n"
      operationId: uploadFile
      # Media uploads are served from the /upload/v1beta base, not the default
      # /v1beta server declared at the top of the document, so override it here.
      servers:
        - url: https://generativelanguage.googleapis.com/upload/v1beta
          description: Gemini API media upload server
      parameters:
        - name: X-Goog-Upload-Protocol
          in: header
          required: true
          schema:
            type: string
            enum:
              - resumable
          description: Upload protocol type
        - name: X-Goog-Upload-Command
          in: header
          required: true
          schema:
            type: string
            enum:
              - start
              # Quoted so the embedded comma is unmistakably part of one value.
              - 'upload, finalize'
          description: Upload command (start for initiation, "upload, finalize" for data upload)
        - name: X-Goog-Upload-Header-Content-Length
          in: header
          required: false
          schema:
            type: integer
          description: Total size of the file in bytes (required for start command)
        - name: X-Goog-Upload-Header-Content-Type
          in: header
          required: false
          schema:
            type: string
          description: MIME type of the file (required for start command)
        - name: X-Goog-Upload-Offset
          in: header
          required: false
          schema:
            type: integer
          description: Byte offset for upload (required for upload command)
        - name: Content-Length
          in: header
          required: false
          schema:
            type: integer
          description: Size of the current chunk being uploaded
      # Two body shapes: JSON file metadata, or the raw file bytes.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/FilesPostRequest'
          application/octet-stream:
            schema:
              $ref: '#/components/schemas/FilesPostRequest1'
      responses:
        '200':
          description: Upload initiated or completed successfully
          headers:
            x-goog-upload-url:
              description: URL for uploading file data (returned on start command)
              schema:
                type: string
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileUploadResponse'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        delay: 100
        dispatcher: FALLBACK
        dispatcherRules: SuccessExample
  # Metadata lookup for a previously uploaded file.
  /files/{name}:
    get:
      tags:
        - File Api
      summary: Google Get File Metadata
      description: "Retrieve metadata for an uploaded file. Use this to verify the file was successfully \nstored and to check its processing state.\n"
      operationId: getFile
      parameters:
        # The path template already supplies the "files/" segment, so the
        # parameter carries only the trailing ID; passing "files/abc123" here
        # would expand to /files/files/abc123.
        - name: name
          in: path
          required: true
          description: 'The file ID, i.e. the resource name without its "files/" prefix (e.g., "abc123" for the file "files/abc123")'
          schema:
            type: string
          example: abc123
      responses:
        '200':
          description: File metadata retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileUploadResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '404':
          description: File not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        delay: 100
        dispatcher: FALLBACK
        dispatcherRules: SuccessExample
  # Token counting: POST the contents to measure, receive a CountTokensResponse.
  /models/{model}:countTokens:
    post:
      operationId: countTokens
      summary: Google Count Tokens
      description: |
        Get a count of the number of tokens in content, including audio, video, images, and text.
        Useful for estimating costs and ensuring content fits within model context windows.
      tags:
        - Audio Understanding
        - Content Generation
      parameters:
        - in: path
          name: model
          required: true
          description: The model to use for token counting
          schema:
            type: string
            example: gemini-2.5-flash
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ModelsPostRequest'
            examples:
              ModelsPostRequestExample:
                $ref: '#/components/examples/ModelsPostRequestExample'
      responses:
        '200':
          description: Token count retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CountTokensResponse'
        # Both error statuses share the common Error envelope.
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        dispatcher: FALLBACK
        dispatcherRules: SuccessExample
        delay: 100
  # Non-streaming generation: GenerateContentRequest in, GenerateContentResponse out.
  /models/{model}:generateContent:
    post:
      operationId: generateContent
      summary: Google Generate Content
      description: "Generates text output from various inputs including text, images, video, audio, and PDF documents.\n\n**Document Understanding**: Process PDF documents up to 1000 pages using native vision to:\n- Analyze and interpret text, images, diagrams, charts, and tables\n- Extract information into structured output formats\n- Summarize and answer questions based on visual and textual elements\n- Transcribe document content (e.g., to HTML) preserving layouts and formatting\n- Process multiple PDFs in a single request (within context window limits)\n\n**Video Understanding**: Process videos to describe, segment, and extract information, answer questions, \nand refer to specific timestamps.\n\n**Audio Understanding**: Analyze and understand audio input to:\n- Transcribe speech to text with timestamps\n- Translate audio content\n- Detect and label different speakers (speaker diarization)\n- Detect emotion in speech and music\n- Analyze specific segments and provide timestamps\n- Describe, summarize, or answer questions about audio content\n\n**Image Input**: Combine text with images for multimodal understanding.\n"
      tags:
        - Audio Understanding
        - Content Generation
        - Document Understanding
        - Video Understanding
      parameters:
        - in: path
          name: model
          required: true
          description: The model to use for generation
          schema:
            type: string
            example: gemini-2.5-flash
            enum: [gemini-2.5-flash, gemini-2.5-pro]
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateContentRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateContentResponse'
        # All error statuses share the common Error envelope.
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized - Invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '429':
          description: Too many requests
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        dispatcher: FALLBACK
        dispatcherRules: SuccessExample
        delay: 100
  # Text-to-speech generation; the path parameter is limited by its enum to the
  # two TTS preview models.
  # NOTE(review): this template differs from "/models/{model}:generateContent"
  # and "/models/{imageModel}:generateContent" only in the parameter name.
  # OpenAPI treats templated paths with the same hierarchy but different
  # parameter names as identical, so validators and generators may reject or
  # collapse these entries. Merging them would change operationIds, so confirm
  # the intended structure before restructuring.
  /models/{ttsModel}:generateContent:
    post:
      tags:
        - Speech Generation
      summary: Google Generate Speech (Text-to-Speech)
      description: "Transform text input into single-speaker or multi-speaker audio using native text-to-speech (TTS) \ngeneration capabilities. TTS is controllable through natural language to guide style, accent, pace, \nand tone of the audio.\n\n**Capabilities:**\n- Single-speaker or multi-speaker audio (up to 2 speakers)\n- 30 voice options with different characteristics (bright, upbeat, informative, etc.)\n- 24 supported languages with automatic language detection\n- Controllable style, tone, accent, and pace via prompts\n- Audio output in PCM format (24kHz, 16-bit, mono)\n\n**Note**: TTS models accept text-only inputs and produce audio-only outputs.\n"
      operationId: generateSpeech
      parameters:
        - name: ttsModel
          in: path
          required: true
          description: The TTS model to use for speech generation
          schema:
            type: string
            enum:
              - gemini-2.5-flash-preview-tts
              - gemini-2.5-pro-preview-tts
            example: gemini-2.5-flash-preview-tts
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SpeechGenerationRequest'
      responses:
        '200':
          description: Successful response with generated audio
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SpeechGenerationResponse'
        # All error statuses share the common Error envelope.
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized - Invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '429':
          description: Too many requests
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        delay: 100
        dispatcher: FALLBACK
        dispatcherRules: SuccessExample
  # Streaming generation: same request body as generateContent, but the 200
  # response is delivered as text/event-stream.
  /models/{model}:streamGenerateContent:
    post:
      operationId: streamGenerateContent
      summary: Google Stream Generate Content
      description: Generates text output with streaming responses, receiving GenerateContentResponse instances incrementally
      tags:
        - Content Generation
        - Streaming
      parameters:
        - in: path
          name: model
          required: true
          description: The model to use for generation
          schema:
            type: string
            example: gemini-2.5-flash
            enum: [gemini-2.5-flash, gemini-2.5-pro]
        # Required query flag; "sse" is the only value this description allows.
        - in: query
          name: alt
          required: true
          description: Alternative response format
          schema:
            type: string
            example: sse
            enum: [sse]
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateContentRequest'
      responses:
        '200':
          description: Successful streaming response
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ModelsPostResponse'
              examples:
                ModelsPostResponseExample:
                  $ref: '#/components/examples/ModelsPostResponseExample'
        # Error statuses are plain JSON and share the common Error envelope.
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized - Invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        dispatcher: FALLBACK
        dispatcherRules: ModelsPostResponseExample
        delay: 100
  # Image generation/editing; the path parameter is limited by its enum to the
  # two image models.
  # NOTE(review): this template differs from "/models/{model}:generateContent"
  # and "/models/{ttsModel}:generateContent" only in the parameter name.
  # OpenAPI treats templated paths with the same hierarchy but different
  # parameter names as identical, so validators and generators may reject or
  # collapse these entries. Merging them would change operationIds, so confirm
  # the intended structure before restructuring.
  /models/{imageModel}:generateContent:
    post:
      tags:
        - Image Generation
      summary: Google Generate Images
      description: "Generate and process images conversationally. Supports text-to-image, text-and-image-to-image, \nand multi-turn image editing. All generated images include a SynthID watermark.\n"
      operationId: generateImage
      parameters:
        - name: imageModel
          in: path
          required: true
          description: The image generation model to use
          schema:
            type: string
            enum:
              - gemini-2.5-flash-image
              - gemini-3-pro-image-preview
            example: gemini-2.5-flash-image
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ImageGenerationRequest'
      responses:
        '200':
          description: Successful response with generated image
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ImageGenerationResponse'
        # All error statuses share the common Error envelope.
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '401':
          description: Unauthorized - Invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '429':
          description: Too many requests
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
      # Microcks mock-server settings for this operation.
      x-microcks-operation:
        delay: 100
        dispatcher: FALLBACK
        dispatcherRules: SuccessExample
components:
  schemas:
    # One generated audio candidate: its content, stop reason and index.
    AudioCandidate:
      type: object
      properties:
        content:
          description: Generated audio content
          $ref: '#/components/schemas/AudioContent'
        finishReason:
          description: Reason why generation stopped
          type: string
          enum: [FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER]
        index:
          description: Index of the candidate
          type: integer
    # Audio payload: a list of parts, each of which may hold an inlineData
    # object with a MIME type and base64 data (both required when present).
    AudioContent:
      type: object
      properties:
        parts:
          description: Audio data parts
          type: array
          items:
            type: object
            properties:
              inlineData:
                type: object
                required: [mimeType, data]
                properties:
                  mimeType:
                    description: MIME type of the audio (PCM format)
                    type: string
                    example: audio/pcm
                  data:
                    description: Base64-encoded audio data (24kHz, 16-bit, mono PCM)
                    type: string
                    format: byte
    # One generated candidate: content, stop reason, index and safety ratings.
    Candidate:
      type: object
      properties:
        content:
          description: Generated content
          $ref: '#/components/schemas/Content'
        finishReason:
          description: Reason why generation stopped
          type: string
          enum: [FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER]
        index:
          description: Index of the candidate
          type: integer
        safetyRatings:
          description: Safety ratings for the generated content
          type: array
          items:
            $ref: '#/components/schemas/SafetyRating'
    # A single message: an optional role plus a mandatory ordered list of parts.
    Content:
      type: object
      required: [parts]
      properties:
        role:
          description: The role of the content producer (optional for single-turn, required for multi-turn)
          type: string
          enum: [user, model]
        parts:
          description: Ordered parts that constitute a single message
          type: array
          items:
            $ref: '#/components/schemas/Part'
    # Result of the countTokens operation.
    CountTokensResponse:
      type: object
      properties:
        totalTokens:
          description: Total number of tokens in the provided content
          type: integer
        totalBillableCharacters:
          description: Total billable characters (for audio/video content)
          type: integer
    # Shared error envelope referenced by the 4xx responses: everything is
    # nested under a single "error" object.
    Error:
      type: object
      properties:
        error:
          type: object
          properties:
            code:
              description: HTTP status code
              type: integer
            message:
              description: Error message
              type: string
            status:
              description: Error status
              type: string
    # Pointer to externally stored media; only file_uri is mandatory.
    FileData:
      description: Reference to a file uploaded via the File API or a YouTube URL
      type: object
      required: [file_uri]
      properties:
        mime_type:
          description: MIME type of the file (optional for YouTube URLs)
          type: string
          example: video/mp4
        file_uri:
          description: URI of the file from File API or YouTube URL
          type: string
          examples:
            - https://generativelanguage.googleapis.com/v1beta/files/abc123
            - https://www.youtube.com/watch?v=9hE5-98ZeCg
    # File metadata wrapped in a "file" object; used as the 200 body of both
    # the upload and the get-file operations.
    FileUploadResponse:
      type: object
      properties:
        file:
          type: object
          properties:
            name:
              description: Resource name of the file
              type: string
              example: files/abc123
            display_name:
              description: Display name of the file
              type: string
            mime_type:
              description: MIME type of the file
              type: string
              example: video/mp4
            # Declared as a string, not an integer.
            size_bytes:
              description: Size of the file in bytes
              type: string
            create_time:
              description: Creation timestamp
              type: string
              format: date-time
            update_time:
              description: Last update timestamp
              type: string
              format: date-time
            expiration_time:
              description: Expiration timestamp
              type: string
              format: date-time
            sha256_hash:
              description: SHA-256 hash of the file
              type: string
            uri:
              description: URI to use in generateContent requests
              type: string
              example: https://generativelanguage.googleapis.com/v1beta/files/abc123
            state:
              description: Processing state of the file
              type: string
              enum: [STATE_UNSPECIFIED, PROCESSING, ACTIVE, FAILED]
            error:
              description: Error details if state is FAILED
              type: object
              properties:
                message:
                  type: string
    # JSON body of the /files POST: file metadata nested under "file".
    FilesPostRequest:
      type: object
      description: FilesPostRequest schema
      properties:
        file:
          $ref: '#/components/schemas/FilesPostRequestFile'
    # application/octet-stream body of the /files POST: raw binary content.
    FilesPostRequest1:
      type: string
      format: binary
      description: FilesPostRequest1 schema
    # Metadata object referenced by FilesPostRequest.file.
    FilesPostRequestFile:
      type: object
      description: FilesPostRequestFile schema
      properties:
        display_name:
          description: Display name for the file
          type: string
    # Request body shared by generateContent and streamGenerateContent;
    # only "contents" is mandatory.
    GenerateContentRequest:
      type: object
      required: [contents]
      properties:
        contents:
          description: The content of the current conversation with the model
          type: array
          items:
            $ref: '#/components/schemas/Content'
        system_instruction:
          description: System instructions to guide model behavior
          $ref: '#/components/schemas/Content'
        generationConfig:
          description: Configuration options for model generation
          $ref: '#/components/schemas/GenerationConfig'
    # 200 body of generateContent: candidates plus prompt feedback and usage.
    GenerateContentResponse:
      type: object
      properties:
        candidates:
          description: Generated content candidates
          type: array
          items:
            $ref: '#/components/schemas/Candidate'
        promptFeedback:
          description: Feedback about the prompt
          $ref: '#/components/schemas/PromptFeedback'
        usageMetadata:
          description: Token usage information
          $ref: '#/components/schemas/UsageMetadata'
    # Generation tuning options; every property is optional.
    GenerationConfig:
      description: Configuration options for model generation and outputs
      type: object
      properties:
        # At most five stop sequences are allowed.
        stopSequences:
          description: Set of character sequences that will stop output generation
          type: array
          maxItems: 5
          items:
            type: string
        temperature:
          description: Controls randomness of output (default 1.0 for Gemini 3 models)
          type: number
          format: float
          default: 1.0
          minimum: 0.0
          maximum: 2.0
        topP:
          description: Maximum cumulative probability of tokens to consider
          type: number
          format: float
          minimum: 0.0
          maximum: 1.0
        topK:
          description: Maximum number of tokens to consider
          type: integer
          minimum: 1
        maxOutputTokens:
          description: Maximum number of tokens to generate
          type: integer
          minimum: 1
        thinkingConfig:
          description: Configuration for thinking mode (Gemini 2.5 models)
          $ref: '#/components/schemas/ThinkingConfig'
        # Structured-output controls (note the snake_case names).
        response_mime_type:
          description: MIME type for structured output (e.g., application/json)
          type: string
          example: application/json
          enum: [text/plain, application/json]
        response_schema:
          description: JSON schema for structured output format
          type: object
          additionalProperties: true
    # One image-generation candidate: content, stop reason, index and safety ratings.
    ImageCandidate:
      type: object
      properties:
        content:
          description: Generated content including text and/or images
          $ref: '#/components/schemas/ImageContent'
        finishReason:
          description: Reason why generation stopped
          type: string
          enum: [FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER]
        index:
          description: Index of the candidate
          type: integer
        safetyRatings:
          description: Safety ratings for the generated content
          type: array
          items:
            $ref: '#/components/schemas/SafetyRating'
    # Output geometry for generated images.
    ImageConfig:
      description: Configuration for image generation parameters
      type: object
      properties:
        # Ratios stay quoted: digits-and-colon scalars are otherwise at risk
        # of being read as numbers by YAML 1.1 parsers.
        aspectRatio:
          description: Aspect ratio for generated images
          type: string
          example: '16:9'
          enum: ['1:1', '3:4', '4:3', '9:16', '16:9']
        imageSize:
          description: Size of generated images
          type: string
          example: 2K
          enum: [1K, 2K, 4K]
    # Content of an image-generation candidate (referenced by ImageCandidate).
    # "parts" is mandatory; "role" is optional.
    ImageContent:
      type: object
      properties:
        role:
          type: string
          enum:
            - user
            - model
          description: The role of the content producer
        parts:
          type: array
          description: Ordered parts that constitute a single message (can include text and inline image data)
          items:
            # The shared Part schema only knows the snake_case request spelling
            # (inline_data / mime_type). Responses carry generated media under
            # camelCase keys - the shape AudioContent already documents - so a
            # camelCase inlineData part is accepted here as well. anyOf keeps
            # every previously valid Part valid.
            anyOf:
              - $ref: '#/components/schemas/Part'
              - type: object
                required:
                  - inlineData
                properties:
                  inlineData:
                    type: object
                    required:
                      - mimeType
                      - data
                    properties:
                      mimeType:
                        type: string
                        description: MIME type of the generated image
                        example: image/png
                      data:
                        type: string
                        format: byte
                        description: Base64-encoded image data
      required:
        - parts
    # generationConfig variant used by ImageGenerationRequest.
    ImageGenerationConfig:
      description: Configuration options for image generation
      type: object
      properties:
        responseModalities:
          description: Modalities to include in the response
          type: array
          example: [TEXT, IMAGE]
          items:
            type: string
            enum: [TEXT, IMAGE]
        imageConfig:
          description: Configuration for image generation
          $ref: '#/components/schemas/ImageConfig'
    # Request body of the image generation operation; only "contents" is mandatory.
    ImageGenerationRequest:
      type: object
      required: [contents]
      properties:
        contents:
          description: The content of the current conversation with the model
          type: array
          items:
            $ref: '#/components/schemas/Content'
        generationConfig:
          description: Configuration options for image generation
          $ref: '#/components/schemas/ImageGenerationConfig'
    # 200 body of the image generation operation.
    ImageGenerationResponse:
      type: object
      properties:
        candidates:
          description: Generated content candidates with images
          type: array
          items:
            $ref: '#/components/schemas/ImageCandidate'
        promptFeedback:
          description: Feedback about the prompt
          $ref: '#/components/schemas/PromptFeedback'
        usageMetadata:
          description: Token usage information
          $ref: '#/components/schemas/UsageMetadata'
    # Media embedded directly in a request part as base64 text.
    # Both the MIME type and the data are mandatory.
    InlineData:
      type: object
      required:
        - mime_type
        - data
      properties:
        mime_type:
          type: string
          description: MIME type of the data
          enum:
            # Images. HEIC/HEIF are documented Gemini image input formats and
            # were missing, so strict validators rejected valid requests.
            - image/jpeg
            - image/png
            - image/webp
            - image/heic
            - image/heif
            # Video
            - video/mp4
            - video/mpeg
            - video/mov
            - video/avi
            - video/x-flv
            - video/mpg
            - video/webm
            - video/wmv
            - video/3gpp
            # Audio
            - audio/wav
            - audio/mp3
            - audio/aiff
            - audio/aac
            - audio/ogg
            - audio/flac
            # Documents
            - application/pdf
        data:
          type: string
          format: byte
          description: Base64 encoded data
    # Request body of countTokens; "contents" is mandatory.
    ModelsPostRequest:
      type: object
      description: ModelsPostRequest schema
      required: [contents]
      properties:
        contents:
          description: The content to count tokens for
          type: array
          items:
            $ref: '#/components/schemas/Content'
    # text/event-stream body of streamGenerateContent, modelled as a plain string.
    ModelsPostResponse:
      description: Server-Sent Events stream of GenerateContentResponse objects
      type: string
    # Voice assignment for multi-speaker speech.
    # NOTE(review): the description says "maximum 2", while minItems/maxItems
    # together pin the list to exactly two entries - confirm which is intended.
    MultiSpeakerVoiceConfig:
      type: object
      required: [speakerVoiceConfigs]
      properties:
        speakerVoiceConfigs:
          description: Voice configurations for each speaker (maximum 2)
          type: array
          items:
            $ref: '#/components/schemas/SpeakerVoiceConfig'
          minItems: 2
          maxItems: 2
    # One element of Content.parts. Exactly one oneOf variant must match:
    # text, inline_data, or file_data (the last may also carry video_metadata).
    Part:
      description: A part of the content, can be text, inline data, or file data
      type: object
      oneOf:
        # Plain text
        - type: object
          required: [text]
          properties:
            text:
              description: Text content
              type: string
        # Base64 media embedded in the request
        - type: object
          required: [inline_data]
          properties:
            inline_data:
              $ref: '#/components/schemas/InlineData'
        # Reference to stored media
        - type: object
          required: [file_data]
          properties:
            file_data:
              $ref: '#/components/schemas/FileData'
            video_metadata:
              description: Optional metadata for video processing (clipping, FPS)
              $ref: '#/components/schemas/VideoMetadata'
    # Selection of one of the 30 named prebuilt voices; voiceName is mandatory.
    PrebuiltVoiceConfig:
      type: object
      required: [voiceName]
      properties:
        voiceName:
          description: Name of the prebuilt voice with characteristic (Bright, Upbeat, Informative, etc.)
          type: string
          example: Kore
          enum:
            - Zephyr
            - Puck
            - Charon
            - Kore
            - Fenrir
            - Leda
            - Orus
            - Aoede
            - Callirrhoe
            - Autonoe
            - Enceladus
            - Iapetus
            - Umbriel
            - Algieba
            - Despina
            - Erinome
            - Algenib
            - Rasalgethi
            - Laomedeia
            - Achernar
            - Alnilam
            - Schedar
            - Gacrux
            - Pulcherrima
            - Achird
            - Zubenelgenubi
            - Vindemiatrix
            - Sadachbia
            - Sadaltager
            - Sulafat
    # Prompt-level feedback: an optional block reason plus safety ratings.
    PromptFeedback:
      type: object
      properties:
        blockReason:
          description: Reason why prompt was blocked
          type: string
          enum: [BLOCK_REASON_UNSPECIFIED, SAFETY, OTHER]
        safetyRatings:
          description: Safety ratings for the prompt
          type: array
          items:
            $ref: '#/components/schemas/SafetyRating'
    # A harm category paired with a probability level.
    SafetyRating:
      type: object
      properties:
        category:
          type: string
          enum:
            - HARM_CATEGORY_UNSPECIFIED
            - HARM_CATEGORY_DEROGATORY
            - HARM_CATEGORY_TOXICITY
            - HARM_CATEGORY_VIOLENCE
            - HARM_CATEGORY_SEXUAL
            - HARM_CATEGORY_MEDICAL
            - HARM_CATEGORY_DANGEROUS
            - HARM_CATEGORY_HARASSMENT
            - HARM_CATEGORY_HATE_SPEECH
            - HARM_CATEGORY_SEXUALLY_EXPLICIT
            - HARM_CATEGORY_DANGEROUS_CONTENT
        probability:
          type: string
          enum: [HARM_PROBABILITY_UNSPECIFIED, NEGLIGIBLE, LOW, MEDIUM, HIGH]
    # Binds a speaker name to a voice; both fields are mandatory.
    SpeakerVoiceConfig:
      type: object
      required: [speaker, voiceConfig]
      properties:
        speaker:
          description: Name of the speaker (must match names used in prompt)
          type: string
          example: Joe
        voiceConfig:
          description: Voice configuration for this speaker
          $ref: '#/components/schemas/VoiceConfig'
    # Speech settings. Exactly one oneOf variant must match: a single
    # voiceConfig, or a multiSpeakerVoiceConfig.
    SpeechConfig:
      description: Configuration for single or multi-speaker speech
      type: object
      oneOf:
        # Single speaker
        - type: object
          required: [voiceConfig]
          properties:
            voiceConfig:
              description: Single-speaker voice configuration
              $ref: '#/components/schemas/VoiceConfig'
        # Multiple speakers
        - type: object
          required: [multiSpeakerVoiceConfig]
          properties:
            multiSpeakerVoiceConfig:
              description: Multi-speaker voice configuration (up to 2 speakers)
              $ref: '#/components/schemas/MultiSpeakerVoiceConfig'
    SpeechGenerationConfig:
      type: object
      required:
        - responseModalities
        - speechConfig
      properties:
        responseModalities:
          type: array
 

# --- truncated at 32 KB (35 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/google/refs/heads/main/openapi/google-gemini-api-openapi.yml