OpenAI Audio API

The Audio API provides two speech to text endpoints, transcriptions and translations, based on our state-of-the-art open source large-v2 Whisper model.

OpenAPI Specification

audio-openapi-original.yml Raw ↑
openapi: 3.0.0
info:
  title: 'OpenAI audio'
  description: Needs description.
  version: 2.0.0
  termsOfService: https://openai.com/policies/terms-of-use
  contact:
    name: OpenAI Support
    url: https://help.openai.com/
  license:
    name: MIT
    url: https://github.com/openai/openai-openapi/blob/master/LICENSE
servers:
  - url: https://api.openai.com/v1
tags:
  - name: Audio
    description: Learn how to turn audio into text or text into audio.
paths:
  /audio/speech:
    post:
      operationId: createSpeech
      tags:
        - Audio
      summary: OpenAI Generates audio from the input text.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSpeechRequest'
      responses:
        '200':
          description: OK
          headers:
            Transfer-Encoding:
              schema:
                type: string
              description: chunked
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
      x-oaiMeta:
        name: Create speech
        group: audio
        returns: The audio file content.
        examples:
          request:
            curl: |
              curl https://api.openai.com/v1/audio/speech \
                -H "Authorization: Bearer $OPENAI_API_KEY" \
                -H "Content-Type: application/json" \
                -d '{
                  "model": "tts-1",
                  "input": "The quick brown fox jumped over the lazy dog.",
                  "voice": "alloy"
                }' \
                --output speech.mp3
            python: |
              from pathlib import Path
              import openai

              speech_file_path = Path(__file__).parent / "speech.mp3"
              response = openai.audio.speech.create(
                model="tts-1",
                voice="alloy",
                input="The quick brown fox jumped over the lazy dog."
              )
              response.stream_to_file(speech_file_path)
            node: |
              import fs from "fs";
              import path from "path";
              import OpenAI from "openai";

              const openai = new OpenAI();

              const speechFile = path.resolve("./speech.mp3");

              async function main() {
                const mp3 = await openai.audio.speech.create({
                  model: "tts-1",
                  voice: "alloy",
                  input: "Today is a wonderful day to build something people love!",
                });
                console.log(speechFile);
                const buffer = Buffer.from(await mp3.arrayBuffer());
                await fs.promises.writeFile(speechFile, buffer);
              }
              main();
  /audio/transcriptions:
    post:
      operationId: createTranscription
      tags:
        - Audio
      summary: OpenAI Transcribes audio into the input language.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/CreateTranscriptionRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateTranscriptionResponse'
      x-oaiMeta:
        name: Create transcription
        group: audio
        returns: The transcribed text.
        examples:
          request:
            curl: |
              curl https://api.openai.com/v1/audio/transcriptions \
                -H "Authorization: Bearer $OPENAI_API_KEY" \
                -H "Content-Type: multipart/form-data" \
                -F file="@/path/to/file/audio.mp3" \
                -F model="whisper-1"
            python: |
              from openai import OpenAI
              client = OpenAI()

              audio_file = open("speech.mp3", "rb")
              transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
              )
            node: |
              import fs from "fs";
              import OpenAI from "openai";

              const openai = new OpenAI();

              async function main() {
                const transcription = await openai.audio.transcriptions.create({
                  file: fs.createReadStream("audio.mp3"),
                  model: "whisper-1",
                });

                console.log(transcription.text);
              }
              main();
          response: |
            {
              "text": "Imagine the wildest idea that you've ever had, and you're curious about how it might scale to something that's a 100, a 1,000 times bigger. This is a place where you can get to do that."
            }
  /audio/translations:
    post:
      operationId: createTranslation
      tags:
        - Audio
      summary: OpenAI Translates audio into English.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/CreateTranslationRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateTranslationResponse'
      x-oaiMeta:
        name: Create translation
        group: audio
        returns: The translated text.
        examples:
          request:
            curl: |
              curl https://api.openai.com/v1/audio/translations \
                -H "Authorization: Bearer $OPENAI_API_KEY" \
                -H "Content-Type: multipart/form-data" \
                -F file="@/path/to/file/german.m4a" \
                -F model="whisper-1"
            python: |
              from openai import OpenAI
              client = OpenAI()

              audio_file = open("speech.mp3", "rb")
              transcript = client.audio.translations.create(
                model="whisper-1",
                file=audio_file
              )
            node: |
              import fs from "fs";
              import OpenAI from "openai";

              const openai = new OpenAI();

              async function main() {
                  const translation = await openai.audio.translations.create({
                      file: fs.createReadStream("speech.mp3"),
                      model: "whisper-1",
                  });

                  console.log(translation.text);
              }
              main();
          response: |
            {
              "text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
            }
components:
  securitySchemes:
    ApiKeyAuth:
      type: http
      scheme: bearer
  schemas:
    CreateTranscriptionResponse:
      type: object
      properties:
        text:
          type: string
      required:
        - text
    CreateTranslationResponse:
      type: object
      properties:
        text:
          type: string
      required:
        - text
security:
  - ApiKeyAuth: []
x-oaiMeta:
  groups:
    - id: audio
      title: Audio
      description: |
        Learn how to turn audio into text or text into audio.

        Related guide: [Speech to text](/docs/guides/speech-to-text)
      sections:
        - type: endpoint
          key: createSpeech
          path: createSpeech
        - type: endpoint
          key: createTranscription
          path: createTranscription
        - type: endpoint
          key: createTranslation
          path: createTranslation
    - id: chat
      title: Chat
      description: >
        Given a list of messages comprising a conversation, the model will
        return a response.


        Related guide: [Chat Completions](/docs/guides/text-generation)
      sections:
        - type: endpoint
          key: createChatCompletion
          path: create
        - type: object
          key: CreateChatCompletionResponse
          path: object
        - type: object
          key: CreateChatCompletionStreamResponse
          path: streaming
    - id: embeddings
      title: Embeddings
      description: >
        Get a vector representation of a given input that can be easily consumed
        by machine learning models and algorithms.


        Related guide: [Embeddings](/docs/guides/embeddings)
      sections:
        - type: endpoint
          key: createEmbedding
          path: create
        - type: object
          key: Embedding
          path: object
    - id: fine-tuning
      title: Fine-tuning
      description: >
        Manage fine-tuning jobs to tailor a model to your specific training
        data.


        Related guide: [Fine-tune models](/docs/guides/fine-tuning)
      sections:
        - type: endpoint
          key: createFineTuningJob
          path: create
        - type: endpoint
          key: listPaginatedFineTuningJobs
          path: list
        - type: endpoint
          key: listFineTuningEvents
          path: list-events
        - type: endpoint
          key: retrieveFineTuningJob
          path: retrieve
        - type: endpoint
          key: cancelFineTuningJob
          path: cancel
        - type: object
          key: FineTuningJob
          path: object
        - type: object
          key: FineTuningJobEvent
          path: event-object
    - id: files
      title: Files
      description: >
        Files are used to upload documents that can be used with features like
        [Assistants](/docs/api-reference/assistants) and
        [Fine-tuning](/docs/api-reference/fine-tuning).
      sections:
        - type: endpoint
          key: createFile
          path: create
        - type: endpoint
          key: listFiles
          path: list
        - type: endpoint
          key: retrieveFile
          path: retrieve
        - type: endpoint
          key: deleteFile
          path: delete
        - type: endpoint
          key: downloadFile
          path: retrieve-contents
        - type: object
          key: OpenAIFile
          path: object
    - id: images
      title: Images
      description: >
        Given a prompt and/or an input image, the model will generate a new
        image.


        Related guide: [Image generation](/docs/guides/images)
      sections:
        - type: endpoint
          key: createImage
          path: create
        - type: endpoint
          key: createImageEdit
          path: createEdit
        - type: endpoint
          key: createImageVariation
          path: createVariation
        - type: object
          key: Image
          path: object
    - id: models
      title: Models
      description: >
        List and describe the various models available in the API. You can refer
        to the [Models](/docs/models) documentation to understand what models
        are available and the differences between them.
      sections:
        - type: endpoint
          key: listModels
          path: list
        - type: endpoint
          key: retrieveModel
          path: retrieve
        - type: endpoint
          key: deleteModel
          path: delete
        - type: object
          key: Model
          path: object
    - id: moderations
      title: Moderations
      description: >
        Given a input text, outputs if the model classifies it as violating
        OpenAI's content policy.


        Related guide: [Moderations](/docs/guides/moderation)
      sections:
        - type: endpoint
          key: createModeration
          path: create
        - type: object
          key: CreateModerationResponse
          path: object
    - id: assistants
      title: Assistants
      beta: true
      description: |
        Build assistants that can call models and use tools to perform tasks.

        [Get started with the Assistants API](/docs/assistants)
      sections:
        - type: endpoint
          key: createAssistant
          path: createAssistant
        - type: endpoint
          key: createAssistantFile
          path: createAssistantFile
        - type: endpoint
          key: listAssistants
          path: listAssistants
        - type: endpoint
          key: listAssistantFiles
          path: listAssistantFiles
        - type: endpoint
          key: getAssistant
          path: getAssistant
        - type: endpoint
          key: getAssistantFile
          path: getAssistantFile
        - type: endpoint
          key: modifyAssistant
          path: modifyAssistant
        - type: endpoint
          key: deleteAssistant
          path: deleteAssistant
        - type: endpoint
          key: deleteAssistantFile
          path: deleteAssistantFile
        - type: object
          key: AssistantObject
          path: object
        - type: object
          key: AssistantFileObject
          path: file-object
    - id: threads
      title: Threads
      beta: true
      description: |
        Create threads that assistants can interact with.

        Related guide: [Assistants](/docs/assistants/overview)
      sections:
        - type: endpoint
          key: createThread
          path: createThread
        - type: endpoint
          key: getThread
          path: getThread
        - type: endpoint
          key: modifyThread
          path: modifyThread
        - type: endpoint
          key: deleteThread
          path: deleteThread
        - type: object
          key: ThreadObject
          path: object
    - id: messages
      title: Messages
      beta: true
      description: |
        Create messages within threads

        Related guide: [Assistants](/docs/assistants/overview)
      sections:
        - type: endpoint
          key: createMessage
          path: createMessage
        - type: endpoint
          key: listMessages
          path: listMessages
        - type: endpoint
          key: listMessageFiles
          path: listMessageFiles
        - type: endpoint
          key: getMessage
          path: getMessage
        - type: endpoint
          key: getMessageFile
          path: getMessageFile
        - type: endpoint
          key: modifyMessage
          path: modifyMessage
        - type: object
          key: MessageObject
          path: object
        - type: object
          key: MessageFileObject
          path: file-object
    - id: runs
      title: Runs
      beta: true
      description: |
        Represents an execution run on a thread.

        Related guide: [Assistants](/docs/assistants/overview)
      sections:
        - type: endpoint
          key: createRun
          path: createRun
        - type: endpoint
          key: createThreadAndRun
          path: createThreadAndRun
        - type: endpoint
          key: listRuns
          path: listRuns
        - type: endpoint
          key: listRunSteps
          path: listRunSteps
        - type: endpoint
          key: getRun
          path: getRun
        - type: endpoint
          key: getRunStep
          path: getRunStep
        - type: endpoint
          key: modifyRun
          path: modifyRun
        - type: endpoint
          key: submitToolOuputsToRun
          path: submitToolOutputs
        - type: endpoint
          key: cancelRun
          path: cancelRun
        - type: object
          key: RunObject
          path: object
        - type: object
          key: RunStepObject
          path: step-object
    - id: completions
      title: Completions
      legacy: true
      description: >
        Given a prompt, the model will return one or more predicted completions
        along with the probabilities of alternative tokens at each position.
        Most developer should use our [Chat Completions
        API](/docs/guides/text-generation/text-generation-models) to leverage
        our best and newest models. Most models that support the legacy
        Completions endpoint [will be shut off on January 4th,
        2024](/docs/deprecations/2023-07-06-gpt-and-embeddings).
      sections:
        - type: endpoint
          key: createCompletion
          path: create
        - type: object
          key: CreateCompletionResponse
          path: object