# Amazon Polly API
#
# The Amazon Polly API enables you to synthesize speech from text (plain text
# or SSML), manage custom pronunciation lexicons, list available voices across
# multiple languages and engines, and manage asynchronous synthesis tasks with
# S3 output.
#
# OpenAPI Specification
#
# Source file: amazon-polly-openapi.yml
openapi: 3.1.0
# Document-level metadata: title, version, summary text, licence and contact.
info:
  title: Amazon Polly API
  # Quoted so the value stays a string rather than being parsed as a date.
  version: '2016-06-10'
  description: >-
    The Amazon Polly API enables you to synthesize speech from text
    using a variety of lifelike voices across multiple languages.
    You can also manage pronunciation lexicons and start
    long-running speech synthesis tasks.
  license:
    name: Apache 2.0
    url: https://www.apache.org/licenses/LICENSE-2.0.html
  contact:
    name: AWS Support
    url: https://aws.amazon.com/premiumsupport/
  # Specification extension (x- prefix) holding a logo image URL.
  x-logo:
    url: https://a0.awsstatic.com/libra-css/images/logos/aws_logo_smile_1200x630.png
# Regional endpoint template; the `region` variable is substituted into the
# host name of the URL below.
servers:
- url: https://polly.{region}.amazonaws.com
  description: Amazon Polly regional endpoint
  variables:
    region:
      default: us-east-1
      description: AWS region
      # Only regions served from the amazonaws.com domain are listed, because
      # the URL template above hard-codes that domain.
      enum:
      - us-east-1
      - us-east-2
      - us-west-1
      - us-west-2
      - af-south-1
      - eu-west-1
      - eu-west-2
      - eu-west-3
      - eu-central-1
      - eu-north-1
      - me-south-1
      - ap-east-1
      - ap-northeast-1
      - ap-northeast-2
      - ap-northeast-3
      - ap-southeast-1
      - ap-southeast-2
      - ap-south-1
      - sa-east-1
      - ca-central-1
# Document-wide security requirement: operations that do not declare their own
# `security` use the `sigv4` scheme.
security:
- sigv4: []
# Tag definitions; operations refer to these names in their `tags` lists.
tags:
- name: Lexicons
  description: Operations for managing pronunciation lexicons
- name: Speech Synthesis
  description: Operations for synthesizing speech from text
- name: Voices
  description: Operations for listing available voices
paths:
  /v1/speech:
    # SynthesizeSpeech: JSON request in, synthesized stream out. The media type
    # of the 200 response follows the OutputFormat value sent in the request.
    post:
      operationId: SynthesizeSpeech
      summary: Amazon Polly Synthesize Speech
      description: Synthesizes UTF-8 input text into an audio stream.
      tags:
      - Speech Synthesis
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SynthesizeSpeechInput'
      responses:
        '200':
          description: Speech audio stream
          headers:
            x-amzn-RequestCharacters:
              description: Number of characters synthesized
              schema:
                type: integer
          content:
            # OutputFormat mp3
            audio/mpeg:
              schema:
                type: string
                format: binary
            # OutputFormat ogg_vorbis
            audio/ogg:
              schema:
                type: string
                format: binary
            # OutputFormat pcm
            audio/pcm:
              schema:
                type: string
                format: binary
            # OutputFormat json (speech marks, one JSON object per line)
            application/x-json-stream:
              schema:
                type: string
        '400':
          description: >-
            The request was rejected, for example because of invalid SSML, an
            invalid sample rate, text that exceeds the length limit, or an
            unsupported language, engine or speech mark combination.
        '404':
          description: A lexicon named in the request was not found (LexiconNotFoundException).
        '500':
          description: An unknown condition caused a service failure (ServiceFailureException).
  /v1/voices:
    # DescribeVoices: optionally filtered, paginated listing of voices.
    get:
      operationId: DescribeVoices
      summary: Amazon Polly List Available Voices
      description: Returns the list of voices that are available for use when synthesizing speech.
      tags:
      - Voices
      parameters:
      - name: Engine
        in: query
        description: Only return voices supported by this engine
        schema:
          type: string
          enum:
          - standard
          - neural
          - long-form
          - generative
      - name: IncludeAdditionalLanguageCodes
        in: query
        description: >-
          Whether to also return bilingual voices that use the specified
          language as an additional language.
        schema:
          type: boolean
      - name: LanguageCode
        in: query
        description: Language identification tag used to filter the returned voices
        schema:
          type: string
      - name: NextToken
        in: query
        description: Opaque pagination token returned by a previous DescribeVoices call
        schema:
          type: string
      responses:
        '200':
          description: Voices listed successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  NextToken:
                    type: string
                    description: Token to pass in the next request to continue the listing
                  Voices:
                    type: array
                    items:
                      $ref: '#/components/schemas/Voice'
        '400':
          description: The NextToken value is invalid (InvalidNextTokenException).
        '500':
          description: An unknown condition caused a service failure (ServiceFailureException).
  /v1/lexicons/{LexiconName}:
    # The path parameter is declared once at path-item level and therefore
    # applies to every operation below.
    parameters:
    - name: LexiconName
      in: path
      required: true
      description: Name of the lexicon; a case-sensitive alphanumeric string up to 20 characters long
      schema:
        type: string
        pattern: '^[0-9A-Za-z]{1,20}$'
    put:
      operationId: PutLexicon
      summary: Amazon Polly Store a Pronunciation Lexicon
      description: Stores a pronunciation lexicon in an AWS Region.
      tags:
      - Lexicons
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
              - Content
              properties:
                Content:
                  type: string
                  description: Content of the PLS lexicon as string data
      responses:
        '200':
          description: Lexicon stored successfully
        '400':
          description: >-
            The lexicon was rejected, for example because it is invalid, too
            large, uses an unsupported alphabet or language, or the maximum
            number of lexicons has been reached.
        '500':
          description: An unknown condition caused a service failure (ServiceFailureException).
    get:
      operationId: GetLexicon
      summary: Amazon Polly Get a Pronunciation Lexicon
      description: Returns the content of the specified pronunciation lexicon.
      tags:
      - Lexicons
      responses:
        '200':
          description: Lexicon retrieved successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  Lexicon:
                    type: object
                    description: Name and content of the lexicon
                    properties:
                      Content:
                        type: string
                        description: Lexicon content in string format
                      Name:
                        type: string
                        description: Name of the lexicon
                  LexiconAttributes:
                    type: object
                    description: Metadata of the lexicon
                    properties:
                      Alphabet:
                        type: string
                        description: Phonetic alphabet used in the lexicon
                      LanguageCode:
                        type: string
                        description: Language code that the lexicon applies to
                      LastModified:
                        type: number
                        description: Date the lexicon was last modified, as a Unix epoch timestamp
                      LexemesCount:
                        type: integer
                        description: Number of lexemes in the lexicon
                      LexiconArn:
                        type: string
                        description: Amazon Resource Name (ARN) of the lexicon
                      Size:
                        type: integer
                        description: Total size of the lexicon, in characters
        '404':
          description: The specified lexicon was not found (LexiconNotFoundException).
        '500':
          description: An unknown condition caused a service failure (ServiceFailureException).
    delete:
      operationId: DeleteLexicon
      summary: Amazon Polly Delete a Pronunciation Lexicon
      description: Deletes the specified pronunciation lexicon stored in an AWS Region.
      tags:
      - Lexicons
      responses:
        '200':
          description: Lexicon deleted successfully
        '404':
          description: The specified lexicon was not found (LexiconNotFoundException).
        '500':
          description: An unknown condition caused a service failure (ServiceFailureException).
  /v1/lexicons:
    # ListLexicons: paginated listing of the lexicons stored in the region.
    get:
      operationId: ListLexicons
      summary: Amazon Polly List Pronunciation Lexicons
      description: Returns a list of pronunciation lexicons stored in an AWS Region.
      tags:
      - Lexicons
      parameters:
      - name: NextToken
        in: query
        description: Opaque pagination token returned by a previous ListLexicons call
        schema:
          type: string
      responses:
        '200':
          description: Lexicons listed successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  Lexicons:
                    type: array
                    description: Lexicon names and their attributes
                    items:
                      type: object
                      properties:
                        Name:
                          type: string
                          description: Name of the lexicon
                        Attributes:
                          type: object
                          description: Metadata of the lexicon
                          properties:
                            Alphabet:
                              type: string
                            LanguageCode:
                              type: string
                            LastModified:
                              type: number
                            LexemesCount:
                              type: integer
                            LexiconArn:
                              type: string
                            Size:
                              type: integer
                  NextToken:
                    type: string
                    description: Token to pass in the next request to continue the listing
        '400':
          description: The NextToken value is invalid (InvalidNextTokenException).
        '500':
          description: An unknown condition caused a service failure (ServiceFailureException).
components:
  securitySchemes:
    # OpenAPI has no native SigV4 scheme type, so the scheme is modelled as an
    # `Authorization` header; the extension below marks it as SigV4 for
    # AWS-aware tooling.
    sigv4:
      type: apiKey
      name: Authorization
      in: header
      description: >-
        AWS Signature Version 4. The Authorization header carries a signature
        computed from the request and the caller's AWS credentials, not a
        static key.
      x-amazon-apigateway-authtype: awsSigv4
  schemas:
    # Request body of SynthesizeSpeech.
    SynthesizeSpeechInput:
      type: object
      required:
      - OutputFormat
      - Text
      - VoiceId
      properties:
        Engine:
          type: string
          enum:
          - standard
          - neural
          - long-form
          - generative
          description: The engine to use for speech synthesis
        LanguageCode:
          type: string
          description: Language code for the synthesis request
        LexiconNames:
          type: array
          maxItems: 5
          items:
            type: string
          description: List of lexicon names to apply during synthesis
        OutputFormat:
          type: string
          enum:
          - json
          - mp3
          - ogg_vorbis
          - pcm
          description: The format in which the returned output will be encoded
        SampleRate:
          # Modelled as a string, so values are written quoted ("22050").
          type: string
          description: >-
            The audio frequency in Hz. Valid values for mp3 and ogg_vorbis are
            "8000", "16000", "22050" and "24000"; valid values for pcm are
            "8000" and "16000".
        SpeechMarkTypes:
          type: array
          maxItems: 4
          items:
            type: string
            enum:
            - sentence
            - ssml
            - viseme
            - word
          description: The types of speech marks returned for the input text
        Text:
          type: string
          description: Input text to synthesize
        TextType:
          type: string
          enum:
          - ssml
          - text
          default: text
          description: Whether the input text is plain text or SSML
        VoiceId:
          type: string
          description: Voice ID to use for the synthesis
    # One entry of the `Voices` array returned by DescribeVoices.
    Voice:
      type: object
      properties:
        AdditionalLanguageCodes:
          type: array
          items:
            type: string
          description: Additional language codes available for the voice, besides its default language
        Gender:
          type: string
          enum:
          - Female
          - Male
          description: Gender of the voice
        Id:
          type: string
          description: Amazon Polly assigned voice ID
        LanguageCode:
          type: string
          description: Language code of the voice
        LanguageName:
          type: string
          description: Human-readable name of the language in English
        Name:
          type: string
          description: Name of the voice
        SupportedEngines:
          type: array
          items:
            type: string
            enum:
            - standard
            - neural
            - long-form
            - generative
          description: Engines supported by the voice