# D-ID Videos API
#
# REST API for generating AI talking-head videos from a source image combined with a text script or audio file. Supports multiple avatar versions including V4 Expressive (full-HD with dynamic expressions), V3 Pro, V3 Instant (personal avatars from short clips), and V2 Photo avatars. Returns asynchronous job results via polling or webhook.
#
# OpenAPI Specification
#
# Source file: d-id-videos-openapi.yml
# Specification version and top-level API metadata. Version strings are quoted
# so no parser can ever mistake them for numbers.
openapi: '3.0.3'
info:
  title: D-ID Videos API
  # Folded scalar: line breaks collapse to single spaces and one trailing
  # newline is kept.
  description: >
    REST API for generating AI talking-head videos from a source image
    combined with a text script or audio file. Supports multiple avatar
    versions including V4 Expressive (full-HD with dynamic expressions),
    V3 Pro, V3 Instant (personal avatars from short clips), and V2 Photo
    avatars. Returns asynchronous job results via polling or webhook.
  version: '1.0.0'
  contact:
    url: https://www.d-id.com
  termsOfService: https://www.d-id.com/terms-of-use/
# Pointer to the vendor's reference documentation.
externalDocs:
  description: D-ID API Reference
  url: https://docs.d-id.com/reference/get-started
# Base URL against which every path below is resolved.
servers:
  - url: https://api.d-id.com
    description: D-ID Production API

# Default security requirement applied to every operation that does not
# override it. The list entries are alternatives: a request may authenticate
# with either scheme (both are defined under components.securitySchemes).
security:
  - basicAuth: []
  - bearerAuth: []

# Tag groups referenced by the `tags` list of each operation under `paths`.
tags:
  - name: Talks
    description: Generate AI talking-head videos (V2 Photo / V3 Pro avatars)
  - name: Videos V4
    description: Generate V4 Expressive full-HD avatar videos
  - name: Clips
    description: Manage presenter clips and train personal avatars
  - name: Translations
    description: Translate existing videos into 100+ languages with lip-sync
  - name: Voices
    description: List available TTS voices
  - name: Credits
    description: Retrieve account credit balance

paths:
  # Talks collection: list existing talks or submit a new one.
  /talks:
    get:
      operationId: gettalks
      summary: List talks
      description: >-
        Retrieve a paginated list of talks for the
        authenticated user.
      tags: [Talks]
      parameters:
        # Page size.
        - name: limit
          in: query
          description: Number of talks to return (default 100).
          schema:
            type: number
            default: 100
        # Cursor taken from an earlier page of results.
        - name: token
          in: query
          description: Pagination token from a previous response.
          schema:
            type: string
      responses:
        '200':
          description: Paginated list of talks.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetTalksDto'
        '401':
          $ref: '#/components/responses/Unauthorized'

    post:
      operationId: createtalk
      summary: Create a talk
      description: >
        Generate an AI talking-head video by combining a source image
        with a text script or audio file. Processing is asynchronous —
        poll GET /talks/{id} or provide a webhook URL for completion
        notification.
      tags: [Talks]
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateTalkDto'
      responses:
        # The 201 body is the short TalkResponse, not the full GetTalkDto.
        '201':
          description: Talk created and queued for processing.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TalkResponse'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/PaymentRequired'
        '403':
          $ref: '#/components/responses/Forbidden'
        '451':
          $ref: '#/components/responses/ContentModeration'

  # Single-talk resource, addressed through the shared `resourceId` path
  # parameter.
  /talks/{id}:
    get:
      operationId: gettalk
      summary: Get a talk
      description: Retrieve the status and result of a specific talk.
      tags: [Talks]
      parameters:
        - $ref: '#/components/parameters/resourceId'
      responses:
        '200':
          description: Talk object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetTalkDto'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

    delete:
      operationId: deletetalk
      summary: Delete a talk
      description: >-
        Delete a specific talk by ID. Cannot delete a talk that is
        currently processing.
      tags: [Talks]
      parameters:
        - $ref: '#/components/parameters/resourceId'
      responses:
        '200':
          description: Deleted talk object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetTalkDto'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        # Returned while the talk is still being processed.
        '409':
          description: Talk is in process, try again later.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JsonError'

  # V4 videos collection: list existing V4 videos or submit a new one.
  /videos:
    get:
      operationId: listv4videos
      summary: List V4 videos
      description: Retrieve a paginated list of V4 Expressive avatar videos.
      tags:
        - Videos V4
      parameters:
        # Pagination parameters documented the same way as on GET /talks.
        # NOTE(review): a page size is a whole number, so `integer` would be
        # stricter, but changing the type alters generated client signatures;
        # confirm before tightening.
        - name: limit
          in: query
          description: Number of videos to return (default 100).
          schema:
            type: number
            default: 100
        - name: token
          in: query
          description: Pagination token from a previous response.
          schema:
            type: string
      responses:
        '200':
          description: Paginated list of V4 videos.
          content:
            application/json:
              schema:
                type: object
                properties:
                  videos:
                    type: array
                    items:
                      $ref: '#/components/schemas/V4VideoDto'
                  token:
                    type: string
                    description: Pagination token for the next page.
        '401':
          $ref: '#/components/responses/Unauthorized'

    post:
      operationId: createv4video
      summary: Create a V4 video
      description: Generate a V4 Expressive full-HD avatar video.
      tags:
        - Videos V4
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateV4VideoDto'
      responses:
        '201':
          description: V4 video created.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/V4VideoDto'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/PaymentRequired'

  # Single V4 video resource, addressed through the shared `resourceId` path
  # parameter.
  /videos/{id}:
    get:
      operationId: getv4video
      summary: Get a V4 video
      description: Retrieve the status and result of a specific V4 video.
      tags:
        - Videos V4
      parameters:
        - $ref: '#/components/parameters/resourceId'
      responses:
        '200':
          description: V4 video object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/V4VideoDto'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

    delete:
      operationId: deletev4video
      summary: Delete a V4 video
      # Description added so this operation is documented like every other one.
      description: Delete a specific V4 video by ID.
      tags:
        - Videos V4
      parameters:
        - $ref: '#/components/parameters/resourceId'
      responses:
        '200':
          description: Deleted V4 video object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/V4VideoDto'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # Video translations collection: list translation jobs or submit a new one.
  /translations:
    get:
      operationId: getvideotranslates
      summary: List video translations
      description: Retrieve a paginated list of video translation jobs.
      tags:
        - Translations
      parameters:
        # Pagination parameters documented the same way as on GET /talks.
        - name: limit
          in: query
          description: Number of translations to return (default 100).
          schema:
            type: number
            default: 100
        - name: token
          in: query
          description: Pagination token from a previous response.
          schema:
            type: string
      responses:
        '200':
          description: Paginated list of translations.
          content:
            application/json:
              schema:
                type: object
                properties:
                  translations:
                    type: array
                    items:
                      $ref: '#/components/schemas/GetTranslationResponse'
                  token:
                    type: string
                    description: Pagination token for the next page.
        '401':
          $ref: '#/components/responses/Unauthorized'

    post:
      operationId: createvideotranslate
      summary: Create a video translation
      description: >
        Translate an existing video into one or more target languages using
        AI speech translation, voice cloning, and lip-sync technology.
      tags:
        - Translations
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateTranslationDto'
      responses:
        # The 201 body wraps an array of translation objects under
        # `translations`.
        '201':
          description: Translation job created.
          content:
            application/json:
              schema:
                type: object
                properties:
                  translations:
                    type: array
                    items:
                      $ref: '#/components/schemas/GetTranslationResponse'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/PaymentRequired'
        '403':
          $ref: '#/components/responses/Forbidden'

  # Single translation job, addressed through the shared `resourceId` path
  # parameter.
  /translations/{id}:
    get:
      operationId: getvideotranslate
      summary: Get a video translation
      description: Retrieve the status and result of a specific translation job.
      tags:
        - Translations
      parameters:
        - $ref: '#/components/parameters/resourceId'
      responses:
        '200':
          description: Translation object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetTranslationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

    delete:
      operationId: deletevideotranslate
      summary: Delete a video translation
      # Description added so this operation is documented like every other one.
      description: Delete a specific translation job by ID.
      tags:
        - Translations
      parameters:
        - $ref: '#/components/parameters/resourceId'
      responses:
        '200':
          description: Deleted translation object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetTranslationResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # Voice catalogue lookup; both query filters are optional.
  /tts/voices:
    get:
      operationId: voices
      summary: List voices
      description: >-
        Retrieve available TTS voices, optionally filtered by provider
        or voice ID.
      tags: [Voices]
      parameters:
        # Same provider values as the `provider` enum on the IVoice schema.
        - name: provider
          in: query
          description: Filter by voice provider.
          schema:
            type: string
            enum: [amazon, microsoft, azure-openai, elevenlabs, google]
        - name: id
          in: query
          description: Filter by voice ID (unique per provider).
          schema:
            type: string
      responses:
        # The body is a bare JSON array, not a paginated wrapper object.
        '200':
          description: Array of voice objects.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/IVoice'
        '401':
          $ref: '#/components/responses/Unauthorized'

  # Credit balance of the authenticated account.
  /credits:
    get:
      operationId: getcredits
      summary: Get credits
      description: Retrieve the authenticated user's credit balance and expiration details.
      tags:
        - Credits
      responses:
        '200':
          description: User credit information.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UserCredits'
        # The document-level security requirement applies here too, so an
        # authentication failure is declared as on every other operation.
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          description: No credits found for user.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JsonError'

components:
  # Authentication schemes referenced by the top-level `security` requirement.
  securitySchemes:
    # HTTP Basic; per the description the API key is sent as the username.
    basicAuth:
      type: http
      scheme: basic
      description: HTTP Basic Authentication using API key as username.
    # HTTP Bearer token in the Authorization header.
    bearerAuth:
      type: http
      scheme: bearer
      description: Bearer token authentication.

  # Reusable parameters.
  parameters:
    # Path parameter `id`, shared by the /talks/{id}, /videos/{id} and
    # /translations/{id} operations.
    resourceId:
      name: id
      in: path
      required: true
      description: Unique resource identifier.
      schema:
        type: string

  # Reusable error responses. Every one carries a JsonError body; operations
  # reference them by status code (400, 401, 402, 403, 404, 451).
  responses:
    BadRequest:
      description: Bad request — invalid parameters.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JsonError'
    Unauthorized:
      description: Authentication failure.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JsonError'
          # Sample body for an authentication failure.
          example:
            kind: AuthorizationError
            description: user unauthenticated
    PaymentRequired:
      description: Insufficient credits.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JsonError'
    Forbidden:
      description: Permission denied.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JsonError'
    NotFound:
      description: Resource not found.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JsonError'
          # Sample body for a missing resource.
          example:
            kind: NotFoundError
            description: not found
    # Referenced as the 451 response of POST /talks.
    ContentModeration:
      description: Content moderation rejection.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JsonError'

  schemas:
    # Error body shared by all error responses: a machine-readable `kind` plus
    # a human-readable `description`. Neither property is marked required.
    JsonError:
      type: object
      properties:
        kind:
          type: string
          description: Error type identifier.
          example: AuthorizationError
        description:
          type: string
          description: Human-readable error description.
          example: user unauthenticated

    # Job status enum, referenced by GetTalkDto, TalkResponse and V4VideoDto.
    TalkStatus:
      type: string
      enum: [created, started, done, error, rejected]
      description: Processing status of a talk or video job.

    # Text variant of Script: the text to synthesize plus optional TTS
    # provider settings.
    TextScript:
      type: object
      required: [type, input]
      properties:
        # Single-value enum: marks this object as the text variant.
        type:
          type: string
          enum: [text]
        input:
          type: string
          description: Text content to synthesize (3–40,000 characters).
          minLength: 3
          maxLength: 40000
        provider:
          type: object
          description: TTS provider configuration.
          properties:
            type:
              type: string
              enum: [microsoft, elevenlabs, amazon, google, azure-openai]
            voice_id:
              type: string
              description: Voice identifier for the selected provider.
            language:
              type: string
              description: Language code for TTS synthesis.

    # Audio variant of Script: points at a pre-recorded audio file.
    AudioScript:
      type: object
      required: [type, audio_url]
      properties:
        # Single-value enum: marks this object as the audio variant.
        type:
          type: string
          enum: [audio]
        audio_url:
          type: string
          format: uri
          description: URL of the audio file (max 15 MB, 5–10 minutes duration).

    # Polymorphic script input: exactly one of the text or audio variants,
    # selected by the `type` property.
    Script:
      oneOf:
        - $ref: '#/components/schemas/TextScript'
        - $ref: '#/components/schemas/AudioScript'
      discriminator:
        propertyName: type
        # Explicit mapping is needed because the `type` values (`text`,
        # `audio`) differ from the schema names, which is what tooling falls
        # back to when no mapping is given.
        mapping:
          text: '#/components/schemas/TextScript'
          audio: '#/components/schemas/AudioScript'

    # Request body of POST /talks. Only `script` is mandatory.
    CreateTalkDto:
      type: object
      required: [script]
      properties:
        source_url:
          type: string
          format: uri
          description: URL of the source image to animate (defaults to Alice demo image).
        script:
          $ref: '#/components/schemas/Script'
        config:
          type: object
          description: Advanced configuration options.
          properties:
            logo:
              type: object
              properties:
                url:
                  type: string
                  format: uri
                position:
                  type: array
                  items:
                    type: number
            result_format:
              type: string
              enum: [mp4, gif, wav]
            fluent:
              type: boolean
            pad_audio:
              type: number
        user_data:
          type: string
          description: Custom metadata (max 1,000 characters).
          maxLength: 1000
        name:
          type: string
          description: Title for the generated video.
        webhook:
          type: string
          format: uri
          description: HTTPS webhook URL for completion notification.
        result_url:
          type: string
          format: uri
          description: Custom destination URL for the output video.

    # Full talk record: returned by GET and DELETE /talks/{id} and used as the
    # item type of GetTalksDto.talks. No property is marked required.
    GetTalkDto:
      type: object
      properties:
        id:
          type: string
          description: Unique talk identifier.
        user_id:
          type: string
          description: User who submitted the talk.
        source_url:
          type: string
          format: uri
          description: Source image URL (.jpg or .png).
        audio_url:
          type: string
          format: uri
          description: Audio file URL used for the actor.
        created_at:
          type: string
          format: date-time
        created_by:
          type: string
        started_at:
          type: string
          format: date-time
        modified_at:
          type: string
          format: date-time
        status:
          $ref: '#/components/schemas/TalkStatus'
        result_url:
          type: string
          format: uri
          description: S3 URI to the resulting video.
        # Free-form object: no nested properties are declared here.
        config:
          type: object
          description: Configuration used for processing.
        webhook:
          type: string
          format: uri
        # Free-form object: no nested properties are declared here.
        metadata:
          type: object
          description: Collected process data.

    # Body of the 201 response to POST /talks: a short acknowledgement with
    # the new talk's id and status.
    TalkResponse:
      type: object
      properties:
        id:
          type: string
        object:
          type: string
          example: talk
        created_at:
          type: string
          format: date-time
        created_by:
          type: string
        status:
          $ref: '#/components/schemas/TalkStatus'

    # Body of the 200 response to GET /talks: one page of talks plus the
    # token for the next page.
    GetTalksDto:
      type: object
      properties:
        talks:
          type: array
          items:
            $ref: '#/components/schemas/GetTalkDto'
        token:
          type: string
          description: Pagination token for the next page.

    # Request body of POST /videos. Only `script` is mandatory.
    CreateV4VideoDto:
      type: object
      required:
        - script
      properties:
        script:
          $ref: '#/components/schemas/Script'
        presenter_id:
          type: string
          description: V4 avatar presenter identifier.
        # Free-form object: no nested properties are declared here.
        config:
          type: object
          description: V4-specific configuration.
        # The three fields below are described in the same words as the
        # matching fields on CreateTalkDto.
        webhook:
          type: string
          format: uri
          description: HTTPS webhook URL for completion notification.
        result_url:
          type: string
          format: uri
          description: Custom destination URL for the output video.
        user_data:
          type: string
          description: Custom metadata (max 1,000 characters).
          maxLength: 1000

    # V4 video record returned by every /videos and /videos/{id} operation.
    # Its status reuses the TalkStatus enum.
    V4VideoDto:
      type: object
      properties:
        id:
          type: string
        status:
          $ref: '#/components/schemas/TalkStatus'
        result_url:
          type: string
          format: uri
        created_at:
          type: string
          format: date-time
        modified_at:
          type: string
          format: date-time
        created_by:
          type: string
        user_data:
          type: string

    # Request body of POST /translations: the video to translate and the
    # languages to translate it into.
    CreateTranslationDto:
      type: object
      required:
        - source_url
        - languages
      properties:
        source_url:
          type: string
          format: uri
          description: >
            URL of the video to translate (HTTPS or S3; .mp4, .mov, or .mpeg).
        languages:
          type: array
          # The operation translates into "one or more target languages", so
          # an empty list is rejected at the schema level.
          minItems: 1
          items:
            type: string
          description: Target translation language codes.
          example:
            - es
            - fr
        source_language:
          type: string
          description: Original video language for improved STT accuracy.
        webhook:
          type: string
          format: uri
          description: HTTPS webhook URL for completion notification.
        user_data:
          type: string
          description: Custom metadata (1–1,000 characters).
          minLength: 1
          maxLength: 1000
        result_url:
          type: string
          format: uri
          description: Destination for the output video.

    # Item type of GetTranslationResponse.transcriptions.
    TranscriptionVersion:
      type: object
      properties:
        url:
          type: string
          format: uri
        version_type:
          type: string
        # NOTE(review): presumably flags the version currently in use —
        # confirm against the API reference.
        selected:
          type: boolean

    # Translation job record returned by every /translations operation.
    GetTranslationResponse:
      type: object
      properties:
        id:
          type: string
        group_id:
          type: string
        name:
          type: string
        owner_id:
          type: string
        # Declared inline rather than via TalkStatus: the value set differs
        # (adds `validating` and `ready`, has no `started`).
        status:
          type: string
          enum: [created, validating, done, error, rejected, ready]
        created_at:
          type: string
          format: date-time
        created_by:
          type: string
        modified_at:
          type: string
          format: date-time
        result_url:
          type: string
          format: uri
        subtitles_url:
          type: string
          format: uri
        thumbnail_url:
          type: string
          format: uri
        transcriptions:
          type: array
          items:
            $ref: '#/components/schemas/TranscriptionVersion'

    # Item type of IVoice.languages: a language code with its display name.
    VoiceLanguage:
      type: object
      properties:
        code:
          type: string
          description: Language code (e.g., en-US).
        name:
          type: string
          description: Human-readable language name.

    # One TTS voice as returned by GET /tts/voices. The first seven properties
    # are required; the rest are optional.
    IVoice:
      type: object
      required: [id, name, gender, access, provider, languages, styles]
      properties:
        id:
          type: string
          description: Unique voice identifier.
        name:
          type: string
          description: Display name of the voice.
        gender:
          type: string
          description: Voice gender.
        access:
          type: string
          enum: [public, premium, private, external-private]
        # Same value set as the `provider` query filter on GET /tts/voices.
        provider:
          type: string
          enum: [amazon, microsoft, azure-openai, elevenlabs, google]
        languages:
          type: array
          items:
            $ref: '#/components/schemas/VoiceLanguage'
        styles:
          type: array
          items:
            type: string
        language:
          type: string
        config:
          type: object
          properties:
            modelId:
              type: string
        description:
          type: string
        age:
          type: string
        useCase:
          type: string
        voiceType:
          type: string
          enum: [Avatar, CloneVoice]
        isLegacy:
          type: boolean

    # Item type of UserCredits.credits: one credit allocation with its
    # amounts, timestamps and billing identifiers.
    CreditItem:
      type: object
      properties:
        owner_id:
          type: string
        remaining:
          type: number
          description: Available credits.
        total:
          type: number
          description: Total allocated credits.
        expire_at:
          type: string
          format: date-time
        created_at:
          type: string
          format: date-time
        modified_at:
          type: string
          format: date-time
        valid_from:
          type: string
          format: date-time
        product_id:
          type: string
        price_id:
          type: string
        plan_group:
          type: string
        product_billing_interval:
          type: string

    # Body of the 200 response to GET /credits: the individual credit items
    # plus aggregate totals across them.
    UserCredits:
      type: object
      properties:
        credits:
          type: array
          items:
            $ref: '#/components/schemas/CreditItem'
        remaining:
          type: number
          description: Aggregate remaining credits across all credit items.
        total:
          type: number
          description: Aggregate total credits across all credit items.