Groq Flex Processing API

Flexible service tier offering higher throughput at relaxed latency targets for cost-sensitive workloads.

OpenAPI Specification

Source file: groq-openapi.yml
openapi: 3.0.1
info:
  title: GroqCloud API
  description: Specification of the Groq cloud API
  termsOfService: https://groq.com/terms-of-use/
  contact:
    name: Groq Support
    email: support@groq.com
  version: '2.1'
servers:
  - url: https://api.groq.com
paths:
  /openai/v1/audio/speech:
    post:
      operationId: createSpeech
      tags:
        - Audio
      summary: Generates audio from the input text.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSpeechRequest'
      responses:
        '200':
          description: OK
          headers:
            Transfer-Encoding:
              schema:
                type: string
              description: chunked
          content:
            audio/wav:
              schema:
                type: string
                format: binary
      x-groq-metadata:
        returns: Returns an audio file in `wav` format.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/audio/speech \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
                    "model": "playai-tts",
                    "input": "I love building and shipping new features for our users!",
                    "voice": "Fritz-PlayAI",
                    "response_format": "wav"
                  }'
              py: |
                import os
                from groq import Groq

                client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

                speech_file_path = "speech.wav"
                model = "playai-tts"
                voice = "Fritz-PlayAI"
                text = "I love building and shipping new features for our users!"
                response_format = "wav"

                response = client.audio.speech.create(
                    model=model,
                    voice=voice,
                    input=text,
                    response_format=response_format
                )

                response.write_to_file(speech_file_path)
              js: |
                import fs from "fs";
                import path from "path";
                import Groq from 'groq-sdk';

                const groq = new Groq({
                  apiKey: process.env.GROQ_API_KEY
                });

                const speechFilePath = "speech.wav";
                const model = "playai-tts";
                const voice = "Fritz-PlayAI";
                const text = "I love building and shipping new features for our users!";
                const responseFormat = "wav";

                async function main() {
                  const response = await groq.audio.speech.create({
                    model: model,
                    voice: voice,
                    input: text,
                    response_format: responseFormat
                  });

                  const buffer = Buffer.from(await response.arrayBuffer());
                  await fs.promises.writeFile(speechFilePath, buffer);
                }

                main();
  /openai/v1/audio/transcriptions:
    post:
      operationId: createTranscription
      tags:
        - Audio
      summary: Transcribes audio into the input language.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/CreateTranscriptionRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateTranscriptionResponseJson'
      x-groq-metadata:
        returns: Returns an audio transcription object.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/audio/transcriptions \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: multipart/form-data" \
                  -F file="@./sample_audio.m4a" \
                  -F model="whisper-large-v3"
              py: |
                import os
                from groq import Groq

                client = Groq()
                filename = os.path.dirname(__file__) + "/sample_audio.m4a"

                with open(filename, "rb") as file:
                    transcription = client.audio.transcriptions.create(
                      file=(filename, file.read()),
                      model="whisper-large-v3",
                      prompt="Specify context or spelling",  # Optional
                      response_format="json",  # Optional
                      language="en",  # Optional
                      temperature=0.0  # Optional
                    )
                    print(transcription.text)
              js: |
                import fs from "fs";
                import Groq from "groq-sdk";

                const groq = new Groq();
                async function main() {
                  const transcription = await groq.audio.transcriptions.create({
                    file: fs.createReadStream("sample_audio.m4a"),
                    model: "whisper-large-v3",
                    prompt: "Specify context or spelling", // Optional
                    response_format: "json", // Optional
                    language: "en", // Optional
                    temperature: 0.0, // Optional
                  });
                  console.log(transcription.text);
                }
                main();
            response: |
              {
                "text": "Your transcribed text appears here...",
                "x_groq": {
                  "id": "req_unique_id"
                }
              }
  /openai/v1/audio/translations:
    post:
      operationId: createTranslation
      tags:
        - Audio
      summary: Translates audio into English.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/CreateTranslationRequest'
      responses:
        '200':
          description: OK
          content:
            text/plain:
              schema:
                type: string
            application/json:
              schema:
                $ref: '#/components/schemas/CreateTranslationResponseJson'
      x-groq-metadata:
        returns: Returns an audio translation object.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/audio/translations \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: multipart/form-data" \
                  -F file="@./sample_audio.m4a" \
                  -F model="whisper-large-v3"
              py: |
                # Default
                import os
                from groq import Groq

                client = Groq()
                filename = os.path.dirname(__file__) + "/sample_audio.m4a"

                with open(filename, "rb") as file:
                    translation = client.audio.translations.create(
                      file=(filename, file.read()),
                      model="whisper-large-v3",
                      prompt="Specify context or spelling",  # Optional
                      response_format="json",  # Optional
                      temperature=0.0  # Optional
                    )
                    print(translation.text)
              js: |
                // Default
                import fs from "fs";
                import Groq from "groq-sdk";

                const groq = new Groq();
                async function main() {
                  const translation = await groq.audio.translations.create({
                    file: fs.createReadStream("sample_audio.m4a"),
                    model: "whisper-large-v3",
                    prompt: "Specify context or spelling", // Optional
                    response_format: "json", // Optional
                    temperature: 0.0, // Optional
                  });
                  console.log(translation.text);
                }
                main();
            response: |
              {
                "text": "Your translated text appears here...",
                "x_groq": {
                  "id": "req_unique_id"
                }
              }
  /openai/v1/batches:
    post:
      summary: Creates and executes a batch from an uploaded file of requests. [Learn more](/docs/batch).
      operationId: createBatch
      tags:
        - Batch
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - input_file_id
                - endpoint
                - completion_window
              properties:
                input_file_id:
                  type: string
                  description: >
                    The ID of an uploaded file that contains requests for the new batch.


                    See [upload file](/docs/api-reference#files-upload) for how to upload a file.


                    Your input file must be formatted as a [JSONL file](/docs/batch), and must be uploaded
                    with the purpose `batch`. The file can be up to 100 MB in size.
                endpoint:
                  type: string
                  enum:
                    - /v1/chat/completions
                  description: >-
                    The endpoint to be used for all requests in the batch. Currently `/v1/chat/completions` is
                    supported.
                completion_window:
                  type: string
                  description: >-
                    The time frame within which the batch should be processed. Durations from `24h` to `7d`
                    are supported.
                metadata:
                  type: object
                  additionalProperties:
                    type: string
                  description: Optional custom metadata for the batch.
                  nullable: true
      responses:
        '200':
          description: Batch created successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
      x-groq-metadata:
        returns: A created batch object.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/batches \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
                    "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
                    "endpoint": "/v1/chat/completions",
                    "completion_window": "24h"
                  }'
              py: |
                import os
                from groq import Groq

                client = Groq(
                    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
                )
                batch = client.batches.create(
                    completion_window="24h",
                    endpoint="/v1/chat/completions",
                    input_file_id="file_01jh6x76wtemjr74t1fh0faj5t",
                )
                print(batch.id)
              js: |
                import Groq from 'groq-sdk';

                const client = new Groq({
                  apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
                });

                async function main() {
                  const batch = await client.batches.create({
                    completion_window: "24h",
                    endpoint: "/v1/chat/completions",
                    input_file_id: "file_01jh6x76wtemjr74t1fh0faj5t",
                  });
                  console.log(batch.id);
                }

                main();
            response: |
              {
                "id": "batch_01jh6xa7reempvjyh6n3yst2zw",
                "object": "batch",
                "endpoint": "/v1/chat/completions",
                "errors": null,
                "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
                "completion_window": "24h",
                "status": "validating",
                "output_file_id": null,
                "error_file_id": null,
                "finalizing_at": null,
                "failed_at": null,
                "expired_at": null,
                "cancelled_at": null,
                "request_counts": {
                  "total": 0,
                  "completed": 0,
                  "failed": 0
                },
                "metadata": null,
                "created_at": 1736472600,
                "expires_at": 1736559000,
                "cancelling_at": null,
                "completed_at": null,
                "in_progress_at": null
              }
    get:
      operationId: listBatches
      tags:
        - Batch
      summary: List your organization's batches.
      responses:
        '200':
          description: Batch listed successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBatchesResponse'
      x-groq-metadata:
        returns: A list of batches
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/batches \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: application/json"
              py: |
                import os
                from groq import Groq

                client = Groq(
                    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
                )
                batch_list = client.batches.list()
                print(batch_list.data)
              js: |
                import Groq from 'groq-sdk';

                const client = new Groq({
                  apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
                });

                async function main() {
                  const batchList = await client.batches.list();
                  console.log(batchList.data);
                }

                main();
            response: |
              {
                "object": "list",
                "data": [
                  {
                    "id": "batch_01jh6xa7reempvjyh6n3yst2zw",
                    "object": "batch",
                    "endpoint": "/v1/chat/completions",
                    "errors": null,
                    "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
                    "completion_window": "24h",
                    "status": "validating",
                    "output_file_id": null,
                    "error_file_id": null,
                    "finalizing_at": null,
                    "failed_at": null,
                    "expired_at": null,
                    "cancelled_at": null,
                    "request_counts": {
                      "total": 0,
                      "completed": 0,
                      "failed": 0
                    },
                    "metadata": null,
                    "created_at": 1736472600,
                    "expires_at": 1736559000,
                    "cancelling_at": null,
                    "completed_at": null,
                    "in_progress_at": null
                  }
                ]
              }
  /openai/v1/batches/{batch_id}:
    get:
      operationId: retrieveBatch
      tags:
        - Batch
      summary: Retrieves a batch.
      parameters:
        - in: path
          name: batch_id
          required: true
          schema:
            type: string
          description: The ID of the batch to retrieve.
      responses:
        '200':
          description: Batch retrieved successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
      x-groq-metadata:
        returns: A batch object.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/batches/batch_01jh6xa7reempvjyh6n3yst2zw \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: application/json"
              py: |
                import os
                from groq import Groq

                client = Groq(
                    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
                )
                batch = client.batches.retrieve(
                    "batch_01jh6xa7reempvjyh6n3yst2zw",
                )
                print(batch.id)
              js: |
                import Groq from 'groq-sdk';

                const client = new Groq({
                  apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
                });

                async function main() {
                  const batch = await client.batches.retrieve("batch_01jh6xa7reempvjyh6n3yst2zw");
                  console.log(batch.id);
                }

                main();
            response: |
              {
                "id": "batch_01jh6xa7reempvjyh6n3yst2zw",
                "object": "batch",
                "endpoint": "/v1/chat/completions",
                "errors": null,
                "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
                "completion_window": "24h",
                "status": "validating",
                "output_file_id": null,
                "error_file_id": null,
                "finalizing_at": null,
                "failed_at": null,
                "expired_at": null,
                "cancelled_at": null,
                "request_counts": {
                  "total": 0,
                  "completed": 0,
                  "failed": 0
                },
                "metadata": null,
                "created_at": 1736472600,
                "expires_at": 1736559000,
                "cancelling_at": null,
                "completed_at": null,
                "in_progress_at": null
              }
  /openai/v1/batches/{batch_id}/cancel:
    post:
      operationId: cancelBatch
      tags:
        - Batch
      summary: Cancels a batch.
      parameters:
        - in: path
          name: batch_id
          required: true
          schema:
            type: string
          description: The ID of the batch to cancel.
      responses:
        '200':
          description: Batch cancelled successfully.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
      x-groq-metadata:
        returns: A batch object.
        examples:
          - title: Default
            request:
              curl: |
                curl -X POST https://api.groq.com/openai/v1/batches/batch_01jh6xa7reempvjyh6n3yst2zw/cancel \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: application/json"
              py: |
                import os
                from groq import Groq

                client = Groq(
                    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
                )
                batch = client.batches.cancel(
                    "batch_01jh6xa7reempvjyh6n3yst2zw",
                )
                print(batch.id)
              js: |
                import Groq from 'groq-sdk';

                const client = new Groq({
                  apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
                });

                async function main() {
                  const batch = await client.batches.cancel("batch_01jh6xa7reempvjyh6n3yst2zw");
                  console.log(batch.id);
                }

                main();
            response: |
              {
                "id": "batch_01jh6xa7reempvjyh6n3yst2zw",
                "object": "batch",
                "endpoint": "/v1/chat/completions",
                "errors": null,
                "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
                "completion_window": "24h",
                "status": "cancelling",
                "output_file_id": null,
                "error_file_id": null,
                "finalizing_at": null,
                "failed_at": null,
                "expired_at": null,
                "cancelled_at": null,
                "request_counts": {
                  "total": 0,
                  "completed": 0,
                  "failed": 0
                },
                "metadata": null,
                "created_at": 1736472600,
                "expires_at": 1736559000,
                "cancelling_at": null,
                "completed_at": null,
                "in_progress_at": null
              }
  /openai/v1/chat/completions:
    post:
      operationId: createChatCompletion
      tags:
        - Chat
      summary: Creates a model response for the given chat conversation.
      requestBody:
        required: true
        description: The chat prompt and parameters
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateChatCompletionRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateChatCompletionResponse'
      x-groq-metadata:
        returns: >-
          Returns a [chat completion](/docs/api-reference#chat-create) object, or a streamed sequence of [chat
          completion chunk](/docs/api-reference#chat-create) objects if the request is streamed.
        examples:
          - title: Default
            request:
              py: |
                import os

                from groq import Groq

                client = Groq(
                    # This is the default and can be omitted
                    api_key=os.environ.get("GROQ_API_KEY"),
                )

                chat_completion = client.chat.completions.create(
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a helpful assistant."
                        },
                        {
                            "role": "user",
                            "content": "Explain the importance of fast language models",
                        }
                    ],
                    model="llama-3.3-70b-versatile",
                )

                print(chat_completion.choices[0].message.content)
              js: |
                import Groq from "groq-sdk";

                const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });

                async function main() {
                  const completion = await groq.chat.completions
                    .create({
                      messages: [
                        {
                          role: "user",
                          content: "Explain the importance of fast language models",
                        },
                      ],
                      model: "llama-3.3-70b-versatile",
                    })
                  console.log(completion.choices[0].message.content);
                }

                main();
              curl: |
                curl https://api.groq.com/openai/v1/chat/completions -s \
                -H "Content-Type: application/json" \
                -H "Authorization: Bearer $GROQ_API_KEY" \
                -d '{
                  "model": "llama-3.3-70b-versatile",
                  "messages": [{
                      "role": "user",
                      "content": "Explain the importance of fast language models"
                  }]
                }'
            response: |
              {
                "id": "chatcmpl-f51b2cd2-bef7-417e-964e-a08f0b513c22",
                "object": "chat.completion",
                "created": 1730241104,
                "model": "openai/gpt-oss-20b",
                "choices": [
                  {
                    "index": 0,
                    "message": {
                      "role": "assistant",
                      "content": "Fast language models have gained significant attention in recent years due to their ability to process and generate human-like text quickly and efficiently. The importance of fast language models can be understood from their potential applications and benefits:\n\n1. **Real-time Chatbots and Conversational Interfaces**: Fast language models enable the development of chatbots and conversational interfaces that can respond promptly to user queries, making them more engaging and useful.\n2. **Sentiment Analysis and Opinion Mining**: Fast language models can quickly analyze text data to identify sentiments, opinions, and emotions, allowing for improved customer service, market research, and opinion mining.\n3. **Language Translation and Localization**: Fast language models can quickly translate text between languages, facilitating global communication and enabling businesses to reach a broader audience.\n4. **Text Summarization and Generation**: Fast language models can summarize long documents or even generate new text on a given topic, improving information retrieval and processing efficiency.\n5. **Named Entity Recognition and Information Extraction**: Fast language models can rapidly recognize and extract specific entities, such as names, locations, and organizations, from unstructured text data.\n6. **Recommendation Systems**: Fast language models can analyze large amounts of text data to personalize product recommendations, improve customer experience, and increase sales.\n7. **Content Generation for Social Media**: Fast language models can quickly generate engaging content for social media platforms, helping businesses maintain a consistent online presence and increasing their online visibility.\n8. 
**Sentiment Analysis for Stock Market Analysis**: Fast language models can quickly analyze social media posts, news articles, and other text data to identify sentiment trends, enabling financial analysts to make more informed investment decisions.\n9. **Language Learning and Education**: Fast language models can provide instant feedback and adaptive language learning, making language education more effective and engaging.\n10. **Domain-Specific Knowledge Extraction**: Fast language models can quickly extract relevant information from vast amounts of text data, enabling domain experts to focus on high-level decision-making rather than manual information gathering.\n\nThe benefits of fast language models include:\n\n* **Increased Efficiency**: Fast language models can process large amounts of text data quickly, reducing the time and effort required for tasks such as sentiment analysis, entity recognition, and text summarization.\n* **Improved Accuracy**: Fast language models can analyze and learn from large datasets, leading to more accurate results and more informed decision-making.\n* **Enhanced User Experience**: Fast language models can enable real-time interactions, personalized recommendations, and timely responses, improving the overall user experience.\n* **Cost Savings**: Fast language models can automate many tasks, reducing the need for manual labor and minimizing costs associated with data processing and analysis.\n\nIn summary, fast language models have the potential to transform various industries and applications by providing fast, accurate, and efficient language processing capabilities."
                    },
                    "logprobs": null,
                    "finish_reason": "stop"
                  }
                ],
                "usage": {
                  "queue_time": 0.037493756,
                  "prompt_tokens": 18,
                  "prompt_time": 0.000680594,
                  "completion_tokens": 556,
                  "completion_time": 0.463333333,
                  "total_tokens": 574,
                  "total_time": 0.464013927
                },
                "system_fingerprint": "fp_179b0f92c9",
                "x_groq": { "id": "req_01jbd6g2qdfw2adyrt2az8hz4w" }
              }
  /openai/v1/embeddings:
    post:
      operationId: createEmbedding
      tags:
        - Embeddings
      summary: Creates an embedding vector representing the input text.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateEmbeddingRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateEmbeddingResponse'
  /openai/v1/files:
    get:
      operationId: listFiles
      tags:
        - Files
      summary: Returns a list of files.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListFilesResponse'
      x-groq-metadata:
        returns: A list of [File](/docs/api-reference#files-upload) objects.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/files \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -H "Content-Type: application/json"
              py: |
                import os
                from groq import Groq

                client = Groq(
                    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
                )
                file_list = client.files.list()
                print(file_list.data)
              js: |
                import Groq from 'groq-sdk';

                const client = new Groq({
                  apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
                });

                async function main() {
                  const fileList = await client.files.list();
                  console.log(fileList.data);
                }

                main();
            response: |
              {
                "object": "list",
                "data": [
                  {
                    "id": "file_01jh6x76wtemjr74t1fh0faj5t",
                    "object": "file",
                    "bytes": 966,
                    "created_at": 1736472501,
                    "filename": "batch_file.jsonl",
                    "purpose": "batch"
                  }
                ]
              }
    post:
      operationId: uploadFile
      tags:
        - Files
      summary: >
        Upload a file that can be used across various endpoints.


        The Batch API only supports `.jsonl` files up to 100 MB in size. The input also has a specific
        required [format](/docs/batch).


        Please contact us if you need to increase these storage limits.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/CreateFileRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/File'
      x-groq-metadata:
        returns: The uploaded File object.
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.groq.com/openai/v1/files \
                  -H "Authorization: Bearer $GROQ_API_KEY" \
                  -F purpose="batch" \
                  -F "file=@batch_file.jsonl"
              py: |
                import os
                import requests # pip install requests first!

                def upload_file_to_groq(api_key, file_path):
                    url = "https://api.groq.com/openai/v1/files"

                    headers = {
                        "Authorization": f"Bearer {api_key}"
                    }

                    # Prepare the file and form data
                    files = {
                        "file": ("batch_file.jsonl", open(file_path, "rb"))
                    }

                    data = {
                        "purpose": "batch"
                    }

                    # Make the POST request
                   

# --- truncated at 32 KB (188 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/groq/refs/heads/main/openapi/groq-openapi.yml