Groq LoRA Inference API
Serves customer LoRA adapters on top of supported base models for low-latency custom inference.
openapi: 3.0.1
info:
title: GroqCloud API
description: Specification of the GroqCloud API
termsOfService: https://groq.com/terms-of-use/
contact:
name: Groq Support
email: [email protected]
version: '2.1'
servers:
- url: https://api.groq.com
paths:
/openai/v1/audio/speech:
post:
operationId: createSpeech
tags:
- Audio
summary: Generates audio from the input text.
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/CreateSpeechRequest'
responses:
'200':
description: OK
headers:
Transfer-Encoding:
schema:
type: string
description: The audio is returned using chunked transfer encoding.
content:
audio/wav:
schema:
type: string
format: binary
x-groq-metadata:
returns: Returns an audio file in `wav` format.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/audio/speech \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "playai-tts",
"input": "I love building and shipping new features for our users!",
"voice": "Fritz-PlayAI",
"response_format": "wav"
}'
py: |
import os
from groq import Groq
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
speech_file_path = "speech.wav"
model = "playai-tts"
voice = "Fritz-PlayAI"
text = "I love building and shipping new features for our users!"
response_format = "wav"
response = client.audio.speech.create(
model=model,
voice=voice,
input=text,
response_format=response_format
)
response.write_to_file(speech_file_path)
js: |
import fs from "fs";
import path from "path";
import Groq from 'groq-sdk';
const groq = new Groq({
apiKey: process.env.GROQ_API_KEY
});
const speechFilePath = "speech.wav";
const model = "playai-tts";
const voice = "Fritz-PlayAI";
const text = "I love building and shipping new features for our users!";
const responseFormat = "wav";
async function main() {
const response = await groq.audio.speech.create({
model: model,
voice: voice,
input: text,
response_format: responseFormat
});
const buffer = Buffer.from(await response.arrayBuffer());
await fs.promises.writeFile(speechFilePath, buffer);
}
main();
/openai/v1/audio/transcriptions:
post:
operationId: createTranscription
tags:
- Audio
summary: Transcribes audio into the input language.
requestBody:
required: true
content:
multipart/form-data:
schema:
$ref: '#/components/schemas/CreateTranscriptionRequest'
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/CreateTranscriptionResponseJson'
x-groq-metadata:
returns: Returns an audio transcription object.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/audio/transcriptions \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: multipart/form-data" \
-F file="@./sample_audio.m4a" \
-F model="whisper-large-v3"
py: |
import os
from groq import Groq
client = Groq()
filename = os.path.join(os.path.dirname(__file__), "sample_audio.m4a")
with open(filename, "rb") as file:
transcription = client.audio.transcriptions.create(
file=(filename, file.read()),
model="whisper-large-v3",
prompt="Specify context or spelling", # Optional
response_format="json", # Optional
language="en", # Optional
temperature=0.0 # Optional
)
print(transcription.text)
js: |
import fs from "fs";
import Groq from "groq-sdk";
const groq = new Groq();
async function main() {
const transcription = await groq.audio.transcriptions.create({
file: fs.createReadStream("sample_audio.m4a"),
model: "whisper-large-v3",
prompt: "Specify context or spelling", // Optional
response_format: "json", // Optional
language: "en", // Optional
temperature: 0.0, // Optional
});
console.log(transcription.text);
}
main();
response: |
{
"text": "Your transcribed text appears here...",
"x_groq": {
"id": "req_unique_id"
}
}
/openai/v1/audio/translations:
post:
operationId: createTranslation
tags:
- Audio
summary: Translates audio into English.
requestBody:
required: true
content:
multipart/form-data:
schema:
$ref: '#/components/schemas/CreateTranslationRequest'
responses:
'200':
description: OK
content:
text/plain:
schema:
type: string
application/json:
schema:
$ref: '#/components/schemas/CreateTranslationResponseJson'
x-groq-metadata:
returns: Returns an audio translation object.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/audio/translations \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: multipart/form-data" \
-F file="@./sample_audio.m4a" \
-F model="whisper-large-v3"
py: |
# Default
import os
from groq import Groq
client = Groq()
filename = os.path.join(os.path.dirname(__file__), "sample_audio.m4a")
with open(filename, "rb") as file:
translation = client.audio.translations.create(
file=(filename, file.read()),
model="whisper-large-v3",
prompt="Specify context or spelling", # Optional
response_format="json", # Optional
temperature=0.0 # Optional
)
print(translation.text)
js: |
// Default
import fs from "fs";
import Groq from "groq-sdk";
const groq = new Groq();
async function main() {
const translation = await groq.audio.translations.create({
file: fs.createReadStream("sample_audio.m4a"),
model: "whisper-large-v3",
prompt: "Specify context or spelling", // Optional
response_format: "json", // Optional
temperature: 0.0, // Optional
});
console.log(translation.text);
}
main();
response: |
{
"text": "Your translated text appears here...",
"x_groq": {
"id": "req_unique_id"
}
}
/openai/v1/batches:
post:
summary: Creates and executes a batch from an uploaded file of requests. [Learn more](/docs/batch).
operationId: createBatch
tags:
- Batch
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- input_file_id
- endpoint
- completion_window
properties:
input_file_id:
type: string
description: >
The ID of an uploaded file that contains requests for the new batch.
See [upload file](/docs/api-reference#files-upload) for how to upload a file.
Your input file must be formatted as a [JSONL file](/docs/batch), and must be uploaded
with the purpose `batch`. The file can be up to 100 MB in size.
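# Illustrative JSONL input line (a sketch; see /docs/batch for the authoritative format). Each
# line of the input file is one request object, for example:
# {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.3-70b-versatile", "messages": [{"role": "user", "content": "Hello"}]}}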
endpoint:
type: string
enum:
- /v1/chat/completions
description: >-
The endpoint to be used for all requests in the batch. Currently `/v1/chat/completions` is
supported.
completion_window:
type: string
description: >-
The time frame within which the batch should be processed. Durations from `24h` to `7d`
are supported.
metadata:
type: object
additionalProperties:
type: string
description: Optional custom metadata for the batch.
nullable: true
responses:
'200':
description: Batch created successfully.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
x-groq-metadata:
returns: A created batch object.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/batches \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
"endpoint": "/v1/chat/completions",
"completion_window": "24h"
}'
py: |
import os
from groq import Groq
client = Groq(
api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted
)
batch = client.batches.create(
completion_window="24h",
endpoint="/v1/chat/completions",
input_file_id="file_01jh6x76wtemjr74t1fh0faj5t",
)
print(batch.id)
js: |
import Groq from 'groq-sdk';
const client = new Groq({
apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
});
async function main() {
const batch = await client.batches.create({
completion_window: "24h",
endpoint: "/v1/chat/completions",
input_file_id: "file_01jh6x76wtemjr74t1fh0faj5t",
});
console.log(batch.id);
}
main();
response: |
{
"id": "batch_01jh6xa7reempvjyh6n3yst2zw",
"object": "batch",
"endpoint": "/v1/chat/completions",
"errors": null,
"input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
"completion_window": "24h",
"status": "validating",
"output_file_id": null,
"error_file_id": null,
"finalizing_at": null,
"failed_at": null,
"expired_at": null,
"cancelled_at": null,
"request_counts": {
"total": 0,
"completed": 0,
"failed": 0
},
"metadata": null,
"created_at": 1736472600,
"expires_at": 1736559000,
"cancelling_at": null,
"completed_at": null,
"in_progress_at": null
}
get:
operationId: listBatches
tags:
- Batch
summary: Lists your organization's batches.
responses:
'200':
description: Batches listed successfully.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
x-groq-metadata:
returns: A list of batch objects.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/batches \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json"
py: |
import os
from groq import Groq
client = Groq(
api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted
)
batch_list = client.batches.list()
print(batch_list.data)
js: |
import Groq from 'groq-sdk';
const client = new Groq({
apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
});
async function main() {
const batchList = await client.batches.list();
console.log(batchList.data);
}
main();
response: |
{
"object": "list",
"data": [
{
"id": "batch_01jh6xa7reempvjyh6n3yst2zw",
"object": "batch",
"endpoint": "/v1/chat/completions",
"errors": null,
"input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
"completion_window": "24h",
"status": "validating",
"output_file_id": null,
"error_file_id": null,
"finalizing_at": null,
"failed_at": null,
"expired_at": null,
"cancelled_at": null,
"request_counts": {
"total": 0,
"completed": 0,
"failed": 0
},
"metadata": null,
"created_at": 1736472600,
"expires_at": 1736559000,
"cancelling_at": null,
"completed_at": null,
"in_progress_at": null
}
]
}
/openai/v1/batches/{batch_id}:
get:
operationId: retrieveBatch
tags:
- Batch
summary: Retrieves a batch.
parameters:
- in: path
name: batch_id
required: true
schema:
type: string
description: The ID of the batch to retrieve.
responses:
'200':
description: Batch retrieved successfully.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
x-groq-metadata:
returns: A batch object.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/batches/batch_01jh6xa7reempvjyh6n3yst2zw \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json"
py: |
import os
from groq import Groq
client = Groq(
api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted
)
batch = client.batches.retrieve(
"batch_01jh6xa7reempvjyh6n3yst2zw",
)
print(batch.id)
js: |
import Groq from 'groq-sdk';
const client = new Groq({
apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
});
async function main() {
const batch = await client.batches.retrieve("batch_01jh6xa7reempvjyh6n3yst2zw");
console.log(batch.id);
}
main();
response: |
{
"id": "batch_01jh6xa7reempvjyh6n3yst2zw",
"object": "batch",
"endpoint": "/v1/chat/completions",
"errors": null,
"input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
"completion_window": "24h",
"status": "validating",
"output_file_id": null,
"error_file_id": null,
"finalizing_at": null,
"failed_at": null,
"expired_at": null,
"cancelled_at": null,
"request_counts": {
"total": 0,
"completed": 0,
"failed": 0
},
"metadata": null,
"created_at": 1736472600,
"expires_at": 1736559000,
"cancelling_at": null,
"completed_at": null,
"in_progress_at": null
}
/openai/v1/batches/{batch_id}/cancel:
post:
operationId: cancelBatch
tags:
- Batch
summary: Cancels a batch.
parameters:
- in: path
name: batch_id
required: true
schema:
type: string
description: The ID of the batch to cancel.
responses:
'200':
description: Batch cancelled successfully.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
x-groq-metadata:
returns: A batch object.
examples:
- title: Default
request:
curl: |
curl -X POST https://api.groq.com/openai/v1/batches/batch_01jh6xa7reempvjyh6n3yst2zw/cancel \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json"
py: |
import os
from groq import Groq
client = Groq(
api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted
)
batch = client.batches.cancel(
"batch_01jh6xa7reempvjyh6n3yst2zw",
)
print(batch.id)
js: |
import Groq from 'groq-sdk';
const client = new Groq({
apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
});
async function main() {
const batch = await client.batches.cancel("batch_01jh6xa7reempvjyh6n3yst2zw");
console.log(batch.id);
}
main();
response: |
{
"id": "batch_01jh6xa7reempvjyh6n3yst2zw",
"object": "batch",
"endpoint": "/v1/chat/completions",
"errors": null,
"input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t",
"completion_window": "24h",
"status": "cancelling",
"output_file_id": null,
"error_file_id": null,
"finalizing_at": null,
"failed_at": null,
"expired_at": null,
"cancelled_at": null,
"request_counts": {
"total": 0,
"completed": 0,
"failed": 0
},
"metadata": null,
"created_at": 1736472600,
"expires_at": 1736559000,
"cancelling_at": null,
"completed_at": null,
"in_progress_at": null
}
/openai/v1/chat/completions:
post:
operationId: createChatCompletion
tags:
- Chat
summary: Creates a model response for the given chat conversation.
requestBody:
required: true
description: The chat prompt and parameters
content:
application/json:
schema:
$ref: '#/components/schemas/CreateChatCompletionRequest'
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/CreateChatCompletionResponse'
x-groq-metadata:
returns: >-
Returns a [chat completion](/docs/api-reference#chat-create) object, or a streamed sequence of [chat
completion chunk](/docs/api-reference#chat-create) objects if the request is streamed.
examples:
- title: Default
request:
py: |
import os
from groq import Groq
client = Groq(
# This is the default and can be omitted
api_key=os.environ.get("GROQ_API_KEY"),
)
chat_completion = client.chat.completions.create(
messages=[
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Explain the importance of fast language models",
}
],
model="llama-3.3-70b-versatile",
)
print(chat_completion.choices[0].message.content)
js: |
import Groq from "groq-sdk";
const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });
async function main() {
const completion = await groq.chat.completions
.create({
messages: [
{
role: "user",
content: "Explain the importance of fast language models",
},
],
model: "llama-3.3-70b-versatile",
})
console.log(completion.choices[0].message.content);
}
main();
curl: |
curl https://api.groq.com/openai/v1/chat/completions -s \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $GROQ_API_KEY" \
-d '{
"model": "llama-3.3-70b-versatile",
"messages": [{
"role": "user",
"content": "Explain the importance of fast language models"
}]
}'
response: |
{
"id": "chatcmpl-f51b2cd2-bef7-417e-964e-a08f0b513c22",
"object": "chat.completion",
"created": 1730241104,
"model": "openai/gpt-oss-20b",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Fast language models have gained significant attention in recent years due to their ability to process and generate human-like text quickly and efficiently. The importance of fast language models can be understood from their potential applications and benefits:\n\n1. **Real-time Chatbots and Conversational Interfaces**: Fast language models enable the development of chatbots and conversational interfaces that can respond promptly to user queries, making them more engaging and useful.\n2. **Sentiment Analysis and Opinion Mining**: Fast language models can quickly analyze text data to identify sentiments, opinions, and emotions, allowing for improved customer service, market research, and opinion mining.\n3. **Language Translation and Localization**: Fast language models can quickly translate text between languages, facilitating global communication and enabling businesses to reach a broader audience.\n4. **Text Summarization and Generation**: Fast language models can summarize long documents or even generate new text on a given topic, improving information retrieval and processing efficiency.\n5. **Named Entity Recognition and Information Extraction**: Fast language models can rapidly recognize and extract specific entities, such as names, locations, and organizations, from unstructured text data.\n6. **Recommendation Systems**: Fast language models can analyze large amounts of text data to personalize product recommendations, improve customer experience, and increase sales.\n7. **Content Generation for Social Media**: Fast language models can quickly generate engaging content for social media platforms, helping businesses maintain a consistent online presence and increasing their online visibility.\n8. **Sentiment Analysis for Stock Market Analysis**: Fast language models can quickly analyze social media posts, news articles, and other text data to identify sentiment trends, enabling financial analysts to make more informed investment decisions.\n9. **Language Learning and Education**: Fast language models can provide instant feedback and adaptive language learning, making language education more effective and engaging.\n10. **Domain-Specific Knowledge Extraction**: Fast language models can quickly extract relevant information from vast amounts of text data, enabling domain experts to focus on high-level decision-making rather than manual information gathering.\n\nThe benefits of fast language models include:\n\n* **Increased Efficiency**: Fast language models can process large amounts of text data quickly, reducing the time and effort required for tasks such as sentiment analysis, entity recognition, and text summarization.\n* **Improved Accuracy**: Fast language models can analyze and learn from large datasets, leading to more accurate results and more informed decision-making.\n* **Enhanced User Experience**: Fast language models can enable real-time interactions, personalized recommendations, and timely responses, improving the overall user experience.\n* **Cost Savings**: Fast language models can automate many tasks, reducing the need for manual labor and minimizing costs associated with data processing and analysis.\n\nIn summary, fast language models have the potential to transform various industries and applications by providing fast, accurate, and efficient language processing capabilities."
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"queue_time": 0.037493756,
"prompt_tokens": 18,
"prompt_time": 0.000680594,
"completion_tokens": 556,
"completion_time": 0.463333333,
"total_tokens": 574,
"total_time": 0.464013927
},
"system_fingerprint": "fp_179b0f92c9",
"x_groq": { "id": "req_01jbd6g2qdfw2adyrt2az8hz4w" }
}
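# Illustrative streaming example (a sketch; not part of the original examples). It mirrors the
# default example above with `stream` enabled, so the endpoint returns chat completion chunk
# objects instead of a single completion.
- title: Streaming
request:
curl: |
curl https://api.groq.com/openai/v1/chat/completions -s \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $GROQ_API_KEY" \
-d '{
"model": "llama-3.3-70b-versatile",
"stream": true,
"messages": [{
"role": "user",
"content": "Explain the importance of fast language models"
}]
}'
py: |
import os
from groq import Groq
client = Groq(
# This is the default and can be omitted
api_key=os.environ.get("GROQ_API_KEY"),
)
# stream=True returns an iterator of chat completion chunks
stream = client.chat.completions.create(
messages=[
{
"role": "user",
"content": "Explain the importance of fast language models",
}
],
model="llama-3.3-70b-versatile",
stream=True,
)
for chunk in stream:
    # Each chunk carries an incremental delta of the assistant message
    print(chunk.choices[0].delta.content or "", end="")
js: |
import Groq from "groq-sdk";
const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });
async function main() {
// stream: true returns an async iterable of chat completion chunks
const stream = await groq.chat.completions.create({
messages: [
{
role: "user",
content: "Explain the importance of fast language models",
},
],
model: "llama-3.3-70b-versatile",
stream: true,
});
for await (const chunk of stream) {
process.stdout.write(chunk.choices[0]?.delta?.content || "");
}
}
main();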
/openai/v1/embeddings:
post:
operationId: createEmbedding
tags:
- Embeddings
summary: Creates an embedding vector representing the input text.
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/CreateEmbeddingRequest'
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/CreateEmbeddingResponse'
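# Illustrative example for the embeddings endpoint (a sketch; the original spec does not include
# one). "your-embedding-model" is a placeholder, not a real model ID; substitute an embedding
# model available to your account.
x-groq-metadata:
returns: Returns a list of embedding objects.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/embeddings \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "your-embedding-model",
"input": "The quick brown fox jumped over the lazy dog"
}'
py: |
import os
from groq import Groq
client = Groq(
# This is the default and can be omitted
api_key=os.environ.get("GROQ_API_KEY"),
)
# "your-embedding-model" is a placeholder; use an embedding model available to your account
embedding = client.embeddings.create(
model="your-embedding-model",
input="The quick brown fox jumped over the lazy dog",
)
print(embedding.data[0].embedding)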
/openai/v1/files:
get:
operationId: listFiles
tags:
- Files
summary: Returns a list of files.
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListFilesResponse'
x-groq-metadata:
returns: A list of [File](/docs/api-reference#files-upload) objects.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/files \
-H "Authorization: Bearer $GROQ_API_KEY" \
-H "Content-Type: application/json"
py: |
import os
from groq import Groq
client = Groq(
api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted
)
file_list = client.files.list()
print(file_list.data)
js: |
import Groq from 'groq-sdk';
const client = new Groq({
apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted
});
async function main() {
const fileList = await client.files.list();
console.log(fileList.data);
}
main();
response: |
{
"object": "list",
"data": [
{
"id": "file_01jh6x76wtemjr74t1fh0faj5t",
"object": "file",
"bytes": 966,
"created_at": 1736472501,
"filename": "batch_file.jsonl",
"purpose": "batch"
}
]
}
post:
operationId: uploadFile
tags:
- Files
summary: >
Uploads a file that can be used across various endpoints.
The Batch API only supports `.jsonl` files up to 100 MB in size, and the input must follow a specific
required [format](/docs/batch).
Please contact us if you need a higher storage limit.
requestBody:
required: true
content:
multipart/form-data:
schema:
$ref: '#/components/schemas/CreateFileRequest'
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/File'
x-groq-metadata:
returns: The uploaded File object.
examples:
- title: Default
request:
curl: |
curl https://api.groq.com/openai/v1/files \
-H "Authorization: Bearer $GROQ_API_KEY" \
-F purpose="batch" \
-F "file=@batch_file.jsonl"
py: |
import os
import requests # pip install requests first!
def upload_file_to_groq(api_key, file_path):
url = "https://api.groq.com/openai/v1/files"
headers = {
"Authorization": f"Bearer {api_key}"
}
# Prepare the file and form data
files = {
"file": ("batch_file.jsonl", open(file_path, "rb"))
}
data = {
"purpose": "batch"
}
# Make the POST request
# --- truncated at 32 KB (188 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/groq/refs/heads/main/openapi/groq-openapi.yml