# Fireworks Embeddings API
# Generate dense vector embeddings for retrieval, RAG, and semantic search using nomic, Qwen3, BGE, and other open embedding models.
# Document root: OpenAPI spec version, API metadata, tag display names, and
# the opening of the path map.
openapi: 3.1.0
info:
  title: Gateway REST API
  version: 4.259.0
# Each tag carries an `x-displayName` extension used as its display label.
tags:
  - name: gateway.openapi_Gateway
    x-displayName: Gateway
  - name: gateway-extra.openapi_Gateway
    x-displayName: Gateway
  - name: responses.openapi_other
    x-displayName: other
  - name: text-completion.openapi_other
    x-displayName: other
  - name: anthropic-messages.openapi_other
    x-displayName: other
paths:
# GET lists accounts. Paginated via pageSize/pageToken, with optional filter
# and readMask query parameters; requires bearer auth.
/v1/accounts:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: List Accounts
    operationId: Gateway_ListAccounts
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayListAccountsResponse'
    parameters:
      - name: pageSize
        description: >-
          The maximum number of accounts to return. The maximum page_size is
          200,
          values above 200 will be coerced to 200.
          If unspecified, the default is 50.
        in: query
        required: false
        schema:
          type: integer
          format: int32
      - name: pageToken
        description: >-
          A page token, received from a previous ListAccounts call. Provide
          this
          to retrieve the subsequent page. When paginating, all other
          parameters
          provided to ListAccounts must match the call that provided the page
          token.
        in: query
        required: false
        schema:
          type: string
      - name: filter
        description: |-
          Only accounts satisfying the provided filter (if specified) will be
          returned. See https://google.aip.dev/160 for the filter grammar.
        in: query
        required: false
        schema:
          type: string
      # Documented as unsupported: results are always ordered by `name`.
      - name: orderBy
        description: |-
          Not supported.
          Accounts will be returned ordered by `name`.
        in: query
        required: false
        schema:
          type: string
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# POST exchanges a session JWT for a refreshed one; a JSON request body is
# required.
/v1/auth/refresh:
  servers:
    - url: https://api.fireworks.ai
  post:
    summary: Refresh a session JWT before expiry with a fresh TTL.
    operationId: Gateway_RefreshSessionToken
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayRefreshSessionTokenResponse'
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/gatewayRefreshSessionTokenRequest'
      description: Request to refresh an existing session JWT before it expires.
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# POST redeems a credit code for an account; a JSON request body is required.
/v1/creditCodes:redeem:
  servers:
    - url: https://api.fireworks.ai
  post:
    summary: |-
      APIs for credit codes.
      Redeem Credit Code
    operationId: Gateway_RedeemCreditCode
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayRedeemCreditCodeResponse'
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/gatewayRedeemCreditCodeRequest'
      description: >-
        RedeemCreditCodeRequest is the request to redeem a credit code for an
        account.
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# POST validates a model config. The 200 response schema is an object with no
# declared properties.
/v1/validateModelConfig:
  servers:
    - url: https://api.fireworks.ai
  post:
    summary: Validate Model Config
    operationId: Gateway_ValidateModelConfig
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              type: object
              properties: {}
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/gatewayValidateModelConfigRequest'
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# GET returns a single account identified by the `account_id` path parameter.
/v1/accounts/{account_id}:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: Get Account
    operationId: Gateway_GetAccount
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayAccount'
    parameters:
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# GET returns the distinct filter values for an account over a required
# startTime/endTime window, optionally narrowed by usageType.
/v1/accounts/{account_id}/accountUsageFilterOptions:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: >-
      Distinct filter values for an account/time range (serverless +
      dedicated; for FE, separate -yet mirrors GetAccountUsage).
    operationId: Gateway_GetAccountUsageFilterOptions
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              # Block scalar keeps the long reference within line width; the
              # leading `#` is scalar content here, not a comment.
              $ref: >-
                #/components/schemas/gatewayGetAccountUsageFilterOptionsResponse
    parameters:
      - name: startTime
        in: query
        required: true
        schema:
          type: string
          format: date-time
      - name: endTime
        in: query
        required: true
        schema:
          type: string
          format: date-time
      - name: usageType
        description: |-
          If not specified, loads filter options for both usage streams.
          - SERVERLESS: Serverless filter dimensions only (model_name, api_key_id, annotations.*).
          - DEDICATED_DEPLOYMENT: Dedicated deployment filter dimensions (deployment_name, annotations.team, .project, .environment).
        in: query
        required: false
        schema:
          type: string
          enum:
            - USAGE_TYPE_UNSPECIFIED
            - SERVERLESS
            - DEDICATED_DEPLOYMENT
          default: USAGE_TYPE_UNSPECIFIED
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# GET lists audit logs for an account, with an optional time window, an
# optional user-email filter, and pagination.
/v1/accounts/{account_id}/auditLogs:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: List User Audit Logs
    operationId: Gateway_ListAuditLogs
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayListAuditLogsResponse'
    parameters:
      - name: startTime
        description: |-
          Start time of the audit logs to retrieve.
          If unspecified, the default is 30 days before now.
        in: query
        required: false
        schema:
          type: string
          format: date-time
      - name: endTime
        description: |-
          End time of the audit logs to retrieve.
          If unspecified, the default is the current time.
        in: query
        required: false
        schema:
          type: string
          format: date-time
      - name: email
        description: |-
          Optional.
          Filter audit logs for user email associated with the account.
        in: query
        required: false
        schema:
          type: string
      - name: pageSize
        description: >-
          The maximum number of audit logs to return. The maximum page_size is
          200,
          values above 200 will be coerced to 200.
          If unspecified, the default is 10.
        in: query
        required: false
        schema:
          type: integer
          format: int32
      - name: pageToken
        description: >-
          A page token, received from a previous ListAuditLogs call. Provide
          this
          to retrieve the subsequent page. When paginating, all other
          parameters
          provided to ListAuditLogs must match the call that provided the page
          token.
        in: query
        required: false
        schema:
          type: string
      # `filter` and `orderBy` are documented as unused by this operation.
      - name: filter
        description: Unused but required to use existing ListRequest functionality.
        in: query
        required: false
        schema:
          type: string
      - name: orderBy
        description: Unused but required to use existing ListRequest functionality.
        in: query
        required: false
        schema:
          type: string
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# Collection endpoint for batch inference jobs: GET lists them (paginated,
# filterable, orderable); POST creates one from a gatewayBatchInferenceJob body.
/v1/accounts/{account_id}/batchInferenceJobs:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: List Batch Inference Jobs
    operationId: Gateway_ListBatchInferenceJobs
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayListBatchInferenceJobsResponse'
    parameters:
      - name: pageSize
        description: >-
          The maximum number of batch inference jobs to return. The maximum
          page_size is 200,
          values above 200 will be coerced to 200.
          If unspecified, the default is 50.
        in: query
        required: false
        schema:
          type: integer
          format: int32
      - name: pageToken
        description: >-
          A page token, received from a previous ListBatchInferenceJobs call.
          Provide this
          to retrieve the subsequent page. When paginating, all other
          parameters
          provided to ListBatchInferenceJobs must match the call that provided
          the page
          token.
        in: query
        required: false
        schema:
          type: string
      - name: filter
        description: |-
          Only jobs satisfying the provided filter (if specified) will be
          returned. See https://google.aip.dev/160 for the filter grammar.
        in: query
        required: false
        schema:
          type: string
      - name: orderBy
        description: >-
          A comma-separated list of fields to order by. e.g. "foo,bar"
          The default sort order is ascending. To specify a descending order
          for a
          field, append a " desc" suffix. e.g. "foo desc,bar"
          Subfields are specified with a "." character. e.g. "foo.bar"
          If not specified, the default order is by "created_time".
        in: query
        required: false
        schema:
          type: string
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
  post:
    summary: Create Batch Inference Job
    operationId: Gateway_CreateBatchInferenceJob
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayBatchInferenceJob'
    parameters:
      # Optional job ID supplied as a query parameter, not in the body.
      - name: batchInferenceJobId
        description: ID of the batch inference job.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/gatewayBatchInferenceJob'
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# Single batch inference job: GET fetches it; DELETE removes it and returns an
# object with no declared properties.
/v1/accounts/{account_id}/batchInferenceJobs/{batch_inference_job_id}:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: Get Batch Inference Job
    operationId: Gateway_GetBatchInferenceJob
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayBatchInferenceJob'
    parameters:
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: batch_inference_job_id
        in: path
        required: true
        description: The Batch Inference Job Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
  delete:
    summary: Delete Batch Inference Job
    operationId: Gateway_DeleteBatchInferenceJob
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              type: object
              properties: {}
    parameters:
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: batch_inference_job_id
        in: path
        required: true
        description: The Batch Inference Job Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# GET returns a billing summary for an account. Both time bounds are required;
# per their descriptions only the date portion is used and endTime is exclusive.
/v1/accounts/{account_id}/billing/summary:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: Get billing summary information for an account
    operationId: Gateway_GetBillingSummary
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayGetBillingSummaryResponse'
    parameters:
      - name: startTime
        description: >-
          Start time for the billing period.
          Note: Costs are aggregated daily. Only the date portion (YYYY-MM-DD)
          is used;
          the time portion is ignored. For example, 2025-10-05T07:18:29Z and
          2025-10-05T23:59:59Z are treated the same as 2025-10-05T00:00:00Z.
        in: query
        required: true
        schema:
          type: string
          format: date-time
      - name: endTime
        description: >-
          End time for the billing period (exclusive).
          Note: Costs are aggregated daily. Only the date portion (YYYY-MM-DD)
          is used;
          the time portion is ignored. Costs for the end date are NOT
          included.
          For example, to get costs for Oct 5 and Oct 6, use:
          start_time: 2025-10-05T00:00:00Z
          end_time: 2025-10-07T00:00:00Z (Oct 7 is excluded)
        in: query
        required: true
        schema:
          type: string
          format: date-time
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# GET returns account usage over a required startTime/endTime window, with
# optional usageType, timezone, groupBy, and filter query parameters.
/v1/accounts/{account_id}/billingUsage:
  servers:
    - url: https://api.fireworks.ai
  get:
    # NOTE(review): the summary embeds an internal "TODO: rename" line, which
    # will presumably surface in rendered API docs — confirm whether intended.
    summary: >-
      Get account usage (serverless and dedicated deployments).
      Optionally filter by usage type via the usage_type field. If not
      specified, returns all usage types.
      TODO: rename this to /accountUsage
    operationId: Gateway_GetAccountUsage
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayAccountUsage'
    parameters:
      - name: startTime
        description: |-
          Costs returned are inclusive of `start_time`.
          start_time must be before end_time.
        in: query
        required: true
        schema:
          type: string
          format: date-time
      - name: endTime
        description: |-
          Costs returned are exclusive of `end_time`.
          end_time must not be more than 31 days after start_time.
        in: query
        required: true
        schema:
          type: string
          format: date-time
      - name: usageType
        description: >-
          Usage type to query usage for
          If not specified, returns all usage types (both serverless and
          dedicated deployments).
          - USAGE_TYPE_UNSPECIFIED: Default value. When specified (or when usage_type field is not set),
          returns usage data for all deployment types: both serverless
          requests and dedicated deployments.
          - SERVERLESS: Returns only serverless usage data.
          Filters the response to include only usage from serverless API
          requests.
          - DEDICATED_DEPLOYMENT: Returns only dedicated deployment usage data.
          Filters the response to include only usage from dedicated
          deployments.
        in: query
        required: false
        schema:
          type: string
          enum:
            - USAGE_TYPE_UNSPECIFIED
            - SERVERLESS
            - DEDICATED_DEPLOYMENT
          default: USAGE_TYPE_UNSPECIFIED
      - name: timezone
        description: >-
          IANA timezone identifier for daily aggregation (e.g.,
          "America/Los_Angeles", "Europe/London").
          When specified, the returned data will be aggregated into daily
          buckets based on this timezone.
          If not specified or empty, defaults to "UTC".
          See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
        in: query
        required: false
        schema:
          type: string
      # Array-valued query parameter sent as repeated keys (`explode: true`);
      # the description limits it to at most one value.
      - name: groupBy
        description: >-
          Dimension to group usage by (at most one value; repeated for wire
          compatibility).
          Serverless: "model_name", "api_key_id", "annotations.team",
          "annotations.project", "annotations.environment".
          Dedicated: "deployment_name", "accelerator_type", and the same
          annotation keys.
          When usage_type is unspecified, dimensions that apply only to the
          other stream are ignored there
          (e.g. "deployment_name" and "accelerator_type" are ignored for
          serverless; "model_name" and "api_key_id" for dedicated).
          Example: ["annotations.team"] or ["model_name"].
          If empty: serverless aggregates by model name; dedicated defaults to
          deployment and accelerator type.
        in: query
        required: false
        explode: true
        schema:
          type: array
          items:
            type: string
      # NOTE(review): the description says this is a map-type variable
      # ("map_name[key]=value"), but the schema declares a plain string.
      # Confirm the intended serialization before relying on generated clients.
      - name: filter
        description: >-
          This is a request variable of the map type. The query format is
          "map_name[key]=value", e.g. If the map name is Age, the key type is
          string, and the value type is integer, the query parameter is
          expressed as Age["bob"]=18
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# POST promotes a checkpoint to a model. The checkpoint is addressed by the two
# path parameters and a JSON request body is required.
/v1/accounts/{account_id}/checkpoints/{checkpoint_id}:promote:
  servers:
    - url: https://api.fireworks.ai
  post:
    summary: >-
      Promote a checkpoint to a model.
      The checkpoint is identified by account + snapshot ID; the trainer job
      ID
      is passed in the request body to resolve the GCS bucket.
    operationId: Gateway_PromoteCheckpoint
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayPromoteCheckpointResponse'
    parameters:
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: checkpoint_id
        in: path
        required: true
        description: The Checkpoint Id
        schema:
          type: string
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/GatewayPromoteCheckpointBody'
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# Collection endpoint for clusters: GET lists them (paginated, filterable,
# orderable); POST creates one from a GatewayCreateClusterBody.
/v1/accounts/{account_id}/clusters:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: List Clusters
    operationId: Gateway_ListClusters
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayListClustersResponse'
    parameters:
      - name: pageSize
        description: >-
          The maximum number of clusters to return. The maximum page_size is
          200,
          values above 200 will be coerced to 200.
          If unspecified, the default is 50.
        in: query
        required: false
        schema:
          type: integer
          format: int32
      - name: pageToken
        description: >-
          A page token, received from a previous ListClusters call. Provide
          this
          to retrieve the subsequent page. When paginating, all other
          parameters
          provided to ListClusters must match the call that provided the page
          token.
        in: query
        required: false
        schema:
          type: string
      - name: filter
        description: |-
          Only clusters satisfying the provided filter (if specified) will be
          returned. See https://google.aip.dev/160 for the filter grammar.
        in: query
        required: false
        schema:
          type: string
      - name: orderBy
        description: >-
          A comma-separated list of fields to order by. e.g. "foo,bar"
          The default sort order is ascending. To specify a descending order
          for a
          field, append a " desc" suffix. e.g. "foo desc,bar"
          Subfields are specified with a "." character. e.g. "foo.bar"
          If not specified, the default order is by "name".
        in: query
        required: false
        schema:
          type: string
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
  post:
    summary: Create Cluster
    operationId: Gateway_CreateCluster
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayCluster'
    parameters:
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/GatewayCreateClusterBody'
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# Single cluster: GET fetches it, PATCH updates it with an inline object
# schema, and DELETE removes it.
/v1/accounts/{account_id}/clusters/{cluster_id}:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: Get Cluster
    operationId: Gateway_GetCluster
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayCluster'
    parameters:
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: cluster_id
        in: path
        required: true
        description: The Cluster Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
  patch:
    summary: Update Cluster
    operationId: Gateway_UpdateCluster
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayCluster'
    parameters:
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: cluster_id
        in: path
        required: true
        description: The Cluster Id
        schema:
          type: string
    requestBody:
      content:
        application/json:
          # Inline schema; several properties are marked `readOnly: true`.
          schema:
            type: object
            properties:
              displayName:
                type: string
                description: >-
                  Human-readable display name of the cluster. e.g. "My
                  Cluster"
                  Must be fewer than 64 characters long.
              createTime:
                type: string
                format: date-time
                description: The creation time of the cluster.
                readOnly: true
              eksCluster:
                $ref: '#/components/schemas/gatewayEksCluster'
              fakeCluster:
                $ref: '#/components/schemas/gatewayFakeCluster'
              state:
                $ref: '#/components/schemas/gatewayClusterState'
                description: The current state of the cluster.
                readOnly: true
              status:
                $ref: '#/components/schemas/gatewayStatus'
                description: >-
                  Detailed information about the current status of the
                  cluster.
                readOnly: true
              updateTime:
                type: string
                format: date-time
                description: The update time for the cluster.
                readOnly: true
            title: |-
              The properties of the cluster being updated. `cluster.name` must
              be populated with the updated resource's name.
      # NOTE(review): `description` is placed at requestBody level, matching the
      # layout of the other request bodies in this file; the flattened source
      # does not disambiguate it from a schema-level description — confirm.
      description: |-
        The properties of the cluster being updated. `cluster.name` must
        be populated with the updated resource's name.
      required: true
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
  delete:
    summary: Delete Cluster
    operationId: Gateway_DeleteCluster
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              type: object
              properties: {}
    parameters:
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: cluster_id
        in: path
        required: true
        description: The Cluster Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
# GET returns the connection settings for a cluster (described as intended for
# a kubeconfig).
/v1/accounts/{account_id}/clusters/{cluster_id}:getConnectionInfo:
  servers:
    - url: https://api.fireworks.ai
  get:
    summary: Get Cluster Connection Info
    description: Retrieve connection settings for the cluster to be put in kubeconfig
    operationId: Gateway_GetClusterConnectionInfo
    responses:
      '200':
        description: A successful response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayClusterConnectionInfo'
    parameters:
      - name: readMask
        description: >-
          The fields to be returned in the response. If empty or "*", all
          fields will be returned.
        in: query
        required: false
        schema:
          type: string
      - name: account_id
        in: path
        required: true
        description: The Account Id
        schema:
          type: string
      - name: cluster_id
        in: path
        required: true
        description: The Cluster Id
        schema:
          type: string
    tags:
      - gateway.openapi_Gateway
    security:
      - BearerAuth: []
/v1/accounts/{account_id}/creditRedemptions:
servers:
- url: https://api.fireworks.ai
get:
summary: List Credit Redemptions
operationId: Gateway_ListCreditRedemptions
responses:
'200':
description: A successful response.
content:
application/json:
schema:
$ref: '#/components/schemas/gatewayListCreditRedemptionsResponse'
parameters:
- name: pageSize
description: The maximum number of redemptions to return
in: query
required: false
schema:
type: integer
format: int32
- name: pageToken
description: A page token, received from a previous ListCreditRedemptions call
in: query
required: false
schema:
type: string
- name: filter
description: Filter string to filter redemptions
in: query
required: false
schema:
type: string
- name: orderBy
description: >-
A comma-separated list of fields to order by. e
# --- truncated at 32 KB (755 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/fireworks-ai/refs/heads/main/openapi/fireworks-ai-merged-openapi.yml