Inflection AI
Inflection for Enterprise

Enterprise deployment of Inflection 3.0 models, including a purpose-built on-premises server option for data locality and total cost of ownership (TCO) optimization. Contact sales.
Documentation GitHub OpenAPI
Documentation

📖
Documentation
https://inflection.ai/
Specifications

⚙
OpenAPI
https://raw.githubusercontent.com/api-evangelist/inflection/refs/heads/main/openapi/inflection-openapi.json
OpenAPI Specification

{"openapi":"3.1.0","info":{"title":"Inflection Inference API","version":"1.0.0"},"paths":{"/status":{"get":{"summary":"Status ","operationId":"status__status_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/StatusResponse"}}}}}}},"/v1/chat/completions":{"post":{"summary":"Chat Completions","description":"Chat completions endpoint for Inflection hosted models.\n\n⚠️ This endpoint is under active development to enable all Inflection model and config types.","operationId":"chat_completions_v1_chat_completions_post","security":[{"OAuth2PasswordBearer":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionRequest"},"example":{"model":"inflection_3_pi","messages":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"What is the weather like today?"}],"max_tokens":150,"temperature":0.7}}}},"responses":{"200":{"description":"Returns either the full response (`application/json`) or streams multiple `ChatCompletionStreamResponse` objects via `text/event-stream`. 
Each event in the stream is a JSON object matching the ChatCompletionStreamResponse schema, and each event is prefixed by `data:` (typical for Server-Sent Events).","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionResponse"},"example":{"id":"cmpl-5y3yYz9d7Gv0yWw8","object":"chat.completion","created":1627290155,"model":"inflection_3_pi","choices":[{"message":{"role":"assistant","content":"The weather today is sunny, with a high of 25°C."},"finish_reason":"stop"}],"usage":{"prompt_tokens":13,"completion_tokens":10,"total_tokens":23}}},"text/event-stream":{"schema":{"$ref":"#/components/schemas/ChatCompletionStreamResponse"},"example":"data: {\n  \"id\": \"chatcmpl-abc123\",\n  \"object\": \"chat.completion.chunk\",\n  \"created\": 1627290155,\n  \"model\": \"inflection_3_pi\",\n  \"choices\": [\n    {\n      \"delta\": {\n        \"role\": \"assistant\",\n        \"content\": \"The weather today is sunny,\"\n      },\n      \"finish_reason\": null\n    }\n  ]\n}\n\ndata: {\n  \"id\": \"chatcmpl-abc123\",\n  \"object\": \"chat.completion.chunk\",\n  \"created\": 1627290155,\n  \"model\": \"inflection_3_pi\",\n  \"choices\": [\n    {\n      \"delta\": {\n        \"role\": \"assistant\",\n        \"content\": \" with a high of 25°C.\"\n      },\n      \"finish_reason\": \"stop\"\n    }\n  ]\n}\n"}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/v1/chat/completions \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"model\": \"inflection_3_pi\",\n    \"messages\": [\n      {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n      {\"role\": \"user\", \"content\": \"What is the weather like today?\"}\n    ],\n    \"max_tokens\": 150,\n    \"temperature\": 0.7\n  
}'"},{"lang":"curl Example Streaming","source":"curl --url https://api.inflection.ai/v1/chat/completions \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"model\": \"inflection_3_pi\",\n    \"messages\": [\n      {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n      {\"role\": \"user\", \"content\": \"What is the weather like today?\"}\n    ],\n    \"stream\": true\n  }' --no-buffer"}]}},"/v1/chat/attributes":{"post":{"summary":"Chat Attributes","description":"Get attribute scores for the most recent assistant message in a chat completions style request.\n\n⚠️ Experimental endpoint for evaluating message attributes.","operationId":"chat_attributes_v1_chat_attributes_post","security":[{"OAuth2PasswordBearer":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AttributesRequest"},"example":{"model":"Pi-3.1","messages":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Tell me about quantum computing."},{"role":"assistant","content":"No way."}],"attributes":[{"name":"Helpful","definition":"The assistant is helpful and provides useful information.","categories":["Assistant"]},{"name":"Rude","definition":"The assistant uses mean or offensive language or is otherwise rude.","categories":["Assistant"]}],"filter_categories":["Assistant"]}}},"description":"\nFollows the same schema as the chat completions endpoint, but with the addition of:\n- `attributes`: An optional list of attributes to score. If no attributes are provided, a set of default attributes is used.\n- `filter_categories`: An optional list of categories to filter the attributes by. If no categories are provided, all attributes are used.\n- `disable_attributes_prompt`: An optional boolean to disable the attributes default prompt. 
If `true`, the attributes default prompt will not be added to the system prompt.\n"},"responses":{"200":{"description":"\nReturns attribute scores for the final assistant message. If no attributes are provided, a set of default attributes is used.\n\nScores are provided in JSON format as `{\"AttributeName\": {\"score\": float, \"positive\": bool}, ...}` in the first Choice message content.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionResponse"},"example":{"id":"chatcmpl-xyz789","object":"chat.completion","created":1627290155,"model":"Pi-3.1","choices":[{"message":{"role":"assistant","content":"{\"Helpful\": {\"score\": 0.1, \"positive\": true}, \"Rude\": {\"score\": 0.9, \"positive\": false}}"},"finish_reason":"stop"}],"usage":{"prompt_tokens":135,"completion_tokens":20,"total_tokens":155}}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/v1/chat/attributes \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"model\": \"Pi-3.1\",\n    \"messages\": [\n      {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n      {\"role\": \"user\", \"content\": \"Tell me about quantum computing.\"},\n      {\"role\": \"assistant\", \"content\": \"No way.\"}\n    ],\n    \"attributes\": [\n      {\n        \"name\": \"Helpful\",\n        \"definition\": \"The assistant is helpful and provides useful information.\",\n        \"categories\": [\"Assistant\"]\n      },\n      {\n        \"name\": \"Rude\",\n        \"definition\": \"The assistant uses mean or offensive language or is otherwise rude.\",\n        \"categories\": [\"Assistant\"]\n      }\n    ],\n    \"filter_categories\": [\"Assistant\"]\n  
}'"}]}},"/v1/embeddings":{"post":{"summary":"Embeddings","description":"Embeddings endpoint for Inflection hosted models.","operationId":"embeddings_v1_embeddings_post","security":[{"OAuth2PasswordBearer":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/EmbeddingCompletionRequest"},{"$ref":"#/components/schemas/EmbeddingChatRequest"}],"title":"Request","$ref":"#/components/schemas/EmbeddingRequest"},"example":{"model":"inf_3_1_embedding","input":"The quick brown fox jumps over the lazy dog"}}}},"responses":{"200":{"description":"Returns the embeddings","content":{"application/json":{"schema":{"$ref":"#/components/schemas/EmbeddingResponse"},"example":{"id":"embd-8b2d80eb72634137842d104f671a712b","object":"list","created":1751327294,"model":"inf_3_1_embedding","data":[{"index":0,"object":"embedding","embedding":[0.01,-0.02,0.03]}],"usage":{"prompt_tokens":6,"completion_tokens":0,"total_tokens":6}}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/v1/embeddings \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"model\": \"inf_3_1_embedding\",\n    \"input\": \"The quick brown fox jumps over the lazy dog\"\n  }'"}]}},"/v1/discovery/configs":{"get":{"summary":"Configs","description":"Get all available configs (for use in the chat completion endpoint `model` field) for an organization.\n\nReturns each config's `name` (aka `model`), `alias` (for UI display), `default_parameters`, and `description`.","operationId":"configs_v1_discovery_configs_get","security":[{"OAuth2PasswordBearer":[]}],"responses":{"200":{"description":"List of configs available for the 
organization","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ConfigForOrgResponse"}},"example":[{"name":"pi_3_1","alias":"Pi (3.1)","default_parameters":{"temperature":{"range":[0,1],"value":1.0},"maximumLength":{"range":[1,1024],"value":256},"topP":{"range":[0,1],"value":0.95},"stopTokens":{"range":[],"value":[]},"webSearch":{"range":[],"value":true}},"description":"Pi chat assistant based on Llama"}]}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/v1/discovery/configs \\\n  -X GET \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\"\n  "}]}},"/external/api/inference/openai/v1/chat/completions":{"post":{"summary":"Legacy Chat Completions","description":"Chat completions compatible endpoint for legacy Inflection models.\n\nProvides a compatibility chat completions endpoint to talk with any Inflection model.","operationId":"external_openai_chat_endpoint_external_api_inference_openai_v1_chat_completions_post","security":[{"OAuth2PasswordBearer":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionRequest"},"description":"Note: This endpoint only supports a limited subset of the OpenAI chat completions parameters. Supported parameters are: 'model', 'messages', 'temperature', 'max_tokens', 'top_p', 'tools', 'response_format', and 'stream'. Other OpenAI parameters like functions, function_call, n, presence_penalty, frequency_penalty, logit_bias, user, etc. are not supported.","example":{"model":"Pi-3.1","messages":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"What is the weather like today?"}],"temperature":0.7,"max_tokens":150,"top_p":0.9,"stream":true}}}},"responses":{"200":{"description":"Returns either the full response (`application/json`) or streams multiple `ChatCompletionStreamResponse` objects via `text/event-stream`. 
Note: The response format matches OpenAI's schema but only includes the core fields. Advanced features like logprobs, etc. are not supported.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionResponse"},"example":{"id":"cmpl-xyz789","object":"chat.completion","created":1627290155,"model":"Pi-3.1","choices":[{"message":{"role":"assistant","content":"The weather today is sunny with a high of 25°C."},"finish_reason":"stop"}],"usage":{"prompt_tokens":13,"completion_tokens":10,"total_tokens":23}}},"text/event-stream":{"schema":{"$ref":"#/components/schemas/ChatCompletionStreamResponse"},"example":"data: {\n  \"id\": \"chatcmpl-abc123\",\n  \"object\": \"chat.completion.chunk\",\n  \"created\": 1627290155,\n  \"model\": \"Pi-3.1\",\n  \"choices\": [\n    {\n      \"delta\": {\n        \"role\": \"assistant\",\n        \"content\": \"The weather today is sunny,\"\n      },\n      \"finish_reason\": null\n    }\n  ]\n}\n\ndata: {\n  \"id\": \"chatcmpl-abc123\",\n  \"object\": \"chat.completion.chunk\",\n  \"created\": 1627290155,\n  \"model\": \"Pi-3.1\",\n  \"choices\": [\n    {\n      \"delta\": {\n        \"content\": \" with a high of 25°C.\"\n      },\n      \"finish_reason\": \"stop\"\n    }\n  ]\n}"}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/external/api/inference/openai/v1/chat/completions \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"model\": \"Pi-3.1\",\n    \"messages\": [\n      {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n      {\"role\": \"user\", \"content\": \"What is the weather like today?\"}\n    ],\n    \"temperature\": 0.7,\n    \"max_tokens\": 150,\n    \"top_p\": 0.9\n  }'"},{"lang":"curl Example Streaming","source":"curl --url 
https://api.inflection.ai/external/api/inference/openai/v1/chat/completions \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"model\": \"Pi-3.1\",\n    \"messages\": [\n      {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n      {\"role\": \"user\", \"content\": \"What is the weather like today?\"}\n    ],\n    \"stream\": true,\n    \"temperature\": 0.7,\n    \"max_tokens\": 150,\n    \"top_p\": 0.9\n  }' --no-buffer"}]}},"/external/api/inference":{"post":{"summary":"Legacy API Inference Chat","description":"Calls an Inflection model using the legacy chat API.","operationId":"external_chat_external_api_inference_post","security":[{"OAuth2PasswordBearer":[]}],"parameters":[{"name":"origin_endpoint","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Origin Endpoint"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExternalChatBody"},"example":{"config":"Pi-3.1","context":[{"type":"Instruction","text":"You are a helpful assistant."},{"type":"Human","text":"What is the weather like today?"}],"temperature":0.7,"max_tokens":150,"web_search":false}}}},"responses":{"200":{"description":"Returns the chat response with text and optional tool calls","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExternalChatResponse"},"example":{"created":1627290155,"text":"The weather today is sunny with a high of 25°C."}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/external/api/inference \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"config\": \"Pi-3.1\",\n    \"context\": [\n      {\"type\": \"Instruction\", \"text\": 
\"You are a helpful assistant.\"},\n      {\"type\": \"Human\", \"text\": \"What is the weather like today?\"}\n    ],\n    \"temperature\": 0.7,\n    \"max_tokens\": 150,\n    \"web_search\": false\n  }'"}]}},"/external/api/inference/streaming":{"post":{"summary":"Legacy API Streaming","description":"Calls an Inflection model using the legacy chat streaming API.","operationId":"external_streaming_external_api_inference_streaming_post","security":[{"OAuth2PasswordBearer":[]}],"parameters":[{"name":"origin_endpoint","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Origin Endpoint"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExternalChatBody"},"example":{"config":"Pi-3.1","context":[{"type":"Instruction","text":"You are a helpful assistant."},{"type":"Human","text":"What is the weather like today?"}],"temperature":0.7,"max_tokens":150,"web_search":false}}}},"responses":{"200":{"description":"Streams the chat response with text and optional tool calls","content":{"text/event-stream":{"schema":{"type":"string"},"example":"data: {\n  \"created\": 1627290155,\n  \"idx\": 0,\n  \"text\": \"The weather today\",\n  \"tool_calls\": null\n}\n\ndata: {\n  \"created\": 1627290155,\n  \"idx\": 1,\n  \"text\": \" is sunny with a high of 25°C.\",\n  \"tool_calls\": null\n}"}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-code-samples":[{"lang":"curl Example","source":"curl --url https://api.inflection.ai/external/api/inference/streaming \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -d '{\n    \"config\": \"Pi-3.1\",\n    \"context\": [\n      {\"type\": \"Instruction\", \"text\": \"You are a helpful assistant.\"},\n      {\"type\": \"Human\", \"text\": \"What is the weather like today?\"}\n    ],\n    \"temperature\": 
0.7,\n    \"max_tokens\": 150,\n    \"web_search\": false\n  }' --no-buffer"}]}}},"components":{"schemas":{"Attribute":{"properties":{"name":{"type":"string","title":"Name"},"definition":{"type":"string","title":"Definition"},"categories":{"anyOf":[{"items":{"$ref":"#/components/schemas/Category"},"type":"array"},{"type":"null"}],"title":"Categories"},"positive":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Positive","default":true},"example":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Example"}},"type":"object","required":["name","definition"],"title":"Attribute"},"AttributesRequest":{"properties":{"messages":{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionDeveloperMessageParam"},{"$ref":"#/components/schemas/ChatCompletionSystemMessageParam"},{"$ref":"#/components/schemas/ChatCompletionUserMessageParam"},{"$ref":"#/components/schemas/ChatCompletionAssistantMessageParam"},{"$ref":"#/components/schemas/ChatCompletionToolMessageParam"},{"$ref":"#/components/schemas/ChatCompletionFunctionMessageParam"},{"$ref":"#/components/schemas/CustomChatCompletionMessageParam"}]},"type":"array","title":"Messages"},"model":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Model"},"frequency_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Frequency Penalty","default":0.0},"logit_bias":{"anyOf":[{"additionalProperties":{"type":"number"},"type":"object"},{"type":"null"}],"title":"Logit Bias"},"logprobs":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Logprobs","default":false},"top_logprobs":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Top Logprobs","default":0},"max_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Tokens","deprecated":true},"max_completion_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Completion Tokens"},"n":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"N","default":1},"presence_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Presence 
Penalty","default":0.0},"response_format":{"anyOf":[{"$ref":"#/components/schemas/ResponseFormat"},{"$ref":"#/components/schemas/StructuralTagResponseFormat"},{"type":"null"}],"title":"Response Format"},"seed":{"anyOf":[{"type":"integer","maximum":4294967295.0,"minimum":0.0},{"type":"null"}],"title":"Seed"},"stop":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Stop","default":[]},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"stream_options":{"anyOf":[{"$ref":"#/components/schemas/StreamOptions"},{"type":"null"}]},"temperature":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Temperature"},"top_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Top P"},"tools":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChatCompletionToolsParam"},"type":"array"},{"type":"null"}],"title":"Tools"},"tool_choice":{"anyOf":[{"type":"string","enum":["none"],"const":"none"},{"type":"string","enum":["auto"],"const":"auto"},{"type":"string","enum":["required"],"const":"required"},{"$ref":"#/components/schemas/ChatCompletionNamedToolChoiceParam"},{"type":"null"}],"title":"Tool Choice","default":"none"},"parallel_tool_calls":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Parallel Tool Calls","default":false},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"best_of":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Best Of"},"use_beam_search":{"type":"boolean","title":"Use Beam Search","default":false},"top_k":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Top K"},"min_p":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Min P"},"repetition_penalty":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Repetition Penalty"},"length_penalty":{"type":"number","title":"Length Penalty","default":1.0},"stop_token_ids":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Stop Token 
Ids","default":[]},"include_stop_str_in_output":{"type":"boolean","title":"Include Stop Str In Output","default":false},"ignore_eos":{"type":"boolean","title":"Ignore Eos","default":false},"min_tokens":{"type":"integer","title":"Min Tokens","default":0},"skip_special_tokens":{"type":"boolean","title":"Skip Special Tokens","default":true},"spaces_between_special_tokens":{"type":"boolean","title":"Spaces Between Special Tokens","default":true},"truncate_prompt_tokens":{"anyOf":[{"type":"integer","minimum":1.0},{"type":"null"}],"title":"Truncate Prompt Tokens"},"prompt_logprobs":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Prompt Logprobs"},"allowed_token_ids":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Allowed Token Ids"},"bad_words":{"items":{"type":"string"},"type":"array","title":"Bad Words"},"echo":{"type":"boolean","title":"Echo","description":"If true, the new message will be prepended with the last message if they belong to the same role.","default":false},"add_generation_prompt":{"type":"boolean","title":"Add Generation Prompt","description":"If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.","default":true},"continue_final_message":{"type":"boolean","title":"Continue Final Message","description":"If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to \"prefill\" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.","default":false},"add_special_tokens":{"type":"boolean","title":"Add Special Tokens","description":"If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. 
For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).","default":false},"documents":{"anyOf":[{"items":{"additionalProperties":{"type":"string"},"type":"object"},"type":"array"},{"type":"null"}],"title":"Documents","description":"A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing \"title\" and \"text\" keys."},"chat_template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Chat Template","description":"A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one."},"chat_template_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Chat Template Kwargs","description":"Additional keyword args to pass to the template renderer. 
Will be accessible by the chat template."},"mm_processor_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Mm Processor Kwargs","description":"Additional kwargs to pass to the HF processor."},"guided_json":{"anyOf":[{"type":"string"},{"type":"object"},{"$ref":"#/components/schemas/BaseModel"},{"type":"null"}],"title":"Guided Json","description":"If specified, the output will follow the JSON schema."},"guided_regex":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Regex","description":"If specified, the output will follow the regex pattern."},"guided_choice":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Guided Choice","description":"If specified, the output will be exactly one of the choices."},"guided_grammar":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Grammar","description":"If specified, the output will follow the context free grammar."},"structural_tag":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Structural Tag","description":"If specified, the output will follow the structural tag schema."},"guided_decoding_backend":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Decoding Backend","description":"If specified, will override the default guided decoding backend of the server for this specific request. If set, must be either 'outlines' / 'lm-format-enforcer'"},"guided_whitespace_pattern":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Guided Whitespace Pattern","description":"If specified, will override the default whitespace pattern for guided json decoding."},"guided_meta":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Guided Meta","description":"Specify custom arguments for your backend"},"priority":{"type":"integer","title":"Priority","description":"The priority of the request (lower means earlier handling; default: 0). 
Any priority other than 0 will raise an error if the served model does not use priority scheduling.","default":0},"request_id":{"type":"string","title":"Request Id","description":"The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used throughout the inference process and returned in the response."},"logits_processors":{"anyOf":[{"items":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/LogitsProcessorConstructor"}]},"type":"array"},{"type":"null"}],"title":"Logits Processors","description":"A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}."},"return_tokens_as_token_ids":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Return Tokens As Token Ids","description":"If specified with 'logprobs', tokens are represented as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified."},"cache_salt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cache Salt","description":"If specified, the prefix cache will be salted with the provided string to prevent an attacker from guessing prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bits). 
Not supported by vLLM engine V0."},"kv_transfer_params":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kv Transfer Params","description":"KVTransfer parameters used for disaggregated serving."},"vllm_xargs":{"anyOf":[{"additionalProperties":{"anyOf":[{"type":"string"},{"type":"integer"},{"type":"number"}]},"type":"object"},{"type":"null"}],"title":"Vllm Xargs","description":"Additional request parameters with string or numeric values, used by custom extensions."},"timeout_sec":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Timeout Sec"},"attributes":{"anyOf":[{"items":{"$ref":"#/components/schemas/Attribute"},"type":"array"},{"type":"null"}],"title":"Attributes"},"filter_categories":{"anyOf":[{"items":{"$ref":"#/components/schemas/Category"},"type":"array"},{"type":"null"}],"title":"Filter Categories"},"disable_attributes_prompt":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Disable Attributes Prompt","default":false}},"additionalProperties":true,"type":"object","required":["messages"],"title":"AttributesRequest"},"Audio":{"properties":{"id":{"type":"string","title":"Id"}},"type":"object","required":["id"],"title":"Audio"},"AudioURL":{"properties":{"url":{"type":"string","title":"Url"}},"type":"object","required":["url"],"title":"AudioURL"},"BaseModel":{"properties":{},"type":"object","title":"BaseModel"},"Category":{"type":"string","enum":["Personal","Interpersonal","Execution","EQ","Creativity","Humour","IQ","Phrasing","User","Assistant"],"title":"Category"},"ChatCompletionAssistantMessageParam":{"properties":{"role":{"type":"string","enum":["assistant"],"const":"assistant","title":"Role"},"audio":{"anyOf":[{"$ref":"#/components/schemas/Audio"},{"type":"null"}]},"content":{"anyOf":[{"type":"string"},{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},{"$ref":"#/components/schemas/ChatCompletionContentPartRefusalParam"}]},"type":"array"},{"type":"null"}],"title":"Content"},"function_call":{"anyOf":[{"$ref":"#
/components/schemas/FunctionCall-Input"},{"type":"null"}]},"name":{"type":"string","title":"Name"},"refusal":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Refusal"},"tool_calls":{"items":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionMessageFunctionToolCallParam"},{"$ref":"#/components/schemas/ChatCompletionMessageCustomToolCallParam"}]},"type":"array","title":"Tool Calls"}},"type":"object","required":["role"],"title":"ChatCompletionAssistantMessageParam"},"ChatCompletionContentPartAudioParam":{"properties":{"audio_url":{"$ref":"#/components/schemas/AudioURL"},"type":{"type":"string","enum":["audio_url"],"const":"audio_url","title":"Type"}},"type":"object","required":["audio_url","type"],"title":"ChatCompletionContentPartAudioParam"},"ChatCompletionContentPartImageEmbedsParam":{"properties":{"image_embeds":{"anyOf":[{"type":"string"},{"additionalProperties":{"type":"string"},"type":"object"}],"title":"Image Embeds"},"type":{"type":"string","enum":["image_embeds"],"const":"image_embeds","title":"Type"}},"type":"object","required":["image_embeds","type"],"title":"ChatCompletionContentPartImageEmbedsParam"},"ChatCompletionContentPartImageParam":{"properties":{"image_url":{"$ref":"#/components/schemas/ImageURL"},"type":{"type":"string","enum":["image_url"],"const":"image_url","title":"Type"}},"type":"object","required":["image_url","type"],"title":"ChatCompletionContentPartImageParam"},"ChatCompletionContentPartInputAudioParam":{"properties":{"input_audio":{"$ref":"#/components/schemas/InputAudio"},"type":{"type":"string","enum":["input_audio"],"const":"input_audio","title":"Type"}},"type":"object","required":["input_audio","type"],"title":"ChatCompletionContentPartInputAudioParam"},"ChatCompletionContentPartRefusalParam":{"properties":{"refusal":{"type":"string","title":"Refusal"},"type":{"type":"string","enum":["refusal"],"const":"refusal","title":"Type"}},"type":"object","required":["refusal","type"],"title":"ChatCompletionContentPartRefusalParam"},"Ch
atCompletionContentPartTextParam":{"properties":{"text":{"type":"string","title":"Text"},"type":{"type":"string","enum":["text"],"const":"text","title":"Type"}},"type":"object","required":["text","type"],"title":"ChatCompletionContentPartTextParam"},"ChatCompletionContentPartVideoParam":{"properties":{"video_url":{"$ref":"#/components/schemas/VideoURL"},"type":{"type":"string","enum":["video_url"],"const":"video_url","title":"Type"}},"type":"object","required":["video_url","type"],"title":"ChatCompletionContentPartVideoParam"},"ChatCompletionDeveloperMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/ChatCompletionContentPartTextParam"},"type":"array"}],"title":"Content"},"role":{"type":"string","enum":["developer"],"const":"developer","title":"Role"},"name":{"type":"string","title":"Name"}},"type":"object","required":["content","role"],"title":"ChatCompletionDeveloperMessageParam"},"ChatCompletionFunctionMessageParam":{"properties":{"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"type":"string","title":"Name"},"role":{"type":"string","enum":["function"],"const":"function","title":"Role"}},"type":"object","required":["content","name","role"],"title":"ChatCompletionFunctionMessageParam"},"ChatCompletionLogProb":{"properties":{"token":{"type":"string","title":"Token"},"logprob":{"type":"number","title":"Logprob","default":-9999.0},"bytes":{"anyOf":[{"ite

# --- truncated at 32 KB (66 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/inflection/refs/heads/main/openapi/inflection-openapi.json