Moonshot AI Platform API

OpenAI-compatible inference API for Kimi K2.6, K2.5, K2, and Moonshot V1 models. Supports chat completions (incl. partial mode and tool use), models listing, file upload and content extraction, batch jobs, account balance, and token estimation.

OpenAPI Specification

moonshot-ai-openapi.json
{
  "openapi": "3.1.0",
  "info": {
    "title": "Moonshot AI API",
    "version": "1.0.0",
    "description": "API for Moonshot AI / Kimi large language model services"
  },
  "servers": [
    {
      "url": "https://api.moonshot.ai",
      "description": "Production"
    }
  ],
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "description": "The Authorization header expects a Bearer token. Use a MOONSHOT_API_KEY as the token. This is a server-side secret key. Generate one on the [API keys page](https://platform.kimi.ai/console/api-keys) in your dashboard."
      }
    },
    "schemas": {
      "Message": {
        "type": "object",
        "properties": {
          "role": {
            "type": "string",
            "enum": [
              "system",
              "user",
              "assistant"
            ],
            "description": "The role of the message sender"
          },
          "content": {
            "oneOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "oneOf": [
                    {
                      "title": "text",
                      "type": "object",
                      "properties": {
                        "type": {
                          "type": "string",
                          "enum": [
                            "text"
                          ]
                        },
                        "text": {
                          "type": "string"
                        }
                      },
                      "required": [
                        "type",
                        "text"
                      ]
                    },
                    {
                      "title": "image_url",
                      "type": "object",
                      "properties": {
                        "type": {
                          "type": "string",
                          "enum": [
                            "image_url"
                          ]
                        },
                        "image_url": {
                          "oneOf": [
                            {
                              "type": "object",
                              "properties": {
                                "url": {
                                  "type": "string"
                                }
                              },
                              "required": [
                                "url"
                              ]
                            },
                            {
                              "type": "string"
                            }
                          ]
                        }
                      },
                      "required": [
                        "type",
                        "image_url"
                      ]
                    },
                    {
                      "title": "video_url",
                      "type": "object",
                      "properties": {
                        "type": {
                          "type": "string",
                          "enum": [
                            "video_url"
                          ]
                        },
                        "video_url": {
                          "oneOf": [
                            {
                              "type": "object",
                              "properties": {
                                "url": {
                                  "type": "string"
                                }
                              },
                              "required": [
                                "url"
                              ]
                            },
                            {
                              "type": "string"
                            }
                          ]
                        }
                      },
                      "required": [
                        "type",
                        "video_url"
                      ]
                    }
                  ]
                }
              }
            ],
            "description": "The content of the message. Can be a plain text string, or an array of objects with text/image_url/video_url types (for multimodal input)"
          },
          "name": {
            "type": "string",
            "default": null,
            "description": "Optional name for the message sender"
          },
          "partial": {
            "type": "boolean",
            "default": false,
            "description": "Enable Partial Mode by setting this to true in the last assistant message"
          }
        },
        "required": [
          "role",
          "content"
        ]
      },
      "ToolDefinition": {
        "type": "object",
        "properties": {
          "type": {
            "type": "string",
            "enum": [
              "function"
            ]
          },
          "function": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Function name. Must follow the regex: ^[a-zA-Z_][a-zA-Z0-9-_]{2,63}$",
                "pattern": "^[a-zA-Z_][a-zA-Z0-9-_]{2,63}$"
              },
              "description": {
                "type": "string",
                "description": "Description of what the function does"
              },
              "parameters": {
                "type": "object",
                "description": "Function parameters as JSON Schema. Must conform to the MFJS (Moonshot Flavored JSON Schema) specification",
                "additionalProperties": true
              },
              "strict": {
                "type": "boolean",
                "default": true,
                "description": "Whether to strictly constrain tool call arguments according to the parameters schema. Defaults to true. When false, only guarantees the output is a valid JSON object without enforcing internal structure."
              }
            },
            "required": [
              "name",
              "parameters"
            ]
          }
        },
        "required": [
          "type",
          "function"
        ]
      },
      "ChatRequestBase": {
        "type": "object",
        "properties": {
          "messages": {
            "type": "array",
            "description": "A list of messages in the conversation so far. Each element has the format {\"role\": \"user\", \"content\": \"Hello\"}. role supports system, user, or assistant. content must not be empty. The content field can be a string or an array of objects (for multimodal input).",
            "items": {
              "$ref": "#/components/schemas/Message"
            }
          },
          "max_tokens": {
            "type": "integer",
            "deprecated": true,
            "description": "Deprecated, please refer to max_completion_tokens"
          },
          "max_completion_tokens": {
            "type": "integer",
            "description": "The maximum number of tokens to generate for the chat completion. If not specified, defaults to a reasonable integer such as 1024. If the result reaches the maximum number of tokens without ending, the finish reason will be \"length\"; otherwise, it will be \"stop\". This refers to the length of tokens you expect us to return, not the total length of input plus output. If input plus max_completion_tokens exceeds the model context window, the API returns invalid_request_error."
          },
          "response_format": {
            "type": "object",
            "description": "Controls the model output format. Default is {\"type\": \"text\"} for plain text output. Set to {\"type\": \"json_object\"} to enable JSON mode, ensuring output is a valid JSON object (you must guide the model to output JSON in the prompt). Set to {\"type\": \"json_schema\"} to enable Structured Output, constraining output to match a specified JSON Schema (recommended, requires the json_schema field). If you encounter schema validation issues, please submit feedback at walle GitHub Issues (https://github.com/MoonshotAI/walle/issues).",
            "properties": {
              "type": {
                "type": "string",
                "enum": [
                  "text",
                  "json_object",
                  "json_schema"
                ],
                "description": "Output format type. text: default, plain text output; json_object: ensures output is a valid JSON object; json_schema: constrains output to match a specified JSON Schema (recommended, requires the json_schema field)"
              },
              "json_schema": {
                "type": "object",
                "description": "Used when type is json_schema. Defines the JSON Schema that the output should conform to.",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Schema name for identification"
                  },
                  "strict": {
                    "type": "boolean",
                    "default": true,
                    "description": "Whether to strictly constrain output according to the schema. Defaults to true. When true, the schema must conform to the MFJS specification; non-conforming schemas will return errors or warnings. When false, only guarantees the output is a valid JSON object without enforcing internal structure."
                  },
                  "schema": {
                    "type": "object",
                    "description": "The JSON Schema object defining the structure the output should conform to. Must conform to MFJS (Moonshot Flavored JSON Schema) specification. You can use the walle CLI tool to validate: go install github.com/moonshotai/walle/cmd/walle@latest && walle -schema 'your_schema' -level strict",
                    "additionalProperties": true
                  }
                },
                "required": [
                  "name",
                  "schema"
                ]
              }
            }
          },
          "stop": {
            "oneOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "maxItems": 5
              }
            ],
            "default": null,
            "description": "Stop words, which will halt the output when a full match is found. The matched words themselves will not be output. A maximum of 5 strings is allowed, and each string must not exceed 32 bytes"
          },
          "stream": {
            "type": "boolean",
            "default": false,
            "description": "Whether to return the response in a streaming fashion. Default is false."
          },
          "stream_options": {
            "type": "object",
            "description": "Options for streaming responses",
            "properties": {
              "include_usage": {
                "type": "boolean",
                "default": false,
                "description": "If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value. NOTE: If the stream is interrupted, you may not receive the final usage chunk which contains the total token usage for the request"
              }
            }
          },
          "tools": {
            "type": "array",
            "description": "A list of tools the model may call",
            "items": {
              "$ref": "#/components/schemas/ToolDefinition"
            },
            "maxItems": 128
          },
          "prompt_cache_key": {
            "type": "string",
            "default": null,
            "description": "Used to cache responses for similar requests to optimize cache hit rates. For Coding Agents, this is typically a session id or task id representing a single session; if the session is exited and later resumed, this value should remain the same. For Kimi Code Plan, this field is required to improve cache hit rates. For other agents involving multi-turn conversations, it is also recommended to implement this field"
          },
          "safety_identifier": {
            "type": "string",
            "description": "A stable identifier used to help detect users of your application that may be violating usage policies. The ID should be a string that uniquely identifies each user. It is recommended to hash the username or email address to avoid sending any identifying information"
          }
        },
        "required": [
          "messages"
        ]
      },
      "MoonshotV1ChatRequest": {
        "title": "moonshot-v1",
        "allOf": [
          {
            "$ref": "#/components/schemas/ChatRequestBase"
          },
          {
            "type": "object",
            "properties": {
              "model": {
                "type": "string",
                "description": "Model ID",
                "enum": [
                  "moonshot-v1-8k",
                  "moonshot-v1-32k",
                  "moonshot-v1-128k",
                  "moonshot-v1-auto",
                  "moonshot-v1-8k-vision-preview",
                  "moonshot-v1-32k-vision-preview",
                  "moonshot-v1-128k-vision-preview"
                ],
                "default": "moonshot-v1-128k"
              },
              "temperature": {
                "type": "number",
                "format": "float",
                "description": "The sampling temperature to use, ranging from 0 to 1. A higher value (e.g., 0.7) will make the output more random, while a lower value (e.g., 0.2) will make it more focused and deterministic. Default is 0.0.",
                "default": 0,
                "minimum": 0,
                "maximum": 1
              },
              "top_p": {
                "type": "number",
                "format": "float",
                "description": "Another sampling method, where the model considers the results of tokens with a cumulative probability mass of top_p. Thus, 0.1 means only considering the top 10% of tokens by probability mass. Generally, we suggest changing either this or the temperature, but not both at the same time. Default is 1.0.",
                "default": 1,
                "minimum": 0,
                "maximum": 1
              },
              "n": {
                "type": "integer",
                "description": "The number of results to generate for each input message. Default is 1, must not exceed 5. When the temperature is very close to 0, only 1 result can be returned.",
                "default": 1,
                "minimum": 1,
                "maximum": 5
              },
              "presence_penalty": {
                "type": "number",
                "format": "float",
                "description": "Presence penalty, a number between -2.0 and 2.0. A positive value will penalize new tokens based on whether they appear in the text, increasing the likelihood of the model discussing new topics",
                "default": 0,
                "minimum": -2,
                "maximum": 2
              },
              "frequency_penalty": {
                "type": "number",
                "format": "float",
                "description": "Frequency penalty, a number between -2.0 and 2.0. A positive value will penalize new tokens based on their existing frequency in the text, reducing the likelihood of the model repeating the same phrases verbatim",
                "default": 0,
                "minimum": -2,
                "maximum": 2
              }
            },
            "required": [
              "model"
            ]
          }
        ]
      },
      "KimiK2ChatRequest": {
        "title": "kimi-k2",
        "allOf": [
          {
            "$ref": "#/components/schemas/ChatRequestBase"
          },
          {
            "type": "object",
            "properties": {
              "model": {
                "type": "string",
                "description": "Model ID",
                "enum": [
                  "kimi-k2-0905-preview",
                  "kimi-k2-0711-preview",
                  "kimi-k2-turbo-preview"
                ],
                "default": "kimi-k2-0905-preview"
              },
              "temperature": {
                "type": "number",
                "format": "float",
                "description": "The sampling temperature to use, ranging from 0 to 1. A higher value (e.g., 0.7) will make the output more random, while a lower value (e.g., 0.2) will make it more focused and deterministic. Default is 0.6.",
                "default": 0.6,
                "minimum": 0,
                "maximum": 1
              },
              "top_p": {
                "type": "number",
                "format": "float",
                "description": "Another sampling method, where the model considers the results of tokens with a cumulative probability mass of top_p. Thus, 0.1 means only considering the top 10% of tokens by probability mass. Generally, we suggest changing either this or the temperature, but not both at the same time. Default is 1.0.",
                "default": 1,
                "minimum": 0,
                "maximum": 1
              },
              "n": {
                "type": "integer",
                "description": "The number of results to generate for each input message. Default is 1, must not exceed 5. When the temperature is very close to 0, only 1 result can be returned.",
                "default": 1,
                "minimum": 1,
                "maximum": 5
              },
              "presence_penalty": {
                "type": "number",
                "format": "float",
                "description": "Presence penalty, a number between -2.0 and 2.0. A positive value will penalize new tokens based on whether they appear in the text, increasing the likelihood of the model discussing new topics",
                "default": 0,
                "minimum": -2,
                "maximum": 2
              },
              "frequency_penalty": {
                "type": "number",
                "format": "float",
                "description": "Frequency penalty, a number between -2.0 and 2.0. A positive value will penalize new tokens based on their existing frequency in the text, reducing the likelihood of the model repeating the same phrases verbatim",
                "default": 0,
                "minimum": -2,
                "maximum": 2
              }
            },
            "required": [
              "model"
            ]
          }
        ]
      },
      "KimiK2ThinkingChatRequest": {
        "title": "kimi-k2-thinking",
        "allOf": [
          {
            "$ref": "#/components/schemas/ChatRequestBase"
          },
          {
            "type": "object",
            "properties": {
              "model": {
                "type": "string",
                "description": "Model ID",
                "enum": [
                  "kimi-k2-thinking",
                  "kimi-k2-thinking-turbo"
                ],
                "default": "kimi-k2-thinking"
              },
              "temperature": {
                "type": "number",
                "format": "float",
                "description": "The sampling temperature to use, ranging from 0 to 1. A higher value (e.g., 0.7) will make the output more random, while a lower value (e.g., 0.2) will make it more focused and deterministic. Default is 1.0.",
                "default": 1,
                "minimum": 0,
                "maximum": 1
              },
              "top_p": {
                "type": "number",
                "format": "float",
                "description": "Another sampling method, where the model considers the results of tokens with a cumulative probability mass of top_p. Thus, 0.1 means only considering the top 10% of tokens by probability mass. Generally, we suggest changing either this or the temperature, but not both at the same time. Default is 1.0.",
                "default": 1,
                "minimum": 0,
                "maximum": 1
              },
              "n": {
                "type": "integer",
                "description": "The number of results to generate for each input message. Default is 1, must not exceed 5. When the temperature is very close to 0, only 1 result can be returned.",
                "default": 1,
                "minimum": 1,
                "maximum": 5
              },
              "presence_penalty": {
                "type": "number",
                "format": "float",
                "description": "Presence penalty, a number between -2.0 and 2.0. A positive value will penalize new tokens based on whether they appear in the text, increasing the likelihood of the model discussing new topics",
                "default": 0,
                "minimum": -2,
                "maximum": 2
              },
              "frequency_penalty": {
                "type": "number",
                "format": "float",
                "description": "Frequency penalty, a number between -2.0 and 2.0. A positive value will penalize new tokens based on their existing frequency in the text, reducing the likelihood of the model repeating the same phrases verbatim",
                "default": 0,
                "minimum": -2,
                "maximum": 2
              }
            },
            "required": [
              "model"
            ]
          }
        ]
      },
      "KimiK25ChatRequest": {
        "title": "kimi-k2.5",
        "allOf": [
          {
            "$ref": "#/components/schemas/ChatRequestBase"
          },
          {
            "type": "object",
            "properties": {
              "model": {
                "type": "string",
                "description": "Model ID",
                "enum": [
                  "kimi-k2.5"
                ],
                "default": "kimi-k2.5"
              },
              "thinking": {
                "type": "object",
                "description": "Controls whether thinking is enabled for the model. Optional parameter. Default value is {\"type\": \"enabled\"}.",
                "properties": {
                  "type": {
                    "type": "string",
                    "enum": [
                      "enabled",
                      "disabled"
                    ],
                    "description": "Enable or disable thinking capability"
                  }
                },
                "required": [
                  "type"
                ],
                "additionalProperties": false
              }
            },
            "required": [
              "model"
            ]
          }
        ]
      },
      "KimiK26ChatRequest": {
        "title": "kimi-k2.6",
        "allOf": [
          {
            "$ref": "#/components/schemas/ChatRequestBase"
          },
          {
            "type": "object",
            "properties": {
              "model": {
                "type": "string",
                "description": "Model ID",
                "enum": [
                  "kimi-k2.6"
                ],
                "default": "kimi-k2.6"
              },
              "thinking": {
                "type": "object",
                "description": "Controls whether thinking is enabled for the kimi-k2.6 model, and whether to fully preserve reasoning_content across multi-turn conversations. Optional parameter. Default value is {\"type\": \"enabled\"}.",
                "properties": {
                  "type": {
                    "type": "string",
                    "enum": [
                      "enabled",
                      "disabled"
                    ],
                    "description": "Enable or disable thinking capability"
                  },
                  "keep": {
                    "type": [
                      "string",
                      "null"
                    ],
                    "enum": [
                      "all",
                      null
                    ],
                    "description": "Controls whether reasoning_content from previous turns is preserved across a multi-turn conversation, i.e. whether to enable Preserved Thinking. Defaults to `null`, meaning historical thinking is NOT preserved.\n\n- `null` (default) or omitted: The server ignores reasoning_content from historical turns.\n- `\"all\"`: Preserves reasoning_content from historical turns and provides it to the model as part of the context, enabling Preserved Thinking. When using this, keep the reasoning_content from every historical assistant message in messages as-is. Recommended to use together with `type: \"enabled\"`.\n- Note: This parameter only affects reasoning_content from historical turns; it does not change whether the model produces/outputs thinking within the current turn (that is controlled by `type`). For best practices, see [Preserved Thinking](/guide/use-kimi-k2-thinking-model#preserved-thinking)."
                  }
                },
                "required": [
                  "type"
                ],
                "additionalProperties": false
              }
            },
            "required": [
              "model"
            ]
          }
        ]
      },
      "ChatCompletionResponse": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "Unique identifier for the completion"
          },
          "object": {
            "type": "string",
            "description": "Object type",
            "example": "chat.completion"
          },
          "created": {
            "type": "integer",
            "description": "Unix timestamp of when the completion was created"
          },
          "model": {
            "type": "string",
            "description": "Model used for the completion"
          },
          "choices": {
            "type": "array",
            "description": "List of completion choices",
            "items": {
              "type": "object",
              "properties": {
                "index": {
                  "type": "integer"
                },
                "message": {
                  "type": "object",
                  "properties": {
                    "role": {
                      "type": "string",
                      "enum": [
                        "assistant"
                      ]
                    },
                    "content": {
                      "type": [
                        "string",
                        "null"
                      ],
                      "description": "The assistant's message content"
                    },
                    "tool_calls": {
                      "type": "array",
                      "description": "Tool calls made by the model",
                      "items": {
                        "type": "object",
                        "properties": {
                          "id": {
                            "type": "string"
                          },
                          "type": {
                            "type": "string",
                            "enum": [
                              "function"
                            ]
                          },
                          "function": {
                            "type": "object",
                            "properties": {
                              "name": {
                                "type": "string"
                              },
                              "arguments": {
                                "type": "string",
                                "description": "JSON string of function arguments"
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                },
                "finish_reason": {
                  "type": "string",
                  "enum": [
                    "stop",
                    "length",
                    "tool_calls"
                  ]
                }
              }
            }
          },
          "usage": {
            "type": "object",
            "properties": {
              "prompt_tokens": {
                "type": "integer",
                "description": "Number of tokens in the prompt"
              },
              "completion_tokens": {
                "type": "integer",
                "description": "Number of tokens in the completion"
              },
              "total_tokens": {
                "type": "integer",
                "description": "Total number of tokens used"
              }
            }
          }
        }
      },
      "BalanceResponse": {
        "type": "object",
        "properties": {
          "code": {
            "type": "integer",
            "description": "Response code. 0 indicates success."
          },
          "data": {
            "type": "object",
            "properties": {
              "available_balance": {
                "type": "number",
                "format": "float",
                "description": "The available balance (unit: USD), including cash balance and voucher balance. When it is less than or equal to 0, the user cannot call the inference API",
                "example": 49.58894
              },
              "voucher_balance": {
                "type": "number",
                "format": "float",
                "description": "The voucher balance (unit: USD), which cannot be negative",
                "example": 46.58893
              },
              "cash_balance": {
                "type": "number",
                "format": "float",
                "description": "The cash balance (unit: USD), which can be negative, indicating that the user owes money. When it is negative, available_balance is equal to the value of voucher_balance",
                "example": 3.00001
              }
            },
            "required": [
              "available_balance",
              "voucher_balance",
              "cash_balance"
            ]
          },
          "scode": {
            "type": "string",
            "description": "Status code",
            "example": "0x0"
          },
          "status": {
            "type": "boolean",
            "description": "Request status",
            "example": true
          }
        },
        "required": [
          "code",
          "data",
          "scode",
          "status"
        ]
      },
      "EstimateTokenRequest": {
        "type": "object",
        "properties": {
          "model": {
            "type": "string",
            "description": "Model ID",
            "default": "kimi-k2.5",
            "enum": [
              "kimi-k2.6",
              "kimi-k2.5",
              "kimi-k2-0905-preview",
              "kimi-k2-0711-preview",
              "kimi-k2-turbo-preview",
              "moonshot-v1-8k",
              "moonshot-v1-32k",
              "moonshot-v1-128k",
              "moonshot-v1-auto",
              "moonshot-v1-8k-vision-preview",
              "moonshot-v1-32k-vision-preview",
              "moonshot-v1-128k-vision-preview"
            ]
          },
          "messages": {
            "type": "array",
            "description": "A list of messages in the conversation so far. Each element has the format {\"role\": \"user\", \"content\": \"Hello\"}. role supports system, user, or assistant. content must not be empty",
            "items": {
              "$ref": "#/components/schemas/Message"
            }
          }
        },
        "required": [
          "model",
          "messages"
        ]
      },
      "EstimateTokenResponse": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "total_tokens": {
                "type": "integer",
                "description": "Estimated total number of tokens",
                "example": 80
              }
            },
            "required": [
              "total_tokens"
            ]
          }
        },
        "required": [
          "data"
        ]
      },
      "FileObject": {
        "type": "obj

# --- truncated at 32 KB (66 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/moonshot-ai/refs/heads/main/openapi/moonshot-ai-openapi.json