Parasail Inference API

OpenAI-compatible real-time and streaming inference API exposing serverless access to popular open-weight LLMs, embedding models, and the model catalog. Endpoints: /v1/chat/completions, /v1/completions, /v1/embeddings, /v1/models. Bearer-token authentication; pay-per-token billing; supports streaming, tool use, and structured outputs. Compatible with the OpenAI Python and TypeScript clients by overriding base_url.

Parasail Inference API is one of 3 APIs that Parasail publishes on the APIs.io network, described by a machine-readable OpenAPI specification.

This API exposes 3 machine-runnable capabilities, which can be deployed as REST, MCP, or Agent Skill surfaces via Naftiko, along with 1 JSON Schema definition.

Tagged areas include AI, Artificial Intelligence, Inference, Chat, and Embeddings. The published artifact set on APIs.io includes API documentation, an OpenAPI specification, a JSON-LD context, 3 Naftiko capability specs, and 1 JSON Schema.

OpenAPI Specification

parasail-inference-api-openapi.yml Raw ↑
# OpenAPI document version; 3.1.x schemas follow JSON Schema 2020-12.
openapi: 3.1.0
# API-level metadata shown by documentation and code-generation tools.
# NOTE(review): the description below mentions image generation and
# text-to-speech models, but the paths in this document only cover chat,
# completions, embeddings, and model listing — confirm the intended scope.
info:
  title: Parasail Inference API
  description: |
    OpenAI-compatible inference API for Parasail's AI Supercloud. Provides serverless
    pay-per-token access to popular open-weight LLMs, embedding models, image generation
    models, and text-to-speech models running on Parasail's global GPU network.
  # Quoted so the version stays a string instead of being read as a float.
  version: '1.0'
  contact:
    name: Parasail
    url: https://docs.parasail.io/parasail-docs/
# Base URL for every path below; operation paths are appended to it, so
# /chat/completions resolves to https://api.parasail.io/v1/chat/completions.
servers:
  - url: https://api.parasail.io/v1
    description: Parasail OpenAI-compatible inference endpoint
# Default security requirement for all operations: the bearerAuth scheme
# defined under components.securitySchemes. The empty list means no scopes.
security:
  - bearerAuth: []
# Tag catalogue; each operation under `paths` references one of these names
# so that tooling can group operations by area.
tags:
  - name: Chat
    description: Chat completions for conversational LLM workloads.
  - name: Completions
    description: Legacy text completions for prompt-only LLM workloads.
  - name: Embeddings
    description: Vector embeddings for RAG, semantic search, and similarity workloads.
  - name: Models
    description: Discover the models currently exposed on the serverless tier.
paths:
  # POST /chat/completions: generate a reply from a list of chat messages.
  /chat/completions:
    post:
      tags:
        - Chat
      operationId: createChatCompletion
      summary: Create Chat Completion
      description: Generate a chat completion from a list of messages using an OpenAI-compatible request body.
      # A JSON body is mandatory; its shape is ChatCompletionRequest.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      # NOTE(review): only the success response is described; no error
      # responses are documented for this operation.
      responses:
        '200':
          description: A chat completion response.
          content:
            # Complete response delivered as a single JSON document.
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            # Streaming variant; the stream itself is modelled as an opaque string.
            text/event-stream:
              schema:
                type: string
                description: Server-Sent Events stream when stream=true.
  # POST /completions: legacy prompt-in, text-out generation.
  /completions:
    post:
      tags:
        - Completions
      operationId: createCompletion
      summary: Create Completion
      description: Generate a text completion from a prompt using an OpenAI-compatible request body.
      # A JSON body is mandatory; its shape is CompletionRequest.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
      # NOTE(review): only the success response is described; no error
      # responses are documented for this operation.
      responses:
        '200':
          description: A text completion response.
          content:
            # Complete response delivered as a single JSON document.
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
            # CompletionRequest declares a `stream` flag, so the streaming
            # media type is documented here the same way /chat/completions
            # documents it. The stream is modelled as an opaque string.
            text/event-stream:
              schema:
                type: string
                description: Server-Sent Events stream when stream=true.
  # POST /embeddings: turn one or more input strings into vectors.
  /embeddings:
    post:
      tags:
        - Embeddings
      operationId: createEmbedding
      summary: Create Embedding
      description: Generate vector embeddings for one or more input strings.
      # A JSON body is mandatory; its shape is EmbeddingRequest.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
      # NOTE(review): only the success response is described; no error
      # responses are documented for this operation.
      responses:
        '200':
          description: An embeddings response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
  # GET /models: list the model catalogue. No parameters or request body
  # are declared for this operation.
  /models:
    get:
      tags:
        - Models
      operationId: listModels
      summary: List Models
      description: List the serverless models currently available on Parasail.
      # NOTE(review): only the success response is described; no error
      # responses are documented for this operation.
      responses:
        '200':
          description: A list of available models.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelList'
components:
  # Authentication schemes; bearerAuth is the one referenced by the
  # top-level `security` requirement.
  securitySchemes:
    bearerAuth:
      # HTTP authentication using the Bearer scheme.
      type: http
      scheme: bearer
      description: Send your Parasail API key as a Bearer token.
  schemas:
    # Request body for POST /chat/completions.
    ChatCompletionRequest:
      type: object
      # Only the model ID and the conversation itself are mandatory.
      required:
        - model
        - messages
      properties:
        model:
          type: string
          description: ID of the model to use (e.g. parasail-deepseek-v3, parasail-qwen3-coder-480b).
        # The conversation so far, one ChatMessage per entry.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatMessage'
        # Generation parameters; no bounds are declared in this schema.
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
        stream:
          type: boolean
          description: If true, response is returned as a Server-Sent Events stream.
        # Either a single stop string or a list of stop strings.
        stop:
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        # Tool definitions are accepted as free-form objects.
        tools:
          type: array
          items:
            type: object
        response_format:
          type: object
          description: JSON schema or json_object response format constraint.
    # One message in a chat conversation. Used both for the items of
    # ChatCompletionRequest.messages and for the `message` of each choice in
    # ChatCompletionResponse, so it must accept model-generated messages too.
    ChatMessage:
      type: object
      required:
        - role
        - content
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
            - tool
        # Plain text, a list of content-part objects, or null. Null is
        # accepted because OpenAI-compatible assistant messages that carry
        # tool calls have no text content.
        content:
          oneOf:
            - type: string
            - type: array
              items:
                type: object
            # Quoted so YAML reads the JSON Schema type name, not a null value.
            - type: 'null'
        name:
          type: string
        # Kept as free-form objects, matching how `tools` is modelled in
        # ChatCompletionRequest.
        tool_calls:
          type: array
          description: Tool calls requested by the model in an assistant message.
          items:
            type: object
        tool_call_id:
          type: string
    # JSON body of the 200 response of POST /chat/completions (the
    # application/json variant). No properties are marked required.
    ChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
        object:
          type: string
          example: chat.completion
        # NOTE(review): presumably a Unix timestamp in seconds — confirm.
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            # Inline choice object; the generated message reuses ChatMessage.
            type: object
            properties:
              index:
                type: integer
              message:
                $ref: '#/components/schemas/ChatMessage'
              finish_reason:
                type: string
        usage:
          $ref: '#/components/schemas/Usage'
    # Request body for POST /completions.
    CompletionRequest:
      type: object
      # Only the model ID and the prompt are mandatory.
      required:
        - model
        - prompt
      properties:
        model:
          type: string
        # Either a single prompt string or a list of prompt strings.
        prompt:
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        # Generation parameters; no bounds are declared in this schema.
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
        stream:
          type: boolean
    # JSON body of the 200 response of POST /completions. No properties are
    # marked required.
    CompletionResponse:
      type: object
      properties:
        id:
          type: string
        object:
          type: string
          example: text_completion
        # NOTE(review): presumably a Unix timestamp in seconds — confirm.
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            # Inline choice object carrying the generated text directly.
            type: object
            properties:
              text:
                type: string
              index:
                type: integer
              finish_reason:
                type: string
        usage:
          $ref: '#/components/schemas/Usage'
    # Request body for POST /embeddings.
    EmbeddingRequest:
      type: object
      # Only the model ID and the text to embed are mandatory.
      required:
        - model
        - input
      properties:
        model:
          type: string
          description: Embedding model ID.
        # Either a single string or a list of strings to embed.
        input:
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        # Restricted to the two values listed below.
        encoding_format:
          type: string
          enum:
            - float
            - base64
    # JSON body of the 200 response of POST /embeddings. No properties are
    # marked required.
    EmbeddingResponse:
      type: object
      properties:
        object:
          type: string
          example: list
        data:
          type: array
          items:
            # Inline object holding one embedding and its position.
            type: object
            properties:
              object:
                type: string
                example: embedding
              # EmbeddingRequest.encoding_format allows `base64`, in which
              # case OpenAI-compatible servers return the vector as a
              # base64-encoded string instead of a list of numbers, so both
              # shapes are accepted here.
              embedding:
                oneOf:
                  - type: array
                    items:
                      type: number
                  - type: string
              index:
                type: integer
        model:
          type: string
        usage:
          $ref: '#/components/schemas/Usage'
    # Token counts attached to responses; referenced by
    # ChatCompletionResponse, CompletionResponse, and EmbeddingResponse.
    # All three counters are optional integers.
    Usage:
      type: object
      properties:
        prompt_tokens:
          type: integer
        completion_tokens:
          type: integer
        # NOTE(review): presumably prompt_tokens + completion_tokens — confirm.
        total_tokens:
          type: integer
    # JSON body of the 200 response of GET /models: a wrapper object whose
    # `data` array holds one Model entry per listed model.
    ModelList:
      type: object
      properties:
        object:
          type: string
          example: list
        data:
          type: array
          items:
            $ref: '#/components/schemas/Model'
    # One entry of ModelList.data. No properties are marked required.
    Model:
      type: object
      properties:
        # NOTE(review): presumably the value clients pass as `model` in
        # requests — confirm.
        id:
          type: string
        object:
          type: string
          example: model
        # NOTE(review): presumably a Unix timestamp in seconds — confirm.
        created:
          type: integer
        owned_by:
          type: string