> ## Documentation Index
> Fetch the complete documentation index at: https://www.edgee.ai/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# OpenAI Chat Completion

> Create chat completions using the OpenAI Chat Completion API format

Creates a completion for the chat message. The Edgee API is OpenAI-compatible and works with any model and provider. Supports both streaming and non-streaming responses.


## OpenAPI

````yaml POST /v1/chat/completions
# OpenAPI 3.0.1 description of the Edgee AI Gateway. The only path defined in
# this document is POST /v1/chat/completions.
openapi: 3.0.1
info:
  title: Edgee API
  version: 1.0.0
  description: >-
    Edgee is an edge-native AI Gateway with private model hosting, automatic
    model selection, cost audits/alerts, and edge tools. This API is
    OpenAI-compatible, providing one API for any model and any provider.
# NOTE(review): the documentation index for this page is served from edgee.ai
# while the server URL below is on edgee.io — confirm the base URL is current.
servers:
  - url: https://edgee.io
    description: Edgee AI Gateway
# Document-level security requirement: applies to every operation that does
# not declare its own `security`.
security:
  - bearerAuth: []
# Tag catalogue. The operation in this document uses only the `Chat` tag.
tags:
  - name: Chat
    description: Chat completion endpoints (OpenAI format)
  - name: Messages
    description: Messages endpoints (Anthropic format)
  - name: Responses
    description: Responses endpoints (OpenAI Responses API format)
  - name: Models
    description: Model management endpoints
  - name: Tokens
    description: Token estimation endpoints
  - name: Compress
    description: Standalone token compression endpoint
paths:
  # POST /v1/chat/completions — OpenAI-compatible chat completion operation.
  /v1/chat/completions:
    post:
      tags:
        - Chat
      summary: Create chat completion
      description: >-
        Creates a completion for the chat message. Supports both streaming and
        non-streaming responses. The API is OpenAI-compatible and works with any
        model and provider.
      operationId: createChatCompletion
      # Optional gateway headers. Each one mirrors a request-body field
      # (`tags`, `enable_debug`, `compression_model`).
      parameters:
        - name: X-Edgee-Tags
          in: header
          required: false
          schema:
            type: string
          description: >-
            Comma-separated list of tags for categorizing and filtering requests
            in analytics and logs. Example:
            `production,chatbot,customer-support`
        - name: X-Edgee-Debug
          in: header
          required: false
          schema:
            type: boolean
          description: >-
            Enable debug mode to include additional debugging information in the
            response.
        - name: X-Edgee-Compression-Model
          in: header
          required: false
          # Reference the shared schema instead of repeating the enum inline, so
          # the header and the `compression_model` body field cannot drift apart.
          schema:
            $ref: '#/components/schemas/CompressionModel'
          description: >-
            Compression bundle to apply. Tunes the compressor for the agentic
            style of the calling client.
      # The JSON body is mandatory and must match ChatCompletionRequest.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        # Success response: a single JSON body, or a Server-Sent Events stream
        # when the request sets `stream`.
        '200':
          description: Chat completion created successfully
          # Gateway-specific response headers describing how the request was
          # routed.
          headers:
            X-Edgee-Provider:
              description: >-
                Name of the upstream provider actually used to fulfill the
                request (e.g. `openai`, `anthropic`, `google`).
              schema:
                type: string
            X-Edgee-Fallback-Used:
              description: >-
                Set to `1` when a fallback provider was used because the primary
                provider failed or was unavailable.
              schema:
                type: string
                enum:
                  - '1'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
              # NOTE(review): CompressionInfo says `usage.prompt_tokens` is the
              # compressed count, so `prompt_tokens: 10` with `saved_tokens: 450`
              # would imply a ~98% reduction rather than the ~49% shown —
              # confirm the sample numbers.
              example:
                id: chatcmpl-123
                object: chat.completion
                created: 1677652288
                model: openai/gpt-5.2
                choices:
                  - index: 0
                    message:
                      role: assistant
                      content: Hello! How can I assist you today?
                    finish_reason: stop
                usage:
                  prompt_tokens: 10
                  completion_tokens: 10
                  total_tokens: 20
                  input_tokens_details:
                    cached_tokens: 0
                  output_tokens_details:
                    reasoning_tokens: 0
                compression:
                  saved_tokens: 450
                  cost_savings: 27000
                  reduction: 48.99884991374353
                  time_ms: 12
            # Streaming variant of the same response.
            text/event-stream:
              schema:
                type: string
                format: binary
                description: >-
                  Server-Sent Events stream. Each event is a JSON object
                  prefixed with 'data: ' and followed by two newlines. The
                  stream consists of multiple `ChatCompletionChunk` objects, and
                  optionally a final chunk with usage statistics if
                  `stream_options.include_usage` is true.
              # Each sample is one SSE frame. The `>+` folded scalar joins
              # `data:` and the JSON payload with a single space and keeps the
              # trailing blank line that terminates the frame.
              examples:
                contentChunk:
                  value: >+
                    data:
                    {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"openai/gpt-5.2","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}

                roleChunk:
                  value: >+
                    data:
                    {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"openai/gpt-5.2","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}

                finalChunk:
                  value: >+
                    data:
                    {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"openai/gpt-5.2","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":10,"total_tokens":20,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}}}

        # Validation failures. The three samples cover a malformed model ID, an
        # unknown model, and a provider that does not serve the requested model.
        '400':
          description: Bad request - invalid input parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              examples:
                badModelId:
                  value:
                    error:
                      code: bad_model_id
                      message: 'Invalid model ID: ''invalid-model'''
                modelNotFound:
                  value:
                    error:
                      code: model_not_found
                      message: Model 'openai/gpt-1' not found
                providerNotSupported:
                  value:
                    error:
                      code: provider_not_supported
                      message: >-
                        Provider 'anthropic' is not supported for model
                        'openai/gpt-5.2'
        # Authentication failure: the API key is missing or invalid.
        '401':
          description: Unauthorized - missing or invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                error:
                  code: unauthorized
                  message: Missing Authorization header
        # Authorization failure. All three samples share the `forbidden` code
        # and differ only in `message`.
        '403':
          description: Forbidden - API key is inactive, expired, or model not allowed
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              examples:
                inactiveKey:
                  value:
                    error:
                      code: forbidden
                      message: API key is inactive
                expiredKey:
                  value:
                    error:
                      code: forbidden
                      message: API key has expired
                modelNotAllowed:
                  value:
                    error:
                      code: forbidden
                      message: Model 'openai/gpt-5.2' is not allowed for this API key
        # Returned when the usage limit has been exceeded.
        '429':
          description: Too many requests - usage limit exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                error:
                  code: usage_limit_exceeded
                  message: 'Usage limit exceeded: 1000.00 / 1000 tokens used'
        # Unexpected gateway failure. Uses the shared error envelope; no sample
        # payload is documented for this status.
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
components:
  schemas:
    # Request body for POST /v1/chat/completions. Only `model` and `messages`
    # are mandatory; the `edgee_*`, `tags`, `enable_debug` and
    # `compression_model` fields are gateway extensions to the OpenAI format.
    ChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        model:
          type: string
          description: >-
            ID of the model to use. Format: `{author_id}/{model_id}` (e.g.
            `openai/gpt-5.2`)
          example: openai/gpt-5.2
        messages:
          type: array
          description: A list of messages comprising the conversation so far.
          items:
            $ref: '#/components/schemas/Message'
          minItems: 1
        max_tokens:
          type: integer
          description: >-
            The maximum number of tokens that can be generated in the chat
            completion.
          minimum: 1
        stream:
          type: boolean
          description: >-
            If set, partial message deltas will be sent, as in OpenAI. Streamed
            chunks are sent as Server-Sent Events (SSE).
          default: false
        stream_options:
          type: object
          description: Options for streaming response.
          properties:
            # Usage travels in the final chunk of the stream, matching the
            # `text/event-stream` response description and the `Usage` schema.
            include_usage:
              type: boolean
              description: >-
                If set, the final chunk of the stream includes usage
                statistics.
        tools:
          type: array
          description: >-
            A list of tools the model may call. Currently, only `function` type
            is supported.
          items:
            $ref: '#/components/schemas/Tool'
        # Three accepted shapes: bare string, `{type}` object, or a reference
        # to one specific function.
        tool_choice:
          oneOf:
            - type: string
              enum:
                - none
                - auto
              description: >-
                Bare-string mode. `none` forbids tool calls; `auto` lets the
                model decide.
            - $ref: '#/components/schemas/ToolChoiceTypedMode'
            - $ref: '#/components/schemas/ToolChoiceSpecific'
          description: >-
            Controls which tool (if any) the model is allowed to call. Accepts a
            bare string (`none` / `auto`), a typed-mode object (`{ "type":
            "auto" | "none" }`), or a specific function reference.
        edgee_tool_ids:
          type: array
          items:
            type: string
          description: >-
            List of Edge Tool IDs to inject (e.g. edgee_current_time,
            edgee_generate_uuid). Each ID must be activated for your API key.
            When omitted or empty, only tools with hydration enabled for your
            org or API key are auto-injected. Invalid or non-activated IDs
            return 400 with invalid_edgee_tool_ids.
          example:
            - edgee_current_time
            - edgee_generate_uuid
        # Echo of the `edgee_pending_id` returned by a previous response.
        edgee_pending_id:
          type: string
          description: >-
            Pending operation ID when continuing a conversation after Edge Tool
            execution (e.g. when mixing client-side and Edge Tools). The gateway
            injects stored Edge Tool results into the message history.
        tags:
          type: array
          items:
            type: string
          description: >-
            Optional tags to categorize and label the request. Useful for
            filtering and grouping requests in analytics and logs. Can also be
            sent via the `X-Edgee-Tags` header as a comma-separated string.
        enable_debug:
          type: boolean
          description: >-
            When `true`, the response includes additional debug information.
            Equivalent to the `X-Edgee-Debug` header.
        compression_model:
          $ref: '#/components/schemas/CompressionModel'
    # Non-streaming response body. `compression`, `edgee_pending_id` and
    # `edgee_tools_executed` are optional gateway extensions; the six fields
    # listed under `required` are always part of the payload.
    ChatCompletionResponse:
      type: object
      required:
        - id
        - object
        - created
        - model
        - choices
        - usage
      properties:
        id:
          type: string
          description: A unique identifier for the chat completion.
          example: chatcmpl-123
        object:
          type: string
          enum:
            - chat.completion
          description: The object type, which is always `chat.completion`.
        created:
          type: integer
          description: >-
            The Unix timestamp (in seconds) of when the chat completion was
            created.
          example: 1677652288
        model:
          type: string
          description: The model used for the chat completion.
          example: openai/gpt-5.2
        # NOTE(review): the description mentions `n`, but ChatCompletionRequest
        # in this spec declares no `n` property — confirm whether multiple
        # choices can actually be requested.
        choices:
          type: array
          description: >-
            A list of chat completion choices. Can be more than one if n is
            greater than 1.
          items:
            $ref: '#/components/schemas/ChatCompletionChoice'
        usage:
          $ref: '#/components/schemas/Usage'
        compression:
          $ref: '#/components/schemas/CompressionInfo'
        # Counterpart of the request field with the same name.
        edgee_pending_id:
          type: string
          description: >-
            Present when one or more Edge Tool calls were deferred. Pass this
            value back as `edgee_pending_id` in the next request to resume the
            conversation with the tool results filled in.
        edgee_tools_executed:
          type: array
          description: >-
            List of Edge Tools the gateway executed inline before returning.
            Empty or absent when no Edge Tools ran.
          items:
            $ref: '#/components/schemas/EdgeeToolExecuted'
    # Shared error envelope used by every non-2xx response of the operation.
    ErrorResponse:
      type: object
      required:
        - error
      description: >-
        Error response. The `error` object follows OpenAI's error envelope
        shape; the gateway additionally populates `type` (Anthropic-style
        category) and `param` when applicable.
      properties:
        error:
          type: object
          required:
            - message
          properties:
            message:
              type: string
              description: A human-readable error message.
            # NOTE(review): described as "Always present", yet `type` is not
            # listed under `required` and the documented error samples omit
            # it — confirm which is correct before tightening the schema.
            type:
              type: string
              enum:
                - invalid_request_error
                - authentication_error
                - permission_error
                - not_found_error
                - rate_limit_error
                - server_error
                - provider_error
              description: Anthropic-style high-level error category. Always present.
            # `invalid_edgee_tool_ids` is included because the request field
            # `edgee_tool_ids` documents it as the 400 result for bad IDs.
            code:
              type: string
              nullable: true
              description: >-
                A machine-readable error code. Currently emitted values:
                `unauthorized`, `forbidden`, `invalid_json`, `bad_model_id`,
                `model_not_found`, `provider_not_supported`,
                `invalid_tokenizer`, `invalid_request`,
                `invalid_edgee_tool_ids`, `usage_limit_exceeded`,
                `provider_error`, `internal_error`.
              example: bad_model_id
            param:
              type: string
              nullable: true
              description: >-
                Name of the request parameter that caused the error, when
                applicable.
    # One conversation message, used both in requests and in response choices.
    # The schema itself only enforces `role`; the per-role requirements for the
    # other fields are stated in the property descriptions, not as constraints.
    Message:
      type: object
      required:
        - role
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
            - tool
            - developer
          description: >-
            The role of the message author. Required properties vary by role:

            - `system`, `user`, `developer`: requires `content`

            - `assistant`: `content` is optional (can be empty if `tool_calls`
            is present)

            - `tool`: requires `content` and `tool_call_id`
        content:
          type: string
          description: >-
            The contents of the message. Required for all roles except
            `assistant` (where it can be empty if `tool_calls` is present). For
            `assistant` role, defaults to empty string if not provided.
        name:
          type: string
          description: >-
            An optional name for the participant. Provides the model information
            to differentiate between participants of the same role. Used for
            `system`, `user`, `assistant`, and `developer` roles.
        tool_call_id:
          type: string
          description: >-
            The ID of the tool call that this message is responding to. Required
            for `tool` role only.
        refusal:
          type: string
          description: >-
            The refusal message from the model, if any. Used for `assistant`
            role only.
        tool_calls:
          type: array
          description: >-
            The tool calls made by the assistant. Used for `assistant` role
            only.
          items:
            $ref: '#/components/schemas/ToolCall'
        # Free-form object (`additionalProperties: true`); its shape is not
        # constrained by this schema.
        cache_control:
          type: object
          description: >-
            Anthropic prompt-cache control passthrough. Applied when routing to
            an Anthropic-backed model. Used on `system`, `user`, and `assistant`
            roles only.
          additionalProperties: true
          example:
            type: ephemeral
    # A tool the model may call. `type` is a single-value enum, so every tool
    # is a function described by FunctionDefinition.
    Tool:
      type: object
      required:
        - type
        - function
      properties:
        type:
          type: string
          enum:
            - function
          description: The type of the tool. Currently, only `function` is supported.
        function:
          $ref: '#/components/schemas/FunctionDefinition'
    # Object form of the `auto` / `none` tool-choice modes; one of the three
    # alternatives accepted by `ChatCompletionRequest.tool_choice`.
    ToolChoiceTypedMode:
      type: object
      description: >-
        Typed-mode form of `tool_choice` — an object containing only a `type`
        field set to `auto` or `none`.
      required:
        - type
      properties:
        type:
          type: string
          enum:
            - auto
            - none
    # `tool_choice` alternative that names one specific function via
    # ToolChoiceFunction.
    ToolChoiceSpecific:
      type: object
      required:
        - type
        - function
      properties:
        type:
          type: string
          enum:
            - function
          description: The type of the tool.
        function:
          $ref: '#/components/schemas/ToolChoiceFunction'
    # Allowed compression bundles, referenced by the `compression_model`
    # request field.
    CompressionModel:
      type: string
      description: >-
        Selects the compression bundle to apply to the request. Equivalent to
        the `X-Edgee-Compression-Model` header.
      enum:
        - claude
        - opencode
        - codex
        - cursor
    # One entry of `ChatCompletionResponse.choices`. `finish_reason` is not
    # listed under `required`, so the schema allows it to be absent.
    ChatCompletionChoice:
      type: object
      required:
        - index
        - message
      properties:
        index:
          type: integer
          description: The index of the choice in the list of choices.
          minimum: 0
        message:
          $ref: '#/components/schemas/Message'
        finish_reason:
          type: string
          enum:
            - stop
            - length
            - content_filter
            - tool_calls
          description: >-
            The reason the model stopped generating tokens. This will be `stop`
            if the model hit a natural stop point or a provided stop sequence,
            `length` if the maximum number of tokens specified in the request
            was reached, `content_filter` if content was omitted due to a flag
            from our content filters, or `tool_calls` if the model called a
            tool.
    # Token accounting for a completion. All five fields are required,
    # including the two detail objects.
    Usage:
      type: object
      description: >-
        Usage statistics for the completion. In streaming responses, this is
        only present in the final chunk when `stream_options.include_usage` is
        true.
      required:
        - prompt_tokens
        - completion_tokens
        - total_tokens
        - input_tokens_details
        - output_tokens_details
      properties:
        prompt_tokens:
          type: integer
          description: Number of tokens in the prompt.
          minimum: 0
        completion_tokens:
          type: integer
          description: Number of tokens in the generated completion.
          minimum: 0
        total_tokens:
          type: integer
          description: Total number of tokens used in the request (prompt + completion).
          minimum: 0
        input_tokens_details:
          $ref: '#/components/schemas/InputTokenDetails'
        output_tokens_details:
          $ref: '#/components/schemas/OutputTokenDetails'
    # Metrics describing the effect of token compression on a request. When the
    # object is present, all four fields are required.
    CompressionInfo:
      type: object
      description: >-
        Token compression metrics. Present in the response when token
        compression was applied to the request. The `usage.prompt_tokens` field
        reflects the compressed token count actually billed by the provider.
      required:
        - saved_tokens
        - cost_savings
        - reduction
        - time_ms
      properties:
        saved_tokens:
          type: integer
          description: >-
            Number of input tokens saved by compression (original count minus
            compressed count).
          minimum: 0
          example: 450
        # Integer in millionths of a US dollar, per the description below.
        cost_savings:
          type: integer
          description: >-
            Estimated cost savings in micro-units. Divide by `1000000` to
            convert to USD. Example: `27000` = $0.027 saved.
          minimum: 0
          example: 27000
        # A percentage bounded to 0–100, not a 0–1 ratio.
        reduction:
          type: number
          description: >-
            Percentage reduction in input tokens. For example, `48` means the
            compressed prompt was 48% smaller than the original.
          minimum: 0
          maximum: 100
          example: 48.99884991374353
        time_ms:
          type: integer
          description: Time taken to perform compression, in milliseconds.
          minimum: 0
          example: 12
    # Item type of `ChatCompletionResponse.edgee_tools_executed`: the tool name
    # plus a success flag.
    EdgeeToolExecuted:
      type: object
      description: Summary of an Edge Tool that was executed inline by the gateway.
      required:
        - name
        - success
      properties:
        name:
          type: string
          description: Edge Tool identifier (e.g. `edgee_current_time`).
          example: edgee_current_time
        success:
          type: boolean
          description: Whether the tool returned successfully.
    # A tool invocation attached to an assistant message (`Message.tool_calls`).
    # A `tool` message refers back to it through `tool_call_id`.
    ToolCall:
      type: object
      required:
        - id
        - type
        - function
      properties:
        id:
          type: string
          description: The ID of the tool call.
        type:
          type: string
          enum:
            - function
          description: The type of the tool call.
        function:
          $ref: '#/components/schemas/FunctionCall'
    # Declaration of a callable function offered to the model through `Tool`.
    # Only `name` is required.
    FunctionDefinition:
      type: object
      required:
        - name
      properties:
        name:
          type: string
          description: >-
            The name of the function to be called. Must be a-z, A-Z, 0-9, or
            contain underscores and dashes, with a maximum length of 64.
        description:
          type: string
          description: >-
            A description of what the function does, used by the model to choose
            when and how to call the function.
        # Free-form JSON Schema object; its contents are not validated by this
        # spec (`additionalProperties: true`).
        parameters:
          type: object
          description: >-
            The parameters the function accepts, described as a JSON Schema
            object. See the guide for examples, and the JSON Schema reference
            for documentation about the format.
          additionalProperties: true
    # Function reference used by ToolChoiceSpecific; carries only the name.
    ToolChoiceFunction:
      type: object
      required:
        - name
      properties:
        name:
          type: string
          description: The name of the function to call.
    # Prompt-cache breakdown of the input tokens. Neither field is required by
    # the schema.
    InputTokenDetails:
      type: object
      description: Additional details about input tokens.
      properties:
        cached_tokens:
          type: integer
          description: Number of cached tokens read from the prompt cache.
          minimum: 0
        cache_creation_tokens:
          type: integer
          description: >-
            Number of tokens written to the prompt cache (Anthropic-style cache
            creation).
          minimum: 0
    # Breakdown of the output tokens; currently a single optional counter.
    OutputTokenDetails:
      type: object
      description: Additional details about output tokens.
      properties:
        reasoning_tokens:
          type: integer
          description: Number of reasoning tokens in the output.
          minimum: 0
    # The function part of a ToolCall. `arguments` is a string holding JSON,
    # not a nested object.
    FunctionCall:
      type: object
      required:
        - name
        - arguments
      properties:
        name:
          type: string
          description: The name of the function to call.
        arguments:
          type: string
          description: The arguments to call the function with, as JSON.
  # HTTP bearer authentication, referenced by the document-level `security`
  # requirement.
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      # NOTE(review): `bearerFormat` is only a documentation hint, but the
      # description below calls the token an API key — confirm that the key
      # really is a JWT, otherwise this hint is misleading.
      bearerFormat: JWT
      description: >-
        Bearer authentication header of the form `Bearer <token>`, where
        `<token>` is your API key. More info
        [here](/docs/api-reference/authentication)

````