> For the complete documentation index, see [llms.txt](https://docs.ai.neevcloud.com/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://docs.ai.neevcloud.com/api-reference/model-routing/chat.md).

# Chat

## Create Chat Completion

> Creates a model response for the given chat conversation. This endpoint is OpenAI-compatible.

```json
{"openapi":"3.0.3","info":{"title":"Model Routing Service API","version":"0.1.0"},"servers":[{"url":"https://inference.ai.neevcloud.com","description":"Inference / model-routing API base URL"}],"security":[{"BearerAuth":[]}],"components":{"securitySchemes":{"BearerAuth":{"type":"http","scheme":"bearer","description":"JWT token or API key for authentication."}},"schemas":{"ChatCompletionRequest":{"type":"object","description":"Request body for chat completion, following OpenAI's API format.","required":["model","messages"],"properties":{"model":{"type":"string","description":"ID of the model to use."},"messages":{"type":"array","description":"A list of messages comprising the conversation so far.","minItems":1,"items":{"$ref":"#/components/schemas/ChatMessage"}},"temperature":{"type":"number","format":"float","minimum":0,"maximum":2,"default":1,"description":"Sampling temperature between 0 and 2. Higher values make output more random."},"top_p":{"type":"number","format":"float","minimum":0,"maximum":1,"default":1,"description":"Nucleus sampling probability. 
Use this or temperature, not both."},"n":{"type":"integer","minimum":1,"maximum":128,"default":1,"description":"How many chat completion choices to generate for each input message."},"stream":{"type":"boolean","default":false,"description":"If set, partial message deltas will be sent as server-sent events."},"stop":{"oneOf":[{"type":"string"},{"type":"array","items":{"type":"string"},"maxItems":4}],"nullable":true,"description":"Up to 4 sequences where the API will stop generating further tokens."},"max_tokens":{"type":"integer","minimum":1,"nullable":true,"description":"Maximum number of tokens to generate in the chat completion."},"presence_penalty":{"type":"number","format":"float","minimum":-2,"maximum":2,"default":0,"description":"Penalize new tokens based on whether they appear in the text so far."},"frequency_penalty":{"type":"number","format":"float","minimum":-2,"maximum":2,"default":0,"description":"Penalize new tokens based on their existing frequency in the text."},"logit_bias":{"type":"object","additionalProperties":{"type":"integer"},"nullable":true,"description":"Modify the likelihood of specified tokens appearing in the completion."},"user":{"type":"string","description":"A unique identifier representing your end-user for abuse monitoring."},"seed":{"type":"integer","nullable":true,"description":"If specified, attempts to sample deterministically for reproducible outputs."},"tools":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionTool"},"description":"A list of tools the model may call."},"tool_choice":{"oneOf":[{"type":"string","enum":["none","auto","required"]},{"$ref":"#/components/schemas/ChatCompletionNamedToolChoice"}],"description":"Controls which (if any) tool is called by the model."},"response_format":{"$ref":"#/components/schemas/ResponseFormat"}}},"ChatMessage":{"type":"object","description":"A message in the chat 
conversation.","required":["role","content"],"properties":{"role":{"type":"string","enum":["system","user","assistant","tool"],"description":"The role of the message author."},"content":{"oneOf":[{"type":"string"},{"type":"array","items":{"$ref":"#/components/schemas/ChatContentPart"}}],"nullable":true,"description":"The contents of the message."},"name":{"type":"string","description":"An optional name for the participant."},"tool_calls":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionMessageToolCall"},"description":"Tool calls generated by the model (for assistant messages)."},"tool_call_id":{"type":"string","description":"Tool call ID that this message is responding to (for tool messages)."}}},"ChatContentPart":{"type":"object","description":"A content part in a message (for multimodal inputs).","required":["type"],"properties":{"type":{"type":"string","enum":["text","image_url"],"description":"The type of content part."},"text":{"type":"string","description":"The text content (for text type)."},"image_url":{"type":"object","properties":{"url":{"type":"string","description":"The URL of the image or base64 encoded image data."},"detail":{"type":"string","enum":["auto","low","high"],"default":"auto","description":"Image detail level."}}}}},"ChatCompletionMessageToolCall":{"type":"object","description":"A tool call generated by the model.","required":["id","type","function"],"properties":{"id":{"type":"string","description":"The ID of the tool call."},"type":{"type":"string","enum":["function"],"description":"The type of tool call."},"function":{"type":"object","required":["name","arguments"],"properties":{"name":{"type":"string","description":"The name of the function to call."},"arguments":{"type":"string","description":"The arguments to call the function with, as a JSON string."}}}}},"ChatCompletionTool":{"type":"object","description":"A tool the model may 
call.","required":["type","function"],"properties":{"type":{"type":"string","enum":["function"],"description":"The type of tool."},"function":{"$ref":"#/components/schemas/FunctionDefinition"}}},"FunctionDefinition":{"type":"object","description":"Definition of a function that can be called by the model.","required":["name"],"properties":{"name":{"type":"string","description":"The name of the function."},"description":{"type":"string","description":"A description of what the function does."},"parameters":{"type":"object","description":"The parameters the function accepts, described as a JSON Schema object."}}},"ChatCompletionNamedToolChoice":{"type":"object","description":"Specifies a tool the model should use.","required":["type","function"],"properties":{"type":{"type":"string","enum":["function"]},"function":{"type":"object","required":["name"],"properties":{"name":{"type":"string","description":"The name of the function to call."}}}}},"ResponseFormat":{"type":"object","description":"The format that the model must output.","properties":{"type":{"type":"string","enum":["text","json_object"],"default":"text","description":"The type of response format."}}},"ChatCompletionResponse":{"type":"object","description":"Represents a chat completion response returned by the model.","required":["id","object","created","model","choices"],"properties":{"id":{"type":"string","description":"A unique identifier for the chat completion."},"object":{"type":"string","enum":["chat.completion"],"description":"The object type, which is always \"chat.completion\"."},"created":{"type":"integer","format":"int64","description":"The Unix timestamp (in seconds) of when the chat completion was created."},"model":{"type":"string","description":"The model used for the chat completion."},"choices":{"type":"array","description":"A list of chat completion 
choices.","items":{"$ref":"#/components/schemas/ChatCompletionChoice"}},"usage":{"$ref":"#/components/schemas/CompletionUsage"},"system_fingerprint":{"type":"string","description":"A fingerprint representing the backend configuration."}}},"ChatCompletionChoice":{"type":"object","description":"A chat completion choice.","required":["index","message","finish_reason"],"properties":{"index":{"type":"integer","description":"The index of the choice in the list of choices."},"message":{"$ref":"#/components/schemas/ChatCompletionAssistantMessage"},"finish_reason":{"type":"string","enum":["stop","length","tool_calls","content_filter","function_call"],"nullable":true,"description":"The reason the model stopped generating tokens."},"logprobs":{"type":"object","nullable":true,"description":"Log probability information."}}},"ChatCompletionAssistantMessage":{"type":"object","description":"A message generated by the assistant.","required":["role","content"],"properties":{"role":{"type":"string","enum":["assistant"],"description":"The role of the author, always \"assistant\"."},"content":{"type":"string","nullable":true,"description":"The contents of the message."},"tool_calls":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionMessageToolCall"},"description":"Tool calls generated by the model."},"function_call":{"type":"object","deprecated":true,"description":"Deprecated in favor of tool_calls.","properties":{"name":{"type":"string"},"arguments":{"type":"string"}}}}},"CompletionUsage":{"type":"object","description":"Usage statistics for the completion request.","required":["prompt_tokens","completion_tokens","total_tokens"],"properties":{"prompt_tokens":{"type":"integer","format":"int64","description":"Number of tokens in the prompt."},"completion_tokens":{"type":"integer","format":"int64","description":"Number of tokens in the generated completion."},"total_tokens":{"type":"integer","format":"int64","description":"Total number of tokens used in the 
request."}}},"OpenAIErrorResponse":{"type":"object","description":"Error response following OpenAI's error format.","required":["error"],"properties":{"error":{"type":"object","required":["message","type"],"properties":{"message":{"type":"string","description":"A human-readable error message."},"type":{"type":"string","description":"The type of error."},"param":{"type":"string","nullable":true,"description":"The parameter that caused the error."},"code":{"type":"string","nullable":true,"description":"A machine-readable error code."}}}}}}},"paths":{"/v1/chat/completions":{"post":{"tags":["Chat"],"summary":"Create Chat Completion","description":"Creates a model response for the given chat conversation. This endpoint is OpenAI-compatible.","operationId":"createChatCompletion","parameters":[{"name":"OrgID","in":"header","description":"Organization ID; required if the request is made using an access token.","required":false,"schema":{"type":"string"}},{"name":"ProjectID","in":"header","description":"Project ID; required if the request is made using an access token.","required":false,"schema":{"type":"string"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionRequest"}}}},"responses":{"200":{"description":"Chat completion response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionResponse"}}}},"400":{"description":"Bad request - invalid parameters","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIErrorResponse"}}}},"401":{"description":"Unauthorized - invalid or missing API key","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIErrorResponse"}}}},"429":{"description":"Rate limit exceeded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIErrorResponse"}}}},"500":{"description":"Internal server error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIErrorResponse"}}}}}}}}}
```


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter, and the optional `goal` query parameter:

```
GET https://docs.ai.neevcloud.com/api-reference/model-routing/chat.md?ask=<question>&goal=<endgoal>
```

`ask` is the immediate question: it should be specific, self-contained, and written in natural language.
`goal` is optional and describes the broader end goal you are ultimately trying to accomplish on behalf of the user. GitBook uses it to tailor the answer towards what is most useful for that goal.

The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.