@@ -5,6 +5,8 @@
 	"encoding/json"
 	"errors"
 	"net/http"
+
+	"github.com/meguminnnnnnnnn/go-openai/jsonschema"
 )
 
 // Chat message role defined by the OpenAI API.
@@ -234,13 +236,49 @@ type ChatCompletionResponseFormatJSONSchema struct {
 	Strict      bool                   `json:"strict"`
 }
 
+func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) error {
+	type rawJSONSchema struct {
+		Name        string          `json:"name"`
+		Description string          `json:"description,omitempty"`
+		Schema      json.RawMessage `json:"schema"`
+		Strict      bool            `json:"strict"`
+	}
+	var raw rawJSONSchema
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+	r.Name = raw.Name
+	r.Description = raw.Description
+	r.Strict = raw.Strict
+	if len(raw.Schema) > 0 && string(raw.Schema) != "null" {
+		var d jsonschema.Definition
+		err := json.Unmarshal(raw.Schema, &d)
+		if err != nil {
+			return err
+		}
+		r.Schema = &d
+	}
+	return nil
+}
+
+// ChatCompletionRequestExtensions contains third-party OpenAI API extensions
+// (e.g., vendor-specific implementations like vLLM).
+type ChatCompletionRequestExtensions struct {
+	// GuidedChoice is a vLLM-specific extension that restricts the model's output
+	// to one of the predefined string choices provided in this field. This feature
+	// is used to constrain the model's responses to a controlled set of options,
+	// ensuring predictable and consistent outputs in scenarios where specific
+	// choices are required.
+	GuidedChoice []string `json:"guided_choice,omitempty"`
+}
+
 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
 	Model    string                  `json:"model"`
 	Messages []ChatCompletionMessage `json:"messages"`
 	// MaxTokens The maximum number of tokens that can be generated in the chat completion.
 	// This value can be used to control costs for text generated via API.
-	// This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
+	// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
 	MaxTokens int `json:"max_tokens,omitempty"`
 	// MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
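For illustration, the custom UnmarshalJSON above means a response_format payload can be decoded straight from the wire. A minimal sketch, assuming the module path is github.com/meguminnnnnnnnn/go-openai (inferred from the jsonschema import in this diff):

package main

import (
	"encoding/json"
	"fmt"

	openai "github.com/meguminnnnnnnnn/go-openai"
)

func main() {
	// A response_format JSON schema as it might appear on the wire.
	payload := []byte(`{"name":"weather","schema":{"type":"object"},"strict":true}`)

	var f openai.ChatCompletionResponseFormatJSONSchema
	if err := json.Unmarshal(payload, &f); err != nil {
		panic(err)
	}
	// The schema was decoded into a *jsonschema.Definition; an absent or
	// JSON-null "schema" would have left f.Schema nil.
	fmt.Println(f.Name, f.Strict, f.Schema != nil) // weather true true
}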
@@ -286,7 +324,15 @@ type ChatCompletionRequest struct {
 	ReasoningEffort string `json:"reasoning_effort,omitempty"`
 	// Metadata to store with the completion.
 	Metadata map[string]string `json:"metadata,omitempty"`
-
+	// Prediction is the configuration for a predicted output.
+	Prediction *Prediction `json:"prediction,omitempty"`
+	// ChatTemplateKwargs provides a way to add non-standard parameters to the request body.
+	// These additional kwargs are passed to the template renderer and are accessible from the
+	// chat template, e.g. Qwen3's thinking mode: "chat_template_kwargs": {"enable_thinking": false}.
+	// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
+	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
+	// ServiceTier specifies the latency tier to use for processing the request.
+	ServiceTier ServiceTier `json:"service_tier,omitempty"`
 	// Extra fields to be sent in the request.
 	// Useful for experimental features not yet officially supported.
 	extraFields map[string]any
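A usage sketch for the new ChatTemplateKwargs field. The model name is illustrative, and whether the kwarg has any effect depends on the serving backend's chat template (the Qwen3/vLLM case linked above):

package main

import (
	"encoding/json"
	"fmt"

	openai "github.com/meguminnnnnnnnn/go-openai"
)

func main() {
	req := openai.ChatCompletionRequest{
		Model: "Qwen/Qwen3-8B",
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Hello"},
		},
		// Forwarded to the server's chat template renderer,
		// e.g. to disable Qwen3's thinking mode on vLLM.
		ChatTemplateKwargs: map[string]any{"enable_thinking": false},
	}

	body, err := json.Marshal(req)
	if err != nil {
		panic(err)
	}
	// The body now carries "chat_template_kwargs":{"enable_thinking":false}.
	fmt.Println(string(body))
}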
@@ -386,6 +432,15 @@ const (
 	FinishReasonNull FinishReason = "null"
 )
 
+type ServiceTier string
+
+const (
+	ServiceTierAuto     ServiceTier = "auto"
+	ServiceTierDefault  ServiceTier = "default"
+	ServiceTierFlex     ServiceTier = "flex"
+	ServiceTierPriority ServiceTier = "priority"
+)
+
 func (r FinishReason) MarshalJSON() ([]byte, error) {
 	if r == FinishReasonNull || r == "" {
 		return []byte("null"), nil
@@ -418,6 +473,7 @@ type ChatCompletionResponse struct {
 	Usage               Usage                `json:"usage"`
 	SystemFingerprint   string               `json:"system_fingerprint"`
 	PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
+	ServiceTier         ServiceTier          `json:"service_tier,omitempty"`
 
 	httpHeader
 }