@@ -288,101 +288,189 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
288288 * information about the model's context window, maximum output tokens, and knowledge
289289 * cutoff date.
290290 * <p>
291- * <b>References:</b>
292- * <ul>
293- * <li><a href="https://platform.openai.com/docs/models#gpt-4o">GPT-4o</a></li>
294- * <li><a href="https://platform.openai.com/docs/models#gpt-4-and-gpt-4-turbo">GPT-4
295- * and GPT-4 Turbo</a></li>
296- * <li><a href="https://platform.openai.com/docs/models#gpt-3-5-turbo">GPT-3.5
297- * Turbo</a></li>
298- * <li><a href="https://platform.openai.com/docs/models#o1-and-o1-mini">o1 and
299- * o1-mini</a></li>
300- * <li><a href="https://platform.openai.com/docs/models#o3-mini">o3-mini</a></li>
301- * </ul>
291+ * <b>References:</b> <a href="https://platform.openai.com/docs/models">OpenAI Models
292+ * Documentation</a>
302293 */
303294 public enum ChatModel implements ChatModelDescription {
304295
296+ // --- Reasoning Models ---
297+
305298 /**
306- * <b>o1 </b> is trained with reinforcement learning to perform complex reasoning.
307- * It thinks before it answers, producing a long internal chain of thought before
308- * responding to the user .
299+	 * <b>o4-mini</b> is the latest small o-series model. It's optimized for fast,
300+	 * effective reasoning with exceptionally efficient performance in coding and
301+	 * visual tasks.
309302 * <p>
310- * The latest o1 model supports both text and image inputs, and produces text
311- * outputs (including Structured Outputs) .
303+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
304+	 * cutoff: June 1, 2024.
312305 * <p>
313- * The knowledge cutoff for o1 is October, 2023.
306+ * Model ID: o4-mini
314307 * <p>
308+ * See: <a href="https://platform.openai.com/docs/models/o4-mini">o4-mini</a>
315309 */
316- O1 ("o1" ),
310+ O4_MINI ("o4-mini" ),
311+
317312 /**
318- * <b>o1-preview</b> is trained with reinforcement learning to perform complex
319- * reasoning. It thinks before it answers, producing a long internal chain of
320- * thought before responding to the user.
313+ * <b>o3</b> is a well-rounded and powerful model across domains. It sets a new
314+ * standard for math, science, coding, and visual reasoning tasks. It also excels
315+ * at technical writing and instruction-following. Use it to think through
316+ * multi-step problems that involve analysis across text, code, and images.
321317 * <p>
322- * The latest o1-preview model supports both text and image inputs, and produces
323- * text outputs (including Structured Outputs) .
318+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
319+	 * cutoff: June 1, 2024.
324320 * <p>
325- * The knowledge cutoff for o1-preview is October, 2023.
321+ * Model ID: o3
326322 * <p>
323+ * See: <a href="https://platform.openai.com/docs/models/o3">o3</a>
327324 */
328- O1_PREVIEW ("o1-preview" ),
325+ O3 ("o3" ),
326+
327+ /**
328+ * <b>o3-mini</b> is a small reasoning model, providing high intelligence at cost
329+ * and latency targets similar to o1-mini. o3-mini supports key developer
330+ * features, like Structured Outputs, function calling, Batch API.
331+	 * <p>
334+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
335+ * cutoff: October 1, 2023.
336+ * <p>
337+ * Model ID: o3-mini
338+ * <p>
339+ * See: <a href="https://platform.openai.com/docs/models/o3-mini">o3-mini</a>
340+ */
341+ O3_MINI ("o3-mini" ),
342+
343+ /**
344+ * The <b>o1</b> series of models are trained with reinforcement learning to
345+ * perform complex reasoning. o1 models think before they answer, producing a long
346+ * internal chain of thought before responding to the user.
347+ * <p>
348+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
349+ * cutoff: October 1, 2023.
350+ * <p>
351+ * Model ID: o1
352+ * <p>
353+ * See: <a href="https://platform.openai.com/docs/models/o1">o1</a>
354+ */
355+ O1 ("o1" ),
329356
330357 /**
331358 * <b>o1-mini</b> is a faster and more affordable reasoning model compared to o1.
332359 * o1-mini currently only supports text inputs and outputs.
333360 * <p>
334- * The knowledge cutoff for o1-mini is October, 2023.
361+ * Context window: 128,000 tokens. Max output tokens: 65,536 tokens. Knowledge
362+ * cutoff: October 1, 2023.
335363 * <p>
364+ * Model ID: o1-mini
365+ * <p>
366+ * See: <a href="https://platform.openai.com/docs/models/o1-mini">o1-mini</a>
336367 */
337368 O1_MINI ("o1-mini" ),
369+
338370 /**
339- * <b>o3-mini</b> is our most recent small reasoning model, providing high
340- * intelligence at the same cost and latency targets of o1-mini. o3-mini also
341- * supports key developer features, like Structured Outputs, function calling,
342- * Batch API, and more. Like other models in the o-series, it is designed to excel
343- * at science, math, and coding tasks.
371+ * The <b>o1-pro</b> model, part of the o1 series trained with reinforcement
372+ * learning for complex reasoning, uses more compute to think harder and provide
373+ * consistently better answers.
344374 * <p>
345- * The knowledge cutoff for o3-mini models is October, 2023.
375+ * Note: o1-pro is available in the Responses API only to enable support for
376+ * multi-turn model interactions and other advanced API features.
377+ * <p>
378+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
379+ * cutoff: October 1, 2023.
380+ * <p>
381+ * Model ID: o1-pro
346382 * <p>
383+ * See: <a href="https://platform.openai.com/docs/models/o1-pro">o1-pro</a>
347384 */
348- O3_MINI ("o3-mini" ),
385+ O1_PRO ("o1-pro" ),
386+
387+ // --- Flagship Models ---
349388
350389 /**
351- * <b>GPT-4o ("omni")</b> is our versatile, high-intelligence flagship model. It
352- * accepts both text and image inputs and produces text outputs (including
353- * Structured Outputs).
390+ * <b>GPT-4.1</b> is the flagship model for complex tasks. It is well suited for
391+ * problem solving across domains.
392+ * <p>
393+ * Context window: 1,047,576 tokens. Max output tokens: 32,768 tokens. Knowledge
394+ * cutoff: June 1, 2024.
354395 * <p>
355- * The knowledge cutoff for GPT-4o models is October, 2023.
396+ * Model ID: gpt-4.1
356397 * <p>
398+ * See: <a href="https://platform.openai.com/docs/models/gpt-4.1">gpt-4.1</a>
399+ */
400+ GPT_4_1 ("gpt-4.1" ),
401+
402+ /**
403+ * <b>GPT-4o</b> (“o” for “omni”) is the versatile, high-intelligence flagship
404+ * model. It accepts both text and image inputs, and produces text outputs
405+ * (including Structured Outputs). It is considered the best model for most tasks,
406+ * and the most capable model outside of the o-series models.
407+ * <p>
408+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
409+ * cutoff: October 1, 2023.
410+ * <p>
411+ * Model ID: gpt-4o
412+ * <p>
413+ * See: <a href="https://platform.openai.com/docs/models/gpt-4o">gpt-4o</a>
357414 */
358415 GPT_4_O ("gpt-4o" ),
416+
359417 /**
360418 * The <b>chatgpt-4o-latest</b> model ID continuously points to the version of
361419 * GPT-4o used in ChatGPT. It is updated frequently when there are significant
362420 * changes to ChatGPT's GPT-4o model.
363421 * <p>
364422 * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
365- * cutoff: October, 2023.
423+ * cutoff: October 1, 2023.
424+ * <p>
425+ * Model ID: chatgpt-4o-latest
426+ * <p>
427+ * See: <a href=
428+ * "https://platform.openai.com/docs/models/chatgpt-4o-latest">chatgpt-4o-latest</a>
366429 */
367430 CHATGPT_4_O_LATEST ("chatgpt-4o-latest" ),
368431
369432 /**
370- * <b>GPT-4o Audio</b> is a preview release model that accepts audio inputs and
371- * outputs and can be used in the Chat Completions REST API.
433+ * <b>GPT-4o Audio Preview</b> represents a preview release of models that accept
434+ * audio inputs and outputs via the Chat Completions REST API.
435+ * <p>
436+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
437+ * cutoff: October 1, 2023.
372438 * <p>
373- * The knowledge cutoff for GPT -4o Audio models is October, 2023.
439+	 * Model ID: gpt-4o-audio-preview
374440 * <p>
441+ * See: <a href=
442+ * "https://platform.openai.com/docs/models/gpt-4o-audio-preview">gpt-4o-audio-preview</a>
375443 */
376444 GPT_4_O_AUDIO_PREVIEW ("gpt-4o-audio-preview" ),
377445
446+ // --- Cost-Optimized Models ---
447+
378448 /**
379- * <b>GPT-4o-mini Audio</b> is a preview release model that accepts audio inputs
380- * and outputs and can be used in the Chat Completions REST API.
449+ * <b>GPT-4.1-mini</b> provides a balance between intelligence, speed, and cost
450+ * that makes it an attractive model for many use cases.
451+ * <p>
452+ * Context window: 1,047,576 tokens. Max output tokens: 32,768 tokens. Knowledge
453+ * cutoff: June 1, 2024.
381454 * <p>
382- * The knowledge cutoff for GPT-4o -mini Audio models is October, 2023.
455+	 * Model ID: gpt-4.1-mini
383456 * <p>
457+ * See:
458+ * <a href="https://platform.openai.com/docs/models/gpt-4.1-mini">gpt-4.1-mini</a>
384459 */
385- GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
460+ GPT_4_1_MINI ("gpt-4.1-mini" ),
461+
462+ /**
463+ * <b>GPT-4.1-nano</b> is the fastest, most cost-effective GPT-4.1 model.
464+ * <p>
465+ * Context window: 1,047,576 tokens. Max output tokens: 32,768 tokens. Knowledge
466+ * cutoff: June 1, 2024.
467+ * <p>
468+ * Model ID: gpt-4.1-nano
469+ * <p>
470+ * See:
471+ * <a href="https://platform.openai.com/docs/models/gpt-4.1-nano">gpt-4.1-nano</a>
472+ */
473+ GPT_4_1_NANO ("gpt-4.1-nano" ),
386474
387475 /**
388476 * <b>GPT-4o-mini</b> is a fast, affordable small model for focused tasks. It
@@ -391,80 +479,106 @@ public enum ChatModel implements ChatModelDescription {
391479 * larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar
392480 * results at lower cost and latency.
393481 * <p>
394- * The knowledge cutoff for GPT-4o-mini models is October, 2023.
482+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
483+ * cutoff: October 1, 2023.
484+ * <p>
485+ * Model ID: gpt-4o-mini
395486 * <p>
487+ * See:
488+ * <a href="https://platform.openai.com/docs/models/gpt-4o-mini">gpt-4o-mini</a>
396489 */
397490 GPT_4_O_MINI ("gpt-4o-mini" ),
398491
399492 /**
400- * <b>GPT-4 Turbo</b> is a high-intelligence GPT model with vision capabilities,
401- * usable in Chat Completions. Vision requests can now use JSON mode and function
402- * calling.
493+ * <b>GPT-4o-mini Audio Preview</b> is a preview release model that accepts audio
494+ * inputs and outputs and can be used in the Chat Completions REST API.
403495 * <p>
404- * The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.
496+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
497+ * cutoff: October 1, 2023.
498+ * <p>
499+ * Model ID: gpt-4o-mini-audio-preview
405500 * <p>
501+ * See: <a href=
502+ * "https://platform.openai.com/docs/models/gpt-4o-mini-audio-preview">gpt-4o-mini-audio-preview</a>
406503 */
407- GPT_4_TURBO ("gpt-4-turbo" ),
504+ GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
505+
506+ // --- Realtime Models ---
408507
409508 /**
410- * <b>GPT-4-0125-preview</b> is the latest GPT-4 model intended to reduce cases of
411- * “laziness” where the model doesn’t complete a task.
509+	 * The <b>GPT-4o Realtime</b> model is capable of responding to audio and text inputs
510+ * in realtime over WebRTC or a WebSocket interface.
511+ * <p>
512+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
513+ * cutoff: October 1, 2023.
514+ * <p>
515+ * Model ID: gpt-4o-realtime-preview
412516 * <p>
413- * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
517+ * See: <a href=
518+ * "https://platform.openai.com/docs/models/gpt-4o-realtime-preview">gpt-4o-realtime-preview</a>
414519 */
415- GPT_4_0125_PREVIEW ("gpt-4-0125 -preview" ),
520+ GPT_4O_REALTIME_PREVIEW ("gpt-4o-realtime -preview" ),
416521
417522 /**
418- * Currently points to {@link #GPT_4_0125_PREVIEW}.
523+	 * The <b>GPT-4o-mini Realtime</b> model is capable of responding to audio and text
524+ * inputs in realtime over WebRTC or a WebSocket interface.
525+ * <p>
526+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
527+ * cutoff: October 1, 2023.
528+ * <p>
529+ * Model ID: gpt-4o-mini-realtime-preview
419530 * <p>
420- * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
531+ * See: <a href=
532+ * "https://platform.openai.com/docs/models/gpt-4o-mini-realtime-preview">gpt-4o-mini-realtime-preview</a>
421533 */
422- GPT_4_1106_PREVIEW ("gpt-4-1106-preview" ),
534+ GPT_4O_MINI_REALTIME_PREVIEW ("gpt-4o-mini-realtime-preview\n " ),
535+
536+ // --- Older GPT Models ---
423537
424538 /**
425- * <b>GPT-4 Turbo Preview</b> is a high-intelligence GPT model usable in Chat
426- * Completions.
539+ * <b>GPT-4 Turbo</b> is the next generation of GPT-4, an older high-intelligence
540+ * GPT model. It was designed to be a cheaper, better version of GPT-4. Today, we
541+ * recommend using a newer model like GPT-4o.
542+ * <p>
543+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
544+	 * cutoff: December 1, 2023.
427545 * <p>
428- * Currently points to {@link #GPT_4_0125_PREVIEW}.
546+ * Model ID: gpt-4-turbo
429547 * <p>
430- * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
548+ * See:
549+ * <a href="https://platform.openai.com/docs/models/gpt-4-turbo">gpt-4-turbo</a>
431550 */
432- GPT_4_TURBO_PREVIEW ("gpt-4-turbo-preview " ),
551+ GPT_4_TURBO ("gpt-4-turbo" ),
433552
434553 /**
435554 * <b>GPT-4</b> is an older version of a high-intelligence GPT model, usable in
436- * Chat Completions.
437- * <p>
438- * Currently points to {@link #GPT_4_0613}.
555+ * Chat Completions. Vision capabilities may not be available.
439556 * <p>
440- * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
441- */
442- GPT_4 ("gpt-4" ),
443- /**
444- * GPT-4 model snapshot.
557+	 * Context window: 8,192 tokens. Max output tokens: 8,192 tokens. Knowledge
558+	 * cutoff: December 1, 2023.
445559 * <p>
446- * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
447- */
448- GPT_4_0613 ("gpt-4-0613" ),
449- /**
450- * GPT-4 model snapshot.
560+ * Model ID: gpt-4
451561 * <p>
452- * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
562+ * See: <a href="https://platform.openai.com/docs/models/gpt-4">gpt-4</a>
453563 */
454- GPT_4_0314 ("gpt-4-0314 " ),
564+ GPT_4 ("gpt-4" ),
455565
456566 /**
457567 * <b>GPT-3.5 Turbo</b> models can understand and generate natural language or
458568 * code and have been optimized for chat using the Chat Completions API but work
459- * well for non-chat tasks as well.
460- * <p>
461- * As of July 2024, {@link #GPT_4_O_MINI} should be used in place of
462- * gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
463- * gpt-3.5-turbo is still available for use in the API.
569+ * well for non-chat tasks as well. Generally lower cost but less capable than
570+ * GPT-4 models.
464571 * <p>
572+ * As of July 2024, GPT-4o mini is recommended over gpt-3.5-turbo for most use
573+ * cases.
465574 * <p>
466575 * Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
467576 * cutoff: September, 2021.
577+ * <p>
578+ * Model ID: gpt-3.5-turbo
579+ * <p>
580+ * See: <a href=
581+ * "https://platform.openai.com/docs/models/gpt-3.5-turbo">gpt-3.5-turbo</a>
468582 */
469583 GPT_3_5_TURBO ("gpt-3.5-turbo" ),
470584
0 commit comments