@@ -326,119 +326,206 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
326326 }
327327
328328 /**
329- * OpenAI Chat Completion Models:
330- *
329+ * OpenAI Chat Completion Models.
330+ * <p>
331+ * This enum provides a selective list of chat completion models available through the
332+ * OpenAI API, along with their key features and links to the official OpenAI
333+ * documentation for further details.
334+ * <p>
335+ * The models are grouped by their capabilities and intended use cases. For each
336+ * model, a brief description is provided, highlighting its strengths, limitations,
337+ * and any specific features. When available, the description also includes
338+ * information about the model's context window, maximum output tokens, and knowledge
339+ * cutoff date.
340+ * <p>
341+ * <b>References:</b>
331342 * <ul>
332- * <li><a href="https://platform.openai.com/docs/models/gpt-4o">GPT-4o</a></li>
333- * <li><a href="https://platform.openai.com/docs/models/gpt-4o-mini">GPT-4o
334- * mini</a></li>
335- * <li><a href="https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo">GPT-4
343+ * <li><a href="https://platform.openai.com/docs/models#gpt-4o">GPT-4o</a></li>
344+ * <li><a href="https://platform.openai.com/docs/models#gpt-4-and-gpt-4-turbo">GPT-4
336345 * and GPT-4 Turbo</a></li>
337- * <li><a href="https://platform.openai.com/docs/models/ gpt-3-5-turbo">GPT-3.5
346+ * <li><a href="https://platform.openai.com/docs/models#gpt-3-5-turbo">GPT-3.5
338347 * Turbo</a></li>
348+ * <li><a href="https://platform.openai.com/docs/models#o1-and-o1-mini">o1 and
349+ * o1-mini</a></li>
350+ * <li><a href="https://platform.openai.com/docs/models#o3-mini">o3-mini</a></li>
339351 * </ul>
340352 */
341353 public enum ChatModel implements ChatModelDescription {
342354
343355 /**
344- * Points to the most recent snapshot of the o1 model:o1-2024-12-17
356+ * <b>o1</b> is trained with reinforcement learning to perform complex reasoning.
357+ * It thinks before it answers, producing a long internal chain of thought before
358+ * responding to the user.
359+ * <p>
360+ * The latest o1 model supports both text and image inputs, and produces text
361+ * outputs (including Structured Outputs).
362+ * <p>
363+ * The knowledge cutoff for o1 is October, 2023.
364+ * <p>
345365 */
346366 O1 ("o1" ),
347367 /**
348- * Latest o1 model snapshot
349- */
350- O1_2024_12_17 ("o1-2024-12-17" ),
351- /**
352- * Points to the most recent snapshot of the o1 preview
353- * model:o1-preview-2024-09-12
368+ * <b>o1-preview</b> is trained with reinforcement learning to perform complex
369+ * reasoning. It thinks before it answers, producing a long internal chain of
370+ * thought before responding to the user.
371+ * <p>
372+ * The latest o1-preview model supports both text and image inputs, and produces
373+ * text outputs (including Structured Outputs).
374+ * <p>
375+ * The knowledge cutoff for o1-preview is October, 2023.
376+ * <p>
354377 */
355378 O1_PREVIEW ("o1-preview" ),
379+
356380 /**
357- * Latest o1 preview model snapshot
358- */
359- O1_PREVIEW_2024_09_12 ( "o1-preview-2024-09-12" ),
360- /**
361- * Points to the most recent o1-mini snapshot:o1-mini-2024-09-12
381+ * <b>o1-mini</b> is a faster and more affordable reasoning model compared to o1.
382+ * o1-mini currently only supports text inputs and outputs.
383+ * <p>
384+ * The knowledge cutoff for o1-mini is October, 2023.
385+ * <p>
362386 */
363387 O1_MINI ("o1-mini" ),
364388 /**
365- * Latest o1-mini model snapshot
389+ * <b>o3-mini</b> is our most recent small reasoning model, providing high
390+ * intelligence at the same cost and latency targets of o1-mini. o3-mini also
391+ * supports key developer features, like Structured Outputs, function calling,
392+ * Batch API, and more. Like other models in the o-series, it is designed to excel
393+ * at science, math, and coding tasks.
394+ * <p>
395+ * The knowledge cutoff for o3-mini models is October, 2023.
396+ * <p>
366397 */
367- O1_MINI_2024_09_12 ( "o1 -mini-2024-09-12 " ),
398+ O3_MINI("o3-mini"),
368399
369400 /**
370- * Multimodal flagship model that’s cheaper and faster than GPT-4 Turbo. Currently
371- * points to gpt-4o-2024-05-13.
401+ * <b>GPT-4o ("omni")</b> is our versatile, high-intelligence flagship model. It
402+ * accepts both text and image inputs and produces text outputs (including
403+ * Structured Outputs).
404+ * <p>
405+ * The knowledge cutoff for GPT-4o models is October, 2023.
406+ * <p>
372407 */
373408 GPT_4_O ("gpt-4o" ),
409+ /**
410+ * The <b>chatgpt-4o-latest</b> model ID continuously points to the version of
411+ * GPT-4o used in ChatGPT. It is updated frequently when there are significant
412+ * changes to ChatGPT's GPT-4o model.
413+ * <p>
414+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
415+ * cutoff: October, 2023.
416+ */
417+ CHATGPT_4_O_LATEST ("chatgpt-4o-latest" ),
374418
375419 /**
376- * Preview release for audio inputs in chat completions.
420+ * <b>GPT-4o Audio</b> is a preview release model that accepts audio inputs and
421+ * outputs and can be used in the Chat Completions REST API.
422+ * <p>
423+ * The knowledge cutoff for GPT-4o Audio models is October, 2023.
424+ * <p>
377425 */
378426 GPT_4_O_AUDIO_PREVIEW ("gpt-4o-audio-preview" ),
379427
380428 /**
381- * Affordable and intelligent small model for fast, lightweight tasks. GPT-4o mini
382- * is cheaper and more capable than GPT-3.5 Turbo. Currently points to
383- * gpt-4o-mini-2024-07-18.
429+ * <b>GPT-4o-mini Audio</b> is a preview release model that accepts audio inputs
430+ * and outputs and can be used in the Chat Completions REST API.
431+ * <p>
432+ * The knowledge cutoff for GPT-4o-mini Audio models is October, 2023.
433+ * <p>
434+ */
435+ GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
436+
437+ /**
438+ * <b>GPT-4o-mini</b> is a fast, affordable small model for focused tasks. It
439+ * accepts both text and image inputs and produces text outputs (including
440+ * Structured Outputs). It is ideal for fine-tuning, and model outputs from a
441+ * larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar
442+ * results at lower cost and latency.
443+ * <p>
444+ * The knowledge cutoff for GPT-4o-mini models is October, 2023.
445+ * <p>
384446 */
385447 GPT_4_O_MINI ("gpt-4o-mini" ),
386448
387449 /**
388- * GPT-4 Turbo with Vision The latest GPT-4 Turbo model with vision capabilities.
389- * Vision requests can now use JSON mode and function calling. Currently points to
390- * gpt-4-turbo-2024-04-09.
450+ * <b>GPT-4 Turbo</b> is a high-intelligence GPT model with vision capabilities,
451+ * usable in Chat Completions. Vision requests can now use JSON mode and function
452+ * calling.
453+ * <p>
454+ * The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.
455+ * <p>
391456 */
392457 GPT_4_TURBO ("gpt-4-turbo" ),
393458
394459 /**
395- * GPT-4 Turbo with Vision model. Vision requests can now use JSON mode and
396- * function calling.
460+ * <b>GPT-4-0125-preview</b> is the latest GPT-4 model intended to reduce cases of
461+ * “laziness” where the model doesn’t complete a task.
462+ * <p>
463+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
397464 */
398- GPT_4_TURBO_2024_04_09 ("gpt-4-turbo-2024-04-09 " ),
465+ GPT_4_0125_PREVIEW("gpt-4-0125-preview"),
399466
400467 /**
401- * (New) GPT-4 Turbo - latest GPT-4 model intended to reduce cases of “laziness”
402- * where the model doesn’t complete a task. Returns a maximum of 4,096 output
403- * tokens. Context window: 128k tokens
468+ * <b>GPT-4-1106-preview</b> is a GPT-4 Turbo preview snapshot featuring improved
468+ * instruction following, JSON mode, reproducible outputs, and parallel function calling.
469+ * <p>
470+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
404471 */
405- GPT_4_0125_PREVIEW ("gpt-4-0125 -preview" ),
472+ GPT_4_1106_PREVIEW("gpt-4-1106-preview"),
406473
407474 /**
408- * Currently points to gpt-4-0125-preview - model featuring improved instruction
409- * following, JSON mode, reproducible outputs, parallel function calling, and
410- * more. Returns a maximum of 4,096 output tokens Context window: 128k tokens
475+ * <b>GPT-4 Turbo Preview</b> is a high-intelligence GPT model usable in Chat
476+ * Completions.
477+ * <p>
478+ * Currently points to {@link #GPT_4_0125_PREVIEW}.
479+ * <p>
480+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
411481 */
412482 GPT_4_TURBO_PREVIEW ("gpt-4-turbo-preview" ),
413483
414484 /**
415- * Currently points to gpt-4-0613. Snapshot of gpt-4 from June 13th 2023 with
416- * improved function calling support. Context window: 8k tokens
485+ * <b>GPT-4</b> is an older version of a high-intelligence GPT model, usable in
486+ * Chat Completions.
487+ * <p>
488+ * Currently points to {@link #GPT_4_0613}.
489+ * <p>
490+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
417491 */
418492 GPT_4 ("gpt-4" ),
419-
420493 /**
421- * Currently points to gpt-3.5-turbo-0125. model with higher accuracy at
422- * responding in requested formats and a fix for a bug which caused a text
423- * encoding issue for non-English language function calls. Returns a maximum of
424- * 4,096 Context window: 16k tokens
494+ * GPT-4 model snapshot.
495+ * <p>
496+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
425497 */
426- GPT_3_5_TURBO ("gpt-3.5-turbo" ),
498+ GPT_4_0613 ("gpt-4-0613" ),
499+ /**
500+ * GPT-4 model snapshot.
501+ * <p>
502+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
503+ */
504+ GPT_4_0314 ("gpt-4-0314" ),
427505
428506 /**
429- * (new) The latest GPT-3.5 Turbo model with higher accuracy at responding in
430- * requested formats and a fix for a bug which caused a text encoding issue for
431- * non-English language function calls. Returns a maximum of 4,096 Context window:
432- * 16k tokens
507+ * <b>GPT-3.5 Turbo</b> models can understand and generate natural language or
508+ * code and have been optimized for chat using the Chat Completions API but work
509+ * well for non-chat tasks as well.
510+ * <p>
511+ * As of July 2024, {@link #GPT_4_O_MINI} should be used in place of
512+ * gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
513+ * gpt-3.5-turbo is still available for use in the API.
514+ * <p>
516+ * Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
517+ * cutoff: September, 2021.
433518 */
434- GPT_3_5_TURBO_0125 ("gpt-3.5-turbo-0125 " ),
519+ GPT_3_5_TURBO ("gpt-3.5-turbo" ),
435520
436521 /**
437- * GPT-3.5 Turbo model with improved instruction following, JSON mode,
438- * reproducible outputs, parallel function calling, and more. Returns a maximum of
439- * 4,096 output tokens. Context window: 16k tokens.
522+ * <b>GPT-3.5 Turbo Instruct</b> has similar capabilities to GPT-3 era models.
523+ * Compatible with the legacy Completions endpoint and not Chat Completions.
524+ * <p>
525+ * Context window: 4,096 tokens. Max output tokens: 4,096 tokens. Knowledge
526+ * cutoff: September, 2021.
440527 */
441- GPT_3_5_TURBO_1106 ("gpt-3.5-turbo-1106 " );
528+ GPT_3_5_TURBO_INSTRUCT("gpt-3.5-turbo-instruct");
442529
443530 public final String value ;
444531
0 commit comments