@@ -326,119 +326,326 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
326326 }
327327
328328 /**
329- * OpenAI Chat Completion Models:
330- *
329+ * OpenAI Chat Completion Models.
330+ * <p>
331+ * This enum provides a comprehensive list of chat completion models available through
332+ * the OpenAI API, along with their key features and links to the official OpenAI
333+ * documentation for further details.
334+ * <p>
335+ * The models are grouped by their capabilities and intended use cases. For each
336+ * model, a brief description is provided, highlighting its strengths, limitations,
337+ * and any specific features. When available, the description also includes
338+ * information about the model's context window, maximum output tokens, and knowledge
339+ * cutoff date.
340+ * <p>
341+ * <b>References:</b>
331342 * <ul>
332- * <li><a href="https://platform.openai.com/docs/models/gpt-4o">GPT-4o</a></li>
333- * <li><a href="https://platform.openai.com/docs/models/gpt-4o-mini">GPT-4o
334- * mini</a></li>
335- * <li><a href="https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo">GPT-4
343+ * <li><a href="https://platform.openai.com/docs/models#gpt-4o">GPT-4o</a></li>
344+ * <li><a href="https://platform.openai.com/docs/models#gpt-4-and-gpt-4-turbo">GPT-4
336345 * and GPT-4 Turbo</a></li>
337- * <li><a href="https://platform.openai.com/docs/models/ gpt-3-5-turbo">GPT-3.5
346+	 * <li><a href="https://platform.openai.com/docs/models#gpt-3-5-turbo">GPT-3.5
338347 * Turbo</a></li>
348+ * <li><a href="https://platform.openai.com/docs/models#o1-and-o1-mini">o1 and
349+ * o1-mini</a></li>
350+ * <li><a href="https://platform.openai.com/docs/models#o3-mini">o3-mini</a></li>
339351 * </ul>
340352 */
341353 public enum ChatModel implements ChatModelDescription {
342354
343355 /**
344- * Points to the most recent snapshot of the o1 model:o1-2024-12-17
356+ * <b>o1</b> is trained with reinforcement learning to perform complex reasoning.
357+ * It thinks before it answers, producing a long internal chain of thought before
358+ * responding to the user.
359+ * <p>
360+ * The latest o1 model supports both text and image inputs, and produces text
361+ * outputs (including Structured Outputs).
362+ * <p>
363+ * The knowledge cutoff for o1 is October, 2023.
364+ * <p>
365+ * Currently points to {@link #O1_2024_12_17}.
345366 */
346367 O1 ("o1" ),
347368 /**
348- * Latest o1 model snapshot
369+ * Latest o1 model snapshot. Supports both text and image inputs, and produces
370+ * text outputs (including Structured Outputs).
371+ * <p>
372+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
373+ * cutoff: October, 2023.
349374 */
350375 O1_2024_12_17 ("o1-2024-12-17" ),
351376 /**
352- * Points to the most recent snapshot of the o1 preview
353- * model:o1-preview-2024-09-12
377+ * <b>o1-preview</b> is trained with reinforcement learning to perform complex
378+ * reasoning. It thinks before it answers, producing a long internal chain of
379+ * thought before responding to the user.
380+ * <p>
381+ * The latest o1-preview model supports both text and image inputs, and produces
382+ * text outputs (including Structured Outputs).
383+ * <p>
384+ * The knowledge cutoff for o1-preview is October, 2023.
385+ * <p>
386+ * Currently points to {@link #O1_PREVIEW_2024_09_12}.
354387 */
355388 O1_PREVIEW ("o1-preview" ),
356389 /**
357- * Latest o1 preview model snapshot
390+ * Latest o1-preview model snapshot. Supports both text and image inputs, and
391+ * produces text outputs (including Structured Outputs).
392+ * <p>
393+ * Context window: 128,000 tokens. Max output tokens: 32,768 tokens. Knowledge
394+ * cutoff: October, 2023.
358395 */
359396 O1_PREVIEW_2024_09_12 ("o1-preview-2024-09-12" ),
360397 /**
361- * Points to the most recent o1-mini snapshot:o1-mini-2024-09-12
398+ * <b>o1-mini</b> is a faster and more affordable reasoning model compared to o1.
399+ * o1-mini currently only supports text inputs and outputs.
400+ * <p>
401+ * The knowledge cutoff for o1-mini is October, 2023.
402+ * <p>
403+ * Currently points to {@link #O1_MINI_2024_09_12}.
362404 */
363405 O1_MINI ("o1-mini" ),
364406 /**
365- * Latest o1-mini model snapshot
407+ * Latest o1-mini model snapshot. Supports only text inputs and outputs.
408+ * <p>
409+ * Context window: 128,000 tokens. Max output tokens: 65,536 tokens. Knowledge
410+ * cutoff: October, 2023.
366411 */
367412 O1_MINI_2024_09_12 ("o1-mini-2024-09-12" ),
368413
369414 /**
370- * Multimodal flagship model that’s cheaper and faster than GPT-4 Turbo. Currently
371- * points to gpt-4o-2024-05-13.
415+		 * <b>o3-mini</b> is OpenAI's most recent small reasoning model, providing high
416+ * intelligence at the same cost and latency targets of o1-mini. o3-mini also
417+ * supports key developer features, like Structured Outputs, function calling,
418+ * Batch API, and more. Like other models in the o-series, it is designed to excel
419+ * at science, math, and coding tasks.
420+ * <p>
421+ * The knowledge cutoff for o3-mini models is October, 2023.
422+ * <p>
423+ * Currently points to {@link #O3_MINI_2025_01_31}.
424+ */
425+ O3_MINI ("o3-mini" ),
426+ /**
427+ * Latest o3-mini model snapshot.
428+ * <p>
429+ * Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
430+ * cutoff: October, 2023.
431+ */
432+ O3_MINI_2025_01_31 ("o3-mini-2025-01-31" ),
433+
434+ /**
435+		 * <b>GPT-4o ("omni")</b> is OpenAI's versatile, high-intelligence flagship model. It
436+ * accepts both text and image inputs and produces text outputs (including
437+ * Structured Outputs).
438+ * <p>
439+ * The knowledge cutoff for GPT-4o models is October, 2023.
440+ * <p>
441+ * Currently points to {@link #GPT_4_O_2024_08_06}.
372442 */
373443 GPT_4_O ("gpt-4o" ),
444+ /**
445+ * The <b>chatgpt-4o-latest</b> model ID continuously points to the version of
446+ * GPT-4o used in ChatGPT. It is updated frequently when there are significant
447+ * changes to ChatGPT's GPT-4o model.
448+ * <p>
449+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
450+ * cutoff: October, 2023.
451+ */
452+ CHATGPT_4_O_LATEST ("chatgpt-4o-latest" ),
453+ /**
454+ * GPT-4o model snapshot.
455+ * <p>
456+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
457+ * cutoff: October, 2023.
458+ */
459+ GPT_4_O_2024_11_20 ("gpt-4o-2024-11-20" ),
460+ /**
461+ * GPT-4o model snapshot.
462+ * <p>
463+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
464+ * cutoff: October, 2023.
465+ */
466+ GPT_4_O_2024_08_06 ("gpt-4o-2024-08-06" ),
467+ /**
468+ * GPT-4o model snapshot.
469+ * <p>
470+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
471+ * cutoff: October, 2023.
472+ */
473+ GPT_4_O_2024_05_13 ("gpt-4o-2024-05-13" ),
374474
375475 /**
376- * Preview release for audio inputs in chat completions.
476+ * <b>GPT-4o Audio</b> is a preview release model that accepts audio inputs and
477+ * outputs and can be used in the Chat Completions REST API.
478+ * <p>
479+ * The knowledge cutoff for GPT-4o Audio models is October, 2023.
480+ * <p>
481+ * Currently points to {@link #GPT_4_O_AUDIO_PREVIEW_2024_12_17}.
377482 */
378483 GPT_4_O_AUDIO_PREVIEW ("gpt-4o-audio-preview" ),
379484
380485 /**
381- * Affordable and intelligent small model for fast, lightweight tasks. GPT-4o mini
382- * is cheaper and more capable than GPT-3.5 Turbo. Currently points to
383- * gpt-4o-mini-2024-07-18.
486+ * GPT-4o Audio model snapshot.
487+ * <p>
488+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
489+ * cutoff: October, 2023.
490+ */
491+ GPT_4_O_AUDIO_PREVIEW_2024_12_17 ("gpt-4o-audio-preview-2024-12-17" ),
492+
493+ /**
494+ * GPT-4o Audio model snapshot.
495+ * <p>
496+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
497+ * cutoff: October, 2023.
498+ */
499+ GPT_4_O_AUDIO_PREVIEW_2024_10_01 ("gpt-4o-audio-preview-2024-10-01" ),
500+
501+ /**
502+ * <b>GPT-4o-mini Audio</b> is a preview release model that accepts audio inputs
503+ * and outputs and can be used in the Chat Completions REST API.
504+ * <p>
505+ * The knowledge cutoff for GPT-4o-mini Audio models is October, 2023.
506+ * <p>
507+ * Currently points to {@link #GPT_4_O_MINI_AUDIO_PREVIEW_2024_12_17}.
508+ */
509+ GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
510+
511+ /**
512+ * GPT-4o-mini Audio model snapshot.
513+ * <p>
514+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
515+ * cutoff: October, 2023.
516+ */
517+ GPT_4_O_MINI_AUDIO_PREVIEW_2024_12_17 ("gpt-4o-mini-audio-preview-2024-12-17" ),
518+
519+ /**
520+ * <b>GPT-4o-mini</b> is a fast, affordable small model for focused tasks. It
521+ * accepts both text and image inputs and produces text outputs (including
522+ * Structured Outputs). It is ideal for fine-tuning, and model outputs from a
523+ * larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar
524+ * results at lower cost and latency.
525+ * <p>
526+ * The knowledge cutoff for GPT-4o-mini models is October, 2023.
527+ * <p>
528+ * Currently points to {@link #GPT_4_O_MINI_2024_07_18}.
384529 */
385530 GPT_4_O_MINI ("gpt-4o-mini" ),
386531
387532 /**
388- * GPT-4 Turbo with Vision The latest GPT-4 Turbo model with vision capabilities.
389- * Vision requests can now use JSON mode and function calling. Currently points to
390- * gpt-4-turbo-2024-04-09.
533+ * GPT-4o-mini model snapshot.
534+ * <p>
535+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
536+ * cutoff: October, 2023.
537+ */
538+ GPT_4_O_MINI_2024_07_18 ("gpt-4o-mini-2024-07-18" ),
539+
540+ /**
541+ * <b>GPT-4 Turbo</b> is a high-intelligence GPT model with vision capabilities,
542+ * usable in Chat Completions. Vision requests can now use JSON mode and function
543+ * calling.
544+ * <p>
545+ * The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.
546+ * <p>
547+ * Currently points to {@link #GPT_4_TURBO_2024_04_09}.
391548 */
392549 GPT_4_TURBO ("gpt-4-turbo" ),
393550
394551 /**
395- * GPT-4 Turbo with Vision model. Vision requests can now use JSON mode and
396- * function calling.
552+ * GPT-4 Turbo model snapshot with vision capabilities.
553+ * <p>
554+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
555+ * cutoff: December, 2023.
397556 */
398557 GPT_4_TURBO_2024_04_09 ("gpt-4-turbo-2024-04-09" ),
399558
400559 /**
401- * (New) GPT-4 Turbo - latest GPT-4 model intended to reduce cases of “laziness”
402- * where the model doesn’t complete a task. Returns a maximum of 4,096 output
403- * tokens. Context window: 128k tokens
560+ * <b>GPT-4-0125-preview</b> is the latest GPT-4 model intended to reduce cases of
561+ * “laziness” where the model doesn’t complete a task.
562+ * <p>
563+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
404564 */
405565 GPT_4_0125_PREVIEW ("gpt-4-0125-preview" ),
406566
407567 /**
408- * Currently points to gpt-4-0125-preview - model featuring improved instruction
409- * following, JSON mode, reproducible outputs, parallel function calling, and
410- * more. Returns a maximum of 4,096 output tokens Context window: 128k tokens
568+ * Currently points to {@link #GPT_4_0125_PREVIEW}.
569+ * <p>
570+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
571+ */
572+ GPT_4_1106_PREVIEW ("gpt-4-1106-preview" ),
573+
574+ /**
575+ * <b>GPT-4 Turbo Preview</b> is a high-intelligence GPT model usable in Chat
576+ * Completions.
577+ * <p>
578+ * Currently points to {@link #GPT_4_0125_PREVIEW}.
579+ * <p>
580+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
411581 */
412582 GPT_4_TURBO_PREVIEW ("gpt-4-turbo-preview" ),
413583
414584 /**
415- * Currently points to gpt-4-0613. Snapshot of gpt-4 from June 13th 2023 with
416- * improved function calling support. Context window: 8k tokens
585+ * <b>GPT-4</b> is an older version of a high-intelligence GPT model, usable in
586+ * Chat Completions.
587+ * <p>
588+ * Currently points to {@link #GPT_4_0613}.
589+ * <p>
590+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
417591 */
418592 GPT_4 ("gpt-4" ),
593+ /**
594+ * GPT-4 model snapshot.
595+ * <p>
596+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
597+ */
598+ GPT_4_0613 ("gpt-4-0613" ),
599+ /**
600+ * GPT-4 model snapshot.
601+ * <p>
602+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
603+ */
604+ GPT_4_0314 ("gpt-4-0314" ),
419605
420606 /**
421- * Currently points to gpt-3.5-turbo-0125. model with higher accuracy at
422- * responding in requested formats and a fix for a bug which caused a text
423- * encoding issue for non-English language function calls. Returns a maximum of
424- * 4,096 Context window: 16k tokens
607+ * <b>GPT-3.5 Turbo</b> models can understand and generate natural language or
608+ * code and have been optimized for chat using the Chat Completions API but work
609+ * well for non-chat tasks as well.
610+ * <p>
611+ * As of July 2024, {@link #GPT_4_O_MINI} should be used in place of
612+ * gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
613+ * gpt-3.5-turbo is still available for use in the API.
614+ * <p>
615+ * Currently points to {@link #GPT_3_5_TURBO_0125}.
616+ * <p>
617+ * Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
618+ * cutoff: September, 2021.
425619 */
426620 GPT_3_5_TURBO ("gpt-3.5-turbo" ),
427621
428622 /**
429- * (new) The latest GPT-3.5 Turbo model with higher accuracy at responding in
430- * requested formats and a fix for a bug which caused a text encoding issue for
431- * non-English language function calls. Returns a maximum of 4,096 Context window:
432- * 16k tokens
623+ * The latest GPT-3.5 Turbo model with higher accuracy at responding in requested
624+ * formats and a fix for a bug that caused a text encoding issue for non-English
625+ * language function calls.
626+ * <p>
627+ * Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
628+ * cutoff: September, 2021.
433629 */
434630 GPT_3_5_TURBO_0125 ("gpt-3.5-turbo-0125" ),
435631
436632 /**
437633 * GPT-3.5 Turbo model with improved instruction following, JSON mode,
438- * reproducible outputs, parallel function calling, and more. Returns a maximum of
439- * 4,096 output tokens. Context window: 16k tokens.
634+ * reproducible outputs, parallel function calling, and more.
635+ * <p>
636+ * Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
637+ * cutoff: September, 2021.
638+ */
639+ GPT_3_5_TURBO_1106 ("gpt-3.5-turbo-1106" ),
640+
641+ /**
642+ * <b>GPT-3.5 Turbo Instruct</b> has similar capabilities to GPT-3 era models.
643+ * Compatible with the legacy Completions endpoint and not Chat Completions.
644+ * <p>
645+ * Context window: 4,096 tokens. Max output tokens: 4,096 tokens. Knowledge
646+ * cutoff: September, 2021.
440647 */
441- GPT_3_5_TURBO_1106 ("gpt-3.5-turbo-1106 " );
648+		GPT_3_5_TURBO_INSTRUCT ("gpt-3.5-turbo-instruct" );
442649
443650 public final String value ;
444651
0 commit comments