Skip to content

Commit 3b1e772

Browse files
committed
feat: Update OpenAI Chat Models to Latest Versions and Improve Documentation
This commit comprehensively updates the OpenAiApi `ChatModel` enum to include all the latest OpenAI chat completion models and enhances the documentation. Signed-off-by: Alexandros Pappas <[email protected]>
1 parent ca27895 commit 3b1e772

File tree

1 file changed

+251
-44
lines changed
  • models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api

1 file changed

+251
-44
lines changed

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java

Lines changed: 251 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -326,119 +326,326 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
326326
}
327327

328328
/**
329-
* OpenAI Chat Completion Models:
330-
*
329+
* OpenAI Chat Completion Models.
330+
* <p>
331+
* This enum provides a comprehensive list of chat completion models available through
332+
* the OpenAI API, along with their key features and links to the official OpenAI
333+
* documentation for further details.
334+
* <p>
335+
* The models are grouped by their capabilities and intended use cases. For each
336+
* model, a brief description is provided, highlighting its strengths, limitations,
337+
* and any specific features. When available, the description also includes
338+
* information about the model's context window, maximum output tokens, and knowledge
339+
* cutoff date.
340+
* <p>
341+
* <b>References:</b>
331342
* <ul>
332-
* <li><a href="https://platform.openai.com/docs/models/gpt-4o">GPT-4o</a></li>
333-
* <li><a href="https://platform.openai.com/docs/models/gpt-4o-mini">GPT-4o
334-
* mini</a></li>
335-
* <li><a href="https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo">GPT-4
343+
* <li><a href="https://platform.openai.com/docs/models#gpt-4o">GPT-4o</a></li>
344+
* <li><a href="https://platform.openai.com/docs/models#gpt-4-and-gpt-4-turbo">GPT-4
336345
* and GPT-4 Turbo</a></li>
337-
* <li><a href="https://platform.openai.com/docs/models/gpt-3-5-turbo">GPT-3.5
346+
* <li><a href="https://platform.openai.com/docs/models#gpt-3-5-turbo">GPT-3.5
338347
* Turbo</a></li>
348+
* <li><a href="https://platform.openai.com/docs/models#o1-and-o1-mini">o1 and
349+
* o1-mini</a></li>
350+
* <li><a href="https://platform.openai.com/docs/models#o3-mini">o3-mini</a></li>
339351
* </ul>
340352
*/
341353
public enum ChatModel implements ChatModelDescription {
342354

343355
/**
344-
* Points to the most recent snapshot of the o1 model:o1-2024-12-17
356+
* <b>o1</b> is trained with reinforcement learning to perform complex reasoning.
357+
* It thinks before it answers, producing a long internal chain of thought before
358+
* responding to the user.
359+
* <p>
360+
* The latest o1 model supports both text and image inputs, and produces text
361+
* outputs (including Structured Outputs).
362+
* <p>
363+
* The knowledge cutoff for o1 is October, 2023.
364+
* <p>
365+
* Currently points to {@link #O1_2024_12_17}.
345366
*/
346367
O1("o1"),
347368
/**
348-
* Latest o1 model snapshot
369+
* Latest o1 model snapshot. Supports both text and image inputs, and produces
370+
* text outputs (including Structured Outputs).
371+
* <p>
372+
* Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
373+
* cutoff: October, 2023.
349374
*/
350375
O1_2024_12_17("o1-2024-12-17"),
351376
/**
352-
* Points to the most recent snapshot of the o1 preview
353-
* model:o1-preview-2024-09-12
377+
* <b>o1-preview</b> is trained with reinforcement learning to perform complex
378+
* reasoning. It thinks before it answers, producing a long internal chain of
379+
* thought before responding to the user.
380+
* <p>
381+
* The latest o1-preview model supports both text and image inputs, and produces
382+
* text outputs (including Structured Outputs).
383+
* <p>
384+
* The knowledge cutoff for o1-preview is October, 2023.
385+
* <p>
386+
* Currently points to {@link #O1_PREVIEW_2024_09_12}.
354387
*/
355388
O1_PREVIEW("o1-preview"),
356389
/**
357-
* Latest o1 preview model snapshot
390+
* Latest o1-preview model snapshot. Supports both text and image inputs, and
391+
* produces text outputs (including Structured Outputs).
392+
* <p>
393+
* Context window: 128,000 tokens. Max output tokens: 32,768 tokens. Knowledge
394+
* cutoff: October, 2023.
358395
*/
359396
O1_PREVIEW_2024_09_12("o1-preview-2024-09-12"),
360397
/**
361-
* Points to the most recent o1-mini snapshot:o1-mini-2024-09-12
398+
* <b>o1-mini</b> is a faster and more affordable reasoning model compared to o1.
399+
* o1-mini currently only supports text inputs and outputs.
400+
* <p>
401+
* The knowledge cutoff for o1-mini is October, 2023.
402+
* <p>
403+
* Currently points to {@link #O1_MINI_2024_09_12}.
362404
*/
363405
O1_MINI("o1-mini"),
364406
/**
365-
* Latest o1-mini model snapshot
407+
* Latest o1-mini model snapshot. Supports only text inputs and outputs.
408+
* <p>
409+
* Context window: 128,000 tokens. Max output tokens: 65,536 tokens. Knowledge
410+
* cutoff: October, 2023.
366411
*/
367412
O1_MINI_2024_09_12("o1-mini-2024-09-12"),
368413

369414
/**
370-
* Multimodal flagship model that’s cheaper and faster than GPT-4 Turbo. Currently
371-
* points to gpt-4o-2024-05-13.
415+
* <b>o3-mini</b> is OpenAI's most recent small reasoning model, providing high
416+
* intelligence at the same cost and latency targets as o1-mini. o3-mini also
417+
* supports key developer features, like Structured Outputs, function calling,
418+
* Batch API, and more. Like other models in the o-series, it is designed to excel
419+
* at science, math, and coding tasks.
420+
* <p>
421+
* The knowledge cutoff for o3-mini models is October, 2023.
422+
* <p>
423+
* Currently points to {@link #O3_MINI_2025_01_31}.
424+
*/
425+
O3_MINI("o3-mini"),
426+
/**
427+
* Latest o3-mini model snapshot.
428+
* <p>
429+
* Context window: 200,000 tokens. Max output tokens: 100,000 tokens. Knowledge
430+
* cutoff: October, 2023.
431+
*/
432+
O3_MINI_2025_01_31("o3-mini-2025-01-31"),
433+
434+
/**
435+
* <b>GPT-4o ("omni")</b> is OpenAI's versatile, high-intelligence flagship model. It
436+
* accepts both text and image inputs and produces text outputs (including
437+
* Structured Outputs).
438+
* <p>
439+
* The knowledge cutoff for GPT-4o models is October, 2023.
440+
* <p>
441+
* Currently points to {@link #GPT_4_O_2024_08_06}.
372442
*/
373443
GPT_4_O("gpt-4o"),
444+
/**
445+
* The <b>chatgpt-4o-latest</b> model ID continuously points to the version of
446+
* GPT-4o used in ChatGPT. It is updated frequently when there are significant
447+
* changes to ChatGPT's GPT-4o model.
448+
* <p>
449+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
450+
* cutoff: October, 2023.
451+
*/
452+
CHATGPT_4_O_LATEST("chatgpt-4o-latest"),
453+
/**
454+
* GPT-4o model snapshot.
455+
* <p>
456+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
457+
* cutoff: October, 2023.
458+
*/
459+
GPT_4_O_2024_11_20("gpt-4o-2024-11-20"),
460+
/**
461+
* GPT-4o model snapshot.
462+
* <p>
463+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
464+
* cutoff: October, 2023.
465+
*/
466+
GPT_4_O_2024_08_06("gpt-4o-2024-08-06"),
467+
/**
468+
* GPT-4o model snapshot.
469+
* <p>
470+
* Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
471+
* cutoff: October, 2023.
472+
*/
473+
GPT_4_O_2024_05_13("gpt-4o-2024-05-13"),
374474

375475
/**
376-
* Preview release for audio inputs in chat completions.
476+
* <b>GPT-4o Audio</b> is a preview release model that accepts audio inputs and
477+
* outputs and can be used in the Chat Completions REST API.
478+
* <p>
479+
* The knowledge cutoff for GPT-4o Audio models is October, 2023.
480+
* <p>
481+
* Currently points to {@link #GPT_4_O_AUDIO_PREVIEW_2024_12_17}.
377482
*/
378483
GPT_4_O_AUDIO_PREVIEW("gpt-4o-audio-preview"),
379484

380485
/**
381-
* Affordable and intelligent small model for fast, lightweight tasks. GPT-4o mini
382-
* is cheaper and more capable than GPT-3.5 Turbo. Currently points to
383-
* gpt-4o-mini-2024-07-18.
486+
* GPT-4o Audio model snapshot.
487+
* <p>
488+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
489+
* cutoff: October, 2023.
490+
*/
491+
GPT_4_O_AUDIO_PREVIEW_2024_12_17("gpt-4o-audio-preview-2024-12-17"),
492+
493+
/**
494+
* GPT-4o Audio model snapshot.
495+
* <p>
496+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
497+
* cutoff: October, 2023.
498+
*/
499+
GPT_4_O_AUDIO_PREVIEW_2024_10_01("gpt-4o-audio-preview-2024-10-01"),
500+
501+
/**
502+
* <b>GPT-4o-mini Audio</b> is a preview release model that accepts audio inputs
503+
* and outputs and can be used in the Chat Completions REST API.
504+
* <p>
505+
* The knowledge cutoff for GPT-4o-mini Audio models is October, 2023.
506+
* <p>
507+
* Currently points to {@link #GPT_4_O_MINI_AUDIO_PREVIEW_2024_12_17}.
508+
*/
509+
GPT_4_O_MINI_AUDIO_PREVIEW("gpt-4o-mini-audio-preview"),
510+
511+
/**
512+
* GPT-4o-mini Audio model snapshot.
513+
* <p>
514+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
515+
* cutoff: October, 2023.
516+
*/
517+
GPT_4_O_MINI_AUDIO_PREVIEW_2024_12_17("gpt-4o-mini-audio-preview-2024-12-17"),
518+
519+
/**
520+
* <b>GPT-4o-mini</b> is a fast, affordable small model for focused tasks. It
521+
* accepts both text and image inputs and produces text outputs (including
522+
* Structured Outputs). It is ideal for fine-tuning, and model outputs from a
523+
* larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar
524+
* results at lower cost and latency.
525+
* <p>
526+
* The knowledge cutoff for GPT-4o-mini models is October, 2023.
527+
* <p>
528+
* Currently points to {@link #GPT_4_O_MINI_2024_07_18}.
384529
*/
385530
GPT_4_O_MINI("gpt-4o-mini"),
386531

387532
/**
388-
* GPT-4 Turbo with Vision The latest GPT-4 Turbo model with vision capabilities.
389-
* Vision requests can now use JSON mode and function calling. Currently points to
390-
* gpt-4-turbo-2024-04-09.
533+
* GPT-4o-mini model snapshot.
534+
* <p>
535+
* Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
536+
* cutoff: October, 2023.
537+
*/
538+
GPT_4_O_MINI_2024_07_18("gpt-4o-mini-2024-07-18"),
539+
540+
/**
541+
* <b>GPT-4 Turbo</b> is a high-intelligence GPT model with vision capabilities,
542+
* usable in Chat Completions. Vision requests can now use JSON mode and function
543+
* calling.
544+
* <p>
545+
* The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.
546+
* <p>
547+
* Currently points to {@link #GPT_4_TURBO_2024_04_09}.
391548
*/
392549
GPT_4_TURBO("gpt-4-turbo"),
393550

394551
/**
395-
* GPT-4 Turbo with Vision model. Vision requests can now use JSON mode and
396-
* function calling.
552+
* GPT-4 Turbo model snapshot with vision capabilities.
553+
* <p>
554+
* Context window: 128,000 tokens. Max output tokens: 4,096 tokens. Knowledge
555+
* cutoff: December, 2023.
397556
*/
398557
GPT_4_TURBO_2024_04_09("gpt-4-turbo-2024-04-09"),
399558

400559
/**
401-
* (New) GPT-4 Turbo - latest GPT-4 model intended to reduce cases of “laziness”
402-
* where the model doesn’t complete a task. Returns a maximum of 4,096 output
403-
* tokens. Context window: 128k tokens
560+
* <b>GPT-4-0125-preview</b> is the latest GPT-4 model intended to reduce cases of
561+
* “laziness” where the model doesn’t complete a task.
562+
* <p>
563+
* Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
404564
*/
405565
GPT_4_0125_PREVIEW("gpt-4-0125-preview"),
406566

407567
/**
408-
* Currently points to gpt-4-0125-preview - model featuring improved instruction
409-
* following, JSON mode, reproducible outputs, parallel function calling, and
410-
* more. Returns a maximum of 4,096 output tokens Context window: 128k tokens
568+
* GPT-4 Turbo preview model snapshot with improved instruction following, JSON mode,
* reproducible outputs, parallel function calling, and more.
569+
* <p>
570+
* Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
571+
*/
572+
GPT_4_1106_PREVIEW("gpt-4-1106-preview"),
573+
574+
/**
575+
* <b>GPT-4 Turbo Preview</b> is a high-intelligence GPT model usable in Chat
576+
* Completions.
577+
* <p>
578+
* Currently points to {@link #GPT_4_0125_PREVIEW}.
579+
* <p>
580+
* Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
411581
*/
412582
GPT_4_TURBO_PREVIEW("gpt-4-turbo-preview"),
413583

414584
/**
415-
* Currently points to gpt-4-0613. Snapshot of gpt-4 from June 13th 2023 with
416-
* improved function calling support. Context window: 8k tokens
585+
* <b>GPT-4</b> is an older version of a high-intelligence GPT model, usable in
586+
* Chat Completions.
587+
* <p>
588+
* Currently points to {@link #GPT_4_0613}.
589+
* <p>
590+
* Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
417591
*/
418592
GPT_4("gpt-4"),
593+
/**
594+
* GPT-4 model snapshot.
595+
* <p>
596+
* Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
597+
*/
598+
GPT_4_0613("gpt-4-0613"),
599+
/**
600+
* GPT-4 model snapshot.
601+
* <p>
602+
* Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
603+
*/
604+
GPT_4_0314("gpt-4-0314"),
419605

420606
/**
421-
* Currently points to gpt-3.5-turbo-0125. model with higher accuracy at
422-
* responding in requested formats and a fix for a bug which caused a text
423-
* encoding issue for non-English language function calls. Returns a maximum of
424-
* 4,096 Context window: 16k tokens
607+
* <b>GPT-3.5 Turbo</b> models can understand and generate natural language or
608+
* code and have been optimized for chat using the Chat Completions API but work
609+
* well for non-chat tasks as well.
610+
* <p>
611+
* As of July 2024, {@link #GPT_4_O_MINI} should be used in place of
612+
* gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
613+
* gpt-3.5-turbo is still available for use in the API.
614+
* <p>
615+
* Currently points to {@link #GPT_3_5_TURBO_0125}.
616+
* <p>
617+
* Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
618+
* cutoff: September, 2021.
425619
*/
426620
GPT_3_5_TURBO("gpt-3.5-turbo"),
427621

428622
/**
429-
* (new) The latest GPT-3.5 Turbo model with higher accuracy at responding in
430-
* requested formats and a fix for a bug which caused a text encoding issue for
431-
* non-English language function calls. Returns a maximum of 4,096 Context window:
432-
* 16k tokens
623+
* The latest GPT-3.5 Turbo model with higher accuracy at responding in requested
624+
* formats and a fix for a bug that caused a text encoding issue for non-English
625+
* language function calls.
626+
* <p>
627+
* Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
628+
* cutoff: September, 2021.
433629
*/
434630
GPT_3_5_TURBO_0125("gpt-3.5-turbo-0125"),
435631

436632
/**
437633
* GPT-3.5 Turbo model with improved instruction following, JSON mode,
438-
* reproducible outputs, parallel function calling, and more. Returns a maximum of
439-
* 4,096 output tokens. Context window: 16k tokens.
634+
* reproducible outputs, parallel function calling, and more.
635+
* <p>
636+
* Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
637+
* cutoff: September, 2021.
638+
*/
639+
GPT_3_5_TURBO_1106("gpt-3.5-turbo-1106"),
640+
641+
/**
642+
* <b>GPT-3.5 Turbo Instruct</b> has similar capabilities to GPT-3 era models.
643+
* Compatible with the legacy Completions endpoint and not Chat Completions.
644+
* <p>
645+
* Context window: 4,096 tokens. Max output tokens: 4,096 tokens. Knowledge
646+
* cutoff: September, 2021.
440647
*/
441-
GPT_3_5_TURBO_1106("gpt-3.5-turbo-1106");
648+
GPT_3_5_TURBO_INSTRUCT("gpt-3.5-turbo-instruct");
442649

443650
public final String value;
444651

0 commit comments

Comments
 (0)