@@ -326,119 +326,206 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
326
326
}
327
327
328
328
/**
329
- * OpenAI Chat Completion Models:
330
- *
329
+ * OpenAI Chat Completion Models.
330
+ * <p>
331
+ * This enum provides a curated list of chat completion models available through the
332
+ * OpenAI API, along with their key features and links to the official OpenAI
333
+ * documentation for further details.
334
+ * <p>
335
+ * The models are grouped by their capabilities and intended use cases. For each
336
+ * model, a brief description is provided, highlighting its strengths, limitations,
337
+ * and any specific features. When available, the description also includes
338
+ * information about the model's context window, maximum output tokens, and knowledge
339
+ * cutoff date.
340
+ * <p>
341
+ * <b>References:</b>
331
342
* <ul>
332
- * <li><a href="https://platform.openai.com/docs/models/gpt-4o">GPT-4o</a></li>
333
- * <li><a href="https://platform.openai.com/docs/models/gpt-4o-mini">GPT-4o
334
- * mini</a></li>
335
- * <li><a href="https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo">GPT-4
343
+ * <li><a href="https://platform.openai.com/docs/models#gpt-4o">GPT-4o</a></li>
344
+ * <li><a href="https://platform.openai.com/docs/models#gpt-4-and-gpt-4-turbo">GPT-4
336
345
* and GPT-4 Turbo</a></li>
337
- * <li><a href="https://platform.openai.com/docs/models/ gpt-3-5-turbo">GPT-3.5
346
+ * <li><a href="https://platform.openai.com/docs/models#gpt-3-5-turbo">GPT-3.5
338
347
* Turbo</a></li>
348
+ * <li><a href="https://platform.openai.com/docs/models#o1-and-o1-mini">o1 and
349
+ * o1-mini</a></li>
350
+ * <li><a href="https://platform.openai.com/docs/models#o3-mini">o3-mini</a></li>
339
351
* </ul>
340
352
*/
341
353
public enum ChatModel implements ChatModelDescription {
342
354
343
355
/**
344
- * Points to the most recent snapshot of the o1 model:o1-2024-12-17
356
+ * <b>o1</b> is trained with reinforcement learning to perform complex reasoning.
357
+ * It thinks before it answers, producing a long internal chain of thought before
358
+ * responding to the user.
359
+ * <p>
360
+ * The latest o1 model supports both text and image inputs, and produces text
361
+ * outputs (including Structured Outputs).
362
+ * <p>
363
+ * The knowledge cutoff for o1 is October, 2023.
364
+ * <p>
345
365
*/
346
366
O1 ("o1" ),
347
367
/**
348
- * Latest o1 model snapshot
349
- */
350
- O1_2024_12_17 ("o1-2024-12-17" ),
351
- /**
352
- * Points to the most recent snapshot of the o1 preview
353
- * model:o1-preview-2024-09-12
368
+ * <b>o1-preview</b> is trained with reinforcement learning to perform complex
369
+ * reasoning. It thinks before it answers, producing a long internal chain of
370
+ * thought before responding to the user.
371
+ * <p>
372
+ * The latest o1-preview model supports both text and image inputs, and produces
373
+ * text outputs (including Structured Outputs).
374
+ * <p>
375
+ * The knowledge cutoff for o1-preview is October, 2023.
376
+ * <p>
354
377
*/
355
378
O1_PREVIEW ("o1-preview" ),
379
+
356
380
/**
357
- * Latest o1 preview model snapshot
358
- */
359
- O1_PREVIEW_2024_09_12 ( "o1-preview-2024-09-12" ),
360
- /**
361
- * Points to the most recent o1-mini snapshot:o1-mini-2024-09-12
381
+ * <b>o1-mini</b> is a faster and more affordable reasoning model compared to o1.
382
+ * o1-mini currently only supports text inputs and outputs.
383
+ * <p>
384
+ * The knowledge cutoff for o1-mini is October, 2023.
385
+ * <p>
362
386
*/
363
387
O1_MINI ("o1-mini" ),
364
388
/**
365
- * Latest o1-mini model snapshot
389
+ * <b>o3-mini</b> is OpenAI's most recent small reasoning model, providing high
390
+ * intelligence at the same cost and latency targets as o1-mini. o3-mini also
391
+ * supports key developer features, like Structured Outputs, function calling,
392
+ * Batch API, and more. Like other models in the o-series, it is designed to excel
393
+ * at science, math, and coding tasks.
394
+ * <p>
395
+ * The knowledge cutoff for o3-mini models is October, 2023.
396
+ * <p>
366
397
*/
367
- O1_MINI_2024_09_12 ( "o1 -mini-2024-09-12 " ),
398
+ O3_MINI ( "o3 -mini" ),
368
399
369
400
/**
370
- * Multimodal flagship model that’s cheaper and faster than GPT-4 Turbo. Currently
371
- * points to gpt-4o-2024-05-13.
401
+ * <b>GPT-4o ("omni")</b> is OpenAI's versatile, high-intelligence flagship model. It
402
+ * accepts both text and image inputs and produces text outputs (including
403
+ * Structured Outputs).
404
+ * <p>
405
+ * The knowledge cutoff for GPT-4o models is October, 2023.
406
+ * <p>
372
407
*/
373
408
GPT_4_O ("gpt-4o" ),
409
+ /**
410
+ * The <b>chatgpt-4o-latest</b> model ID continuously points to the version of
411
+ * GPT-4o used in ChatGPT. It is updated frequently when there are significant
412
+ * changes to ChatGPT's GPT-4o model.
413
+ * <p>
414
+ * Context window: 128,000 tokens. Max output tokens: 16,384 tokens. Knowledge
415
+ * cutoff: October, 2023.
416
+ */
417
+ CHATGPT_4_O_LATEST ("chatgpt-4o-latest" ),
374
418
375
419
/**
376
- * Preview release for audio inputs in chat completions.
420
+ * <b>GPT-4o Audio</b> is a preview release model that accepts audio inputs and
421
+ * outputs and can be used in the Chat Completions REST API.
422
+ * <p>
423
+ * The knowledge cutoff for GPT-4o Audio models is October, 2023.
424
+ * <p>
377
425
*/
378
426
GPT_4_O_AUDIO_PREVIEW ("gpt-4o-audio-preview" ),
379
427
380
428
/**
381
- * Affordable and intelligent small model for fast, lightweight tasks. GPT-4o mini
382
- * is cheaper and more capable than GPT-3.5 Turbo. Currently points to
383
- * gpt-4o-mini-2024-07-18.
429
+ * <b>GPT-4o-mini Audio</b> is a preview release model that accepts audio inputs
430
+ * and outputs and can be used in the Chat Completions REST API.
431
+ * <p>
432
+ * The knowledge cutoff for GPT-4o-mini Audio models is October, 2023.
433
+ * <p>
434
+ */
435
+ GPT_4_O_MINI_AUDIO_PREVIEW ("gpt-4o-mini-audio-preview" ),
436
+
437
+ /**
438
+ * <b>GPT-4o-mini</b> is a fast, affordable small model for focused tasks. It
439
+ * accepts both text and image inputs and produces text outputs (including
440
+ * Structured Outputs). It is ideal for fine-tuning, and model outputs from a
441
+ * larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar
442
+ * results at lower cost and latency.
443
+ * <p>
444
+ * The knowledge cutoff for GPT-4o-mini models is October, 2023.
445
+ * <p>
384
446
*/
385
447
GPT_4_O_MINI ("gpt-4o-mini" ),
386
448
387
449
/**
388
- * GPT-4 Turbo with Vision The latest GPT-4 Turbo model with vision capabilities.
389
- * Vision requests can now use JSON mode and function calling. Currently points to
390
- * gpt-4-turbo-2024-04-09.
450
+ * <b>GPT-4 Turbo</b> is a high-intelligence GPT model with vision capabilities,
451
+ * usable in Chat Completions. Vision requests can now use JSON mode and function
452
+ * calling.
453
+ * <p>
454
+ * The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.
455
+ * <p>
391
456
*/
392
457
GPT_4_TURBO ("gpt-4-turbo" ),
393
458
394
459
/**
395
- * GPT-4 Turbo with Vision model. Vision requests can now use JSON mode and
396
- * function calling.
460
+ * <b>GPT-4-0125-preview</b> is the latest GPT-4 model intended to reduce cases of
461
+ * “laziness” where the model doesn’t complete a task.
462
+ * <p>
463
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
397
464
*/
398
- GPT_4_TURBO_2024_04_09 ("gpt-4-turbo-2024-04-09 " ),
465
+ GPT_4_0125_PREVIEW ("gpt-4-0125-preview " ),
399
466
400
467
/**
401
- * (New) GPT-4 Turbo - latest GPT-4 model intended to reduce cases of “laziness”
402
- * where the model doesn’t complete a task. Returns a maximum of 4,096 output
403
- * tokens. Context window: 128k tokens
468
+ * <b>GPT-4-1106-preview</b> is a dated GPT-4 Turbo preview model snapshot.
469
+ * <p>
470
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
404
471
*/
405
- GPT_4_0125_PREVIEW ("gpt-4-0125 -preview" ),
472
+ GPT_4_1106_PREVIEW ("gpt-4-1106 -preview" ),
406
473
407
474
/**
408
- * Currently points to gpt-4-0125-preview - model featuring improved instruction
409
- * following, JSON mode, reproducible outputs, parallel function calling, and
410
- * more. Returns a maximum of 4,096 output tokens Context window: 128k tokens
475
+ * <b>GPT-4 Turbo Preview</b> is a high-intelligence GPT model usable in Chat
476
+ * Completions.
477
+ * <p>
478
+ * Currently points to {@link #GPT_4_0125_PREVIEW}.
479
+ * <p>
480
+ * Context window: 128,000 tokens. Max output tokens: 4,096 tokens.
411
481
*/
412
482
GPT_4_TURBO_PREVIEW ("gpt-4-turbo-preview" ),
413
483
414
484
/**
415
- * Currently points to gpt-4-0613. Snapshot of gpt-4 from June 13th 2023 with
416
- * improved function calling support. Context window: 8k tokens
485
+ * <b>GPT-4</b> is an older version of a high-intelligence GPT model, usable in
486
+ * Chat Completions.
487
+ * <p>
488
+ * Currently points to {@link #GPT_4_0613}.
489
+ * <p>
490
+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
417
491
*/
418
492
GPT_4 ("gpt-4" ),
419
-
420
493
/**
421
- * Currently points to gpt-3.5-turbo-0125. model with higher accuracy at
422
- * responding in requested formats and a fix for a bug which caused a text
423
- * encoding issue for non-English language function calls. Returns a maximum of
424
- * 4,096 Context window: 16k tokens
494
+ * GPT-4 model snapshot.
495
+ * <p>
496
+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
425
497
*/
426
- GPT_3_5_TURBO ("gpt-3.5-turbo" ),
498
+ GPT_4_0613 ("gpt-4-0613" ),
499
+ /**
500
+ * GPT-4 model snapshot.
501
+ * <p>
502
+ * Context window: 8,192 tokens. Max output tokens: 8,192 tokens.
503
+ */
504
+ GPT_4_0314 ("gpt-4-0314" ),
427
505
428
506
/**
429
- * (new) The latest GPT-3.5 Turbo model with higher accuracy at responding in
430
- * requested formats and a fix for a bug which caused a text encoding issue for
431
- * non-English language function calls. Returns a maximum of 4,096 Context window:
432
- * 16k tokens
507
+ * <b>GPT-3.5 Turbo</b> models can understand and generate natural language or
508
+ * code and have been optimized for chat using the Chat Completions API but work
509
+ * well for non-chat tasks as well.
510
+ * <p>
511
+ * As of July 2024, {@link #GPT_4_O_MINI} should be used in place of
512
+ * gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.
513
+ * gpt-3.5-turbo is still available for use in the API.
514
+ * <p>
515
+ *
516
+ * Context window: 16,385 tokens. Max output tokens: 4,096 tokens. Knowledge
517
+ * cutoff: September, 2021.
433
518
*/
434
- GPT_3_5_TURBO_0125 ("gpt-3.5-turbo-0125 " ),
519
+ GPT_3_5_TURBO ("gpt-3.5-turbo" ),
435
520
436
521
/**
437
- * GPT-3.5 Turbo model with improved instruction following, JSON mode,
438
- * reproducible outputs, parallel function calling, and more. Returns a maximum of
439
- * 4,096 output tokens. Context window: 16k tokens.
522
+ * <b>GPT-3.5 Turbo Instruct</b> has similar capabilities to GPT-3 era models.
523
+ * Compatible with the legacy Completions endpoint and not Chat Completions.
524
+ * <p>
525
+ * Context window: 4,096 tokens. Max output tokens: 4,096 tokens. Knowledge
526
+ * cutoff: September, 2021.
440
527
*/
441
- GPT_3_5_TURBO_1106 ("gpt-3.5-turbo-1106 " );
528
+ GPT_3_5_TURBO_INSTRUCT ("gpt-3.5-turbo-instruct " );
442
529
443
530
public final String value ;
444
531
0 commit comments