@@ -28,7 +28,8 @@ export class Evals extends APIResource {

   /**
    * Create the structure of an evaluation that can be used to test a model's
-   * performance. An evaluation is a set of testing criteria and a datasource. After
+   * performance. An evaluation is a set of testing criteria and the config for a
+   * data source, which dictates the schema of the data used in the evaluation. After
    * creating an evaluation, you can run it on different models and model parameters.
    * We support several types of graders and datasources. For more information, see
    * the [Evals guide](https://platform.openai.com/docs/guides/evals).
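// Illustrative sketch (not part of this diff): creating an eval from its two
// building blocks -- a data source config and testing criteria -- assuming an
// instantiated `client: OpenAI` from this SDK. The schema fields and grader
// values below are hypothetical.
const evaluation = await client.evals.create({
  name: 'Support chatbot eval',
  // The data source config dictates the schema of each `item` in the eval data.
  data_source_config: {
    type: 'custom',
    item_schema: {
      type: 'object',
      properties: {
        question: { type: 'string' },
        expected: { type: 'string' },
      },
      required: ['question', 'expected'],
    },
    include_sample_schema: true,
  },
  // Graders compare the model's output (`sample` namespace) to dataset fields (`item` namespace).
  testing_criteria: [
    {
      type: 'string_check',
      name: 'Exact match',
      input: '{{sample.output_text}}',
      reference: '{{item.expected}}',
      operation: 'eq',
    },
  ],
});
console.log(evaluation.id);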
@@ -115,9 +116,9 @@ export interface EvalStoredCompletionsDataSourceConfig {
   schema: Record<string, unknown>;

   /**
-   * The type of data source. Always `stored-completions`.
+   * The type of data source. Always `stored_completions`.
    */
-  type: 'stored-completions';
+  type: 'stored_completions';

   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -136,7 +137,7 @@ export interface EvalStoredCompletionsDataSourceConfig {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my usecase than gpt-4o
  */
 export interface EvalCreateResponse {
   /**
@@ -257,7 +258,7 @@ export namespace EvalCreateResponse {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my usecase than gpt-4o
  */
 export interface EvalRetrieveResponse {
   /**
@@ -378,7 +379,7 @@ export namespace EvalRetrieveResponse {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my usecase than gpt-4o
  */
 export interface EvalUpdateResponse {
   /**
@@ -499,7 +500,7 @@ export namespace EvalUpdateResponse {
  *
  * - Improve the quality of my chatbot
  * - See how well my chatbot handles customer support
- * - Check if o3-mini is better at my usecase than gpt-4o
+ * - Check if o4-mini is better at my usecase than gpt-4o
  */
 export interface EvalListResponse {
   /**
@@ -624,12 +625,16 @@ export interface EvalDeleteResponse {

 export interface EvalCreateParams {
   /**
-   * The configuration for the data source used for the evaluation runs.
+   * The configuration for the data source used for the evaluation runs. Dictates the
+   * schema of the data used in the evaluation.
    */
   data_source_config: EvalCreateParams.Custom | EvalCreateParams.Logs | EvalCreateParams.StoredCompletions;

   /**
-   * A list of graders for all eval runs in this group.
+   * A list of graders for all eval runs in this group. Graders can reference
+   * variables in the data source using double curly braces notation, like
+   * `{{item.variable_name}}`. To reference the model's output, use the `sample`
+   * namespace (ie, `{{sample.output_text}}`).
    */
   testing_criteria: Array<
     | EvalCreateParams.LabelModel
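// Sketch of the double-curly-brace convention described above: a grader that
// scores `{{sample.output_text}}` (the model's output) against a dataset column.
// Assumes a `text_similarity` testing criterion; the metric, threshold, and
// `reference_answer` item field are hypothetical.
const similarityGrader = {
  type: 'text_similarity' as const,
  name: 'Answer similarity',
  input: '{{sample.output_text}}',        // the model's generated answer
  reference: '{{item.reference_answer}}', // ground-truth column from the data source
  evaluation_metric: 'fuzzy_match' as const,
  pass_threshold: 0.8,
};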
@@ -699,13 +704,13 @@ export namespace EvalCreateParams {
   }

   /**
-   * Deprecated in favor of LogsDataSourceConfig.
+   * @deprecated Deprecated in favor of LogsDataSourceConfig.
    */
   export interface StoredCompletions {
     /**
-     * The type of data source. Always `stored-completions`.
+     * The type of data source. Always `stored_completions`.
      */
-    type: 'stored-completions';
+    type: 'stored_completions';

     /**
      * Metadata filters for the stored completions data source.
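// Sketch: a stored-completions data source config for `EvalCreateParams`, using the
// renamed `stored_completions` type (this variant is deprecated in favor of the logs
// data source). The metadata filter values are hypothetical.
const storedCompletionsSource: EvalCreateParams.StoredCompletions = {
  type: 'stored_completions',
  // Only stored completions whose metadata matches these key-value pairs are included.
  metadata: { project: 'support-bot', environment: 'production' },
};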
@@ -720,7 +725,7 @@ export namespace EvalCreateParams {
   export interface LabelModel {
     /**
      * A list of chat messages forming the prompt or context. May include variable
-     * references to the "item" namespace, ie {{item.name}}.
+     * references to the `item` namespace, ie {{item.name}}.
      */
     input: Array<LabelModel.SimpleInputMessage | LabelModel.EvalItem>;
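// Sketch: a label_model grader whose `input` messages reference the `item`
// namespace with double curly braces. Model name, labels, and item fields are
// hypothetical.
const labelGrader: EvalCreateParams.LabelModel = {
  type: 'label_model',
  name: 'Politeness check',
  model: 'gpt-4o-mini',
  input: [
    { role: 'developer', content: 'Label the reply about {{item.name}} as polite or impolite.' },
    { role: 'user', content: '{{sample.output_text}}' },
  ],
  labels: ['polite', 'impolite'],
  passing_labels: ['polite'],
};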
726731
0 commit comments