Skip to content

Commit 2b9d5bc

Browse files
feat(api): further updates for evals API
1 parent 02c6df6 commit 2b9d5bc

18 files changed

+256
-423
lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 99
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5fa16b9a02985ae06e41be14946a9c325dc672fb014b3c19abca65880c6990e6.yml
3-
openapi_spec_hash: da3e669f65130043b1170048c0727890
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-262e171d0a8150ea1192474d16ba3afdf9a054b399f1a49a9c9b697a3073c136.yml
3+
openapi_spec_hash: 33e00a48df8f94c94f46290c489f132b
44
config_hash: d8d5fda350f6db77c784f35429741a2e

openai-java-core/src/main/kotlin/com/openai/models/evals/EvalCreateParams.kt

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ import kotlin.jvm.optionals.getOrNull
4141

4242
/**
4343
* Create the structure of an evaluation that can be used to test a model's performance. An
44-
* evaluation is a set of testing criteria and a datasource. After creating an evaluation, you can
45-
* run it on different models and model parameters. We support several types of graders and
46-
* datasources. For more information, see the
47-
* [Evals guide](https://platform.openai.com/docs/guides/evals).
44+
* evaluation is a set of testing criteria and the config for a data source, which dictates the
45+
* schema of the data used in the evaluation. After creating an evaluation, you can run it on
46+
* different models and model parameters. We support several types of graders and data sources. For
47+
* more information, see the [Evals guide](https://platform.openai.com/docs/guides/evals).
4848
*/
4949
class EvalCreateParams
5050
private constructor(
@@ -54,15 +54,18 @@ private constructor(
5454
) : Params {
5555

5656
/**
57-
* The configuration for the data source used for the evaluation runs.
57+
* The configuration for the data source used for the evaluation runs. Dictates the schema of
58+
* the data used in the evaluation.
5859
*
5960
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
6061
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
6162
*/
6263
fun dataSourceConfig(): DataSourceConfig = body.dataSourceConfig()
6364

6465
/**
65-
* A list of graders for all eval runs in this group.
66+
* A list of graders for all eval runs in this group. Graders can reference variables in the
67+
* data source using double curly braces notation, like `{{item.variable_name}}`. To reference
68+
* the model's output, use the `sample` namespace (i.e., `{{sample.output_text}}`).
6669
*
6770
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
6871
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -167,7 +170,10 @@ private constructor(
167170
*/
168171
fun body(body: Body) = apply { this.body = body.toBuilder() }
169172

170-
/** The configuration for the data source used for the evaluation runs. */
173+
/**
174+
* The configuration for the data source used for the evaluation runs. Dictates the schema
175+
* of the data used in the evaluation.
176+
*/
171177
fun dataSourceConfig(dataSourceConfig: DataSourceConfig) = apply {
172178
body.dataSourceConfig(dataSourceConfig)
173179
}
@@ -207,11 +213,16 @@ private constructor(
207213
* Alias for calling [dataSourceConfig] with
208214
* `DataSourceConfig.ofStoredCompletions(storedCompletions)`.
209215
*/
216+
@Deprecated("deprecated")
210217
fun dataSourceConfig(storedCompletions: DataSourceConfig.StoredCompletions) = apply {
211218
body.dataSourceConfig(storedCompletions)
212219
}
213220

214-
/** A list of graders for all eval runs in this group. */
221+
/**
222+
* A list of graders for all eval runs in this group. Graders can reference variables in the
223+
* data source using double curly braces notation, like `{{item.variable_name}}`. To
224+
* reference the model's output, use the `sample` namespace (i.e., `{{sample.output_text}}`).
225+
*/
215226
fun testingCriteria(testingCriteria: List<TestingCriterion>) = apply {
216227
body.testingCriteria(testingCriteria)
217228
}
@@ -468,7 +479,8 @@ private constructor(
468479
) : this(dataSourceConfig, testingCriteria, metadata, name, mutableMapOf())
469480

470481
/**
471-
* The configuration for the data source used for the evaluation runs.
482+
* The configuration for the data source used for the evaluation runs. Dictates the schema
483+
* of the data used in the evaluation.
472484
*
473485
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
474486
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -477,7 +489,9 @@ private constructor(
477489
dataSourceConfig.getRequired("data_source_config")
478490

479491
/**
480-
* A list of graders for all eval runs in this group.
492+
* A list of graders for all eval runs in this group. Graders can reference variables in the
493+
* data source using double curly braces notation, like `{{item.variable_name}}`. To
494+
* reference the model's output, use the `sample` namespace (i.e., `{{sample.output_text}}`).
481495
*
482496
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
483497
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -584,7 +598,10 @@ private constructor(
584598
additionalProperties = body.additionalProperties.toMutableMap()
585599
}
586600

587-
/** The configuration for the data source used for the evaluation runs. */
601+
/**
602+
* The configuration for the data source used for the evaluation runs. Dictates the
603+
* schema of the data used in the evaluation.
604+
*/
588605
fun dataSourceConfig(dataSourceConfig: DataSourceConfig) =
589606
dataSourceConfig(JsonField.of(dataSourceConfig))
590607

@@ -622,10 +639,16 @@ private constructor(
622639
* Alias for calling [dataSourceConfig] with
623640
* `DataSourceConfig.ofStoredCompletions(storedCompletions)`.
624641
*/
642+
@Deprecated("deprecated")
625643
fun dataSourceConfig(storedCompletions: DataSourceConfig.StoredCompletions) =
626644
dataSourceConfig(DataSourceConfig.ofStoredCompletions(storedCompletions))
627645

628-
/** A list of graders for all eval runs in this group. */
646+
/**
647+
* A list of graders for all eval runs in this group. Graders can reference variables in
648+
* the data source using double curly braces notation, like `{{item.variable_name}}`. To
649+
* reference the model's output, use the `sample` namespace (i.e.,
650+
* `{{sample.output_text}}`).
651+
*/
629652
fun testingCriteria(testingCriteria: List<TestingCriterion>) =
630653
testingCriteria(JsonField.of(testingCriteria))
631654

@@ -813,7 +836,10 @@ private constructor(
813836
"Body{dataSourceConfig=$dataSourceConfig, testingCriteria=$testingCriteria, metadata=$metadata, name=$name, additionalProperties=$additionalProperties}"
814837
}
815838

816-
/** The configuration for the data source used for the evaluation runs. */
839+
/**
840+
* The configuration for the data source used for the evaluation runs. Dictates the schema of
841+
* the data used in the evaluation.
842+
*/
817843
@JsonDeserialize(using = DataSourceConfig.Deserializer::class)
818844
@JsonSerialize(using = DataSourceConfig.Serializer::class)
819845
class DataSourceConfig
@@ -839,14 +865,15 @@ private constructor(
839865
fun logs(): Optional<Logs> = Optional.ofNullable(logs)
840866

841867
/** Deprecated in favor of LogsDataSourceConfig. */
868+
@Deprecated("deprecated")
842869
fun storedCompletions(): Optional<StoredCompletions> =
843870
Optional.ofNullable(storedCompletions)
844871

845872
fun isCustom(): Boolean = custom != null
846873

847874
fun isLogs(): Boolean = logs != null
848875

849-
fun isStoredCompletions(): Boolean = storedCompletions != null
876+
@Deprecated("deprecated") fun isStoredCompletions(): Boolean = storedCompletions != null
850877

851878
/**
852879
* A CustomDataSourceConfig object that defines the schema for the data source used for the
@@ -863,6 +890,7 @@ private constructor(
863890
fun asLogs(): Logs = logs.getOrThrow("logs")
864891

865892
/** Deprecated in favor of LogsDataSourceConfig. */
893+
@Deprecated("deprecated")
866894
fun asStoredCompletions(): StoredCompletions =
867895
storedCompletions.getOrThrow("storedCompletions")
868896

@@ -968,6 +996,7 @@ private constructor(
968996
@JvmStatic fun ofLogs(logs: Logs) = DataSourceConfig(logs = logs)
969997

970998
/** Deprecated in favor of LogsDataSourceConfig. */
999+
@Deprecated("deprecated")
9711000
@JvmStatic
9721001
fun ofStoredCompletions(storedCompletions: StoredCompletions) =
9731002
DataSourceConfig(storedCompletions = storedCompletions)
@@ -995,6 +1024,7 @@ private constructor(
9951024
fun visitLogs(logs: Logs): T
9961025

9971026
/** Deprecated in favor of LogsDataSourceConfig. */
1027+
@Deprecated("deprecated")
9981028
fun visitStoredCompletions(storedCompletions: StoredCompletions): T
9991029

10001030
/**
@@ -1029,7 +1059,7 @@ private constructor(
10291059
DataSourceConfig(logs = it, _json = json)
10301060
} ?: DataSourceConfig(_json = json)
10311061
}
1032-
"stored-completions" -> {
1062+
"stored_completions" -> {
10331063
return tryDeserialize(node, jacksonTypeRef<StoredCompletions>())?.let {
10341064
DataSourceConfig(storedCompletions = it, _json = json)
10351065
} ?: DataSourceConfig(_json = json)
@@ -1726,6 +1756,7 @@ private constructor(
17261756
}
17271757

17281758
/** Deprecated in favor of LogsDataSourceConfig. */
1759+
@Deprecated("deprecated")
17291760
class StoredCompletions
17301761
private constructor(
17311762
private val type: JsonValue,
@@ -1742,11 +1773,11 @@ private constructor(
17421773
) : this(type, metadata, mutableMapOf())
17431774

17441775
/**
1745-
* The type of data source. Always `stored-completions`.
1776+
* The type of data source. Always `stored_completions`.
17461777
*
17471778
* Expected to always return the following:
17481779
* ```java
1749-
* JsonValue.from("stored-completions")
1780+
* JsonValue.from("stored_completions")
17501781
* ```
17511782
*
17521783
* However, this method can be useful for debugging and logging (e.g. if the server
@@ -1795,7 +1826,7 @@ private constructor(
17951826
/** A builder for [StoredCompletions]. */
17961827
class Builder internal constructor() {
17971828

1798-
private var type: JsonValue = JsonValue.from("stored-completions")
1829+
private var type: JsonValue = JsonValue.from("stored_completions")
17991830
private var metadata: JsonField<Metadata> = JsonMissing.of()
18001831
private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
18011832

@@ -1812,7 +1843,7 @@ private constructor(
18121843
* It is usually unnecessary to call this method because the field defaults to the
18131844
* following:
18141845
* ```java
1815-
* JsonValue.from("stored-completions")
1846+
* JsonValue.from("stored_completions")
18161847
* ```
18171848
*
18181849
* This method is primarily for setting the field to an undocumented or not yet
@@ -1871,7 +1902,7 @@ private constructor(
18711902
}
18721903

18731904
_type().let {
1874-
if (it != JsonValue.from("stored-completions")) {
1905+
if (it != JsonValue.from("stored_completions")) {
18751906
throw OpenAIInvalidDataException("'type' is invalid, received $it")
18761907
}
18771908
}
@@ -1895,7 +1926,7 @@ private constructor(
18951926
*/
18961927
@JvmSynthetic
18971928
internal fun validity(): Int =
1898-
type.let { if (it == JsonValue.from("stored-completions")) 1 else 0 } +
1929+
type.let { if (it == JsonValue.from("stored_completions")) 1 else 0 } +
18991930
(metadata.asKnown().getOrNull()?.validity() ?: 0)
19001931

19011932
/** Metadata filters for the stored completions data source. */
@@ -2353,7 +2384,7 @@ private constructor(
23532384

23542385
/**
23552386
* A list of chat messages forming the prompt or context. May include variable
2356-
* references to the "item" namespace, ie {{item.name}}.
2387+
* references to the `item` namespace, i.e. `{{item.name}}`.
23572388
*
23582389
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
23592390
* unexpectedly missing or null (e.g. if the server responded with an unexpected
@@ -2501,7 +2532,7 @@ private constructor(
25012532

25022533
/**
25032534
* A list of chat messages forming the prompt or context. May include variable
2504-
* references to the "item" namespace, ie {{item.name}}.
2535+
* references to the `item` namespace, i.e. `{{item.name}}`.
25052536
*/
25062537
fun input(input: List<Input>) = input(JsonField.of(input))
25072538

@@ -2724,7 +2755,7 @@ private constructor(
27242755

27252756
/**
27262757
* A chat message that makes up the prompt or context. May include variable references
2727-
* to the "item" namespace, ie {{item.name}}.
2758+
* to the `item` namespace, i.e. `{{item.name}}`.
27282759
*/
27292760
@JsonDeserialize(using = Input.Deserializer::class)
27302761
@JsonSerialize(using = Input.Serializer::class)

openai-java-core/src/main/kotlin/com/openai/models/evals/EvalCreateResponse.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import kotlin.jvm.optionals.getOrNull
4040
* done for your LLM integration. Like:
4141
* - Improve the quality of my chatbot
4242
* - See how well my chatbot handles customer support
43-
* - Check if o3-mini is better at my usecase than gpt-4o
43+
* - Check if o4-mini is better at my use case than gpt-4o
4444
*/
4545
class EvalCreateResponse
4646
private constructor(
@@ -771,7 +771,7 @@ private constructor(
771771
DataSourceConfig(logs = it, _json = json)
772772
} ?: DataSourceConfig(_json = json)
773773
}
774-
"stored-completions" -> {
774+
"stored_completions" -> {
775775
return tryDeserialize(
776776
node,
777777
jacksonTypeRef<EvalStoredCompletionsDataSourceConfig>(),

openai-java-core/src/main/kotlin/com/openai/models/evals/EvalListResponse.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import kotlin.jvm.optionals.getOrNull
4040
* done for your LLM integration. Like:
4141
* - Improve the quality of my chatbot
4242
* - See how well my chatbot handles customer support
43-
* - Check if o3-mini is better at my usecase than gpt-4o
43+
* - Check if o4-mini is better at my use case than gpt-4o
4444
*/
4545
class EvalListResponse
4646
private constructor(
@@ -771,7 +771,7 @@ private constructor(
771771
DataSourceConfig(logs = it, _json = json)
772772
} ?: DataSourceConfig(_json = json)
773773
}
774-
"stored-completions" -> {
774+
"stored_completions" -> {
775775
return tryDeserialize(
776776
node,
777777
jacksonTypeRef<EvalStoredCompletionsDataSourceConfig>(),

openai-java-core/src/main/kotlin/com/openai/models/evals/EvalRetrieveResponse.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import kotlin.jvm.optionals.getOrNull
4040
* done for your LLM integration. Like:
4141
* - Improve the quality of my chatbot
4242
* - See how well my chatbot handles customer support
43-
* - Check if o3-mini is better at my usecase than gpt-4o
43+
* - Check if o4-mini is better at my use case than gpt-4o
4444
*/
4545
class EvalRetrieveResponse
4646
private constructor(
@@ -771,7 +771,7 @@ private constructor(
771771
DataSourceConfig(logs = it, _json = json)
772772
} ?: DataSourceConfig(_json = json)
773773
}
774-
"stored-completions" -> {
774+
"stored_completions" -> {
775775
return tryDeserialize(
776776
node,
777777
jacksonTypeRef<EvalStoredCompletionsDataSourceConfig>(),

openai-java-core/src/main/kotlin/com/openai/models/evals/EvalStoredCompletionsDataSourceConfig.kt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ private constructor(
4545
fun schema(): Schema = schema.getRequired("schema")
4646

4747
/**
48-
* The type of data source. Always `stored-completions`.
48+
* The type of data source. Always `stored_completions`.
4949
*
5050
* Expected to always return the following:
5151
* ```java
52-
* JsonValue.from("stored-completions")
52+
* JsonValue.from("stored_completions")
5353
* ```
5454
*
5555
* However, this method can be useful for debugging and logging (e.g. if the server responded
@@ -114,7 +114,7 @@ private constructor(
114114
class Builder internal constructor() {
115115

116116
private var schema: JsonField<Schema>? = null
117-
private var type: JsonValue = JsonValue.from("stored-completions")
117+
private var type: JsonValue = JsonValue.from("stored_completions")
118118
private var metadata: JsonField<Metadata> = JsonMissing.of()
119119
private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
120120

@@ -149,7 +149,7 @@ private constructor(
149149
* It is usually unnecessary to call this method because the field defaults to the
150150
* following:
151151
* ```java
152-
* JsonValue.from("stored-completions")
152+
* JsonValue.from("stored_completions")
153153
* ```
154154
*
155155
* This method is primarily for setting the field to an undocumented or not yet supported
@@ -228,7 +228,7 @@ private constructor(
228228

229229
schema().validate()
230230
_type().let {
231-
if (it != JsonValue.from("stored-completions")) {
231+
if (it != JsonValue.from("stored_completions")) {
232232
throw OpenAIInvalidDataException("'type' is invalid, received $it")
233233
}
234234
}
@@ -252,7 +252,7 @@ private constructor(
252252
@JvmSynthetic
253253
internal fun validity(): Int =
254254
(schema.asKnown().getOrNull()?.validity() ?: 0) +
255-
type.let { if (it == JsonValue.from("stored-completions")) 1 else 0 } +
255+
type.let { if (it == JsonValue.from("stored_completions")) 1 else 0 } +
256256
(metadata.asKnown().getOrNull()?.validity() ?: 0)
257257

258258
/**

openai-java-core/src/main/kotlin/com/openai/models/evals/EvalUpdateResponse.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import kotlin.jvm.optionals.getOrNull
4040
* done for your LLM integration. Like:
4141
* - Improve the quality of my chatbot
4242
* - See how well my chatbot handles customer support
43-
* - Check if o3-mini is better at my usecase than gpt-4o
43+
* - Check if o4-mini is better at my use case than gpt-4o
4444
*/
4545
class EvalUpdateResponse
4646
private constructor(
@@ -771,7 +771,7 @@ private constructor(
771771
DataSourceConfig(logs = it, _json = json)
772772
} ?: DataSourceConfig(_json = json)
773773
}
774-
"stored-completions" -> {
774+
"stored_completions" -> {
775775
return tryDeserialize(
776776
node,
777777
jacksonTypeRef<EvalStoredCompletionsDataSourceConfig>(),

0 commit comments

Comments
 (0)