@@ -76,14 +76,16 @@ private constructor(
76
76
77
77
/**
78
78
* Configuration for turn detection, either Server VAD or Semantic VAD. This can be set to `null`
79
- * to turn off, in which case the client must manually trigger model response. Server VAD means
80
- * that the model will detect the start and end of speech based on audio volume and respond at
81
- * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in
82
- * conjunction with VAD) to semantically estimate whether the user has finished speaking, then
83
- * dynamically sets a timeout based on this probability. For example, if user audio trails off
84
- * with "uhhm", the model will score a low probability of turn end and wait longer for the user
85
- * to continue speaking. This can be useful for more natural conversations, but may have a
86
- * higher latency.
79
+ * to turn off, in which case the client must manually trigger model response.
80
+ *
81
+ * Server VAD means that the model will detect the start and end of speech based on audio volume
82
+ * and respond at the end of user speech.
83
+ *
84
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to
85
+ * semantically estimate whether the user has finished speaking, then dynamically sets a timeout
86
+ * based on this probability. For example, if user audio trails off with "uhhm", the model will
87
+ * score a low probability of turn end and wait longer for the user to continue speaking. This
88
+ * can be useful for more natural conversations, but may have a higher latency.
87
89
*
88
90
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
89
91
* server responded with an unexpected value).
@@ -230,17 +232,24 @@ private constructor(
230
232
231
233
/**
232
234
* Configuration for turn detection, either Server VAD or Semantic VAD. This can be set to
233
- * `null` to turn off, in which case the client must manually trigger model response. Server
234
- * VAD means that the model will detect the start and end of speech based on audio volume
235
- * and respond at the end of user speech. Semantic VAD is more advanced and uses a turn
236
- * detection model (in conjunction with VAD) to semantically estimate whether the user has
237
- * finished speaking, then dynamically sets a timeout based on this probability. For
238
- * example, if user audio trails off with "uhhm", the model will score a low probability of
239
- * turn end and wait longer for the user to continue speaking. This can be useful for more
240
- * natural conversations, but may have a higher latency.
235
+ * `null` to turn off, in which case the client must manually trigger model response.
236
+ *
237
+ * Server VAD means that the model will detect the start and end of speech based on audio
238
+ * volume and respond at the end of user speech.
239
+ *
240
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD)
241
+ * to semantically estimate whether the user has finished speaking, then dynamically sets a
242
+ * timeout based on this probability. For example, if user audio trails off with "uhhm", the
243
+ * model will score a low probability of turn end and wait longer for the user to continue
244
+ * speaking. This can be useful for more natural conversations, but may have a higher
245
+ * latency.
241
246
*/
242
- fun turnDetection (turnDetection : RealtimeAudioInputTurnDetection ) =
243
- turnDetection(JsonField .of(turnDetection))
247
+ fun turnDetection (turnDetection : RealtimeAudioInputTurnDetection ? ) =
248
+ turnDetection(JsonField .ofNullable(turnDetection))
249
+
250
+ /** Alias for calling [Builder.turnDetection] with `turnDetection.orElse(null)`. */
251
+ fun turnDetection (turnDetection : Optional <RealtimeAudioInputTurnDetection >) =
252
+ turnDetection(turnDetection.getOrNull())
244
253
245
254
/**
246
255
* Sets [Builder.turnDetection] to an arbitrary JSON value.
@@ -253,6 +262,20 @@ private constructor(
253
262
this .turnDetection = turnDetection
254
263
}
255
264
265
+ /**
266
+ * Alias for calling [turnDetection] with
267
+ * `RealtimeAudioInputTurnDetection.ofServerVad(serverVad)`.
268
+ */
269
+ fun turnDetection (serverVad : RealtimeAudioInputTurnDetection .ServerVad ) =
270
+ turnDetection(RealtimeAudioInputTurnDetection .ofServerVad(serverVad))
271
+
272
+ /**
273
+ * Alias for calling [turnDetection] with
274
+ * `RealtimeAudioInputTurnDetection.ofSemanticVad(semanticVad)`.
275
+ */
276
+ fun turnDetection (semanticVad : RealtimeAudioInputTurnDetection .SemanticVad ) =
277
+ turnDetection(RealtimeAudioInputTurnDetection .ofSemanticVad(semanticVad))
278
+
256
279
fun additionalProperties (additionalProperties : Map <String , JsonValue >) = apply {
257
280
this .additionalProperties.clear()
258
281
putAllAdditionalProperties(additionalProperties)
0 commit comments