1
1
package req
2
2
3
+ import (
4
+ "encoding/json"
5
+ )
6
+
3
7
// @brief Defines advanced feature configurations for the agent to join the RTC channel
4
8
//
5
9
// @since v0.7.0
@@ -27,27 +31,40 @@ type TTSVendorParamsInterface interface {
27
31
}
28
32
29
33
// @brief Defines the voice settings of the Minimax vendor for the Text-to-Speech (TTS) module
//
// @note Every field is always serialized (no omitempty) under the key given in its json tag.
// For the values accepted by the vendor, see https://platform.minimaxi.com/document/T2A%20V2
type TTSMinimaxVendorVoiceSettingParam struct {
	VoiceId              string  `json:"voice_id"`
	Speed                float32 `json:"speed"`
	Vol                  float32 `json:"vol"`
	Pitch                int     `json:"pitch"`
	Emotion              string  `json:"emotion"`
	LatexRender          bool    `json:"latex_render"`
	EnglishNormalization bool    `json:"english_normalization"`
}
36
42
37
43
// @brief Defines the audio settings of the Minimax vendor for the Text-to-Speech (TTS) module
type TTSMinimaxVendorAudioSettingParam struct {
	// Serialized as "sample_rate"; presumably the audio sampling rate in Hz (confirm against the vendor documentation).
	SampleRate int `json:"sample_rate"`
}
40
46
47
// @brief Defines the pronunciation dictionary of the Minimax vendor for the Text-to-Speech (TTS) module
//
// @note Tone has no omitempty, so a nil slice is serialized as "tone": null.
type PronunciationDictParam struct {
	Tone []string `json:"tone"`
}
50
+
51
// @brief Defines one voice/weight pair of the Minimax vendor "timber_weights" list
//
// @note Both fields are always serialized (no omitempty).
type TimberWeightsParam struct {
	VoiceId string `json:"voice_id"`
	Weight  int    `json:"weight"`
}
55
+
41
56
// @brief Defines the Minimax vendor parameters for the Text-to-Speech (TTS) module when the agent joins the RTC channel, see
42
57
// https://platform.minimaxi.com/document/T2A%20V2 for details
43
58
//
44
59
// @since v0.7.0
45
60
type TTSMinimaxVendorParams struct {
46
- GroupId string `json:"group_id"`
47
- Key string `json:"key"`
48
- Model string `json:"model"`
49
- VoiceSetting TTSMinimaxVendorVoiceSettingParam `json:"voice_setting"`
50
- AudioSetting TTSMinimaxVendorAudioSettingParam `json:"audio_setting"`
61
+ GroupId string `json:"group_id"`
62
+ Key string `json:"key"`
63
+ Model string `json:"model"`
64
+ VoiceSetting * TTSMinimaxVendorVoiceSettingParam `json:"voice_setting,omitempty"`
65
+ AudioSetting * TTSMinimaxVendorAudioSettingParam `json:"audio_setting,omitempty"`
66
+ PronunciationDict * PronunciationDictParam `json:"pronunciation_dict,omitempty"`
67
+ TimberWeights []TimberWeightsParam `json:"timber_weights,omitempty"`
51
68
}
52
69
53
70
func (TTSMinimaxVendorParams ) VendorParam () {}
@@ -87,19 +104,53 @@ type TTSBytedanceVendorParams struct {
87
104
// VendorParam is an empty marker method; presumably it makes TTSBytedanceVendorParams satisfy
// TTSVendorParamsInterface (confirm against the interface definition).
func (TTSBytedanceVendorParams ) VendorParam () {}
88
105
89
106
// @brief Defines the Microsoft vendor parameters for the Text-to-Speech (TTS) module
//
// @note Every field is always serialized (no omitempty) under the key given in its json tag.
type TTSMicrosoftVendorParams struct {
	// The API key used for authentication.(Required)
	Key string `json:"key"`
	// The Azure region where the speech service is hosted.(Required)
	Region string `json:"region"`
	// The identifier for the selected voice for speech synthesis.(Optional)
	VoiceName string `json:"voice_name"`
	// Indicates the speaking rate of the text.(Optional)
	//
	// The rate can be applied at the word or sentence level and should be between 0.5 and 2.0 times the original audio speed.
	Speed float32 `json:"speed"`
	// Specifies the audio volume as a number between 0.0 and 100.0, where 0.0 is the quietest and 100.0 is the loudest.
	//
	// For example, a value of 75 sets the volume to 75% of the maximum.
	//
	// The default value is 100.
	Volume float32 `json:"volume"`
	// Specifies the audio sampling rate in Hz.(Optional)
	//
	// The default value is 24000.
	SampleRate int `json:"sample_rate"`
}

// VendorParam is an empty marker method; presumably it makes TTSMicrosoftVendorParams satisfy
// TTSVendorParamsInterface (confirm against the interface definition).
func (TTSMicrosoftVendorParams) VendorParam() {}
98
130
99
131
// @brief Defines the ElevenLabs vendor parameters for the Text-to-Speech (TTS) module
//
// @note Every field is always serialized (no omitempty) under the key given in its json tag.
type TTSElevenLabsVendorParams struct {
	// The API key used for authentication.(Required)
	Key string `json:"key"`
	// Identifier of the model to be used.(Required)
	ModelId string `json:"model_id"`
	// The identifier for the selected voice for speech synthesis.(Required)
	VoiceId string `json:"voice_id"`
	// Specifies the audio sampling rate in Hz.(Optional)
	//
	// The default value is 24000.
	SampleRate int `json:"sample_rate"`
	// The stability for voice settings.(Optional)
	Stability float32 `json:"stability"`
	// Determines how closely the AI should adhere to the original voice when attempting to replicate it.
	SimilarityBoost float32 `json:"similarity_boost"`
	// Determines the style exaggeration of the voice. This setting attempts to amplify the style of the original speaker.
	//
	// It does consume additional computational resources and might increase latency if set to anything other than 0.
	Style float32 `json:"style"`
	// This setting boosts the similarity to the original speaker.
	//
	// Using this setting requires a slightly higher computational load, which in turn increases latency.
	UseSpeakerBoost bool `json:"use_speaker_boost"`
}

// VendorParam is an empty marker method; presumably it makes TTSElevenLabsVendorParams satisfy
// TTSVendorParamsInterface (confirm against the interface definition).
func (TTSElevenLabsVendorParams) VendorParam() {}
@@ -208,7 +259,35 @@ type JoinPropertiesCustomLLMBody struct {
208
259
// and then recalculate the silence time.
209
260
//
210
261
// When silence_timeout is set to 0, this parameter is ignored.
262
+ //
263
+ // Deprecated: Use [Parameters.SilenceConfig] instead
264
+ //
265
+ // @deprecated This field is deprecated since v0.11.0
211
266
SilenceMessage * string `json:"silence_message,omitempty"`
267
+ // LLM provider(Optional), supports the following settings:
268
+ //
269
+ // - "custom": Custom LLM provider.
270
+ // When you set this option, the agent includes the following fields, in addition to role and content when making requests to the custom LLM:
271
+ // - turn_id: A unique identifier for each conversation turn. It starts from 0 and increments with each turn. One user-agent interaction corresponds to one turn_id.
272
+ // - timestamp: The request timestamp, in milliseconds.
273
+ // - "aliyun": Aliyun LLM provider.(Only available in China Mainland service region)
274
+ //
275
+ // - "bytedance": Bytedance LLM provider.(Only available in China Mainland service region)
276
+ //
277
+ // - "deepseek": DeepSeek LLM provider.(Only available in China Mainland service region)
278
+ //
279
+ // - "tencent": Tencent LLM provider.(Only available in China Mainland service region)
280
+ //
281
+ Vendor string `json:"vendor,omitempty"`
282
+
283
+ // The request style for chat completion.(Optional)(Only available in global service region)
284
+ //
285
+ // - "openai": OpenAI style.(Default)
286
+ //
287
+ // - "gemini": Gemini style.
288
+ //
289
+ // - "anthropic": Anthropic style.
290
+ Style string `json:"style,omitempty"`
212
291
}
213
292
214
293
// @brief Defines the Voice Activity Detection (VAD) configuration for the agent to join the RTC channel
@@ -289,11 +368,19 @@ type JoinPropertiesReqBody struct {
289
368
// - 0 (default): Do not enable this feature.
290
369
//
291
370
// - (0,60]: Must also set llm.silence_message to enable the feature.
371
+ //
372
+ // Deprecated: Use [Parameters.SilenceConfig] instead
373
+ //
374
+ // @deprecated This field is deprecated since v0.11.0
292
375
SilenceTimeout * int `json:"silence_timeout,omitempty"`
293
376
294
377
// Agent user ID in the RTM channel
295
378
//
296
379
// Only valid when advanced_features.enable_rtm is true
380
+ //
381
+ // Deprecated: Use AgentRtcUId instead
382
+ //
383
+ // @deprecated This field is deprecated since v0.11.0
297
384
AgentRtmUId * string `json:"agent_rtm_uid,omitempty"`
298
385
// Advanced feature configurations (optional), see JoinPropertiesAdvancedFeaturesBody for details
299
386
AdvancedFeatures * JoinPropertiesAdvancedFeaturesBody `json:"advanced_features,omitempty"`
@@ -304,6 +391,118 @@ type JoinPropertiesReqBody struct {
304
391
// Voice Activity Detection (VAD) configuration (optional), see JoinPropertiesVadBody for details
305
392
Vad * JoinPropertiesVadBody `json:"vad,omitempty"`
306
393
// Automatic Speech Recognition (ASR) configuration (optional), see JoinPropertiesAsrBody for details
307
- Asr * JoinPropertiesAsrBody `json:"asr,omitempty"`
308
- Parameters map [string ]any `json:"parameters,omitempty"`
394
+ Asr * JoinPropertiesAsrBody `json:"asr,omitempty"`
395
+ // Conversation turn detection settings
396
+ TurnDetection * TurnDetectionBody `json:"turn_detection,omitempty"`
397
+ // Agent parameters configuration (optional), see Parameters for details
398
+ Parameters * Parameters `json:"parameters,omitempty"`
399
+ }
400
+
401
// @brief Conversation turn detection settings
//
// @since v0.11.0
type TurnDetectionBody struct {
	// When the agent is interacting (speaking or thinking), the mode in which a human voice interrupts the agent's behavior.
	// Supports the following values:
	//
	//  - "interrupt" (Default): Interrupt mode, human voice immediately interrupts the agent's interaction.
	//    The agent will terminate the current interaction and directly process the human voice input.
	//
	//  - "append": Append mode, human voice does not interrupt the agent.
	//    The agent will process the human voice request after the current interaction ends.
	//
	//  - "ignore": Ignore mode, the agent ignores the human voice request.
	//    If the agent is speaking or thinking and receives human voice during the process,
	//    the agent will directly ignore and discard the human voice request, not storing it in the context.
	//
	// The field is left out of the JSON when empty.
	InterruptMode string `json:"interrupt_mode,omitempty"`
}
418
+
419
+ // @brief Structured data for parameters
420
+ //
421
+ // @since v0.11.0
422
+ type ParametersStructData struct {
423
+ // Silence configuration for the agent
424
+ SilenceConfig * SilenceConfig `json:"silence_config,omitempty"`
425
+ }
426
+
427
// @brief Silence configuration for the agent
//
// @note All fields are pointers and are left out of the JSON when nil.
//
// @since v0.11.0
type SilenceConfig struct {
	// Agent maximum silence time (ms).(Optional)
	//
	// After the agent is created and a user joins the channel,
	// the duration of the agent's non-listening, thinking, or speaking state is called the agent's silence time.
	//
	// When the silence time reaches the set value, the agent will report the silence prompt message.
	//
	// This feature can be used to let the agent remind users when users are inactive.
	//
	// Set 0: Do not enable this feature.
	//
	// Set to (0,60000]: Must also set content to enable normal reporting of silence prompts, otherwise the setting is invalid.
	TimeoutMs *int `json:"timeout_ms,omitempty"`

	// When the silence time reaches the set value, the agent will take the following actions(Optional):
	//
	//  - "speak": Use TTS module to report the silence message (Default)
	//
	//  - "think": Append the silence message to the end of the context and pass it to LLM
	Action *string `json:"action,omitempty"`

	// Content of the silence message (Optional)
	//
	// The content will be used in different ways according to the settings in the action.
	Content *string `json:"content,omitempty"`
}
457
+
458
+ // @brief Agent parameters configuration
459
+ //
460
+ // @note Parameters that contains both extra data and fixed data. The same key in extra data and fixed data will be merged.
461
+ //
462
+ // @since v0.11.0
463
+ type Parameters struct {
464
+ // Extra parameters for flexible key-value pairs
465
+ ExtraParams map [string ]any `json:"-"`
466
+ // Fixed parameters for type-safe parameters
467
+ FixedParams * ParametersStructData `json:"-"`
468
+ }
469
+
470
+ // MarshalJSON implements custom JSON marshaling
471
+ func (p * Parameters ) MarshalJSON () ([]byte , error ) {
472
+ // Create a map to hold the merged data
473
+ merged := make (map [string ]any )
474
+
475
+ // Add fixed parameters if present
476
+ if p .FixedParams != nil {
477
+ structBytes , err := json .Marshal (p .FixedParams )
478
+ if err != nil {
479
+ return nil , err
480
+ }
481
+ var structMap map [string ]any
482
+ if err := json .Unmarshal (structBytes , & structMap ); err != nil {
483
+ return nil , err
484
+ }
485
+ for k , v := range structMap {
486
+ merged [k ] = v
487
+ }
488
+ }
489
+
490
+ // Add extra parameters if present
491
+ if p .ExtraParams != nil {
492
+ for k , v := range p .ExtraParams {
493
+ merged [k ] = v
494
+ }
495
+ }
496
+
497
+ return json .Marshal (merged )
498
+ }
499
+
500
+ // UnmarshalJSON implements custom JSON unmarshaling
501
+ func (p * Parameters ) UnmarshalJSON (data []byte ) error {
502
+ var mapData map [string ]any
503
+ if err := json .Unmarshal (data , & mapData ); err != nil {
504
+ return err
505
+ }
506
+ p .ExtraParams = mapData
507
+ return nil
309
508
}
0 commit comments