val) {
+ phraseList = val;
+ return this;
+ }
+
+ public MicrosoftASRVendorParams build() {
+ return new MicrosoftASRVendorParams(this);
+ }
+ }
+
+ @Override
+ public ASRVendorEnum getVendor() {
+ return ASRVendorEnum.MICROSOFT;
+ }
+ }
+
+    /**
+     * Parameter object for the Tencent ASR vendor; instances are created through {@link Builder}.
+     *
+     * @since v0.7.0
+     */
+    public static class TencentASRVendorParams implements ASRVendorParams {
+
+        @JsonProperty("key")
+        private String key;
+
+        @JsonProperty("app_id")
+        private String appId;
+
+        @JsonProperty("secret")
+        private String secret;
+
+        @JsonProperty("engine_model_type")
+        private String engineModelType;
+
+        @JsonProperty("voice_id")
+        private String voiceId;
+
+        public static Builder builder() {
+            return new Builder();
+        }
+        // Copies the builder state field by field; values never set on the builder stay null.
+        private TencentASRVendorParams(Builder source) {
+            this.key = source.key;
+            this.appId = source.appId;
+            this.secret = source.secret;
+            this.engineModelType = source.engineModelType;
+            this.voiceId = source.voiceId;
+        }
+
+        public String getKey() {
+            return this.key;
+        }
+
+        public void setKey(String key) {
+            this.key = key;
+        }
+
+        public String getAppId() {
+            return this.appId;
+        }
+
+        public void setAppId(String appId) {
+            this.appId = appId;
+        }
+
+        public String getSecret() {
+            return this.secret;
+        }
+
+        public void setSecret(String secret) {
+            this.secret = secret;
+        }
+
+        public String getEngineModelType() {
+            return this.engineModelType;
+        }
+
+        public void setEngineModelType(String engineModelType) {
+            this.engineModelType = engineModelType;
+        }
+
+        public String getVoiceId() {
+            return this.voiceId;
+        }
+
+        public void setVoiceId(String voiceId) {
+            this.voiceId = voiceId;
+        }
+
+        public static final class Builder {
+            private String key;
+            private String appId;
+            private String secret;
+            private String engineModelType;
+            private String voiceId;
+            // Private: callers obtain a Builder from TencentASRVendorParams.builder().
+            private Builder() {
+            }
+
+            public Builder key(String val) {
+                this.key = val;
+                return this;
+            }
+
+            public Builder appId(String val) {
+                this.appId = val;
+                return this;
+            }
+
+            public Builder secret(String val) {
+                this.secret = val;
+                return this;
+            }
+
+            public Builder engineModelType(String val) {
+                this.engineModelType = val;
+                return this;
+            }
+
+            public Builder voiceId(String val) {
+                this.voiceId = val;
+                return this;
+            }
+
+            public TencentASRVendorParams build() {
+                return new TencentASRVendorParams(this);
+            }
+        }
+
+        @Override
+        public ASRVendorEnum getVendor() {
+            return ASRVendorEnum.TENCENT;
+        }
+    }
+
+
+    /**
+     * Parameter object for the Deepgram ASR vendor; instances are created through {@link Builder}.
+     *
+     * @since v0.7.0
+     */
+    public static class DeepgramASRVendorParams implements ASRVendorParams {
+
+        @JsonProperty("url")
+        private String url;
+
+        @JsonProperty("key")
+        private String key;
+
+        @JsonProperty("model")
+        private String model;
+
+        @JsonProperty("language")
+        private String language;
+
+        public static Builder builder() {
+            return new Builder();
+        }
+
+        public String getUrl() {
+            return this.url;
+        }
+
+        public void setUrl(String url) {
+            this.url = url;
+        }
+
+        public String getKey() {
+            return this.key;
+        }
+
+        public void setKey(String key) {
+            this.key = key;
+        }
+
+        public String getModel() {
+            return this.model;
+        }
+
+        public void setModel(String model) {
+            this.model = model;
+        }
+
+        public String getLanguage() {
+            return this.language;
+        }
+
+        public void setLanguage(String language) {
+            this.language = language;
+        }
+        // Copies the builder state field by field; values never set on the builder stay null.
+        private DeepgramASRVendorParams(Builder source) {
+            this.url = source.url;
+            this.key = source.key;
+            this.model = source.model;
+            this.language = source.language;
+        }
+
+        public static final class Builder {
+            private String url;
+            private String key;
+            private String model;
+            private String language;
+            // Private: callers obtain a Builder from DeepgramASRVendorParams.builder().
+            private Builder() {
+            }
+
+            public Builder url(String val) {
+                this.url = val;
+                return this;
+            }
+
+            public Builder key(String val) {
+                this.key = val;
+                return this;
+            }
+
+            public Builder model(String val) {
+                this.model = val;
+                return this;
+            }
+
+            public Builder language(String val) {
+                this.language = val;
+                return this;
+            }
+
+            public DeepgramASRVendorParams build() {
+                return new DeepgramASRVendorParams(this);
+            }
+        }
+
+        @Override
+        public ASRVendorEnum getVendor() {
+            return ASRVendorEnum.DEEPGRAM;
+        }
+    }
+
+
+    /**
+     * Parameter object for the Fengming ASR vendor.
+     *
+     * @note The class declares no fields, so it carries no vendor-specific settings.
+     *
+     * @since v0.7.0
+     */
+    public static class FengmingASRVendorParams implements ASRVendorParams {
+
+        private FengmingASRVendorParams() {
+        } // nothing to initialise
+
+        private FengmingASRVendorParams(Builder builder) {
+        } // the builder holds no state to copy
+
+        public static Builder builder() {
+            return new Builder();
+        }
+
+        public static final class Builder {
+            private Builder() {
+            } // obtained via FengmingASRVendorParams.builder()
+
+            public FengmingASRVendorParams build() {
+                return new FengmingASRVendorParams(this);
+            }
+        }
+
+        @Override
+        public ASRVendorEnum getVendor() {
+            return ASRVendorEnum.FENGMING;
+        }
+    }
+
+    /**
+     * Parameter object for the ARES ASR vendor.
+     *
+     * @note The class declares no fields, so it carries no vendor-specific settings.
+     *
+     * @since v0.7.0
+     */
+    public static class ARESASRVendorParams implements ASRVendorParams {
+
+        private ARESASRVendorParams() {
+        } // nothing to initialise
+
+        private ARESASRVendorParams(Builder builder) {
+        } // the builder holds no state to copy
+
+        public static Builder builder() {
+            return new Builder();
+        }
+
+        public static final class Builder {
+            private Builder() {
+            } // obtained via ARESASRVendorParams.builder()
+
+            public ARESASRVendorParams build() {
+                return new ARESASRVendorParams(this);
+            }
+        }
+
+        @Override
+        public ASRVendorEnum getVendor() {
+            return ASRVendorEnum.ARES;
+        }
+    }
+
+    /**
+     * @brief ASR vendors that can be named in the agent join request; each constant wraps its JSON name
+     * @since v0.7.0
+     */
+    public enum ASRVendorEnum {
+
+        /**
+         * Fengming, serialized as "fengming"
+         *
+         * @since v0.7.0
+         */
+        FENGMING("fengming"),
+
+        /**
+         * Tencent, serialized as "tencent"
+         *
+         * @since v0.7.0
+         */
+        TENCENT("tencent"),
+
+        /**
+         * Microsoft, serialized as "microsoft"
+         *
+         * @since v0.7.0
+         */
+        MICROSOFT("microsoft"),
+
+        /**
+         * ARES, serialized as "ares"
+         *
+         * @since v0.7.0
+         */
+        ARES("ares"),
+
+        /**
+         * Deepgram, serialized as "deepgram"
+         *
+         * @since v0.7.0
+         */
+        DEEPGRAM("deepgram");
+
+        private final String vendor;
+
+        ASRVendorEnum(String vendor) {
+            this.vendor = vendor;
+        }
+
+        public static ASRVendorEnum getEnum(String vendor) {
+            for (ASRVendorEnum candidate : values()) {
+                boolean sameName = candidate.vendor.equals(vendor);
+                if (sameName) {
+                    return candidate;
+                }
+            }
+            return null;
+        }
+
+        @JsonValue
+        public String toJson() {
+            return this.vendor;
+        }
+
+        @Override
+        public String toString() {
+            return vendor;
+        }
+
+    }
+
+ /**
+ * @brief Defines the Automatic Speech Recognition (ASR) configuration for agent
+ * to join RTC channel
+ * @since v0.3.0
+ */
+ public static class ASRPayload {
+
+ /**
+ * Language used for interaction between user and agent (optional)
+ *
+ * - zh-CN: Chinese (supports mixed Chinese and English) (default)
+ *
+ * - en-US: English
+ */
+ @JsonProperty("language")
+ private String language;
+
+ /**
+ * ASR vendor, see {@link ASRVendorEnum}
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("vendor")
+ private ASRVendorEnum vendor;
+
+ /**
+ * ASR vendor parameter description, see
+ *
+ * - {@link FengmingASRVendorParams}
+ *
+ * - {@link TencentASRVendorParams}
+ *
+ * - {@link MicrosoftASRVendorParams}
+ *
+ * - {@link ARESASRVendorParams}
+ *
+ * - {@link DeepgramASRVendorParams}
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("params")
+ private ASRVendorParams params;
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ private ASRPayload(Builder builder) {
+ setLanguage(builder.language);
+ setVendor(builder.vendor);
+ setParams(builder.params);
+ }
+
+ public String getLanguage() {
+ return language;
+ }
+
+ public void setLanguage(String language) {
+ this.language = language;
+ }
+
+ public ASRVendorEnum getVendor() {
+ return vendor;
+ }
+
+ public void setVendor(ASRVendorEnum vendor) {
+ this.vendor = vendor;
+ }
+
+ public ASRVendorParams getParams() {
+ return params;
+ }
+
+ public void setParams(ASRVendorParams params) {
+ this.params = params;
+ }
+
public static final class Builder {
private String language;
+ private ASRVendorEnum vendor;
+ private ASRVendorParams params;
private Builder() {
}
@@ -2377,6 +3520,16 @@ public Builder language(String val) {
return this;
}
+ public Builder vendor(ASRVendorEnum val) {
+ vendor = val;
+ return this;
+ }
+
+ public Builder params(ASRVendorParams val) {
+ params = val;
+ return this;
+ }
+
public ASRPayload build() {
return new ASRPayload(this);
}
@@ -2446,16 +3599,213 @@ public void setInterruptMode(String interruptMode) {
@JsonProperty("interrupt_mode")
private String interruptMode;
+ /**
+ * Turn detection mechanism. (Optional)
+ *
+ * - "agora_vad": Agora VAD. (Default)
+ *
+ * - "server_vad": The model detects the start and end of speech based on audio
+ * volume and responds at the end of user speech. Only available when mllm is
+ * enabled and OpenAI is selected.
+ *
+ * - "semantic_vad": Uses a turn detection model in conjunction with VAD to
+ * semantically estimate whether the user has finished speaking, then dynamically
+ * sets a timeout based on this probability for more natural conversations. Only
+ * available when mllm is enabled and OpenAI is selected.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("type")
+ private String type;
+
+ /**
+ * The amount of time in milliseconds that the user's voice must exceed the VAD
+ * threshold before an interruption is triggered. (Optional)
+ *
+ * Default value is 160.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("interrupt_duration_ms")
+ private Integer interruptDurationMs;
+
+ /**
+ * The extra forward padding time in milliseconds before the processing system
+ * starts to process the speech input. This padding helps capture the beginning
+ * of the speech.
+ *
+ * Default value is 800.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("prefix_padding_ms")
+ private Integer prefixPaddingMs;
+
+ /**
+ * The duration of audio silence in milliseconds. (Optional)
+ *
+ * If no voice activity is detected during this period, the agent assumes that
+ * the user has stopped speaking.
+ *
+ * Default value is 480.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("silence_duration_ms")
+ private Integer silenceDurationMs;
+
+ /**
+ * Identification sensitivity determines the level of sound in the audio signal
+ * that is considered voice activity.(Optional)
+ *
+ * Lower values make it easier for the agent to detect speech, and higher values
+ * ignore weak sounds.
+ *
+ * The value range is (0.0, 1.0).
+ *
+ * Default value is 0.5.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("threshold")
+ private Float threshold;
+
+ /**
+ * Whether to automatically generate a response when a VAD stop event occurs.
+ * (Optional)
+ *
+ * Only available in server_vad and semantic_vad modes when using OpenAI
+ * Realtime API.
+ *
+ * Default value is true.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("create_response")
+ private Boolean createResponse;
+
+ /**
+ * Whether to automatically interrupt any ongoing response when a VAD start
+ * event occurs.
+ *
+ * Only available in server_vad and semantic_vad modes when using OpenAI
+ * Realtime API.
+ *
+ * Default value is true.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("interrupt_response")
+ private Boolean interruptResponse;
+
+ /**
+ * The eagerness of the model to respond (Optional):
+ *
+ * - "auto": Equivalent to medium(Default)
+ *
+ * - "low": Wait longer for the user to continue speaking
+ *
+ * - "high": Respond more quickly
+ *
+ * Only available in semantic_vad mode when using OpenAI Realtime API.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("eagerness")
+ private String eagerness;
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public Integer getInterruptDurationMs() {
+ return interruptDurationMs;
+ }
+
+ public void setInterruptDurationMs(Integer interruptDurationMs) {
+ this.interruptDurationMs = interruptDurationMs;
+ }
+
+ public Integer getPrefixPaddingMs() {
+ return prefixPaddingMs;
+ }
+
+ public void setPrefixPaddingMs(Integer prefixPaddingMs) {
+ this.prefixPaddingMs = prefixPaddingMs;
+ }
+
+ public Integer getSilenceDurationMs() {
+ return silenceDurationMs;
+ }
+
+ public void setSilenceDurationMs(Integer silenceDurationMs) {
+ this.silenceDurationMs = silenceDurationMs;
+ }
+
+ public Float getThreshold() {
+ return threshold;
+ }
+
+ public void setThreshold(Float threshold) {
+ this.threshold = threshold;
+ }
+
+ public Boolean getCreateResponse() {
+ return createResponse;
+ }
+
+ public void setCreateResponse(Boolean createResponse) {
+ this.createResponse = createResponse;
+ }
+
+ public Boolean getInterruptResponse() {
+ return interruptResponse;
+ }
+
+ public void setInterruptResponse(Boolean interruptResponse) {
+ this.interruptResponse = interruptResponse;
+ }
+
+ public String getEagerness() {
+ return eagerness;
+ }
+
+ public void setEagerness(String eagerness) {
+ this.eagerness = eagerness;
+ }
+
+
public static TurnDetectionPayload.Builder builder() {
return new TurnDetectionPayload.Builder();
}
private TurnDetectionPayload(Builder builder) {
setInterruptMode(builder.interruptMode);
+ setType(builder.type);
+ setInterruptDurationMs(builder.interruptDurationMs);
+ setPrefixPaddingMs(builder.prefixPaddingMs);
+ setSilenceDurationMs(builder.silenceDurationMs);
+ setThreshold(builder.threshold);
+ setCreateResponse(builder.createResponse);
+ setInterruptResponse(builder.interruptResponse);
+ setEagerness(builder.eagerness);
}
public static final class Builder {
private String interruptMode;
+ private String type;
+ private Integer interruptDurationMs;
+ private Integer prefixPaddingMs;
+ private Integer silenceDurationMs;
+ private Float threshold;
+ private Boolean createResponse;
+ private Boolean interruptResponse;
+ private String eagerness;
+
private Builder() {
}
@@ -2465,9 +3815,51 @@ public Builder interruptMode(String val) {
return this;
}
+ public Builder type(String val) {
+ type = val;
+ return this;
+ }
+
+ public Builder interruptDurationMs(Integer val) {
+ interruptDurationMs = val;
+ return this;
+ }
+
+ public Builder prefixPaddingMs(Integer val) {
+ prefixPaddingMs = val;
+ return this;
+ }
+
+ public Builder silenceDurationMs(Integer val) {
+ silenceDurationMs = val;
+ return this;
+ }
+
+ public Builder threshold(Float val) {
+ threshold = val;
+ return this;
+ }
+
+ public Builder interruptResponse(Boolean val) {
+ interruptResponse = val;
+ return this;
+ }
+
+ public Builder eagerness(String val) {
+ eagerness = val;
+ return this;
+ }
+
+ public Builder createResponse(Boolean val) {
+ createResponse = val;
+ return this;
+ }
+
+
public TurnDetectionPayload build() {
return new TurnDetectionPayload(this);
}
+
}
}
@@ -2565,9 +3957,55 @@ public Parameters build() {
*/
public static class FixedParams {
+ /**
+ * Silence config
+ *
+ * @since v0.6.0
+ */
@JsonProperty("silence_config")
private SilenceConfig silenceConfig;
+ /**
+ * Agent data transmission channel (Optional):
+ *
+ * - "rtm": Use RTM transmission. This configuration takes effect only when advanced_features.enable_rtm is true.
+ *
+ * - "datastream": Use RTC data stream transport. (Default)
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("data_channel")
+ private String dataChannel;
+
+
+ /**
+ * Whether to receive agent performance data (Optional):
+ *
+ * - true: Receive agent performance data.
+ *
+ * - false: Do not receive agent performance data. (Default)
+ *
+ * This setting only takes effect when advanced_features.enable_rtm is true.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("enable_metrics")
+ private Boolean enableMetrics;
+
+ /**
+ * Whether to receive agent error events (Optional):
+ *
+ * - true: Receive agent error events.
+ *
+ * - false: Do not receive agent error events. (Default)
+ *
+ * This setting only takes effect when advanced_features.enable_rtm is true.
+ *
+ * @since v0.7.0
+ */
+ @JsonProperty("enable_error_message")
+ private Boolean enableErrorMessage;
+
public SilenceConfig getSilenceConfig() {
return silenceConfig;
}
@@ -2576,16 +4014,36 @@ public void setSilenceConfig(SilenceConfig silenceConfig) {
this.silenceConfig = silenceConfig;
}
+ public Boolean getEnableMetrics() {
+ return enableMetrics;
+ }
+
+ public Boolean getEnableErrorMessage() {
+ return enableErrorMessage;
+ }
+
+ public void setEnableMetrics(Boolean enableMetrics) {
+ this.enableMetrics = enableMetrics;
+ }
+
+ public void setEnableErrorMessage(Boolean enableErrorMessage) {
+ this.enableErrorMessage = enableErrorMessage;
+ }
+
public static Builder builder() {
return new Builder();
}
private FixedParams(Builder builder) {
setSilenceConfig(builder.silenceConfig);
+ setEnableMetrics(builder.enableMetrics);
+ setEnableErrorMessage(builder.enableErrorMessage);
}
public static final class Builder {
private SilenceConfig silenceConfig;
+ private Boolean enableMetrics;
+ private Boolean enableErrorMessage;
private Builder() {
}
@@ -2595,6 +4053,16 @@ public Builder silenceConfig(SilenceConfig val) {
return this;
}
+ public Builder enableMetrics(Boolean val) {
+ enableMetrics = val;
+ return this;
+ }
+
+ public Builder enableErrorMessage(Boolean val) {
+ enableErrorMessage = val;
+ return this;
+ }
+
public FixedParams build() {
return new FixedParams(this);
}
diff --git a/examples/convoai/README.md b/examples/convoai/README.md
index 4ac26f7..225657d 100644
--- a/examples/convoai/README.md
+++ b/examples/convoai/README.md
@@ -37,6 +37,25 @@ export CONVOAI_TTS_ELEVENLABS_MODEL_ID=
export CONVOAI_TTS_ELEVENLABS_VOICE_ID=
```
+### cartesia
+
+```bash
+export CONVOAI_TTS_CARTESIA_API_KEY=
+export CONVOAI_TTS_CARTESIA_MODEL_ID=
+export CONVOAI_TTS_CARTESIA_VOICE_MODE=
+export CONVOAI_TTS_CARTESIA_VOICE_ID=
+```
+
+### openai
+
+```bash
+export CONVOAI_TTS_OPENAI_API_KEY=
+export CONVOAI_TTS_OPENAI_MODEL=
+export CONVOAI_TTS_OPENAI_VOICE=
+export CONVOAI_TTS_OPENAI_INSTRUCTIONS=
+export CONVOAI_TTS_OPENAI_SPEED=
+```
+
## Execution
Please ensure that you have executed the module installation operation in the main module directory:
@@ -55,5 +74,7 @@ mvn exec:java -Dexec.mainClass="io.agora.rest.examples.convoai.Main" -Dexec.args
- `microsoft`
- `elevenLabs`
+- `cartesia`
+- `openai`
Choose the appropriate TTS provider based on your requirements.
diff --git a/examples/convoai/src/main/java/io/agora/rest/examples/convoai/Main.java b/examples/convoai/src/main/java/io/agora/rest/examples/convoai/Main.java
index 8f1795e..9fa7b7f 100644
--- a/examples/convoai/src/main/java/io/agora/rest/examples/convoai/Main.java
+++ b/examples/convoai/src/main/java/io/agora/rest/examples/convoai/Main.java
@@ -25,7 +25,7 @@ public class Main implements Callable {
private final DomainArea domainArea = DomainArea.CN;
- @Option(names = { "-t", "--ttsVendor" }, description = "bytedance,microsoft,tencent,minimax,elevenlabs")
+ @Option(names = { "-t", "--ttsVendor" }, description = "bytedance,microsoft,tencent,minimax,elevenlabs,cartesia,openai")
private String ttsVendor = "";
@Option(names = { "-s", "--serviceRegion" }, description = "chineseMainland,global")
@@ -82,9 +82,6 @@ public Integer call() throws Exception {
svc.runBytedanceTTS();
break;
case MICROSOFT:
- if (convoAIServiceRegionEnum != ConvoAIServiceRegionEnum.GLOBAL) {
- throw new IllegalArgumentException("Microsoft TTS is only available in Global");
- }
svc.runMicrosoftTTS();
break;
case TENCENT:
@@ -105,6 +102,18 @@ public Integer call() throws Exception {
}
svc.runElevenlabsTTS();
break;
+ case CARTERSIA:
+ if (convoAIServiceRegionEnum != ConvoAIServiceRegionEnum.GLOBAL) {
+ throw new IllegalArgumentException("Cartesia TTS is only available in Global");
+ }
+ svc.runCartesiaTTS();
+ break;
+ case OPENAI:
+ if (convoAIServiceRegionEnum != ConvoAIServiceRegionEnum.GLOBAL) {
+ throw new IllegalArgumentException("OpenAI TTS is only available in Global");
+ }
+ svc.runOpenAITTS();
+ break;
default:
throw new IllegalArgumentException("Invalid ttsVendor: " + ttsVendor);
}
diff --git a/examples/convoai/src/main/java/io/agora/rest/examples/convoai/service/Service.java b/examples/convoai/src/main/java/io/agora/rest/examples/convoai/service/Service.java
index 9aa51b4..92028eb 100644
--- a/examples/convoai/src/main/java/io/agora/rest/examples/convoai/service/Service.java
+++ b/examples/convoai/src/main/java/io/agora/rest/examples/convoai/service/Service.java
@@ -24,7 +24,7 @@ public Service(DomainArea domainArea, String appId, String cname, String uid, Cr
super(domainArea, appId, cname, uid, credential, serviceRegion);
}
- public void runCustomTTS(JoinConvoAIReq.TTSVendorEnum ttsVendor, JoinConvoAIReq.TTSVendorParams ttsVendorParams) {
+ public void runCustomTTS(JoinConvoAIReq.TTSVendorParams ttsVendorParams) {
// Run Conversational AI service with custom TTS
String token = System.getenv("CONVOAI_TOKEN");
@@ -106,15 +106,9 @@ public void runCustomTTS(JoinConvoAIReq.TTSVendorEnum ttsVendor, JoinConvoAIReq.
.greetingMessage("Hello,how can I help you?")
.build())
.ttsPayload(JoinConvoAIReq.TTSPayload.builder()
- .vendor(ttsVendor)
+ .vendor(ttsVendorParams.getVendor())
.params(ttsVendorParams)
.build())
- .vadPayload(JoinConvoAIReq.VADPayload.builder()
- .interruptDurationMs(160)
- .prefixPaddingMs(300)
- .silenceDurationMs(480)
- .threshold(0.5F)
- .build())
.asrPayload(JoinConvoAIReq.ASRPayload.builder()
.language("zh-CN")
.build())
@@ -337,7 +331,7 @@ public void runBytedanceTTS() {
.emotion("happy")
.build();
- runCustomTTS(JoinConvoAIReq.TTSVendorEnum.BYTEDANCE, ttsVendorParams);
+ runCustomTTS(ttsVendorParams);
}
public void runTencentTTS() {
@@ -368,7 +362,7 @@ public void runTencentTTS() {
.emotionIntensity(100)
.build();
- runCustomTTS(JoinConvoAIReq.TTSVendorEnum.TENCENT, ttsVendorParams);
+ runCustomTTS(ttsVendorParams);
}
@@ -408,8 +402,7 @@ public void runMinimaxTTS() {
.languageBoost("auto")
.build();
- runCustomTTS(JoinConvoAIReq.TTSVendorEnum.MINIMAX, ttsVendorParams);
-
+ runCustomTTS(ttsVendorParams);
}
public void runMicrosoftTTS() {
@@ -438,7 +431,7 @@ public void runMicrosoftTTS() {
.volume(70F)
.build();
- runCustomTTS(JoinConvoAIReq.TTSVendorEnum.MICROSOFT, ttsVendorParams);
+ runCustomTTS(ttsVendorParams);
}
public void runElevenlabsTTS() {
@@ -465,6 +458,85 @@ public void runElevenlabsTTS() {
.sampleRate(24000)
.build();
- runCustomTTS(JoinConvoAIReq.TTSVendorEnum.ELEVENLABS, ttsVendorParams);
+ runCustomTTS(ttsVendorParams);
+ }
+
+    public void runCartesiaTTS() {
+        // Cartesia example: every CONVOAI_TTS_CARTESIA_* variable read below must be set and non-empty.
+        final String apiKey = System.getenv("CONVOAI_TTS_CARTESIA_API_KEY");
+        if (apiKey == null || apiKey.length() == 0) {
+            throw new IllegalArgumentException("CONVOAI_TTS_CARTESIA_API_KEY is required");
+        }
+
+        final String modelId = System.getenv("CONVOAI_TTS_CARTESIA_MODEL_ID");
+        if (modelId == null || modelId.length() == 0) {
+            throw new IllegalArgumentException("CONVOAI_TTS_CARTESIA_MODEL_ID is required");
+        }
+
+        final String voiceMode = System.getenv("CONVOAI_TTS_CARTESIA_VOICE_MODE");
+        if (voiceMode == null || voiceMode.length() == 0) {
+            throw new IllegalArgumentException("CONVOAI_TTS_CARTESIA_VOICE_MODE is required");
+        }
+
+        final String voiceId = System.getenv("CONVOAI_TTS_CARTESIA_VOICE_ID");
+        if (voiceId == null || voiceId.length() == 0) {
+            throw new IllegalArgumentException("CONVOAI_TTS_CARTESIA_VOICE_ID is required");
+        }
+
+        JoinConvoAIReq.CartesiaTTSVendorParams cartesiaParams = JoinConvoAIReq.CartesiaTTSVendorParams.builder()
+                .apiKey(apiKey)
+                .modelId(modelId)
+                .voice(JoinConvoAIReq.TTSCartesiaVendorVoice.builder()
+                        .mode(voiceMode)
+                        .id(voiceId)
+                        .build())
+                .build();
+
+        runCustomTTS(cartesiaParams);
+    }
+
+    public void runOpenAITTS() {
+        // OpenAI TTS example: reads the CONVOAI_TTS_OPENAI_* variables; throws IllegalArgumentException if any is unset, empty or (speed) unparsable.
+        String ttsApiKey = System.getenv("CONVOAI_TTS_OPENAI_API_KEY");
+        if (ttsApiKey == null || ttsApiKey.isEmpty()) {
+            throw new IllegalArgumentException("CONVOAI_TTS_OPENAI_API_KEY is required");
+        }
+
+        String ttsModel = System.getenv("CONVOAI_TTS_OPENAI_MODEL");
+        if (ttsModel == null || ttsModel.isEmpty()) {
+            throw new IllegalArgumentException("CONVOAI_TTS_OPENAI_MODEL is required");
+        }
+
+        String ttsVoice = System.getenv("CONVOAI_TTS_OPENAI_VOICE");
+        if (ttsVoice == null || ttsVoice.isEmpty()) {
+            throw new IllegalArgumentException("CONVOAI_TTS_OPENAI_VOICE is required");
+        }
+
+        String ttsInstructions = System.getenv("CONVOAI_TTS_OPENAI_INSTRUCTIONS");
+        if (ttsInstructions == null || ttsInstructions.isEmpty()) {
+            throw new IllegalArgumentException("CONVOAI_TTS_OPENAI_INSTRUCTIONS is required");
+        }
+
+        String ttsSpeed = System.getenv("CONVOAI_TTS_OPENAI_SPEED");
+        if (ttsSpeed == null || ttsSpeed.isEmpty()) {
+            throw new IllegalArgumentException("CONVOAI_TTS_OPENAI_SPEED is required");
+        }
+        // ttsSpeed is non-null here, so a parse failure surfaces as NumberFormatException; keep it as the cause.
+        Float speed;
+        try {
+            speed = Float.parseFloat(ttsSpeed);
+        } catch (NumberFormatException e) {
+            throw new IllegalArgumentException("CONVOAI_TTS_OPENAI_SPEED is not a valid float", e);
+        }
+
+        JoinConvoAIReq.TTSOpenAIVendorParams ttsVendorParams = JoinConvoAIReq.TTSOpenAIVendorParams.builder()
+                .apiKey(ttsApiKey)
+                .model(ttsModel)
+                .voice(ttsVoice)
+                .instructions(ttsInstructions)
+                .speed(speed)
+                .build();
+
+        runCustomTTS(ttsVendorParams);
     }
}