Skip to content

Commit fbfbdbd

Browse files
stt working
1 parent 259d3a1 commit fbfbdbd

File tree

1 file changed

+74
-12
lines changed

1 file changed

+74
-12
lines changed

sdk/runanywhere-commons/src/jni/runanywhere_commons_jni.cpp

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -444,9 +444,12 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentLoadMode
444444
LOGw("NO providers registered for TEXT_GENERATION!");
445445
}
446446

447+
// model_path, model_id (use path as id), model_name (optional)
447448
rac_result_t result = rac_llm_component_load_model(
448449
reinterpret_cast<rac_handle_t>(handle),
449-
path.c_str()
450+
path.c_str(), // model_path
451+
path.c_str(), // model_id (use path as id)
452+
nullptr // model_name (optional)
450453
);
451454
LOGi("rac_llm_component_load_model returned: %d", result);
452455

@@ -980,9 +983,16 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racSttComponentLoadMode
980983
LOGw("NO providers registered for STT!");
981984
}
982985

986+
// Parse configJson to extract model_id and model_name
987+
std::string config_str = getCString(env, configJson);
988+
std::string model_id = path; // Use path as model_id by default
989+
std::string model_name = ""; // Optional model name
990+
983991
rac_result_t result = rac_stt_component_load_model(
984992
reinterpret_cast<rac_handle_t>(handle),
985-
path.c_str()
993+
path.c_str(), // model_path
994+
model_id.c_str(), // model_id
995+
model_name.empty() ? nullptr : model_name.c_str() // model_name (optional)
986996
);
987997
LOGi("rac_stt_component_load_model returned: %d", result);
988998

@@ -1003,30 +1013,76 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racSttComponentTranscri
10031013
jsize len = env->GetArrayLength(audioData);
10041014
jbyte* data = env->GetByteArrayElements(audioData, nullptr);
10051015

1006-
rac_stt_options_t options = {};
1016+
// Use default options which properly initializes sample_rate to 16000
1017+
rac_stt_options_t options = RAC_STT_OPTIONS_DEFAULT;
1018+
1019+
// Parse configJson to override sample_rate if provided
1020+
if (configJson != nullptr) {
1021+
const char* json = env->GetStringUTFChars(configJson, nullptr);
1022+
if (json != nullptr) {
1023+
// Simple JSON parsing for sample_rate
1024+
const char* sample_rate_key = "\"sample_rate\":";
1025+
const char* pos = strstr(json, sample_rate_key);
1026+
if (pos != nullptr) {
1027+
pos += strlen(sample_rate_key);
1028+
int sample_rate = atoi(pos);
1029+
if (sample_rate > 0) {
1030+
options.sample_rate = sample_rate;
1031+
LOGd("Using sample_rate from config: %d", sample_rate);
1032+
}
1033+
}
1034+
env->ReleaseStringUTFChars(configJson, json);
1035+
}
1036+
}
1037+
1038+
LOGd("STT transcribe: %d bytes, sample_rate=%d", (int)len, options.sample_rate);
1039+
10071040
rac_stt_result_t result = {};
10081041

1042+
// Audio data is 16-bit PCM (ByteArray from Android AudioRecord)
1043+
// Pass the raw bytes - the audio_format in options tells C++ how to interpret it
10091044
rac_result_t status = rac_stt_component_transcribe(
10101045
reinterpret_cast<rac_handle_t>(handle),
1011-
reinterpret_cast<const float*>(data),
1012-
static_cast<size_t>(len / sizeof(float)),
1046+
data, // Pass raw bytes (void*)
1047+
static_cast<size_t>(len), // Size in bytes
10131048
&options,
10141049
&result
10151050
);
10161051

10171052
env->ReleaseByteArrayElements(audioData, data, JNI_ABORT);
10181053

10191054
if (status != RAC_SUCCESS) {
1055+
LOGe("STT transcribe failed with status: %d", status);
10201056
return nullptr;
10211057
}
10221058

1059+
// Build JSON result
1060+
std::string json_result = "{";
1061+
json_result += "\"text\":\"";
10231062
if (result.text != nullptr) {
1024-
jstring jResult = env->NewStringUTF(result.text);
1025-
rac_stt_result_free(&result);
1026-
return jResult;
1063+
// Escape special characters in text
1064+
for (const char* p = result.text; *p; ++p) {
1065+
switch (*p) {
1066+
case '"': json_result += "\\\""; break;
1067+
case '\\': json_result += "\\\\"; break;
1068+
case '\n': json_result += "\\n"; break;
1069+
case '\r': json_result += "\\r"; break;
1070+
case '\t': json_result += "\\t"; break;
1071+
default: json_result += *p; break;
1072+
}
1073+
}
10271074
}
1075+
json_result += "\",";
1076+
json_result += "\"language\":\"" + std::string(result.detected_language ? result.detected_language : "en") + "\",";
1077+
json_result += "\"duration_ms\":" + std::to_string(result.processing_time_ms) + ",";
1078+
json_result += "\"completion_reason\":1,"; // END_OF_AUDIO
1079+
json_result += "\"confidence\":" + std::to_string(result.confidence);
1080+
json_result += "}";
1081+
1082+
rac_stt_result_free(&result);
10281083

1029-
return env->NewStringUTF("{}");
1084+
LOGd("STT transcribe result: %s", json_result.c_str());
1085+
return env->NewStringUTF(json_result.c_str());
10301086
}
10311087

10321088
JNIEXPORT jstring JNICALL
@@ -1105,12 +1161,15 @@ JNIEXPORT jint JNICALL
11051161
Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racTtsComponentLoadModel(JNIEnv* env, jclass clazz, jlong handle, jstring modelPath, jstring configJson) {
// JNI entry point: loads a TTS voice for the component referenced by `handle`.
// Returns RAC_ERROR_INVALID_HANDLE when `handle` is 0; otherwise returns the
// result of rac_tts_component_load_voice() cast to jint.
// NOTE(review): `configJson` and `clazz` are not read anywhere in this function.
11061162
if (handle == 0) return RAC_ERROR_INVALID_HANDLE;
11071163

1108-
std::string voiceId = getCString(env, modelPath); // modelPath is actually voiceId for TTS
1164+
std::string voicePath = getCString(env, modelPath); // modelPath is actually voice path for TTS
11091165

11101166
// TTS component uses load_voice instead of load_model
1167+
// voice_path, voice_id (use path as id), voice_name (optional)
// The same string is passed for both the path and the id arguments; no
// separate id or display name is supplied by this bridge call.
11111168
return static_cast<jint>(rac_tts_component_load_voice(
11121169
reinterpret_cast<rac_handle_t>(handle),
1113-
voiceId.c_str()
1170+
voicePath.c_str(), // voice_path
1171+
voicePath.c_str(), // voice_id (use path as id)
1172+
nullptr // voice_name (optional)
11141173
));
11151174
}
11161175

@@ -1204,9 +1263,12 @@ JNIEXPORT jint JNICALL
12041263
Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racTtsComponentSetVoice(JNIEnv* env, jclass clazz, jlong handle, jstring voiceId) {
// JNI entry point: switches the TTS component referenced by `handle` to the
// voice named by `voiceId` by calling rac_tts_component_load_voice().
// Returns RAC_ERROR_INVALID_HANDLE when `handle` is 0; otherwise returns the
// load_voice result cast to jint.
12051264
if (handle == 0) return RAC_ERROR_INVALID_HANDLE;
12061265
std::string voice = getCString(env, voiceId);
1266+
// voice_path (the incoming voiceId string is reused as the path), voice_id, voice_name (optional)
// NOTE(review): whether load_voice accepts a bare id in the path argument is
// not visible here - confirm against the rac_tts_component_load_voice contract.
12071267
return static_cast<jint>(rac_tts_component_load_voice(
12081268
reinterpret_cast<rac_handle_t>(handle),
1209-
voice.c_str()
1269+
voice.c_str(), // voice_path
1270+
voice.c_str(), // voice_id
1271+
nullptr // voice_name (optional)
12101272
));
12111273
}
12121274

0 commit comments

Comments
 (0)