@@ -444,9 +444,12 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentLoadMode
444444 LOGw (" NO providers registered for TEXT_GENERATION!" );
445445 }
446446
447+ // model_path, model_id (use path as id), model_name (optional)
447448 rac_result_t result = rac_llm_component_load_model (
448449 reinterpret_cast <rac_handle_t >(handle),
449- path.c_str ()
450+ path.c_str (), // model_path
451+ path.c_str (), // model_id (use path as id)
452+ nullptr // model_name (optional)
450453 );
451454 LOGi (" rac_llm_component_load_model returned: %d" , result);
452455
@@ -980,9 +983,16 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racSttComponentLoadMode
980983 LOGw (" NO providers registered for STT!" );
981984 }
982985
986+ // Parse configJson to extract model_id and model_name
987+ std::string config_str = getCString (env, configJson);
988+ std::string model_id = path; // Use path as model_id by default
989+ std::string model_name = " " ; // Optional model name
990+
983991 rac_result_t result = rac_stt_component_load_model (
984992 reinterpret_cast <rac_handle_t >(handle),
985- path.c_str ()
993+ path.c_str (), // model_path
994+ model_id.c_str (), // model_id
995+ model_name.empty () ? nullptr : model_name.c_str () // model_name (optional)
986996 );
987997 LOGi (" rac_stt_component_load_model returned: %d" , result);
988998
@@ -1003,30 +1013,76 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racSttComponentTranscri
10031013 jsize len = env->GetArrayLength (audioData);
10041014 jbyte* data = env->GetByteArrayElements (audioData, nullptr );
10051015
1006- rac_stt_options_t options = {};
1016+ // Use default options which properly initializes sample_rate to 16000
1017+ rac_stt_options_t options = RAC_STT_OPTIONS_DEFAULT;
1018+
1019+ // Parse configJson to override sample_rate if provided
1020+ if (configJson != nullptr ) {
1021+ const char * json = env->GetStringUTFChars (configJson, nullptr );
1022+ if (json != nullptr ) {
1023+ // Simple JSON parsing for sample_rate
1024+ const char * sample_rate_key = " \" sample_rate\" :" ;
1025+ const char * pos = strstr (json, sample_rate_key);
1026+ if (pos != nullptr ) {
1027+ pos += strlen (sample_rate_key);
1028+ int sample_rate = atoi (pos);
1029+ if (sample_rate > 0 ) {
1030+ options.sample_rate = sample_rate;
1031+ LOGd (" Using sample_rate from config: %d" , sample_rate);
1032+ }
1033+ }
1034+ env->ReleaseStringUTFChars (configJson, json);
1035+ }
1036+ }
1037+
1038+ LOGd (" STT transcribe: %d bytes, sample_rate=%d" , (int )len, options.sample_rate );
1039+
10071040 rac_stt_result_t result = {};
10081041
1042+ // Audio data is 16-bit PCM (ByteArray from Android AudioRecord)
1043+ // Pass the raw bytes - the audio_format in options tells C++ how to interpret it
10091044 rac_result_t status = rac_stt_component_transcribe (
10101045 reinterpret_cast <rac_handle_t >(handle),
1011- reinterpret_cast < const float *>(data),
1012- static_cast <size_t >(len / sizeof ( float )),
1046+ data, // Pass raw bytes (void*)
1047+ static_cast <size_t >(len), // Size in bytes
10131048 &options,
10141049 &result
10151050 );
10161051
10171052 env->ReleaseByteArrayElements (audioData, data, JNI_ABORT);
10181053
10191054 if (status != RAC_SUCCESS) {
1055+ LOGe (" STT transcribe failed with status: %d" , status);
10201056 return nullptr ;
10211057 }
10221058
1059+ // Build JSON result
1060+ std::string json_result = " {" ;
1061+ json_result += " \" text\" :\" " ;
10231062 if (result.text != nullptr ) {
1024- jstring jResult = env->NewStringUTF (result.text );
1025- rac_stt_result_free (&result);
1026- return jResult;
1063+ // Escape special characters in text
1064+ for (const char * p = result.text ; *p; ++p) {
1065+ switch (*p) {
1066+ case ' "' : json_result += " \\\" " ; break ;
1067+ case ' \\ ' : json_result += " \\\\ " ; break ;
1068+ case ' \n ' : json_result += " \\ n" ; break ;
1069+ case ' \r ' : json_result += " \\ r" ; break ;
1070+ case ' \t ' : json_result += " \\ t" ; break ;
1071+ default : json_result += *p; break ;
1072+ }
1073+ }
10271074 }
1075+ json_result += " \" ," ;
1076+ json_result += " \" language\" :\" " + std::string (result.detected_language ? result.detected_language : " en" ) + " \" ," ;
1077+ json_result += " \" duration_ms\" :" + std::to_string (result.processing_time_ms ) + " ," ;
1078+ json_result += " \" completion_reason\" :1," ; // END_OF_AUDIO
1079+ json_result += " \" confidence\" :" + std::to_string (result.confidence );
1080+ json_result += " }" ;
1081+
1082+ rac_stt_result_free (&result);
10281083
1029- return env->NewStringUTF (" {}" );
1084+ LOGd (" STT transcribe result: %s" , json_result.c_str ());
1085+ return env->NewStringUTF (json_result.c_str ());
10301086}
10311087
10321088JNIEXPORT jstring JNICALL
@@ -1105,12 +1161,15 @@ JNIEXPORT jint JNICALL
11051161Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racTtsComponentLoadModel (JNIEnv* env, jclass clazz, jlong handle, jstring modelPath, jstring configJson) {
11061162 if (handle == 0 ) return RAC_ERROR_INVALID_HANDLE;
11071163
1108- std::string voiceId = getCString (env, modelPath); // modelPath is actually voiceId for TTS
1164+ std::string voicePath = getCString (env, modelPath); // modelPath is actually voice path for TTS
11091165
11101166 // TTS component uses load_voice instead of load_model
1167+ // voice_path, voice_id (use path as id), voice_name (optional)
11111168 return static_cast <jint>(rac_tts_component_load_voice (
11121169 reinterpret_cast <rac_handle_t >(handle),
1113- voiceId.c_str ()
1170+ voicePath.c_str (), // voice_path
1171+ voicePath.c_str (), // voice_id (use path as id)
1172+ nullptr // voice_name (optional)
11141173 ));
11151174}
11161175
@@ -1204,9 +1263,12 @@ JNIEXPORT jint JNICALL
12041263Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racTtsComponentSetVoice (JNIEnv* env, jclass clazz, jlong handle, jstring voiceId) {
12051264 if (handle == 0 ) return RAC_ERROR_INVALID_HANDLE;
12061265 std::string voice = getCString (env, voiceId);
1266+ // voice_path, voice_id (use path as id), voice_name (optional)
12071267 return static_cast <jint>(rac_tts_component_load_voice (
12081268 reinterpret_cast <rac_handle_t >(handle),
1209- voice.c_str ()
1269+ voice.c_str (), // voice_path
1270+ voice.c_str (), // voice_id
1271+ nullptr // voice_name (optional)
12101272 ));
12111273}
12121274
0 commit comments