Skip to content

Commit 6f5c781

Browse files
committed
feat: expose language detection probabilities to server.cpp
1 parent 43f5030 commit 6f5c781

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

examples/server/server.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,13 +919,34 @@ int main(int argc, char ** argv) {
919919
} else if (params.response_format == vjson_format) {
920920
/* try to match openai/whisper's Python format */
921921
std::string results = output_str(ctx, params, pcmf32s);
922+
923+
// Run language auto-detection to fill lang_probs with one probability per language id
924+
std::vector<float> lang_probs(whisper_lang_max_id() + 1, 0.0f);
925+
const auto detected_lang_id = whisper_lang_auto_detect(ctx, 0, params.n_threads, lang_probs.data());
926+
922927
json jres = json{
923928
{"task", params.translate ? "translate" : "transcribe"},
924929
{"language", whisper_lang_str_full(whisper_full_lang_id(ctx))},
925930
{"duration", float(pcmf32.size())/WHISPER_SAMPLE_RATE},
926931
{"text", results},
927932
{"segments", json::array()}
928933
};
934+
935+
// Always include language detection info
936+
json lang_info = json::object();
937+
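// Include the probability of the detected language.
// NOTE(review): whisper_lang_auto_detect reports failure with a negative return value;
// detected_lang_id should be checked (>= 0) before it is used as an index here — confirm and guard.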
938+
lang_info["probability"] = lang_probs[detected_lang_id];
939+
940+
// Add all language probabilities
941+
json all_lang_probs = json::object();
942+
for (int i = 0; i <= whisper_lang_max_id(); ++i) {
943+
if (lang_probs[i] > 0.001f) { // Only include non-negligible probabilities
944+
all_lang_probs[whisper_lang_str(i)] = lang_probs[i];
945+
}
946+
}
947+
lang_info["language_probabilities"] = all_lang_probs;
948+
jres["language_detection"] = lang_info;
949+
929950
const int n_segments = whisper_full_n_segments(ctx);
930951
for (int i = 0; i < n_segments; ++i)
931952
{

0 commit comments

Comments
 (0)