Commit d0f38de
committed
Expose more ctx->vocab interfaces.
I need these functions to implement a logits_filter_callback that applies
weighting coefficients to the logits of selected tokens, for example:
```
// Logits filter callback that scales the logits of tokens related to a fixed
// list of preferred words (UTF-8 encoded).
//
// For every preferred word exactly one rule is applied, in priority order:
//   1. the whole word is a vocabulary token             -> its logit is scaled by 2
//   2. the previous token's text ends with the word's
//      first code point and the word's second code
//      point is a vocabulary token                      -> that token's logit is scaled by 1.6
//   3. the word's first code point is a vocabulary token -> its logit is scaled by 1.2
//
// Parameters:
//   ctx       - context used for all vocabulary lookups
//   state     - unused
//   tokens    - token array; only the last entry (tokens[n_tokens - 1]) is read
//   n_tokens  - number of entries in tokens; may be 0
//   logits    - modified in place, indexed by token id
//   user_data - unused
//
// NOTE(review): multiplying only raises a logit when it is positive; a negative
// logit is pushed further down. Confirm this is the intended weighting.
// NOTE(review): std::wstring_convert / std::codecvt_utf8 are deprecated since C++17.
void filter_callback(
    struct whisper_context * ctx,
    struct whisper_state * state,
    const whisper_token_data * tokens,
    int n_tokens,
    float * logits,
    void * user_data
) {
    (void) state;
    (void) user_data;

    static const std::vector<std::string> good_words = {
        "音声", "認識"
    };

    // Scale factors for the three rules described above.
    constexpr double whole_word_scale  = 2;
    constexpr double second_char_scale = 1.6;
    constexpr double first_char_scale  = 1.2;

    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;

    // Text of the most recent token, or empty when there is no previous token.
    const std::string prev = n_tokens > 0
        ? std::string(whisper_token_to_str(ctx, tokens[n_tokens - 1].id))
        : std::string();

    // Multiplies the logit of the token spelled `text` by `factor`.
    // The product is formed in double and narrowed once, like `logits[id] *= factor`.
    const auto scale_logit = [&](const std::string & text, double factor) {
        const auto id = whisper_str_to_token(ctx, text.c_str());
        logits[id] = static_cast<float>(logits[id] * factor);
    };

    for (const std::string & word : good_words) {
        const std::u32string code_points = conv.from_bytes(word);
        if (code_points.empty()) {
            // Nothing to match against; also avoids indexing past the end below.
            continue;
        }

        const std::string first = conv.to_bytes(code_points[0]);
        // Words consisting of a single code point have no "second character" rule.
        const std::string second = code_points.size() > 1
            ? conv.to_bytes(code_points[1])
            : std::string();

        const bool prev_ends_with_first =
            prev.size() >= first.size() &&
            prev.compare(prev.size() - first.size(), first.size(), first) == 0;

        if (whisper_token_exists(ctx, word.c_str())) {
            scale_logit(word, whole_word_scale);
        } else if (
            !second.empty()
            && prev_ends_with_first
            && whisper_token_exists(ctx, second.c_str())
        ) {
            scale_logit(second, second_char_scale);
        } else if (whisper_token_exists(ctx, first.c_str())) {
            scale_logit(first, first_char_scale);
        }
    }
}
```
1 parent e4e0598 commit d0f38de
2 files changed
+11
-0
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
408 | 408 | | |
409 | 409 | | |
410 | 410 | | |
| 411 | + | |
| 412 | + | |
| 413 | + | |
411 | 414 | | |
412 | 415 | | |
413 | 416 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
4068 | 4068 | | |
4069 | 4069 | | |
4070 | 4070 | | |
| 4071 | + | |
| 4072 | + | |
| 4073 | + | |
| 4074 | + | |
| 4075 | + | |
| 4076 | + | |
| 4077 | + | |
| 4078 | + | |
4071 | 4079 | | |
4072 | 4080 | | |
4073 | 4081 | | |
| |||
0 commit comments