Commit 779a41f

Merge commit 'c3a2624339187e89c4f65fd72a5fe7103968b5ad' into concedo_experimental
2 parents: f97bbdd + c3a2624

File tree: 3 files changed (+6 lines, -4 lines)

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 1 addition & 0 deletions
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP
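
The same one-line fix lands in both vector FlashAttention kernels: the early-out vote is warp-wide (__all_sync over mask 0xFFFFFFFF), so different warps in a block can disagree on skip, and a warp taking the continue would otherwise bypass the block-wide barrier the working warps execute later in the loop body, leaving barrier counts mismatched and the block prone to hanging. A minimal sketch of the pattern, with a hypothetical kernel and names (not the llama.cpp code):

#include <cuda_runtime.h>

// Hypothetical tile loop showing the barrier-matching rule the fix enforces:
// every warp must pass the same number of __syncthreads() per iteration,
// whether it skips the tile or processes it.
__global__ void tile_loop(const float * vals, float * out, int n_tiles) {
    for (int tile = 0; tile < n_tiles; ++tile) {
        const int i = tile * blockDim.x + threadIdx.x;
        const bool lane_skip = vals[i] == 0.0f;

        // Warp-wide vote: true only if every lane in this warp wants to skip.
        // Other warps in the same block may vote differently.
        if (__all_sync(0xFFFFFFFF, lane_skip)) {
            __syncthreads(); // match the barrier below before bailing out
            continue;
        }

        // ... per-tile work, e.g. staging data through shared memory ...

        __syncthreads(); // every warp that did the work reaches this barrier
        out[i] = vals[i];
    }
}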

src/llama-vocab.cpp

Lines changed: 4 additions & 4 deletions
@@ -1060,7 +1060,7 @@ struct llm_tokenizer_ugm_session {
         }

         // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
         // at the beginning tokenization score is zero
         tokenization_results[0] = { vocab.token_unk(), 0, 0 };

@@ -1092,7 +1092,7 @@ struct llm_tokenizer_ugm_session {
                     const double challenger_score = current_best.score_sum + token_score;
                     struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                     if (challenger_score > current_champ.score_sum) {
-                        struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                        struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                         current_champ = challenger;
                     }
                 }
@@ -1106,7 +1106,7 @@ struct llm_tokenizer_ugm_session {
                 prefix_offset = input_offset + n_utf8_code_units;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -1232,7 +1232,7 @@ struct llm_tokenizer_ugm_session {
     struct best_tokenization {
         llama_token token_id;
         size_t input_offset;
-        float score_sum;
+        double score_sum;
     };

     struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
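
The vocab change widens best_tokenization::score_sum from float to double, matching the precision of the double challenger_score it is compared against, and switches the sentinel from -FLT_MAX to -DBL_MAX to suit the new type. A standalone sketch of the failure mode the old (float) cast allowed, using made-up scores rather than llama.cpp data:

#include <cstdio>

// Two accumulated log-probability sums that differ only in the 10th
// significant digit: double keeps them apart, float collapses them.
int main() {
    const double champ_score      = -1234.567891;
    const double challenger_score = -1234.567890; // slightly better

    printf("double: challenger wins? %d\n", challenger_score > champ_score);

    const float champ_f      = (float) champ_score;
    const float challenger_f = (float) challenger_score;
    printf("float:  challenger wins? %d\n", challenger_f > champ_f);
    return 0;
}

Long inputs sum many per-token scores, so near-ties like this are exactly where a float-typed champion could reject a genuinely better challenger.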
