Commit 779a41f

Merge commit 'c3a2624339187e89c4f65fd72a5fe7103968b5ad' into concedo_experimental
2 parents: f97bbdd + c3a2624

File tree: 3 files changed (+6 lines, -4 lines)

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 1 addition & 0 deletions
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP
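
The same one-line fix lands in both vector FlashAttention kernels: the early-out vote is warp-wide (__all_sync over mask 0xFFFFFFFF), so different warps in a block can disagree on skip, and a warp taking the continue would otherwise bypass the block-wide barrier the working warps execute later in the loop body, leaving barrier counts mismatched and the block prone to hanging. A minimal sketch of the pattern, with a hypothetical kernel and names (not the llama.cpp code):

#include <cuda_runtime.h>

// Hypothetical tile loop showing the barrier-matching rule the fix enforces:
// every warp must pass the same number of __syncthreads() per iteration,
// whether it skips the tile or processes it.
__global__ void tile_loop(const float * vals, float * out, int n_tiles) {
    for (int tile = 0; tile < n_tiles; ++tile) {
        const int i = tile * blockDim.x + threadIdx.x;
        const bool lane_skip = vals[i] == 0.0f;

        // Warp-wide vote: true only if every lane in this warp wants to skip.
        // Other warps in the same block may vote differently.
        if (__all_sync(0xFFFFFFFF, lane_skip)) {
            __syncthreads(); // match the barrier below before bailing out
            continue;
        }

        // ... per-tile work, e.g. staging data through shared memory ...

        __syncthreads(); // every warp that did the work reaches this barrier
        out[i] = vals[i];
    }
}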

src/llama-vocab.cpp

Lines changed: 4 additions & 4 deletions
@@ -1060,7 +1060,7 @@ struct llm_tokenizer_ugm_session {
         }

         // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
         // at the beginning tokenization score is zero
         tokenization_results[0] = { vocab.token_unk(), 0, 0 };

@@ -1092,7 +1092,7 @@ struct llm_tokenizer_ugm_session {
                     const double challenger_score = current_best.score_sum + token_score;
                     struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                     if (challenger_score > current_champ.score_sum) {
-                        struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                        struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                         current_champ = challenger;
                     }
                 }
@@ -1106,7 +1106,7 @@ struct llm_tokenizer_ugm_session {
                 prefix_offset = input_offset + n_utf8_code_units;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -1232,7 +1232,7 @@ struct llm_tokenizer_ugm_session {
     struct best_tokenization {
         llama_token token_id;
         size_t input_offset;
-        float score_sum;
+        double score_sum;
     };

     struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
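
The vocab change widens best_tokenization::score_sum from float to double, matching the precision of the double challenger_score it is compared against, and switches the sentinel from -FLT_MAX to -DBL_MAX to suit the new type. A standalone sketch of the failure mode the old (float) cast allowed, using made-up scores rather than llama.cpp data:

#include <cstdio>

// Two accumulated log-probability sums that differ only in the 10th
// significant digit: double keeps them apart, float collapses them.
int main() {
    const double champ_score      = -1234.567891;
    const double challenger_score = -1234.567890; // slightly better

    printf("double: challenger wins? %d\n", challenger_score > champ_score);

    const float champ_f      = (float) champ_score;
    const float challenger_f = (float) challenger_score;
    printf("float:  challenger wins? %d\n", challenger_f > champ_f);
    return 0;
}

Long inputs sum many per-token scores, so near-ties like this are exactly where a float-typed champion could reject a genuinely better challenger.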
