@@ -10217,13 +10217,53 @@ static void ggml_compute_forward_diag_mask_zero(
 }
}

+
+__attribute__((noinline)) static void debug_hook(void) {
+}
+
// ggml_compute_forward_soft_max
+// scan an F32 tensor row by row and report whether it contains any NaN/Inf values
+static bool check_invalid_values(const struct ggml_tensor * src0) {
+    if (!src0) {
+        printf("Error: src0 is NULL!\n");
+        return false;
+    }
+
+    const int nc = src0->ne[0];      // number of columns
+    const int nr = ggml_nrows(src0); // number of rows
+
+    int nan_count = 0, inf_count = 0;
+
+    // printf("Checking tensor for NaN/Inf values...\n");
+
+    for (int i1 = 0; i1 < nr; i1++) {
+        float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
+
+        for (int i = 0; i < nc; ++i) {
+            if (isnan(sp[i])) {
+                nan_count++;
+                // printf("NaN detected at row %d, col %d (index %d)\n", i1, i, i1*nc + i);
+            } else if (isinf(sp[i])) {
+                inf_count++;
+                // printf("Inf detected at row %d, col %d (index %d)\n", i1, i, i1*nc + i);
+            }
+        }
+    }
+
+    if (nan_count > 0 || inf_count > 0) {
+        debug_hook(); // breakpoint anchor: invalid values were found
+        return true;
+    }
+
+    return false;
+}

static void ggml_compute_forward_soft_max_f32(
        const struct ggml_compute_params * params,
        struct ggml_tensor * dst) {

    const struct ggml_tensor * src0 = dst->src[0];
+
+    // check_invalid_values(src0);
    const struct ggml_tensor * src1 = dst->src[1];

    assert(ggml_is_contiguous(dst));
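A minimal standalone sketch (not part of the patch) of the same row-by-row NaN/Inf scan that check_invalid_values performs, exercised through the public ggml API; the tensor shape, the poisoned element, and the 16 MiB context size are arbitrary values chosen for illustration:

#include <math.h>
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // 4 columns x 3 rows of F32; ne[0] is the row length, nb[1] the byte stride between rows
    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    float * d = (float *) t->data;
    for (int i = 0; i < 4*3; ++i) d[i] = (float) i;
    d[5] = NAN; // poison one element

    // same walk as check_invalid_values(): step rows via nb[1], scan ne[0] floats per row
    int bad = 0;
    for (int64_t i1 = 0; i1 < ggml_nrows(t); i1++) {
        const float * sp = (const float *)((const char *) t->data + i1*t->nb[1]);
        for (int64_t i = 0; i < t->ne[0]; ++i) {
            if (isnan(sp[i]) || isinf(sp[i])) bad++;
        }
    }
    printf("invalid values found: %d\n", bad); // prints 1

    ggml_free(ctx);
    return 0;
}

With the patch applied, the in-tree check additionally lands in debug_hook(), which makes `break debug_hook` a convenient stopping point in a debugger.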
@@ -10266,6 +10306,12 @@ static void ggml_compute_forward_soft_max_f32(

    const bool use_f16 = (src1 && src1->type == GGML_TYPE_F16);

+    // clamp scale to avoid overflow
+    if (!isfinite(scale) || scale > 1e6) {
+        // printf("Warning: scale is invalid (%f), resetting to 1.0\n", scale);
+        scale = 1.0f;
+    }
+
    for (int i1 = ir0; i1 < ir1; i1++) {
        // ALiBi
        const uint32_t h = (i1/ne01)%ne02; // head
@@ -10278,6 +10324,27 @@ static void ggml_compute_forward_soft_max_f32(
        ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;
        float       * mp_f32 = src1 ? (float       *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;

+        // scan this row before the softmax: replace inf so the math below stays
+        // finite, and abort on NaN since it cannot be recovered here
+        int nan_count = 0, inf_count = 0;
+        for (int i = 0; i < nc; ++i) {
+            if (isnan(sp[i])) {
+                nan_count++;
+            } else if (isinf(sp[i])) {
+                // printf("Error: sp contains inf value!\n");
+                inf_count++;
+                sp[i] = FLT_MAX;
+            }
+        }
+
+        if (inf_count) {
+            // printf("sp count: col: %d, row: %d, inf: [%d]\n", nc, nr, inf_count);
+        }
+
+        if (nan_count) {
+            // printf("Error: sp contains %d NaN values, aborting!\n", nan_count);
+            exit(1);
+        }
+
        ggml_vec_cpy_f32 (nc, wp, sp);
        ggml_vec_scale_f32(nc, wp, scale);
        if (mp_f32) {
@@ -10302,6 +10369,10 @@ static void ggml_compute_forward_soft_max_f32(
        float max = -INFINITY;
        ggml_vec_max_f32(nc, &max, wp);

+        // guard: a non-finite row maximum would zero out every exp() term below
+        if (!isfinite(max)) {
+            max = FLT_MAX;
+        }
+
        ggml_float sum = ggml_vec_soft_max_f32(nc, dp, wp, max);
        assert(sum > 0.0);

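An illustrative sketch (not part of the patch) of why the clamp above matters: if the row maximum is +inf, every term of the softmax normalization sum exp(wp[i] - max) underflows to exp(-inf) = 0, so sum becomes 0 and the assert(sum > 0.0) fires.

#include <math.h>
#include <stdio.h>

int main(void) {
    const float max = INFINITY;
    // any finite logit minus an infinite row maximum is -inf, and expf(-inf) == 0,
    // so the softmax normalization sum collapses to 0
    printf("%f\n", expf(10.0f - max)); // prints 0.000000
    return 0;
}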
@@ -15431,6 +15502,9 @@ struct ggml_cplan ggml_graph_plan(
    return cplan;
}

+// call path: ggml_graph_compute_with_ctx
+//         -> ggml_graph_compute
+//         -> check_invalid_values
static thread_ret_t ggml_graph_compute_thread(void * data) {
    struct ggml_compute_state * state = (struct ggml_compute_state *) data;
    struct ggml_threadpool * tp = state->threadpool;
@@ -15450,6 +15524,27 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {

    for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
        struct ggml_tensor * node = cgraph->nodes[node_n];
+        struct ggml_tensor * tensor = node;
+
+        // mirror the dispatch below (ggml_compute_forward): skip empty nodes and
+        // nodes handled by an extra-buffer backend, then run the NaN/Inf check
+        // on GGML_OP_SOFT_MAX nodes before they are computed
+        {
+            if (tensor->op == GGML_OP_NONE || ggml_is_empty(tensor)) {
+                // nothing will be computed for this node
+            } else if (ggml_cpu_extra_compute_forward(&params, tensor)) {
+                // handled by ggml_cpu_extra_compute_forward
+            } else if (tensor->op == GGML_OP_SOFT_MAX) {
+                // ggml_compute_forward -> ggml_compute_forward_soft_max
+                //                      -> ggml_compute_forward_soft_max_f32
+                check_invalid_values(tensor);
+            }
+        }

        ggml_compute_forward(&params, node);

@@ -15726,6 +15821,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
                atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
            }

+            // printf("GGML_USE_OPENMP->ggml_graph_compute_thread: %d\n", omp_get_thread_num());
            ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
        }
    } else {
@@ -15757,6 +15853,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
    return ret;
}

+// TODO cgraph
enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
    struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads, NULL);

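A minimal end-to-end sketch (not part of the patch; shapes, sizes, and the include layout are assumptions and may need adjusting to the tree in use) of how the instrumented path gets exercised: building a small graph with a soft_max node and running it through ggml_graph_compute_with_ctx makes ggml_graph_compute_thread take the GGML_OP_SOFT_MAX branch and call check_invalid_values on that node.

#include <math.h>
#include <stdio.h>
#include "ggml.h"
#include "ggml-cpu.h" // assumption: ggml_graph_compute_with_ctx is declared here; older trees declare it in ggml.h

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 64*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // logits: 8 columns x 4 rows
    struct ggml_tensor * logits = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    float * d = (float *) logits->data;
    for (int i = 0; i < 8*4; ++i) d[i] = 0.1f*i;
    d[3] = INFINITY; // provoke the inf handling added in ggml_compute_forward_soft_max_f32

    struct ggml_tensor * probs = ggml_soft_max(ctx, logits);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, probs);

    // runs ggml_graph_compute_thread, which with this patch calls
    // check_invalid_values() on the GGML_OP_SOFT_MAX node before computing it
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    printf("probs[0] = %f\n", ((float *) probs->data)[0]);

    ggml_free(ctx);
    return 0;
}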