@@ -468,7 +468,7 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #endif

 // floating point type used to accumulate sums
-typedef double ggml_float;
+typedef float ggml_float;

 #undef MIN
 #undef MAX
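ggml_float is the accumulator type used by ggml's long reductions (softmax sums, norms), so narrowing it from double to float trades precision for speed, which is why the later hunks keep or restore wider arithmetic at the precision-sensitive call sites. A minimal standalone sketch (not from ggml, illustrative only) of how a float accumulator saturates on a long reduction:

    // Standalone illustration: summing many small terms in a float vs. a
    // double accumulator. Once the running float sum reaches 2^24, adding
    // 1.0f no longer changes it, so the float result stops short of n.
    #include <stdio.h>

    int main(void) {
        const int n = 20000000;   // illustrative element count
        float  sum_f = 0.0f;
        double sum_d = 0.0;
        for (int i = 0; i < n; ++i) {
            sum_f += 1.0f;        // exact result would be n
            sum_d += 1.0;
        }
        printf("float accumulator:  %.1f\n", sum_f);  // noticeably below n
        printf("double accumulator: %.1f\n", sum_d);  // exactly n
        return 0;
    }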
@@ -13937,7 +13937,7 @@ static void ggml_compute_forward_soft_max_f32(
         ggml_float sum = ggml_vec_soft_max_f32(nc, dp, wp, max);
         assert(sum > 0.0);

-        sum = 1.0/sum;
+        sum = 1.f/sum;
         ggml_vec_scale_f32(nc, dp, sum);

 #ifndef NDEBUG
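The 1.f/sum sites in this patch all follow the same softmax pattern: find the row maximum, exponentiate the shifted values, accumulate their sum, then scale by the reciprocal. A simplified scalar sketch of that pattern, using hypothetical helper code rather than the real SIMD ggml_vec_* kernels:

    // Simplified scalar softmax over one row (illustrative, not the SIMD
    // ggml_vec_soft_max_f32): subtract the row max, exponentiate, accumulate
    // the sum in double, then scale by the reciprocal of the sum.
    #include <math.h>

    static void softmax_row(int n, float * y, const float * x) {
        float max = x[0];
        for (int i = 1; i < n; ++i) {
            if (x[i] > max) max = x[i];      // row maximum, as ggml_vec_max_f32 does
        }
        double sum = 0.0;                    // wide accumulator for the reduction
        for (int i = 0; i < n; ++i) {
            y[i] = expf(x[i] - max);         // max subtraction keeps expf from overflowing
            sum += (double) y[i];
        }
        const float scale = 1.f/(float) sum; // same form as the 1.f/sum in the hunks
        for (int i = 0; i < n; ++i) {
            y[i] *= scale;                   // ggml does this via ggml_vec_scale_f32
        }
    }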
@@ -16020,7 +16020,7 @@ static void ggml_compute_forward_flash_attn_back_f32(

                 assert(sum > 0.0);

-                sum = 1.0/sum;
+                sum = 1.f/sum;
                 ggml_vec_scale_f32(masked_begin, SM, sum);

             }
@@ -17091,7 +17091,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         ggml_vec_max_f32(nc, &max, s0);
         ggml_float sum = ggml_vec_soft_max_f32(nc, ds0, s0, max);
         assert(sum > 0.0);
-        ggml_vec_scale_f32(nc, ds0, 1.0/sum);
+        ggml_vec_scale_f32(nc, ds0, 1.f/sum);

         // grad(src0) = (softmax(src0) - src1) * grad(cross_entropy_loss(src0, src1)) / nr
         ggml_vec_sub_f32(nc, ds0, ds0, s1);
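The comment in this hunk is the backward rule being implemented: with p = softmax(src0), the gradient of cross-entropy with respect to the logits is (p - src1), scaled by the incoming loss gradient and divided by the number of rows nr. A scalar sketch of that rule, with hypothetical helper names rather than the ggml implementation:

    // Scalar sketch of the cross-entropy backward rule from this hunk:
    //   grad(src0) = (softmax(src0) - src1) * d_loss / nr
    #include <math.h>

    static void cross_entropy_back_row(int nc, float * ds0, const float * s0,
                                       const float * s1, float d_loss, float nr) {
        // softmax of the logits s0, max-subtracted for stability
        float max = s0[0];
        for (int i = 1; i < nc; ++i) { if (s0[i] > max) max = s0[i]; }
        double sum = 0.0;
        for (int i = 0; i < nc; ++i) { ds0[i] = expf(s0[i] - max); sum += ds0[i]; }
        const float scale = 1.f/(float) sum;
        // (softmax(s0) - s1), then scale by the incoming gradient and 1/nr
        for (int i = 0; i < nc; ++i) {
            ds0[i] = (ds0[i]*scale - s1[i]) * d_loss / nr;
        }
    }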
@@ -19414,7 +19414,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
 #include "windows.h"

 // TODO: support > 64 CPUs
-bool ggml_thread_apply_affinity(bool * mask) {
+static bool ggml_thread_apply_affinity(bool * mask) {
     HANDLE h = GetCurrentThread();
     uint64_t bitmask = 0ULL;

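Making ggml_thread_apply_affinity static gives this Windows-only helper internal linkage, so the symbol stays private to ggml.c. The rest of the function body is not shown in this diff; the sketch below is only an assumption about what applying a mask of up to 64 logical CPUs can look like, built on the documented Win32 call SetThreadAffinityMask:

    // Assumed sketch of a Windows affinity helper for up to 64 logical CPUs.
    // Not the actual body of ggml_thread_apply_affinity(), which this diff
    // does not show; it only illustrates the Win32 call involved.
    #include <stdbool.h>
    #include <stdint.h>
    #include <windows.h>

    static bool apply_affinity_sketch(const bool * mask, int n_cpus) {
        uint64_t bitmask = 0ULL;
        for (int i = 0; i < n_cpus && i < 64; ++i) {  // the TODO above: > 64 CPUs
            if (mask[i]) {
                bitmask |= 1ULL << i;
            }
        }
        if (bitmask == 0ULL) {
            return true;  // nothing requested; keep the default affinity
        }
        HANDLE h = GetCurrentThread();
        DWORD_PTR prev = SetThreadAffinityMask(h, (DWORD_PTR) bitmask);
        return prev != 0;  // zero return means the call failed
    }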
@@ -21081,13 +21081,13 @@ static enum ggml_opt_result ggml_opt_adam(
         float gnorm = 1.0f;
         if (gclip > 0.0f) {
             // gradient clipping
-            ggml_float sum = 0.0;
+            double sum = 0.0;
             for (int64_t i = 0; i < nx; ++i) {
-                sum += (ggml_float)(g[i]*g[i]);
+                sum += (double)(g[i]*g[i]);
             }
-            ggml_float norm = sqrt(sum);
-            if (norm > (ggml_float) gclip) {
-                gnorm = (float) ((ggml_float) gclip / norm);
+            ggml_float norm = (ggml_float) sqrt(sum);
+            if (norm > gclip) {
+                gnorm = (float) (gclip / norm);
             }
         }
         const float beta1h = alpha*sched/(1.0f - powf(beta1, opt->iter));
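The gradient-clipping hunk pins the squared-gradient reduction to an explicit double, since summing nx squares is exactly the kind of long reduction that degrades in float, while the comparison and the resulting scale factor stay in single precision. A standalone sketch of the same clip-by-global-norm rule, with illustrative names rather than ggml's:

    // Sketch of gradient clipping by global L2 norm, mirroring the hunk above:
    // accumulate the squared gradient in double, then return the factor that
    // the optimizer multiplies into the update when the norm exceeds gclip.
    #include <math.h>
    #include <stdint.h>

    static float clip_scale(const float * g, int64_t nx, float gclip) {
        double sum = 0.0;                     // explicit double: sum of nx squares
        for (int64_t i = 0; i < nx; ++i) {
            sum += (double) g[i] * (double) g[i];
        }
        const float norm = (float) sqrt(sum); // ||g||_2
        if (gclip > 0.0f && norm > gclip) {
            return gclip / norm;              // scaled gradient has norm == gclip
        }
        return 1.0f;                          // below threshold: leave gradient as-is
    }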