@@ -20,14 +20,14 @@ inline uint8_t from_float(float value) {
2020 } in = {value};
2121 out.bits = (in.bits >> 24 ) & 0x80 ;
2222 in.bits &= 0x7fffffff ;
23- if (in.f >= FP8<E>::MAX () ) {
23+ if (in.f >= FP8<E>::MAX) {
2424 out.bits |= 0x7E ;
25- } else if (in.f < FP8<E>::MIN () ) { // => 0.
25+ } else if (in.f < FP8<E>::MIN) { // => 0.
2626 } else {
27- in.f *= exp_m2 <FP8<E>::E_BIAS () -127 >();
28- uint32_t eps = (0x3fffff >>FP8<E>::M ()) + ((in.bits >> (23 -FP8<E>::M () )) & 0x1 );
27+ in.f *= exp_f2 <FP8<E>::E_BIAS-127 >();
28+ uint32_t eps = (0x3fffff >>FP8<E>::M) + ((in.bits >> (23 -FP8<E>::M)) & 0x1 );
2929 in.bits += eps;
30- out.bits |= (in.bits >> (23 -FP8<E>::M () )) & 0x7F ;
30+ out.bits |= (in.bits >> (23 -FP8<E>::M)) & 0x7F ;
3131 }
3232 return out.bits ;
3333}
@@ -44,9 +44,9 @@ inline float to_float(const FP8<E>& in) {
4444 out.bits = in.bits & 0x80 ;
4545 out.bits <<= 24 ;
4646 uint32_t _bits = in.bits & 0x7F ;
47- _bits <<= (23 -FP8<E>::M () );
47+ _bits <<= (23 -FP8<E>::M);
4848 out.bits |= _bits;
49- out.f *= exp_p2 <127 -FP8<E>::E_BIAS () >();
49+ out.f *= exp_f2 <127 -FP8<E>::E_BIAS>();
5050 return out.f ;
5151}
5252} // namespace fp8
@@ -91,8 +91,8 @@ static inline void conv(const float* x, bloc_fp8<E, QK>* y, int64_t size) {
9191 for (int64_t i=0 ; i<QK; i++) {
9292 m = std::max (std::abs (x[q*QK+i]),m);
9393 }
94- const float D = FP8<E>::MAX () /m;
95- y[q].d = m/FP8<E>::MAX () ;
94+ const float D = FP8<E>::MAX/m;
95+ y[q].d = m/FP8<E>::MAX;
9696#ifdef GGML_USE_OPENMP_SIMD
9797 #pragma omp simd
9898#endif
@@ -154,22 +154,22 @@ float dot_reg(const bloc_fp8<E, QK>* x, const _Y* y, int64_t size) {
154154 for (int64_t v=0 ; v<VECT_SIZE; ++v) { mantice_16bits[v] = mantice_8bits[v]; }
155155
156156 for (int64_t v=0 ; v<VECT_SIZE; ++v) { sign_16bits[v] <<= 8 ; }
157- for (int64_t v=0 ; v<VECT_SIZE; ++v) { mantice_16bits[v] <<= (7 -fp8_t::M () ); }
157+ for (int64_t v=0 ; v<VECT_SIZE; ++v) { mantice_16bits[v] <<= (7 -fp8_t ::M); }
158158
159159 for (int64_t v=0 ; v<VECT_SIZE; ++v) { x_bf16[v] = sign_16bits[v] | mantice_16bits[v]; }
160160
161161 for (int64_t v=0 ; v<VECT_SIZE; ++v) { ux[v].bits = x_bf16[v]; }
162162 for (int64_t v=0 ; v<VECT_SIZE; ++v) { ux[v].bits <<= 16 ; }
163163
164- for (int64_t v=0 ; v<VECT_SIZE; ++v) { X[v] = ux[v].f ; } // * exp_p2 <127-fp8_t::E_BIAS() >(); }
164+ for (int64_t v=0 ; v<VECT_SIZE; ++v) { X[v] = ux[v].f ; } // * exp_f2 <127-fp8_t::E_BIAS>(); }
165165 for (int64_t v=0 ; v<VECT_SIZE; ++v) { Y[v] = (float )y[q*QK+i+r*VECT_SIZE+v]; }
166166 for (int64_t v=0 ; v<VECT_SIZE; ++v) { Z0[r][v] += X[v]*Y[v]; }
167167 }
168168 }
169169 // apply scale
170170 for (int64_t r=0 ; r<NB_REG; ++r) {
171171 for (int64_t v=0 ; v<VECT_SIZE; ++v) {
172- Z[r][v] += Z0[r][v]*(x[q]).d * exp_p2 <127 -fp8_t::E_BIAS () >();
172+ Z[r][v] += Z0[r][v]*(x[q]).d * exp_f2 <127 -fp8_t ::E_BIAS>();
173173 }
174174 }
175175 }
0 commit comments