55#include < iostream>
66
77/* Computations related to floating-point types. */
8- template <typename fp_t > size_t numExpBits () {return 0 ;}
9- template <> size_t numExpBits <float >() {return 8 ;}
10- template <> size_t numExpBits <double >() {return 11 ;}
8+ template <typename fp_t > size_t computeNumExpBits () {return 0 ;}
9+ template <> size_t computeNumExpBits <float >() {return 8 ;}
10+ template <> size_t computeNumExpBits <double >() {return 11 ;}
1111
12- template <typename fp_t > size_t numFracBits () {return 0 ;}
13- template <> size_t numFracBits <float >() {return 24 ;}
14- template <> size_t numFracBits <double >() {return 53 ;}
12+ template <typename fp_t > size_t computeNumFracBits () {return 0 ;}
13+ template <> size_t computeNumFracBits <float >() {return 24 ;}
14+ template <> size_t computeNumFracBits <double >() {return 53 ;}
1515
1616template <typename T>
1717struct get_storage_format ;
@@ -104,7 +104,7 @@ struct MatrixSplit {
104104 }
105105 // Compute the smallest power of 2 that is strictly greater than the
106106 // maximum value in the row/column.
107- // NOTE 1: This is not the same technique used in uoi24.
107+ // NOTE 1: This is not the technique used in uoi24.
108108 // NOTE 2: I use exponents instead of powers of 2, as I need the former
109109 // to shift correctly.
110110 frexp (this ->powersVector [i], this ->scalingExponents .data () + i);
@@ -130,7 +130,7 @@ struct MatrixSplit {
130130 auto localMatrix = this ->matrix ;
131131 for (size_t slice = 0 ; slice < numSplits; slice++) {
132132 for (size_t i = 0 ; i < this ->otherDimension (); i++) {
133- fp_t sigma = ldexp (0.75 , numFracBits <fp_t >() - this ->bitsPerSlice * slice + 1 - this ->bitsPerSlice ) * powersVector[i];
133+ fp_t sigma = ldexp (0.75 , computeNumFracBits <fp_t >() - this ->bitsPerSlice * slice + 1 - this ->bitsPerSlice ) * powersVector[i];
134134 for (size_t j = 0 ; j < this ->innerProductDimension (); j++) {
135135 auto value = (localMatrix[i * iStride + j * jStride] + sigma);
136136 value -= sigma;
@@ -154,8 +154,8 @@ struct MatrixSplit {
154154 void computeSplitsWithBitMasking () {
155155 this ->splitType = splittingStrategy::bitMasking;
156156 // Compute splits one row/column at a time.
157- auto nunExpBits = numExpBits <fp_t >();
158- auto nunFracBits = numFracBits <fp_t >();
157+ auto numExpBits = computeNumExpBits <fp_t >();
158+ auto numFracBits = computeNumFracBits <fp_t >();
159159 auto iStride = this ->iStride ();
160160 auto jStride = this ->jStride ();
161161 std::vector<uint_t > tmp (this ->innerProductDimension ());
@@ -167,18 +167,19 @@ struct MatrixSplit {
167167 fp_t value = this ->matrix [index]; // powersVector[i];
168168 tmp[j] = std::bit_cast<uint_t >(value); // To bitstring.
169169 sign[j] = std::signbit (value); // Extract sign.
170- tmp[j] &= (~(uint_t )(0 )) >> (nunExpBits + 1 ); // Remove exponent.
170+ uint_t bitmask = (~((uint_t )(0 ))) >> (numExpBits + 1 );
171+ tmp[j] = tmp[j] & bitmask; // Remove exponent and sign.
171172 // Restore implicit bit for normal numbers.
172173 // NOTE: NaNs and infs are currently not supported.
173174 if (std::fpclassify (value) == FP_NORMAL)
174- tmp[j] |= ((uint_t )1 << (nunFracBits - 1 ));
175+ tmp[j] |= ((uint_t )1 << (numFracBits - 1 ));
175176 }
176177
177178 // Create bitmask.
178179 const uint_t smallBitmask = (1 << this ->bitsPerSlice ) - 1 ;
179180 // Perform the split.
180181 for (size_t j = 0 ; j < this ->innerProductDimension (); j++) {
181- int16_t shiftCounter = nunFracBits - this ->bitsPerSlice ;
182+ int16_t shiftCounter = numFracBits - this ->bitsPerSlice ;
182183 int currentExponent;
183184 frexp (this ->matrix [i * iStride + j * jStride], &currentExponent);
184185 int16_t exponentDifference = scalingExponents[i] - currentExponent;
@@ -192,10 +193,10 @@ struct MatrixSplit {
192193 smallBitmask << shiftCounter :
193194 smallBitmask >> -shiftCounter;
194195 uint_t currentSlice = tmp[j] & bitmask;
195- uint_t current_split = shiftCounter > 0 ?
196+ uint_t currentSplit = shiftCounter > 0 ?
196197 currentSlice >> shiftCounter :
197198 currentSlice << -shiftCounter;
198- splitint_t value = (splitint_t )(current_split ) * (sign[j] ? -1 : 1 );
199+ splitint_t value = (splitint_t )(currentSplit ) * (sign[j] ? -1 : 1 );
199200 this ->memory [i * iStride + j * jStride + slice * this ->matrix .size ()] = value;
200201 shiftCounter -= this ->bitsPerSlice ;
201202 }
@@ -213,7 +214,7 @@ std::vector<fp_t> mergeIntToFloats(const MatrixSplit<splitint_t, fp_t> &A,
213214 for (size_t i = 0 ; i < A.m ; i++) {
214215 decltype (A.memory [0 ]) tmp = 0 ;
215216 for (size_t j = 0 ; j < A.n ; j++) {
216- int8_t shiftValue = numFracBits <fp_t >() - bitsPerSlice;
217+ int8_t shiftValue = computeNumFracBits <fp_t >() - bitsPerSlice;
217218 for (size_t iBlock = 0 ; iBlock < A.numSplits ; iBlock++) {
218219 auto slice = A.memory [i + j * A.m + iBlock * A.m * A.n ];
219220 auto new_slice = shiftValue > 0 ?
@@ -222,7 +223,7 @@ std::vector<fp_t> mergeIntToFloats(const MatrixSplit<splitint_t, fp_t> &A,
222223 tmp |= new_slice;
223224 shiftValue -= bitsPerSlice;
224225 }
225- C[i + j * A.m ] = std::ldexp (tmp, -(int )numFracBits <fp_t >()) *
226+ C[i + j * A.m ] = std::ldexp (tmp, -(int )computeNumFracBits <fp_t >()) *
226227 A.powersVector [i];
227228 }
228229 }
@@ -247,7 +248,7 @@ void computeExactIntegerGEMM(const MatrixSplit<splitint_t, fp_t> &A,
247248
248249/* Compute scaling constant for using the split strategy. */
249250template <typename splitint_t , typename fp_t >
250- fp_t computeScalingConstantforUsingSplittingStrategy (const MatrixSplit<splitint_t , fp_t > &A,
251+ fp_t computeScalingConstantForSplittingStrategy (const MatrixSplit<splitint_t , fp_t > &A,
251252 const MatrixSplit<splitint_t , fp_t > &B) {
252253 // When splitting with round-to-nearest, the first slice has bitsPerSlice - 1 bits,
253254 // and we need to account for this when scaling the final result.
@@ -273,7 +274,7 @@ std::vector<fp_t> computeProductsWithFloatingPointAccumulation(const MatrixSplit
273274
274275 std::vector<fp_t > C (A.m * B.n );
275276
276- auto scalingConstant = computeScalingConstantforUsingSplittingStrategy (A, B);
277+ auto scalingConstant = computeScalingConstantForSplittingStrategy (A, B);
277278
278279 for (size_t diagonal = 0 ; diagonal <= numDiagonals; diagonal++) {
279280 int Aindex = diagonal < A.numSplits - 1 ? diagonal : A.numSplits - 1 ;
@@ -313,7 +314,7 @@ std::vector<fp_t> computeProductsWithIntegerAccumulation(const MatrixSplit<split
313314
314315 std::vector<fp_t > C (A.m * B.n );
315316
316- auto scalingConstant = computeScalingConstantforUsingSplittingStrategy (A, B);
317+ auto scalingConstant = computeScalingConstantForSplittingStrategy (A, B);
317318
318319 for (size_t diagonal = 0 ; diagonal <= numDiagonals; diagonal++) {
319320 int Aindex = diagonal < A.numSplits - 1 ? diagonal : A.numSplits - 1 ;
0 commit comments