77class DaphneContext ;
88
99extern " C" {
10- // Custom sequential sum-kernel.
11- void mySumSeq (
12- float * res,
13- const DenseMatrix<float > * arg,
14- DaphneContext * ctx
15- ) {
16- std::cerr << " hello from mySumSeq()" << std::endl;
17- const float * valuesArg = arg->getValues ();
18- *res = 0 ;
19- for (size_t r = 0 ; r < arg->getNumRows (); r++) {
20- for (size_t c = 0 ; c < arg->getNumCols (); c++)
21- *res += valuesArg[c];
22- valuesArg += arg->getRowSkip ();
23- }
10+ // Custom sequential sum-kernel.
11+ void mySumSeq (float *res, const DenseMatrix<float > *arg, DaphneContext *ctx) {
12+ std::cerr << " hello from mySumSeq()" << std::endl;
13+ const float *valuesArg = arg->getValues ();
14+ *res = 0 ;
15+ for (size_t r = 0 ; r < arg->getNumRows (); r++) {
16+ for (size_t c = 0 ; c < arg->getNumCols (); c++)
17+ *res += valuesArg[c];
18+ valuesArg += arg->getRowSkip ();
2419 }
25-
26- // Custom SIMD-enabled sum-kernel.
27- void mySumSIMD (
28- float * res,
29- const DenseMatrix<float > * arg,
30- DaphneContext * ctx
31- ) {
32- std::cerr << " hello from mySumSIMD()" << std::endl;
20+ }
3321
34- // Validation.
35- const size_t numCells = arg->getNumRows () * arg->getNumCols ();
36- if (numCells % 8 )
37- throw std::runtime_error (
38- " for simplicity, the number of cells must be "
39- " a multiple of 8"
40- );
41- if (arg->getNumCols () != arg->getRowSkip ())
42- throw std::runtime_error (
43- " for simplicity, the argument must not be "
44- " a column segment of another matrix"
45- );
46-
47- // SIMD accumulation (8x f32).
48- const float * valuesArg = arg->getValues ();
49- __m256 acc = _mm256_setzero_ps ();
50- for (size_t i = 0 ; i < numCells / 8 ; i++) {
51- acc = _mm256_add_ps (acc, _mm256_loadu_ps (valuesArg));
52- valuesArg += 8 ;
53- }
54-
55- // Summation of accumulator elements.
56- *res =
57- (reinterpret_cast <float *>(&acc))[0 ] +
58- (reinterpret_cast <float *>(&acc))[1 ] +
59- (reinterpret_cast <float *>(&acc))[2 ] +
60- (reinterpret_cast <float *>(&acc))[3 ] +
61- (reinterpret_cast <float *>(&acc))[4 ] +
62- (reinterpret_cast <float *>(&acc))[5 ] +
63- (reinterpret_cast <float *>(&acc))[6 ] +
64- (reinterpret_cast <float *>(&acc))[7 ];
22+ // Custom SIMD-enabled sum-kernel.
23+ void mySumSIMD (float *res, const DenseMatrix<float > *arg, DaphneContext *ctx) {
24+ std::cerr << " hello from mySumSIMD()" << std::endl;
25+
26+ // Validation.
27+ const size_t numCells = arg->getNumRows () * arg->getNumCols ();
28+ if (numCells % 8 )
29+ throw std::runtime_error (" for simplicity, the number of cells must be "
30+ " a multiple of 8" );
31+ if (arg->getNumCols () != arg->getRowSkip ())
32+ throw std::runtime_error (" for simplicity, the argument must not be "
33+ " a column segment of another matrix" );
34+
35+ // SIMD accumulation (8x f32).
36+ const float *valuesArg = arg->getValues ();
37+ __m256 acc = _mm256_setzero_ps ();
38+ for (size_t i = 0 ; i < numCells / 8 ; i++) {
39+ acc = _mm256_add_ps (acc, _mm256_loadu_ps (valuesArg));
40+ valuesArg += 8 ;
6541 }
42+
43+ // Summation of accumulator elements.
44+ *res = (reinterpret_cast <float *>(&acc))[0 ] + (reinterpret_cast <float *>(&acc))[1 ] +
45+ (reinterpret_cast <float *>(&acc))[2 ] + (reinterpret_cast <float *>(&acc))[3 ] +
46+ (reinterpret_cast <float *>(&acc))[4 ] + (reinterpret_cast <float *>(&acc))[5 ] +
47+ (reinterpret_cast <float *>(&acc))[6 ] + (reinterpret_cast <float *>(&acc))[7 ];
48+ }
6649}
0 commit comments