2121
2222
2323
24+ template <typename eT>
25+ arma_inline
26+ typename arma_not_cx<eT>::result
27+ dense_sparse_helper::dot (const eT* A_mem, const SpMat<eT>& B, const uword col)
28+ {
29+ arma_extra_debug_sigprint ();
30+
31+ uword col_offset = B.col_ptrs [col ];
32+ const uword next_col_offset = B.col_ptrs [col + 1 ];
33+
34+ const uword* start_ptr = &(B.row_indices [ col_offset]);
35+ const uword* end_ptr = &(B.row_indices [next_col_offset]);
36+
37+ const eT* B_values = B.values ;
38+
39+ eT acc = eT (0 );
40+
41+ for (const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
42+ {
43+ const uword index = (*ptr);
44+
45+ acc += A_mem[index] * B_values[col_offset];
46+
47+ ++col_offset;
48+ }
49+
50+ return acc;
51+ }
52+
53+
54+
55+ template <typename eT>
56+ arma_inline
57+ typename arma_cx_only<eT>::result
58+ dense_sparse_helper::dot (const eT* A_mem, const SpMat<eT>& B, const uword col)
59+ {
60+ arma_extra_debug_sigprint ();
61+
62+ typedef typename get_pod_type<eT>::result T;
63+
64+ uword col_offset = B.col_ptrs [col ];
65+ const uword next_col_offset = B.col_ptrs [col + 1 ];
66+
67+ const uword* start_ptr = &(B.row_indices [ col_offset]);
68+ const uword* end_ptr = &(B.row_indices [next_col_offset]);
69+
70+ const eT* B_values = B.values ;
71+
72+ T acc_real = T (0 );
73+ T acc_imag = T (0 );
74+
75+ for (const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
76+ {
77+ const uword index = (*ptr);
78+
79+ const std::complex <T>& X = A_mem[index];
80+ const std::complex <T>& Y = B_values[col_offset];
81+
82+ const T a = X.real ();
83+ const T b = X.imag ();
84+
85+ const T c = Y.real ();
86+ const T d = Y.imag ();
87+
88+ acc_real += (a*c) - (b*d);
89+ acc_imag += (a*d) + (b*c);
90+
91+ ++col_offset;
92+ }
93+
94+ return std::complex <T>(acc_real, acc_imag);
95+ }
96+
97+
98+
2499template <typename T1, typename T2>
25100inline
26101void
@@ -69,7 +144,7 @@ glue_times_dense_sparse::apply_noalias(Mat<typename T1::elem_type>& out, const T
69144
70145 out.set_size (A.n_rows , B.n_cols );
71146
72- if ((A.n_elem == 0 ) || (B.n_nonzero == 0 )) { return ; }
147+ if ((A.n_elem == 0 ) || (B.n_nonzero == 0 )) { out. zeros (); return ; }
73148
74149 if ((resolves_to_rowvector<T1>::value) || (A.n_rows == 1 ))
75150 {
@@ -81,34 +156,16 @@ glue_times_dense_sparse::apply_noalias(Mat<typename T1::elem_type>& out, const T
81156 {
82157 arma_extra_debug_print (" openmp implementation" );
83158
84- eT* out_mem = out.memptr ();
85- const eT* A_mem = A.memptr ();
86- const eT* B_values = B.values ;
159+ eT* out_mem = out.memptr ();
160+ const eT* A_mem = A.memptr ();
87161
88162 const uword B_n_cols = B.n_cols ;
89163 const int n_threads = mp_thread_limit::get ();
90164
91165 #pragma omp parallel for schedule(static) num_threads(n_threads)
92166 for (uword col=0 ; col < B_n_cols; ++col)
93167 {
94- uword col_offset = B.col_ptrs [col ];
95- const uword next_col_offset = B.col_ptrs [col + 1 ];
96-
97- const uword* start_ptr = &(B.row_indices [ col_offset]);
98- const uword* end_ptr = &(B.row_indices [next_col_offset]);
99-
100- eT acc = eT (0 );
101-
102- for (const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
103- {
104- const uword index = (*ptr);
105-
106- acc += A_mem[index] * B_values[col_offset];
107-
108- ++col_offset;
109- }
110-
111- out_mem[col] = acc;
168+ out_mem[col] = dense_sparse_helper::dot (A_mem, B, col);
112169 }
113170 }
114171 #endif
@@ -117,32 +174,14 @@ glue_times_dense_sparse::apply_noalias(Mat<typename T1::elem_type>& out, const T
117174 {
118175 arma_extra_debug_print (" serial implementation" );
119176
120- eT* out_mem = out.memptr ();
121- const eT* A_mem = A.memptr ();
122- const eT* B_values = B.values ;
177+ eT* out_mem = out.memptr ();
178+ const eT* A_mem = A.memptr ();
123179
124180 const uword B_n_cols = B.n_cols ;
125181
126182 for (uword col=0 ; col < B_n_cols; ++col)
127183 {
128- uword col_offset = B.col_ptrs [col ];
129- const uword next_col_offset = B.col_ptrs [col + 1 ];
130-
131- const uword* start_ptr = &(B.row_indices [ col_offset]);
132- const uword* end_ptr = &(B.row_indices [next_col_offset]);
133-
134- eT acc = eT (0 );
135-
136- for (const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
137- {
138- const uword index = (*ptr);
139-
140- acc += A_mem[index] * B_values[col_offset];
141-
142- ++col_offset;
143- }
144-
145- out_mem[col] = acc;
184+ out_mem[col] = dense_sparse_helper::dot (A_mem, B, col);
146185 }
147186 }
148187 }
@@ -457,33 +496,15 @@ glue_times_sparse_dense::apply_noalias_trans(Mat<typename T1::elem_type>& out, c
457496 {
458497 out.zeros (A_n_cols, 1 );
459498
460- eT* out_mem = out.memptr ();
461- const eT* A_values = A.values ;
462- const eT* B_mem = B.memptr ();
499+ eT* out_mem = out.memptr ();
500+ const eT* B_mem = B.memptr ();
463501
464502 const int n_threads = mp_thread_limit::get ();
465503
466504 #pragma omp parallel for schedule(static) num_threads(n_threads)
467505 for (uword col=0 ; col < A_n_cols; ++col)
468506 {
469- uword col_offset = A.col_ptrs [col ];
470- const uword next_col_offset = A.col_ptrs [col + 1 ];
471-
472- const uword* start_ptr = &(A.row_indices [ col_offset]);
473- const uword* end_ptr = &(A.row_indices [next_col_offset]);
474-
475- eT acc = eT (0 );
476-
477- for (const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
478- {
479- const uword index = (*ptr);
480-
481- acc += A_values[col_offset] * B_mem[index];
482-
483- ++col_offset;
484- }
485-
486- out_mem[col] = acc;
507+ out_mem[col] = dense_sparse_helper::dot (B_mem, A, col);
487508 }
488509 }
489510 #endif
@@ -494,30 +515,12 @@ glue_times_sparse_dense::apply_noalias_trans(Mat<typename T1::elem_type>& out, c
494515
495516 out.zeros (A_n_cols, 1 );
496517
497- eT* out_mem = out.memptr ();
498- const eT* A_values = A.values ;
499- const eT* B_mem = B.memptr ();
518+ eT* out_mem = out.memptr ();
519+ const eT* B_mem = B.memptr ();
500520
501521 for (uword col=0 ; col < A_n_cols; ++col)
502522 {
503- uword col_offset = A.col_ptrs [col ];
504- const uword next_col_offset = A.col_ptrs [col + 1 ];
505-
506- const uword* start_ptr = &(A.row_indices [ col_offset]);
507- const uword* end_ptr = &(A.row_indices [next_col_offset]);
508-
509- eT acc = eT (0 );
510-
511- for (const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
512- {
513- const uword index = (*ptr);
514-
515- acc += A_values[col_offset] * B_mem[index];
516-
517- ++col_offset;
518- }
519-
520- out_mem[col] = acc;
523+ out_mem[col] = dense_sparse_helper::dot (B_mem, A, col);
521524 }
522525 }
523526 }
0 commit comments