Skip to content

Commit 0dd6f79

Browse files
committed
Armadilo 12.6.1
1 parent 8d24c46 commit 0dd6f79

File tree

4 files changed

+105
-89
lines changed

4 files changed

+105
-89
lines changed

inst/include/armadillo_bits/arma_version.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#define ARMA_VERSION_MAJOR 12
2525
#define ARMA_VERSION_MINOR 6
26-
#define ARMA_VERSION_PATCH 0
26+
#define ARMA_VERSION_PATCH 1
2727
#define ARMA_VERSION_NAME "Cortisol Retox"
2828

2929

inst/include/armadillo_bits/glue_times_misc_bones.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,19 @@
2121

2222

2323

24+
class dense_sparse_helper
25+
{
26+
public:
27+
28+
template<typename eT>
29+
arma_inline static typename arma_not_cx<eT>::result dot(const eT* A_mem, const SpMat<eT>& B, const uword col);
30+
31+
template<typename eT>
32+
arma_inline static typename arma_cx_only<eT>::result dot(const eT* A_mem, const SpMat<eT>& B, const uword col);
33+
};
34+
35+
36+
2437
class glue_times_dense_sparse
2538
{
2639
public:

inst/include/armadillo_bits/glue_times_misc_meat.hpp

Lines changed: 88 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,81 @@
2121

2222

2323

24+
template<typename eT>
25+
arma_inline
26+
typename arma_not_cx<eT>::result
27+
dense_sparse_helper::dot(const eT* A_mem, const SpMat<eT>& B, const uword col)
28+
{
29+
arma_extra_debug_sigprint();
30+
31+
uword col_offset = B.col_ptrs[col ];
32+
const uword next_col_offset = B.col_ptrs[col + 1];
33+
34+
const uword* start_ptr = &(B.row_indices[ col_offset]);
35+
const uword* end_ptr = &(B.row_indices[next_col_offset]);
36+
37+
const eT* B_values = B.values;
38+
39+
eT acc = eT(0);
40+
41+
for(const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
42+
{
43+
const uword index = (*ptr);
44+
45+
acc += A_mem[index] * B_values[col_offset];
46+
47+
++col_offset;
48+
}
49+
50+
return acc;
51+
}
52+
53+
54+
55+
template<typename eT>
56+
arma_inline
57+
typename arma_cx_only<eT>::result
58+
dense_sparse_helper::dot(const eT* A_mem, const SpMat<eT>& B, const uword col)
59+
{
60+
arma_extra_debug_sigprint();
61+
62+
typedef typename get_pod_type<eT>::result T;
63+
64+
uword col_offset = B.col_ptrs[col ];
65+
const uword next_col_offset = B.col_ptrs[col + 1];
66+
67+
const uword* start_ptr = &(B.row_indices[ col_offset]);
68+
const uword* end_ptr = &(B.row_indices[next_col_offset]);
69+
70+
const eT* B_values = B.values;
71+
72+
T acc_real = T(0);
73+
T acc_imag = T(0);
74+
75+
for(const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
76+
{
77+
const uword index = (*ptr);
78+
79+
const std::complex<T>& X = A_mem[index];
80+
const std::complex<T>& Y = B_values[col_offset];
81+
82+
const T a = X.real();
83+
const T b = X.imag();
84+
85+
const T c = Y.real();
86+
const T d = Y.imag();
87+
88+
acc_real += (a*c) - (b*d);
89+
acc_imag += (a*d) + (b*c);
90+
91+
++col_offset;
92+
}
93+
94+
return std::complex<T>(acc_real, acc_imag);
95+
}
96+
97+
98+
2499
template<typename T1, typename T2>
25100
inline
26101
void
@@ -69,7 +144,7 @@ glue_times_dense_sparse::apply_noalias(Mat<typename T1::elem_type>& out, const T
69144

70145
out.set_size(A.n_rows, B.n_cols);
71146

72-
if((A.n_elem == 0) || (B.n_nonzero == 0)) { return; }
147+
if((A.n_elem == 0) || (B.n_nonzero == 0)) { out.zeros(); return; }
73148

74149
if((resolves_to_rowvector<T1>::value) || (A.n_rows == 1))
75150
{
@@ -81,34 +156,16 @@ glue_times_dense_sparse::apply_noalias(Mat<typename T1::elem_type>& out, const T
81156
{
82157
arma_extra_debug_print("openmp implementation");
83158

84-
eT* out_mem = out.memptr();
85-
const eT* A_mem = A.memptr();
86-
const eT* B_values = B.values;
159+
eT* out_mem = out.memptr();
160+
const eT* A_mem = A.memptr();
87161

88162
const uword B_n_cols = B.n_cols;
89163
const int n_threads = mp_thread_limit::get();
90164

91165
#pragma omp parallel for schedule(static) num_threads(n_threads)
92166
for(uword col=0; col < B_n_cols; ++col)
93167
{
94-
uword col_offset = B.col_ptrs[col ];
95-
const uword next_col_offset = B.col_ptrs[col + 1];
96-
97-
const uword* start_ptr = &(B.row_indices[ col_offset]);
98-
const uword* end_ptr = &(B.row_indices[next_col_offset]);
99-
100-
eT acc = eT(0);
101-
102-
for(const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
103-
{
104-
const uword index = (*ptr);
105-
106-
acc += A_mem[index] * B_values[col_offset];
107-
108-
++col_offset;
109-
}
110-
111-
out_mem[col] = acc;
168+
out_mem[col] = dense_sparse_helper::dot(A_mem, B, col);
112169
}
113170
}
114171
#endif
@@ -117,32 +174,14 @@ glue_times_dense_sparse::apply_noalias(Mat<typename T1::elem_type>& out, const T
117174
{
118175
arma_extra_debug_print("serial implementation");
119176

120-
eT* out_mem = out.memptr();
121-
const eT* A_mem = A.memptr();
122-
const eT* B_values = B.values;
177+
eT* out_mem = out.memptr();
178+
const eT* A_mem = A.memptr();
123179

124180
const uword B_n_cols = B.n_cols;
125181

126182
for(uword col=0; col < B_n_cols; ++col)
127183
{
128-
uword col_offset = B.col_ptrs[col ];
129-
const uword next_col_offset = B.col_ptrs[col + 1];
130-
131-
const uword* start_ptr = &(B.row_indices[ col_offset]);
132-
const uword* end_ptr = &(B.row_indices[next_col_offset]);
133-
134-
eT acc = eT(0);
135-
136-
for(const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
137-
{
138-
const uword index = (*ptr);
139-
140-
acc += A_mem[index] * B_values[col_offset];
141-
142-
++col_offset;
143-
}
144-
145-
out_mem[col] = acc;
184+
out_mem[col] = dense_sparse_helper::dot(A_mem, B, col);
146185
}
147186
}
148187
}
@@ -457,33 +496,15 @@ glue_times_sparse_dense::apply_noalias_trans(Mat<typename T1::elem_type>& out, c
457496
{
458497
out.zeros(A_n_cols, 1);
459498

460-
eT* out_mem = out.memptr();
461-
const eT* A_values = A.values;
462-
const eT* B_mem = B.memptr();
499+
eT* out_mem = out.memptr();
500+
const eT* B_mem = B.memptr();
463501

464502
const int n_threads = mp_thread_limit::get();
465503

466504
#pragma omp parallel for schedule(static) num_threads(n_threads)
467505
for(uword col=0; col < A_n_cols; ++col)
468506
{
469-
uword col_offset = A.col_ptrs[col ];
470-
const uword next_col_offset = A.col_ptrs[col + 1];
471-
472-
const uword* start_ptr = &(A.row_indices[ col_offset]);
473-
const uword* end_ptr = &(A.row_indices[next_col_offset]);
474-
475-
eT acc = eT(0);
476-
477-
for(const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
478-
{
479-
const uword index = (*ptr);
480-
481-
acc += A_values[col_offset] * B_mem[index];
482-
483-
++col_offset;
484-
}
485-
486-
out_mem[col] = acc;
507+
out_mem[col] = dense_sparse_helper::dot(B_mem, A, col);
487508
}
488509
}
489510
#endif
@@ -494,30 +515,12 @@ glue_times_sparse_dense::apply_noalias_trans(Mat<typename T1::elem_type>& out, c
494515

495516
out.zeros(A_n_cols, 1);
496517

497-
eT* out_mem = out.memptr();
498-
const eT* A_values = A.values;
499-
const eT* B_mem = B.memptr();
518+
eT* out_mem = out.memptr();
519+
const eT* B_mem = B.memptr();
500520

501521
for(uword col=0; col < A_n_cols; ++col)
502522
{
503-
uword col_offset = A.col_ptrs[col ];
504-
const uword next_col_offset = A.col_ptrs[col + 1];
505-
506-
const uword* start_ptr = &(A.row_indices[ col_offset]);
507-
const uword* end_ptr = &(A.row_indices[next_col_offset]);
508-
509-
eT acc = eT(0);
510-
511-
for(const uword* ptr = start_ptr; ptr != end_ptr; ++ptr)
512-
{
513-
const uword index = (*ptr);
514-
515-
acc += A_values[col_offset] * B_mem[index];
516-
517-
++col_offset;
518-
}
519-
520-
out_mem[col] = acc;
523+
out_mem[col] = dense_sparse_helper::dot(B_mem, A, col);
521524
}
522525
}
523526
}

inst/include/armadillo_bits/traits.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -559,10 +559,7 @@ struct is_arma_type2
559559
= is_Mat<T1>::value
560560
|| is_Gen<T1>::value
561561
|| is_Op<T1>::value
562-
|| is_CubeToMatOp<T1>::value
563-
|| is_SpToDOp<T1>::value
564562
|| is_Glue<T1>::value
565-
|| is_SpToDGlue<T1>::value
566563
|| is_eOp<T1>::value
567564
|| is_eGlue<T1>::value
568565
|| is_mtOp<T1>::value
@@ -574,6 +571,9 @@ struct is_arma_type2
574571
|| is_subview_cols<T1>::value
575572
|| is_subview_elem1<T1>::value
576573
|| is_subview_elem2<T1>::value
574+
|| is_CubeToMatOp<T1>::value
575+
|| is_SpToDOp<T1>::value
576+
|| is_SpToDGlue<T1>::value
577577
;
578578
};
579579

0 commit comments

Comments
 (0)