Skip to content

Commit 0d8259c

Browse files
committed
Armadillo 12.6.4
1 parent 036513a commit 0d8259c

File tree

12 files changed

+163
-269
lines changed

12 files changed

+163
-269
lines changed

inst/include/armadillo_bits/arma_version.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#define ARMA_VERSION_MAJOR 12
2525
#define ARMA_VERSION_MINOR 6
26-
#define ARMA_VERSION_PATCH 3
26+
#define ARMA_VERSION_PATCH 4
2727
#define ARMA_VERSION_NAME "Cortisol Retox"
2828

2929

inst/include/armadillo_bits/auxlib_bones.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,15 @@ class auxlib
433433

434434
template<typename T>
435435
inline static bool rudimentary_sym_check(const Mat< std::complex<T> >& X);
436+
437+
template<typename eT>
438+
inline static typename get_pod_type<eT>::result norm1_gen(const Mat<eT>& A);
439+
440+
template<typename eT>
441+
inline static typename get_pod_type<eT>::result norm1_sym(const Mat<eT>& A);
442+
443+
template<typename eT>
444+
inline static typename get_pod_type<eT>::result norm1_band(const Mat<eT>& A, const uword KL, const uword KU);
436445
};
437446

438447

inst/include/armadillo_bits/auxlib_meat.hpp

Lines changed: 116 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ auxlib::inv_rcond(Mat<eT>& A, typename get_pod_type<eT>::result& out_rcond)
121121
podarray<blas_int> ipiv(A.n_rows);
122122

123123
arma_extra_debug_print("lapack::lange()");
124-
norm_val = lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
124+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
125125

126126
arma_extra_debug_print("lapack::getrf()");
127127
lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info);
@@ -333,7 +333,7 @@ auxlib::inv_sympd_rcond(Mat<eT>& A, bool& out_sympd_state, eT& out_rcond)
333333
podarray<T> work(A.n_rows);
334334

335335
arma_extra_debug_print("lapack::lansy()");
336-
norm_val = lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
336+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
337337

338338
arma_extra_debug_print("lapack::potrf()");
339339
lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
@@ -399,7 +399,7 @@ auxlib::inv_sympd_rcond(Mat< std::complex<T> >& A, bool& out_sympd_state, T& out
399399
podarray<T> work(A.n_rows);
400400

401401
arma_extra_debug_print("lapack::lanhe()");
402-
norm_val = lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
402+
norm_val = (has_blas_float_bug<T>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
403403

404404
arma_extra_debug_print("lapack::potrf()");
405405
lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
@@ -4039,7 +4039,7 @@ auxlib::solve_square_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_ty
40394039
podarray<blas_int> ipiv(A.n_rows + 2); // +2 for paranoia
40404040

40414041
arma_extra_debug_print("lapack::lange()");
4042-
norm_val = lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
4042+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
40434043

40444044
arma_extra_debug_print("lapack::getrf()");
40454045
lapack::getrf<eT>(&n, &n, A.memptr(), &n, ipiv.memptr(), &info);
@@ -4368,7 +4368,7 @@ auxlib::solve_sympd_rcond(Mat<typename T1::pod_type>& out, bool& out_sympd_state
43684368
podarray<T> work(A.n_rows);
43694369

43704370
arma_extra_debug_print("lapack::lansy()");
4371-
norm_val = lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
4371+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
43724372

43734373
arma_extra_debug_print("lapack::potrf()");
43744374
lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info);
@@ -4446,7 +4446,7 @@ auxlib::solve_sympd_rcond(Mat< std::complex<typename T1::pod_type> >& out, bool&
44464446
podarray<T> work(A.n_rows);
44474447

44484448
arma_extra_debug_print("lapack::lanhe()");
4449-
norm_val = lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
4449+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
44504450

44514451
arma_extra_debug_print("lapack::potrf()");
44524452
lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info);
@@ -5387,7 +5387,7 @@ auxlib::solve_band_rcond_common(Mat<typename T1::elem_type>& out, typename T1::p
53875387

53885388
arma_debug_assert_blas_size(AB,out);
53895389

5390-
char norm_id = '1';
5390+
//char norm_id = '1';
53915391
char trans = 'N';
53925392
blas_int n = blas_int(N); // assuming square matrix
53935393
blas_int kl = blas_int(KL);
@@ -5398,11 +5398,14 @@ auxlib::solve_band_rcond_common(Mat<typename T1::elem_type>& out, typename T1::p
53985398
blas_int info = blas_int(0);
53995399
T norm_val = T(0);
54005400

5401-
podarray<T> junk(1);
5401+
//podarray<T> junk(1);
54025402
podarray<blas_int> ipiv(N + 2); // +2 for paranoia
54035403

5404-
arma_extra_debug_print("lapack::langb()");
5405-
norm_val = lapack::langb<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, junk.memptr());
5404+
// // NOTE: lapack::langb() and lapack::gbtrf() use incompatible storage formats for the band matrix
5405+
// arma_extra_debug_print("lapack::langb()");
5406+
// norm_val = lapack::langb<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, junk.memptr());
5407+
5408+
norm_val = auxlib::norm1_band(A,KL,KU);
54065409

54075410
arma_extra_debug_print("lapack::gbtrf()");
54085411
lapack::gbtrf<eT>(&n, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &info);
@@ -6098,7 +6101,7 @@ auxlib::rcond(Mat<eT>& A)
60986101
podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) );
60996102

61006103
arma_extra_debug_print("lapack::lange()");
6101-
norm_val = lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, work.memptr());
6104+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, work.memptr());
61026105

61036106
arma_extra_debug_print("lapack::getrf()");
61046107
lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info);
@@ -6148,7 +6151,7 @@ auxlib::rcond(Mat< std::complex<T> >& A)
61486151
podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) );
61496152

61506153
arma_extra_debug_print("lapack::lange()");
6151-
norm_val = lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, junk.memptr());
6154+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, junk.memptr());
61526155

61536156
arma_extra_debug_print("lapack::getrf()");
61546157
lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info);
@@ -6196,7 +6199,7 @@ auxlib::rcond_sympd(Mat<eT>& A, bool& calc_ok)
61966199
podarray<blas_int> iwork( A.n_rows);
61976200

61986201
arma_extra_debug_print("lapack::lansy()");
6199-
norm_val = lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr());
6202+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr());
62006203

62016204
arma_extra_debug_print("lapack::potrf()");
62026205
lapack::potrf(&uplo, &n, A.memptr(), &lda, &info);
@@ -6257,7 +6260,7 @@ auxlib::rcond_sympd(Mat< std::complex<T> >& A, bool& calc_ok)
62576260
podarray< T> rwork( A.n_rows);
62586261

62596262
arma_extra_debug_print("lapack::lanhe()");
6260-
norm_val = lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, rwork.memptr());
6263+
norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, rwork.memptr());
62616264

62626265
arma_extra_debug_print("lapack::potrf()");
62636266
lapack::potrf(&uplo, &n, A.memptr(), &lda, &info);
@@ -6701,6 +6704,105 @@ auxlib::rudimentary_sym_check(const Mat< std::complex<T> >& X)
67016704

67026705

67036706

6707+
template<typename eT>
6708+
inline
6709+
typename get_pod_type<eT>::result
6710+
auxlib::norm1_gen(const Mat<eT>& A)
6711+
{
6712+
arma_extra_debug_sigprint();
6713+
6714+
typedef typename get_pod_type<eT>::result T;
6715+
6716+
if(A.n_elem == 0) { return T(0); }
6717+
6718+
const uword n_rows = A.n_rows;
6719+
const uword n_cols = A.n_cols;
6720+
6721+
T max_val = T(0);
6722+
6723+
for(uword c=0; c < n_cols; ++c)
6724+
{
6725+
const eT* colmem = A.colptr(c);
6726+
T acc_val = T(0);
6727+
6728+
for(uword r=0; r < n_rows; ++r) { acc_val += std::abs(colmem[r]); }
6729+
6730+
max_val = (acc_val > max_val) ? acc_val : max_val;
6731+
}
6732+
6733+
return max_val;
6734+
}
6735+
6736+
6737+
6738+
template<typename eT>
6739+
inline
6740+
typename get_pod_type<eT>::result
6741+
auxlib::norm1_sym(const Mat<eT>& A)
6742+
{
6743+
arma_extra_debug_sigprint();
6744+
6745+
typedef typename get_pod_type<eT>::result T;
6746+
6747+
if(A.n_elem == 0) { return T(0); }
6748+
6749+
const uword N = (std::min)(A.n_rows, A.n_cols);
6750+
6751+
T max_val = T(0);
6752+
6753+
for(uword col=0; col < N; ++col)
6754+
{
6755+
const eT* colmem = A.colptr(col);
6756+
T acc_val = T(0);
6757+
6758+
for(uword c=0; c < col; ++c) { acc_val += std::abs(A.at(col,c)); }
6759+
6760+
for(uword r=col; r < N; ++r) { acc_val += std::abs(colmem[r]); }
6761+
6762+
max_val = (acc_val > max_val) ? acc_val : max_val;
6763+
}
6764+
6765+
return max_val;
6766+
}
6767+
6768+
6769+
6770+
template<typename eT>
6771+
inline
6772+
typename get_pod_type<eT>::result
6773+
auxlib::norm1_band(const Mat<eT>& A, const uword KL, const uword KU)
6774+
{
6775+
arma_extra_debug_sigprint();
6776+
6777+
typedef typename get_pod_type<eT>::result T;
6778+
6779+
if(A.n_elem == 0) { return T(0); }
6780+
6781+
const uword n_rows = A.n_rows;
6782+
const uword n_cols = A.n_cols;
6783+
6784+
T max_val = T(0);
6785+
6786+
for(uword c=0; c < n_cols; ++c)
6787+
{
6788+
const eT* colmem = A.colptr(c);
6789+
T acc_val = T(0);
6790+
6791+
// use values only from main diagonal + KU upper diagonals + KL lower diagonals
6792+
6793+
const uword start = ( c > KU ) ? (c - KU) : 0;
6794+
const uword end = ((c + KL) < n_rows) ? (c + KL) : (n_rows-1);
6795+
6796+
for(uword r=start; r <= end; ++r) { acc_val += std::abs(colmem[r]); }
6797+
6798+
max_val = (acc_val > max_val) ? acc_val : max_val;
6799+
}
6800+
6801+
return max_val;
6802+
}
6803+
6804+
6805+
67046806
//
67056807

67066808

inst/include/armadillo_bits/compiler_setup.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,10 @@
114114

115115

116116
#if defined(__APPLE__) || defined(__apple_build_version__)
117-
#undef ARMA_BLAS_SDOT_BUG
118-
#define ARMA_BLAS_SDOT_BUG
117+
// NOTE: Apple Accelerate framework has broken implementations of functions that return a float value,
118+
// NOTE: such as sdot(), slange(), clange(), slansy(), clanhe(), slangb()
119+
#undef ARMA_BLAS_FLOAT_BUG
120+
#define ARMA_BLAS_FLOAT_BUG
119121

120122
// #undef ARMA_HAVE_POSIX_MEMALIGN
121123
// NOTE: posix_memalign() is available since macOS 10.6 (late 2009 onwards)

inst/include/armadillo_bits/op_det_bones.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ class op_det
3131
{
3232
static constexpr uword n2 = row + col*2;
3333
static constexpr uword n3 = row + col*3;
34-
static constexpr uword n4 = row + col*4;
3534
};
3635

3736
template<typename T1>
@@ -48,9 +47,6 @@ class op_det
4847

4948
template<typename eT>
5049
arma_cold inline static eT apply_tiny_3x3(const Mat<eT>& X);
51-
52-
template<typename eT>
53-
arma_cold inline static eT apply_tiny_4x4(const Mat<eT>& X);
5450
};
5551

5652

inst/include/armadillo_bits/op_det_meat.hpp

Lines changed: 1 addition & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ op_det::apply_direct(typename T1::elem_type& out_val, const Base<typename T1::el
5858
if(N == 0) { out_val = eT(1); return true; }
5959
if(N == 1) { out_val = A[0]; return true; }
6060

61-
if((is_cx<eT>::no) && (N <= 4))
61+
if((is_cx<eT>::no) && (N <= 3))
6262
{
6363
constexpr T det_min = std::numeric_limits<T>::epsilon();
6464
constexpr T det_max = T(1) / std::numeric_limits<T>::epsilon();
@@ -67,7 +67,6 @@ op_det::apply_direct(typename T1::elem_type& out_val, const Base<typename T1::el
6767

6868
if(N == 2) { det_val = op_det::apply_tiny_2x2(A); }
6969
if(N == 3) { det_val = op_det::apply_tiny_3x3(A); }
70-
if(N == 4) { det_val = op_det::apply_tiny_4x4(A); }
7170

7271
const T abs_det_val = std::abs(det_val);
7372

@@ -176,77 +175,4 @@ op_det::apply_tiny_3x3(const Mat<eT>& X)
176175

177176

178177

179-
//! Closed-form value for a 4x4 matrix, used by op_det::apply_direct() for N == 4
//! (this is the removed side of the diff: the function is deleted by this commit).
//!
//! The result is a sum of 24 signed terms. Each term multiplies one pairwise
//! product taken from rows 0 and 1 by one pairwise product taken from rows 2 and 3.
//! NOTE(review): this looks like the full permutation expansion of the determinant -- confirm.
//!
//! Naming: val_ab_cd holds Xm[pos<a,b>::n4] * Xm[pos<c,d>::n4].
//! pos<..>::n4 is declared in op_det_bones.hpp as (row + col*4);
//! presumably the template arguments are <row,col> -- verify against the declaration.
//!
//! No size check is performed here; X is read through its raw memory pointer,
//! so the caller is expected to pass a 4x4 matrix.
template<typename eT>
180-
inline
181-
eT
182-
op_det::apply_tiny_4x4(const Mat<eT>& X)
183-
{
184-
arma_extra_debug_sigprint();
185-
186-
const eT* Xm = X.memptr();
187-
188-
// pairwise products of elements taken from rows 0 and 1
const eT val_03_12 = Xm[pos<0,3>::n4] * Xm[pos<1,2>::n4];
189-
const eT val_02_13 = Xm[pos<0,2>::n4] * Xm[pos<1,3>::n4];
190-
const eT val_03_11 = Xm[pos<0,3>::n4] * Xm[pos<1,1>::n4];
191-
192-
const eT val_01_13 = Xm[pos<0,1>::n4] * Xm[pos<1,3>::n4];
193-
const eT val_02_11 = Xm[pos<0,2>::n4] * Xm[pos<1,1>::n4];
194-
const eT val_01_12 = Xm[pos<0,1>::n4] * Xm[pos<1,2>::n4];
195-
196-
const eT val_03_10 = Xm[pos<0,3>::n4] * Xm[pos<1,0>::n4];
197-
const eT val_00_13 = Xm[pos<0,0>::n4] * Xm[pos<1,3>::n4];
198-
const eT val_02_10 = Xm[pos<0,2>::n4] * Xm[pos<1,0>::n4];
199-
const eT val_00_12 = Xm[pos<0,0>::n4] * Xm[pos<1,2>::n4];
200-
201-
const eT val_01_10 = Xm[pos<0,1>::n4] * Xm[pos<1,0>::n4];
202-
const eT val_00_11 = Xm[pos<0,0>::n4] * Xm[pos<1,1>::n4];
203-
204-
// pairwise products of elements taken from rows 2 and 3
const eT val_21_30 = Xm[pos<2,1>::n4] * Xm[pos<3,0>::n4];
205-
const eT val_22_30 = Xm[pos<2,2>::n4] * Xm[pos<3,0>::n4];
206-
const eT val_23_30 = Xm[pos<2,3>::n4] * Xm[pos<3,0>::n4];
207-
208-
const eT val_20_31 = Xm[pos<2,0>::n4] * Xm[pos<3,1>::n4];
209-
const eT val_22_31 = Xm[pos<2,2>::n4] * Xm[pos<3,1>::n4];
210-
const eT val_23_31 = Xm[pos<2,3>::n4] * Xm[pos<3,1>::n4];
211-
212-
const eT val_20_32 = Xm[pos<2,0>::n4] * Xm[pos<3,2>::n4];
213-
const eT val_21_32 = Xm[pos<2,1>::n4] * Xm[pos<3,2>::n4];
214-
const eT val_23_32 = Xm[pos<2,3>::n4] * Xm[pos<3,2>::n4];
215-
216-
const eT val_20_33 = Xm[pos<2,0>::n4] * Xm[pos<3,3>::n4];
217-
const eT val_21_33 = Xm[pos<2,1>::n4] * Xm[pos<3,3>::n4];
218-
const eT val_22_33 = Xm[pos<2,2>::n4] * Xm[pos<3,3>::n4];
219-
220-
// signed sum of the 24 terms; the evaluation order below is kept as-is since it affects floating point rounding
const eT val = \
221-
val_03_12 * val_21_30 \
222-
- val_02_13 * val_21_30 \
223-
- val_03_11 * val_22_30 \
224-
+ val_01_13 * val_22_30 \
225-
+ val_02_11 * val_23_30 \
226-
- val_01_12 * val_23_30 \
227-
- val_03_12 * val_20_31 \
228-
+ val_02_13 * val_20_31 \
229-
+ val_03_10 * val_22_31 \
230-
- val_00_13 * val_22_31 \
231-
- val_02_10 * val_23_31 \
232-
+ val_00_12 * val_23_31 \
233-
+ val_03_11 * val_20_32 \
234-
- val_01_13 * val_20_32 \
235-
- val_03_10 * val_21_32 \
236-
+ val_00_13 * val_21_32 \
237-
+ val_01_10 * val_23_32 \
238-
- val_00_11 * val_23_32 \
239-
- val_02_11 * val_20_33 \
240-
+ val_01_12 * val_20_33 \
241-
+ val_02_10 * val_21_33 \
242-
- val_00_12 * val_21_33 \
243-
- val_01_10 * val_22_33 \
244-
+ val_00_11 * val_22_33 \
245-
;
246-
247-
return val;
248-
}
249-
250-
251-
252178
//! @}

0 commit comments

Comments
 (0)