Skip to content

Commit 51f774e

Browse files
authored
Use dgetrf/dgetri for inverse instead of dgesv (#16)
* Use dgetrf/dgetri for inverse instead of dgesv

  also, remove hardcoded test for inverse and just verify that A * A**-1 = I

  also, make cholesky an "out-of-place" benchmark where copy occurs inside compute

* Remove r_mat and identity declarations in inv
* Generate random test matrices
* lu: split for loops as in scipy
* cholesky: mimic scipy's for loop
* runner: only warm-up once
* lu: we actually do equivalent of permute_l=False
* cholesky: use dsyrk to generate input matrix
* cholesky: use dsyrk in testing as well
1 parent 568e53c commit 51f774e

File tree

19 files changed

+153
-101
lines changed

19 files changed

+153
-101
lines changed

numpy/linalg/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33
# SPDX-License-Identifier: MIT
44

55
CXX = icc
6-
CXXFLAGS = -O3 -g -xCORE-AVX2 -axCOMMON-AVX512 -qopenmp \
6+
CXXFLAGS = -O3 -g -xCORE-AVX2 -axCOMMON-AVX512 \
77
-qopt-report=5 -qopt-report-phase=openmp,par,vec
8-
LDFLAGS = -lmkl_rt -qopenmp
8+
LDFLAGS = -lmkl_rt
99

1010
TARGET = linalg
1111
BENCHES = cholesky det dot eig inv lu qr svd
1212
SOURCES = $(addsuffix .cc,$(BENCHES)) linalg.cc
1313

1414
ifneq ($(CONDA_PREFIX),)
15-
LDFLAGS += -L$(CONDA_PREFIX)/lib
15+
LDFLAGS += -L$(CONDA_PREFIX)/lib -Wl,-rpath,$(CONDA_PREFIX)
1616
CXXFLAGS += -I$(CONDA_PREFIX)/include
1717
endif
1818

numpy/linalg/bench.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,5 +135,5 @@ class Bench {
135135
virtual void print_args() = 0;
136136
virtual void print_result() = 0;
137137
virtual void compute() = 0;
138-
virtual bool test() {return false;};
138+
virtual bool test(bool verbose) {return false;};
139139
};

numpy/linalg/cholesky.cc

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,7 @@
88
#include <cstring>
99
#include <iostream>
1010

11-
static const double x_mat_test[] = {
12-
5.551063927745538, 0.034194385271978, -0.276508795460738,
13-
0.034194385271978, 4.704686460853461, 0.087572555571367,
14-
-0.276508795460738, 0.087572555571367, 6.07658590927362};
15-
16-
static const double r_mat_test[] = {2.356069593145656,
17-
0.,
18-
0.,
19-
0.014513317166631,
20-
2.16898036516661,
21-
0.,
22-
-0.117360198639788,
23-
0.041160281019929,
24-
2.461933858639425};
25-
26-
static const int test_size = 3;
11+
static const int test_size = 5;
2712

2813
Cholesky::Cholesky() {
2914
x_mat = r_mat = 0;
@@ -45,50 +30,86 @@ void Cholesky::make_args(int size) {
4530
for (int i = 0; i < n; i++) {
4631
r_mat[i * n + i] = 1;
4732
}
48-
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, n, n, n, 1.0, x_mat, n,
49-
x_mat, n, n, r_mat, n);
33+
34+
static const char uplo = 'U';
35+
static const char trans = 'T';
36+
static const double alpha = 1.;
37+
static const double beta = n;
38+
dsyrk(&uplo, &trans, &n, &n, &alpha, x_mat, &n, &beta, r_mat, &n);
5039

5140
// we now have r_mat = x_mat * x_mat' + n * np.eye(n)
5241
// copy back into x_mat
5342
memcpy(x_mat, r_mat, mat_size * sizeof(*x_mat));
5443
}
5544

5645
void Cholesky::copy_args() {
57-
memcpy(r_mat, x_mat, mat_size * sizeof(*x_mat));
46+
// copy moved to compute()
5847
}
5948

6049
void Cholesky::compute() {
50+
// perform copy here.
51+
static const int one = 1;
52+
dcopy(&mat_size, x_mat, &one, r_mat, &one);
53+
6154
// compute cholesky decomposition
6255
int info;
63-
const char uplo = 'U';
56+
static const char uplo = 'U';
6457
dpotrf(&uplo, &n, r_mat, &lda, &info);
6558
assert(info == 0);
6659

6760
// we only want an upper triangular matrix
68-
for (int i = 0; i < n - 1; i++) {
69-
memset(&r_mat[i * n + i + 1], 0, (n - i - 1) * sizeof(*r_mat));
61+
// in scipy, this is done in *potrf wrapper.
62+
// https://github.com/scipy/scipy/blob/maintenance/1.3.x/scipy/linalg/flapack_pos_def.pyf.src#L85
63+
for (int i = 0; i < n; i++) {
64+
for (int j = i + 1; j < n; j++) {
65+
r_mat[i * n + j] = 0.;
66+
}
7067
}
7168
}
7269

73-
bool Cholesky::test() {
70+
bool Cholesky::test(bool verbose) {
7471
clean_args();
7572
make_args(test_size);
76-
memcpy(x_mat, x_mat_test, mat_size * sizeof(*x_mat));
7773
copy_args();
7874
compute();
7975

80-
return mat_equal(r_mat, r_mat_test, mat_size);
76+
// verify that r_mat is upper triangular
77+
for (int i = 0; i < n; i++) {
78+
for (int j = i + 1; j < n; j++) {
79+
if (r_mat[i * n + j] != 0.) {
80+
if (verbose) {
81+
std::cerr << "r_mat is not upper triangular!" << std::endl;
82+
}
83+
return false;
84+
}
85+
}
86+
}
87+
88+
// try to reconstruct x_mat from its Cholesky decomposition
89+
static const double alpha = 1., beta = 0.;
90+
static const char uplo = 'U';
91+
static const char trans = 'T';
92+
double *c = make_mat(mat_size);
93+
dsyrk(&uplo, &trans, &n, &n, &alpha, r_mat, &n, &beta, c, &n);
94+
95+
if (verbose) {
96+
std::cout << "U* * U = (should be equal to A)" << std::endl;
97+
print_mat('c', c, n, n);
98+
}
99+
bool equal = mat_equal(c, x_mat, mat_size);
100+
mkl_free(c);
101+
return equal;
81102
}
82103

83104
void Cholesky::print_args() {
84-
std::cout << "Cholesky decomposition, A = LL*, of a "
105+
std::cout << "Cholesky decomposition, A = U* * U, of a "
85106
<< "Hermitian positive-definite matrix A." << std::endl;
86107
std::cout << "A = " << std::endl;
87108
print_mat('c', x_mat, n, n);
88109
}
89110

90111
void Cholesky::print_result() {
91-
std::cout << "L = " << std::endl;
112+
std::cout << "U = " << std::endl;
92113
print_mat('c', r_mat, n, n);
93114
}
94115

numpy/linalg/cholesky.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class Cholesky : public Bench {
1616
void print_args();
1717
void print_result();
1818
void compute();
19-
bool test();
19+
bool test(bool verbose);
2020

2121
private:
2222
double *x_mat, *r_mat;

numpy/linalg/det.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ void Det::compute() {
6060
result = t;
6161
}
6262

63-
bool Det::test() {
63+
bool Det::test(bool verbose) {
6464
clean_args();
6565
make_args(test_size);
6666
memcpy(x_mat, x_mat_test, mat_size * sizeof(*x_mat));

numpy/linalg/det.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class Det : public Bench {
1313
void make_args(int size);
1414
void copy_args();
1515
void clean_args();
16-
bool test();
16+
bool test(bool verbose);
1717
void print_args();
1818
void print_result();
1919
void compute();

numpy/linalg/dot.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ void Dot::compute() {
6868
a_mat, k, b_mat, n, beta, r_mat, n);
6969
}
7070

71-
bool Dot::test() {
71+
bool Dot::test(bool verbose) {
7272
clean_args();
7373
make_args(test_size);
7474
memcpy(a_mat, a_mat_test, m * k * sizeof(*a_mat));

numpy/linalg/dot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class Dot : public Bench {
1313
void make_args(int size);
1414
void copy_args();
1515
void clean_args();
16-
bool test();
16+
bool test(bool verbose);
1717
void print_args();
1818
void print_result();
1919
void compute();

numpy/linalg/eig.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ void Eig::compute() {
114114
}
115115
}
116116

117-
bool Eig::test() {
117+
bool Eig::test(bool verbose) {
118118
clean_args();
119119
make_args(test_size);
120120
memcpy(a_mat, a_mat_test, mat_size * sizeof(*a_mat));

numpy/linalg/eig.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class Eig : public Bench {
1414
void make_args(int size);
1515
void copy_args();
1616
void clean_args();
17-
bool test();
17+
bool test(bool verbose);
1818
void print_args();
1919
void print_result();
2020
void compute();

0 commit comments

Comments
 (0)