Skip to content

Commit d7b7fd3

Browse files
committed
Merge branch '750' into 'master'
Resolve "直接法の線型方程式ソルバーのバグ修正" (Fix bug in the direct-method linear equation solver) Closes #750 See merge request ricos/monolish!510
2 parents 0a2df91 + 1c4cb6b commit d7b7fd3

File tree

9 files changed

+45
-11
lines changed

9 files changed

+45
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Unreleased
3131
### Fixed
3232
- Fix benchmark result token <https://gitlab.ritc.jp/ricos/monolish/-/merge_requests/502> <https://github.com/ricosjp/monolish/issues/733>
3333
- Fix set_ptr bug <https://gitlab.ritc.jp/ricos/monolish/-/merge_requests/508> <https://github.com/ricosjp/monolish/issues/741>
34+
- Fix linear solver bug for non-symmetric matrix <https://gitlab.ritc.jp/ricos/monolish/-/merge_requests/510> <https://github.com/ricosjp/monolish/issues/750>
3435

3536
### Changed
3637
- Update cuda version of allgebra <https://gitlab.ritc.jp/ricos/monolish/-/merge_requests/506> <https://github.com/ricosjp/monolish/issues/739>

src/internal/lapack/getrs/dense_double_getrs.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ int internal::lapack::getrs(const matrix::Dense<double> &A, vector<double> &B,
2626
const double *Ad = A.data();
2727
double *Bd = B.data();
2828
const int *ipivd = ipiv.data();
29-
const char trans = 'N';
29+
const char trans = 'T';
3030

3131
if (A.get_device_mem_stat() == true && B.get_device_mem_stat() == true) {
3232
#if MONOLISH_USE_NVIDIA_GPU
@@ -42,7 +42,11 @@ int internal::lapack::getrs(const matrix::Dense<double> &A, vector<double> &B,
4242

4343
#pragma omp target data use_device_ptr(Ad, ipivd, Bd, devinfod)
4444
{
45-
internal::check_CUDA(cusolverDnDgetrs(h, CUBLAS_OP_N, M, K, Ad, N, ipivd,
45+
auto cublas_trans = CUBLAS_OP_N;
46+
if (trans == 'T') {
47+
cublas_trans = CUBLAS_OP_T;
48+
}
49+
internal::check_CUDA(cusolverDnDgetrs(h, cublas_trans, M, K, Ad, N, ipivd,
4650
Bd, M, devinfod));
4751
}
4852

src/internal/lapack/getrs/dense_float_getrs.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ int internal::lapack::getrs(const matrix::Dense<float> &A, vector<float> &B,
2626
const float *Ad = A.data();
2727
float *Bd = B.data();
2828
const int *ipivd = ipiv.data();
29-
const char trans = 'N';
29+
const char trans = 'T';
3030

3131
if (A.get_device_mem_stat() == true && B.get_device_mem_stat() == true) {
3232
#if MONOLISH_USE_NVIDIA_GPU
@@ -42,7 +42,11 @@ int internal::lapack::getrs(const matrix::Dense<float> &A, vector<float> &B,
4242

4343
#pragma omp target data use_device_ptr(Ad, ipivd, Bd, devinfod)
4444
{
45-
internal::check_CUDA(cusolverDnSgetrs(h, CUBLAS_OP_N, M, K, Ad, N, ipivd,
45+
auto cublas_trans = CUBLAS_OP_N;
46+
if (trans == 'T') {
47+
cublas_trans = CUBLAS_OP_T;
48+
}
49+
internal::check_CUDA(cusolverDnSgetrs(h, cublas_trans, M, K, Ad, N, ipivd,
4650
Bd, M, devinfod));
4751
}
4852

test/equation/dense_lu/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@ sxat:
1616

1717
run_cpu:
1818
./$(FUNC)_cpu.out ../../test.mtx 1
19+
./$(FUNC)_cpu.out ../../test2.mtx 1
1920

2021
run_gpu:
2122
$(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1
23+
$(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1
2224

2325
run_a64fx:
2426
$(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1
27+
$(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1
2528

2629
run_sxat:
2730
$(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1
31+
$(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1
2832

2933
clean:
3034
- rm *.out

test/equation/sparse_ic/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@ sxat:
1616

1717
run_cpu:
1818
./$(FUNC)_cpu.out ../../test.mtx 1
19+
./$(FUNC)_cpu.out ../../test2.mtx 1
1920

2021
run_gpu:
2122
$(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1
23+
$(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1
2224

2325
run_a64fx:
2426
$(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1
27+
$(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1
2528

2629
run_sxat:
2730
$(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1
31+
$(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1
2832

2933
clean:
3034
- rm *.out

test/equation/sparse_ilu/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@ sxat:
1616

1717
run_cpu:
1818
./$(FUNC)_cpu.out ../../test.mtx 1
19+
./$(FUNC)_cpu.out ../../test2.mtx 1
1920

2021
run_gpu:
2122
$(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1
23+
$(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1
2224

2325
run_a64fx:
2426
$(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1
27+
$(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1
2528

2629
run_sxat:
2730
$(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1
31+
$(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1
2832

2933
clean:
3034
- rm *.out

test/equation/sparse_qr/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@ sxat:
1616

1717
run_cpu:
1818
./$(FUNC)_cpu.out ../../test.mtx 1
19+
./$(FUNC)_cpu.out ../../test2.mtx 1
1920

2021
run_gpu:
2122
$(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1
23+
$(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1
2224

2325
run_a64fx:
2426
$(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1
27+
$(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1
2528

2629
run_sxat:
2730
$(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1
31+
$(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1
2832

2933
clean:
3034
- rm *.out

test/test2.mtx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
%%MatrixMarket matrix coordinate real general
2+
3 3 7
3+
1 1 2
4+
1 2 1
5+
2 1 -1
6+
2 2 2
7+
2 3 -1
8+
3 2 1
9+
3 3 2

test/test_utils.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ bool ans_check(const std::string &func, double result, double ans, double tol) {
3131
}
3232

3333
if (err < tol) {
34-
std::cout << func << "(" << get_type<T>() << ")" << std::flush;
35-
std::cout << ": pass" << std::endl;
34+
// std::cout << func << "(" << get_type<T>() << ")" << std::flush;
35+
// std::cout << ": pass" << std::endl;
3636
return true;
3737
} else {
3838
std::cout << "Error!!" << std::endl;
@@ -104,8 +104,8 @@ bool ans_check(const std::string &func, const T *result, const T *ans, int size,
104104
}
105105

106106
if (check) {
107-
std::cout << func << "(" << get_type<T>() << ")" << std::flush;
108-
std::cout << ": pass" << std::endl;
107+
// std::cout << func << "(" << get_type<T>() << ")" << std::flush;
108+
// std::cout << ": pass" << std::endl;
109109
return check;
110110
} else {
111111
std::cout << "Error!!" << std::endl;
@@ -144,9 +144,9 @@ bool ans_check(const std::string &func, const std::string &type,
144144
}
145145

146146
if (check) {
147-
std::cout << func << "(" << get_type<T>() << "," << type << ")"
148-
<< std::flush;
149-
std::cout << ": pass" << std::endl;
147+
// std::cout << func << "(" << get_type<T>() << "," << type << ")"
148+
// << std::flush;
149+
// std::cout << ": pass" << std::endl;
150150
return check;
151151
} else {
152152
std::cout << "Error!!" << std::endl;

0 commit comments

Comments (0)