Skip to content

Commit a9c4820

Browse files
Merge pull request #997 from DrTimothyAldenDavis/paru
paru performance & benchmark; icx workaround for UMFPACK
2 parents d95dcd3 + 1815ad8 commit a9c4820

File tree

88 files changed

+8964
-703
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+8964
-703
lines changed

.github/workflows/macos.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ jobs:
5252
run: |
5353
brew update
5454
brew install --overwrite python@3.10 python@3.11 python@3.12
55+
brew uninstall cmake
5556
brew reinstall gcc
5657
brew install autoconf automake ccache cmake gmp lapack libomp mpfr openblas
5758
HOMEBREW_PREFIX=$(brew --prefix)

ChangeLog

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
Aug 18, 2025: version 7.11.1
1+
TODO, 2025: version 7.12.0
22

33
* UMFPACK 6.3.7: workaround for an Intel icx 2025.2 compiler bug
44
* LAGraph 1.2.1: bug fix for experimental maxflow method (FIXME IN PROGRESS)
5-
* ParU 1.0.2: minor revisions for benchmarking
5+
* ParU 1.1.0: performance improvements, minor revisions for benchmarking
66
* Package versions in this release: (* denotes a new version)
7-
SuiteSparse_config 7.11.1 *
7+
SuiteSparse_config 7.12.0 *
88
AMD 3.3.4
99
BTF 2.3.3
1010
CAMD 3.3.5
@@ -19,7 +19,7 @@ Aug 18, 2025: version 7.11.1
1919
LDL 3.3.3
2020
LAGraph 1.2.1 * FIXME IN PROGRESS
2121
SuiteSparse_Mongoose 3.3.5
22-
ParU 1.0.2 *
22+
ParU 1.1.0 *
2323
RBio 4.3.5
2424
SPEX 3.2.4
2525
SPQR 4.3.5

Example/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ message ( STATUS "MY prefix path: ${CMAKE_PREFIX_PATH}" )
5353
#-------------------------------------------------------------------------------
5454

5555
# cmake inserts the date and version number into Include/my.h:
56-
set ( MY_DATE "Aug 18, 2025" )
56+
set ( MY_DATE "TODO FIXME, 2025" )
5757
set ( MY_VERSION_MAJOR 1 )
5858
set ( MY_VERSION_MINOR 8 )
5959
set ( MY_VERSION_PATCH 10 )
@@ -88,7 +88,7 @@ project ( my
8888
#-------------------------------------------------------------------------------
8989

9090
# look for all SuiteSparse packages:
91-
find_package ( SuiteSparse_config 7.11.1 REQUIRED )
91+
find_package ( SuiteSparse_config 7.12.0 REQUIRED )
9292
find_package ( AMD 3.3.4 REQUIRED )
9393
find_package ( BTF 2.3.3 REQUIRED )
9494
find_package ( CAMD 3.3.5 REQUIRED )
@@ -102,7 +102,7 @@ find_package ( KLU_CHOLMOD 2.3.6 REQUIRED )
102102
find_package ( LDL 3.3.3 REQUIRED )
103103
find_package ( LAGraph 1.2.0 ) # FIXME: make 1.2.1
104104
find_package ( SuiteSparse_Mongoose 3.3.5 REQUIRED )
105-
find_package ( ParU 1.0.2 REQUIRED )
105+
find_package ( ParU 1.1.0 REQUIRED )
106106
find_package ( RBio 4.3.5 REQUIRED )
107107
find_package ( SPEX 3.2.4 REQUIRED ) # requires GMP and MPFR
108108
find_package ( SPQR 4.3.5 REQUIRED )

Example/Include/my_internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515
// SuiteSparse include files for C/C++:
1616
#include "SuiteSparse_config.h"
17-
#if !defined (SUITESPARSE__VERSION) || SUITESPARSE__VERSION < SUITESPARSE__VERCODE(7,10,4)
18-
#error "This library requires SuiteSparse_config 7.10.4 or later"
17+
#if !defined (SUITESPARSE__VERSION) || SUITESPARSE__VERSION < SUITESPARSE__VERCODE(7,12,0)
18+
#error "This library requires SuiteSparse_config 7.12.0 or later"
1919
#endif
2020

2121
#include "amd.h"

ParU/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
# cmake 3.22 is required to find the BLAS in SuiteSparse_config
1414
cmake_minimum_required ( VERSION 3.22 )
1515

16-
set ( PARU_DATE "Aug 18, 2025" )
16+
set ( PARU_DATE "FIXME, 2025" )
1717
set ( PARU_VERSION_MAJOR 1 CACHE STRING "" FORCE )
18-
set ( PARU_VERSION_MINOR 0 CACHE STRING "" FORCE )
19-
set ( PARU_VERSION_UPDATE 2 CACHE STRING "" FORCE )
18+
set ( PARU_VERSION_MINOR 1 CACHE STRING "" FORCE )
19+
set ( PARU_VERSION_UPDATE 0 CACHE STRING "" FORCE )
2020

2121
message ( STATUS "Building PARU version: v"
2222
${PARU_VERSION_MAJOR}.
@@ -103,10 +103,10 @@ endif ( )
103103
#-------------------------------------------------------------------------------
104104

105105
if ( NOT SUITESPARSE_ROOT_CMAKELISTS )
106-
find_package ( SuiteSparse_config 7.11.1
106+
find_package ( SuiteSparse_config 7.12.0
107107
PATHS ${CMAKE_SOURCE_DIR}/../SuiteSparse_config/build NO_DEFAULT_PATH )
108108
if ( NOT TARGET SuiteSparse::SuiteSparseConfig )
109-
find_package ( SuiteSparse_config 7.11.1 REQUIRED )
109+
find_package ( SuiteSparse_config 7.12.0 REQUIRED )
110110
endif ( )
111111

112112
find_package ( CHOLMOD 5.3.4

ParU/Config/ParU.h.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ typedef enum ParU_Info
6060

6161
#define PARU__VERSION SUITESPARSE__VERCODE(@PARU_VERSION_MAJOR@,@PARU_VERSION_MINOR@,@PARU_VERSION_UPDATE@)
6262
#if !defined (SUITESPARSE__VERSION) || \
63-
(SUITESPARSE__VERSION < SUITESPARSE__VERCODE(7,11,1))
64-
#error "ParU @PARU_VERSION_MAJOR@.@PARU_VERSION_MINOR@.@PARU_VERSION_UPDATE@ requires SuiteSparse_config 7.11.1 or later"
63+
(SUITESPARSE__VERSION < SUITESPARSE__VERCODE(7,12,0))
64+
#error "ParU @PARU_VERSION_MAJOR@.@PARU_VERSION_MINOR@.@PARU_VERSION_UPDATE@ requires SuiteSparse_config 7.12.0 or later"
6565
#endif
6666

6767
#if !defined (UMFPACK__VERSION) || \

ParU/Demo/Benchmarking/README.txt

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
This folder contains sample scripts to run the paru_benchmark program, and the
2+
MUMPS and SuperLU benchmarks, on the test matrices used for the ACM TOMS
3+
submission for ParU. Linux is required.
4+
5+
To get the matrices (in Matrix Market format) from the sparse.tamu.edu website,
6+
use the following:
7+
8+
chmod +x get_matrices
9+
./get_matrices
10+
11+
The above script will download the matrices into your /tmp/matrices folder.
12+
About 17GB is required. These are used for all solvers except SuperLU.
13+
14+
Next, compile ParU and its demos/benchmark programs (where "SuiteSparse" is
15+
your top-level SuiteSparse repository; suppose it is in your home directory):
16+
17+
cd ~/SuiteSparse
18+
make
19+
cd ParU
20+
make demos
21+
22+
Finally, run the benchmarks for ParU and UMFPACK, with:
23+
24+
chmod +x run_benchmarks
25+
script
26+
./run_benchmarks
27+
28+
This will run all the benchmarks for ParU and UMFPACK for the ACM TOMS paper,
29+
and will save the results in the file "typescript"; the results will also
30+
be displayed on your screen.
31+
32+
Note that we used the following scripts to benchmark ParU and UMFPACK for the
33+
ACM TOMS paper submission for ParU, but they are specific to our two systems.
34+
We include them for reference:
35+
36+
do_paru_and_umf_hyper
37+
do_paru_and_umf.slurm
38+
39+
To benchmark MUMPS, first obtain a copy of MUMPS 5.7.3. After uncompressing
40+
the original MUMPS 5.7.3 into (say) a ~/MUMPS folder in your home directory,
41+
make the following modifications;
42+
43+
cp -f mumps_573_benchmarking/Makefile.inc MUMPS/
44+
cp -f mumps_573_benchmarking/examples/* MUMPS/examples
45+
46+
Then edit your ~/MUMPS/Makefile.inc to select the appropriate libraries.
47+
You will likely need to revise the location of the metis-5.1.0 library;
48+
it is not included in MUMPS. You can obtain it at one of these links:
49+
50+
https://github.com/KarypisLab/METIS
51+
https://karypis.github.io/glaros/software/metis/overview.html
52+
https://karypis.github.io/glaros/files/sw/metis/metis-5.1.0.tar.gz
53+
54+
Place a copy in ~/metis-5.1.0 (for example), and revise your MUMPS/Makefile.inc
55+
file accordingly. Then build MUMPS, following the MUMPS instructions. Next,
56+
use the following to run MUMPS on the test matrices:
57+
58+
cd ~/SuiteSparse/ParU/Demo/Benchmarking/mumps_573_benchmarking
59+
script
60+
./run_mumps
61+
62+
To benchmark SuperLU_MT 4.0.1, first obtain a copy of superlu_mt_401
63+
(suppose it appears as ~/superlu_mt) and copy a few revised files into the
64+
original distribution:
65+
66+
cp -f superlu_mt_401_benchmarking/SRC/* ~/superlu_mt/SRC
67+
cp -f superlu_mt_401_benchmarking/EXAMPLE/* ~/superlu_mt/EXAMPLE
68+
cp -f build_with_* ~/superlu_mt/
69+
cp -f CMakeLists.txt ~/superlu_mt/
70+
71+
Then revise the build_with_gcc_and_mkl script to match your system (you will need
72+
to tell it where to find the Intel MKL library). Then build SuperLU_MT
73+
with:
74+
75+
./build_with_gcc_and_mkl
76+
77+
Download the matrices for SuperLU_MT with:
78+
79+
./get_RB_matrices
80+
81+
(requires about 15GB). Next, run the SuperLU_MT benchmarks with:
82+
83+
cd ~/SuiteSparse/ParU/Demo/Benchmarking/superlu_mt_401_benchmarking
84+
script
85+
./run_superlu
86+
87+
The output files from all of these benchmarks vary from program to program.
88+
To collect the run times for import into a CSV file, use the following on
89+
each of the output files:
90+
91+
grep TABLE typescript
92+
93+
Sample outputs are listed below.  For UMFPACK and ParU, the 3rd column
94+
is the name of the matrix. The next 3 columns give the umfpack
95+
and paru strategies (1: unsym, 2: symmetric), and the ordering
96+
(1: amd/colamd, 3: metis). The sym_time is the symbolic analysis
97+
time, the num_times are the run times for each # of threads used
98+
(from high to low), followed by the solve times.
99+
100+
TABLE, UMF, TSOPF_RS_b39_c30.mtx, 1, 1, 1, sym_time:, 7.406790e-02, num_times:, 1.018119e-01, 1.018070e-01, 1.014700e-01, 9.615564e-02, 9.654265e-02, 9.607372e-02, 9.630437e-02, sol_times:, 2.128671e-02, 2.123689e-02, 1.612758e-02, 1.608125e-02, 1.602140e-02, 1.599254e-02, 1.601883e-02,
101+
TABLE, UMF, TSOPF_RS_b39_c30.mtx, 1, 1, 3, sym_time:, 3.292655e-01, num_times:, 1.517104e-01, 1.516826e-01, 1.518991e-01, 1.511355e-01, 1.515573e-01, 1.528105e-01, 1.514552e-01, sol_times:, 2.080022e-02, 2.066906e-02, 2.067235e-02, 2.076371e-02, 2.066134e-02, 2.082458e-02, 2.074122e-02,
102+
TABLE, ParU, TSOPF_RS_b39_c30.mtx, 1, 1, 1, sym_time:, 7.695978e-02, num_times:, 1.453122e-01, 1.216802e-01, 1.230776e-01, 1.284256e-01, 1.216281e-01, 1.164964e-01, 9.921592e-02, sol_times:, 8.401886e-03, 7.752119e-03, 8.355235e-03, 8.849248e-03, 8.315628e-03, 7.050963e-03, 5.527283e-03,
103+
TABLE, ParU, TSOPF_RS_b39_c30.mtx, 1, 1, 3, sym_time:, 3.495287e-01, num_times:, 2.116326e-01, 1.406327e-01, 1.310422e-01, 1.301144e-01, 1.134511e-01, 1.313425e-01, 1.388268e-01, sol_times:, 1.492923e-02, 1.442635e-02, 1.443325e-02, 1.284580e-02, 1.152180e-02, 1.207555e-02, 9.344153e-03,
104+
105+
106+
An example MUMPS output is listed below. It has the same
107+
format as the ParU and UMFPACK outputs, except the run times are in
108+
order of low to high # of threads. The 4th column is the ordering
109+
(1: amd, 2: metis on A+A').
110+
111+
TABLE, MUMPS, /tmp/matrices/TSOPF_RS_b39_c30/TSOPF_RS_b39_c30.mtx, 1, sym_time:, 3.204014e-01, num_times:, 6.222303e-02, 7.103832e-02, 4.108610e-02, 4.489215e-02, 4.803422e-02, 6.339786e-02, sol_times:, 1.091543e-02, 2.187732e-02, 8.204759e-03, 9.458208e-03, 1.547582e-02, 1.731181e-02,
112+
113+
SuperLU is similar, except that the matrix name is not listed
114+
(use awk to find both "TABLE" and "Matrix:" if preferred).
115+
116+
TABLE, SuperLU_MT, threads:, 32, ordering:, 3, analyze_time:, 4.29381728e-02, 4.40463973e-02, 4.21937061e-02, 4.61900234e-02, 4.15172875e-02, 4.73920098e-02, factor_time:, 8.40138663e-02, 6.18000347e-02, 5.73010538e-02, 5.09567745e-02, 5.99720702e-02, 7.43006011e-02, solve_time:, 6.62509473e-02, 7.43965395e-02, 5.66021195e-02, 7.09967716e-02, 4.94663576e-02, 6.65261745e-02,
117+
118+
For the ACM TOMS submissions, we then copied these run times from
119+
a spreadsheet into a MATLAB script that generated the plots in the
120+
figures in the paper. This step is a bit tedious so we have omitted
121+
the details. However, the final results for our two systems are
122+
in these files in this folder:
123+
124+
analyze_grace.m plot the results on grace.hprc.tamu.edu
125+
analyze_hyper.m plot the results on a 24-core desktop
126+
plot_one_matrix.m used by analyze_*.m
127+
subplot_one_matrix.m used by analyze_*.m
128+

0 commit comments

Comments
 (0)