Skip to content

Commit 0f24b39

Browse files
authored
Reword/expand comments in Makefile.rule
Lots of small changes in the wording of the comments, plus an expansion of the NUM_THREADS and NO_AFFINITY sections.
1 parent 89b60da commit 0f24b39

File tree

1 file changed

+29
-19
lines changed

1 file changed

+29
-19
lines changed

Makefile.rule

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ VERSION = 0.3.6.dev
4848
# HOSTCC = gcc
4949

5050
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
51+
# Please note that AVX is not available on 32-bit.
52+
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
5153
# BINARY=64
5254

5355
# About threaded BLAS. It will be automatically detected if you don't
@@ -57,7 +59,7 @@ VERSION = 0.3.6.dev
5759
# USE_THREAD = 0
5860

5961
# If you're going to use this library with OpenMP, please comment it in.
60-
# This flag is always set for POWER8. Don't modify the flag
62+
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
6163
# USE_OPENMP = 1
6264

6365
# The OpenMP scheduler to use - by default this is "static" and you
@@ -68,36 +70,39 @@ VERSION = 0.3.6.dev
6870
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
6971
# CCOMMON_OPT += -DOMP_SCHED=dynamic
7072

71-
# You can define maximum number of threads. Basically it should be
72-
# less than actual number of cores. If you don't specify one, it's
73+
# You can define the maximum number of threads. Basically it should be less
74+
# than or equal to the number of CPU threads. If you don't specify one, it's
7375
# automatically detected by the script.
76+
# If SMT (a.k.a. HT) is enabled on the system, it may or may not be beneficial to
77+
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
78+
# detection includes logical CPUs, thus allowing the use of SMT.
7479
# NUM_THREADS = 24
7580

7681
# If you have enabled USE_OPENMP and your application would call
77-
# OpenBLAS's calculation API from multi threads, please comment it in.
78-
# This flag defines how many instances of OpenBLAS's calculation API can
79-
# actually run in parallel. If more threads call OpenBLAS's calculation API,
82+
# OpenBLAS's calculation API from multiple threads, please comment this in.
83+
# This flag defines how many instances of OpenBLAS's calculation API can actually
84+
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
8085
# they need to wait for the preceding API calls to finish or risk data corruption.
8186
# NUM_PARALLEL = 2
8287

83-
# if you don't need to install the static library, please comment it in.
88+
# If you don't need to generate the static library, please comment this in.
8489
# NO_STATIC = 1
8590

86-
# if you don't need generate the shared library, please comment it in.
91+
# If you don't need to generate the shared library, please comment this in.
8792
# NO_SHARED = 1
8893

89-
# If you don't need CBLAS interface, please comment it in.
94+
# If you don't need the CBLAS interface, please comment this in.
9095
# NO_CBLAS = 1
9196

92-
# If you only want CBLAS interface without installing Fortran compiler,
93-
# please comment it in.
97+
# If you only want the CBLAS interface without installing a Fortran compiler,
98+
# please comment this in.
9499
# ONLY_CBLAS = 1
95100

96-
# If you don't need LAPACK, please comment it in.
97-
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
101+
# If you don't need LAPACK, please comment this in.
102+
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
98103
# NO_LAPACK = 1
99104

100-
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
105+
# If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
101106
# NO_LAPACKE = 1
102107

103108
# Build LAPACK Deprecated functions since LAPACK 3.6.0
@@ -106,7 +111,7 @@ BUILD_LAPACK_DEPRECATED = 1
106111
# Build RecursiveLAPACK on top of LAPACK
107112
# BUILD_RELAPACK = 1
108113

109-
# If you want to use legacy threaded Level 3 implementation.
114+
# If you want to use the legacy threaded Level 3 implementation.
110115
# USE_SIMPLE_THREADED_LEVEL3 = 1
111116

112117
# If you want to use the new, still somewhat experimental code that uses
@@ -116,19 +121,24 @@ BUILD_LAPACK_DEPRECATED = 1
116121
# USE_TLS = 1
117122

118123
# Comment this in if you want BLAS to drive the whole 64-bit region. Not all Fortran
119-
# compiler supports this. It's safe to keep comment it out if you
120-
# are not sure(equivalent to "-i8" option).
124+
# compilers support this. It's safe to keep this commented out if you
125+
# are not sure. (This is equivalent to the "-i8" ifort option.)
121126
# INTERFACE64 = 1
122127

123128
# Unfortunately, most kernels won't give us a high-quality buffer.
124129
# BLAS tries to find the best region before entering main function,
125130
# but this will consume time. If you don't like it, you can disable it.
126131
NO_WARMUP = 1
127132

128-
# If you want to disable CPU/Memory affinity on Linux.
133+
# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
134+
# This feature is only implemented on Linux, and is always disabled on other platforms.
135+
# Enabling affinity handling may improve performance, especially on NUMA systems, but
136+
# it may conflict with certain applications that also try to manage affinity.
137+
# For this reason, affinity handling is disabled by default. It can be safely enabled if nothing
138+
# else modifies affinity settings.
129139
NO_AFFINITY = 1
130140

131-
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
141+
# If you are compiling for Linux and you have more than 16 NUMA nodes or more than 256 CPUs, comment this in.
132142
# BIGNUMA = 1
133143

134144
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers

0 commit comments

Comments
 (0)