@@ -144,6 +144,64 @@ option(ALUMINUM_ENABLE_BENCHMARKS
144144 "Build benchmarks."
145145 OFF )
146146
# Tuning parameters (in the order they appear in the file). Recall:
# values already present in the CMake cache are not modified; the
# defaults given here only take effect when no cache entry exists yet.
#
# See extended documentation in cmake/tuning_params.hpp.in.
# Progress-engine sizing knobs. Each one is a STRING cache entry, so a
# value already in the cache (e.g. from -DAL_PE_NUM_STREAMS=128 on the
# command line) is kept and the default written here is ignored.

# Number of operations the progress engine performs concurrently.
set(AL_PE_NUM_CONCURRENT_OPS 4 CACHE STRING
  "Number of concurrent operations the progress engine will perform")

# Upper bound on the number of streams the progress engine supports.
set(AL_PE_NUM_STREAMS 64 CACHE STRING
  "Max number of streams the progress engine supports")

# Upper bound on the number of pipeline stages the progress engine supports.
set(AL_PE_NUM_PIPELINE_STAGES 2 CACHE STRING
  "Max number of pipeline stages the progress engine supports")

# Capacity (in entries) of each stream's input queue.
set(AL_PE_INPUT_QUEUE_SIZE 8192 CACHE STRING
  "Max number of entries in each stream's input queue")
167+
# Boolean switch: automatically add a default stream entry for the
# progress engine. Off unless the user enables it.
option(AL_PE_ADD_DEFAULT_STREAM
  "Automatically add a default stream entry for the progress engine"
  OFF)
171+
# Boolean switch: use a thread-local cache when mapping streams to input
# queues. Off by default.
option(AL_PE_STREAM_QUEUE_CACHE
  "Use thread-local cache to map streams to input queues" OFF)

# Boolean switch: delay starting the progress engine until it is needed.
# On by default.
option(AL_PE_START_ON_DEMAND
  "Delay starting the progress engine until needed" ON)
179+
# Amount of sync object memory preallocated in the pool. Stored as a
# STRING cache entry, so an existing cached value is left untouched.
set(AL_SYNC_MEM_PREALLOC 1024 CACHE STRING
  "Amount of sync object memory to preallocate in the pool")
183+
# Choose the default cache line size from the processor the library will
# run on. CMAKE_SYSTEM_PROCESSOR describes the target machine: it matches
# CMAKE_HOST_SYSTEM_PROCESSOR for native builds and is normally supplied
# by the toolchain file when cross-compiling. If it is empty (e.g. a
# toolchain file that does not set it), fall back to the host processor.
set(_al_target_processor "${CMAKE_SYSTEM_PROCESSOR}")
if ("${_al_target_processor}" STREQUAL "")
  set(_al_target_processor "${CMAKE_HOST_SYSTEM_PROCESSOR}")
endif ()

if ("${_al_target_processor}" MATCHES "^ppc")
  set(AL_DEFAULT_CACHE_LINE_SIZE 128) # power
else ()
  set(AL_DEFAULT_CACHE_LINE_SIZE 64) # x86_64
endif ()
unset(_al_target_processor)

# TODO: I'd like to also detect A64FX but fugaku head nodes are x86
# and cross-compiles are wonky and that's too much work. For now, just
# manually set this on the command line on A64FX.
#
# The detected default is only used when AL_CACHE_LINE_SIZE is not already
# in the cache, so -DAL_CACHE_LINE_SIZE=<n> always takes precedence.
set(AL_CACHE_LINE_SIZE "${AL_DEFAULT_CACHE_LINE_SIZE}" CACHE STRING
  "Cache line size in bytes (x86: 64; POWER: 128; A64FX: 256)")
194+
# Minimum size, in bytes, used to avoid destructive interference.
# Kept as a STRING cache entry so a user-provided value is preserved.
set(AL_DESTRUCTIVE_INTERFERENCE_SIZE 128 CACHE STRING
  "Minimum size in bytes to avoid destructive interference")
198+
# Number of CUDA streams in the default stream pool. Kept as a STRING
# cache entry so a user-provided value is preserved.
set(AL_CUDA_STREAM_POOL_SIZE 5 CACHE STRING
  "Number of CUDA streams in the default stream pool")
202+
203+ # END Tuning parameters
204+
147205if (ALUMINUM_HAS_GPU
148206 AND NOT ALUMINUM_ENABLE_NCCL
149207 AND NOT ALUMINUM_ENABLE_MPI_CUDA
@@ -406,6 +464,10 @@ configure_file(
406464 "${CMAKE_SOURCE_DIR} /cmake/Al_config.hpp.in"
407465 "${CMAKE_BINARY_DIR} /Al_config.hpp" @ONLY)
408466
# Generate aluminum/tuning_params.hpp in the build tree from its template.
# @ONLY restricts substitution to @VAR@ references, so any literal ${...}
# text in the template is left untouched.
configure_file(
  "${CMAKE_SOURCE_DIR}/cmake/tuning_params.hpp.in"
  "${CMAKE_BINARY_DIR}/aluminum/tuning_params.hpp"
  @ONLY)
470+
409471# Macro for setting full paths to source files.
410472macro (set_source_path VAR)
411473 unset (__tmp_names)
@@ -475,6 +537,9 @@ install(FILES
475537 DESTINATION ${CMAKE_INSTALL_DIR} )
# Install the generated configuration header from the build tree.
install(
  FILES "${CMAKE_BINARY_DIR}/Al_config.hpp"
  DESTINATION ${INCLUDE_INSTALL_DIRS})
# Install the generated tuning-parameter header into the aluminum/
# subdirectory of the include directory, mirroring its build-tree location.
install(
  FILES "${CMAKE_BINARY_DIR}/aluminum/tuning_params.hpp"
  DESTINATION ${INCLUDE_INSTALL_DIRS}/aluminum)
478543
479544# Install the CMake modules we need
480545install (FILES
0 commit comments