Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
b0f0bc6
Added --target-buffers command-line option
radelja Oct 4, 2024
8124b6e
Implemented target buffers command-line option
radelja Oct 11, 2024
fb51843
Changed --target-buffers option to --dense-buffers
radelja Oct 11, 2024
f82b89f
Minimize dense buffer resizing for OpenMP backend
radelja Oct 11, 2024
c5b9da4
Updated --dense-buffers option to not take an argument
radelja Oct 11, 2024
1da027e
Add tests for parsing --dense-buffers option
radelja Oct 11, 2024
60e525c
Merge pull request #222 from radelja/omp-dense-buffers
jyoung3131 Oct 26, 2024
e2f5503
Now 'gs' is used to specify GatherScatter kernel instead of 'sg'
radelja Oct 26, 2024
6e49b93
Now prints out VERSION from project() in top-level CMakeLists.txt
radelja Oct 26, 2024
ce7e801
Merge pull request #225 from radelja/fix-version
jyoung3131 Oct 26, 2024
bd62b87
Updated function names to reflect 'sg' to 'gs' changes
radelja Nov 1, 2024
df308cc
Initialize source and target buffers in parallel for OpenMP backend
radelja Nov 11, 2024
a02ce8f
Merge pull request #223 from radelja/gs-option-fix
plavin Nov 17, 2024
419ef6d
Fix pragma omp compiler warnings for Serial and CUDA backends
radelja Nov 22, 2024
4702c54
Add --atomic-thread-fence flag for OpenMP kernels
radelja Nov 26, 2024
005d3f6
Implement --atomic-thread-fence flag for OpenMP kernels
radelja Nov 26, 2024
677fdb7
Add tests for --atomic-thread-fence flag
radelja Nov 26, 2024
5727d41
Update handling of invalid command-line arguments
radelja Nov 28, 2024
0d5bd71
Changed negative delta values in JSON input files to 8
radelja Nov 28, 2024
1cd00d0
Update handling of invalid values in JSON input files
radelja Nov 28, 2024
ef685cb
Merge pull request #226 from radelja/omp-rand-buffers
plavin Dec 3, 2024
f968923
Merge pull request #229 from radelja/invalid-values-fix
plavin Dec 13, 2024
f8ff381
Split standard suite test into three separate tests
radelja Dec 31, 2024
dfc394e
Update STREAM-like GPU test to use same count for all patterns
radelja Dec 31, 2024
fcd35ab
Merge pull request #231 from radelja/standard-suite-update
plavin Jan 9, 2025
63a38bf
Merge remote-tracking branch 'upstream/spatter-devel' into atomic-thr…
radelja Jan 10, 2025
06e95b6
Merge pull request #228 from radelja/atomic-thread-fence
jyoung3131 Jan 13, 2025
ec847a3
add install step
plavin Mar 11, 2025
6ff66be
Use SPATTER_INCLUDE_FILES variable instead of listing headers
radelja Mar 13, 2025
d615339
Updated JSONParser class to use void pointer and casting for storing …
radelja Mar 13, 2025
a5896f1
install libcuda_backend.so
plavin Mar 13, 2025
d386992
add missing include
plavin Mar 13, 2025
57dd721
Updated CMakeLists.txt to set install rpath
radelja Mar 23, 2025
34d580d
Merge pull request #238 from plavin/spatter-devel
jyoung3131 Apr 7, 2025
06078f9
Update Configuration.cc
jyoung3131 Apr 7, 2025
928d41e
Update build-mpi.yml
jyoung3131 Apr 7, 2025
f5d8764
Merge branch 'main' into spatter-devel
jyoung3131 Apr 7, 2025
03cc997
Update run-crnch-cuda.sh
jyoung3131 Apr 7, 2025
77c44a3
Merge pull request #241 from hpcgarage/jyoung3131-patch-2
jyoung3131 Apr 7, 2025
bd3c273
Update build-mpi.yml
jyoung3131 Apr 7, 2025
87ab3e4
Merge pull request #242 from hpcgarage/jyoung3131-patch-3
jyoung3131 Apr 7, 2025
cb14b9d
Update CMakeLists.txt
jyoung3131 Apr 9, 2025
7172281
Merge pull request #245 from hpcgarage/jyoung3131-patch-4
jyoung3131 Apr 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions .github/workflows/build-mpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@ jobs:
build-and-run-mpi:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: awalsh128/cache-apt-pkgs-action@latest #Add OpenMPI to test against
with:
packages: openmpi-bin libopenmpi-dev
version: 1.0
- uses: actions/checkout@v4.2.2

- name: Install-MPI
run: |
sudo apt update
sudo apt install -y openmpi-bin openmpi-common libopenmpi-dev

- name: Build-MPI
run:
cmake -DUSE_MPI=1 -B build_mpi -S . && make -C build_mpi
run: |
cmake -DMPI_CXX_COMPILER=mpicxx -DUSE_MPI=1 -B build_mpi -S . && make -C build_mpi

- name: Test-MPI
run: make test -C build_mpi
run: make test -C build_mpi
11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

include(DisableIntree)

project(Spatter VERSION 2.0.0 LANGUAGES CXX)
project(Spatter VERSION 2.1.0 LANGUAGES CXX)

include(GNUInstallDirs)

Expand All @@ -20,10 +20,19 @@ include(pkgs/MPISupport)
include(pkgs/OpenMPSupport)
include(pkgs/CUDASupport)

# Install-time RPATH: let installed binaries locate shared libraries in the
# sibling ../lib directory (@executable_path on macOS, $ORIGIN elsewhere).
if (NOT APPLE)
  set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib")
else ()
  set(CMAKE_INSTALL_RPATH "@executable_path/../lib")
endif ()

# Create gz_read executable
# Built from the standard-suite binary-traces source and linked against
# the `z` library.
add_executable(gz_read standard-suite/binary-traces/gz_read.cc)
target_link_libraries(gz_read z)

# Install the gz_read executable into <prefix>/bin.
install(TARGETS gz_read
RUNTIME DESTINATION bin)

add_subdirectory(src)

enable_testing()
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Usage: ./spatter
-d (--delta) Delta (default 8)
-e (--boundary) Set Boundary (limits max value of pattern using modulo)
-f (--file) Input File
-g (--pattern-gather) Set Inner Gather Pattern (Valid with kernel-name: sg, multigather)
-g (--pattern-gather) Set Inner Gather Pattern (Valid with kernel-name: gs, multigather)
-h (--help) Print Help Message
-j (--pattern-size) Set Pattern Size (truncates pattern to pattern-size)
-k (--kernel) Kernel (default gather)
Expand All @@ -105,7 +105,7 @@ Usage: ./spatter
-r (--runs) Set Number of Runs (default 10)
-s (--random) Set Random Seed (default random)
-t (--omp-threads) Set Number of Threads (default: OMP_MAX_THREADS if USE_OPENMP and backend == openmp, otherwise 1)
-u (--pattern-scatter) Set Inner Scatter Pattern (Valid with kernel-name: sg, multiscatter)
-u (--pattern-scatter) Set Inner Scatter Pattern (Valid with kernel-name: gs, multiscatter)
-v (--verbosity) Set Verbosity Level (default 1)
-w (--wrap) Set Wrap (default 1)
-x (--delta-gather) Delta for the Gather Pattern (default 8)
Expand Down
2 changes: 1 addition & 1 deletion notebooks/spatter_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

EXPERIMENTS = {'ustride':'Uniform Stride', 'stream': 'Stream', 'nekbone':'Nekbone', 'lulesh':'LULESH', 'amg':'AMG', 'pennant':'PENNANT'}

KERNELS = {'gather':'Gather', 'scatter':'Scatter', 'sg':'GS', 'multigather':'MultiGather', 'multiscatter':'MultiScatter'}
KERNELS = {'gather':'Gather', 'scatter':'Scatter', 'gs':'GS', 'multigather':'MultiGather', 'multiscatter':'MultiScatter'}

#################################################################
# NO EDITING IS REQUIRED BEYOND THIS POINT TO ADD NEW PLATFORMS #
Expand Down
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ add_executable(spatter main.cc)
#target_compile_options(spatter PUBLIC "-fnew-alignment 32")
target_link_libraries(spatter ${COMMON_LINK_LIBRARIES} Spatter)
set_target_properties(spatter PROPERTIES
COMPILE_DEFINITIONS "${COMMON_COMPILE_DEFINITIONS}"
COMPILE_DEFINITIONS "${COMMON_COMPILE_DEFINITIONS};SPAT_VERSION=${PROJECT_VERSION}"
COMPILE_OPTIONS "${WARNING_FLAGS}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
)

# Install the spatter executable into <prefix>/bin.
install(TARGETS spatter
RUNTIME DESTINATION bin)
13 changes: 13 additions & 0 deletions src/Spatter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ target_compile_options(Spatter_shared
${WARNING_FLAGS}
)

# Install the Spatter library targets; archives and shared libraries both
# land in <prefix>/lib.
install(TARGETS Spatter Spatter_shared
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib)

# Install the headers listed in SPATTER_INCLUDE_FILES under include/Spatter.
install(FILES ${SPATTER_INCLUDE_FILES} DESTINATION include/Spatter)

# The CUDA backend library is installed only when USE_CUDA is set.
if (USE_CUDA)
  install(TARGETS cuda_backend
    ARCHIVE DESTINATION lib
    LIBRARY DESTINATION lib)
endif ()

# Library/Header installation section

#set(ConfigPackageLocation lib/cmake/Spatter)
Expand Down
74 changes: 51 additions & 23 deletions src/Spatter/Configuration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ ConfigurationBase::ConfigurationBase(const size_t id, const std::string name,
const size_t delta_scatter, const long int seed, const size_t wrap,
const size_t count, const size_t shared_mem, const size_t local_work_size,
const int nthreads, const unsigned long nruns, const bool aggregate,
const bool atomic, const unsigned long verbosity)
const bool atomic, const bool atomic_fence, const bool dense_buffers,
const unsigned long verbosity)
: id(id), name(name), kernel(k), pattern(pattern),
pattern_gather(pattern_gather), pattern_scatter(pattern_scatter),
sparse(sparse), dev_sparse(dev_sparse), sparse_size(sparse_size),
Expand All @@ -36,6 +37,7 @@ ConfigurationBase::ConfigurationBase(const size_t id, const std::string name,
delta_scatter(delta_scatter), seed(seed), wrap(wrap), count(count),
shmem(shared_mem), local_work_size(local_work_size),
omp_threads(nthreads), nruns(nruns), aggregate(aggregate), atomic(atomic),
atomic_fence(atomic_fence), dense_buffers(dense_buffers),
verbosity(verbosity), time_seconds(nruns, 0) {
std::transform(kernel.begin(), kernel.end(), kernel.begin(),
[](unsigned char c) { return std::tolower(c); });
Expand All @@ -48,8 +50,8 @@ int ConfigurationBase::run(bool timed, unsigned long run_id) {
gather(timed, run_id);
else if (kernel.compare("scatter") == 0)
scatter(timed, run_id);
else if (kernel.compare("sg") == 0)
scatter_gather(timed, run_id);
else if (kernel.compare("gs") == 0)
gather_scatter(timed, run_id);
else if (kernel.compare("multigather") == 0)
multi_gather(timed, run_id);
else if (kernel.compare("multiscatter") == 0)
Expand All @@ -68,7 +70,7 @@ void ConfigurationBase::report() {
if (kernel.compare("gather") == 0 || kernel.compare("scatter") == 0)
bytes_moved = pattern.size() * count * sizeof(size_t);

if (kernel.compare("sg") == 0)
if (kernel.compare("gs") == 0)
bytes_moved = (pattern_scatter.size() + pattern_gather.size()) * count * sizeof(size_t);

if (kernel.compare("multiscatter") == 0)
Expand Down Expand Up @@ -103,8 +105,9 @@ void ConfigurationBase::report() {
MPI_Gather(&mpi_minimum_time, 1, MPI_DOUBLE, vector_minimum_time.data(), 1,
MPI_DOUBLE, 0, MPI_COMM_WORLD);

//double mpi_maximum_bandwidth = static_cast<double>(bytes_per_run) / mpi_minimum_time / 1000000.0;
double mpi_maximum_bandwidth =
static_cast<double>(bytes_per_run) / mpi_minimum_time / 1000000.0;
static_cast<double>(bytes_moved) / mpi_minimum_time / 1000000.0;
std::vector<double> vector_maximum_bandwidth(numpes, 0.0);
MPI_Gather(&mpi_maximum_bandwidth, 1, MPI_DOUBLE,
vector_maximum_bandwidth.data(), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
Expand Down Expand Up @@ -141,7 +144,7 @@ void ConfigurationBase::setup() {
<< std::endl;
exit(1);
}
} else if (kernel.compare("sg") == 0) {
} else if (kernel.compare("gs") == 0) {
if (pattern_gather.size() == 0) {
std::cerr << "Pattern-Gather needs to have length of at least 1"
<< std::endl;
Expand Down Expand Up @@ -184,7 +187,7 @@ void ConfigurationBase::setup() {
// sparse size = max_pattern_val + delta * (count - 1) + 1
// assert(pattern.size() > max_pattern_scatter_val + 1)

if (kernel.compare("sg") == 0) {
if (kernel.compare("gs") == 0) {
size_t max_pattern_scatter_val = *(std::max_element(
std::cbegin(pattern_scatter), std::cend(pattern_scatter)));
size_t max_pattern_gather_val = *(std::max_element(
Expand Down Expand Up @@ -395,7 +398,7 @@ Configuration<Spatter::Serial>::Configuration(const size_t id,
dev_sparse_scatter, sparse_scatter_size, dense, dense_perthread,
dev_dense, dense_size, delta, delta_gather,
delta_scatter, seed, wrap, count, 0, 1024, 1, nruns, aggregate, false,
verbosity) {
false, false, verbosity) {
ConfigurationBase::setup();
}

Expand Down Expand Up @@ -441,7 +444,7 @@ void Configuration<Spatter::Serial>::scatter(bool timed, unsigned long run_id) {
}
}

void Configuration<Spatter::Serial>::scatter_gather(
void Configuration<Spatter::Serial>::gather_scatter(
bool timed, unsigned long run_id) {
assert(pattern_scatter.size() == pattern_gather.size());
size_t pattern_length = pattern_scatter.size();
Expand Down Expand Up @@ -527,13 +530,15 @@ Configuration<Spatter::OpenMP>::Configuration(const size_t id,
const size_t delta_gather, const size_t delta_scatter, const long int seed,
const size_t wrap, const size_t count, const int nthreads,
const unsigned long nruns, const bool aggregate, const bool atomic,
const bool atomic_fence, const bool dense_buffers,
const unsigned long verbosity)
: ConfigurationBase(id, name, kernel, pattern, pattern_gather,
pattern_scatter, sparse, dev_sparse, sparse_size, sparse_gather,
dev_sparse_gather, sparse_gather_size, sparse_scatter,
dev_sparse_scatter, sparse_scatter_size, dense, dense_perthread,
dev_dense, dense_size, delta, delta_gather, delta_scatter, seed, wrap,
count, 0, 1024, nthreads, nruns, aggregate, atomic, verbosity) {
count, 0, 1024, nthreads, nruns, aggregate, atomic, atomic_fence,
dense_buffers, verbosity) {
ConfigurationBase::setup();
}

Expand All @@ -555,11 +560,13 @@ void Configuration<Spatter::OpenMP>::gather(bool timed, unsigned long run_id) {
#pragma omp parallel
{
int t = omp_get_thread_num();
double *source = sparse.data();
double *target = (dense_buffers ? dense_perthread[t].data() : dense.data());

#pragma omp for
for (size_t i = 0; i < count; ++i) {
double *sl = sparse.data() + delta * i;
double *tl = dense_perthread[t].data() + pattern_length * (i % wrap);
double *sl = source + delta * i;
double *tl = target + pattern_length * (i % wrap);

#pragma omp simd
for (size_t j = 0; j < pattern_length; ++j) {
Expand All @@ -568,6 +575,9 @@ void Configuration<Spatter::OpenMP>::gather(bool timed, unsigned long run_id) {
}
}

if (atomic_fence)
std::atomic_thread_fence(std::memory_order_release);

if (timed) {
timer.stop();
time_seconds[run_id] = timer.seconds();
Expand All @@ -589,11 +599,13 @@ void Configuration<Spatter::OpenMP>::scatter(bool timed, unsigned long run_id) {
#pragma omp parallel
{
int t = omp_get_thread_num();
double *source = (dense_buffers ? dense_perthread[t].data() : dense.data());
double *target = sparse.data();

#pragma omp for
for (size_t i = 0; i < count; ++i) {
double *tl = sparse.data() + delta * i;
double *sl = dense_perthread[t].data() + pattern_length * (i % wrap);
double *tl = target + delta * i;
double *sl = source + pattern_length * (i % wrap);

#pragma omp simd
for (size_t j = 0; j < pattern_length; ++j) {
Expand All @@ -602,14 +614,17 @@ void Configuration<Spatter::OpenMP>::scatter(bool timed, unsigned long run_id) {
}
}

if (atomic_fence)
std::atomic_thread_fence(std::memory_order_release);

if (timed) {
timer.stop();
time_seconds[run_id] = timer.seconds();
timer.clear();
}
}

void Configuration<Spatter::OpenMP>::scatter_gather(
void Configuration<Spatter::OpenMP>::gather_scatter(
bool timed, unsigned long run_id) {
assert(pattern_scatter.size() == pattern_gather.size());
size_t pattern_length = pattern_scatter.size();
Expand All @@ -632,6 +647,9 @@ void Configuration<Spatter::OpenMP>::scatter_gather(
}
}

if (atomic_fence)
std::atomic_thread_fence(std::memory_order_release);

if (timed) {
timer.stop();
time_seconds[run_id] = timer.seconds();
Expand All @@ -653,11 +671,13 @@ void Configuration<Spatter::OpenMP>::multi_gather(
#pragma omp parallel
{
int t = omp_get_thread_num();
double *source = sparse.data();
double *target = (dense_buffers ? dense_perthread[t].data() : dense.data());

#pragma omp for
for (size_t i = 0; i < count; ++i) {
double *sl = sparse.data() + delta * i;
double *tl = dense_perthread[t].data() + pattern_length * (i % wrap);
double *sl = source + delta * i;
double *tl = target + pattern_length * (i % wrap);

#pragma omp simd
for (size_t j = 0; j < pattern_length; ++j) {
Expand All @@ -666,6 +686,9 @@ void Configuration<Spatter::OpenMP>::multi_gather(
}
}

if (atomic_fence)
std::atomic_thread_fence(std::memory_order_release);

if (timed) {
timer.stop();
time_seconds[run_id] = timer.seconds();
Expand All @@ -688,11 +711,13 @@ void Configuration<Spatter::OpenMP>::multi_scatter(
#pragma omp parallel
{
int t = omp_get_thread_num();
double *target = sparse.data();
double *source = (dense_buffers ? dense_perthread[t].data() : dense.data());

#pragma omp for
for (size_t i = 0; i < count; ++i) {
double *tl = sparse.data() + delta * i;
double *sl = dense_perthread[t].data() + pattern_length * (i % wrap);
double *tl = target + delta * i;
double *sl = source + pattern_length * (i % wrap);

#pragma omp simd
for (size_t j = 0; j < pattern_length; ++j) {
Expand All @@ -701,6 +726,9 @@ void Configuration<Spatter::OpenMP>::multi_scatter(
}
}

if (atomic_fence)
std::atomic_thread_fence(std::memory_order_release);

if (timed) {
timer.stop();
time_seconds[run_id] = timer.seconds();
Expand Down Expand Up @@ -732,7 +760,7 @@ Configuration<Spatter::CUDA>::Configuration(const size_t id,
dev_sparse_scatter, sparse_scatter_size, dense, dense_perthread,
dev_dense, dense_size, delta, delta_gather, delta_scatter, seed,
wrap, count, shared_mem, local_work_size, 1, nruns, aggregate, atomic,
verbosity) {
false, false, verbosity) {

setup();
}
Expand Down Expand Up @@ -805,7 +833,7 @@ void Configuration<Spatter::CUDA>::scatter(bool timed, unsigned long run_id) {
time_seconds[run_id] = ((double)time_ms / 1000.0);
}

void Configuration<Spatter::CUDA>::scatter_gather(
void Configuration<Spatter::CUDA>::gather_scatter(
bool timed, unsigned long run_id) {
assert(pattern_scatter.size() == pattern_gather.size());
int pattern_length = static_cast<int>(pattern_scatter.size());
Expand All @@ -817,11 +845,11 @@ void Configuration<Spatter::CUDA>::scatter_gather(
float time_ms = 0.0;

if (atomic)
time_ms = cuda_scatter_gather_atomic_wrapper(dev_pattern_scatter,
time_ms = cuda_gather_scatter_atomic_wrapper(dev_pattern_scatter,
dev_sparse_scatter, dev_pattern_gather, dev_sparse_gather,
pattern_length, delta_scatter, delta_gather, wrap, count);
else
time_ms = cuda_scatter_gather_wrapper(dev_pattern_scatter,
time_ms = cuda_gather_scatter_wrapper(dev_pattern_scatter,
dev_sparse_scatter, dev_pattern_gather, dev_sparse_gather,
pattern_length, delta_scatter, delta_gather, wrap, count);

Expand Down
Loading