Skip to content

Commit bde4b6f

Browse files
Merge pull request #277 from chillenzer/add-GallatinCUDAafterReorganisation
Add gallatin cuda after reorganisation
2 parents a288377 + 1a0530e commit bde4b6f

File tree

7 files changed

+298
-15
lines changed

7 files changed

+298
-15
lines changed

CMakeLists.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,38 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/add_controlled.cmake)
2929
add_controlled("PackageProject.cmake" REQUIRED)
3030
add_controlled("alpaka" REQUIRED)
3131

32+
3233
# ---- Create library ----
3334

3435
# Note: for header-only libraries change all PUBLIC flags to INTERFACE and create an interface
3536
add_library(${PROJECT_NAME} INTERFACE)
3637
set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 20)
3738

39+
# Gallatin is only fetched and linked when alpaka's CUDA backend is enabled.
if(alpaka_ACC_GPU_CUDA_ENABLE)
40+
add_controlled("Gallatin")
41+
42+
# Gallatin needs some fairly recent compute capability from CUDA.
43+
# CMake defaults to taking the oldest supported by the device
44+
# (https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_ARCHITECTURES.html)
45+
# which can be too old. This leads to compilation errors along the lines of
46+
#
47+
# error: no instance of overloaded function "atomicCAS" matches the argument list
48+
# argument types are: (unsigned short *, unsigned short, unsigned short)
49+
#
50+
# because this overload was only added later (apparently?).
51+
52+
# This only warns; configuration continues even if the architecture is too old.
# NOTE(review): LESS is a numeric comparison, so an unset, multi-valued or non-numeric
# CMAKE_CUDA_ARCHITECTURES (e.g. "native") presumably never triggers the warning -- confirm.
if ("${CMAKE_CUDA_ARCHITECTURES}" LESS 70)
53+
message(
54+
WARNING
55+
"CUDA architecture detected is too old: ${CMAKE_CUDA_ARCHITECTURES}. "
56+
"If the architecture set is too old, this can lead to compilation errors with Gallatin. "
57+
"If Gallatin is needed, please set CMAKE_CUDA_ARCHITECTURES to the correct value >= 70."
58+
)
59+
endif()
60+
61+
# INTERFACE because ${PROJECT_NAME} itself is declared as an INTERFACE library.
target_link_libraries(${PROJECT_NAME} INTERFACE gallatin)
62+
endif()
63+
3864
# being a cross-platform target, we enforce standards conformance on MSVC
3965
target_compile_options(${PROJECT_NAME} INTERFACE "$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/permissive->")
4066

cmake/package-lock.cmake

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,12 @@ CPMDeclarePackage(Catch2
3535
SYSTEM YES
3636
EXCLUDE_FROM_ALL YES
3737
)
38+
# Gallatin
39+
CPMDeclarePackage(Gallatin
40+
# There's no release available yet.
# The tag below is therefore a full commit hash rather than a version tag.
41+
GIT_TAG ac0cb8e380ffcb74156bafb8805fb60412817c5f
42+
# Use our own fork for some patches
43+
GITHUB_REPOSITORY chillenzer/Gallatin
# NOTE(review): SYSTEM / EXCLUDE_FROM_ALL presumably mark Gallatin's headers as system
# includes and keep its targets out of the default build -- confirm against the CPM docs.
44+
SYSTEM YES
45+
EXCLUDE_FROM_ALL YES
46+
)

examples/getAvailableSlots/source/main.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,19 @@ struct ExampleKernel
9999
}
100100
};
101101

102-
template<typename T_CreationPolicy>
102+
template<
103+
typename T_CreationPolicy,
104+
typename T_ReservePoolPolicy,
105+
typename T_AlignmentPolicy = mallocMC::AlignmentPolicies::Shrink<AlignmentConfig>>
103106
auto example03() -> int
104107
{
105108
using Allocator = mallocMC::Allocator<
106109
Acc,
107110
T_CreationPolicy,
108111
mallocMC::DistributionPolicies::Noop,
109112
mallocMC::OOMPolicies::ReturnNull,
110-
mallocMC::ReservePoolPolicies::AlpakaBuf<Acc>,
111-
mallocMC::AlignmentPolicies::Shrink<AlignmentConfig>>;
113+
T_ReservePoolPolicy,
114+
T_AlignmentPolicy>;
112115

113116
auto const platform = alpaka::Platform<Acc>{};
114117
auto const dev = alpaka::getDevByIdx(platform, 0);
@@ -130,8 +133,19 @@ auto example03() -> int
130133

131134
/**
 * @brief Runs example03 once for every allocator configuration exercised by this example.
 *
 * NOTE(review): this is a rendered diff, so both the removed ("-") and the added ("+")
 * calls appear below; only the "+" lines belong to the new revision.
 *
 * @return Always 0; the return values of the example03 calls are ignored.
 */
auto main(int /*argc*/, char* /*argv*/[]) -> int
132135
{
133-
example03<FlatterScatter<FlatterScatterHeapConfig>>();
134-
example03<Scatter<FlatterScatterHeapConfig>>();
135-
example03<OldMalloc>();
136+
// Both scatter-based creation policies are combined with the AlpakaBuf reserve-pool policy.
example03<FlatterScatter<FlatterScatterHeapConfig>, mallocMC::ReservePoolPolicies::AlpakaBuf<Acc>>();
137+
example03<Scatter<FlatterScatterHeapConfig>, mallocMC::ReservePoolPolicies::AlpakaBuf<Acc>>();
138+
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
139+
// GallatinCuda is combined with the Noop reserve-pool and the Noop alignment policy.
example03<
140+
mallocMC::CreationPolicies::GallatinCuda<>,
141+
mallocMC::ReservePoolPolicies::Noop,
142+
mallocMC::AlignmentPolicies::Noop>();
143+
// GallatinCuda already uses cudaSetLimits and we're not allowed to call it a second time.
144+
example03<OldMalloc, mallocMC::ReservePoolPolicies::Noop>();
145+
// This should normally be:
146+
// example03<OldMalloc, mallocMC::ReservePoolPolicies::CudaSetLimits>();
147+
#else
// Without the CUDA backend, OldMalloc is likewise run with the Noop reserve pool.
148+
example03<OldMalloc, mallocMC::ReservePoolPolicies::Noop>();
149+
#endif
136150
return 0;
137151
}

examples/vectorAdd/source/main.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727
THE SOFTWARE.
2828
*/
2929

30-
#include "mallocMC/creationPolicies/FlatterScatter.hpp"
31-
#include "mallocMC/creationPolicies/OldMalloc.hpp"
32-
3330
#include <alpaka/alpaka.hpp>
3431
#include <alpaka/example/ExampleDefaultAcc.hpp>
3532

@@ -80,16 +77,19 @@ ALPAKA_STATIC_ACC_MEM_GLOBAL int** arA;
8077
ALPAKA_STATIC_ACC_MEM_GLOBAL int** arB;
8178
ALPAKA_STATIC_ACC_MEM_GLOBAL int** arC;
8279

83-
template<typename T_CreationPolicy>
80+
template<
81+
typename T_CreationPolicy,
82+
typename T_ReservePoolPolicy,
83+
typename T_AlignmentPolicy = mallocMC::AlignmentPolicies::Shrink<ShrinkConfig>>
8484
auto example01() -> int
8585
{
8686
using Allocator = mallocMC::Allocator<
8787
Acc,
8888
T_CreationPolicy,
8989
mallocMC::DistributionPolicies::Noop,
9090
mallocMC::OOMPolicies::ReturnNull,
91-
mallocMC::ReservePoolPolicies::AlpakaBuf<Acc>,
92-
mallocMC::AlignmentPolicies::Shrink<ShrinkConfig>>;
91+
T_ReservePoolPolicy,
92+
T_AlignmentPolicy>;
9393

9494
constexpr auto length = 100;
9595

@@ -227,8 +227,19 @@ auto example01() -> int
227227

228228
/**
 * @brief Runs example01 once for every allocator configuration exercised by this example.
 *
 * NOTE(review): this is a rendered diff, so both the removed ("-") and the added ("+")
 * calls appear below; only the "+" lines belong to the new revision.
 *
 * @return Always 0; the return values of the example01 calls are ignored.
 */
auto main(int /*argc*/, char* /*argv*/[]) -> int
229229
{
230-
example01<FlatterScatter<FlatterScatterHeapConfig>>();
231-
example01<Scatter<FlatterScatterHeapConfig>>();
232-
example01<OldMalloc>();
230+
// Both scatter-based creation policies are combined with the AlpakaBuf reserve-pool policy.
example01<FlatterScatter<FlatterScatterHeapConfig>, mallocMC::ReservePoolPolicies::AlpakaBuf<Acc>>();
231+
example01<Scatter<FlatterScatterHeapConfig>, mallocMC::ReservePoolPolicies::AlpakaBuf<Acc>>();
232+
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
233+
// GallatinCuda is combined with the Noop reserve-pool and the Noop alignment policy.
example01<
234+
mallocMC::CreationPolicies::GallatinCuda<>,
235+
mallocMC::ReservePoolPolicies::Noop,
236+
mallocMC::AlignmentPolicies::Noop>();
237+
// GallatinCuda already uses cudaSetLimits and we're not allowed to call it a second time.
238+
example01<OldMalloc, mallocMC::ReservePoolPolicies::Noop>();
239+
// This should normally be:
240+
// example01<OldMalloc, mallocMC::ReservePoolPolicies::CudaSetLimits>();
241+
#else
// Without the CUDA backend, OldMalloc is likewise run with the Noop reserve pool.
242+
example01<OldMalloc, mallocMC::ReservePoolPolicies::Noop>();
243+
#endif
233244
return 0;
234245
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/*
2+
mallocMC: Memory Allocator for Many Core Architectures.
3+
4+
Copyright 2014-2024 Institute of Radiation Physics,
5+
Helmholtz-Zentrum Dresden - Rossendorf
6+
7+
Author(s): Carlchristian Eckert - c.eckert ( at ) hzdr.de
8+
Julian Lenz - j.lenz ( at ) hzdr.de
9+
10+
Permission is hereby granted, free of charge, to any person obtaining a copy
11+
of this software and associated documentation files (the "Software"), to deal
12+
in the Software without restriction, including without limitation the rights
13+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
copies of the Software, and to permit persons to whom the Software is
15+
furnished to do so, subject to the following conditions:
16+
17+
The above copyright notice and this permission notice shall be included in
18+
all copies or substantial portions of the Software.
19+
20+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
THE SOFTWARE.
27+
*/
28+
29+
#pragma once
30+
31+
#include <alpaka/alpaka.hpp>
32+
33+
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
34+
# include <gallatin/allocators/gallatin.cuh>
35+
#else
36+
37+
// Construct a fake, so we get a nice error message when we try to use it
38+
// and it's not in the way when we don't.
// The stub is only compiled when ALPAKA_ACC_GPU_CUDA_ENABLED is not defined. It accepts any
// list of size_t template arguments, and generate_on_device takes any arguments and returns
// nullptr, so the GallatinCudaImpl alias and its initHeap call still name something.
// NOTE(review): the stub declares no malloc()/free(); GallatinCudaImpl::create()/destroy() are
// member templates, so this presumably only matters if they are instantiated -- confirm.
39+
namespace gallatin::allocators
40+
{
41+
template<size_t...>
42+
struct Gallatin
43+
{
44+
static auto generate_on_device(auto...)
45+
{
46+
return nullptr;
47+
}
48+
};
49+
} // namespace gallatin::allocators
50+
51+
#endif
52+
53+
namespace mallocMC
54+
{
55+
namespace CreationPolicies
56+
{
57+
/**
58+
* @brief Prototype integration of Gallatin (https://dl.acm.org/doi/10.1145/3627535.3638499)
59+
*
60+
* This CreationPolicy integrates the CUDA code for the Gallatin prototype into mallocMC
61+
* as a thin wrapper. It's intended for proof-of-principle tests and benchmarks only and
62+
* obviously only works on CUDA devices.
63+
*
64+
* It also only works with the ReservePoolPolicies::Noop because it does what CudaSetLimits
65+
* does internally on its own.
66+
*
67+
* If we should ever see the need for it, we'd re-implement it in alpaka for a fully-fledged
68+
* and well-maintained version of this.
69+
* Experience has been mixed so far: While we could reproduce good performance in some cases,
70+
* fragmentation was found to be unusably high (to the point of single-digit utilisation of
71+
* available memory) in PIConGPU. That's why there's currently no plan to lift the prototype
72+
* status in the near future.
73+
*/
74+
template<
75+
typename T_AlignmentPolicy,
76+
size_t bytes_per_segment = 16ULL * 1024 * 1024,
77+
size_t smallest_slice = 16,
78+
size_t largest_slice = 4096>
79+
class GallatinCudaImpl
80+
{
81+
using Gallatin = gallatin::allocators::Gallatin<bytes_per_segment, smallest_slice, largest_slice>;
82+
83+
public:
84+
template<typename T_AlignmentPolicyLocal>
85+
using AlignmentAwarePolicy
86+
= GallatinCudaImpl<T_AlignmentPolicyLocal, bytes_per_segment, smallest_slice, largest_slice>;
87+
Gallatin* heap{nullptr};
88+
89+
static constexpr auto providesAvailableSlots = false;
90+
91+
template<typename AlpakaAcc>
92+
ALPAKA_FN_ACC auto create(AlpakaAcc const& acc, uint32_t bytes) const -> void*
93+
{
94+
return heap->malloc(static_cast<size_t>(bytes));
95+
}
96+
97+
template<typename AlpakaAcc>
98+
ALPAKA_FN_ACC void destroy(AlpakaAcc const& /*acc*/, void* mem) const
99+
{
100+
heap->free(mem);
101+
}
102+
103+
ALPAKA_FN_ACC auto isOOM(void* p, size_t s) const -> bool
104+
{
105+
return s != 0 && (p == nullptr);
106+
}
107+
108+
template<typename AlpakaAcc, typename AlpakaDevice, typename AlpakaQueue, typename T_DeviceAllocator>
109+
static void initHeap(
110+
AlpakaDevice& dev,
111+
AlpakaQueue& queue,
112+
T_DeviceAllocator* devAllocator,
113+
void*,
114+
size_t memsize)
115+
{
116+
static_assert(
117+
std::is_same_v<alpaka::AccToTag<AlpakaAcc>, alpaka::TagGpuCudaRt>,
118+
"The GallatinCuda creation policy is only available on CUDA architectures. Please choose a "
119+
"different one.");
120+
121+
// This is an extremely hot fix:
122+
// PIConGPU initialises its allocator with 0 bytes to be able to distribute the pointer.
123+
// Only afterwards it can find out its actual memory requirements and uses destructiveResize to set
124+
// the correct heap size. Gallatin runs into issues with this approach.
125+
// Instead, we simply don't believe the request if it's 0.
126+
if(memsize == 0)
127+
return;
128+
129+
auto devHost = alpaka::getDevByIdx(alpaka::PlatformCpu{}, 0);
130+
using Dim = typename alpaka::trait::DimType<AlpakaAcc>::type;
131+
using Idx = typename alpaka::trait::IdxType<AlpakaAcc>::type;
132+
using VecType = alpaka::Vec<Dim, Idx>;
133+
134+
auto tmp = Gallatin::generate_on_device(memsize, 42, true);
135+
auto workDivSingleThread
136+
= alpaka::WorkDivMembers<Dim, Idx>{VecType::ones(), VecType::ones(), VecType::ones()};
137+
alpaka::exec<AlpakaAcc>(
138+
queue,
139+
workDivSingleThread,
140+
[tmp, devAllocator] ALPAKA_FN_ACC(AlpakaAcc const&) { devAllocator->heap = tmp; });
141+
}
142+
143+
static auto classname() -> std::string
144+
{
145+
return "GallatinCuda";
146+
}
147+
};
148+
149+
/**
 * @brief Creation-policy tag that carries only the Gallatin configuration.
 *
 * The struct has no members of its own. AlignmentAwarePolicy names the GallatinCudaImpl
 * instantiation for a given alignment policy, forwarding the three size parameters
 * unchanged. The defaults are the same values GallatinCudaImpl uses.
 */
template<
150+
size_t bytes_per_segment = 16ULL * 1024 * 1024,
151+
size_t smallest_slice = 16,
152+
size_t largest_slice = 4096>
153+
struct GallatinCuda
154+
{
155+
// Binds the alignment policy and yields the class that actually implements the policy.
template<typename T_AlignmentPolicy>
156+
using AlignmentAwarePolicy
157+
= GallatinCudaImpl<T_AlignmentPolicy, bytes_per_segment, smallest_slice, largest_slice>;
158+
};
159+
160+
} // namespace CreationPolicies
161+
} // namespace mallocMC

include/mallocMC/mallocMC.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "alignmentPolicies/Noop.hpp"
4848
#include "alignmentPolicies/Shrink.hpp"
4949
#include "creationPolicies/FlatterScatter.hpp"
50+
#include "creationPolicies/GallatinCuda.hpp"
5051
#include "creationPolicies/OldMalloc.hpp"
5152
#include "creationPolicies/Scatter.hpp"
5253
#include "distributionPolicies/Noop.hpp"
@@ -55,3 +56,4 @@
5556
#include "oOMPolicies/ReturnNull.hpp"
5657
#include "reservePoolPolicies/AlpakaBuf.hpp"
5758
#include "reservePoolPolicies/CudaSetLimits.hpp"
59+
#include "reservePoolPolicies/Noop.hpp"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
mallocMC: Memory Allocator for Many Core Architectures.
3+
4+
Copyright 2014-2024 Institute of Radiation Physics,
5+
Helmholtz-Zentrum Dresden - Rossendorf
6+
7+
Author(s): Carlchristian Eckert - c.eckert ( at ) hzdr.de
8+
Julian Lenz - j.lenz ( at ) hzdr.de
9+
10+
Permission is hereby granted, free of charge, to any person obtaining a copy
11+
of this software and associated documentation files (the "Software"), to deal
12+
in the Software without restriction, including without limitation the rights
13+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
copies of the Software, and to permit persons to whom the Software is
15+
furnished to do so, subject to the following conditions:
16+
17+
The above copyright notice and this permission notice shall be included in
18+
all copies or substantial portions of the Software.
19+
20+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
THE SOFTWARE.
27+
*/
28+
29+
#pragma once
30+
31+
namespace mallocMC
32+
{
33+
namespace ReservePoolPolicies
34+
{
35+
/**
36+
* @brief Does exactly nothing.
37+
*
38+
* This is intended for use with prototypes that were originally designed
39+
* to handle these aspects on their own. Currently needed for GallatinCuda.
40+
*/
41+
// NOTE(review): this header uses std::string and size_t but shows no #include of its own;
// presumably it relies on the including file -- confirm.
struct Noop
42+
{
43+
// Ignores both the device and the requested size and reserves nothing.
// @return always nullptr.
template<typename AlpakaDev>
44+
auto setMemPool(AlpakaDev const& /*dev*/, size_t /*memsize*/) -> void*
45+
{
46+
return nullptr;
47+
}
48+
49+
// Nothing was reserved by setMemPool, so there is nothing to release here.
static void resetMemPool()
50+
{
51+
}
52+
53+
// @return the fixed display name "Noop".
static auto classname() -> std::string
54+
{
55+
return "Noop";
56+
}
57+
};
58+
59+
} // namespace ReservePoolPolicies
60+
} // namespace mallocMC

0 commit comments

Comments
 (0)