Skip to content

Commit e004043

Browse files
Merge pull request #297 from chillenzer/fix-array-assert-with-modern-compilers
Fix array assert compilation failure
2 parents fea3342 + e30354d commit e004043

File tree

7 files changed

+128
-38
lines changed

7 files changed

+128
-38
lines changed

examples/native-cuda/source/main.cu

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@
2626
THE SOFTWARE.
2727
*/
2828

29+
#include "mallocMC/span.hpp"
30+
2931
#include <mallocMC/mallocMC.cuh>
3032

3133
#include <cstdint>
3234
#include <cstdlib>
3335
#include <functional>
34-
#include <span>
3536

3637
/**
3738
* @brief Computes the sum of squares of the first `n` natural numbers.
@@ -66,15 +67,14 @@ __device__ auto sumOfSquares(auto const n)
6667
*/
6768
__global__ void oneDotProductPerThread(mallocMC::CudaMemoryManager<> memoryManager, uint64_t numValues)
6869
{
70+
using mallocMC::span;
6971
uint64_t tid = threadIdx.x + blockIdx.x * blockDim.x;
7072

7173
// Not very realistic, all threads are doing this on their own:
72-
auto a = std::span<uint64_t>(
73-
reinterpret_cast<uint64_t*>(memoryManager.malloc(numValues * sizeof(uint64_t))),
74-
numValues);
75-
auto b = std::span<uint64_t>(
76-
reinterpret_cast<uint64_t*>(memoryManager.malloc(numValues * sizeof(uint64_t))),
77-
numValues);
74+
auto a
75+
= span<uint64_t>(reinterpret_cast<uint64_t*>(memoryManager.malloc(numValues * sizeof(uint64_t))), numValues);
76+
auto b
77+
= span<uint64_t>(reinterpret_cast<uint64_t*>(memoryManager.malloc(numValues * sizeof(uint64_t))), numValues);
7878

7979
std::iota(std::begin(a), std::end(a), tid);
8080
std::iota(std::begin(b), std::end(b), tid);

include/mallocMC/creationPolicies/FlatterScatter/BitField.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include "mallocMC/creationPolicies/FlatterScatter/wrappingLoop.hpp"
3030
#include "mallocMC/mallocMC_utils.hpp"
31+
#include "mallocMC/span.hpp"
3132

3233
#include <alpaka/core/Common.hpp>
3334
#include <alpaka/intrinsic/Traits.hpp>
@@ -36,7 +37,6 @@
3637

3738
#include <cstdint>
3839
#include <limits>
39-
#include <span>
4040
#include <type_traits>
4141

4242
namespace mallocMC::CreationPolicies::FlatterScatterAlloc
@@ -344,7 +344,7 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc
344344
template<uint32_t MyBitMaskSize = BitMaskSize>
345345
struct BitFieldFlatImpl
346346
{
347-
std::span<BitMaskImpl<MyBitMaskSize>> data;
347+
mallocMC::span<BitMaskImpl<MyBitMaskSize>> data;
348348

349349
/**
350350
* @brief Check if the index-th bit in the bit field is set (=1).

include/mallocMC/span.hpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
mallocMC: Memory Allocator for Many Core Architectures.
3+
http://www.icg.tugraz.at/project/mvp
4+
https://www.hzdr.de/crp
5+
6+
Copyright (C) 2012 Institute for Computer Graphics and Vision,
7+
Graz University of Technology
8+
Copyright (C) 2014-2026 Institute of Radiation Physics,
9+
Helmholtz-Zentrum Dresden - Rossendorf
10+
11+
Author(s): Markus Steinberger - steinberger ( at ) icg.tugraz.at
12+
Bernhard Kainz - kainz ( at ) icg.tugraz.at
13+
Michael Kenzel - kenzel ( at ) icg.tugraz.at
14+
Rene Widera - r.widera ( at ) hzdr.de
15+
Axel Huebl - a.huebl ( at ) hzdr.de
16+
Carlchristian Eckert - c.eckert ( at ) hzdr.de
17+
Julian Lenz - j.lenz ( at ) hzdr.de
18+
19+
Permission is hereby granted, free of charge, to any person obtaining a copy
20+
of this software and associated documentation files (the "Software"), to deal
21+
in the Software without restriction, including without limitation the rights
22+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23+
copies of the Software, and to permit persons to whom the Software is
24+
furnished to do so, subject to the following conditions:
25+
26+
The above copyright notice and this permission notice shall be included in
27+
all copies or substantial portions of the Software.
28+
29+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
35+
THE SOFTWARE.
36+
*/
37+
38+
// This is a workaround for the following issue:
39+
// https://github.com/llvm/llvm-project/pull/136133
40+
// If clang or clang-based compilers like hipcc try to compile device code
41+
// with a too recent version of libstdc++ from GCC,
42+
// they run into issues like
43+
// error: reference to __host__ function '__glibcxx_assert_fail' in __host__ __device__ function
44+
45+
#pragma once
46+
#include <cstddef>
47+
48+
namespace mallocMC
49+
{
50+
template<typename TData>
51+
struct span
52+
{
53+
TData* ptr_;
54+
size_t size_;
55+
56+
constexpr span(TData* ptr, size_t size) : ptr_(ptr), size_(size) {};
57+
58+
// This is explicitly NOT `explcit` because we want to be able to
59+
// silently wrap an array into a span within other constructor calls.
60+
template<size_t N>
61+
constexpr span(TData (&arr)[N]) : ptr_(arr)
62+
, size_(N)
63+
{
64+
}
65+
66+
[[nodiscard]] constexpr auto size() const -> size_t
67+
{
68+
return size_;
69+
}
70+
71+
[[nodiscard]] constexpr auto operator[](size_t index) const -> decltype(auto)
72+
{
73+
return ptr_[index];
74+
}
75+
76+
[[nodiscard]] constexpr auto begin() const -> decltype(auto)
77+
{
78+
return ptr_;
79+
}
80+
81+
[[nodiscard]] constexpr auto end() const -> decltype(auto)
82+
{
83+
return &(ptr_[size_]);
84+
}
85+
};
86+
} // namespace mallocMC

test/multithreaded/source/AccessBlock.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "mallocMC/creationPolicies/FlatterScatter/AccessBlock.hpp"
2929

3030
#include "mallocMC/mallocMC_utils.hpp"
31+
#include "mallocMC/span.hpp"
3132
#include "mocks.hpp"
3233

3334
#include <alpaka/acc/AccCpuSerial.hpp>
@@ -61,11 +62,11 @@
6162
#include <cstdio>
6263
#include <functional>
6364
#include <iterator>
64-
#include <span>
6565
#include <tuple>
6666
#include <type_traits>
6767

6868
using mallocMC::CreationPolicies::FlatterScatterAlloc::AccessBlock;
69+
using mallocMC::span;
6970

7071
using Dim = alpaka::DimInt<1>;
7172
using Idx = std::uint32_t;
@@ -78,7 +79,6 @@ constexpr uint32_t pteSize = 4 + 4;
7879
constexpr uint32_t blockSize = numPages * (pageSize + pteSize);
7980

8081
using MyAccessBlock = AccessBlock<HeapConfig<blockSize, pageSize>, AlignmentPolicy>;
81-
using std::span;
8282

8383
// Fill all pages of the given access block with occupied chunks of the given size. This is useful to test the
8484
// behaviour near full filling but also to have a deterministic page and chunk where an allocation must happen
@@ -185,8 +185,8 @@ struct IsValid
185185
bool* results,
186186
uint32_t const size) const
187187
{
188-
std::span<void*> tmpPointers(pointers, size);
189-
std::span<bool> tmpResults(results, size);
188+
span<void*> tmpPointers(pointers, size);
189+
span<bool> tmpResults(results, size);
190190
std::transform(
191191
std::begin(tmpPointers),
192192
std::end(tmpPointers),
@@ -236,7 +236,7 @@ auto createChunkSizes(auto const& devHost, auto const& devAcc, auto& queue)
236236
auto createPointers(auto const& devHost, auto const& devAcc, auto& queue, uint32_t const size)
237237
{
238238
auto pointers = makeBuffer<void*>(devHost, devAcc, size);
239-
std::span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
239+
span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
240240
std::fill(std::begin(tmp), std::end(tmp), reinterpret_cast<void*>(1U));
241241
alpaka::memcpy(queue, pointers.m_onDevice, pointers.m_onHost);
242242
return pointers;
@@ -310,7 +310,7 @@ auto freeAllButOneOnFirstPage(
310310
AccessBlock<HeapConfig<T_blockSize, T_pageSize>, AlignmentPolicy>* accessBlock,
311311
auto& pointers)
312312
{
313-
std::span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
313+
span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
314314
std::sort(std::begin(tmp), std::end(tmp));
315315
// This points to the first chunk of page 0.
316316
auto* pointer1 = tmp[0];
@@ -375,7 +375,7 @@ auto checkContent(
375375
alpaka::wait(queue);
376376

377377

378-
std::span<bool> tmpResults(alpaka::getPtrNative(results.m_onHost), results.m_extents[0]);
378+
span<bool> tmpResults(alpaka::getPtrNative(results.m_onHost), results.m_extents[0]);
379379
auto writtenCorrectly = std::reduce(std::cbegin(tmpResults), std::cend(tmpResults), true, std::multiplies<bool>{});
380380

381381
return writtenCorrectly;
@@ -758,7 +758,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded AccessBlock", "", alpaka::EnabledAccTags)
758758
devHost,
759759
devAcc,
760760
getAvailableSlots<Acc>(accessBlock, queue, devHost, devAcc, chunkSizes.m_onHost[0]));
761-
std::span<uint32_t> tmp(alpaka::getPtrNative(content.m_onHost), content.m_extents[0]);
761+
span<uint32_t> tmp(alpaka::getPtrNative(content.m_onHost), content.m_extents[0]);
762762
std::generate(std::begin(tmp), std::end(tmp), ContentGenerator{});
763763
alpaka::memcpy(queue, content.m_onDevice, content.m_onHost);
764764
alpaka::wait(queue);
@@ -814,7 +814,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded AccessBlock", "", alpaka::EnabledAccTags)
814814
alpaka::memcpy(queue, result.m_onHost, result.m_onDevice);
815815
alpaka::wait(queue);
816816

817-
std::span<bool> tmpResults(alpaka::getPtrNative(result.m_onHost), result.m_extents[0]);
817+
span<bool> tmpResults(alpaka::getPtrNative(result.m_onHost), result.m_extents[0]);
818818
CHECK(std::none_of(std::cbegin(tmpResults), std::cend(tmpResults), [](auto const val) { return val; }));
819819

820820
CHECK(getAvailableSlots<Acc>(accessBlock, queue, devHost, devAcc, chunkSizes.m_onHost[0]) == allSlots);
@@ -837,7 +837,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded AccessBlock", "", alpaka::EnabledAccTags)
837837
alpaka::memcpy(queue, pointers.m_onHost, pointers.m_onDevice);
838838
alpaka::wait(queue);
839839

840-
std::span<void*> tmpPointers(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
840+
span<void*> tmpPointers(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
841841
std::sort(std::begin(tmpPointers), std::end(tmpPointers));
842842
CHECK(std::unique(std::begin(tmpPointers), std::end(tmpPointers)) == std::end(tmpPointers));
843843
}
@@ -865,7 +865,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded AccessBlock", "", alpaka::EnabledAccTags)
865865

866866
// We only let the last (availableSlots-1) keep their memory. So, the rest at the beginning should have a
867867
// nullptr.
868-
std::span<void*> tmpManyPointers(alpaka::getPtrNative(manyPointers.m_onHost), manyPointers.m_extents[0]);
868+
span<void*> tmpManyPointers(alpaka::getPtrNative(manyPointers.m_onHost), manyPointers.m_extents[0]);
869869
auto beginNonNull = std::begin(tmpManyPointers) + (oversubscriptionFactor - 1) * availableSlots + 1;
870870

871871
CHECK(std::all_of(
@@ -880,7 +880,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded AccessBlock", "", alpaka::EnabledAccTags)
880880
SECTION("can handle many different chunk sizes.")
881881
{
882882
auto chunkSizes = makeBuffer<uint32_t>(devHost, devAcc, pageSize);
883-
std::span<uint32_t> chunkSizesSpan(alpaka::getPtrNative(chunkSizes.m_onHost), chunkSizes.m_extents[0]);
883+
span<uint32_t> chunkSizesSpan(alpaka::getPtrNative(chunkSizes.m_onHost), chunkSizes.m_extents[0]);
884884
std::iota(std::begin(chunkSizesSpan), std::end(chunkSizesSpan), 1U);
885885
alpaka::memcpy(queue, chunkSizes.m_onDevice, chunkSizes.m_onHost);
886886
alpaka::wait(queue);
@@ -892,14 +892,14 @@ TEMPLATE_LIST_TEST_CASE("Threaded AccessBlock", "", alpaka::EnabledAccTags)
892892
CreateAllChunkSizes{},
893893
accessBlock,
894894
span<void*>(alpaka::getPtrNative(pointers.m_onDevice), MyAccessBlock::numPages()),
895-
std::span<uint32_t>(alpaka::getPtrNative(chunkSizes.m_onDevice), chunkSizes.m_extents[0]));
895+
span<uint32_t>(alpaka::getPtrNative(chunkSizes.m_onDevice), chunkSizes.m_extents[0]));
896896

897897
alpaka::wait(queue);
898898

899899
alpaka::memcpy(queue, pointers.m_onHost, pointers.m_onDevice);
900900
alpaka::wait(queue);
901901

902-
std::span<void*> tmpPointers(alpaka::getPtrNative(pointers.m_onHost), MyAccessBlock::numPages());
902+
span<void*> tmpPointers(alpaka::getPtrNative(pointers.m_onHost), MyAccessBlock::numPages());
903903
std::sort(std::begin(tmpPointers), std::end(tmpPointers));
904904
CHECK(std::unique(std::begin(tmpPointers), std::end(tmpPointers)) == std::end(tmpPointers));
905905
}

test/multithreaded/source/Scatter.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "mallocMC/distributionPolicies/Noop.hpp"
3434
#include "mallocMC/mallocMC_utils.hpp"
3535
#include "mallocMC/oOMPolicies/ReturnNull.hpp"
36+
#include "mallocMC/span.hpp"
3637
#include "mocks.hpp"
3738

3839
#include <alpaka/acc/AccCpuSerial.hpp>
@@ -66,10 +67,10 @@
6667
#include <cstdio>
6768
#include <functional>
6869
#include <iterator>
69-
#include <span>
7070
#include <tuple>
7171
#include <type_traits>
7272

73+
using mallocMC::span;
7374
using Dim = alpaka::DimInt<1>;
7475
using Idx = std::uint32_t;
7576

@@ -98,8 +99,6 @@ using MyDeviceAllocator = mallocMC::DeviceAllocator<
9899
mallocMC::OOMPolicies::ReturnNull,
99100
mallocMC::AlignmentPolicies::Shrink<>>;
100101

101-
using std::span;
102-
103102
// Fill all pages of the given access block with occupied chunks of the given size. This is useful to test the
104103
// behaviour near full filling but also to have a deterministic page and chunk where an allocation must happen
105104
// regardless of the underlying access optimisations etc.
@@ -236,7 +235,7 @@ auto createChunkSizes(auto const& devHost, auto const& devAcc, auto& queue)
236235
auto createPointers(auto const& devHost, auto const& devAcc, auto& queue, uint32_t const size)
237236
{
238237
auto pointers = makeBuffer<void*>(devHost, devAcc, size);
239-
std::span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
238+
span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
240239
std::fill(std::begin(tmp), std::end(tmp), reinterpret_cast<void*>(1U));
241240
alpaka::memcpy(queue, pointers.m_onDevice, pointers.m_onHost);
242241
return pointers;
@@ -307,7 +306,7 @@ auto fillAllButOne(auto& queue, auto* accessBlock, auto const& chunkSize, auto&
307306
template<typename TAcc>
308307
auto freeAllButOneOnFirstPage(auto& queue, auto* accessBlock, auto& pointers)
309308
{
310-
std::span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
309+
span<void*> tmp(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
311310
std::sort(std::begin(tmp), std::end(tmp));
312311
// This points to the first chunk of page 0.
313312
auto* pointer1 = tmp[0];
@@ -371,7 +370,7 @@ auto checkContent(
371370
alpaka::wait(queue);
372371

373372

374-
std::span<bool> tmpResults(alpaka::getPtrNative(results.m_onHost), results.m_extents[0]);
373+
span<bool> tmpResults(alpaka::getPtrNative(results.m_onHost), results.m_extents[0]);
375374
auto writtenCorrectly = std::reduce(std::cbegin(tmpResults), std::cend(tmpResults), true, std::multiplies<bool>{});
376375

377376
return writtenCorrectly;
@@ -735,7 +734,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded Scatter", "", alpaka::EnabledAccTags)
735734
devHost,
736735
devAcc,
737736
getAvailableSlots<Acc>(accessBlock, queue, devHost, devAcc, chunkSizes.m_onHost[0]));
738-
std::span<uint32_t> tmp(alpaka::getPtrNative(content.m_onHost), content.m_extents[0]);
737+
span<uint32_t> tmp(alpaka::getPtrNative(content.m_onHost), content.m_extents[0]);
739738
std::generate(std::begin(tmp), std::end(tmp), ContentGenerator{});
740739
alpaka::memcpy(queue, content.m_onDevice, content.m_onHost);
741740
alpaka::wait(queue);
@@ -796,7 +795,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded Scatter", "", alpaka::EnabledAccTags)
796795
alpaka::memcpy(queue, pointers.m_onHost, pointers.m_onDevice);
797796
alpaka::wait(queue);
798797

799-
std::span<void*> tmpPointers(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
798+
span<void*> tmpPointers(alpaka::getPtrNative(pointers.m_onHost), pointers.m_extents[0]);
800799
std::sort(std::begin(tmpPointers), std::end(tmpPointers));
801800
CHECK(std::unique(std::begin(tmpPointers), std::end(tmpPointers)) == std::end(tmpPointers));
802801
}
@@ -824,7 +823,7 @@ TEMPLATE_LIST_TEST_CASE("Threaded Scatter", "", alpaka::EnabledAccTags)
824823

825824
// We only let the last (availableSlots-1) keep their memory. So, the rest at the beginning should have a
826825
// nullptr.
827-
std::span<void*> tmpManyPointers(alpaka::getPtrNative(manyPointers.m_onHost), manyPointers.m_extents[0]);
826+
span<void*> tmpManyPointers(alpaka::getPtrNative(manyPointers.m_onHost), manyPointers.m_extents[0]);
828827
auto beginNonNull = std::begin(tmpManyPointers) + (oversubscriptionFactor - 1) * availableSlots + 1;
829828

830829
CHECK(std::all_of(

0 commit comments

Comments
 (0)