Skip to content

Commit ec95056

Browse files
yingcong-wukbenzie
authored andcommitted
[DeivceASAN] Make ShadowMemory one instance per type (#16687)
UR PR: oneapi-src/unified-runtime#2585 --------- Co-authored-by: Kenneth Benzie (Benie) <[email protected]>
1 parent 6679ac2 commit ec95056

File tree

2 files changed

+151
-6
lines changed

2 files changed

+151
-6
lines changed
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# commit eaea885d5477c8936209175a5b00062ca44f5765
2-
# Merge: af4ab49c 2a03334c
1+
# commit 871061f1aa3b8ade57e0a2ed63d8000e257548cc
2+
# Merge: 262ec93e 7cca93f9
33
# Author: Kenneth Benzie (Benie) <[email protected]>
4-
# Date: Thu Jan 16 14:30:47 2025 +0000
5-
# Merge pull request #2569 from zhaomaosu/asan-only-warn-host-ptr
6-
# [DevASAN] Only report warning if passing host ptr to kernel
7-
set(UNIFIED_RUNTIME_TAG eaea885d5477c8936209175a5b00062ca44f5765)
4+
# Date: Tue Jan 21 13:26:45 2025 +0000
5+
# Merge pull request #2588 from kbenzie/benie/ci-delete-prerelease
6+
# Remove the prerelease.yml job
7+
set(UNIFIED_RUNTIME_TAG 871061f1aa3b8ade57e0a2ed63d8000e257548cc)
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// REQUIRES: aspect-ext_oneapi_virtual_mem, linux, (gpu && level_zero)
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} NEOReadDebugKeys=1 CreateMultipleRootDevices=2 %t.out
4+
5+
// Test for the assumption behide DevASAN shadow memory for L0GPU , which is it
6+
// is okay to access VirtualMem from different device/context.
7+
8+
#include <sycl/detail/core.hpp>
9+
10+
#include <sycl/ext/oneapi/virtual_mem/physical_mem.hpp>
11+
#include <sycl/ext/oneapi/virtual_mem/virtual_mem.hpp>
12+
13+
namespace syclext = sycl::ext::oneapi::experimental;
14+
15+
// Find the least common multiple of the context and device granularities. This
16+
// value can be used for aligning both physical memory allocations and for
17+
// reserving virtual memory ranges.
18+
size_t GetLCMGranularity(
19+
const sycl::device &Dev, const sycl::context &Ctx,
20+
syclext::granularity_mode Gm = syclext::granularity_mode::recommended) {
21+
size_t CtxGranularity = syclext::get_mem_granularity(Ctx, Gm);
22+
size_t DevGranularity = syclext::get_mem_granularity(Dev, Ctx, Gm);
23+
24+
size_t GCD = CtxGranularity;
25+
size_t Rem = DevGranularity % GCD;
26+
while (Rem != 0) {
27+
std::swap(GCD, Rem);
28+
Rem %= GCD;
29+
}
30+
return (DevGranularity / GCD) * CtxGranularity;
31+
}
32+
33+
size_t GetAlignedByteSize(const size_t UnalignedBytes,
34+
const size_t AligmentGranularity) {
35+
return ((UnalignedBytes + AligmentGranularity - 1) / AligmentGranularity) *
36+
AligmentGranularity;
37+
}
38+
39+
bool check_for_42(std::vector<int> &vec, int ref_result = 42) {
40+
return vec[42] == ref_result;
41+
}
42+
43+
int main() {
44+
// Get all available devices
45+
auto devices = sycl::device::get_devices();
46+
47+
// Filter out GPU devices
48+
std::vector<sycl::device> gpuDevices;
49+
for (const auto &dev : devices) {
50+
if (dev.is_gpu()) {
51+
gpuDevices.push_back(dev);
52+
}
53+
}
54+
55+
// Check if we have at least two GPU devices
56+
if (gpuDevices.size() < 2) {
57+
std::cerr << "Less than two GPU devices found." << std::endl;
58+
return 1;
59+
}
60+
61+
// Create contexts for the first two GPU devices
62+
auto dev1 = gpuDevices[0];
63+
auto dev2 = gpuDevices[1];
64+
sycl::context context1_d1(dev1);
65+
sycl::context context2_d1(dev1);
66+
sycl::context context_d2(dev2);
67+
68+
sycl::queue Q1_d1(context1_d1, dev1);
69+
sycl::queue Q2_d1(context2_d1, dev1);
70+
sycl::queue Q1_d2(context_d2, dev2);
71+
72+
constexpr size_t NumberOfElements = 1000;
73+
size_t BytesRequired = NumberOfElements * sizeof(int);
74+
size_t UsedGranularity = GetLCMGranularity(dev1, context2_d1);
75+
size_t AlignedByteSize =
76+
((BytesRequired + UsedGranularity - 1) / UsedGranularity) *
77+
UsedGranularity;
78+
printf("UsedGranularity: %zu\n", UsedGranularity);
79+
printf("AlignedByteSize: %zu\n", AlignedByteSize);
80+
81+
syclext::physical_mem NewPhysicalMem{dev1, context2_d1, AlignedByteSize};
82+
83+
uintptr_t VirtualMemoryPtr =
84+
syclext::reserve_virtual_mem(0, AlignedByteSize, context2_d1);
85+
86+
void *MappedPtr =
87+
NewPhysicalMem.map(VirtualMemoryPtr, AlignedByteSize,
88+
syclext::address_access_mode::read_write);
89+
90+
int *DataPtr = reinterpret_cast<int *>(MappedPtr);
91+
printf("DataPtr: %p\n", DataPtr);
92+
93+
std::vector<int> ResultHostData(NumberOfElements);
94+
constexpr int ExpectedValueAfterFill = 42;
95+
96+
{
97+
// Normal case, same device, same context
98+
sycl::buffer<int> CheckBuffer(ResultHostData);
99+
Q2_d1.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements)
100+
.wait_and_throw();
101+
Q2_d1.submit([&](sycl::handler &Handle) {
102+
sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
103+
Handle.parallel_for(NumberOfElements,
104+
[=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
105+
});
106+
Q2_d1.wait();
107+
}
108+
assert(check_for_42(ResultHostData));
109+
ResultHostData = std::vector<int>(NumberOfElements);
110+
Q2_d1.fill(DataPtr, 0, NumberOfElements).wait_and_throw();
111+
assert(check_for_42(ResultHostData, 0));
112+
113+
{
114+
// !!! Same device, different context !!!
115+
sycl::buffer<int> CheckBuffer(ResultHostData);
116+
Q1_d1.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements)
117+
.wait_and_throw();
118+
Q1_d1.submit([&](sycl::handler &Handle) {
119+
sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
120+
Handle.parallel_for(NumberOfElements,
121+
[=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
122+
});
123+
Q1_d1.wait();
124+
}
125+
assert(check_for_42(ResultHostData));
126+
ResultHostData = std::vector<int>(NumberOfElements);
127+
Q1_d1.fill(DataPtr, 0, NumberOfElements).wait_and_throw();
128+
assert(check_for_42(ResultHostData, 0));
129+
130+
{
131+
// !!! Different device, different context !!!
132+
sycl::buffer<int> CheckBuffer(ResultHostData);
133+
Q1_d2.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements)
134+
.wait_and_throw();
135+
Q1_d2.submit([&](sycl::handler &Handle) {
136+
sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
137+
Handle.parallel_for(NumberOfElements,
138+
[=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
139+
});
140+
Q1_d2.wait();
141+
}
142+
assert(check_for_42(ResultHostData));
143+
144+
return 0;
145+
}

0 commit comments

Comments
 (0)