Skip to content

Commit 165d1e4

Browse files
Use GfxPartition for GPU address range allocations
[2/n] - OsAgnosticMemoryManager Related-To: NEO-2877 Change-Id: I887126362381ac960608a2150fae211631d3cd5b Signed-off-by: Venevtsev, Igor <[email protected]>
1 parent bb6dfd4 commit 165d1e4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+500
-241
lines changed

runtime/device/device.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class Device : public BaseObject<_cl_device_id> {
115115
const HardwareCapabilities &getHardwareCapabilities() const { return hardwareCapabilities; }
116116
uint32_t getDeviceIndex() const { return deviceIndex; }
117117
bool isFullRangeSvm() const {
118-
return getHardwareInfo().capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress;
118+
return executionEnvironment->isFullRangeSvm();
119119
}
120120

121121
protected:

runtime/dll/linux/allocator_helper.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,13 @@
1111
#include "runtime/helpers/aligned_memory.h"
1212

1313
namespace NEO {
14+
1415
size_t getSizeToMap() {
1516
return static_cast<size_t>(alignUp(4 * GB - 8096, 4096));
1617
}
18+
19+
size_t getSizeToReserve() {
20+
return maxNBitValue<47> / 4;
21+
}
22+
1723
} // namespace NEO

runtime/execution_environment/execution_environment.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
5454
const HardwareInfo *getHardwareInfo() const { return hwInfo.get(); }
5555
HardwareInfo *getMutableHardwareInfo() const { return hwInfo.get(); }
5656
bool isFullRangeSvm() const {
57-
return hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress;
57+
return hwInfo->capabilityTable.gpuAddressSpace >= maxNBitValue<47>;
5858
}
5959

6060
GmmHelper *getGmmHelper() const;

runtime/memory_manager/gfx_partition.cpp

Lines changed: 105 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "runtime/memory_manager/gfx_partition.h"
99

1010
#include "runtime/helpers/aligned_memory.h"
11+
#include "runtime/os_interface/os_memory.h"
1112

1213
namespace NEO {
1314

@@ -16,34 +17,116 @@ const std::array<HeapIndex, 4> GfxPartition::heap32Names{{HeapIndex::HEAP_INTERN
1617
HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
1718
HeapIndex::HEAP_EXTERNAL}};
1819

19-
void GfxPartition::init(uint64_t gpuAddressSpace) {
20-
21-
// 1) Full Range SVM gfx layout:
22-
//
23-
// SVM H0 H1 H2 H3 STANDARD STANDARD64K
24-
// |__________________________________|____|____|____|____|________________|______________|
25-
// | | | | | | | |
26-
// | gfxBase gfxTop
27-
// 0x0 0x0000800000000000/0x10000000 for 32 bit 0x0000FFFFFFFFFFFFFFFF
28-
//
29-
// 2) Limited Range gfx layout (no SVM):
30-
//
31-
// H0 H1 H2 H3 STANDARD STANDARD64K
32-
// |____|____|____|____|____________________|__________________|
33-
// | | | | | | |
34-
// gfxBase gfxTop
35-
// 0x0 0xFFF...FFF < 48 bit
20+
const std::array<HeapIndex, 6> GfxPartition::heapNonSvmNames{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY,
21+
HeapIndex::HEAP_INTERNAL,
22+
HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
23+
HeapIndex::HEAP_EXTERNAL,
24+
HeapIndex::HEAP_STANDARD,
25+
HeapIndex::HEAP_STANDARD64KB}};
26+
// Releases the CPU address range held in reservedCpuAddressRange, if one was
// reserved (GfxPartition::init reserves it for the 47-bit full range SVM layout).
GfxPartition::~GfxPartition() {
    if (!reservedCpuAddressRange) {
        // Nothing was reserved, so there is nothing to hand back.
        return;
    }

    OSMemory::releaseCpuAddressRange(reservedCpuAddressRange, reservedCpuAddressRangeSize);
}
31+
32+
// Records the heap's [base, base + size) extent and creates the allocator that
// serves GPU address ranges from it.
//
// base - first address of the heap
// size - total size of the heap in bytes
void GfxPartition::Heap::init(uint64_t base, uint64_t size) {
    this->base = base;
    this->size = size;

    // The very first and very last 64K of the heap are kept out of GPU address
    // range allocation. The two guard bands are only subtracted when the heap is
    // larger than both of them together; otherwise the size is passed unchanged.
    const uint64_t guardBandsSize = 2 * GfxPartition::heapGranularity;
    const uint64_t allocatableSize = (size > guardBandsSize) ? (size - guardBandsSize) : size;

    // The allocator always starts one granule above the heap base.
    alloc = std::make_unique<HeapAllocator>(base + GfxPartition::heapGranularity, allocatableSize);
}
43+
44+
void GfxPartition::freeGpuAddressRange(uint64_t ptr, size_t size) {
45+
for (auto heapName : GfxPartition::heapNonSvmNames) {
46+
auto &heap = getHeap(heapName);
47+
if ((ptr > heap.getBase()) && ((ptr + size) < heap.getLimit())) {
48+
heap.free(ptr, size);
49+
break;
50+
}
51+
}
52+
}
53+
54+
void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve) {
55+
56+
/*
57+
* I. 64-bit builds:
58+
*
59+
* 1) 48-bit Full Range SVM gfx layout:
60+
*
61+
* SVM H0 H1 H2 H3 STANDARD STANDARD64K
62+
* |__________________________________|____|____|____|____|________________|______________|
63+
* | | | | | | | |
64+
* | gfxBase gfxTop
65+
* 0x0 0x0000800000000000 0x0000FFFFFFFFFFFF
66+
*
67+
*
68+
* 2) 47-bit Full Range SVM gfx layout:
69+
*
70+
* gfxSize = 2^47 / 4 = 0x200000000000
71+
* ________________________________________________
72+
* / \
73+
* SVM / H0 H1 H2 H3 STANDARD STANDARD64K \ SVM
74+
* |________________|____|____|____|____|________________|______________|_______________|
75+
* | | | | | | | | |
76+
* | gfxBase gfxTop |
77+
* 0x0 reserveCpuAddressRange(gfxSize) 0x00007FFFFFFFFFFF
78+
* \_____________________________________ SVM _________________________________________/
79+
*
80+
*
81+
*
82+
* 3) Limited Range gfx layout (no SVM):
83+
*
84+
* H0 H1 H2 H3 STANDARD STANDARD64K
85+
* |____|____|____|____|____________________|__________________|
86+
* | | | | | | |
87+
* gfxBase gfxTop
88+
* 0x0 0xFFF...FFF < 47 bit
89+
*
90+
*
91+
* II. 32-bit builds:
92+
*
93+
* 1) 32-bit Full Range SVM gfx layout:
94+
*
95+
* SVM H0 H1 H2 H3 STANDARD STANDARD64K
96+
* |_______|____|____|____|____|________________|______________|
97+
* | | | | | | | |
98+
* | gfxBase gfxTop
99+
* 0x0 0x100000000 gpuAddressSpace
100+
*/
36101

37102
uint64_t gfxTop = gpuAddressSpace + 1;
38-
uint64_t gfxBase = is64bit ? MemoryConstants::max64BitAppAddress + 1 : MemoryConstants::max32BitAddress + 1;
103+
uint64_t gfxBase = 0x0ull;
39104
const uint64_t gfxHeap32Size = 4 * MemoryConstants::gigaByte;
40105

41-
if (gpuAddressSpace < MemoryConstants::max48BitAddress) {
42-
gfxBase = 0ull;
106+
if (is32bit) {
107+
gfxBase = maxNBitValue<32> + 1;
108+
heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
109+
} else {
110+
if (gpuAddressSpace == maxNBitValue<48>) {
111+
gfxBase = maxNBitValue<48 - 1> + 1;
112+
heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
113+
} else if (gpuAddressSpace == maxNBitValue<47>) {
114+
reservedCpuAddressRangeSize = cpuAddressRangeSizeToReserve;
115+
UNRECOVERABLE_IF(reservedCpuAddressRangeSize == 0);
116+
reservedCpuAddressRange = OSMemory::reserveCpuAddressRange(reservedCpuAddressRangeSize);
117+
UNRECOVERABLE_IF(reservedCpuAddressRange == nullptr);
118+
UNRECOVERABLE_IF(!isAligned<GfxPartition::heapGranularity>(reservedCpuAddressRange));
119+
gfxBase = reinterpret_cast<uint64_t>(reservedCpuAddressRange);
120+
gfxTop = gfxBase + reservedCpuAddressRangeSize;
121+
heapInit(HeapIndex::HEAP_SVM, 0ull, gpuAddressSpace + 1);
122+
} else if (gpuAddressSpace < maxNBitValue<47>) {
123+
gfxBase = 0ull;
124+
heapInit(HeapIndex::HEAP_SVM, 0ull, 0ull);
125+
} else {
126+
UNRECOVERABLE_IF("Invalid GPU Address Range!");
127+
}
43128
}
44129

45-
heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
46-
47130
for (auto heap : GfxPartition::heap32Names) {
48131
heapInit(heap, gfxBase, gfxHeap32Size);
49132
gfxBase += gfxHeap32Size;

runtime/memory_manager/gfx_partition.h

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ constexpr auto internalHeapIndex = is32bit ? HeapIndex::HEAP_INTERNAL : HeapInde
3131
class GfxPartition {
3232
public:
3333
GfxPartition() {}
34+
~GfxPartition();
3435

35-
void init(uint64_t gpuAddressSpace);
36+
void init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve);
3637

3738
void heapInit(HeapIndex heapIndex, uint64_t base, uint64_t size) {
3839
getHeap(heapIndex).init(base, size);
@@ -46,33 +47,35 @@ class GfxPartition {
4647
getHeap(heapIndex).free(ptr, size);
4748
}
4849

50+
void freeGpuAddressRange(uint64_t ptr, size_t size);
51+
4952
uint64_t getHeapBase(HeapIndex heapIndex) {
5053
return getHeap(heapIndex).getBase();
5154
}
5255

5356
uint64_t getHeapLimit(HeapIndex heapIndex) {
54-
return getHeap(heapIndex).getBase() + getHeap(heapIndex).getSize() - 1;
57+
return getHeap(heapIndex).getLimit();
5558
}
5659

5760
uint64_t getHeapMinimalAddress(HeapIndex heapIndex) {
5861
return getHeapBase(heapIndex) + heapGranularity;
5962
}
6063

64+
bool isLimitedRange() { return getHeap(HeapIndex::HEAP_SVM).getSize() == 0ull; }
65+
6166
static const uint64_t heapGranularity = MemoryConstants::pageSize64k;
6267

6368
static const std::array<HeapIndex, 4> heap32Names;
69+
static const std::array<HeapIndex, 6> heapNonSvmNames;
6470

6571
protected:
6672
class Heap {
6773
public:
6874
Heap() = default;
75+
void init(uint64_t base, uint64_t size);
6976
uint64_t getBase() const { return base; }
7077
uint64_t getSize() const { return size; }
71-
void init(uint64_t base, uint64_t size) {
72-
this->base = base;
73-
this->size = size;
74-
alloc = std::make_unique<HeapAllocator>(base + heapGranularity, size ? size - heapGranularity : 0ull);
75-
}
78+
uint64_t getLimit() const { return base + size - 1; }
7679
uint64_t allocate(size_t &size) { return alloc->allocate(size); }
7780
void free(uint64_t ptr, size_t size) { alloc->free(ptr, size); }
7881

@@ -82,10 +85,13 @@ class GfxPartition {
8285
};
8386

8487
Heap &getHeap(HeapIndex heapIndex) {
85-
return heap[static_cast<uint32_t>(heapIndex)];
88+
return heaps[static_cast<uint32_t>(heapIndex)];
8689
}
8790

88-
std::array<Heap, static_cast<uint32_t>(HeapIndex::TOTAL_HEAPS)> heap;
91+
std::array<Heap, static_cast<uint32_t>(HeapIndex::TOTAL_HEAPS)> heaps;
92+
93+
void *reservedCpuAddressRange = nullptr;
94+
size_t reservedCpuAddressRangeSize = 0;
8995
};
9096

9197
} // namespace NEO

runtime/memory_manager/memory_manager.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ class MemoryManager {
111111

112112
virtual uint64_t getExternalHeapBaseAddress() = 0;
113113

114+
bool isLimitedRange() { return gfxPartition.isLimitedRange(); }
115+
114116
bool peek64kbPagesEnabled() const { return enable64kbpages; }
115117
bool peekForce32BitAllocations() const { return force32bitAllocations; }
116118
virtual void setForce32BitAllocations(bool newValue);

0 commit comments

Comments
 (0)