Skip to content

Commit 55f3c8f

Browse files
Add resolve capability for compressed USM device allocations
Related-To: NEO-5107
Signed-off-by: Slawomir Milczarek <[email protected]>
1 parent d7ff26c commit 55f3c8f

27 files changed, with 637 additions and 216 deletions.

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
9191
auto surfaceState = surfaceStateHeap.begin() + ptr.bindful;
9292
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
9393
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
94-
NEO::Buffer::setSurfaceState(&device, surfaceState, sizeToPatch, addressToPatch, 0,
94+
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
9595
&allocation, 0, 0);
9696
}
9797
}

opencl/source/built_ins/aux_translation_builtin.h

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -8,9 +8,12 @@
88
#pragma once
99
#include "shared/source/built_ins/built_ins.h"
1010
#include "shared/source/helpers/hw_helper.h"
11+
#include "shared/source/memory_manager/graphics_allocation.h"
1112

1213
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
1314
#include "opencl/source/helpers/dispatch_info_builder.h"
15+
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
16+
#include "opencl/source/mem_obj/buffer.h"
1417

1518
#include "pipe_control_args.h"
1619

@@ -24,21 +27,20 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
2427
template <typename GfxFamily>
2528
bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
2629
size_t kernelInstanceNumber = 0;
27-
size_t numMemObjectsToTranslate = multiDispatchInfo.getMemObjsForAuxTranslation()->size();
28-
resizeKernelInstances(numMemObjectsToTranslate);
30+
size_t numKernelObjectsToTranslate = multiDispatchInfo.getKernelObjsForAuxTranslation()->size();
31+
resizeKernelInstances(numKernelObjectsToTranslate);
2932
multiDispatchInfo.setBuiltinOpParams(operationParams);
3033

31-
for (auto &memObj : *multiDispatchInfo.getMemObjsForAuxTranslation()) {
34+
for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) {
3235
DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::NoSplit> builder(clDevice);
33-
size_t allocationSize = alignUp(memObj->getSize(), 512);
3436

3537
UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1);
3638

3739
if (kernelInstanceNumber == 0) {
3840
// Before Kernel
3941
registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchInitCommands, true);
4042
}
41-
if (kernelInstanceNumber == numMemObjectsToTranslate - 1) {
43+
if (kernelInstanceNumber == numKernelObjectsToTranslate - 1) {
4244
// After Kernel
4345
registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchEpilogueCommands, false);
4446
}
@@ -50,8 +52,20 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
5052
builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get());
5153
}
5254

53-
builder.setArg(0, memObj);
54-
builder.setArg(1, memObj);
55+
size_t allocationSize = 0;
56+
if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) {
57+
auto buffer = static_cast<Buffer *>(kernelObj.object);
58+
builder.setArg(0, buffer);
59+
builder.setArg(1, buffer);
60+
allocationSize = alignUp(buffer->getSize(), 512);
61+
} else {
62+
DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
63+
auto svmAlloc = static_cast<GraphicsAllocation *>(kernelObj.object);
64+
auto svmPtr = reinterpret_cast<void *>(svmAlloc->getGpuAddressToPatch());
65+
builder.setArgSvmAlloc(0, svmPtr, svmAlloc);
66+
builder.setArgSvmAlloc(1, svmPtr, svmAlloc);
67+
allocationSize = alignUp(svmAlloc->getUnderlyingBufferSize(), 512);
68+
}
5569

5670
size_t xGws = allocationSize / 16;
5771

opencl/source/command_queue/command_queue.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -534,7 +534,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
534534
kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset);
535535
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
536536
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
537-
Buffer::setSurfaceState(&device->getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
537+
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
538538
return true;
539539
}
540540

opencl/source/command_queue/enqueue_common.h

Lines changed: 21 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2020 Intel Corporation
2+
* Copyright (C) 2017-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -58,7 +58,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
5858
const cl_event *eventWaitList,
5959
cl_event *event) {
6060
BuiltInOwnershipWrapper builtInLock;
61-
MemObjsForAuxTranslation memObjsForAuxTranslation;
61+
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
6262
MultiDispatchInfo multiDispatchInfo(kernel);
6363

6464
if (DebugManager.flags.ForceDispatchScheduler.get()) {
@@ -69,9 +69,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
6969
if (kernel->isAuxTranslationRequired()) {
7070
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
7171
builtInLock.takeOwnership(builder);
72-
kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation, rootDeviceIndex);
73-
multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation);
74-
if (!memObjsForAuxTranslation.empty()) {
72+
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation, rootDeviceIndex);
73+
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
74+
if (!kernelObjsForAuxTranslation.empty()) {
7575
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
7676
}
7777
}
@@ -89,7 +89,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
8989
}
9090
}
9191
if (kernel->isAuxTranslationRequired()) {
92-
if (!memObjsForAuxTranslation.empty()) {
92+
if (!kernelObjsForAuxTranslation.empty()) {
9393
UNRECOVERABLE_IF(kernel->isParentKernel);
9494
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
9595
}
@@ -479,23 +479,31 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const Multi
479479
const EventsRequest &eventsRequest, bool queueBlocked) {
480480
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
481481
auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
482-
auto numBuffers = multiDispatchInfo.getMemObjsForAuxTranslation()->size();
483-
blitPropertiesContainer.resize(numBuffers * 2);
482+
auto numKernelObjs = multiDispatchInfo.getKernelObjsForAuxTranslation()->size();
483+
blitPropertiesContainer.resize(numKernelObjs * 2);
484484

485485
auto bufferIndex = 0;
486-
for (auto &buffer : *multiDispatchInfo.getMemObjsForAuxTranslation()) {
486+
for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) {
487+
GraphicsAllocation *allocation = nullptr;
488+
if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) {
489+
auto buffer = static_cast<Buffer *>(kernelObj.object);
490+
allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
491+
} else {
492+
DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
493+
allocation = static_cast<GraphicsAllocation *>(kernelObj.object);
494+
}
487495
{
488496
// Aux to NonAux
489-
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
490-
buffer->getGraphicsAllocation(rootDeviceIndex), getGpgpuCommandStreamReceiver().getClearColorAllocation());
497+
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(
498+
AuxTranslationDirection::AuxToNonAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation());
491499
auto auxToNonAuxNode = nodesAllocator->getTag();
492500
timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
493501
}
494502

495503
{
496504
// NonAux to Aux
497-
blitPropertiesContainer[bufferIndex + numBuffers] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux,
498-
buffer->getGraphicsAllocation(rootDeviceIndex), getGpgpuCommandStreamReceiver().getClearColorAllocation());
505+
blitPropertiesContainer[bufferIndex + numKernelObjs] = BlitProperties::constructPropertiesForAuxTranslation(
506+
AuxTranslationDirection::NonAuxToAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation());
499507
auto nonAuxToAuxNode = nodesAllocator->getTag();
500508
timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
501509
}

opencl/source/command_queue/gpgpu_walker_base.inl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2020 Intel Corporation
2+
* Copyright (C) 2017-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -194,9 +194,9 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
194194
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
195195
for (auto &dispatchInfo : multiDispatchInfo) {
196196
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
197-
size_t memObjAuxCount = multiDispatchInfo.getMemObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getMemObjsForAuxTranslation()->size() : 0;
198-
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
199-
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
197+
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
198+
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
199+
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
200200
}
201201
if (parentKernel) {
202202
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();

opencl/source/command_queue/hardware_interface_base.inl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -101,7 +101,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
101101
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
102102
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
103103
Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh),
104-
sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
104+
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
105105
}
106106

107107
auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices();

opencl/source/helpers/cl_hw_helper_base.inl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020 Intel Corporation
2+
* Copyright (C) 2020-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -17,8 +17,8 @@ template <typename Family>
1717
bool ClHwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) {
1818
return (HwHelperHw<Family>::getAuxTranslationMode() == AuxTranslationMode::Blit) &&
1919
hwInfo.capabilityTable.blitterOperationsSupported &&
20-
multiDispatchInfo.getMemObjsForAuxTranslation() &&
21-
(multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0);
20+
multiDispatchInfo.getKernelObjsForAuxTranslation() &&
21+
(multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0);
2222
}
2323

2424
template <typename GfxFamily>

opencl/source/helpers/dispatch_info.h

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2020 Intel Corporation
2+
* Copyright (C) 2017-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -13,6 +13,7 @@
1313
#include "shared/source/utilities/stackvec.h"
1414

1515
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
16+
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
1617
#include "opencl/source/mem_obj/mem_obj.h"
1718

1819
#include <algorithm>
@@ -197,19 +198,19 @@ struct MultiDispatchInfo {
197198
return builtinOpParams;
198199
}
199200

200-
void setMemObjsForAuxTranslation(const MemObjsForAuxTranslation &memObjsForAuxTranslation) {
201-
this->memObjsForAuxTranslation = &memObjsForAuxTranslation;
201+
void setKernelObjsForAuxTranslation(const KernelObjsForAuxTranslation &kernelObjsForAuxTranslation) {
202+
this->kernelObjsForAuxTranslation = &kernelObjsForAuxTranslation;
202203
}
203204

204-
const MemObjsForAuxTranslation *getMemObjsForAuxTranslation() const {
205-
return memObjsForAuxTranslation;
205+
const KernelObjsForAuxTranslation *getKernelObjsForAuxTranslation() const {
206+
return kernelObjsForAuxTranslation;
206207
}
207208

208209
protected:
209210
BuiltinOpParams builtinOpParams = {};
210211
StackVec<DispatchInfo, 9> dispatchInfos;
211212
StackVec<MemObj *, 2> redescribedSurfaces;
212-
const MemObjsForAuxTranslation *memObjsForAuxTranslation = nullptr;
213+
const KernelObjsForAuxTranslation *kernelObjsForAuxTranslation = nullptr;
213214
Kernel *mainKernel = nullptr;
214215
};
215216
} // namespace NEO

opencl/source/helpers/properties_helper.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -13,7 +13,6 @@
1313
#include "opencl/source/api/cl_types.h"
1414

1515
#include <array>
16-
#include <unordered_set>
1716

1817
namespace NEO {
1918
class MemObj;
@@ -34,7 +33,6 @@ struct EventsRequest {
3433

3534
using MemObjSizeArray = std::array<size_t, 3>;
3635
using MemObjOffsetArray = std::array<size_t, 3>;
37-
using MemObjsForAuxTranslation = std::unordered_set<MemObj *>;
3836

3937
struct TransferProperties {
4038
TransferProperties() = delete;

opencl/source/kernel/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) 2018-2020 Intel Corporation
2+
# Copyright (C) 2018-2021 Intel Corporation
33
#
44
# SPDX-License-Identifier: MIT
55
#
@@ -14,6 +14,7 @@ set(RUNTIME_SRCS_KERNEL
1414
${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl
1515
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
1616
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h
17+
${CMAKE_CURRENT_SOURCE_DIR}/kernel_objects_for_aux_translation.h
1718
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_extra.cpp
1819
)
1920
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL})

0 commit comments

Comments
 (0)