Skip to content

Commit b4f53fd

Browse files
Pick applicable buffers for aux translation
Change-Id: I60a28cd9e0dec61120b1ae5c42dfe0cb852eb387
1 parent 428fdb4 commit b4f53fd

File tree

8 files changed

+87
-5
lines changed

8 files changed

+87
-5
lines changed

runtime/command_queue/command_queue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
415415

416416
// Base implementation has an empty body: none of the reference parameters are modified here.
// NOTE(review): presumably meant to be overridden by a derived queue type - confirm against subclasses.
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
417417

418-
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {}
418+
// Hook for dispatching aux translation. This base version has an empty body and ignores both arguments.
// NOTE(review): MOCKABLE_VIRTUAL is a macro defined elsewhere; presumably it makes the method overridable - confirm.
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {}
419419

420420
Context *context;
421421
Device *device;

runtime/command_queue/enqueue_common.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
8383
if (DebugManager.flags.ForceDispatchScheduler.get()) {
8484
forceDispatchScheduler(multiDispatchInfo);
8585
} else {
86+
BuffersForAuxTranslation buffersForAuxTranslation;
8687
if (kernel->isAuxTranslationRequired()) {
87-
dispatchAuxTranslation(multiDispatchInfo);
88+
kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation);
89+
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
8890
}
8991

9092
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
@@ -101,7 +103,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
101103
}
102104
}
103105
if (kernel->isAuxTranslationRequired()) {
104-
dispatchAuxTranslation(multiDispatchInfo);
106+
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
105107
}
106108
}
107109

runtime/helpers/base_object.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ inline const DerivedType *castToObject(const typename DerivedType::BaseType *obj
7777
return const_cast<const DerivedType *>(castToObject<DerivedType>(const_cast<typename DerivedType::BaseType *>(object)));
7878
}
7979

80+
// Overload of castToObject that accepts an untyped, const-qualified pointer.
// The pointer is statically cast to `const _cl_mem *`, its constness is removed to obtain a cl_mem handle,
// and that handle is forwarded to castToObject<DerivedType>; the result of that call is returned.
// NOTE(review): the static_cast performs no runtime check, so the caller presumably must pass a pointer
// that really refers to a _cl_mem object - confirm at call sites.
template <typename DerivedType>
81+
inline DerivedType *castToObject(const void *object) {
82+
cl_mem clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(object));
83+
return castToObject<DerivedType>(clMem);
84+
}
85+
8086
extern std::thread::id invalidThreadID;
8187

8288
class ConditionVariableWithCounter {

runtime/helpers/properties_helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424

2525
#include "runtime/api/cl_types.h"
2626
#include <array>
27+
#include <unordered_set>
2728

2829
namespace OCLRT {
2930
class MemObj;
31+
class Buffer;
3032

3133
enum class QueueThrottle {
3234
LOW,
@@ -47,6 +49,7 @@ struct EventsRequest {
4749

4850
using MemObjSizeArray = std::array<size_t, 3>;
4951
using MemObjOffsetArray = std::array<size_t, 3>;
52+
using BuffersForAuxTranslation = std::unordered_set<Buffer *>;
5053

5154
struct TransferProperties {
5255
TransferProperties() = delete;

runtime/kernel/kernel.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2103,4 +2103,16 @@ void Kernel::resolveArgs() {
21032103
// Returns true when the device's eRenderCoreFamily compares greater than or equal to IGFX_GEN9_CORE.
bool Kernel::canTransformImages() const {
21042104
return device.getHardwareInfo().pPlatform->eRenderCoreFamily >= IGFX_GEN9_CORE;
21052105
}
2106+
2107+
// Adds to buffersForAuxTranslation the kernel's buffer arguments that qualify for aux translation.
// The argument at index i is inserted only when all of the following hold:
//   - its recorded argument type is BUFFER_OBJ,
//   - kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess is false,
//   - castToObject<Buffer>(getKernelArg(i)) yields a non-null Buffer,
//   - that buffer's graphics allocation has type BUFFER_COMPRESSED.
// The set is not cleared first, so entries already present are kept. BuffersForAuxTranslation is a
// std::unordered_set<Buffer *>, so a buffer bound to several arguments is stored once.
void Kernel::fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation) {
2108+
// The argument count is an upper bound on the number of insertions made by this call.
buffersForAuxTranslation.reserve(getKernelArgsNumber());
2109+
for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
2110+
if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
2111+
auto buffer = castToObject<Buffer>(getKernelArg(i));
2112+
// NOTE(review): getGraphicsAllocation() is dereferenced without a null check - this assumes
// every non-null Buffer has a graphics allocation; confirm.
if (buffer && buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
2113+
buffersForAuxTranslation.insert(buffer);
2114+
}
2115+
}
2116+
}
2117+
}
21062118
} // namespace OCLRT

runtime/kernel/kernel.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,15 @@
2727
#include "runtime/helpers/base_object.h"
2828
#include "runtime/helpers/preamble.h"
2929
#include "runtime/helpers/address_patch.h"
30+
#include "runtime/helpers/properties_helper.h"
3031
#include "runtime/program/program.h"
3132
#include "runtime/program/kernel_info.h"
3233
#include "runtime/os_interface/debug_settings_manager.h"
3334
#include <vector>
3435

3536
namespace OCLRT {
3637
struct CompletionStamp;
38+
class Buffer;
3739
class GraphicsAllocation;
3840
class ImageTransformer;
3941
class Surface;
@@ -384,6 +386,8 @@ class Kernel : public BaseObject<_cl_kernel> {
384386
return usingImagesOnly;
385387
}
386388

389+
void fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation);
390+
387391
protected:
388392
struct ObjectCounts {
389393
uint32_t imageCount;

unit_tests/command_queue/enqueue_kernel_tests.cpp

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,8 +1573,8 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
15731573
class MyCmdQ : public CommandQueueHw<FamilyType> {
15741574
public:
15751575
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
1576-
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override {
1577-
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo);
1576+
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
1577+
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
15781578
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
15791579
}
15801580

@@ -1595,3 +1595,57 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
15951595
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
15961596
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
15971597
}
1598+
1599+
// Checks which kernel arguments reach dispatchAuxTranslation when aux translation is required.
// Six arguments are configured with different combinations of argument type, pureStatefulBufferAccess
// and allocation type. The expectations at the end require that each of the two dispatchAuxTranslation
// calls made by enqueueKernel receives a set holding exactly one buffer, buffer2.
HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
1600+
// Command queue that forwards to the base dispatchAuxTranslation and then stores a copy of the
// buffer set it was called with, one entry per call.
class MyCmdQ : public CommandQueueHw<FamilyType> {
1601+
public:
1602+
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
1603+
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
1604+
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
1605+
inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation);
1606+
}
1607+
1608+
std::vector<BuffersForAuxTranslation> inputBuffersForAuxTranslation;
1609+
};
1610+
MyCmdQ cmdQ(context, pDevice);
1611+
size_t gws[3] = {1, 0, 0};
1612+
MockBuffer buffer0, buffer1, buffer2, buffer3;
1613+
cl_mem clMem0 = &buffer0;
1614+
cl_mem clMem1 = &buffer1;
1615+
cl_mem clMem2 = &buffer2;
1616+
cl_mem clMem3 = &buffer3;
1617+
// buffer0 and buffer1 are regular buffers; buffer2 and buffer3 are marked as compressed.
buffer0.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
1618+
buffer1.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
1619+
buffer2.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
1620+
buffer3.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
1621+
1622+
MockKernelWithInternals mockKernel(*pDevice, context);
1623+
mockKernel.mockKernel->auxTranslationRequired = true;
1624+
mockKernel.kernelInfo.kernelArgInfo.resize(6);
1625+
for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) {
1626+
kernelInfo.kernelArgPatchInfoVector.resize(1);
1627+
}
1628+
1629+
mockKernel.mockKernel->initialize();
1630+
// Per-argument stateful/stateless configuration; false means the argument is a candidate for translation.
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
1631+
mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
1632+
mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
1633+
mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
1634+
mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
1635+
mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;
1636+
1637+
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - don't insert
1638+
mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - don't insert
1639+
mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on BUFFER_COMPRESSED - insert
1640+
mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem *), &clMem3); // stateful on BUFFER_COMPRESSED - don't insert
1641+
mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem *), nullptr); // nullptr - don't insert
1642+
// Argument 5 is never set through setArgBuffer; its type is written directly via the kernelArguments member.
mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - don't insert
1643+
1644+
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
1645+
// Two recorded calls are expected, and each must have received a single-element set.
EXPECT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size());
1646+
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel
1647+
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel
1648+
1649+
// Only buffer2 (stateless access on a BUFFER_COMPRESSED allocation) may appear in either set.
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin());
1650+
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin());
1651+
}

unit_tests/mocks/mock_kernel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ namespace OCLRT {
3838
class MockKernel : public Kernel {
3939
public:
4040
using Kernel::auxTranslationRequired;
41+
using Kernel::kernelArguments;
4142

4243
struct BlockPatchValues {
4344
uint64_t offset;

0 commit comments

Comments
 (0)