Skip to content

Commit 3df6110

Browse files
Add extra parameters to setArgStateful()
Signed-off-by: Igor Venevtsev <[email protected]>
1 parent 0871c1b commit 3df6110

File tree

19 files changed: +91 -83 lines changed

19 files changed: +91 -83 lines changed

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
9999
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceState, debugSurface->getGpuAddress(),
100100
debugSurface->getUnderlyingBufferSize(), mocs,
101101
false, false, false, neoDevice->getNumAvailableDevices(),
102-
debugSurface, neoDevice->getGmmHelper());
102+
debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
103103
}
104104

105105
appendSignalEventPostWalker(hEvent);

level_zero/core/source/kernel/kernel_hw.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2020 Intel Corporation
2+
* Copyright (C) 2019-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -68,7 +68,8 @@ struct KernelHw : public KernelImp {
6868
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
6969
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs,
7070
false, false, false, neoDevice->getNumAvailableDevices(),
71-
alloc, neoDevice->getGmmHelper());
71+
alloc, neoDevice->getGmmHelper(),
72+
kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
7273
}
7374

7475
std::unique_ptr<Kernel> clone() const override {

opencl/source/gtpin/gtpin_callbacks.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -142,7 +142,8 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
142142
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex);
143143
cl_mem buffer = (cl_mem)resource;
144144
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
145-
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device);
145+
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
146+
pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
146147
}
147148
}
148149

opencl/source/kernel/kernel.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1490,7 +1490,10 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
14901490

14911491
if (requiresSshForBuffers(rootDeviceIndex)) {
14921492
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
1493-
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice());
1493+
auto context = program->getContextPtr();
1494+
size_t numDevicesInContext = context ? context->getNumDevices() : 1u;
1495+
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(),
1496+
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, numDevicesInContext);
14941497
}
14951498

14961499
kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable();

opencl/source/mem_obj/buffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -752,7 +752,7 @@ void Buffer::setSurfaceState(const Device *device,
752752
multiGraphicsAllocation.addAllocation(gfxAlloc);
753753
}
754754
auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
755-
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device);
755+
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, false, 1u);
756756
delete buffer;
757757
}
758758

opencl/source/mem_obj/buffer.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,8 @@ class Buffer : public MemObj {
137137
bool isSubBuffer();
138138
bool isValidSubBufferOffset(size_t offset);
139139
uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing);
140-
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) = 0;
140+
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
141+
bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) = 0;
141142
bool bufferRectPitchSet(const size_t *bufferOrigin,
142143
const size_t *region,
143144
size_t &bufferRowPitch,
@@ -208,7 +209,8 @@ class BufferHw : public Buffer {
208209
: Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation),
209210
zeroCopy, isHostPtrSVM, isObjectRedescribed) {}
210211

211-
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) override;
212+
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
213+
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override;
212214
void appendSurfaceStateExt(void *memory);
213215

214216
static Buffer *create(Context *context,

opencl/source/mem_obj/buffer_base.inl

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2020 Intel Corporation
2+
* Copyright (C) 2019-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -34,15 +34,16 @@ union SURFACE_STATE_BUFFER_LENGTH {
3434
};
3535

3636
template <typename GfxFamily>
37-
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) {
37+
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
38+
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) {
3839
auto rootDeviceIndex = device.getRootDeviceIndex();
3940
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
4041
const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
4142
EncodeSurfaceState<GfxFamily>::encodeBuffer(memory, getBufferAddress(rootDeviceIndex),
4243
getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex),
4344
getMocsValue(disableL3, isReadOnly, rootDeviceIndex),
4445
true, forceNonAuxMode, isReadOnly, device.getNumAvailableDevices(),
45-
graphicsAllocation, device.getGmmHelper());
46+
graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, numDevicesInContext);
4647
appendSurfaceStateExt(memory);
4748
}
4849
} // namespace NEO

opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2020 Intel Corporation
2+
* Copyright (C) 2019-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat
3838
ASSERT_EQ(CL_SUCCESS, retVal);
3939

4040
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
41-
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
41+
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
4242

4343
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
4444
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh
5555
ASSERT_EQ(CL_SUCCESS, retVal);
5656

5757
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
58-
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice());
58+
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u);
5959

6060
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
6161
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT
7373
buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE);
7474

7575
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
76-
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice());
76+
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u);
7777

7878
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
7979
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh
9393
ASSERT_EQ(CL_SUCCESS, retVal);
9494

9595
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
96-
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
96+
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
9797

9898
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
9999
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyAndL1ForceEnabledWhenProgramm
113113
ASSERT_EQ(CL_SUCCESS, retVal);
114114

115115
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
116-
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
116+
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
117117

118118
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
119119
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -133,7 +133,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin
133133
ASSERT_EQ(CL_SUCCESS, retVal);
134134

135135
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
136-
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice());
136+
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u);
137137

138138
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
139139
const auto actualMocs = surfaceState.getMemoryObjectControlState();

opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2020 Intel Corporation
2+
* Copyright (C) 2019-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat
3838
ASSERT_EQ(CL_SUCCESS, retVal);
3939

4040
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
41-
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
41+
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
4242

4343
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
4444
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh
5555
ASSERT_EQ(CL_SUCCESS, retVal);
5656

5757
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
58-
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice());
58+
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u);
5959

6060
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
6161
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT
7373
buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE);
7474

7575
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
76-
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice());
76+
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u);
7777

7878
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
7979
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh
9393
ASSERT_EQ(CL_SUCCESS, retVal);
9494

9595
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
96-
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
96+
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
9797

9898
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
9999
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin
113113
ASSERT_EQ(CL_SUCCESS, retVal);
114114

115115
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
116-
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice());
116+
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u);
117117

118118
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
119119
const auto actualMocs = surfaceState.getMemoryObjectControlState();

opencl/test/unit_test/helpers/base_object_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2020 Intel Corporation
2+
* Copyright (C) 2017-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -80,7 +80,7 @@ class MockObject : public MockObjectBase<BaseType> {};
8080
template <>
8181
class MockObject<Buffer> : public MockObjectBase<Buffer> {
8282
public:
83-
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {}
83+
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {}
8484
};
8585

8686
template <>
@@ -295,7 +295,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
295295
CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) {
296296
}
297297

298-
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
298+
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
299299
}
300300
};
301301

0 commit comments

Comments
 (0)