Skip to content

Commit bd32518

Browse files
Add extra parameters to EncodeComputeMode::adjustComputeMode() method
Signed-off-by: Igor Venevtsev <[email protected]>
1 parent 4808f66 commit bd32518

File tree

12 files changed: 32 additions & 22 deletions

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ KernelImmutableData::~KernelImmutableData() {
7878

7979
inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef<uint8_t> surfaceStateHeap,
8080
uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation,
81-
const NEO::ArgDescPointer &ptr, const NEO::Device &device) {
81+
const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics) {
8282
if (false == crossThreadData.empty()) {
8383
NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData);
8484
}
@@ -88,7 +88,7 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
8888
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
8989
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
9090
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
91-
&allocation, 0, 0, false, device.getNumAvailableDevices() > 1);
91+
&allocation, 0, 0, useGlobalAtomics, device.getNumAvailableDevices() > 1);
9292
}
9393
}
9494

@@ -171,7 +171,8 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
171171

172172
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
173173
static_cast<uintptr_t>(globalConstBuffer->getGpuAddressToPatch()),
174-
*globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, *neoDevice);
174+
*globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
175+
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
175176
this->residencyContainer.push_back(globalConstBuffer);
176177
} else if (nullptr != globalConstBuffer) {
177178
this->residencyContainer.push_back(globalConstBuffer);
@@ -182,7 +183,8 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
182183

183184
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
184185
static_cast<uintptr_t>(globalVarBuffer->getGpuAddressToPatch()),
185-
*globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, *neoDevice);
186+
*globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
187+
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
186188
this->residencyContainer.push_back(globalVarBuffer);
187189
} else if (nullptr != globalVarBuffer) {
188190
this->residencyContainer.push_back(globalVarBuffer);
@@ -741,7 +743,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
741743

742744
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
743745
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
744-
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
746+
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress,
747+
*neoDevice, kernelAttributes.flags.useGlobalAtomics);
745748

746749
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
747750
}
@@ -794,7 +797,7 @@ void KernelImp::setDebugSurface() {
794797
patchWithImplicitSurface(ArrayRef<uint8_t>(), surfaceStateHeapRef,
795798
0,
796799
*device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress,
797-
*device->getNEODevice());
800+
*device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics);
798801
}
799802
}
800803
void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {

shared/source/command_container/cmdcontainer.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,7 @@ void CommandContainer::reset() {
148148
nextIddInBlock = this->getNumIddPerBlock();
149149
lastSentNumGrfRequired = 0;
150150
lastPipelineSelectModeRequired = false;
151+
lastSentUseGlobalAtomics = false;
151152
}
152153

153154
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,

shared/source/command_container/cmdcontainer.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,7 @@ class CommandContainer : public NonCopyableOrMovableClass {
8787
uint32_t nextIddInBlock = 0;
8888
uint32_t lastSentNumGrfRequired = 0;
8989
bool lastPipelineSelectModeRequired = false;
90+
bool lastSentUseGlobalAtomics = false;
9091

9192
Device *getDevice() const { return device; }
9293

shared/source/command_container/command_encoder.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ struct EncodeStates {
9898
const void *fnDynamicStateHeap,
9999
BindlessHeapsHelper *bindlessHeapHelper);
100100

101-
static void adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency);
101+
static void adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency, bool useGlobalAtomics, bool areMultipleSubDevicesInContext);
102102

103103
static size_t getAdjustStateComputeModeSize();
104104
};
@@ -265,7 +265,8 @@ struct EncodeSurfaceState {
265265
template <typename GfxFamily>
266266
struct EncodeComputeMode {
267267
using STATE_COMPUTE_MODE = typename GfxFamily::STATE_COMPUTE_MODE;
268-
static void adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable);
268+
static void adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr,
269+
bool isMultiOsContextCapable, bool useGlobalAtomics, bool areMultipleSubDevicesInContext);
269270

270271
static void adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor);
271272
};

shared/source/command_container/command_encoder_bdw_plus.inl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
6969
}
7070

7171
EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true);
72-
EncodeStates<Family>::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false);
72+
EncodeStates<Family>::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false,
73+
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, device->getNumAvailableDevices() > 1);
7374
EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), false);
7475

7576
auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup();

shared/source/command_container/encode_compute_mode_bdw_plus.inl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020 Intel Corporation
2+
* Copyright (C) 2020-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -11,7 +11,7 @@
1111

1212
namespace NEO {
1313
template <typename Family>
14-
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) {
14+
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
1515
}
1616

1717
template <typename Family>
@@ -31,4 +31,4 @@ bool EncodeSetMMIO<Family>::isRemapApplicable(uint32_t offset) {
3131
return false;
3232
}
3333

34-
} // namespace NEO
34+
} // namespace NEO

shared/source/command_container/encode_compute_mode_tgllp_plus.inl

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020 Intel Corporation
2+
* Copyright (C) 2020-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -11,7 +11,8 @@
1111

1212
namespace NEO {
1313
template <typename Family>
14-
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) {
14+
void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr,
15+
bool isMultiOsContextCapable, bool requiresCoherency, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
1516
using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
1617
using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;
1718
STATE_COMPUTE_MODE stateComputeMode = (stateComputeModePtr != nullptr) ? *(static_cast<STATE_COMPUTE_MODE *>(stateComputeModePtr)) : Family::cmdInitStateComputeMode;
@@ -20,7 +21,7 @@ void EncodeStates<Family>::adjustStateComputeMode(LinearStream &csr, uint32_t nu
2021

2122
stateComputeMode.setMaskBits(stateComputeMode.getMaskBits() | Family::stateComputeModeForceNonCoherentMask);
2223

23-
EncodeComputeMode<Family>::adjustComputeMode(csr, numGrfRequired, &stateComputeMode, isMultiOsContextCapable);
24+
EncodeComputeMode<Family>::adjustComputeMode(csr, numGrfRequired, &stateComputeMode, isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext);
2425
}
2526

2627
template <typename Family>
@@ -52,4 +53,4 @@ bool EncodeSetMMIO<Family>::isRemapApplicable(uint32_t offset) {
5253
(0x4400 <= offset && offset <= 0x441f);
5354
}
5455

55-
} // namespace NEO
56+
} // namespace NEO

shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,7 +21,8 @@ void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream
2121

2222
auto stateComputeMode = GfxFamily::cmdInitStateComputeMode;
2323
adjustThreadArbitionPolicy(&stateComputeMode);
24-
EncodeStates<GfxFamily>::adjustStateComputeMode(stream, dispatchFlags.numGrfRequired, &stateComputeMode, isMultiOsContextCapable(), dispatchFlags.requiresCoherency);
24+
EncodeStates<GfxFamily>::adjustStateComputeMode(stream, dispatchFlags.numGrfRequired, &stateComputeMode, isMultiOsContextCapable(), dispatchFlags.requiresCoherency,
25+
dispatchFlags.useGlobalAtomics, dispatchFlags.areMultipleSubDevicesInContext);
2526

2627
if (csrSizeRequestFlags.hasSharedHandles) {
2728
auto pc = stream.getSpaceForCmd<PIPE_CONTROL>();

shared/source/gen12lp/command_encoder_gen12lp.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,8 @@ size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
3535
}
3636

3737
template <>
38-
void EncodeComputeMode<Family>::adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable) {
38+
void EncodeComputeMode<Family>::adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr,
39+
bool isMultiOsContextCapable, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
3940
STATE_COMPUTE_MODE *stateComputeMode = static_cast<STATE_COMPUTE_MODE *>(stateComputeModePtr);
4041
auto buffer = csr.getSpace(sizeof(STATE_COMPUTE_MODE));
4142
*reinterpret_cast<STATE_COMPUTE_MODE *>(buffer) = *stateComputeMode;

shared/source/kernel/dispatch_kernel_encoder_interface.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
/*
2-
* Copyright (C) 2020 Intel Corporation
2+
* Copyright (C) 2020-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
66
*/
77

88
#pragma once
9+
#include <cstddef>
910
#include <cstdint>
1011

1112
namespace NEO {

0 commit comments

Comments
 (0)