Skip to content

Commit 8c3c703

Browse files
performance: Use lock pointer copy with sfence for dc flush mitigation
Resolves: NEO-12898
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent 2d6fb1a commit 8c3c703

File tree

5 files changed

+27
-21
lines changed

5 files changed

+27
-21
lines changed

opencl/source/mem_obj/buffer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "shared/source/memory_manager/memory_operations_handler.h"
2525
#include "shared/source/memory_manager/migration_sync_data.h"
2626
#include "shared/source/os_interface/os_interface.h"
27+
#include "shared/source/utilities/cpuintrinsics.h"
2728

2829
#include "opencl/source/cl_device/cl_device.h"
2930
#include "opencl/source/command_queue/command_queue.h"
@@ -212,8 +213,7 @@ bool inline copyHostPointer(Buffer *buffer,
212213
size <= Buffer::maxBufferSizeForCopyOnCpu &&
213214
isCompressionEnabled == false &&
214215
productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::cpuAccessDisallowed &&
215-
isLockable &&
216-
!isGpuCopyRequiredForDcFlushMitigation;
216+
isLockable;
217217

218218
if (debugManager.flags.CopyHostPtrOnCpu.get() != -1) {
219219
copyOnCpuAllowed = debugManager.flags.CopyHostPtrOnCpu.get() == 1;
@@ -222,6 +222,11 @@ bool inline copyHostPointer(Buffer *buffer,
222222
memory->setAubWritable(true, GraphicsAllocation::defaultBank);
223223
memory->setTbxWritable(true, GraphicsAllocation::defaultBank);
224224
memcpy_s(ptrOffset(lockedPointer, buffer->getOffset()), size, hostPtr, size);
225+
226+
if (isGpuCopyRequiredForDcFlushMitigation) {
227+
CpuIntrinsics::sfence();
228+
}
229+
225230
return true;
226231
} else {
227232
auto blitMemoryToAllocationResult = BlitOperationResult::unsupported;

opencl/test/unit_test/libult/command_queue_ult.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ bool CommandQueue::isTimestampWaitEnabled() {
2020
}
2121

2222
bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
23-
return false;
23+
return ultHwConfig.useGpuCopyForDcFlushMitigation;
2424
}
2525

2626
} // namespace NEO

opencl/test/unit_test/mem_obj/buffer_tests.cpp

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -599,33 +599,30 @@ TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInS
599599
}
600600
}
601601

602-
TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy) {
603-
DebugManagerStateRestore restorer;
604-
debugManager.flags.AllowDcFlush.set(0);
605-
ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
606-
executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
602+
namespace CpuIntrinsicsTests {
603+
extern std::atomic<uint32_t> sfenceCounter;
604+
} // namespace CpuIntrinsicsTests
607605

606+
TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseLockPointerCopyWithSfence) {
607+
ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
608608
auto productHelper = executionEnvironment->rootDeviceEnvironments[0]->productHelper.get();
609-
if (!(productHelper->isBlitterFullySupported(*defaultHwInfo) && productHelper->isDcFlushMitigated())) {
609+
if (!productHelper->isDcFlushMitigated()) {
610610
GTEST_SKIP();
611611
}
612612

613-
auto blitterCalled = 0u;
614-
auto mockBlitMemoryToAllocation = [&](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr,
615-
Vec3<size_t> size) -> NEO::BlitOperationResult {
616-
memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x);
617-
blitterCalled++;
618-
return BlitOperationResult::success;
619-
};
620-
VariableBackup<NEO::BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup(
621-
&NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation);
613+
VariableBackup<UltHwConfig> backup(&ultHwConfig);
614+
ultHwConfig.useGpuCopyForDcFlushMitigation = true;
615+
616+
DebugManagerStateRestore restorer;
617+
debugManager.flags.AllowDcFlush.set(0);
622618

623619
auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment);
624620
executionEnvironment->memoryManager.reset(memoryManager);
625621
memoryManager->returnBaseAllocateGraphicsMemoryInDevicePool = true;
626622
auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDevice>(executionEnvironment, 0));
627623

628624
MockContext ctx(device.get());
625+
CpuIntrinsicsTests::sfenceCounter.store(0u);
629626

630627
cl_int retVal = 0;
631628
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
@@ -634,7 +631,8 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
634631
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal));
635632

636633
ASSERT_NE(nullptr, buffer.get());
637-
EXPECT_EQ(blitterCalled, 1u);
634+
EXPECT_EQ(1u, CpuIntrinsicsTests::sfenceCounter.load());
635+
CpuIntrinsicsTests::sfenceCounter.store(0u);
638636
}
639637

640638
TEST(Buffer, givenPropertiesWithClDeviceHandleListKHRWhenCreateBufferThenCorrectBufferIsSet) {

shared/test/common/base_ult_config_listener.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ void BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
5050

5151
// Ensure that global state is restored
5252
UltHwConfig expectedState{};
53-
static_assert(sizeof(UltHwConfig) == (16 * sizeof(bool) + sizeof(const char *)), ""); // Ensure that there is no internal padding
53+
static_assert(sizeof(UltHwConfig) == (17 * sizeof(bool) + sizeof(const char *)) + sizeof(UltHwConfig::padding), ""); // Ensure that there is no internal padding
5454
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
5555

5656
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));

shared/test/common/helpers/ult_hw_config.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2023 Intel Corporation
2+
* Copyright (C) 2020-2024 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -16,6 +16,7 @@ struct UltHwConfig {
1616
bool useWaitForTimestamps = false;
1717
bool useBlitSplit = false;
1818
bool useFirstSubmissionInitDevice = false;
19+
bool useGpuCopyForDcFlushMitigation = false;
1920

2021
bool csrFailInitDirectSubmission = false;
2122
bool csrBaseCallDirectSubmissionAvailable = false;
@@ -28,6 +29,8 @@ struct UltHwConfig {
2829
bool csrCreatePreemptionReturnValue = true;
2930
bool reserved = false;
3031

32+
char padding[7];
33+
3134
const char *aubTestName = nullptr;
3235
};
3336

0 commit comments

Comments
 (0)