Skip to content

Commit cbc4d34

Browse files
Do not align down pointer passed to hostPtr allocation
- do not align up hostPtr allocation size - align BaseAddress programmed in SurfaceState to DWORD Change-Id: Ic6d02e53fd13dda881f8eb845a131bffe4deb45c
1 parent acc5e87 commit cbc4d34

File tree

9 files changed

+172
-67
lines changed

9 files changed

+172
-67
lines changed

runtime/command_queue/enqueue_read_buffer.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -97,16 +97,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
9797
}
9898

9999
MemObjSurface bufferSurf(buffer);
100-
HostPtrSurface hostPtrSurf(alignedDstPtr, size + dstPtrOffset);
100+
HostPtrSurface hostPtrSurf(dstPtr, size);
101101
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
102102

103103
if (size != 0) {
104104
bool status = getCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, getDevice(), true);
105105
if (!status) {
106106
return CL_OUT_OF_RESOURCES;
107107
}
108-
109-
hostPtrSurf.getAllocation()->allocationOffset += dstPtrOffset;
110108
}
111109

112110
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;

runtime/command_queue/enqueue_write_buffer.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -97,7 +97,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
9797
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
9898
}
9999

100-
HostPtrSurface hostPtrSurf(alignedSrcPtr, size + srcPtrOffset, true);
100+
HostPtrSurface hostPtrSurf(srcPtr, size, true);
101101
MemObjSurface bufferSurf(buffer);
102102
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
103103

@@ -106,8 +106,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
106106
if (!status) {
107107
return CL_OUT_OF_RESOURCES;
108108
}
109-
110-
hostPtrSurf.getAllocation()->allocationOffset += srcPtrOffset;
111109
}
112110

113111
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;

runtime/mem_obj/buffer.inl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -33,7 +33,15 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
3333

3434
auto gmmHelper = executionEnvironment->getGmmHelper();
3535
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
36-
auto surfaceSize = alignUp(getSize(), 4);
36+
37+
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
38+
auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast<uint64_t>(getHostPtr());
39+
bufferAddress += this->offset;
40+
41+
auto bufferAddressAligned = alignDown(bufferAddress, 4);
42+
auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned);
43+
44+
auto surfaceSize = alignUp(getSize() + bufferOffset, 4);
3745

3846
SURFACE_STATE_BUFFER_LENGTH Length = {0};
3947
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
@@ -42,10 +50,6 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
4250
surfaceState->setHeight(Length.SurfaceState.Height + 1);
4351
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
4452

45-
// The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
46-
auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : reinterpret_cast<uint64_t>(getHostPtr());
47-
bufferAddress += this->offset;
48-
4953
auto bufferSize = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getUnderlyingBufferSize() : getSize();
5054

5155
if (bufferAddress != 0) {
@@ -67,7 +71,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
6771
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
6872
}
6973

70-
surfaceState->setSurfaceBaseAddress(bufferAddress);
74+
surfaceState->setSurfaceBaseAddress(bufferAddressAligned);
7175

7276
Gmm *gmm = graphicsAllocation ? graphicsAllocation->gmm : nullptr;
7377

unit_tests/command_queue/enqueue_read_buffer_tests.cpp

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -327,30 +327,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
327327
EXPECT_FALSE(csr.disableL3Cache);
328328
}
329329

330-
HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
331-
void *ptr = (void *)0x1039;
332-
333-
cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
334-
CL_FALSE,
335-
0,
336-
MemoryConstants::cacheLineSize,
337-
ptr,
338-
0,
339-
nullptr,
340-
nullptr);
341-
342-
EXPECT_EQ(CL_SUCCESS, retVal);
343-
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
344-
345-
auto allocation = csr.getTemporaryAllocations().peekHead();
346-
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
347-
allocation = allocation->next;
348-
}
349-
350-
ASSERT_NE(allocation, nullptr);
351-
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
352-
}
353-
354330
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
355331
DebugManagerStateRestore dbgRestore;
356332
DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

unit_tests/command_queue/enqueue_write_buffer_tests.cpp

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -434,27 +434,3 @@ HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocatio
434434

435435
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
436436
}
437-
438-
HWTEST_F(EnqueueWriteBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenWriteBufferIsCalledThenHostGraphicsAllocationHasCorrectOffset) {
439-
void *ptr = (void *)0x1039;
440-
441-
cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(),
442-
CL_FALSE,
443-
0,
444-
MemoryConstants::cacheLineSize,
445-
ptr,
446-
0,
447-
nullptr,
448-
nullptr);
449-
450-
EXPECT_EQ(CL_SUCCESS, retVal);
451-
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
452-
453-
auto allocation = csr.getTemporaryAllocations().peekHead();
454-
while (allocation && allocation->getUnderlyingBuffer() != alignDown(ptr, 4)) {
455-
allocation = allocation->next;
456-
}
457-
458-
ASSERT_NE(allocation, nullptr);
459-
EXPECT_EQ((void *)allocation->getGpuAddressToPatch(), ptr);
460-
}

unit_tests/mem_obj/buffer_tests.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,6 +1345,29 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
13451345
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT == surfaceState.getCoherencyType());
13461346
}
13471347

1348+
HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgrammedThenBaseAddressAndLengthAreAlignedToDword) {
1349+
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
1350+
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
1351+
1352+
RENDER_SURFACE_STATE surfaceState = {};
1353+
MockContext context;
1354+
void *svmPtr = reinterpret_cast<void *>(0x1005);
1355+
1356+
Buffer::setSurfaceState(device.get(),
1357+
&surfaceState,
1358+
5,
1359+
svmPtr,
1360+
nullptr,
1361+
0);
1362+
1363+
EXPECT_EQ(0x1004u, surfaceState.getSurfaceBaseAddress());
1364+
SURFACE_STATE_BUFFER_LENGTH length = {};
1365+
length.SurfaceState.Width = surfaceState.getWidth() - 1;
1366+
length.SurfaceState.Height = surfaceState.getHeight() - 1;
1367+
length.SurfaceState.Depth = surfaceState.getDepth() - 1;
1368+
EXPECT_EQ(alignUp(5u, 4u), length.Length + 1);
1369+
}
1370+
13481371
struct BufferUnmapTest : public DeviceFixture, public ::testing::Test {
13491372
void SetUp() override {
13501373
DeviceFixture::SetUp();
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) 2017-2018 Intel Corporation
2+
# Copyright (C) 2017-2019 Intel Corporation
33
#
44
# SPDX-License-Identifier: MIT
55
#
@@ -9,4 +9,5 @@ set(IGDRCL_SRCS_tests_scenarios
99
${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_barrier_scenario_tests.cpp
1010
${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_with_callback_scenario_tests.cpp
1111
)
12-
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios})
12+
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios})
13+
add_subdirectories()
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#
2+
# Copyright (C) 2017-2019 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
#
6+
7+
set(IGDRCL_SRCS_tests_scenarios_windows
8+
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
9+
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_buffer_scenarios_windows_tests.cpp
10+
)
11+
if(WIN32)
12+
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios_windows})
13+
endif()
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Copyright (C) 2017-2019 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include "runtime/os_interface/windows/os_interface.h"
9+
#include "runtime/os_interface/windows/wddm_device_command_stream.h"
10+
11+
#include "unit_tests/fixtures/buffer_fixture.h"
12+
#include "unit_tests/helpers/hw_info_helper.h"
13+
#include "unit_tests/helpers/execution_environment_helper.h"
14+
#include "unit_tests/helpers/hw_parse.h"
15+
#include "unit_tests/mocks/mock_command_queue.h"
16+
#include "unit_tests/mocks/mock_device.h"
17+
#include "unit_tests/os_interface/windows/mock_wddm_memory_manager.h"
18+
#include "test.h"
19+
20+
using namespace OCLRT;
21+
22+
struct EnqueueBufferWindowsTest : public HardwareParse,
23+
public ::testing::Test {
24+
EnqueueBufferWindowsTest(void)
25+
: buffer(nullptr) {
26+
}
27+
28+
void SetUp() override {
29+
executionEnvironment = getExecutionEnvironmentImpl(hwInfo);
30+
}
31+
32+
void TearDown() override {
33+
buffer.reset(nullptr);
34+
}
35+
36+
template <typename FamilyType>
37+
void initializeFixture() {
38+
auto wddmCsr = new WddmCommandStreamReceiver<FamilyType>(*hwInfo, *executionEnvironment);
39+
40+
executionEnvironment->commandStreamReceivers.resize(1);
41+
executionEnvironment->commandStreamReceivers[0][0].reset(wddmCsr);
42+
43+
memoryManager = new MockWddmMemoryManager(executionEnvironment->osInterface->get()->getWddm(), *executionEnvironment);
44+
executionEnvironment->memoryManager.reset(memoryManager);
45+
46+
device = std::unique_ptr<MockDevice>(Device::create<MockDevice>(hwInfo, executionEnvironment, 0));
47+
48+
context = std::make_unique<MockContext>(device.get());
49+
50+
const size_t bufferMisalignment = 1;
51+
const size_t bufferSize = 16;
52+
bufferMemory = std::make_unique<uint32_t[]>(alignUp(bufferSize + bufferMisalignment, sizeof(uint32_t)));
53+
cl_int retVal = 0;
54+
55+
buffer.reset(Buffer::create(context.get(),
56+
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
57+
bufferSize,
58+
reinterpret_cast<char *>(bufferMemory.get()) + bufferMisalignment,
59+
retVal));
60+
61+
EXPECT_EQ(CL_SUCCESS, retVal);
62+
}
63+
64+
protected:
65+
HwInfoHelper hwInfoHelper;
66+
HardwareInfo *hwInfo = nullptr;
67+
ExecutionEnvironment *executionEnvironment;
68+
cl_queue_properties properties = {};
69+
std::unique_ptr<uint32_t[]> bufferMemory;
70+
std::unique_ptr<MockDevice> device;
71+
std::unique_ptr<MockContext> context;
72+
std::unique_ptr<Buffer> buffer;
73+
74+
MockWddmMemoryManager *memoryManager = nullptr;
75+
};
76+
77+
HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCalledThenStateBaseAddressAddressIsAlignedAndMatchesKernelDispatchInfoParams) {
78+
initializeFixture<FamilyType>();
79+
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), &properties);
80+
uint32_t memory[2] = {};
81+
char *misalignedPtr = reinterpret_cast<char *>(memory) + 1;
82+
83+
buffer->forceDisallowCPUCopy = true;
84+
auto retVal = cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 4, misalignedPtr, 0, nullptr, nullptr);
85+
86+
EXPECT_EQ(CL_SUCCESS, retVal);
87+
ASSERT_NE(0, cmdQ->lastEnqueuedKernels.size());
88+
Kernel *kernel = cmdQ->lastEnqueuedKernels[0];
89+
90+
parseCommands<FamilyType>(*cmdQ);
91+
92+
if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) {
93+
const auto &surfaceStateDst = getSurfaceState<FamilyType>(1);
94+
95+
if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) {
96+
auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() +
97+
kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset);
98+
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedPtr, 4)), *pKernelArg);
99+
EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress());
100+
101+
} else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) {
102+
auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() +
103+
kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset);
104+
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedPtr, 4)), static_cast<uint64_t>(*pKernelArg));
105+
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress());
106+
}
107+
}
108+
109+
if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) {
110+
auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() +
111+
kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset);
112+
EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset);
113+
} else {
114+
EXPECT_TRUE(false);
115+
}
116+
}

0 commit comments

Comments
 (0)