Skip to content

Commit b5f443e

Browse files
Revert commit cc1f4be.

This reverts commit cc1f4be.
Revert "Revert "Use GPU instead of CPU address in programming commands for HwTim(...)""

Change-Id: Iff122612bb46ba80bcc70b07b2609bfd5f0b9653
Signed-off-by: Artur Harasimiuk <[email protected]>
1 parent b2c1d68 commit b5f443e

File tree

14 files changed

+45
-38
lines changed

14 files changed

+45
-38
lines changed

runtime/command_queue/enqueue_common.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
146146
auto devQueue = this->getContext().getDefaultDeviceQueue();
147147
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
148148

149-
HwTimeStamps *hwTimeStamps = nullptr;
149+
TagNode<HwTimeStamps> *hwTimeStamps = nullptr;
150150

151151
auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
152152

@@ -230,7 +230,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
230230
}
231231
if (this->isProfilingEnabled()) {
232232
// Get allocation for timestamps
233-
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode()->tag;
233+
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode();
234234
if (this->isPerfCountersEnabled()) {
235235
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag;
236236
// PERF COUNTER: copy current configuration from queue to event

runtime/command_queue/gpgpu_walker.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -139,11 +139,11 @@ class GpgpuWalkerHelper {
139139
const iOpenCL::SPatchThreadPayload &threadPayload);
140140

141141
static void dispatchProfilingCommandsStart(
142-
HwTimeStamps &hwTimeStamps,
142+
TagNode<HwTimeStamps> &hwTimeStamps,
143143
OCLRT::LinearStream *commandStream);
144144

145145
static void dispatchProfilingCommandsEnd(
146-
HwTimeStamps &hwTimeStamps,
146+
TagNode<HwTimeStamps> &hwTimeStamps,
147147
OCLRT::LinearStream *commandStream);
148148

149149
static void dispatchPerfCountersNoopidRegisterCommands(

runtime/command_queue/gpgpu_walker.inl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -101,17 +101,17 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
101101

102102
template <typename GfxFamily>
103103
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
104-
HwTimeStamps &hwTimeStamps,
104+
TagNode<HwTimeStamps> &hwTimeStamps,
105105
OCLRT::LinearStream *commandStream) {
106106
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
107107

108108
// PIPE_CONTROL for global timestamp
109-
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
109+
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->GlobalStartTS, hwTimeStamps.tag);
110110

111111
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
112112

113113
//MI_STORE_REGISTER_MEM for context local timestamp
114-
TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
114+
TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextStartTS, hwTimeStamps.tag);
115115

116116
//low part
117117
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
@@ -122,7 +122,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
122122

123123
template <typename GfxFamily>
124124
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
125-
HwTimeStamps &hwTimeStamps,
125+
TagNode<HwTimeStamps> &hwTimeStamps,
126126
OCLRT::LinearStream *commandStream) {
127127

128128
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
@@ -133,7 +133,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
133133
pPipeControlCmd->setCommandStreamerStallEnable(true);
134134

135135
//MI_STORE_REGISTER_MEM for context local timestamp
136-
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextEndTS));
136+
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextEndTS, hwTimeStamps.tag);
137137

138138
//low part
139139
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));

runtime/command_queue/hardware_interface.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ class HardwareInterface {
4040
cl_uint numEventsInWaitList,
4141
const cl_event *eventWaitList,
4242
KernelOperation **blockedCommandsData,
43-
HwTimeStamps *hwTimeStamps,
43+
TagNode<HwTimeStamps> *hwTimeStamps,
4444
HwPerfCounter *hwPerfCounter,
4545
TimestampPacketContainer *previousTimestampPacketNodes,
4646
TimestampPacketContainer *currentTimestampPacketNodes,
@@ -69,13 +69,13 @@ class HardwareInterface {
6969
static void dispatchProfilingPerfStartCommands(
7070
const DispatchInfo &dispatchInfo,
7171
const MultiDispatchInfo &multiDispatchInfo,
72-
HwTimeStamps *hwTimeStamps,
72+
TagNode<HwTimeStamps> *hwTimeStamps,
7373
HwPerfCounter *hwPerfCounter,
7474
LinearStream *commandStream,
7575
CommandQueue &commandQueue);
7676

7777
static void dispatchProfilingPerfEndCommands(
78-
HwTimeStamps *hwTimeStamps,
78+
TagNode<HwTimeStamps> *hwTimeStamps,
7979
HwPerfCounter *hwPerfCounter,
8080
LinearStream *commandStream,
8181
CommandQueue &commandQueue);

runtime/command_queue/hardware_interface.inl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
1919
cl_uint numEventsInWaitList,
2020
const cl_event *eventWaitList,
2121
KernelOperation **blockedCommandsData,
22-
HwTimeStamps *hwTimeStamps,
22+
TagNode<HwTimeStamps> *hwTimeStamps,
2323
HwPerfCounter *hwPerfCounter,
2424
TimestampPacketContainer *previousTimestampPacketNodes,
2525
TimestampPacketContainer *currentTimestampPacketNodes,

runtime/command_queue/hardware_interface_base.inl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ template <typename GfxFamily>
5959
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
6060
const DispatchInfo &dispatchInfo,
6161
const MultiDispatchInfo &multiDispatchInfo,
62-
HwTimeStamps *hwTimeStamps,
62+
TagNode<HwTimeStamps> *hwTimeStamps,
6363
HwPerfCounter *hwPerfCounter,
6464
LinearStream *commandStream,
6565
CommandQueue &commandQueue) {
@@ -77,7 +77,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
7777

7878
template <typename GfxFamily>
7979
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
80-
HwTimeStamps *hwTimeStamps,
80+
TagNode<HwTimeStamps> *hwTimeStamps,
8181
HwPerfCounter *hwPerfCounter,
8282
LinearStream *commandStream,
8383
CommandQueue &commandQueue) {

runtime/device_queue/device_queue.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ void DeviceQueue::initDeviceQueue() {
143143
igilEventPool->m_size = caps.maxOnDeviceEvents;
144144
}
145145

146-
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
146+
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) {
147147
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
148148
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
149149
}
@@ -152,7 +152,7 @@ void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHea
152152
return;
153153
}
154154

155-
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
155+
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
156156
return;
157157
}
158158

runtime/device_queue/device_queue.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ class Event;
2222
struct MultiDispatchInfo;
2323
class SchedulerKernel;
2424
struct HwTimeStamps;
25+
template <class T>
26+
struct TagNode;
2527

2628
template <>
2729
struct OpenCLObjectMapper<_device_queue> {
@@ -66,10 +68,10 @@ class DeviceQueue : public BaseObject<_device_queue> {
6668
size_t paramValueSize, void *paramValue,
6769
size_t *paramValueSizeRet);
6870

69-
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
71+
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp);
7072

7173
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
72-
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
74+
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount);
7375

7476
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
7577
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());

runtime/device_queue/device_queue_hw.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ class DeviceQueueHw : public DeviceQueue {
5555

5656
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
5757

58-
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
58+
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override;
5959
void resetDeviceQueue() override;
6060
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
6161

runtime/device_queue/device_queue_hw.inl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include "runtime/helpers/preamble.h"
1313
#include "runtime/helpers/string.h"
1414
#include "runtime/memory_manager/memory_manager.h"
15+
#include "runtime/utilities/tag_allocator.h"
1516

1617
namespace OCLRT {
1718
template <typename GfxFamily>
@@ -201,7 +202,7 @@ void DeviceQueueHw<GfxFamily>::buildSlbDummyCommands() {
201202
}
202203

203204
template <typename GfxFamily>
204-
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
205+
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
205206
// CleanUp Section
206207
auto offset = slbCS.getUsed();
207208
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
@@ -215,7 +216,7 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
215216
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
216217

217218
if (hwTimeStamp != nullptr) {
218-
uint64_t TimeStampAddress = (uint64_t)((uintptr_t) & (hwTimeStamp->ContextCompleteTS));
219+
uint64_t TimeStampAddress = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
219220
igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress;
220221

221222
addProfilingEndCmds(TimeStampAddress);

0 commit comments

Comments
 (0)