Apply dispatch hint to blocking workload.

lindongw · Compute-Runtime-Automation · commit 10c3a5eba350 · 2019-10-29T08:21:28.000+01:00
Change-Id: I521d237864db5a6eb0ee78a9b8495552241ba129
Signed-off-by: Lindong Wu <lindong.wu@intel.com>
diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp
@@ -220,6 +220,11 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
         dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
     }
 
+    if (commandQueue.dispatchHints != 0) {
+        dispatchFlags.engineHints = commandQueue.dispatchHints;
+        dispatchFlags.epilogueRequired = true;
+    }
+
     DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
 
     gtpinNotifyPreFlushTask(&commandQueue);
diff --git a/unit_tests/helpers/task_information_tests.cpp b/unit_tests/helpers/task_information_tests.cpp
@@ -287,3 +287,37 @@ HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectD
     EXPECT_EQ(mockCmdQ->isMultiEngineQueue(), mockCsr->passedDispatchFlags.multiEngineQueue);
     EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired);
 }
+
+HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchHints) {
+    // A non-zero dispatchHints on the queue must show up as engineHints and set epilogueRequired.
+    using CsrType = MockCsr1<FamilyType>;
+    SetUpImpl<CsrType>();
+    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
+    auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
+
+    IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr;
+    mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1);
+    mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2);
+    mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3);
+    // Arbitrary non-zero value: submit() only forwards the hint when dispatchHints != 0.
+    mockCmdQ->dispatchHints = 1234;
+
+    PreemptionMode preemptionMode = device->getPreemptionMode();
+    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
+
+    // The surface list stays empty; it exists only to be passed to the command constructor.
+    std::vector<Surface *> surfaces;
+    auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    MockKernelWithInternals kernel(*device);
+    kernelOperation->setHeaps(ih1, ih2, ih3);
+
+    bool flushDC = false;
+    bool slmUsed = false;
+    bool ndRangeKernel = false;
+    std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
+    // After submit, the assertions below inspect the flags captured in mockCsr->passedDispatchFlags.
+    command->submit(20, false);
+
+    EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired);
+    EXPECT_EQ(1234u, mockCsr->passedDispatchFlags.engineHints);
+}