|
1 | 1 | /*
|
2 |
| - * Copyright (C) 2019-2024 Intel Corporation |
| 2 | + * Copyright (C) 2019-2025 Intel Corporation |
3 | 3 | *
|
4 | 4 | * SPDX-License-Identifier: MIT
|
5 | 5 | *
|
6 | 6 | */
|
7 | 7 |
|
8 | 8 | #include "shared/source/gen12lp/hw_cmds.h"
|
| 9 | +#include "shared/source/helpers/flat_batch_buffer_helper.h" |
| 10 | +#include "shared/source/helpers/gfx_core_helper.h" |
| 11 | +#include "shared/source/helpers/pipe_control_args.h" |
9 | 12 |
|
10 | 13 | #include "opencl/source/command_queue/command_queue.h"
|
11 | 14 | #include "opencl/source/helpers/hardware_commands_helper.h"
|
12 | 15 | #include "opencl/source/helpers/hardware_commands_helper_base.inl"
|
13 |
| -#include "opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl" |
| 16 | +#include "opencl/source/kernel/kernel.h" |
14 | 17 |
|
15 | 18 | namespace NEO {
|
16 | 19 | using FamilyType = Gen12LpFamily;
|
17 | 20 |
|
| 21 | +template <typename GfxFamily> |
| 22 | +typename HardwareCommandsHelper<GfxFamily>::INTERFACE_DESCRIPTOR_DATA *HardwareCommandsHelper<GfxFamily>::getInterfaceDescriptor( |
| 23 | + const IndirectHeap &indirectHeap, |
| 24 | + uint64_t offsetInterfaceDescriptor, |
| 25 | + HardwareCommandsHelper<GfxFamily>::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { |
| 26 | + return static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(indirectHeap.getCpuBase(), (size_t)offsetInterfaceDescriptor)); |
| 27 | +} |
| 28 | + |
| 29 | +template <typename GfxFamily> |
| 30 | +uint32_t HardwareCommandsHelper<GfxFamily>::additionalSizeRequiredDsh() { |
| 31 | + return sizeof(INTERFACE_DESCRIPTOR_DATA); |
| 32 | +} |
| 33 | + |
| 34 | +template <typename GfxFamily> |
| 35 | +size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredCS() { |
| 36 | + size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + |
| 37 | + sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD); |
| 38 | + return size; |
| 39 | +} |
| 40 | + |
| 41 | +template <typename GfxFamily> |
| 42 | +void HardwareCommandsHelper<GfxFamily>::sendMediaStateFlush( |
| 43 | + LinearStream &commandStream, |
| 44 | + size_t offsetInterfaceDescriptorData) { |
| 45 | + |
| 46 | + using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; |
| 47 | + auto pCmd = commandStream.getSpaceForCmd<MEDIA_STATE_FLUSH>(); |
| 48 | + MEDIA_STATE_FLUSH cmd = GfxFamily::cmdInitMediaStateFlush; |
| 49 | + |
| 50 | + cmd.setInterfaceDescriptorOffset(static_cast<uint32_t>(offsetInterfaceDescriptorData)); |
| 51 | + *pCmd = cmd; |
| 52 | +} |
| 53 | + |
| 54 | +template <typename GfxFamily> |
| 55 | +void HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad( |
| 56 | + LinearStream &commandStream, |
| 57 | + size_t offsetInterfaceDescriptorData, |
| 58 | + size_t sizeInterfaceDescriptorData) { |
| 59 | + { |
| 60 | + using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; |
| 61 | + auto pCmd = commandStream.getSpaceForCmd<MEDIA_STATE_FLUSH>(); |
| 62 | + *pCmd = GfxFamily::cmdInitMediaStateFlush; |
| 63 | + } |
| 64 | + |
| 65 | + { |
| 66 | + using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; |
| 67 | + auto pCmd = commandStream.getSpaceForCmd<MEDIA_INTERFACE_DESCRIPTOR_LOAD>(); |
| 68 | + MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; |
| 69 | + cmd.setInterfaceDescriptorDataStartAddress(static_cast<uint32_t>(offsetInterfaceDescriptorData)); |
| 70 | + cmd.setInterfaceDescriptorTotalLength(static_cast<uint32_t>(sizeInterfaceDescriptorData)); |
| 71 | + *pCmd = cmd; |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +template <typename GfxFamily> |
| 76 | +template <typename WalkerType> |
| 77 | +size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData( |
| 78 | + IndirectHeap &indirectHeap, |
| 79 | + Kernel &kernel, |
| 80 | + bool inlineDataProgrammingRequired, |
| 81 | + WalkerType *walkerCmd, |
| 82 | + uint32_t &sizeCrossThreadData, |
| 83 | + uint64_t scratchAddress, |
| 84 | + const RootDeviceEnvironment &rootDeviceEnvironment) { |
| 85 | + indirectHeap.align(GfxFamily::cacheLineSize); |
| 86 | + |
| 87 | + auto pImplicitArgs = kernel.getImplicitArgs(); |
| 88 | + if (pImplicitArgs) { |
| 89 | + const auto &kernelDescriptor = kernel.getDescriptor(); |
| 90 | + |
| 91 | + auto isHwLocalIdGeneration = false; |
| 92 | + auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, isHwLocalIdGeneration, rootDeviceEnvironment); |
| 93 | + |
| 94 | + auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed(); |
| 95 | + auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); |
| 96 | + |
| 97 | + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr); |
| 98 | + |
| 99 | + auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast<uint64_t *>(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); |
| 100 | + *implicitArgsCrossThreadPtr = implicitArgsGpuVA; |
| 101 | + } |
| 102 | + auto offsetCrossThreadData = indirectHeap.getUsed(); |
| 103 | + char *pDest = nullptr; |
| 104 | + |
| 105 | + pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData)); |
| 106 | + memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData); |
| 107 | + |
| 108 | + if (debugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { |
| 109 | + FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); |
| 110 | + } |
| 111 | + |
| 112 | + return offsetCrossThreadData + static_cast<size_t>(indirectHeap.getHeapGpuStartOffset()); |
| 113 | +} |
| 114 | + |
| 115 | +template <typename GfxFamily> |
| 116 | +template <typename WalkerType> |
| 117 | +void HardwareCommandsHelper<GfxFamily>::setInterfaceDescriptorOffset( |
| 118 | + WalkerType *walkerCmd, |
| 119 | + uint32_t &interfaceDescriptorIndex) { |
| 120 | + |
| 121 | + walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); |
| 122 | +} |
| 123 | + |
18 | 124 | template <>
|
19 | 125 | size_t HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() {
|
20 | 126 | size_t size = 2 * sizeof(typename FamilyType::MEDIA_STATE_FLUSH) +
|
|
0 commit comments