Skip to content

Commit 524ae77

Browse files
refactor: Add GDI profiling
Resolves: NEO-9236 Related-To: NEO-10036 Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent 7728123 commit 524ae77

File tree

15 files changed

+193
-175
lines changed

15 files changed

+193
-175
lines changed

CMakeLists.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -579,11 +579,6 @@ else()
579579
set(NEO_TESTS_LISTENER_OPTION "--enable_default_listener")
580580
endif()
581581

582-
# Put profiling enable flag into define
583-
if(KMD_PROFILING)
584-
add_definitions(-DKMD_PROFILING=${KMD_PROFILING})
585-
endif()
586-
587582
if(MSVC)
588583
# Force to treat warnings as errors
589584
if(NOT CMAKE_CXX_FLAGS MATCHES "/WX")

opencl/source/api/api_enter.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,3 @@
1212

1313
#define API_ENTER(retValPointer) \
1414
LoggerApiEnterWrapper<NEO::FileLogger<globalDebugFunctionalityLevel>::enabled()> ApiWrapperForSingleCall(__FUNCTION__, retValPointer)
15-
16-
#if KMD_PROFILING == 1
17-
#undef API_ENTER
18-
19-
#define API_ENTER(x) \
20-
PerfProfilerApiWrapper globalPerfProfilersWrapperInstanceForSingleApiFunction(__FUNCTION__)
21-
#endif

opencl/test/unit_test/linux/main_linux_dll.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ TEST_F(DrmSimpleTests, givenPrintIoctlTimesWhenCallIoctlThenStatisticsAreGathere
234234
auto drm = DrmWrap::createDrm(*(mockExecutionEnvironment.rootDeviceEnvironments[0].get()));
235235

236236
DebugManagerStateRestore restorer;
237-
debugManager.flags.PrintIoctlTimes.set(true);
237+
debugManager.flags.PrintKmdTimes.set(true);
238238
VariableBackup<decltype(forceExtraIoctlDuration)> backupForceExtraIoctlDuration(&forceExtraIoctlDuration, true);
239239

240240
EXPECT_TRUE(drm->ioctlStatistics.empty());

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintBOPrefetchingResult, false, "tracks the result
308308
DECLARE_DEBUG_VARIABLE(bool, PrintTagAllocationAddress, false, "Print tag allocation address for each engine")
309309
DECLARE_DEBUG_VARIABLE(bool, ProvideVerboseImplicitFlush, false, "provides verbose messages about implicit flush mechanism")
310310
DECLARE_DEBUG_VARIABLE(bool, PrintBlitDispatchDetails, false, "Print blit dispatch details")
311-
DECLARE_DEBUG_VARIABLE(bool, PrintIoctlTimes, false, "Print ioctl times")
311+
// Enables timing statistics for kernel-mode driver calls (Drm::ioctl and the GDI profiler output).
DECLARE_DEBUG_VARIABLE(bool, PrintKmdTimes, false, "Print KMD call times (ioctl on Linux, GDI on Windows)")
312312
DECLARE_DEBUG_VARIABLE(bool, PrintIoctlEntries, false, "Print ioctl being called")
313313
DECLARE_DEBUG_VARIABLE(bool, PrintUmdSharedMigration, false, "Print log message when shared allocation is being migrated by UMD")
314314
DECLARE_DEBUG_VARIABLE(bool, PrintImageBlitBlockCopyCmdDetails, false, "Prints XY_BLOCK_COPY_BLT command details")

shared/source/helpers/options.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@
88
#pragma once
99
#include <cstdint>
1010

11-
#ifndef KMD_PROFILING
12-
#define KMD_PROFILING 0
13-
#endif
14-
1511
namespace NEO {
1612
enum CommandStreamReceiverType {
1713
// Use receiver for real HW

shared/source/os_interface/linux/drm_neo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ int Drm::ioctl(DrmIoctl request, void *arg) {
8888
int returnedErrno = 0;
8989
SYSTEM_ENTER();
9090
do {
91-
auto measureTime = debugManager.flags.PrintIoctlTimes.get();
91+
auto measureTime = debugManager.flags.PrintKmdTimes.get();
9292
std::chrono::steady_clock::time_point start;
9393
std::chrono::steady_clock::time_point end;
9494

@@ -644,7 +644,7 @@ std::vector<DataType> Drm::query(uint32_t queryId, uint32_t queryItemFlags) {
644644
}
645645

646646
void Drm::printIoctlStatistics() {
647-
if (!debugManager.flags.PrintIoctlTimes.get()) {
647+
if (!debugManager.flags.PrintKmdTimes.get()) {
648648
return;
649649
}
650650

shared/source/os_interface/windows/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM
6868
${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface.h
6969
${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface_logging.cpp
7070
${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface_logging.h
71+
${CMAKE_CURRENT_SOURCE_DIR}/gdi_profiling.h
7172
${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id_win.cpp
7273
${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id.h
7374
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_wddm.cpp

shared/source/os_interface/windows/gdi_interface.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Gdi::~Gdi() {
2525
if constexpr (GdiLogging::gdiLoggingSupport) {
2626
GdiLogging::close();
2727
}
28+
this->profiler.printGdiTimes();
2829
}
2930

3031
bool Gdi::setupHwQueueProcAddresses() {

shared/source/os_interface/windows/gdi_interface.h

Lines changed: 51 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,67 +8,73 @@
88
#pragma once
99
#include "shared/source/os_interface/os_library.h"
1010
#include "shared/source/os_interface/windows/d3dkmthk_wrapper.h"
11+
#include "shared/source/os_interface/windows/gdi_profiling.h"
1112
#include "shared/source/os_interface/windows/thk_wrapper.h"
1213

1314
#include <memory>
1415

1516
namespace NEO {
1617

18+
// Declares a ThkWrapper<TYPE> data member named VAR with a default member initializer.
// The initializer passes this->profiler, the stringified TYPE (used as the call name) and the
// current value of this->gdiId, which is post-incremented so wrappers get consecutive ids in
// declaration order. Because the initializer reads this->profiler and this->gdiId, both members
// must be declared in the class before any member created with this macro.
// Note: the expansion already ends with ';'.
#define DEFINE_THK_WRAPPER(TYPE, VAR) ThkWrapper<TYPE> VAR = ThkWrapper<TYPE>(this->profiler, #TYPE, this->gdiId++);
19+
1720
class Gdi {
21+
uint32_t gdiId = 0;
22+
GdiProfiler profiler;
23+
1824
public:
1925
Gdi();
2026
MOCKABLE_VIRTUAL ~Gdi();
2127

22-
ThkWrapper<IN OUT CONST D3DKMT_OPENADAPTERFROMLUID *> openAdapterFromLuid{};
23-
ThkWrapper<IN OUT D3DKMT_CREATEALLOCATION *> createAllocation{};
24-
ThkWrapper<IN OUT D3DKMT_CREATEALLOCATION *> createAllocation2{};
28+
DEFINE_THK_WRAPPER(IN OUT CONST D3DKMT_OPENADAPTERFROMLUID *, openAdapterFromLuid);
29+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEALLOCATION *, createAllocation);
30+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEALLOCATION *, createAllocation2);
2531
NTSTATUS(APIENTRY *shareObjects)
2632
(UINT cObjects, const D3DKMT_HANDLE *hObjects, POBJECT_ATTRIBUTES pObjectAttributes, DWORD dwDesiredAccess, HANDLE *phSharedNtHandle) = {};
27-
ThkWrapper<IN CONST D3DKMT_DESTROYALLOCATION *> destroyAllocation{};
28-
ThkWrapper<IN CONST D3DKMT_DESTROYALLOCATION2 *> destroyAllocation2{};
29-
ThkWrapper<IN CONST D3DKMT_QUERYADAPTERINFO *> queryAdapterInfo{};
30-
ThkWrapper<IN CONST D3DKMT_CLOSEADAPTER *> closeAdapter{};
31-
ThkWrapper<IN OUT D3DKMT_CREATEDEVICE *> createDevice{};
32-
ThkWrapper<IN CONST D3DKMT_DESTROYDEVICE *> destroyDevice{};
33-
ThkWrapper<IN CONST D3DKMT_ESCAPE *> escape{};
34-
ThkWrapper<IN D3DKMT_CREATECONTEXTVIRTUAL *> createContext{};
35-
ThkWrapper<IN CONST D3DKMT_DESTROYCONTEXT *> destroyContext{};
36-
ThkWrapper<IN OUT D3DKMT_OPENRESOURCE *> openResource{};
37-
ThkWrapper<IN OUT D3DKMT_OPENRESOURCEFROMNTHANDLE *> openResourceFromNtHandle{};
38-
ThkWrapper<IN OUT D3DKMT_QUERYRESOURCEINFO *> queryResourceInfo{};
39-
ThkWrapper<IN OUT D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *> queryResourceInfoFromNtHandle{};
40-
ThkWrapper<IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT *> createSynchronizationObject{};
41-
ThkWrapper<IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *> createSynchronizationObject2{};
42-
ThkWrapper<IN CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *> destroySynchronizationObject{};
43-
ThkWrapper<IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *> signalSynchronizationObject{};
44-
ThkWrapper<IN CONST_FROM_WDK_10_0_18328_0 D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *> waitForSynchronizationObject{};
45-
ThkWrapper<IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *> waitForSynchronizationObjectFromCpu{};
46-
ThkWrapper<IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *> signalSynchronizationObjectFromCpu{};
47-
ThkWrapper<IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *> waitForSynchronizationObjectFromGpu{};
48-
ThkWrapper<IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *> signalSynchronizationObjectFromGpu{};
49-
ThkWrapper<IN OUT D3DKMT_CREATEPAGINGQUEUE *> createPagingQueue{};
50-
ThkWrapper<IN OUT D3DDDI_DESTROYPAGINGQUEUE *> destroyPagingQueue{};
51-
ThkWrapper<IN OUT D3DKMT_LOCK2 *> lock2{};
52-
ThkWrapper<IN CONST D3DKMT_UNLOCK2 *> unlock2{};
53-
ThkWrapper<IN OUT D3DDDI_MAPGPUVIRTUALADDRESS *> mapGpuVirtualAddress{};
54-
ThkWrapper<IN OUT D3DDDI_RESERVEGPUVIRTUALADDRESS *> reserveGpuVirtualAddress{};
55-
ThkWrapper<IN CONST D3DKMT_FREEGPUVIRTUALADDRESS *> freeGpuVirtualAddress{};
56-
ThkWrapper<IN CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *> updateGpuVirtualAddress{};
57-
ThkWrapper<IN CONST D3DKMT_SUBMITCOMMAND *> submitCommand{};
58-
ThkWrapper<IN OUT D3DDDI_MAKERESIDENT *> makeResident{};
59-
ThkWrapper<IN D3DKMT_EVICT *> evict{};
60-
ThkWrapper<IN D3DKMT_REGISTERTRIMNOTIFICATION *> registerTrimNotification{};
61-
ThkWrapper<IN D3DKMT_UNREGISTERTRIMNOTIFICATION *> unregisterTrimNotification{};
62-
ThkWrapper<IN CONST D3DKMT_SETALLOCATIONPRIORITY *> setAllocationPriority{};
63-
ThkWrapper<IN CONST D3DKMT_SETCONTEXTSCHEDULINGPRIORITY *> setSchedulingPriority{};
33+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYALLOCATION *, destroyAllocation);
34+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYALLOCATION2 *, destroyAllocation2);
35+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_QUERYADAPTERINFO *, queryAdapterInfo);
36+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_CLOSEADAPTER *, closeAdapter);
37+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEDEVICE *, createDevice);
38+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYDEVICE *, destroyDevice);
39+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_ESCAPE *, escape);
40+
DEFINE_THK_WRAPPER(IN D3DKMT_CREATECONTEXTVIRTUAL *, createContext);
41+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYCONTEXT *, destroyContext);
42+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_OPENRESOURCE *, openResource);
43+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_OPENRESOURCEFROMNTHANDLE *, openResourceFromNtHandle);
44+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_QUERYRESOURCEINFO *, queryResourceInfo);
45+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *, queryResourceInfoFromNtHandle);
46+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT *, createSynchronizationObject);
47+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *, createSynchronizationObject2);
48+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *, destroySynchronizationObject);
49+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *, signalSynchronizationObject);
50+
DEFINE_THK_WRAPPER(IN CONST_FROM_WDK_10_0_18328_0 D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *, waitForSynchronizationObject);
51+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *, waitForSynchronizationObjectFromCpu);
52+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *, signalSynchronizationObjectFromCpu);
53+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *, waitForSynchronizationObjectFromGpu);
54+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *, signalSynchronizationObjectFromGpu);
55+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEPAGINGQUEUE *, createPagingQueue);
56+
DEFINE_THK_WRAPPER(IN OUT D3DDDI_DESTROYPAGINGQUEUE *, destroyPagingQueue);
57+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_LOCK2 *, lock2);
58+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_UNLOCK2 *, unlock2);
59+
DEFINE_THK_WRAPPER(IN OUT D3DDDI_MAPGPUVIRTUALADDRESS *, mapGpuVirtualAddress);
60+
DEFINE_THK_WRAPPER(IN OUT D3DDDI_RESERVEGPUVIRTUALADDRESS *, reserveGpuVirtualAddress);
61+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_FREEGPUVIRTUALADDRESS *, freeGpuVirtualAddress);
62+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *, updateGpuVirtualAddress);
63+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SUBMITCOMMAND *, submitCommand);
64+
DEFINE_THK_WRAPPER(IN OUT D3DDDI_MAKERESIDENT *, makeResident);
65+
DEFINE_THK_WRAPPER(IN D3DKMT_EVICT *, evict);
66+
DEFINE_THK_WRAPPER(IN D3DKMT_REGISTERTRIMNOTIFICATION *, registerTrimNotification);
67+
DEFINE_THK_WRAPPER(IN D3DKMT_UNREGISTERTRIMNOTIFICATION *, unregisterTrimNotification);
68+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SETALLOCATIONPRIORITY *, setAllocationPriority);
69+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SETCONTEXTSCHEDULINGPRIORITY *, setSchedulingPriority);
6470

6571
// HW queue
66-
ThkWrapper<IN OUT D3DKMT_CREATEHWQUEUE *> createHwQueue{};
67-
ThkWrapper<IN CONST D3DKMT_DESTROYHWQUEUE *> destroyHwQueue{};
68-
ThkWrapper<IN CONST D3DKMT_SUBMITCOMMANDTOHWQUEUE *> submitCommandToHwQueue{};
72+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEHWQUEUE *, createHwQueue);
73+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYHWQUEUE *, destroyHwQueue);
74+
DEFINE_THK_WRAPPER(IN CONST D3DKMT_SUBMITCOMMANDTOHWQUEUE *, submitCommandToHwQueue);
6975

7076
// For debug purposes
71-
ThkWrapper<IN OUT D3DKMT_GETDEVICESTATE *> getDeviceState{};
77+
DEFINE_THK_WRAPPER(IN OUT D3DKMT_GETDEVICESTATE *, getDeviceState);
7278

7379
bool isInitialized() {
7480
return initialized;
@@ -79,7 +85,7 @@ class Gdi {
7985
protected:
8086
OsLibrary *createGdiDLL();
8187
MOCKABLE_VIRTUAL bool getAllProcAddresses();
82-
std::unique_ptr<NEO::OsLibrary> gdiDll;
88+
std::unique_ptr<NEO::OsLibrary> gdiDll = nullptr;
8389
bool initialized = false;
8490
};
8591
} // namespace NEO
shared/source/os_interface/windows/gdi_profiling.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright (C) 2018-2024 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#pragma once
9+
10+
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>
#include <vector>
13+
14+
namespace NEO {
15+
16+
// Accumulates per-call timing statistics for GDI calls and prints them as a table.
// Every call is identified by a numeric id which is used directly as the index into
// gdiStatistics, so ids are expected to be small and dense.
class GdiProfiler {

    // Aggregated timings of one call id. Slots that were never recorded keep count == 0
    // and gdiCall == nullptr.
    struct StatisticsEntry {
        long long totalTime = 0;
        uint64_t count = 0;
        long long minTime = std::numeric_limits<long long>::max();
        long long maxTime = 0;
        const char *gdiCall = nullptr;
        // Length of the recorded call name, or 0 when no name has been recorded.
        size_t getLength() const {
            return this->gdiCall ? strlen(this->gdiCall) : 0u;
        }
    };

  public:
    // Prints one row per call id that was recorded at least once (total, count, average, min, max).
    // Prints nothing when no statistics were collected.
    void printGdiTimes() {
        if (this->gdiStatistics.empty()) {
            return;
        }

        // The first column is padded to the longest recorded call name.
        auto maxCallLengthIt = std::max_element(this->gdiStatistics.begin(), this->gdiStatistics.end(), [](const auto &gdiData1, const auto &gdiData2) {
            return gdiData1.getLength() < gdiData2.getLength();
        });
        // getLength() is null-safe, unlike calling strlen() on gdiCall directly.
        auto maxCallLength = static_cast<int>(maxCallLengthIt->getLength());

        printf("\n--- Gdi statistics ---\n");
        // The trailing newline is intentionally kept inside the last argument so the header output stays unchanged.
        printf("%*s %15s %10s %25s %15s %15s", maxCallLength, "Request", "Total time(ns)", "Count", "Avg time per gdi call", "Min", "Max\n");
        for (const auto &gdiData : this->gdiStatistics) {
            if (gdiData.count == 0) {
                continue;
            }
            // totalTime is signed -> %lld; count is 64-bit -> %llu (unsigned long is only 32-bit on Windows).
            printf("%*s %15lld %10llu %25f %15lld %15lld\n",
                   maxCallLength,
                   gdiData.gdiCall ? gdiData.gdiCall : "",
                   gdiData.totalTime,
                   static_cast<unsigned long long>(gdiData.count),
                   gdiData.totalTime / static_cast<double>(gdiData.count),
                   gdiData.minTime,
                   gdiData.maxTime);
        }
        printf("\n");
    }

    // Adds one measurement for the call identified by id.
    //   elapsedTime - duration of the call (the printed table labels it as ns)
    //   name        - call name shown in the table; the pointer is stored, not copied,
    //                 so it must stay valid until printGdiTimes() is called
    //   id          - index of the statistics slot; the vector grows to fit it
    void recordElapsedTime(long long elapsedTime, const char *name, uint32_t id) {
        // Widen before adding so id + 1 cannot wrap around in 32-bit arithmetic.
        const auto requiredSize = static_cast<size_t>(id) + 1u;
        if (this->gdiStatistics.size() < requiredSize) {
            this->gdiStatistics.resize(requiredSize);
        }

        auto &gdiData = this->gdiStatistics[id];

        gdiData.gdiCall = name;
        gdiData.totalTime += elapsedTime;
        gdiData.count++;
        gdiData.minTime = std::min(gdiData.minTime, elapsedTime);
        gdiData.maxTime = std::max(gdiData.maxTime, elapsedTime);
    }

  protected:
    std::vector<StatisticsEntry> gdiStatistics{};
};
75+
} // namespace NEO

0 commit comments

Comments
 (0)