Skip to content

Commit d16eccb

Browse files
committed
GFX9 (Vega) updates
* Additional Gfx9 counters
  -- addition of L1CacheHit counter for compute
  -- fix FlatVMemInsts/CSFlatVMemInsts counter
  -- reinstate many counters that were available on previous hardware gens
* Fix issues with OpenCL counter collection on GFX9
* Documentation updates
  -- Address #6

Change-Id: I07b122f6cec2ea734e5feedf3023f99ae031a762
1 parent 0a5d16a commit d16eccb

File tree

64 files changed

+25542
-19842
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+25542
-19842
lines changed

Doc/GPUPerfAPI-UserGuide.doc

3 KB
Binary file not shown.

Doc/GPUPerfAPI-UserGuide.pdf

2.66 KB
Binary file not shown.

Src/GPUPerfAPICL/CLCounterDataRequest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ bool CLCounterDataRequest::BeginRequest(GPA_ContextState* pContextState, gpa_uin
205205
// need to Enable counters
206206
GPA_HardwareCounterDescExt* pCounter = getCurrentContext()->m_pCounterAccessor->GetHardwareCounterExt((*pCounters)[i]);
207207

208-
gpa_uint32 uGroupIndex = pCounter->m_groupIndex;
208+
gpa_uint32 uGroupIndex = pCounter->m_groupIdDriver;
209209
assert(uGroupIndex <= uGroups);
210210

211211
gpa_uint64 uCounters = pHardwareCounters->m_pGroups[uGroupIndex].m_numCounters;

Src/GPUPerfAPICounterGenerator/GPACounterGeneratorCL.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,34 @@ GPA_Status GPA_CounterGeneratorCL::GeneratePublicCounters(GDT_HW_GENERATION desi
6161
return GPA_STATUS_OK;
6262
}
6363

64+
/// Maps a GPA block (group) index to the block index used by the OpenCL driver.
///
/// For some hardware generations the driver numbers blocks that GPA has no counter
/// group for, so GPA indices at or beyond such a gap are shifted upwards to line up
/// with the driver's numbering. Generations without a special case below return the
/// GPA index unchanged.
///
/// \param desiredGeneration the hardware generation
/// \param blockIndex GPA's block index
/// \return the OCL driver's block index
int GPA_CounterGeneratorCL::GetDriverGroupId(GDT_HW_GENERATION desiredGeneration, int blockIndex) const
{
    int result = blockIndex;

    if (GDT_HW_GENERATION_GFX9 == desiredGeneration)
    {
        // for GFX9, the driver has MC, SRBM at index 92, 93, but we don't have those groups for GFX9,
        // so every GPA index from 92 upwards is moved past those two driver-only blocks
        static const int GFX9_MC_BLOCK_INDEX = 92;

        if (blockIndex >= GFX9_MC_BLOCK_INDEX)
        {
            result += 2;
        }
    }
    else if (GDT_HW_GENERATION_SEAISLAND == desiredGeneration)
    {
        // for GFX7 (CI), the driver has TCS at index 79, but we don't have that group for GFX7,
        // so every GPA index from 79 upwards is moved past that one driver-only block
        static const int GFX7_TCS_BLOCK_INDEX = 79;

        if (blockIndex >= GFX7_TCS_BLOCK_INDEX)
        {
            result++;
        }
    }

    return result;
}
91+
6492
GPA_Status GPA_CounterGeneratorCL::GenerateHardwareCounters(GDT_HW_GENERATION desiredGeneration, GPA_HardwareCounters* pHardwareCounters)
6593
{
6694
if (desiredGeneration == GDT_HW_GENERATION_SOUTHERNISLAND)
@@ -123,7 +151,8 @@ GPA_Status GPA_CounterGeneratorCL::GenerateHardwareCounters(GDT_HW_GENERATION de
123151
{
124152
counter.m_pHardwareCounter = &(pClGroup[j]);
125153
counter.m_groupIndex = i;
126-
counter.m_groupIdDriver = i;
154+
counter.m_groupIdDriver = GetDriverGroupId(desiredGeneration, i);
155+
127156
counter.m_counterIdDriver = 0;
128157

129158
#if defined(_DEBUG) && defined(_WIN32) && defined(AMDT_INTERNAL)

Src/GPUPerfAPICounterGenerator/GPACounterGeneratorCL.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ class GPA_CounterGeneratorCL : public GPA_CounterGeneratorBase
2424
GPA_Status GeneratePublicCounters(GDT_HW_GENERATION desiredGeneration, GPA_PublicCounters* pPublicCounters) override;
2525
GPA_Status GenerateHardwareCounters(GDT_HW_GENERATION desiredGeneration, GPA_HardwareCounters* pHardwareCounters) override;
2626
GPA_Status GenerateSoftwareCounters(GDT_HW_GENERATION desiredGeneration, GPA_SoftwareCounters* pSoftwareCounters) override;
27+
28+
private:
29+
30+
/// Function to map from GPA block index to the OCL driver's block index for a particular hw generation
31+
/// \param desiredGeneration the hardware generation
32+
/// \param blockIndex GPA's block index
33+
/// \return the OCL driver's block index
34+
int GetDriverGroupId(GDT_HW_GENERATION desiredGeneration, int blockIndex) const;
2735
};
2836

2937
#endif //_GPA_COUNTER_GENERATOR_CL_H_

Src/GPUPerfAPICounterGenerator/GPAPublicCounters.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,22 @@ static void EvaluateExpression(const char* pExpression, void* pResult, vector< c
478478

479479
stack.push_back(sum);
480480
}
481+
else if (_strcmpi(pch, "sum256") == 0)
482+
{
483+
assert(stack.size() >= 256);
484+
T sum = 0;
485+
486+
// pop the last 256 items and add them together
487+
for (int i = 0; i < 256; i++)
488+
{
489+
T value = stack.back();
490+
stack.pop_back();
491+
492+
sum += value;
493+
}
494+
495+
stack.push_back(sum);
496+
}
481497
else
482498
{
483499
// must be number, reference to internal counter

Src/GPUPerfAPICounterGenerator/InternalCountersCLGfx9.cpp

Lines changed: 106 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ GPA_HardwareCounterDesc* CLCounterGroupArrayGfx9[] =
2727
PA_SC0countersGfx9,
2828
SPI0countersGfx9,
2929
SQ0countersGfx9,
30+
SQ_ES0countersGfx9,
31+
SQ_GS0countersGfx9,
32+
SQ_VS0countersGfx9,
33+
SQ_PS0countersGfx9,
34+
SQ_LS0countersGfx9,
35+
SQ_HS0countersGfx9,
36+
SQ_CS0countersGfx9,
3037
SX0countersGfx9,
3138
TA0countersGfx9,
3239
TA1countersGfx9,
@@ -100,6 +107,12 @@ GPA_HardwareCounterDesc* CLCounterGroupArrayGfx9[] =
100107
WDcountersGfx9,
101108
CPGcountersGfx9,
102109
CPCcountersGfx9,
110+
ATCcountersGfx9,
111+
ATCL2countersGfx9,
112+
MCVML2countersGfx9,
113+
GCEAcountersGfx9,
114+
RPBcountersGfx9,
115+
RMIcountersGfx9,
103116
};
104117

105118
GPA_CounterGroupDesc HWCLGroupsGfx9[] =
@@ -119,84 +132,104 @@ GPA_CounterGroupDesc HWCLGroupsGfx9[] =
119132
{12, "PA_SC", 0, 491, 8},
120133
{13, "SPI", 0, 196, 6},
121134
{14, "SQ", 0, 374, 16},
122-
{15, "SX", 0, 208, 4},
123-
{16, "TA0", 0, 119, 2},
124-
{17, "TA1", 1, 119, 2},
125-
{18, "TA2", 2, 119, 2},
126-
{19, "TA3", 3, 119, 2},
127-
{20, "TA4", 4, 119, 2},
128-
{21, "TA5", 5, 119, 2},
129-
{22, "TA6", 6, 119, 2},
130-
{23, "TA7", 7, 119, 2},
131-
{24, "TA8", 8, 119, 2},
132-
{25, "TA9", 9, 119, 2},
133-
{26, "TA10", 10, 119, 2},
134-
{27, "TA11", 11, 119, 2},
135-
{28, "TA12", 12, 119, 2},
136-
{29, "TA13", 13, 119, 2},
137-
{30, "TA14", 14, 119, 2},
138-
{31, "TA15", 15, 119, 2},
139-
{32, "TCA0", 0, 35, 4},
140-
{33, "TCA1", 1, 35, 4},
141-
{34, "TCC0", 0, 256, 4},
142-
{35, "TCC1", 1, 256, 4},
143-
{36, "TCC2", 2, 256, 4},
144-
{37, "TCC3", 3, 256, 4},
145-
{38, "TCC4", 4, 256, 4},
146-
{39, "TCC5", 5, 256, 4},
147-
{40, "TCC6", 6, 256, 4},
148-
{41, "TCC7", 7, 256, 4},
149-
{42, "TCC8", 8, 256, 4},
150-
{43, "TCC9", 9, 256, 4},
151-
{44, "TCC10", 10, 256, 4},
152-
{45, "TCC11", 11, 256, 4},
153-
{46, "TCC12", 12, 256, 4},
154-
{47, "TCC13", 13, 256, 4},
155-
{48, "TCC14", 14, 256, 4},
156-
{49, "TCC15", 15, 256, 4},
157-
{50, "TD0", 0, 57, 2},
158-
{51, "TD1", 1, 57, 2},
159-
{52, "TD2", 2, 57, 2},
160-
{53, "TD3", 3, 57, 2},
161-
{54, "TD4", 4, 57, 2},
162-
{55, "TD5", 5, 57, 2},
163-
{56, "TD6", 6, 57, 2},
164-
{57, "TD7", 7, 57, 2},
165-
{58, "TD8", 8, 57, 2},
166-
{59, "TD9", 9, 57, 2},
167-
{60, "TD10", 10, 57, 2},
168-
{61, "TD11", 11, 57, 2},
169-
{62, "TD12", 12, 57, 2},
170-
{63, "TD13", 13, 57, 2},
171-
{64, "TD14", 14, 57, 2},
172-
{65, "TD15", 15, 57, 2},
173-
{66, "TCP0", 0, 85, 4},
174-
{67, "TCP1", 1, 85, 4},
175-
{68, "TCP2", 2, 85, 4},
176-
{69, "TCP3", 3, 85, 4},
177-
{70, "TCP4", 4, 85, 4},
178-
{71, "TCP5", 5, 85, 4},
179-
{72, "TCP6", 6, 85, 4},
180-
{73, "TCP7", 7, 85, 4},
181-
{74, "TCP8", 8, 85, 4},
182-
{75, "TCP9", 9, 85, 4},
183-
{76, "TCP10", 10, 85, 4},
184-
{77, "TCP11", 11, 85, 4},
185-
{78, "TCP12", 12, 85, 4},
186-
{79, "TCP13", 13, 85, 4},
187-
{80, "TCP14", 14, 85, 4},
188-
{81, "TCP15", 15, 85, 4},
189-
{82, "GDS", 0, 121, 4},
190-
{83, "VGT", 0, 148, 4},
191-
{84, "IA", 0, 32, 4},
192-
{85, "WD", 0, 58, 4},
193-
{86, "CPG", 0, 59, 2},
194-
{87, "CPC", 0, 35, 2},
135+
{15, "SQ_ES", 0, 374, 16},
136+
{16, "SQ_GS", 0, 374, 16},
137+
{17, "SQ_VS", 0, 374, 16},
138+
{18, "SQ_PS", 0, 374, 16},
139+
{19, "SQ_LS", 0, 374, 16},
140+
{20, "SQ_HS", 0, 374, 16},
141+
{21, "SQ_CS", 0, 374, 16},
142+
{22, "SX", 0, 208, 4},
143+
{23, "TA0", 0, 119, 2},
144+
{24, "TA1", 1, 119, 2},
145+
{25, "TA2", 2, 119, 2},
146+
{26, "TA3", 3, 119, 2},
147+
{27, "TA4", 4, 119, 2},
148+
{28, "TA5", 5, 119, 2},
149+
{29, "TA6", 6, 119, 2},
150+
{30, "TA7", 7, 119, 2},
151+
{31, "TA8", 8, 119, 2},
152+
{32, "TA9", 9, 119, 2},
153+
{33, "TA10", 10, 119, 2},
154+
{34, "TA11", 11, 119, 2},
155+
{35, "TA12", 12, 119, 2},
156+
{36, "TA13", 13, 119, 2},
157+
{37, "TA14", 14, 119, 2},
158+
{38, "TA15", 15, 119, 2},
159+
{39, "TCA0", 0, 35, 4},
160+
{40, "TCA1", 1, 35, 4},
161+
{41, "TCC0", 0, 256, 4},
162+
{42, "TCC1", 1, 256, 4},
163+
{43, "TCC2", 2, 256, 4},
164+
{44, "TCC3", 3, 256, 4},
165+
{45, "TCC4", 4, 256, 4},
166+
{46, "TCC5", 5, 256, 4},
167+
{47, "TCC6", 6, 256, 4},
168+
{48, "TCC7", 7, 256, 4},
169+
{49, "TCC8", 8, 256, 4},
170+
{50, "TCC9", 9, 256, 4},
171+
{51, "TCC10", 10, 256, 4},
172+
{52, "TCC11", 11, 256, 4},
173+
{53, "TCC12", 12, 256, 4},
174+
{54, "TCC13", 13, 256, 4},
175+
{55, "TCC14", 14, 256, 4},
176+
{56, "TCC15", 15, 256, 4},
177+
{57, "TD0", 0, 57, 2},
178+
{58, "TD1", 1, 57, 2},
179+
{59, "TD2", 2, 57, 2},
180+
{60, "TD3", 3, 57, 2},
181+
{61, "TD4", 4, 57, 2},
182+
{62, "TD5", 5, 57, 2},
183+
{63, "TD6", 6, 57, 2},
184+
{64, "TD7", 7, 57, 2},
185+
{65, "TD8", 8, 57, 2},
186+
{66, "TD9", 9, 57, 2},
187+
{67, "TD10", 10, 57, 2},
188+
{68, "TD11", 11, 57, 2},
189+
{69, "TD12", 12, 57, 2},
190+
{70, "TD13", 13, 57, 2},
191+
{71, "TD14", 14, 57, 2},
192+
{72, "TD15", 15, 57, 2},
193+
{73, "TCP0", 0, 85, 4},
194+
{74, "TCP1", 1, 85, 4},
195+
{75, "TCP2", 2, 85, 4},
196+
{76, "TCP3", 3, 85, 4},
197+
{77, "TCP4", 4, 85, 4},
198+
{78, "TCP5", 5, 85, 4},
199+
{79, "TCP6", 6, 85, 4},
200+
{80, "TCP7", 7, 85, 4},
201+
{81, "TCP8", 8, 85, 4},
202+
{82, "TCP9", 9, 85, 4},
203+
{83, "TCP10", 10, 85, 4},
204+
{84, "TCP11", 11, 85, 4},
205+
{85, "TCP12", 12, 85, 4},
206+
{86, "TCP13", 13, 85, 4},
207+
{87, "TCP14", 14, 85, 4},
208+
{88, "TCP15", 15, 85, 4},
209+
{89, "GDS", 0, 121, 4},
210+
{90, "VGT", 0, 148, 4},
211+
{91, "IA", 0, 32, 4},
212+
{92, "WD", 0, 58, 4},
213+
{93, "CPG", 0, 59, 2},
214+
{94, "CPC", 0, 35, 2},
215+
{95, "ATC", 0, 24, 4},
216+
{96, "ATCL2", 0, 8, 2},
217+
{97, "MCVML2", 0, 21, 8},
218+
{98, "GCEA", 0, 77, 2},
219+
{99, "RPB", 0, 63, 4},
220+
{100, "RMI", 0, 257, 4},
195221
};
196222

197223
GPA_SQCounterGroupDesc HWCLSQGroupsGfx9[] =
198224
{
199225
{14, 0, SQ_ALL},
226+
{15, 0, SQ_ES},
227+
{16, 0, SQ_GS},
228+
{17, 0, SQ_VS},
229+
{18, 0, SQ_PS},
230+
{19, 0, SQ_LS},
231+
{20, 0, SQ_HS},
232+
{21, 0, SQ_CS},
200233
};
201234

202235
const unsigned int HWCLGroupCountGfx9 = sizeof(HWCLGroupsGfx9) / sizeof(GPA_CounterGroupDesc);

Src/GPUPerfAPICounterGenerator/PublicCounterDefsCLGfx7.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//==============================================================================
2-
// Copyright (c) 2010-2016 Advanced Micro Devices, Inc. All rights reserved.
2+
// Copyright (c) 2010-2017 Advanced Micro Devices, Inc. All rights reserved.
33
/// \author AMD Developer Tools Team
44
/// \file
55
/// \brief PublicCounterDefinitions for CLGFX7
@@ -54,7 +54,7 @@ void AutoDefinePublicCountersCLGfx7(GPA_PublicCounters& p)
5454
}
5555
{
5656
vector< gpa_uint32 > internalCounters;
57-
internalCounters.push_back(2766);
57+
internalCounters.push_back(2764);
5858
internalCounters.push_back(2765);
5959
internalCounters.push_back(2736);
6060

Src/GPUPerfAPICounterGenerator/PublicCounterDefsCLGfx7.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//==============================================================================
2-
// Copyright (c) 2010-2016 Advanced Micro Devices, Inc. All rights reserved.
2+
// Copyright (c) 2010-2017 Advanced Micro Devices, Inc. All rights reserved.
33
/// \author AMD Developer Tools Team
44
/// \file
55
/// \brief PublicCounterDefinitions for CLGFX7

Src/GPUPerfAPICounterGenerator/PublicCounterDefsCLGfx8.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//==============================================================================
2-
// Copyright (c) 2010-2016 Advanced Micro Devices, Inc. All rights reserved.
2+
// Copyright (c) 2010-2017 Advanced Micro Devices, Inc. All rights reserved.
33
/// \author AMD Developer Tools Team
44
/// \file
55
/// \brief PublicCounterDefinitions for CLGFX8
@@ -86,7 +86,7 @@ void AutoDefinePublicCountersCLGfx8(GPA_PublicCounters& p)
8686
}
8787
{
8888
vector< gpa_uint32 > internalCounters;
89-
internalCounters.push_back(3461);
89+
internalCounters.push_back(3459);
9090
internalCounters.push_back(3460);
9191
internalCounters.push_back(3431);
9292

0 commit comments

Comments
 (0)