@@ -14,11 +14,9 @@ namespace llvm::AMDGPU {
14
14
// ===----------------------------------------------------------------------===//
15
15
// Custom Operands.
16
16
//
17
- // A table of custom operands shall describe "primary" operand names first
18
- // followed by aliases if any. It is not required but recommended to arrange
19
- // operands so that operand encoding match operand position in the table. This
20
- // will make getNameFromOperandTable() a bit more efficient. Unused slots in the
21
- // table shall have an empty name.
17
+ // A table of custom operands must be ordered by Encoding in ascending order
18
+ // to enable binary search lookup. Within entries that share the same encoding,
19
+ // "primary" operand names should be listed first followed by aliases if any.
22
20
//
23
21
// ===----------------------------------------------------------------------===//
24
22
@@ -27,21 +25,18 @@ template <size_t N>
27
25
static StringRef getNameFromOperandTable (const CustomOperand (&Table)[N],
28
26
unsigned Encoding,
29
27
const MCSubtargetInfo &STI) {
30
- auto isValidIndexForEncoding = [&](size_t Idx) {
31
- return Idx < N && Table[Idx].Encoding == Encoding &&
32
- !Table[Idx].Name .empty () &&
33
- (!Table[Idx].Cond || Table[Idx].Cond (STI));
34
- };
35
-
36
- // This is an optimization that should work in most cases. As a side effect,
37
- // it may cause selection of an alias instead of a primary operand name in
38
- // case of sparse tables.
39
- if (isValidIndexForEncoding (Encoding))
40
- return Table[Encoding].Name ;
41
-
42
- for (size_t Idx = 0 ; Idx != N; ++Idx)
43
- if (isValidIndexForEncoding (Idx))
44
- return Table[Idx].Name ;
28
+ // Find the first entry with the target encoding
29
+ auto First =
30
+ std::lower_bound (Table, Table + N, Encoding,
31
+ [](const CustomOperand &Entry, unsigned TargetEncoding) {
32
+ return Entry.Encoding < TargetEncoding;
33
+ });
34
+
35
+ // Search through entries with the same encoding to find the first valid one
36
+ for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It) {
37
+ if (It->Encoding == Encoding && (!It->Cond || It->Cond (STI)))
38
+ return It->Name ;
39
+ }
45
40
46
41
return " " ;
47
42
}
@@ -92,21 +87,20 @@ namespace SendMsg {
92
87
// clang-format off
93
88
94
89
static constexpr CustomOperand MsgOperands[] = {
95
- {{" " }},
96
90
{{" MSG_INTERRUPT" }, ID_INTERRUPT},
97
91
{{" MSG_GS" }, ID_GS_PreGFX11, isNotGFX11Plus},
92
+ {{" MSG_HS_TESSFACTOR" }, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
98
93
{{" MSG_GS_DONE" }, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
94
+ {{" MSG_DEALLOC_VGPRS" }, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
99
95
{{" MSG_SAVEWAVE" }, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
100
96
{{" MSG_STALL_WAVE_GEN" }, ID_STALL_WAVE_GEN, isGFX9_GFX10_GFX11},
101
97
{{" MSG_HALT_WAVES" }, ID_HALT_WAVES, isGFX9_GFX10_GFX11},
102
98
{{" MSG_ORDERED_PS_DONE" }, ID_ORDERED_PS_DONE, isGFX9_GFX10},
103
99
{{" MSG_EARLY_PRIM_DEALLOC" }, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
104
100
{{" MSG_GS_ALLOC_REQ" }, ID_GS_ALLOC_REQ, isGFX9Plus},
105
101
{{" MSG_GET_DOORBELL" }, ID_GET_DOORBELL, isGFX9_GFX10},
106
- {{" MSG_GET_DDID" }, ID_GET_DDID, isGFX10},
107
- {{" MSG_HS_TESSFACTOR" }, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
108
- {{" MSG_DEALLOC_VGPRS" }, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
109
102
{{" MSG_SAVEWAVE_HAS_TDM" }, ID_SAVEWAVE_HAS_TDM, isGFX1250},
103
+ {{" MSG_GET_DDID" }, ID_GET_DDID, isGFX10},
110
104
{{" MSG_SYSMSG" }, ID_SYSMSG},
111
105
{{" MSG_RTN_GET_DOORBELL" }, ID_RTN_GET_DOORBELL, isGFX11Plus},
112
106
{{" MSG_RTN_GET_DDID" }, ID_RTN_GET_DDID, isGFX11Plus},
@@ -121,7 +115,6 @@ static constexpr CustomOperand MsgOperands[] = {
121
115
};
122
116
123
117
static constexpr CustomOperand SysMsgOperands[] = {
124
- {{" " }},
125
118
{{" SYSMSG_OP_ECC_ERR_INTERRUPT" }, OP_SYS_ECC_ERR_INTERRUPT},
126
119
{{" SYSMSG_OP_REG_RD" }, OP_SYS_REG_RD},
127
120
{{" SYSMSG_OP_HOST_TRAP_ACK" }, OP_SYS_HOST_TRAP_ACK, isNotGFX9Plus},
@@ -169,68 +162,67 @@ namespace Hwreg {
169
162
// NOLINTBEGIN
170
163
// clang-format off
171
164
static constexpr CustomOperand Operands[] = {
172
- {{" " }},
173
- {{" HW_REG_MODE" }, ID_MODE},
174
- {{" HW_REG_STATUS" }, ID_STATUS},
175
- {{" HW_REG_TRAPSTS" }, ID_TRAPSTS, isNotGFX12Plus},
176
- {{" HW_REG_HW_ID" }, ID_HW_ID, isNotGFX10Plus},
177
- {{" HW_REG_GPR_ALLOC" }, ID_GPR_ALLOC},
178
- {{" HW_REG_LDS_ALLOC" }, ID_LDS_ALLOC},
179
- {{" HW_REG_IB_STS" }, ID_IB_STS},
180
- {{" " }},
181
- {{" " }},
182
- {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
183
- {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
184
- {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
185
- {{" " }},
186
- {{" " }},
187
- {{" HW_REG_SH_MEM_BASES" }, ID_MEM_BASES, isGFX9_GFX10_GFX11},
188
- {{" HW_REG_TBA_LO" }, ID_TBA_LO, isGFX9_GFX10},
189
- {{" HW_REG_TBA_HI" }, ID_TBA_HI, isGFX9_GFX10},
190
- {{" HW_REG_TMA_LO" }, ID_TMA_LO, isGFX9_GFX10},
191
- {{" HW_REG_TMA_HI" }, ID_TMA_HI, isGFX9_GFX10},
192
- {{" HW_REG_FLAT_SCR_LO" }, ID_FLAT_SCR_LO, isGFX10_GFX11},
193
- {{" HW_REG_FLAT_SCR_HI" }, ID_FLAT_SCR_HI, isGFX10_GFX11},
194
- {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK, isGFX10Before1030},
195
- {{" HW_REG_HW_ID1" }, ID_HW_ID1, isGFX10Plus},
196
- {{" HW_REG_HW_ID2" }, ID_HW_ID2, isGFX10Plus},
197
- {{" HW_REG_POPS_PACKER" }, ID_POPS_PACKER, isGFX10},
198
- {{" " }},
199
- {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
200
- {{" HW_REG_IB_STS2" }, ID_IB_STS2, isGFX1250},
201
- {{" HW_REG_SHADER_CYCLES" }, ID_SHADER_CYCLES, isGFX10_3_GFX11},
202
- {{" HW_REG_SHADER_CYCLES_HI" }, ID_SHADER_CYCLES_HI, isGFX12Plus},
203
- {{" HW_REG_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
204
- {{" HW_REG_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
205
-
206
- // Register numbers reused in GFX11
207
- {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
208
- {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
209
-
210
- // Register numbers reused in GFX12+
211
- {{" HW_REG_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
212
- {{" HW_REG_PERF_SNAPSHOT_DATA1" }, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
213
- {{" HW_REG_PERF_SNAPSHOT_DATA2" }, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
214
- {{" HW_REG_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
215
- {{" HW_REG_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
216
- {{" HW_REG_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
217
- {{" HW_REG_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
218
- {{" HW_REG_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
219
- {{" HW_REG_SHADER_CYCLES_LO" }, ID_SHADER_CYCLES, isGFX12Plus},
220
-
221
- // GFX942 specific registers
222
- {{" HW_REG_XCC_ID" }, ID_XCC_ID, isGFX940},
223
- {{" HW_REG_SQ_PERF_SNAPSHOT_DATA" }, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
224
- {{" HW_REG_SQ_PERF_SNAPSHOT_DATA1" }, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
225
- {{" HW_REG_SQ_PERF_SNAPSHOT_PC_LO" }, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
226
- {{" HW_REG_SQ_PERF_SNAPSHOT_PC_HI" }, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
227
-
228
- // GFX1250
229
- {{" HW_REG_XNACK_STATE_PRIV" }, ID_XNACK_STATE_PRIV, isGFX1250},
230
- {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK_gfx1250, isGFX1250},
231
-
232
- // Aliases
233
- {{" HW_REG_HW_ID" }, ID_HW_ID1, isGFX10},
165
+ {{" HW_REG_WAVE_MODE" }, ID_MODE, isGFX12Plus},
166
+ {{" HW_REG_MODE" }, ID_MODE},
167
+ {{" HW_REG_WAVE_STATUS" }, ID_STATUS, isGFX12Plus},
168
+ {{" HW_REG_STATUS" }, ID_STATUS},
169
+ {{" HW_REG_TRAPSTS" }, ID_TRAPSTS, isNotGFX12Plus},
170
+ {{" HW_REG_HW_ID" }, ID_HW_ID, isNotGFX10Plus},
171
+ {{" HW_REG_WAVE_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
172
+ {{" HW_REG_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
173
+ {{" HW_REG_WAVE_GPR_ALLOC" }, ID_GPR_ALLOC, isGFX12Plus},
174
+ {{" HW_REG_GPR_ALLOC" }, ID_GPR_ALLOC},
175
+ {{" HW_REG_WAVE_LDS_ALLOC" }, ID_LDS_ALLOC, isGFX12Plus},
176
+ {{" HW_REG_LDS_ALLOC" }, ID_LDS_ALLOC},
177
+ {{" HW_REG_IB_STS" }, ID_IB_STS},
178
+ {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
179
+ {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
180
+ {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
181
+ {{" HW_REG_SH_MEM_BASES" }, ID_MEM_BASES, isGFX9_GFX10_GFX11},
182
+ {{" HW_REG_PERF_SNAPSHOT_DATA1" }, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
183
+ {{" HW_REG_TBA_LO" }, ID_TBA_LO, isGFX9_GFX10},
184
+ {{" HW_REG_PERF_SNAPSHOT_DATA2" }, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
185
+ {{" HW_REG_TBA_HI" }, ID_TBA_HI, isGFX9_GFX10},
186
+ {{" HW_REG_WAVE_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
187
+ {{" HW_REG_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
188
+ {{" HW_REG_TMA_LO" }, ID_TMA_LO, isGFX9_GFX10},
189
+ {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
190
+ {{" HW_REG_WAVE_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
191
+ {{" HW_REG_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
192
+ {{" HW_REG_TMA_HI" }, ID_TMA_HI, isGFX9_GFX10},
193
+ {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
194
+ {{" HW_REG_WAVE_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
195
+ {{" HW_REG_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
196
+ {{" HW_REG_FLAT_SCR_LO" }, ID_FLAT_SCR_LO, isGFX10_GFX11},
197
+ {{" HW_REG_WAVE_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
198
+ {{" HW_REG_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
199
+ {{" HW_REG_XCC_ID" }, ID_XCC_ID, isGFX940},
200
+ {{" HW_REG_FLAT_SCR_HI" }, ID_FLAT_SCR_HI, isGFX10_GFX11},
201
+ {{" HW_REG_WAVE_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
202
+ {{" HW_REG_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
203
+ {{" HW_REG_SQ_PERF_SNAPSHOT_DATA" }, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
204
+ {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK, isGFX10Before1030},
205
+ {{" HW_REG_SQ_PERF_SNAPSHOT_DATA1" }, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
206
+ {{" HW_REG_WAVE_HW_ID1" }, ID_HW_ID1, isGFX12Plus},
207
+ {{" HW_REG_HW_ID1" }, ID_HW_ID1, isGFX10Plus},
208
+ {{" HW_REG_HW_ID" }, ID_HW_ID1, isGFX10},
209
+ {{" HW_REG_SQ_PERF_SNAPSHOT_PC_LO" }, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
210
+ {{" HW_REG_WAVE_HW_ID2" }, ID_HW_ID2, isGFX12Plus},
211
+ {{" HW_REG_HW_ID2" }, ID_HW_ID2, isGFX10Plus},
212
+ {{" HW_REG_SQ_PERF_SNAPSHOT_PC_HI" }, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
213
+ {{" HW_REG_POPS_PACKER" }, ID_POPS_PACKER, isGFX10},
214
+ {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
215
+ {{" HW_REG_IB_STS2" }, ID_IB_STS2, isGFX1250},
216
+ {{" HW_REG_SHADER_CYCLES" }, ID_SHADER_CYCLES, isGFX10_3_GFX11},
217
+ {{" HW_REG_SHADER_CYCLES_LO" }, ID_SHADER_CYCLES, isGFX12Plus},
218
+ {{" HW_REG_SHADER_CYCLES_HI" }, ID_SHADER_CYCLES_HI, isGFX12Plus},
219
+ {{" HW_REG_WAVE_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
220
+ {{" HW_REG_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
221
+ {{" HW_REG_WAVE_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
222
+ {{" HW_REG_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
223
+ {{" HW_REG_XNACK_STATE_PRIV" }, ID_XNACK_STATE_PRIV, isGFX1250},
224
+ {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK_gfx1250, isGFX1250},
225
+
234
226
};
235
227
// clang-format on
236
228
// NOLINTEND
0 commit comments