@@ -14,11 +14,9 @@ namespace llvm::AMDGPU {
1414// ===----------------------------------------------------------------------===//
1515// Custom Operands.
1616//
17- // A table of custom operands shall describe "primary" operand names first
18- // followed by aliases if any. It is not required but recommended to arrange
19- // operands so that operand encoding match operand position in the table. This
20- // will make getNameFromOperandTable() a bit more efficient. Unused slots in the
21- // table shall have an empty name.
17+ // A table of custom operands must be ordered by Encoding in ascending order
18+ // to enable binary search lookup. Within entries that share the same encoding,
19+ // "primary" operand names should be listed first followed by aliases if any.
2220//
2321// ===----------------------------------------------------------------------===//
2422
@@ -27,21 +25,22 @@ template <size_t N>
2725static StringRef getNameFromOperandTable (const CustomOperand (&Table)[N],
2826 unsigned Encoding,
2927 const MCSubtargetInfo &STI) {
30- auto isValidIndexForEncoding = [&](size_t Idx) {
31- return Idx < N && Table[Idx].Encoding == Encoding &&
32- !Table[Idx].Name .empty () &&
33- (!Table[Idx].Cond || Table[Idx].Cond (STI));
28+ auto IsValid = [&](const CustomOperand &Entry) {
29+ return Entry.Encoding == Encoding && !Entry.Name .empty () &&
30+ (!Entry.Cond || Entry.Cond (STI));
3431 };
3532
36- // This is an optimization that should work in most cases. As a side effect,
37- // it may cause selection of an alias instead of a primary operand name in
38- // case of sparse tables.
39- if (isValidIndexForEncoding (Encoding))
40- return Table[Encoding].Name ;
33+ // Binary-search for the first entry with the target encoding; this relies on the table being sorted by Encoding.
34+ auto First =
35+ std::lower_bound (Table, Table + N, Encoding,
36+ [](const CustomOperand &Entry, unsigned TargetEncoding) {
37+ return Entry.Encoding < TargetEncoding;
38+ });
4139
42- for (size_t Idx = 0 ; Idx != N; ++Idx)
43- if (isValidIndexForEncoding (Idx))
44- return Table[Idx].Name ;
40+ // Scan the run of entries sharing that encoding and return the first one with a non-empty name whose subtarget condition (if any) holds.
41+ for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It)
42+ if (IsValid (*It))
43+ return It->Name ;
4544
4645 return " " ;
4746}
@@ -92,21 +91,20 @@ namespace SendMsg {
9291// clang-format off
9392
9493static constexpr CustomOperand MsgOperands[] = {
95- {{" " }},
9694 {{" MSG_INTERRUPT" }, ID_INTERRUPT},
9795 {{" MSG_GS" }, ID_GS_PreGFX11, isNotGFX11Plus},
96+ {{" MSG_HS_TESSFACTOR" }, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
9897 {{" MSG_GS_DONE" }, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
98+ {{" MSG_DEALLOC_VGPRS" }, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
9999 {{" MSG_SAVEWAVE" }, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
100100 {{" MSG_STALL_WAVE_GEN" }, ID_STALL_WAVE_GEN, isGFX9_GFX10_GFX11},
101101 {{" MSG_HALT_WAVES" }, ID_HALT_WAVES, isGFX9_GFX10_GFX11},
102102 {{" MSG_ORDERED_PS_DONE" }, ID_ORDERED_PS_DONE, isGFX9_GFX10},
103103 {{" MSG_EARLY_PRIM_DEALLOC" }, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
104104 {{" MSG_GS_ALLOC_REQ" }, ID_GS_ALLOC_REQ, isGFX9Plus},
105105 {{" MSG_GET_DOORBELL" }, ID_GET_DOORBELL, isGFX9_GFX10},
106- {{" MSG_GET_DDID" }, ID_GET_DDID, isGFX10},
107- {{" MSG_HS_TESSFACTOR" }, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
108- {{" MSG_DEALLOC_VGPRS" }, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
109106 {{" MSG_SAVEWAVE_HAS_TDM" }, ID_SAVEWAVE_HAS_TDM, isGFX1250},
107+ {{" MSG_GET_DDID" }, ID_GET_DDID, isGFX10},
110108 {{" MSG_SYSMSG" }, ID_SYSMSG},
111109 {{" MSG_RTN_GET_DOORBELL" }, ID_RTN_GET_DOORBELL, isGFX11Plus},
112110 {{" MSG_RTN_GET_DDID" }, ID_RTN_GET_DDID, isGFX11Plus},
@@ -121,7 +119,6 @@ static constexpr CustomOperand MsgOperands[] = {
121119};
122120
123121static constexpr CustomOperand SysMsgOperands[] = {
124- {{" " }},
125122 {{" SYSMSG_OP_ECC_ERR_INTERRUPT" }, OP_SYS_ECC_ERR_INTERRUPT},
126123 {{" SYSMSG_OP_REG_RD" }, OP_SYS_REG_RD},
127124 {{" SYSMSG_OP_HOST_TRAP_ACK" }, OP_SYS_HOST_TRAP_ACK, isNotGFX9Plus},
@@ -169,83 +166,67 @@ namespace Hwreg {
169166// NOLINTBEGIN
170167// clang-format off
171168static constexpr CustomOperand Operands[] = {
172- // GFX12+ renamed registers
173- {{" HW_REG_WAVE_MODE" }, ID_MODE, isGFX12Plus},
174- {{" HW_REG_WAVE_STATUS" }, ID_STATUS, isGFX12Plus},
175- {{" HW_REG_WAVE_GPR_ALLOC" }, ID_GPR_ALLOC, isGFX12Plus},
176- {{" HW_REG_WAVE_LDS_ALLOC" }, ID_LDS_ALLOC, isGFX12Plus},
177- {{" HW_REG_WAVE_HW_ID1" }, ID_HW_ID1, isGFX12Plus},
178- {{" HW_REG_WAVE_HW_ID2" }, ID_HW_ID2, isGFX12Plus},
179-
180- {{" HW_REG_MODE" }, ID_MODE},
181- {{" HW_REG_STATUS" }, ID_STATUS},
182- {{" HW_REG_TRAPSTS" }, ID_TRAPSTS, isNotGFX12Plus},
183- {{" HW_REG_HW_ID" }, ID_HW_ID, isNotGFX10Plus},
184- {{" HW_REG_GPR_ALLOC" }, ID_GPR_ALLOC},
185- {{" HW_REG_LDS_ALLOC" }, ID_LDS_ALLOC},
186- {{" HW_REG_IB_STS" }, ID_IB_STS},
187- {{" " }},
188- {{" " }},
189- {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
190- {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
191- {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
192- {{" " }},
193- {{" " }},
194- {{" HW_REG_SH_MEM_BASES" }, ID_MEM_BASES, isGFX9_GFX10_GFX11},
195- {{" HW_REG_TBA_LO" }, ID_TBA_LO, isGFX9_GFX10},
196- {{" HW_REG_TBA_HI" }, ID_TBA_HI, isGFX9_GFX10},
197- {{" HW_REG_TMA_LO" }, ID_TMA_LO, isGFX9_GFX10},
198- {{" HW_REG_TMA_HI" }, ID_TMA_HI, isGFX9_GFX10},
199- {{" HW_REG_FLAT_SCR_LO" }, ID_FLAT_SCR_LO, isGFX10_GFX11},
200- {{" HW_REG_FLAT_SCR_HI" }, ID_FLAT_SCR_HI, isGFX10_GFX11},
201- {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK, isGFX10Before1030},
202- {{" HW_REG_HW_ID1" }, ID_HW_ID1, isGFX10Plus},
203- {{" HW_REG_HW_ID2" }, ID_HW_ID2, isGFX10Plus},
204- {{" HW_REG_POPS_PACKER" }, ID_POPS_PACKER, isGFX10},
205- {{" " }},
206- {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
207- {{" HW_REG_IB_STS2" }, ID_IB_STS2, isGFX1250},
208- {{" HW_REG_SHADER_CYCLES" }, ID_SHADER_CYCLES, isGFX10_3_GFX11},
209- {{" HW_REG_SHADER_CYCLES_HI" }, ID_SHADER_CYCLES_HI, isGFX12Plus},
210- {{" HW_REG_WAVE_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
211- {{" HW_REG_WAVE_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
212-
213- // Register numbers reused in GFX11
214- {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
215- {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
216-
217- // Register numbers reused in GFX12+
218- {{" HW_REG_WAVE_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
219- {{" HW_REG_PERF_SNAPSHOT_DATA1" }, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
220- {{" HW_REG_PERF_SNAPSHOT_DATA2" }, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
221- {{" HW_REG_WAVE_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
222- {{" HW_REG_WAVE_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
223- {{" HW_REG_WAVE_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
224- {{" HW_REG_WAVE_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
225- {{" HW_REG_WAVE_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
226- {{" HW_REG_SHADER_CYCLES_LO" }, ID_SHADER_CYCLES, isGFX12Plus},
227-
228- // GFX942 specific registers
229- {{" HW_REG_XCC_ID" }, ID_XCC_ID, isGFX940},
230- {{" HW_REG_SQ_PERF_SNAPSHOT_DATA" }, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
231- {{" HW_REG_SQ_PERF_SNAPSHOT_DATA1" }, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
232- {{" HW_REG_SQ_PERF_SNAPSHOT_PC_LO" }, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
233- {{" HW_REG_SQ_PERF_SNAPSHOT_PC_HI" }, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
234-
235- // GFX1250
236- {{" HW_REG_XNACK_STATE_PRIV" }, ID_XNACK_STATE_PRIV, isGFX1250},
237- {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK_gfx1250, isGFX1250},
238-
239- // Aliases
240- {{" HW_REG_HW_ID" }, ID_HW_ID1, isGFX10},
241- {{" HW_REG_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
242- {{" HW_REG_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
243- {{" HW_REG_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
244- {{" HW_REG_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
245- {{" HW_REG_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
246- {{" HW_REG_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
247- {{" HW_REG_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
248- {{" HW_REG_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
169+ {{" HW_REG_WAVE_MODE" }, ID_MODE, isGFX12Plus},
170+ {{" HW_REG_MODE" }, ID_MODE},
171+ {{" HW_REG_WAVE_STATUS" }, ID_STATUS, isGFX12Plus},
172+ {{" HW_REG_STATUS" }, ID_STATUS},
173+ {{" HW_REG_TRAPSTS" }, ID_TRAPSTS, isNotGFX12Plus},
174+ {{" HW_REG_HW_ID" }, ID_HW_ID, isNotGFX10Plus},
175+ {{" HW_REG_WAVE_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
176+ {{" HW_REG_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
177+ {{" HW_REG_WAVE_GPR_ALLOC" }, ID_GPR_ALLOC, isGFX12Plus},
178+ {{" HW_REG_GPR_ALLOC" }, ID_GPR_ALLOC},
179+ {{" HW_REG_WAVE_LDS_ALLOC" }, ID_LDS_ALLOC, isGFX12Plus},
180+ {{" HW_REG_LDS_ALLOC" }, ID_LDS_ALLOC},
181+ {{" HW_REG_IB_STS" }, ID_IB_STS},
182+ {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
183+ {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
184+ {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
185+ {{" HW_REG_SH_MEM_BASES" }, ID_MEM_BASES, isGFX9_GFX10_GFX11},
186+ {{" HW_REG_PERF_SNAPSHOT_DATA1" }, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
187+ {{" HW_REG_TBA_LO" }, ID_TBA_LO, isGFX9_GFX10},
188+ {{" HW_REG_PERF_SNAPSHOT_DATA2" }, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
189+ {{" HW_REG_TBA_HI" }, ID_TBA_HI, isGFX9_GFX10},
190+ {{" HW_REG_WAVE_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
191+ {{" HW_REG_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
192+ {{" HW_REG_TMA_LO" }, ID_TMA_LO, isGFX9_GFX10},
193+ {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
194+ {{" HW_REG_WAVE_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
195+ {{" HW_REG_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
196+ {{" HW_REG_TMA_HI" }, ID_TMA_HI, isGFX9_GFX10},
197+ {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
198+ {{" HW_REG_WAVE_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
199+ {{" HW_REG_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
200+ {{" HW_REG_FLAT_SCR_LO" }, ID_FLAT_SCR_LO, isGFX10_GFX11},
201+ {{" HW_REG_WAVE_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
202+ {{" HW_REG_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
203+ {{" HW_REG_XCC_ID" }, ID_XCC_ID, isGFX940},
204+ {{" HW_REG_FLAT_SCR_HI" }, ID_FLAT_SCR_HI, isGFX10_GFX11},
205+ {{" HW_REG_WAVE_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
206+ {{" HW_REG_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
207+ {{" HW_REG_SQ_PERF_SNAPSHOT_DATA" }, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
208+ {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK, isGFX10Before1030},
209+ {{" HW_REG_SQ_PERF_SNAPSHOT_DATA1" }, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
210+ {{" HW_REG_WAVE_HW_ID1" }, ID_HW_ID1, isGFX12Plus},
211+ {{" HW_REG_HW_ID1" }, ID_HW_ID1, isGFX10Plus},
212+ {{" HW_REG_HW_ID" }, ID_HW_ID1, isGFX10},
213+ {{" HW_REG_SQ_PERF_SNAPSHOT_PC_LO" }, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
214+ {{" HW_REG_WAVE_HW_ID2" }, ID_HW_ID2, isGFX12Plus},
215+ {{" HW_REG_HW_ID2" }, ID_HW_ID2, isGFX10Plus},
216+ {{" HW_REG_SQ_PERF_SNAPSHOT_PC_HI" }, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
217+ {{" HW_REG_POPS_PACKER" }, ID_POPS_PACKER, isGFX10},
218+ {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
219+ {{" HW_REG_IB_STS2" }, ID_IB_STS2, isGFX1250},
220+ {{" HW_REG_SHADER_CYCLES" }, ID_SHADER_CYCLES, isGFX10_3_GFX11},
221+ {{" HW_REG_SHADER_CYCLES_LO" }, ID_SHADER_CYCLES, isGFX12Plus},
222+ {{" HW_REG_SHADER_CYCLES_HI" }, ID_SHADER_CYCLES_HI, isGFX12Plus},
223+ {{" HW_REG_WAVE_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
224+ {{" HW_REG_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
225+ {{" HW_REG_WAVE_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
226+ {{" HW_REG_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
227+ {{" HW_REG_XNACK_STATE_PRIV" }, ID_XNACK_STATE_PRIV, isGFX1250},
228+ {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK_gfx1250, isGFX1250},
229+
249230};
250231// clang-format on
251232// NOLINTEND
0 commit comments