Skip to content

Commit 3f52226

Browse files
[AMDGPU] Resolving comments 1
1 parent 43f1a87 commit 3f52226

File tree

1 file changed

+80
-99
lines changed

1 file changed

+80
-99
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp

Lines changed: 80 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,9 @@ namespace llvm::AMDGPU {
1414
//===----------------------------------------------------------------------===//
1515
// Custom Operands.
1616
//
17-
// A table of custom operands shall describe "primary" operand names first
18-
// followed by aliases if any. It is not required but recommended to arrange
19-
// operands so that operand encoding match operand position in the table. This
20-
// will make getNameFromOperandTable() a bit more efficient. Unused slots in the
21-
// table shall have an empty name.
17+
// A table of custom operands must be sorted by Encoding in ascending order
18+
// so that getNameFromOperandTable() can use binary search. Among entries that
19+
// share an encoding, list the "primary" operand name first, then any aliases.
2220
//
2321
//===----------------------------------------------------------------------===//
2422

@@ -27,21 +25,22 @@ template <size_t N>
2725
static StringRef getNameFromOperandTable(const CustomOperand (&Table)[N],
2826
unsigned Encoding,
2927
const MCSubtargetInfo &STI) {
30-
auto isValidIndexForEncoding = [&](size_t Idx) {
31-
return Idx < N && Table[Idx].Encoding == Encoding &&
32-
!Table[Idx].Name.empty() &&
33-
(!Table[Idx].Cond || Table[Idx].Cond(STI));
28+
auto IsValid = [&](const CustomOperand &Entry) {
29+
return Entry.Encoding == Encoding && !Entry.Name.empty() &&
30+
(!Entry.Cond || Entry.Cond(STI));
3431
};
3532

36-
// This is an optimization that should work in most cases. As a side effect,
37-
// it may cause selection of an alias instead of a primary operand name in
38-
// case of sparse tables.
39-
if (isValidIndexForEncoding(Encoding))
40-
return Table[Encoding].Name;
33+
// Find the first entry whose encoding is not less than the target.
34+
auto First =
35+
std::lower_bound(Table, Table + N, Encoding,
36+
[](const CustomOperand &Entry, unsigned TargetEncoding) {
37+
return Entry.Encoding < TargetEncoding;
38+
});
4139

42-
for (size_t Idx = 0; Idx != N; ++Idx)
43-
if (isValidIndexForEncoding(Idx))
44-
return Table[Idx].Name;
40+
// Scan the entries sharing this encoding and return the first valid name.
41+
for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It)
42+
if (IsValid(*It))
43+
return It->Name;
4544

4645
return "";
4746
}
@@ -92,21 +91,20 @@ namespace SendMsg {
9291
// clang-format off
9392

9493
static constexpr CustomOperand MsgOperands[] = {
95-
{{""}},
9694
{{"MSG_INTERRUPT"}, ID_INTERRUPT},
9795
{{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus},
96+
{{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
9897
{{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
98+
{{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
9999
{{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
100100
{{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9_GFX10_GFX11},
101101
{{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9_GFX10_GFX11},
102102
{{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9_GFX10},
103103
{{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
104104
{{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus},
105105
{{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10},
106-
{{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10},
107-
{{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
108-
{{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
109106
{{"MSG_SAVEWAVE_HAS_TDM"}, ID_SAVEWAVE_HAS_TDM, isGFX1250},
107+
{{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10},
110108
{{"MSG_SYSMSG"}, ID_SYSMSG},
111109
{{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus},
112110
{{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus},
@@ -121,7 +119,6 @@ static constexpr CustomOperand MsgOperands[] = {
121119
};
122120

123121
static constexpr CustomOperand SysMsgOperands[] = {
124-
{{""}},
125122
{{"SYSMSG_OP_ECC_ERR_INTERRUPT"}, OP_SYS_ECC_ERR_INTERRUPT},
126123
{{"SYSMSG_OP_REG_RD"}, OP_SYS_REG_RD},
127124
{{"SYSMSG_OP_HOST_TRAP_ACK"}, OP_SYS_HOST_TRAP_ACK, isNotGFX9Plus},
@@ -169,83 +166,67 @@ namespace Hwreg {
169166
// NOLINTBEGIN
170167
// clang-format off
171168
static constexpr CustomOperand Operands[] = {
172-
// GFX12+ renamed registers
173-
{{"HW_REG_WAVE_MODE"}, ID_MODE, isGFX12Plus},
174-
{{"HW_REG_WAVE_STATUS"}, ID_STATUS, isGFX12Plus},
175-
{{"HW_REG_WAVE_GPR_ALLOC"}, ID_GPR_ALLOC, isGFX12Plus},
176-
{{"HW_REG_WAVE_LDS_ALLOC"}, ID_LDS_ALLOC, isGFX12Plus},
177-
{{"HW_REG_WAVE_HW_ID1"}, ID_HW_ID1, isGFX12Plus},
178-
{{"HW_REG_WAVE_HW_ID2"}, ID_HW_ID2, isGFX12Plus},
179-
180-
{{"HW_REG_MODE"}, ID_MODE},
181-
{{"HW_REG_STATUS"}, ID_STATUS},
182-
{{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus},
183-
{{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus},
184-
{{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC},
185-
{{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC},
186-
{{"HW_REG_IB_STS"}, ID_IB_STS},
187-
{{""}},
188-
{{""}},
189-
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
190-
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
191-
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
192-
{{""}},
193-
{{""}},
194-
{{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11},
195-
{{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10},
196-
{{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10},
197-
{{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10},
198-
{{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10},
199-
{{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11},
200-
{{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11},
201-
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030},
202-
{{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus},
203-
{{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
204-
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
205-
{{""}},
206-
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
207-
{{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250},
208-
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
209-
{{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
210-
{{"HW_REG_WAVE_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
211-
{{"HW_REG_WAVE_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
212-
213-
// Register numbers reused in GFX11
214-
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
215-
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
216-
217-
// Register numbers reused in GFX12+
218-
{{"HW_REG_WAVE_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
219-
{{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
220-
{{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
221-
{{"HW_REG_WAVE_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
222-
{{"HW_REG_WAVE_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
223-
{{"HW_REG_WAVE_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
224-
{{"HW_REG_WAVE_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
225-
{{"HW_REG_WAVE_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
226-
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
227-
228-
// GFX942 specific registers
229-
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
230-
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
231-
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
232-
{{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
233-
{{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
234-
235-
// GFX1250
236-
{{"HW_REG_XNACK_STATE_PRIV"}, ID_XNACK_STATE_PRIV, isGFX1250},
237-
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250},
238-
239-
// Aliases
240-
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
241-
{{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
242-
{{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
243-
{{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
244-
{{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
245-
{{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
246-
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
247-
{{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
248-
{{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
169+
{{"HW_REG_WAVE_MODE"}, ID_MODE, isGFX12Plus},
170+
{{"HW_REG_MODE"}, ID_MODE},
171+
{{"HW_REG_WAVE_STATUS"}, ID_STATUS, isGFX12Plus},
172+
{{"HW_REG_STATUS"}, ID_STATUS},
173+
{{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus},
174+
{{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus},
175+
{{"HW_REG_WAVE_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
176+
{{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
177+
{{"HW_REG_WAVE_GPR_ALLOC"}, ID_GPR_ALLOC, isGFX12Plus},
178+
{{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC},
179+
{{"HW_REG_WAVE_LDS_ALLOC"}, ID_LDS_ALLOC, isGFX12Plus},
180+
{{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC},
181+
{{"HW_REG_IB_STS"}, ID_IB_STS},
182+
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
183+
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
184+
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
185+
{{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11},
186+
{{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
187+
{{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10},
188+
{{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
189+
{{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10},
190+
{{"HW_REG_WAVE_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
191+
{{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
192+
{{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10},
193+
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
194+
{{"HW_REG_WAVE_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
195+
{{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
196+
{{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10},
197+
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
198+
{{"HW_REG_WAVE_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
199+
{{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
200+
{{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11},
201+
{{"HW_REG_WAVE_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
202+
{{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
203+
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
204+
{{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11},
205+
{{"HW_REG_WAVE_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
206+
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
207+
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
208+
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030},
209+
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
210+
{{"HW_REG_WAVE_HW_ID1"}, ID_HW_ID1, isGFX12Plus},
211+
{{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus},
212+
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
213+
{{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
214+
{{"HW_REG_WAVE_HW_ID2"}, ID_HW_ID2, isGFX12Plus},
215+
{{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
216+
{{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
217+
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
218+
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
219+
{{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250},
220+
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
221+
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
222+
{{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
223+
{{"HW_REG_WAVE_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
224+
{{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
225+
{{"HW_REG_WAVE_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
226+
{{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
227+
{{"HW_REG_XNACK_STATE_PRIV"}, ID_XNACK_STATE_PRIV, isGFX1250},
228+
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250},
229+
249230
};
250231
// clang-format on
251232
// NOLINTEND

0 commit comments

Comments
 (0)