Skip to content

Commit 86af61b

Browse files
committed
patch 2
1 parent 0c78197 commit 86af61b

File tree

1 file changed

+32
-31
lines changed

1 file changed

+32
-31
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3076,42 +3076,43 @@ def : GCNPat <
30763076
(i32 8), /* clamp */ 0, /* op_sel */ 0))
30773077
>;
30783078

3079-
// FIXME: This should have been narrowed to i32 during legalization.
3080-
// This pattern should also be skipped for GlobalISel
3081-
class bswapi64ExtPat<Instruction inst, bit hasTrue16> {
3082-
dag pattern = (i64 (bswap i64:$a));
3083-
dag operand1 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub1));
3084-
dag operand2 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub0));
3085-
dag ALIGNBIT32_INST1 = !if(hasTrue16,
3086-
(inst 0, operand1, 0, operand1, 0, (i32 24), 0, 0),
3087-
(inst operand1, operand1, (i32 24)));
3088-
dag ALIGNBIT32_INST2 = !if(hasTrue16,
3089-
(inst 0, operand1, 0, operand1, 0, (i32 8), 0, 0),
3090-
(inst operand1, operand1, (i32 8)));
3091-
dag ALIGNBIT32_INST3 = !if(hasTrue16,
3092-
(inst 0, operand2, 0, operand2, 0, (i32 24), 0, 0),
3093-
(inst operand2, operand2, (i32 24)));
3094-
dag ALIGNBIT32_INST4 = !if(hasTrue16,
3095-
(inst 0, operand2, 0, operand2, 0, (i32 8), 0, 0),
3096-
(inst operand2, operand2, (i32 8)));
3097-
dag result = (REG_SEQUENCE VReg_64,
3098-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3099-
ALIGNBIT32_INST1,
3100-
ALIGNBIT32_INST2),
3101-
sub0,
3102-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3103-
ALIGNBIT32_INST3,
3104-
ALIGNBIT32_INST4),
3105-
sub1);
3079+
// Helper that assembles the output dag for a 32-bit align-bit instruction
// from three source operand dags.
//   op1, op2, op3 - operand dags, spliced into the result in this order.
//   hasTrue16     - selects the fake16 opcode and its longer operand list.
// The finished dag is exposed as the `expr` field:
//   hasTrue16 = 0:  (inst op1, op2, op3)
//   hasTrue16 = 1:  (inst 0, op1, 0, op2, 0, op3, 0, 0)
class AlignBit32Inst<dag op1, dag op2, dag op3, bit hasTrue16> {
3080+
// Opcode: the fake16 variant when hasTrue16 is set, the plain e64 one otherwise.
Instruction inst = !if(hasTrue16, V_ALIGNBIT_B32_fake16_e64, V_ALIGNBIT_B32_e64);
3081+
// A single literal-0 operand for the fake16 form, or an empty dag otherwise.
// NOTE(review): presumably these zeros are the per-source modifier slots plus
// the trailing clamp / op_sel operands of the fake16 encoding - confirm
// against the instruction definition.
dag NoMods = !if(hasTrue16, (inst 0), (inst));
3082+
// !con concatenates the argument lists of dags that share the same operator
// (here always `inst`), so empty NoMods pieces simply vanish from the result.
dag expr = !con(NoMods, (inst op1), NoMods, (inst op2),
3083+
NoMods, (inst op3), NoMods, NoMods);
3084+
}
3085+
3086+
// Emits one anonymous GCNPat that selects (i64 (bswap i64:$a)) into a
// REG_SEQUENCE of two V_BFI_B32_e64 results.
//   hasTrue16 - forwarded to AlignBit32Inst to choose which align-bit opcode
//               and operand layout is used inside the pattern.
// The 32-bit halves are crossed over: the result's sub0 is computed from the
// source's sub1, and the result's sub1 from the source's sub0. Each half is
// produced by V_BFI_B32_e64 with the mask 0x00ff00ff and two align-bit
// expressions that use the same source half for both register operands, with
// shift amounts 24 and 8 respectively.
multiclass bswapi64ExtPat<bit hasTrue16> {
3087+
def : GCNPat <
3088+
(i64 (bswap i64:$a)),
3089+
(REG_SEQUENCE VReg_64,
3090+
// Result sub0: built from the source's sub1 half.
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3091+
AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3092+
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3093+
(i32 24), hasTrue16>.expr,
3094+
AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3095+
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3096+
(i32 8), hasTrue16>.expr),
3097+
sub0,
3098+
// Result sub1: built from the source's sub0 half.
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3099+
AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3100+
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101+
(i32 24), hasTrue16>.expr,
3102+
AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3103+
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3104+
(i32 8), hasTrue16>.expr),
3105+
sub1)
3106+
>;
31063107
}
31073108

3109+
// FIXME: This should have been narrowed to i32 during legalization.
3110+
// This pattern should also be skipped for GlobalISel
31083111
let True16Predicate = NotHasTrue16BitInsts in
3109-
def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.pattern,
3110-
bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.result>;
3112+
defm : bswapi64ExtPat</*hasTrue16*/0>;
31113113

31123114
let True16Predicate = UseFakeTrue16Insts in
3113-
def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.pattern,
3114-
bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.result>;
3115+
defm : bswapi64ExtPat</*hasTrue16*/1>;
31153116

31163117
// FIXME: The AddedComplexity should not be needed, but in GlobalISel
31173118
// the BFI pattern ends up taking precedence without it.

0 commit comments

Comments
 (0)