Skip to content

Commit 149607b

Browse files
committed
simplify and remove duplication
1 parent 4fcbedd commit 149607b

File tree

2 files changed

+50
-61
lines changed

2 files changed

+50
-61
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3049,61 +3049,40 @@ def : GCNPat <
30493049

30503050
// FIXME: This should have been narrowed to i32 during legalization.
30513051
// This pattern should also be skipped for GlobalISel
3052+
// Holds the source pattern and the result DAG used to select an i64 bswap.
//   inst      - instruction used to build the four ALIGNBIT32_INST* nodes.
//   hasTrue16 - selects which operand layout is used when building `inst`
//               (eight operands when set, three when clear).
// NOTE(review): the flag presumably tracks whether `inst` is a True16/fake16
// VOP3 form that carries modifier operands - confirm against the users.
class bswapi64ExtPat<Instruction inst, bit hasTrue16> {
3053+
// Source DAG to match: an i64 bswap of $a.
dag pattern = (i64 (bswap i64:$a));
3054+
// operand1 is the sub1 subregister of $a, taken as an i32.
dag operand1 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub1));
3055+
// operand2 is the sub0 subregister of $a, taken as an i32.
dag operand2 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub0));
3056+
// INST1/INST2 feed operand1 to `inst` as both sources, with immediates 24 and
// 8 respectively. In the hasTrue16 form the literal zeros occupy the
// src0/src1/src2 modifier slots and the trailing clamp and op_sel slots.
dag ALIGNBIT32_INST1 = !if(hasTrue16,
3057+
(inst 0, operand1, 0, operand1, 0, (i32 24), 0, 0),
3058+
(inst operand1, operand1, (i32 24)));
3059+
dag ALIGNBIT32_INST2 = !if(hasTrue16,
3060+
(inst 0, operand1, 0, operand1, 0, (i32 8), 0, 0),
3061+
(inst operand1, operand1, (i32 8)));
3062+
// INST3/INST4 are the same two nodes built from operand2 instead.
dag ALIGNBIT32_INST3 = !if(hasTrue16,
3063+
(inst 0, operand2, 0, operand2, 0, (i32 24), 0, 0),
3064+
(inst operand2, operand2, (i32 24)));
3065+
dag ALIGNBIT32_INST4 = !if(hasTrue16,
3066+
(inst 0, operand2, 0, operand2, 0, (i32 8), 0, 0),
3067+
(inst operand2, operand2, (i32 8)));
3068+
// Result: a VReg_64 REG_SEQUENCE of two V_BFI_B32_e64 nodes that share the
// 0x00ff00ff mask. The sub0 output is built from the operand1 (input sub1)
// nodes and the sub1 output from the operand2 (input sub0) nodes, so each
// output half is derived from the opposite input half.
dag result = (REG_SEQUENCE VReg_64,
3069+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3070+
ALIGNBIT32_INST1,
3071+
ALIGNBIT32_INST2),
3072+
sub0,
3073+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3074+
ALIGNBIT32_INST3,
3075+
ALIGNBIT32_INST4),
3076+
sub1);
3077+
}
3078+
30523079
let True16Predicate = NotHasTrue16BitInsts in
3053-
def : GCNPat <
3054-
(i64 (bswap i64:$a)),
3055-
(REG_SEQUENCE VReg_64,
3056-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3057-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3058-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3059-
(i32 24)),
3060-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3061-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3062-
(i32 8))),
3063-
sub0,
3064-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3065-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3066-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3067-
(i32 24)),
3068-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3069-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3070-
(i32 8))),
3071-
sub1)
3072-
>;
3080+
// i64 bswap pattern built with V_ALIGNBIT_B32_e64 and hasTrue16 = 0, i.e. the
// three-operand form of the alignbit nodes.
def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.pattern,
3081+
bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.result>;
30733082

30743083
let True16Predicate = UseFakeTrue16Insts in
3075-
def : GCNPat <
3076-
(i64 (bswap i64:$a)),
3077-
(REG_SEQUENCE VReg_64,
3078-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3079-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3080-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3081-
0, /* src1_modifiers */
3082-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3083-
0, /* src2_modifiers */
3084-
(i32 24), /* clamp */ 0, /* op_sel */ 0),
3085-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3086-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3087-
0, /* src1_modifiers */
3088-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3089-
0, /* src2_modifiers */
3090-
(i32 8), /* clamp */ 0, /* op_sel */ 0)),
3091-
sub0,
3092-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3093-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3094-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3095-
0, /* src1_modifiers */
3096-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3097-
0, /* src2_modifiers */
3098-
(i32 24), /* clamp */ 0, /* op_sel */ 0),
3099-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3100-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101-
0, /* src1_modifiers */
3102-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3103-
0, /* src2_modifiers */
3104-
(i32 8), /* clamp */ 0, /* op_sel */ 0)),
3105-
sub1)
3106-
>;
3084+
// i64 bswap pattern built with V_ALIGNBIT_B32_fake16_e64 and hasTrue16 = 1,
// i.e. the eight-operand form of the alignbit nodes.
def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.pattern,
3085+
bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.result>;
31073086

31083087
// FIXME: The AddedComplexity should not be needed, but in GlobalISel
31093088
// the BFI pattern ends up taking precedence without it.

llvm/test/CodeGen/AMDGPU/bswap.ll

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -737,15 +737,25 @@ define i64 @v_bswap_i48(i64 %src) {
737737
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v2
738738
; VI-NEXT: s_setpc_b64 s[30:31]
739739
;
740-
; GFX11-LABEL: v_bswap_i48:
741-
; GFX11: ; %bb.0:
742-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743-
; GFX11-NEXT: v_perm_b32 v2, 0, v0, 0x10203
744-
; GFX11-NEXT: v_perm_b32 v0, 0, v1, 0x10203
745-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
746-
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
747-
; GFX11-NEXT: v_alignbit_b32 v0, v2, v0, 16
748-
; GFX11-NEXT: s_setpc_b64 s[30:31]
740+
; GFX11-REAL16-LABEL: v_bswap_i48:
741+
; GFX11-REAL16: ; %bb.0:
742+
; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743+
; GFX11-REAL16-NEXT: v_perm_b32 v2, 0, v0, 0x10203
744+
; GFX11-REAL16-NEXT: v_perm_b32 v1, 0, v1, 0x10203
745+
; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
746+
; GFX11-REAL16-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
747+
; GFX11-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
748+
; GFX11-REAL16-NEXT: s_setpc_b64 s[30:31]
749+
;
750+
; GFX11-FAKE16-LABEL: v_bswap_i48:
751+
; GFX11-FAKE16: ; %bb.0:
752+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753+
; GFX11-FAKE16-NEXT: v_perm_b32 v2, 0, v0, 0x10203
754+
; GFX11-FAKE16-NEXT: v_perm_b32 v0, 0, v1, 0x10203
755+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
756+
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
757+
; GFX11-FAKE16-NEXT: v_alignbit_b32 v0, v2, v0, 16
758+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
749759
%trunc = trunc i64 %src to i48
750760
%bswap = call i48 @llvm.bswap.i48(i48 %trunc)
751761
%zext = zext i48 %bswap to i64

0 commit comments

Comments
 (0)