Skip to content

Commit 17c74e0

Browse files
committed
Port the funnel-shift `or`-with-shift combine from SelectionDAG ISel to GlobalISel
1 parent 27ca483 commit 17c74e0

File tree

3 files changed

+76
-8
lines changed

3 files changed

+76
-8
lines changed

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,24 @@ def funnel_shift_overshift: GICombineRule<
10331033
(apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }])
10341034
>;
10351035

1036+
// Transform: fshl x, ?, y | shl x, y -> fshl x, ?, y
// Matches a G_OR whose sources are a G_FSHL and a G_SHL that share the same
// first operand ($x) and the same shift amount ($y), and rebuilds the G_OR
// result as a single G_FSHL. `?` ($_) is the funnel shift's second operand; it
// is not constrained by the match and is passed through unchanged.
// NOTE(review): the pattern is written with the G_FSHL result as the first
// G_OR source. Confirm whether the commuted form (shl x, y | fshl x, ?, y) is
// matched automatically or needs its own pattern.
1037+
def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
1038+
(defs root:$root),
1039+
(match (G_FSHL $out1, $x, $_, $y),
1040+
(G_SHL $out2, $x, $y),
1041+
(G_OR $root, $out1, $out2)),
1042+
(apply (G_FSHL $root, $x, $_, $y))
1043+
>;
1044+
1045+
// Transform: fshr ?, x, y | lshr x, y -> fshr ?, x, y
// Matches a G_OR whose sources are a G_FSHR and a G_LSHR, where the G_LSHR
// shifts the funnel shift's second operand ($x) by the same amount ($y), and
// rebuilds the G_OR result as a single G_FSHR. `?` ($_) is the funnel shift's
// first operand; it is not constrained by the match and is passed through
// unchanged.
// NOTE(review): the pattern is written with the G_FSHR result as the first
// G_OR source. Confirm whether the commuted form (lshr x, y | fshr ?, x, y) is
// matched automatically or needs its own pattern.
1046+
def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
1047+
(defs root:$root),
1048+
(match (G_FSHR $out1, $_, $x, $y),
1049+
(G_LSHR $out2, $x, $y),
1050+
(G_OR $root, $out1, $out2)),
1051+
(apply (G_FSHR $root, $_, $x, $y))
1052+
>;
1053+
10361054
def rotate_out_of_range : GICombineRule<
10371055
(defs root:$root),
10381056
(match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -1105,7 +1123,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
11051123
funnel_shift_to_rotate,
11061124
funnel_shift_right_zero,
11071125
funnel_shift_left_zero,
1108-
funnel_shift_overshift]>;
1126+
funnel_shift_overshift,
1127+
funnel_shift_or_shift_to_funnel_shift_left,
1128+
funnel_shift_or_shift_to_funnel_shift_right]>;
11091129

11101130
def bitfield_extract_from_sext_inreg : GICombineRule<
11111131
(defs root:$root, build_fn_matchinfo:$info),

llvm/test/CodeGen/AArch64/funnel-shift.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
674674
; CHECK-GI-LABEL: or_shl_fshl_simplify:
675675
; CHECK-GI: // %bb.0:
676676
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
677-
; CHECK-GI-NEXT: and w9, w2, #0x1f
678-
; CHECK-GI-NEXT: lsr w10, w0, #1
679-
; CHECK-GI-NEXT: lsl w11, w1, w2
677+
; CHECK-GI-NEXT: lsr w9, w0, #1
678+
; CHECK-GI-NEXT: and w10, w2, #0x1f
680679
; CHECK-GI-NEXT: bic w8, w8, w2
681-
; CHECK-GI-NEXT: lsl w9, w1, w9
682-
; CHECK-GI-NEXT: lsr w8, w10, w8
683-
; CHECK-GI-NEXT: orr w9, w9, w11
684-
; CHECK-GI-NEXT: orr w0, w9, w8
680+
; CHECK-GI-NEXT: lsl w10, w1, w10
681+
; CHECK-GI-NEXT: lsr w8, w9, w8
682+
; CHECK-GI-NEXT: orr w0, w10, w8
685683
; CHECK-GI-NEXT: ret
686684
%shy = shl i32 %y, %s
687685
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)

llvm/test/CodeGen/RISCV/GlobalISel/shift.ll

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,53 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) {
105105
%trunc = trunc i48 %shl to i16
106106
ret i16 %trunc
107107
}
108+
109+
; Computes `or (fshl %x, %_, %y), (shl %x, %y)` on i32 and truncates the result
; to i16. The expected RV32 and RV64 sequences below contain a single left
; shift of the first argument (sll / sllw) and a single `or`, so the separate
; `shl` does not produce an extra shift-and-or pair in the output.
; NOTE(review): only the fshl-first operand order of the `or` is exercised
; here; consider adding the commuted order.
define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
110+
; RV32-LABEL: test_fshl_i32:
111+
; RV32: # %bb.0:
112+
; RV32-NEXT: not a3, a2
113+
; RV32-NEXT: sll a0, a0, a2
114+
; RV32-NEXT: srli a1, a1, 1
115+
; RV32-NEXT: srl a1, a1, a3
116+
; RV32-NEXT: or a0, a0, a1
117+
; RV32-NEXT: ret
118+
;
119+
; RV64-LABEL: test_fshl_i32:
120+
; RV64: # %bb.0:
121+
; RV64-NEXT: not a3, a2
122+
; RV64-NEXT: sllw a0, a0, a2
123+
; RV64-NEXT: srliw a1, a1, 1
124+
; RV64-NEXT: srlw a1, a1, a3
125+
; RV64-NEXT: or a0, a0, a1
126+
; RV64-NEXT: ret
127+
%fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
128+
%shl = shl i32 %x, %y
129+
%or = or i32 %fshl, %shl
130+
%trunc = trunc i32 %or to i16
131+
ret i16 %trunc
132+
}
133+
134+
; Computes `or (fshr %_, %x, %y), (lshr %x, %y)` on i32 and truncates the
; result to i16. The expected RV32 and RV64 sequences below contain a single
; logical right shift of the second argument (srl / srlw) and a single `or`, so
; the separate `lshr` does not produce an extra shift-and-or pair in the output.
; NOTE(review): only the fshr-first operand order of the `or` is exercised
; here; consider adding the commuted order.
define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
135+
; RV32-LABEL: test_fshr_i32:
136+
; RV32: # %bb.0:
137+
; RV32-NEXT: not a3, a2
138+
; RV32-NEXT: slli a0, a0, 1
139+
; RV32-NEXT: sll a0, a0, a3
140+
; RV32-NEXT: srl a1, a1, a2
141+
; RV32-NEXT: or a0, a0, a1
142+
; RV32-NEXT: ret
143+
;
144+
; RV64-LABEL: test_fshr_i32:
145+
; RV64: # %bb.0:
146+
; RV64-NEXT: not a3, a2
147+
; RV64-NEXT: slli a0, a0, 1
148+
; RV64-NEXT: sllw a0, a0, a3
149+
; RV64-NEXT: srlw a1, a1, a2
150+
; RV64-NEXT: or a0, a0, a1
151+
; RV64-NEXT: ret
152+
%fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
153+
%lshr = lshr i32 %x, %y
154+
%or = or i32 %fshr, %lshr
155+
%trunc = trunc i32 %or to i16
156+
ret i16 %trunc
157+
}

0 commit comments

Comments
 (0)