@@ -6,14 +6,16 @@ target triple = "aarch64-unknown-linux-gnu"
66define <4 x i32 > @no_sink_simple (<4 x i32 > %a , <4 x i32 > %b , i1 %c , ptr %p ) {
77; CHECK-LABEL: no_sink_simple:
88; CHECK: // %bb.0:
9+ ; CHECK-NEXT: cmgt v2.4s, v1.4s, v0.4s
10+ ; CHECK-NEXT: xtn v2.4h, v2.4s
911; CHECK-NEXT: tbz w0, #0, .LBB0_2
1012; CHECK-NEXT: // %bb.1: // %s
11- ; CHECK-NEXT: cmgt v1.4s, v1.4s, v0.4s
13+ ; CHECK-NEXT: sshll v1.4s, v2.4h, #0
1214; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1315; CHECK-NEXT: str q0, [x1]
1416; CHECK-NEXT: ret
1517; CHECK-NEXT: .LBB0_2: // %t
16- ; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
18+ ; CHECK-NEXT: sshll v0.4s, v2.4h, #0
1719; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
1820; CHECK-NEXT: ret
1921 %d = icmp slt <4 x i32 > %a , %b
3234define void @vector_loop_with_icmp (ptr nocapture noundef writeonly %dest ) {
3335; CHECK-LABEL: vector_loop_with_icmp:
3436; CHECK: // %bb.0: // %entry
35- ; CHECK-NEXT: mov w8 , #15 // =0xf
37+ ; CHECK-NEXT: mov w9 , #15 // =0xf
3638; CHECK-NEXT: mov w10, #4 // =0x4
37- ; CHECK-NEXT: adrp x9 , .LCPI1_0
39+ ; CHECK-NEXT: adrp x8 , .LCPI1_0
3840; CHECK-NEXT: adrp x11, .LCPI1_1
39- ; CHECK-NEXT: dup v0.2d, x8
41+ ; CHECK-NEXT: dup v0.2d, x9
4042; CHECK-NEXT: dup v1.2d, x10
41- ; CHECK-NEXT: ldr q2, [x9 , :lo12:.LCPI1_0]
43+ ; CHECK-NEXT: ldr q2, [x8 , :lo12:.LCPI1_0]
4244; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI1_1]
43- ; CHECK-NEXT: add x9 , x0, #8
44- ; CHECK-NEXT: mov w10 , #16 // =0x10
45- ; CHECK-NEXT: mov w11 , #1 // =0x1
45+ ; CHECK-NEXT: add x8 , x0, #8
46+ ; CHECK-NEXT: mov w9 , #16 // =0x10
47+ ; CHECK-NEXT: mov w10 , #1 // =0x1
4648; CHECK-NEXT: b .LBB1_2
4749; CHECK-NEXT: .LBB1_1: // %pred.store.continue18
4850; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
4951; CHECK-NEXT: add v2.2d, v2.2d, v1.2d
5052; CHECK-NEXT: add v3.2d, v3.2d, v1.2d
51- ; CHECK-NEXT: subs x10, x10 , #4
52- ; CHECK-NEXT: add x9, x9 , #16
53+ ; CHECK-NEXT: subs x9, x9 , #4
54+ ; CHECK-NEXT: add x8, x8 , #16
5355; CHECK-NEXT: b.eq .LBB1_10
5456; CHECK-NEXT: .LBB1_2: // %vector.body
5557; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
56- ; CHECK-NEXT: cmhi v4.2d, v0.2d, v3.2d
57- ; CHECK-NEXT: xtn v4.2s, v4.2d
58- ; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
59- ; CHECK-NEXT: umov w12, v4.h[0]
60- ; CHECK-NEXT: tbz w12, #0, .LBB1_4
61- ; CHECK-NEXT: // %bb.3: // %pred.store.if
58+ ; CHECK-NEXT: cmhi v4.2d, v0.2d, v2.2d
59+ ; CHECK-NEXT: cmhi v5.2d, v0.2d, v3.2d
60+ ; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s
61+ ; CHECK-NEXT: xtn v4.4h, v4.4s
62+ ; CHECK-NEXT: umov w11, v4.h[0]
63+ ; CHECK-NEXT: tbnz w11, #0, .LBB1_6
64+ ; CHECK-NEXT: // %bb.3: // %pred.store.continue
6265; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
63- ; CHECK-NEXT: stur w11, [x9, #-8]
64- ; CHECK-NEXT: .LBB1_4: // %pred.store.continue
66+ ; CHECK-NEXT: umov w11, v4.h[1]
67+ ; CHECK-NEXT: tbnz w11, #0, .LBB1_7
68+ ; CHECK-NEXT: .LBB1_4: // %pred.store.continue6
6569; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
66- ; CHECK-NEXT: dup v4.2d, x8
67- ; CHECK-NEXT: cmhi v4.2d, v4.2d, v3.2d
68- ; CHECK-NEXT: xtn v4.2s, v4.2d
69- ; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
70- ; CHECK-NEXT: umov w12, v4.h[1]
71- ; CHECK-NEXT: tbz w12, #0, .LBB1_6
72- ; CHECK-NEXT: // %bb.5: // %pred.store.if5
70+ ; CHECK-NEXT: umov w11, v4.h[2]
71+ ; CHECK-NEXT: tbnz w11, #0, .LBB1_8
72+ ; CHECK-NEXT: .LBB1_5: // %pred.store.continue8
7373; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
74- ; CHECK-NEXT: stur w11, [x9, #-4]
75- ; CHECK-NEXT: .LBB1_6: // %pred.store.continue6
74+ ; CHECK-NEXT: umov w11, v4.h[3]
75+ ; CHECK-NEXT: tbz w11, #0, .LBB1_1
76+ ; CHECK-NEXT: b .LBB1_9
77+ ; CHECK-NEXT: .LBB1_6: // %pred.store.if
7678; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
77- ; CHECK-NEXT: dup v4.2d, x8
78- ; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
79- ; CHECK-NEXT: xtn v4.2s, v4.2d
80- ; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
81- ; CHECK-NEXT: umov w12, v4.h[2]
82- ; CHECK-NEXT: tbz w12, #0, .LBB1_8
83- ; CHECK-NEXT: // %bb.7: // %pred.store.if7
79+ ; CHECK-NEXT: stur w10, [x8, #-8]
80+ ; CHECK-NEXT: umov w11, v4.h[1]
81+ ; CHECK-NEXT: tbz w11, #0, .LBB1_4
82+ ; CHECK-NEXT: .LBB1_7: // %pred.store.if5
8483; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
85- ; CHECK-NEXT: str w11, [x9]
86- ; CHECK-NEXT: .LBB1_8: // %pred.store.continue8
84+ ; CHECK-NEXT: stur w10, [x8, #-4]
85+ ; CHECK-NEXT: umov w11, v4.h[2]
86+ ; CHECK-NEXT: tbz w11, #0, .LBB1_5
87+ ; CHECK-NEXT: .LBB1_8: // %pred.store.if7
8788; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
88- ; CHECK-NEXT: dup v4.2d, x8
89- ; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
90- ; CHECK-NEXT: xtn v4.2s, v4.2d
91- ; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
92- ; CHECK-NEXT: umov w12, v4.h[3]
93- ; CHECK-NEXT: tbz w12, #0, .LBB1_1
94- ; CHECK-NEXT: // %bb.9: // %pred.store.if9
89+ ; CHECK-NEXT: str w10, [x8]
90+ ; CHECK-NEXT: umov w11, v4.h[3]
91+ ; CHECK-NEXT: tbz w11, #0, .LBB1_1
92+ ; CHECK-NEXT: .LBB1_9: // %pred.store.if9
9593; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
96- ; CHECK-NEXT: str w11 , [x9 , #4]
94+ ; CHECK-NEXT: str w10 , [x8 , #4]
9795; CHECK-NEXT: b .LBB1_1
9896; CHECK-NEXT: .LBB1_10: // %for.cond.cleanup
9997; CHECK-NEXT: ret
0 commit comments