Skip to content

Commit 63225d0

Browse files
committed
[AArch64] Run CSE one last time
During comparison optimization, many SUBS instructions become SUB, which is good. However, this means there are now new CSE opportunities to exploit, because NZCV is no longer involved in those instructions. This will come into play more as I set ADDS, rather than ADD, to be the default for CSE purposes.
1 parent d0ee820 commit 63225d0

File tree

9 files changed

+441
-470
lines changed

9 files changed

+441
-470
lines changed

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,8 +788,21 @@ void AArch64PassConfig::addMachineSSAOptimization() {
788788
// Run default MachineSSAOptimization first.
789789
TargetPassConfig::addMachineSSAOptimization();
790790

791+
// With optimization, dead code should already be eliminated. However
792+
// there is one known exception: peephole optimizations may open more
793+
// opportunities for dead code. This is especially true for targets like
794+
// ARM and AArch64, whose peephole optimizations remove dead defs of the flag
795+
// register; those defs previously prevented CSE.
796+
addPass(&MachineCSELegacyID);
797+
addPass(&MachineSinkingLegacyID);
798+
791799
if (TM->getOptLevel() != CodeGenOptLevel::None)
792800
addPass(createAArch64MIPeepholeOptPass());
801+
802+
// Clean up any remaining code that can be eliminated: the demotion of
803+
// some instructions can allow the removal of instructions that
804+
// previously could not be removed.
805+
addPass(&DeadMachineInstructionElimID);
793806
}
794807

795808
bool AArch64PassConfig::addILPOpts() {

llvm/test/CodeGen/AArch64/O3-pipeline.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@
162162
; CHECK-NEXT: Remove dead machine instructions
163163
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
164164
; CHECK-NEXT: AArch64 Dead register definitions
165+
; CHECK-NEXT: Machine Common Subexpression Elimination
166+
; CHECK-NEXT: Remove dead machine instructions
165167
; CHECK-NEXT: Detect Dead Lanes
166168
; CHECK-NEXT: Init Undef Pass
167169
; CHECK-NEXT: Process Implicit Definitions

llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll

Lines changed: 36 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -206,13 +206,12 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
206206
; CHECK-LABEL: insert_vec_v8i16_uaddlv_from_v8i16:
207207
; CHECK: ; %bb.0: ; %entry
208208
; CHECK-NEXT: movi.2d v0, #0000000000000000
209-
; CHECK-NEXT: movi.2d v1, #0000000000000000
210209
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
211-
; CHECK-NEXT: uaddlv.8h s0, v0
212-
; CHECK-NEXT: mov.h v1[0], v0[0]
213-
; CHECK-NEXT: ushll.4s v1, v1, #0
214-
; CHECK-NEXT: ucvtf.4s v1, v1
215-
; CHECK-NEXT: str q1, [x0]
210+
; CHECK-NEXT: uaddlv.8h s1, v0
211+
; CHECK-NEXT: mov.h v0[0], v1[0]
212+
; CHECK-NEXT: ushll.4s v0, v0, #0
213+
; CHECK-NEXT: ucvtf.4s v0, v0
214+
; CHECK-NEXT: str q0, [x0]
216215
; CHECK-NEXT: ret
217216

218217
entry:
@@ -228,14 +227,13 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
228227
; CHECK-LABEL: insert_vec_v3i16_uaddlv_from_v8i16:
229228
; CHECK: ; %bb.0: ; %entry
230229
; CHECK-NEXT: movi.2d v0, #0000000000000000
231-
; CHECK-NEXT: movi.2d v1, #0000000000000000
232230
; CHECK-NEXT: add x8, x0, #8
233-
; CHECK-NEXT: uaddlv.8h s0, v0
234-
; CHECK-NEXT: mov.h v1[0], v0[0]
235-
; CHECK-NEXT: ushll.4s v1, v1, #0
236-
; CHECK-NEXT: ucvtf.4s v1, v1
237-
; CHECK-NEXT: st1.s { v1 }[2], [x8]
238-
; CHECK-NEXT: str d1, [x0]
231+
; CHECK-NEXT: uaddlv.8h s1, v0
232+
; CHECK-NEXT: mov.h v0[0], v1[0]
233+
; CHECK-NEXT: ushll.4s v0, v0, #0
234+
; CHECK-NEXT: ucvtf.4s v0, v0
235+
; CHECK-NEXT: st1.s { v0 }[2], [x8]
236+
; CHECK-NEXT: str d0, [x0]
239237
; CHECK-NEXT: ret
240238

241239
entry:
@@ -283,9 +281,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
283281
; CHECK-NEXT: stp q0, q0, [x0, #32]
284282
; CHECK-NEXT: mov.h v2[0], v1[0]
285283
; CHECK-NEXT: bic.4h v2, #255, lsl #8
286-
; CHECK-NEXT: ushll.4s v2, v2, #0
287-
; CHECK-NEXT: ucvtf.4s v2, v2
288-
; CHECK-NEXT: stp q2, q0, [x0]
284+
; CHECK-NEXT: ushll.4s v1, v2, #0
285+
; CHECK-NEXT: ucvtf.4s v1, v1
286+
; CHECK-NEXT: stp q1, q0, [x0]
289287
; CHECK-NEXT: ret
290288

291289
entry:
@@ -386,12 +384,11 @@ define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) {
386384
; CHECK-LABEL: insert_vec_v4i16_uaddlv_from_v4i32:
387385
; CHECK: ; %bb.0: ; %entry
388386
; CHECK-NEXT: movi.2d v0, #0000000000000000
389-
; CHECK-NEXT: movi.2d v1, #0000000000000000
390-
; CHECK-NEXT: uaddlv.4s d0, v0
391-
; CHECK-NEXT: mov.h v1[0], v0[0]
392-
; CHECK-NEXT: ushll.4s v1, v1, #0
393-
; CHECK-NEXT: ucvtf.4s v1, v1
394-
; CHECK-NEXT: str q1, [x0]
387+
; CHECK-NEXT: uaddlv.4s d1, v0
388+
; CHECK-NEXT: mov.h v0[0], v1[0]
389+
; CHECK-NEXT: ushll.4s v0, v0, #0
390+
; CHECK-NEXT: ucvtf.4s v0, v0
391+
; CHECK-NEXT: str q0, [x0]
395392
; CHECK-NEXT: ret
396393

397394
entry:
@@ -407,14 +404,13 @@ define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) {
407404
; CHECK-LABEL: insert_vec_v16i16_uaddlv_from_v4i32:
408405
; CHECK: ; %bb.0: ; %entry
409406
; CHECK-NEXT: movi.2d v0, #0000000000000000
410-
; CHECK-NEXT: movi.2d v1, #0000000000000000
411407
; CHECK-NEXT: movi.2d v2, #0000000000000000
412-
; CHECK-NEXT: uaddlv.4s d0, v0
413-
; CHECK-NEXT: stp q2, q2, [x0, #32]
414-
; CHECK-NEXT: mov.h v1[0], v0[0]
415-
; CHECK-NEXT: ushll.4s v1, v1, #0
408+
; CHECK-NEXT: uaddlv.4s d1, v0
409+
; CHECK-NEXT: stp q0, q0, [x0, #32]
410+
; CHECK-NEXT: mov.h v2[0], v1[0]
411+
; CHECK-NEXT: ushll.4s v1, v2, #0
416412
; CHECK-NEXT: ucvtf.4s v1, v1
417-
; CHECK-NEXT: stp q1, q2, [x0]
413+
; CHECK-NEXT: stp q1, q0, [x0]
418414
; CHECK-NEXT: ret
419415

420416
entry:
@@ -430,14 +426,13 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
430426
; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v4i32:
431427
; CHECK: ; %bb.0: ; %entry
432428
; CHECK-NEXT: movi.2d v0, #0000000000000000
433-
; CHECK-NEXT: movi.2d v1, #0000000000000000
434429
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
435-
; CHECK-NEXT: uaddlv.4s d0, v0
436-
; CHECK-NEXT: mov.h v1[0], v0[0]
437-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
438-
; CHECK-NEXT: ushll.4s v1, v1, #0
439-
; CHECK-NEXT: ucvtf.4s v1, v1
440-
; CHECK-NEXT: str q1, [x0]
430+
; CHECK-NEXT: uaddlv.4s d1, v0
431+
; CHECK-NEXT: mov.h v0[0], v1[0]
432+
; CHECK-NEXT: bic.4h v0, #255, lsl #8
433+
; CHECK-NEXT: ushll.4s v0, v0, #0
434+
; CHECK-NEXT: ucvtf.4s v0, v0
435+
; CHECK-NEXT: str q0, [x0]
441436
; CHECK-NEXT: ret
442437

443438
entry:
@@ -453,15 +448,14 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
453448
; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v4i32:
454449
; CHECK: ; %bb.0: ; %entry
455450
; CHECK-NEXT: movi.2d v0, #0000000000000000
456-
; CHECK-NEXT: movi.2d v1, #0000000000000000
457451
; CHECK-NEXT: movi.2d v2, #0000000000000000
458-
; CHECK-NEXT: uaddlv.4s d0, v0
459-
; CHECK-NEXT: stp q2, q2, [x0, #32]
460-
; CHECK-NEXT: mov.h v1[0], v0[0]
461-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
462-
; CHECK-NEXT: ushll.4s v1, v1, #0
452+
; CHECK-NEXT: uaddlv.4s d1, v0
453+
; CHECK-NEXT: stp q0, q0, [x0, #32]
454+
; CHECK-NEXT: mov.h v2[0], v1[0]
455+
; CHECK-NEXT: bic.4h v2, #255, lsl #8
456+
; CHECK-NEXT: ushll.4s v1, v2, #0
463457
; CHECK-NEXT: ucvtf.4s v1, v1
464-
; CHECK-NEXT: stp q1, q2, [x0]
458+
; CHECK-NEXT: stp q1, q0, [x0]
465459
; CHECK-NEXT: ret
466460

467461
entry:

llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,13 @@ define void @f1(i1 %c0, i1 %c1, ptr %a, i64 %i) {
9797
; LSLFAST-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
9898
; LSLFAST-NEXT: .cfi_def_cfa_offset 16
9999
; LSLFAST-NEXT: .cfi_offset w30, -16
100-
; LSLFAST-NEXT: add x8, x2, x3, lsl #4
100+
; LSLFAST-NEXT: add x0, x2, x3, lsl #4
101101
; LSLFAST-NEXT: tbz w1, #0, .LBB1_3
102102
; LSLFAST-NEXT: // %bb.2: // %B
103-
; LSLFAST-NEXT: mov x0, x8
104103
; LSLFAST-NEXT: bl g
105104
; LSLFAST-NEXT: b .LBB1_4
106105
; LSLFAST-NEXT: .LBB1_3: // %C
107-
; LSLFAST-NEXT: add x0, x2, x3, lsl #4
108-
; LSLFAST-NEXT: mov x1, x8
106+
; LSLFAST-NEXT: mov x1, x0
109107
; LSLFAST-NEXT: bl g
110108
; LSLFAST-NEXT: .LBB1_4:
111109
; LSLFAST-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload

llvm/test/CodeGen/AArch64/atomic-ops.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
133133
; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i8:
134134
; OUTLINE_ATOMICS: // %bb.0:
135135
; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
136-
; OUTLINE_ATOMICS-NEXT: neg w0, w0
137136
; OUTLINE_ATOMICS-NEXT: adrp x1, var8
138137
; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8
138+
; OUTLINE_ATOMICS-NEXT: neg w0, w0
139139
; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd1_relax
140140
; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
141141
; OUTLINE_ATOMICS-NEXT: ret
@@ -161,9 +161,9 @@ define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
161161
; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i16:
162162
; OUTLINE_ATOMICS: // %bb.0:
163163
; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
164-
; OUTLINE_ATOMICS-NEXT: neg w0, w0
165164
; OUTLINE_ATOMICS-NEXT: adrp x1, var16
166165
; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16
166+
; OUTLINE_ATOMICS-NEXT: neg w0, w0
167167
; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd2_rel
168168
; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
169169
; OUTLINE_ATOMICS-NEXT: ret
@@ -189,9 +189,9 @@ define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
189189
; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i32:
190190
; OUTLINE_ATOMICS: // %bb.0:
191191
; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
192-
; OUTLINE_ATOMICS-NEXT: neg w0, w0
193192
; OUTLINE_ATOMICS-NEXT: adrp x1, var32
194193
; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32
194+
; OUTLINE_ATOMICS-NEXT: neg w0, w0
195195
; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd4_acq
196196
; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
197197
; OUTLINE_ATOMICS-NEXT: ret
@@ -217,9 +217,9 @@ define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
217217
; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i64:
218218
; OUTLINE_ATOMICS: // %bb.0:
219219
; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
220-
; OUTLINE_ATOMICS-NEXT: neg x0, x0
221220
; OUTLINE_ATOMICS-NEXT: adrp x1, var64
222221
; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64
222+
; OUTLINE_ATOMICS-NEXT: neg x0, x0
223223
; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
224224
; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
225225
; OUTLINE_ATOMICS-NEXT: ret

llvm/test/CodeGen/AArch64/combine-sdiv.ll

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -749,41 +749,38 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
749749
; CHECK-GI-NEXT: mov v4.h[1], w9
750750
; CHECK-GI-NEXT: neg v5.2d, v5.2d
751751
; CHECK-GI-NEXT: ldr q19, [x8, :lo12:.LCPI23_3]
752-
; CHECK-GI-NEXT: neg v19.2d, v19.2d
753752
; CHECK-GI-NEXT: ushl v7.2d, v7.2d, v5.2d
754753
; CHECK-GI-NEXT: ushl v5.2d, v17.2d, v5.2d
754+
; CHECK-GI-NEXT: neg v17.2d, v19.2d
755755
; CHECK-GI-NEXT: mov v4.h[2], w9
756756
; CHECK-GI-NEXT: add v7.2d, v0.2d, v7.2d
757757
; CHECK-GI-NEXT: add v5.2d, v2.2d, v5.2d
758758
; CHECK-GI-NEXT: mov v4.h[3], w9
759759
; CHECK-GI-NEXT: adrp x9, .LCPI23_0
760760
; CHECK-GI-NEXT: ldr q6, [x9, :lo12:.LCPI23_0]
761761
; CHECK-GI-NEXT: adrp x9, .LCPI23_2
762-
; CHECK-GI-NEXT: sshl v7.2d, v7.2d, v19.2d
762+
; CHECK-GI-NEXT: sshl v7.2d, v7.2d, v17.2d
763763
; CHECK-GI-NEXT: ldr q20, [x9, :lo12:.LCPI23_2]
764-
; CHECK-GI-NEXT: sshl v5.2d, v5.2d, v19.2d
764+
; CHECK-GI-NEXT: sshl v5.2d, v5.2d, v17.2d
765765
; CHECK-GI-NEXT: neg v6.2d, v6.2d
766766
; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0
767-
; CHECK-GI-NEXT: neg v20.2d, v20.2d
768767
; CHECK-GI-NEXT: ushl v16.2d, v16.2d, v6.2d
769768
; CHECK-GI-NEXT: ushl v6.2d, v18.2d, v6.2d
770-
; CHECK-GI-NEXT: ushll v17.2d, v4.2s, #0
771-
; CHECK-GI-NEXT: ushll2 v18.2d, v4.4s, #0
772-
; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0
769+
; CHECK-GI-NEXT: neg v18.2d, v20.2d
770+
; CHECK-GI-NEXT: ushll v21.2d, v4.2s, #0
771+
; CHECK-GI-NEXT: ushll2 v4.2d, v4.4s, #0
773772
; CHECK-GI-NEXT: add v16.2d, v1.2d, v16.2d
774773
; CHECK-GI-NEXT: add v6.2d, v3.2d, v6.2d
775-
; CHECK-GI-NEXT: shl v17.2d, v17.2d, #63
776-
; CHECK-GI-NEXT: shl v18.2d, v18.2d, #63
774+
; CHECK-GI-NEXT: shl v19.2d, v21.2d, #63
777775
; CHECK-GI-NEXT: shl v4.2d, v4.2d, #63
778-
; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v20.2d
779-
; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v20.2d
780-
; CHECK-GI-NEXT: sshr v17.2d, v17.2d, #63
781-
; CHECK-GI-NEXT: sshr v18.2d, v18.2d, #63
776+
; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v18.2d
777+
; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v18.2d
778+
; CHECK-GI-NEXT: sshr v19.2d, v19.2d, #63
782779
; CHECK-GI-NEXT: sshr v4.2d, v4.2d, #63
783-
; CHECK-GI-NEXT: bif v0.16b, v7.16b, v17.16b
784-
; CHECK-GI-NEXT: bif v1.16b, v16.16b, v18.16b
785-
; CHECK-GI-NEXT: bif v2.16b, v5.16b, v4.16b
786-
; CHECK-GI-NEXT: bif v3.16b, v6.16b, v18.16b
780+
; CHECK-GI-NEXT: bif v0.16b, v7.16b, v19.16b
781+
; CHECK-GI-NEXT: bif v1.16b, v16.16b, v4.16b
782+
; CHECK-GI-NEXT: bif v2.16b, v5.16b, v19.16b
783+
; CHECK-GI-NEXT: bif v3.16b, v6.16b, v4.16b
787784
; CHECK-GI-NEXT: ret
788785
%1 = sdiv <8 x i64> %x, <i64 1, i64 4, i64 8, i64 16, i64 1, i64 4, i64 8, i64 16>
789786
ret <8 x i64> %1

llvm/test/CodeGen/AArch64/concat-vector.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -272,9 +272,9 @@ define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) {
272272
; CHECK-GI: // %bb.0:
273273
; CHECK-GI-NEXT: ldrh w8, [x0]
274274
; CHECK-GI-NEXT: ldrh w9, [x0, #2]
275-
; CHECK-GI-NEXT: fmov s1, w8
276-
; CHECK-GI-NEXT: mov v1.h[1], w9
277-
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
275+
; CHECK-GI-NEXT: fmov s0, w8
276+
; CHECK-GI-NEXT: mov v0.h[1], w9
277+
; CHECK-GI-NEXT: mov v0.s[1], v0.s[0]
278278
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
279279
; CHECK-GI-NEXT: ret
280280
%l1 = load <2 x i16>, ptr %p1
@@ -295,9 +295,9 @@ define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) {
295295
; CHECK-GI: // %bb.0:
296296
; CHECK-GI-NEXT: ldrh w8, [x0]
297297
; CHECK-GI-NEXT: ldrh w9, [x0, #2]
298-
; CHECK-GI-NEXT: fmov s1, w8
299-
; CHECK-GI-NEXT: mov v1.h[1], w9
300-
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
298+
; CHECK-GI-NEXT: fmov s0, w8
299+
; CHECK-GI-NEXT: mov v0.h[1], w9
300+
; CHECK-GI-NEXT: mov v0.s[1], v0.s[0]
301301
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
302302
; CHECK-GI-NEXT: ret
303303
%l1 = load <2 x i16>, ptr %p1

llvm/test/CodeGen/AArch64/fabs-fp128.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,21 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
144144
; CHECK-GI-LABEL: fabs_v4f128:
145145
; CHECK-GI: // %bb.0: // %entry
146146
; CHECK-GI-NEXT: mov x8, v0.d[1]
147-
; CHECK-GI-NEXT: mov v7.d[0], v0.d[0]
147+
; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
148148
; CHECK-GI-NEXT: mov x9, v1.d[1]
149149
; CHECK-GI-NEXT: mov x10, v2.d[1]
150150
; CHECK-GI-NEXT: mov x11, v3.d[1]
151151
; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
152152
; CHECK-GI-NEXT: mov v2.d[0], v2.d[0]
153153
; CHECK-GI-NEXT: mov v3.d[0], v3.d[0]
154154
; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
155-
; CHECK-GI-NEXT: mov v7.d[1], x8
155+
; CHECK-GI-NEXT: mov v0.d[1], x8
156156
; CHECK-GI-NEXT: and x8, x9, #0x7fffffffffffffff
157157
; CHECK-GI-NEXT: and x9, x10, #0x7fffffffffffffff
158158
; CHECK-GI-NEXT: and x10, x11, #0x7fffffffffffffff
159159
; CHECK-GI-NEXT: mov v1.d[1], x8
160160
; CHECK-GI-NEXT: mov v2.d[1], x9
161161
; CHECK-GI-NEXT: mov v3.d[1], x10
162-
; CHECK-GI-NEXT: mov v0.16b, v7.16b
163162
; CHECK-GI-NEXT: ret
164163
entry:
165164
%c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)

0 commit comments

Comments
 (0)