Skip to content

Commit e4b0f01

Browse files
[GISel][CombinerHelper] Add matcher code for unmerging the first half of vector A and B
1 parent 8d757e7 commit e4b0f01

File tree

5 files changed

+214
-68
lines changed

5 files changed

+214
-68
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,11 +449,20 @@ Register CombinerHelper::createUnmergeValue(
449449

450450
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
451451
const Register DstReg = MI.getOperand(0).getReg();
452+
const Register SrcReg1 = MI.getOperand(1).getReg();
453+
const Register SrcReg2 = MI.getOperand(2).getReg();
454+
452455
const LLT DstTy = MRI.getType(DstReg);
453456
const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
457+
454458
const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
455459
const unsigned SrcNumElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
456460

461+
// This test is a bit silly, but it is required because some tests rely on
462+
// the legalizer changing the type of the shufflevector.
463+
if (DstTy.getScalarSizeInBits() == 1)
464+
return false;
465+
457466
// {1, 2, ..., n} -> G_CONCAT_VECTOR
458467
// Turns a shuffle vector that only increments into a concat vector
459468
// instruction
@@ -498,6 +507,40 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
498507
applyCombineShuffleVector(MI, Ops);
499508
return true;
500509
}
510+
511+
// {0, 1, ..., |DstVector| - 1} -> G_UNMERGE_VALUES
512+
// Extracts the first chunk of the same size as the destination vector from
513+
// the source
514+
GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
515+
if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
516+
// This optimization does not work if the target type is not a multiple of
517+
// two; this can happen in some backends that support uneven vector types.
518+
// We also need to make sure that the vector can be split into two.
519+
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
520+
SrcNumElts % DstNumElts != 0)
521+
return false;
522+
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
523+
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
524+
createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
525+
MI.eraseFromParent();
526+
return true;
527+
}
528+
529+
// {|DstVector|, |DstVector|+1, ..., 2 * |DstVector| - 1} -> G_UNMERGE_VALUES
530+
// Extracts the second chunk of the same size as the destination vector from
531+
// the source
532+
GeneratorType SecondQuarter =
533+
adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
534+
if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
535+
if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
536+
return false;
537+
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
538+
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
539+
createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
540+
MI.eraseFromParent();
541+
return true;
542+
}
543+
501544
return false;
502545
}
503546

llvm/test/CodeGen/AArch64/ext-narrow-index.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc < %s -global-isel -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL
4+
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
45

56
; Tests of shufflevector where the index operand is half the width of the vector
67
; operands. We should get one ext instruction and not two.
@@ -42,8 +43,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
4243
;
4344
; CHECK-GISEL-LABEL: i8_off8:
4445
; CHECK-GISEL: // %bb.0: // %entry
45-
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
46-
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
46+
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
4747
; CHECK-GISEL-NEXT: ret
4848
entry:
4949
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -254,9 +254,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
254254
;
255255
; CHECK-GISEL-LABEL: i8_zero_off8:
256256
; CHECK-GISEL: // %bb.0: // %entry
257-
; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
258-
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
259-
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
257+
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
260258
; CHECK-GISEL-NEXT: ret
261259
entry:
262260
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-DOT
44
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-BASE
55
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-DOT
6+
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
67

78
define i32 @addv_v2i32(<2 x i32> %a) {
89
; CHECK-LABEL: addv_v2i32:
@@ -3744,17 +3745,13 @@ define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i1
37443745
; CHECK-GI-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
37453746
; CHECK-GI: // %bb.0: // %entry
37463747
; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
3747-
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
37483748
; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
3749-
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
37503749
; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0
3751-
; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
37523750
; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0
3753-
; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
3754-
; CHECK-GI-NEXT: add v0.4s, v4.4s, v0.4s
3755-
; CHECK-GI-NEXT: add v1.4s, v5.4s, v1.4s
3756-
; CHECK-GI-NEXT: add v2.4s, v6.4s, v2.4s
3757-
; CHECK-GI-NEXT: add v3.4s, v7.4s, v3.4s
3751+
; CHECK-GI-NEXT: uaddw2 v0.4s, v4.4s, v0.8h
3752+
; CHECK-GI-NEXT: uaddw2 v1.4s, v5.4s, v1.8h
3753+
; CHECK-GI-NEXT: uaddw2 v2.4s, v6.4s, v2.8h
3754+
; CHECK-GI-NEXT: uaddw2 v3.4s, v7.4s, v3.8h
37583755
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
37593756
; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s
37603757
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s

llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,25 @@ body: |
133133
PseudoRET implicit $lr, implicit $x0
134134
...
135135

136+
---
137+
name: extract_vector_1024_to_512
138+
legalized: false
139+
body: |
140+
bb.1.entry:
141+
liveins: $y2
142+
; CHECK-LABEL: name: extract_vector_1024_to_512
143+
; CHECK: liveins: $y2
144+
; CHECK-NEXT: {{ $}}
145+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
146+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
147+
; CHECK-NEXT: $x0 = COPY [[UV]](<16 x s32>)
148+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
149+
%1:_(<32 x s32>) = COPY $y2
150+
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
151+
$x0 = COPY %0:_(<16 x s32>)
152+
PseudoRET implicit $lr, implicit $x0
153+
...
154+
136155
---
137156
name: concat_vector_32_512_first_start
138157
legalized: false
@@ -154,6 +173,26 @@ body: |
154173
PseudoRET implicit $lr, implicit $x0
155174
...
156175

176+
---
177+
name: extract_vector_1024_to_256
178+
legalized: false
179+
body: |
180+
bb.1.entry:
181+
liveins: $y2
182+
; CHECK-LABEL: name: extract_vector_1024_to_256
183+
; CHECK: liveins: $y2
184+
; CHECK-NEXT: {{ $}}
185+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
186+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
187+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
188+
; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
189+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
190+
%1:_(<32 x s32>) = COPY $y2
191+
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
192+
$wl0 = COPY %0:_(<8 x s32>)
193+
PseudoRET implicit $lr, implicit $x0
194+
...
195+
157196
---
158197
name: concat_vector_32_512_first_end
159198
legalized: false
@@ -428,3 +467,117 @@ body: |
428467
$x0 = COPY %0:_(<16 x s32>)
429468
PseudoRET implicit $lr, implicit $x0
430469
...
470+
471+
---
472+
name: extract_vector_1024_to_128
473+
legalized: false
474+
body: |
475+
bb.1.entry:
476+
liveins: $y2
477+
; CHECK-LABEL: name: extract_vector_1024_to_128
478+
; CHECK: liveins: $y2
479+
; CHECK-NEXT: {{ $}}
480+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
481+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
482+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
483+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[UV2]](<8 x s32>)
484+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>)
485+
%1:_(<32 x s32>) = COPY $y2
486+
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3)
487+
PseudoRET implicit $lr, implicit %0
488+
...
489+
490+
---
491+
name: extract_vector_1024_to_32
492+
legalized: false
493+
body: |
494+
bb.1.entry:
495+
liveins: $y2
496+
; CHECK-LABEL: name: extract_vector_1024_to_32
497+
; CHECK: liveins: $y2
498+
; CHECK-NEXT: {{ $}}
499+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
500+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
501+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
502+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
503+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
504+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
505+
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s8>), [[UV9:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[UV6]](<4 x s8>)
506+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV8]](<2 x s8>)
507+
%1:_(<128 x s8>) = COPY $y2
508+
%0:_(<2 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_, shufflemask(0, 1)
509+
PseudoRET implicit $lr, implicit %0
510+
...
511+
512+
---
513+
name: extract_vector_second_half_512_to_256
514+
legalized: false
515+
body: |
516+
bb.1.entry:
517+
liveins: $x0, $x1
518+
; CHECK-LABEL: name: extract_vector_second_half_512_to_256
519+
; CHECK: liveins: $x0, $x1
520+
; CHECK-NEXT: {{ $}}
521+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
522+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
523+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>)
524+
%1:_(<16 x s32>) = COPY $x0
525+
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
526+
PseudoRET implicit $lr, implicit %2
527+
...
528+
529+
---
530+
name: extract_vector_second_half_512_to_128
531+
legalized: false
532+
body: |
533+
bb.1.entry:
534+
liveins: $x0, $x1
535+
; CHECK-LABEL: name: extract_vector_second_half_512_to_128
536+
; CHECK: liveins: $x0, $x1
537+
; CHECK-NEXT: {{ $}}
538+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
539+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
540+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[UV]](<8 x s32>)
541+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV3]](<4 x s32>)
542+
%1:_(<16 x s32>) = COPY $x0
543+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(4, 5, 6, 7)
544+
PseudoRET implicit $lr, implicit %2
545+
...
546+
547+
---
548+
name: extract_vector_second_half_1024_to_512
549+
legalized: false
550+
body: |
551+
bb.1.entry:
552+
liveins: $y2, $y3
553+
; CHECK-LABEL: name: extract_vector_second_half_1024_to_512
554+
; CHECK: liveins: $y2, $y3
555+
; CHECK-NEXT: {{ $}}
556+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
557+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
558+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<64 x s8>)
559+
%1:_(<128 x s8>) = COPY $y2
560+
%2:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127)
561+
PseudoRET implicit $lr, implicit %2
562+
...
563+
564+
---
565+
name: extract_vector_second_half_1024_to_32
566+
legalized: false
567+
body: |
568+
bb.1.entry:
569+
liveins: $y2, $y3
570+
; CHECK-LABEL: name: extract_vector_second_half_1024_to_32
571+
; CHECK: liveins: $y2, $y3
572+
; CHECK-NEXT: {{ $}}
573+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
574+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
575+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
576+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
577+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
578+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
579+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV7]](<4 x s8>)
580+
%1:_(<128 x s8>) = COPY $y2
581+
%2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7)
582+
PseudoRET implicit $lr, implicit %2
583+
...

llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll

Lines changed: 10 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -15,63 +15,18 @@ define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
1515
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
1616
; CHECK-NEXT: nop // Delay Slot 4
1717
; CHECK-NEXT: nop // Delay Slot 3
18-
; CHECK-NEXT: nop // Delay Slot 2
19-
; CHECK-NEXT: mov r8, r16 // Delay Slot 1
18+
; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
19+
; CHECK-NEXT: nop // Delay Slot 1
2020
; CHECK-NEXT: // %bb.1: // %if.end
21-
; CHECK-NEXT: mova r16, #8; nopb ; nopxm
22-
; CHECK-NEXT: vextract.s32 r0, x2, r16
23-
; CHECK-NEXT: mova r16, #9
24-
; CHECK-NEXT: vextract.s32 r1, x2, r16
25-
; CHECK-NEXT: mova r16, #10
26-
; CHECK-NEXT: vextract.s32 r2, x2, r16
27-
; CHECK-NEXT: mova r16, #11
28-
; CHECK-NEXT: vextract.s32 r3, x2, r16
29-
; CHECK-NEXT: mova r16, #12
30-
; CHECK-NEXT: vextract.s32 r4, x2, r16
31-
; CHECK-NEXT: mova r16, #13
32-
; CHECK-NEXT: vextract.s32 r5, x2, r16
33-
; CHECK-NEXT: mova r16, #15
34-
; CHECK-NEXT: vextract.s32 r6, x2, r16
35-
; CHECK-NEXT: mova r16, #14
36-
; CHECK-NEXT: vextract.s32 r7, x2, r16
37-
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
38-
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
39-
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
40-
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
41-
; CHECK-NEXT: ret lr
42-
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
43-
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
44-
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
45-
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
46-
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
21+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vmov wl0, wh0; nopv
4722
; CHECK-NEXT: .p2align 4
48-
; CHECK-NEXT: .LBB0_2: // %if.then
49-
; CHECK-NEXT: mova r16, #0; nopb ; nopxm
50-
; CHECK-NEXT: vextract.s32 r0, x2, r16
51-
; CHECK-NEXT: mova r16, #1
52-
; CHECK-NEXT: vextract.s32 r1, x2, r16
53-
; CHECK-NEXT: mova r16, #2
54-
; CHECK-NEXT: vextract.s32 r2, x2, r16
55-
; CHECK-NEXT: mova r16, #3
56-
; CHECK-NEXT: vextract.s32 r3, x2, r16
57-
; CHECK-NEXT: mova r16, #4
58-
; CHECK-NEXT: vextract.s32 r4, x2, r16
59-
; CHECK-NEXT: mova r16, #5
60-
; CHECK-NEXT: vextract.s32 r5, x2, r16
61-
; CHECK-NEXT: mova r16, #7
62-
; CHECK-NEXT: vextract.s32 r6, x2, r16
63-
; CHECK-NEXT: mova r16, #6
64-
; CHECK-NEXT: vextract.s32 r7, x2, r16
65-
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
66-
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
67-
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
68-
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
69-
; CHECK-NEXT: ret lr
70-
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
71-
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
72-
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
73-
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
74-
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
23+
; CHECK-NEXT: .LBB0_2: // %return
24+
; CHECK-NEXT: nopa ; ret lr
25+
; CHECK-NEXT: nop // Delay Slot 5
26+
; CHECK-NEXT: nop // Delay Slot 4
27+
; CHECK-NEXT: nop // Delay Slot 3
28+
; CHECK-NEXT: nop // Delay Slot 2
29+
; CHECK-NEXT: nop // Delay Slot 1
7530
entry:
7631
%cmp = icmp eq i32 %idx, 0
7732
br i1 %cmp, label %if.then, label %if.end

0 commit comments

Comments
 (0)