Skip to content

Commit 6cb3e09

Browse files
[GISel][CombinerHelper] Add two patterns that extract the first two chunks of a vector
1 parent b8652cc commit 6cb3e09

File tree

5 files changed

+210
-74
lines changed

5 files changed

+210
-74
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,11 +449,20 @@ Register CombinerHelper::createUnmergeValue(
449449

450450
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
451451
const Register DstReg = MI.getOperand(0).getReg();
452+
const Register SrcReg1 = MI.getOperand(1).getReg();
453+
const Register SrcReg2 = MI.getOperand(2).getReg();
454+
452455
const LLT DstTy = MRI.getType(DstReg);
453456
const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
457+
454458
const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
455459
const unsigned SrcNumElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
456460

461+
// This test is a bit silly, but it is required because some tests rely on
462+
// the legalizer changing the type of the shufflevector.
463+
if (DstTy.getScalarSizeInBits() == 1)
464+
return false;
465+
457466
// {1, 2, ..., n} -> G_CONCAT_VECTOR
458467
// Turns a shuffle vector that only increments into a concat vector
459468
// instruction
@@ -500,6 +509,38 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
500509
return true;
501510
}
502511

512+
// {0, 1, ..., |DstVector| - 1} -> G_UNMERGE_VALUES
513+
// Extracts the first chunk of the same size as the destination vector from
514+
// the source
515+
GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
516+
if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
517+
// This optimization does not work if the target type is not a multiple of
518+
// two; this can happen in some backends that support uneven vector types.
519+
// We also need to make sure that the vector can be split into two.
520+
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
521+
SrcNumElts % DstNumElts != 0)
522+
return false;
523+
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
524+
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
525+
createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
526+
MI.eraseFromParent();
527+
return true;
528+
}
529+
530+
// {|DstVector|, |DstVector|+1, ..., 2 * |DstVector| - 1} -> G_UNMERGE_VALUES
531+
// Extracts the second chunk of the same size as the destination vector from
532+
// the source
533+
GeneratorType SecondQuarter =
534+
adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
535+
if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
536+
if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
537+
return false;
538+
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
539+
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
540+
createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
541+
MI.eraseFromParent();
542+
return true;
543+
}
503544
return false;
504545
}
505546

llvm/test/CodeGen/AArch64/ext-narrow-index.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc < %s -global-isel -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL
4+
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
45

56
; Tests of shufflevector where the index operand is half the width of the vector
67
; operands. We should get one ext instruction and not two.
@@ -42,8 +43,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
4243
;
4344
; CHECK-GISEL-LABEL: i8_off8:
4445
; CHECK-GISEL: // %bb.0: // %entry
45-
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
46-
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
46+
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
4747
; CHECK-GISEL-NEXT: ret
4848
entry:
4949
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -254,9 +254,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
254254
;
255255
; CHECK-GISEL-LABEL: i8_zero_off8:
256256
; CHECK-GISEL: // %bb.0: // %entry
257-
; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
258-
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
259-
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
257+
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
260258
; CHECK-GISEL-NEXT: ret
261259
entry:
262260
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-DOT
44
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-BASE
55
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-DOT
6+
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
67

78
define i32 @addv_v2i32(<2 x i32> %a) {
89
; CHECK-LABEL: addv_v2i32:
@@ -3744,17 +3745,13 @@ define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i1
37443745
; CHECK-GI-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
37453746
; CHECK-GI: // %bb.0: // %entry
37463747
; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
3747-
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
37483748
; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
3749-
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
37503749
; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0
3751-
; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
37523750
; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0
3753-
; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
3754-
; CHECK-GI-NEXT: add v0.4s, v4.4s, v0.4s
3755-
; CHECK-GI-NEXT: add v1.4s, v5.4s, v1.4s
3756-
; CHECK-GI-NEXT: add v2.4s, v6.4s, v2.4s
3757-
; CHECK-GI-NEXT: add v3.4s, v7.4s, v3.4s
3751+
; CHECK-GI-NEXT: uaddw2 v0.4s, v4.4s, v0.8h
3752+
; CHECK-GI-NEXT: uaddw2 v1.4s, v5.4s, v1.8h
3753+
; CHECK-GI-NEXT: uaddw2 v2.4s, v6.4s, v2.8h
3754+
; CHECK-GI-NEXT: uaddw2 v3.4s, v7.4s, v3.8h
37583755
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
37593756
; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s
37603757
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s

llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir

Lines changed: 152 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,25 @@ body: |
133133
PseudoRET implicit $lr, implicit $x0
134134
...
135135

136+
---
137+
name: extract_vector_1024_to_512
138+
legalized: false
139+
body: |
140+
bb.1.entry:
141+
liveins: $y2
142+
; CHECK-LABEL: name: extract_vector_1024_to_512
143+
; CHECK: liveins: $y2
144+
; CHECK-NEXT: {{ $}}
145+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
146+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
147+
; CHECK-NEXT: $x0 = COPY [[UV]](<16 x s32>)
148+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
149+
%1:_(<32 x s32>) = COPY $y2
150+
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
151+
$x0 = COPY %0:_(<16 x s32>)
152+
PseudoRET implicit $lr, implicit $x0
153+
...
154+
136155
---
137156
name: concat_vector_32_512_first_start
138157
legalized: false
@@ -154,6 +173,26 @@ body: |
154173
PseudoRET implicit $lr, implicit $x0
155174
...
156175

176+
---
177+
name: extract_vector_1024_to_256
178+
legalized: false
179+
body: |
180+
bb.1.entry:
181+
liveins: $y2
182+
; CHECK-LABEL: name: extract_vector_1024_to_256
183+
; CHECK: liveins: $y2
184+
; CHECK-NEXT: {{ $}}
185+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
186+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
187+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
188+
; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
189+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
190+
%1:_(<32 x s32>) = COPY $y2
191+
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
192+
$wl0 = COPY %0:_(<8 x s32>)
193+
PseudoRET implicit $lr, implicit $x0
194+
...
195+
157196
---
158197
name: concat_vector_32_512_first_end
159198
legalized: false
@@ -427,4 +466,116 @@ body: |
427466
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, -1, 11, 12, 13, -1, 15, 0, 1, -1, 3, 4, 5, -1, 7)
428467
$x0 = COPY %0:_(<16 x s32>)
429468
PseudoRET implicit $lr, implicit $x0
430-
...
469+
...
470+
name: extract_vector_1024_to_128
471+
legalized: false
472+
body: |
473+
bb.1.entry:
474+
liveins: $y2
475+
; CHECK-LABEL: name: extract_vector_1024_to_128
476+
; CHECK: liveins: $y2
477+
; CHECK-NEXT: {{ $}}
478+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
479+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
480+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
481+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[UV2]](<8 x s32>)
482+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>)
483+
%1:_(<32 x s32>) = COPY $y2
484+
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3)
485+
PseudoRET implicit $lr, implicit %0
486+
...
487+
488+
---
489+
name: extract_vector_1024_to_32
490+
legalized: false
491+
body: |
492+
bb.1.entry:
493+
liveins: $y2
494+
; CHECK-LABEL: name: extract_vector_1024_to_32
495+
; CHECK: liveins: $y2
496+
; CHECK-NEXT: {{ $}}
497+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
498+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
499+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
500+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
501+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
502+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
503+
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s8>), [[UV9:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[UV6]](<4 x s8>)
504+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV8]](<2 x s8>)
505+
%1:_(<128 x s8>) = COPY $y2
506+
%0:_(<2 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_, shufflemask(0, 1)
507+
PseudoRET implicit $lr, implicit %0
508+
...
509+
510+
---
511+
name: extract_vector_second_half_512_to_256
512+
legalized: false
513+
body: |
514+
bb.1.entry:
515+
liveins: $x0, $x1
516+
; CHECK-LABEL: name: extract_vector_second_half_512_to_256
517+
; CHECK: liveins: $x0, $x1
518+
; CHECK-NEXT: {{ $}}
519+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
520+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
521+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>)
522+
%1:_(<16 x s32>) = COPY $x0
523+
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
524+
PseudoRET implicit $lr, implicit %2
525+
...
526+
527+
---
528+
name: extract_vector_second_half_512_to_128
529+
legalized: false
530+
body: |
531+
bb.1.entry:
532+
liveins: $x0, $x1
533+
; CHECK-LABEL: name: extract_vector_second_half_512_to_128
534+
; CHECK: liveins: $x0, $x1
535+
; CHECK-NEXT: {{ $}}
536+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
537+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
538+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[UV]](<8 x s32>)
539+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV3]](<4 x s32>)
540+
%1:_(<16 x s32>) = COPY $x0
541+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(4, 5, 6, 7)
542+
PseudoRET implicit $lr, implicit %2
543+
...
544+
545+
---
546+
name: extract_vector_second_half_1024_to_512
547+
legalized: false
548+
body: |
549+
bb.1.entry:
550+
liveins: $y2, $y3
551+
; CHECK-LABEL: name: extract_vector_second_half_1024_to_512
552+
; CHECK: liveins: $y2, $y3
553+
; CHECK-NEXT: {{ $}}
554+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
555+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
556+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<64 x s8>)
557+
%1:_(<128 x s8>) = COPY $y2
558+
%2:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127)
559+
PseudoRET implicit $lr, implicit %2
560+
...
561+
562+
---
563+
name: extract_vector_second_half_1024_to_32
564+
legalized: false
565+
body: |
566+
bb.1.entry:
567+
liveins: $y2, $y3
568+
; CHECK-LABEL: name: extract_vector_second_half_1024_to_32
569+
; CHECK: liveins: $y2, $y3
570+
; CHECK-NEXT: {{ $}}
571+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
572+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
573+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
574+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
575+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
576+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
577+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV7]](<4 x s8>)
578+
%1:_(<128 x s8>) = COPY $y2
579+
%2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7)
580+
PseudoRET implicit $lr, implicit %2
581+
...

llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll

Lines changed: 9 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -15,69 +15,18 @@ define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
1515
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
1616
; CHECK-NEXT: nop // Delay Slot 4
1717
; CHECK-NEXT: nop // Delay Slot 3
18-
; CHECK-NEXT: nop // Delay Slot 2
19-
; CHECK-NEXT: mov r8, r16 // Delay Slot 1
18+
; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
19+
; CHECK-NEXT: nop // Delay Slot 1
2020
; CHECK-NEXT: // %bb.1: // %if.end
21-
; CHECK-NEXT: mova r16, #8
22-
; CHECK-NEXT: vextract.s32 r0, x2, r16
23-
; CHECK-NEXT: nop
24-
; CHECK-NEXT: mova r16, #9
25-
; CHECK-NEXT: vextract.s32 r1, x2, r16
26-
; CHECK-NEXT: nop
27-
; CHECK-NEXT: mova r16, #10
28-
; CHECK-NEXT: vextract.s32 r2, x2, r16
29-
; CHECK-NEXT: nop
30-
; CHECK-NEXT: mova r16, #11
31-
; CHECK-NEXT: vextract.s32 r3, x2, r16
32-
; CHECK-NEXT: nop
33-
; CHECK-NEXT: mova r16, #12
34-
; CHECK-NEXT: vextract.s32 r4, x2, r16
35-
; CHECK-NEXT: nop
36-
; CHECK-NEXT: mova r16, #13
37-
; CHECK-NEXT: vextract.s32 r5, x2, r16
38-
; CHECK-NEXT: j #.LBB0_3
21+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vmov wl0, wh0; nopv
22+
; CHECK-NEXT: .p2align 4
23+
; CHECK-NEXT: .LBB0_2: // %return
24+
; CHECK-NEXT: nopa ; ret lr
3925
; CHECK-NEXT: nop // Delay Slot 5
40-
; CHECK-NEXT: mova r16, #15 // Delay Slot 4
41-
; CHECK-NEXT: vextract.s32 r6, x2, r16 // Delay Slot 3
26+
; CHECK-NEXT: nop // Delay Slot 4
27+
; CHECK-NEXT: nop // Delay Slot 3
4228
; CHECK-NEXT: nop // Delay Slot 2
43-
; CHECK-NEXT: mova r16, #14 // Delay Slot 1
44-
; CHECK-NEXT: .p2align 4
45-
; CHECK-NEXT: .LBB0_2: // %if.then
46-
; CHECK-NEXT: mova r16, #0; nopxm
47-
; CHECK-NEXT: vextract.s32 r0, x2, r16
48-
; CHECK-NEXT: nop
49-
; CHECK-NEXT: mova r16, #1
50-
; CHECK-NEXT: vextract.s32 r1, x2, r16
51-
; CHECK-NEXT: nop
52-
; CHECK-NEXT: mova r16, #2
53-
; CHECK-NEXT: vextract.s32 r2, x2, r16
54-
; CHECK-NEXT: nop
55-
; CHECK-NEXT: mova r16, #3
56-
; CHECK-NEXT: vextract.s32 r3, x2, r16
57-
; CHECK-NEXT: nop
58-
; CHECK-NEXT: mova r16, #4
59-
; CHECK-NEXT: vextract.s32 r4, x2, r16
60-
; CHECK-NEXT: nop
61-
; CHECK-NEXT: mova r16, #5
62-
; CHECK-NEXT: vextract.s32 r5, x2, r16
63-
; CHECK-NEXT: nop
64-
; CHECK-NEXT: mova r16, #7
65-
; CHECK-NEXT: vextract.s32 r6, x2, r16
66-
; CHECK-NEXT: nop
67-
; CHECK-NEXT: mova r16, #6
68-
; CHECK-NEXT: .p2align 4
69-
; CHECK-NEXT: .LBB0_3: // %return
70-
; CHECK-NEXT: nopx ; vextract.s32 r7, x2, r16
71-
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
72-
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
73-
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
74-
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
75-
; CHECK-NEXT: ret lr
76-
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
77-
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
78-
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
79-
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
80-
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
29+
; CHECK-NEXT: nop // Delay Slot 1
8130
entry:
8231
%cmp = icmp eq i32 %idx, 0
8332
br i1 %cmp, label %if.then, label %if.end

0 commit comments

Comments
 (0)