|  | 
|  | 1 | +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 | 
|  | 2 | +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s | 
|  | 3 | + | 
|  | 4 | +# Regression test for bug #162644. The _wrong_ output of this test is to | 
|  | 5 | +# generate the body of the tail-predicated loop like this: | 
|  | 6 | +# | 
|  | 7 | +#     $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 | 
|  | 8 | +#     renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) | 
|  | 9 | +#     $q0 = MVE_VORR $q1, $q1, 0, $noreg, $noreg, undef $q0 | 
|  | 10 | +#     renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 | 
|  | 11 | +#     $lr = MVE_LETP killed renamable $lr, %bb.1 | 
|  | 12 | +# | 
|  | 13 | +# in which the second MVE_VORR, copying q1 into q0, is an invalid conversion of | 
|  | 14 | +# the input MQPRCopy, because it won't copy the vector lanes disabled by | 
|  | 15 | +# FPSCR.LTPSIZE, and those are needed in the output value of the loop. | 
|  | 16 | +# | 
|  | 17 | +# In the right output, that MQPRCopy is expanded into a pair of VMOVD copying | 
|  | 18 | +# d2,d3 (= q1) into d0,d1 (= q0) respectively; VMOVD is unaffected by LTPSIZE. | 
|  | 19 | + | 
|  | 20 | +--- | | 
|  | 21 | +  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" | 
|  | 22 | +  target triple = "thumbv8.1m.main-unknown-none-eabihf" | 
|  | 23 | + | 
|  | 24 | +  @inactive = dso_local local_unnamed_addr global <4 x float> zeroinitializer, align 16 | 
|  | 25 | + | 
|  |  | +  ; Sums the floats at %0 into a <4 x float> accumulator, four lanes per | 
|  |  | +  ; iteration, with the load and the add both guarded by a vctp32 predicate | 
|  |  | +  ; derived from the element count %1. | 
|  | 26 | +  define <4 x float> @test_func(ptr %0, i32 %1) { | 
|  |  | +    ; Loaded once outside the loop and passed as the final operand of the | 
|  |  | +    ; predicated add below (presumably the value used for inactive lanes, | 
|  |  | +    ; going by the global's name). | 
|  | 27 | +    %3 = load <4 x float>, ptr @inactive, align 16 | 
|  |  | +    ; Iteration count: %8 = ((%1 + 3 - smin(%1, 4)) >> 2) + 1. | 
|  | 28 | +    %4 = add i32 %1, 3 | 
|  | 29 | +    %5 = call i32 @llvm.smin.i32(i32 %1, i32 4) | 
|  | 30 | +    %6 = sub i32 %4, %5 | 
|  | 31 | +    %7 = lshr i32 %6, 2 | 
|  | 32 | +    %8 = add nuw nsw i32 %7, 1 | 
|  | 33 | +    %9 = call i32 @llvm.start.loop.iterations.i32(i32 %8) | 
|  | 34 | +    br label %10 | 
|  | 35 | + | 
|  | 36 | +  10:                                               ; preds = %10, %2 | 
|  |  | +    ; %11: running sum (starts as a splat constant), %12: elements remaining, | 
|  |  | +    ; %13: current load address, %14: loop counter. | 
|  | 37 | +    %11 = phi <4 x float> [ splat (float 0x3FB99999A0000000), %2 ], [ %17, %10 ] | 
|  | 38 | +    %12 = phi i32 [ %1, %2 ], [ %19, %10 ] | 
|  | 39 | +    %13 = phi ptr [ %0, %2 ], [ %18, %10 ] | 
|  | 40 | +    %14 = phi i32 [ %9, %2 ], [ %20, %10 ] | 
|  |  | +    ; Predicate built from the remaining element count; it guards both the | 
|  |  | +    ; masked load and the predicated add. | 
|  | 41 | +    %15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12) | 
|  | 42 | +    %16 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %13, i32 4, <4 x i1> %15, <4 x float> zeroinitializer) | 
|  | 43 | +    %17 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %11, <4 x float> %16, <4 x i1> %15, <4 x float> %3) | 
|  |  | +    ; Advance the pointer by 16 bytes and reduce the element count by 4. | 
|  | 44 | +    %18 = getelementptr inbounds nuw i8, ptr %13, i32 16 | 
|  | 45 | +    %19 = add i32 %12, -4 | 
|  |  | +    ; Decrement the loop counter by 1 and keep looping while it is non-zero. | 
|  | 46 | +    %20 = call i32 @llvm.loop.decrement.reg.i32(i32 %14, i32 1) | 
|  | 47 | +    %21 = icmp ne i32 %20, 0 | 
|  | 48 | +    br i1 %21, label %10, label %22 | 
|  | 49 | + | 
|  | 50 | +  22:                                               ; preds = %10 | 
|  | 51 | +    ret <4 x float> %17 | 
|  | 52 | +  } | 
|  | 53 | +... | 
|  |  | +# Machine function fed to the arm-low-overhead-loops pass (see the RUN line). | 
|  |  | +# The body holds the autogenerated expected output first, then the input MIR. | 
|  | 54 | +--- | 
|  | 55 | +name:            test_func | 
|  | 56 | +alignment:       4 | 
|  | 57 | +legalized:       false | 
|  | 58 | +tracksRegLiveness: true | 
|  | 59 | +registers:       [] | 
|  | 60 | +liveins: | 
|  | 61 | +  - { reg: '$r0', virtual-reg: '' } | 
|  | 62 | +  - { reg: '$r1', virtual-reg: '' } | 
|  | 63 | +stack: | 
|  | 64 | +  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, | 
|  | 65 | +      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, | 
|  | 66 | +      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } | 
|  | 67 | +  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, | 
|  | 68 | +      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, | 
|  | 69 | +      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } | 
|  | 70 | +body:             | | 
|  | 71 | +  ; CHECK-LABEL: name: test_func | 
|  | 72 | +  ; CHECK: bb.0 (%ir-block.2): | 
|  | 73 | +  ; CHECK-NEXT:   successors: %bb.1(0x80000000) | 
|  | 74 | +  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r7 | 
|  | 75 | +  ; CHECK-NEXT: {{  $}} | 
|  | 76 | +  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp | 
|  | 77 | +  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8 | 
|  | 78 | +  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4 | 
|  | 79 | +  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r7, -8 | 
|  | 80 | +  ; CHECK-NEXT:   $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg | 
|  | 81 | +  ; CHECK-NEXT:   $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg | 
|  | 82 | +  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive) | 
|  | 83 | +  ; CHECK-NEXT:   $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg | 
|  | 84 | +  ; CHECK-NEXT:   $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg | 
|  | 85 | +  ; CHECK-NEXT:   renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 | 
|  | 86 | +  ; CHECK-NEXT:   $lr = MVE_DLSTP_32 killed renamable $r1 | 
|  | 87 | +  ; CHECK-NEXT: {{  $}} | 
|  | 88 | +  ; CHECK-NEXT: bb.1 (%ir-block.10, align 4): | 
|  | 89 | +  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000) | 
|  | 90 | +  ; CHECK-NEXT:   liveins: $lr, $d2, $d3, $q0, $r0 | 
|  | 91 | +  ; CHECK-NEXT: {{  $}} | 
|  | 92 | +  ; CHECK-NEXT:   $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 | 
|  | 93 | +  ; CHECK-NEXT:   renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) | 
|  | 94 | +  ; CHECK-NEXT:   $d0 = VMOVD $d2, 14 /* CC::al */, $noreg | 
|  | 95 | +  ; CHECK-NEXT:   $d1 = VMOVD $d3, 14 /* CC::al */, $noreg | 
|  | 96 | +  ; CHECK-NEXT:   renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 | 
|  | 97 | +  ; CHECK-NEXT:   $lr = MVE_LETP killed renamable $lr, %bb.1 | 
|  | 98 | +  ; CHECK-NEXT: {{  $}} | 
|  | 99 | +  ; CHECK-NEXT: bb.2 (%ir-block.22): | 
|  | 100 | +  ; CHECK-NEXT:   liveins: $q0 | 
|  | 101 | +  ; CHECK-NEXT: {{  $}} | 
|  | 102 | +  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 | 
|  |  | +  ; Input MIR starts here; everything above is the expected output. | 
|  | 103 | +  bb.0 (%ir-block.2): | 
|  | 104 | +    successors: %bb.1(0x80000000) | 
|  | 105 | +    liveins: $r0, $r1, $r7, $lr | 
|  | 106 | +
 | 
|  |  | +    ; Prologue: push r7 and lr, then emit the matching CFI directives. | 
|  | 107 | +    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp | 
|  | 108 | +    frame-setup CFI_INSTRUCTION def_cfa_offset 8 | 
|  | 109 | +    frame-setup CFI_INSTRUCTION offset $lr, -4 | 
|  | 110 | +    frame-setup CFI_INSTRUCTION offset $r7, -8 | 
|  |  | +    ; r2 = address of @inactive (low half here, high half two instructions on). | 
|  | 111 | +    $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg | 
|  |  | +    ; Compare the element count in r1 against 4; the flags feed the IT block below. | 
|  | 112 | +    tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr | 
|  | 113 | +    $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg | 
|  |  | +    ; r3 = 1, the "+ 1" term of the iteration count. | 
|  | 114 | +    renamable $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg | 
|  |  | +    ; q1 = the 128-bit vector stored in @inactive. | 
|  | 115 | +    renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive) | 
|  |  | +    ; r2 = r1, overwritten with 4 when r1 >= 4, i.e. r2 = smin(r1, 4) as in the IR. | 
|  | 116 | +    $r2 = tMOVr $r1, 14 /* CC::al */, $noreg | 
|  | 117 | +    t2IT 10, 8, implicit-def $itstate | 
|  | 118 | +    renamable $r2 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r2, implicit killed $itstate | 
|  |  | +    ; r2 = r1 - r2, then r2 += 3. | 
|  | 119 | +    renamable $r2, dead $cpsr = tSUBrr renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg | 
|  | 120 | +    renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 3, 14 /* CC::al */, $noreg | 
|  |  | +    ; r2 = r3 + shifted r2; per the IR this is (r2 >> 2) + 1, so shifter | 
|  |  | +    ; operand 19 presumably encodes LSR #2. | 
|  | 121 | +    renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg | 
|  |  | +    ; r3 = (15820 << 16) | 52429 = 0x3DCCCCCD, the single-precision encoding of | 
|  |  | +    ; the IR splat constant, broadcast into every 32-bit lane of q0. | 
|  | 122 | +    $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg | 
|  | 123 | +    $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg | 
|  | 124 | +    renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 | 
|  |  | +    ; Loop-start pseudo taking the iteration count (r2) and the element count | 
|  |  | +    ; (r1); the expected output replaces it with MVE_DLSTP_32 on r1 alone. | 
|  | 125 | +    renamable $lr = t2DoLoopStartTP killed renamable $r2, renamable $r1 | 
|  | 126 | +
 | 
|  | 127 | +  bb.1 (%ir-block.10, align 4): | 
|  | 128 | +    successors: %bb.1(0x7c000000), %bb.2(0x04000000) | 
|  | 129 | +    liveins: $lr, $q0, $q1, $r0, $r1 | 
|  | 130 | +
 | 
|  |  | +    ; Predicate for the elements remaining in r1. The expected output has no | 
|  |  | +    ; VCTP or VPST: the loop there is tail-predicated instead. | 
|  | 131 | +    renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg | 
|  |  | +    ; Move the accumulator out of q0; the expected output keeps this copy as | 
|  |  | +    ; an MVE_VORR. | 
|  | 132 | +    $q2 = MQPRCopy killed $q0 | 
|  |  | +    ; Predicated post-incrementing load: q3 = [r0], r0 += 16. | 
|  | 133 | +    MVE_VPST 8, implicit $vpr | 
|  | 134 | +    renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, renamable $lr :: (load unknown-size from %ir.13, align 4) | 
|  |  | +    ; The copy under test: q0 = q1 seeds the destination of the predicated add | 
|  |  | +    ; below, so every lane must be copied. The expected output expands it into | 
|  |  | +    ; two VMOVD (d0 = d2, d1 = d3) rather than an MVE_VORR. | 
|  | 135 | +    $q0 = MQPRCopy $q1 | 
|  |  | +    ; Predicated add q0 = q2 + q3; q0 is also passed in as the last operand. | 
|  | 136 | +    MVE_VPST 8, implicit $vpr | 
|  | 137 | +    renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 1, killed renamable $vpr, renamable $lr, killed renamable $q0 | 
|  |  | +    ; Four fewer elements remain. | 
|  | 138 | +    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg | 
|  |  | +    ; Loop-end pseudo plus the exit branch; the expected output has a single | 
|  |  | +    ; MVE_LETP in their place. | 
|  | 139 | +    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr | 
|  | 140 | +    tB %bb.2, 14 /* CC::al */, $noreg | 
|  | 141 | +
 | 
|  | 142 | +  bb.2 (%ir-block.22): | 
|  | 143 | +    liveins: $q0 | 
|  | 144 | +
 | 
|  |  | +    ; Epilogue: pop r7 and return by popping into pc, with q0 live as the result. | 
|  | 145 | +    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 | 
|  | 146 | +... | 
0 commit comments