|
62 | 62 | ret void;
|
63 | 63 | }
|
64 | 64 |
|
| 65 | +define arm_aapcs_vfpcc void @push_out_add_sub_block_c(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { |
| 66 | +; CHECK-LABEL: @push_out_add_sub_block_c( |
| 67 | +; CHECK-NEXT: vector.ph: |
| 68 | +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6> |
| 69 | +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 2, i32 2, i32 2, i32 2> |
| 70 | +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 |
| 71 | +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 |
| 72 | +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer |
| 73 | +; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] |
| 74 | +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], <i32 32, i32 32, i32 32, i32 32> |
| 75 | +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| 76 | +; CHECK: vector.body: |
| 77 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] |
| 78 | +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[PREINCREMENTSTARTINDEX]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY_END]] ] |
| 79 | +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX]], 48 |
| 80 | +; CHECK-NEXT: br i1 [[TMP1]], label [[LOWER_BLOCK:%.*]], label [[END:%.*]] |
| 81 | +; CHECK: lower.block: |
| 82 | +; CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[VEC_IND]], i32 32) |
| 83 | +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP2]], 0 |
| 84 | +; CHECK-NEXT: [[TMP4]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP2]], 1 |
| 85 | +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]] |
| 86 | +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP5]], align 4 |
| 87 | +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 |
| 88 | +; CHECK-NEXT: br label [[VECTOR_BODY_END]] |
| 89 | +; CHECK: vector.body.end: |
| 90 | +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] |
| 91 | +; CHECK-NEXT: br i1 [[TMP6]], label [[END]], label [[VECTOR_BODY]] |
| 92 | +; CHECK: end: |
| 93 | +; CHECK-NEXT: ret void |
| 94 | +; |
| 95 | +; Input IR: a gather from %data at offsets %vec.ind + 6, stored to %dst; the %vec.ind phi lists its backedge value first.
| 96 | +vector.ph:
| 97 | + br label %vector.body
| 98 | +
| 99 | +vector.body: ; preds = %vector.body.end, %vector.ph
| 100 | + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body.end ]
| 101 | + %vec.ind = phi <4 x i32> [ %vec.ind.next, %vector.body.end ], [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ]
| 102 | + %0 = icmp eq i32 %index, 48
| 103 | + br i1 %0, label %lower.block, label %end ; lower.block is entered only when %index == 48; otherwise exit to %end
| 104 | +
| 105 | +lower.block: ; preds = %vector.body
| 106 | + %1 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6> ; CHECK lines expect an add of 6 on the start vector in vector.ph ([[PUSHEDOUTADD]]); NOTE(review): no later CHECK line references it - confirm intended
| 107 | + %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1
| 108 | + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
| 109 | + %3 = getelementptr inbounds i32, i32* %dst, i32 %index
| 110 | + %4 = bitcast i32* %3 to <4 x i32>*
| 111 | + store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4
| 112 | + %index.next = add i32 %index, 4
| 113 | + %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> ; in the expected output the phi's next value is instead the second result of the vldr.gather.base.wb call
| 114 | + br label %vector.body.end
| 115 | +
| 116 | +vector.body.end: ; preds = %lower.block
| 117 | + %5 = icmp eq i32 %index.next, %n.vec
| 118 | + br i1 %5, label %end, label %vector.body
| 119 | +
| 120 | +end:
| 121 | + ret void;
| 122 | +}
| 123 | + |
65 | 124 | define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) {
|
66 | 125 | ; CHECK-LABEL: @push_out_mul_sub_block(
|
67 | 126 | ; CHECK-NEXT: vector.ph:
|
|
0 commit comments