|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -O2 -mtriple=wasm32 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s |
| 3 | + |
| 4 | +;void f(unsigned char *x, unsigned char *y, int n) { |
| 5 | +; for (int i = 0; i < n; i++) |
| 6 | +; x[i] = (x[i] + y[i] + 1) / 2; |
| 7 | +;} |
| 8 | + |
| 9 | +define void @f(ptr %x, ptr %y, i32 %n) { ; byte-wise rounding average: x[i] = (x[i] + y[i] + 1) / 2 for i in [0, n) |
| 10 | +; CHECK-LABEL: f: |
| 11 | +; CHECK: .functype f (i32, i32, i32) -> () |
| 12 | +; CHECK-NEXT: # %bb.0: # %entry |
| 13 | +; CHECK-NEXT: block |
| 14 | +; CHECK-NEXT: i32.const $push0=, 1 |
| 15 | +; CHECK-NEXT: i32.lt_s $push1=, $2, $pop0 |
| 16 | +; CHECK-NEXT: br_if 0, $pop1 # 0: down to label0 |
| 17 | +; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| 18 | +; CHECK-NEXT: i32.const $5=, 0 |
| 19 | +; CHECK-NEXT: block |
| 20 | +; CHECK-NEXT: i32.const $push2=, 16 |
| 21 | +; CHECK-NEXT: i32.lt_u $push3=, $2, $pop2 |
| 22 | +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label1 |
| 23 | +; CHECK-NEXT: # %bb.2: # %vector.memcheck |
| 24 | +; CHECK-NEXT: block |
| 25 | +; CHECK-NEXT: i32.add $push5=, $1, $2 |
| 26 | +; CHECK-NEXT: i32.ge_u $push6=, $0, $pop5 |
| 27 | +; CHECK-NEXT: br_if 0, $pop6 # 0: down to label2 |
| 28 | +; CHECK-NEXT: # %bb.3: # %vector.memcheck |
| 29 | +; CHECK-NEXT: i32.add $push4=, $0, $2 |
| 30 | +; CHECK-NEXT: i32.lt_u $push7=, $1, $pop4 |
| 31 | +; CHECK-NEXT: br_if 1, $pop7 # 1: down to label1 |
| 32 | +; CHECK-NEXT: .LBB0_4: # %vector.ph |
| 33 | +; CHECK-NEXT: end_block # label2: |
| 34 | +; CHECK-NEXT: local.copy $6=, $0 |
| 35 | +; CHECK-NEXT: local.copy $7=, $1 |
| 36 | +; CHECK-NEXT: i32.const $push8=, 2147483632 |
| 37 | +; CHECK-NEXT: i32.and $push34=, $2, $pop8 |
| 38 | +; CHECK-NEXT: local.tee $push33=, $5=, $pop34 |
| 39 | +; CHECK-NEXT: local.copy $8=, $pop33 |
| 40 | +; CHECK-NEXT: .LBB0_5: # %vector.body |
| 41 | +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| 42 | +; CHECK-NEXT: loop # label3: |
| 43 | +; CHECK-NEXT: v128.load $push44=, 0($6):p2align=0 |
| 44 | +; CHECK-NEXT: local.tee $push43=, $4=, $pop44 |
| 45 | +; CHECK-NEXT: v128.load $push42=, 0($7):p2align=0 |
| 46 | +; CHECK-NEXT: local.tee $push41=, $3=, $pop42 |
| 47 | +; CHECK-NEXT: v128.or $push9=, $pop43, $pop41 |
| 48 | +; CHECK-NEXT: v128.xor $push10=, $4, $3 |
| 49 | +; CHECK-NEXT: i32.const $push40=, 1 |
| 50 | +; CHECK-NEXT: i8x16.shr_u $push11=, $pop10, $pop40 |
| 51 | +; CHECK-NEXT: i8x16.sub $push12=, $pop9, $pop11 |
| 52 | +; CHECK-NEXT: v128.store 0($6):p2align=0, $pop12 |
| 53 | +; CHECK-NEXT: i32.const $push39=, 16 |
| 54 | +; CHECK-NEXT: i32.add $6=, $6, $pop39 |
| 55 | +; CHECK-NEXT: i32.const $push38=, 16 |
| 56 | +; CHECK-NEXT: i32.add $7=, $7, $pop38 |
| 57 | +; CHECK-NEXT: i32.const $push37=, -16 |
| 58 | +; CHECK-NEXT: i32.add $push36=, $8, $pop37 |
| 59 | +; CHECK-NEXT: local.tee $push35=, $8=, $pop36 |
| 60 | +; CHECK-NEXT: br_if 0, $pop35 # 0: up to label3 |
| 61 | +; CHECK-NEXT: # %bb.6: # %middle.block |
| 62 | +; CHECK-NEXT: end_loop |
| 63 | +; CHECK-NEXT: i32.eq $push13=, $2, $5 |
| 64 | +; CHECK-NEXT: br_if 1, $pop13 # 1: down to label0 |
| 65 | +; CHECK-NEXT: .LBB0_7: # %for.body.preheader16 |
| 66 | +; CHECK-NEXT: end_block # label1: |
| 67 | +; CHECK-NEXT: i32.const $push46=, 1 |
| 68 | +; CHECK-NEXT: i32.or $6=, $5, $pop46 |
| 69 | +; CHECK-NEXT: block |
| 70 | +; CHECK-NEXT: i32.const $push45=, 1 |
| 71 | +; CHECK-NEXT: i32.and $push14=, $2, $pop45 |
| 72 | +; CHECK-NEXT: i32.eqz $push64=, $pop14 |
| 73 | +; CHECK-NEXT: br_if 0, $pop64 # 0: down to label4 |
| 74 | +; CHECK-NEXT: # %bb.8: # %for.body.prol |
| 75 | +; CHECK-NEXT: i32.add $push50=, $0, $5 |
| 76 | +; CHECK-NEXT: local.tee $push49=, $7=, $pop50 |
| 77 | +; CHECK-NEXT: i32.load8_u $push17=, 0($7) |
| 78 | +; CHECK-NEXT: i32.add $push15=, $1, $5 |
| 79 | +; CHECK-NEXT: i32.load8_u $push16=, 0($pop15) |
| 80 | +; CHECK-NEXT: i32.add $push18=, $pop17, $pop16 |
| 81 | +; CHECK-NEXT: i32.const $push48=, 1 |
| 82 | +; CHECK-NEXT: i32.add $push19=, $pop18, $pop48 |
| 83 | +; CHECK-NEXT: i32.const $push47=, 1 |
| 84 | +; CHECK-NEXT: i32.shr_u $push20=, $pop19, $pop47 |
| 85 | +; CHECK-NEXT: i32.store8 0($pop49), $pop20 |
| 86 | +; CHECK-NEXT: local.copy $5=, $6 |
| 87 | +; CHECK-NEXT: .LBB0_9: # %for.body.prol.loopexit |
| 88 | +; CHECK-NEXT: end_block # label4: |
| 89 | +; CHECK-NEXT: i32.eq $push21=, $2, $6 |
| 90 | +; CHECK-NEXT: br_if 0, $pop21 # 0: down to label0 |
| 91 | +; CHECK-NEXT: # %bb.10: # %for.body.preheader1 |
| 92 | +; CHECK-NEXT: i32.add $6=, $0, $5 |
| 93 | +; CHECK-NEXT: i32.add $7=, $1, $5 |
| 94 | +; CHECK-NEXT: i32.sub $8=, $2, $5 |
| 95 | +; CHECK-NEXT: .LBB0_11: # %for.body |
| 96 | +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| 97 | +; CHECK-NEXT: loop # label5: |
| 98 | +; CHECK-NEXT: i32.load8_u $push23=, 0($6) |
| 99 | +; CHECK-NEXT: i32.load8_u $push22=, 0($7) |
| 100 | +; CHECK-NEXT: i32.add $push24=, $pop23, $pop22 |
| 101 | +; CHECK-NEXT: i32.const $push63=, 1 |
| 102 | +; CHECK-NEXT: i32.add $push25=, $pop24, $pop63 |
| 103 | +; CHECK-NEXT: i32.const $push62=, 1 |
| 104 | +; CHECK-NEXT: i32.shr_u $push26=, $pop25, $pop62 |
| 105 | +; CHECK-NEXT: i32.store8 0($6), $pop26 |
| 106 | +; CHECK-NEXT: i32.const $push61=, 1 |
| 107 | +; CHECK-NEXT: i32.add $push60=, $6, $pop61 |
| 108 | +; CHECK-NEXT: local.tee $push59=, $2=, $pop60 |
| 109 | +; CHECK-NEXT: i32.load8_u $push27=, 0($2) |
| 110 | +; CHECK-NEXT: i32.const $push58=, 1 |
| 111 | +; CHECK-NEXT: i32.add $push28=, $7, $pop58 |
| 112 | +; CHECK-NEXT: i32.load8_u $push29=, 0($pop28) |
| 113 | +; CHECK-NEXT: i32.add $push30=, $pop27, $pop29 |
| 114 | +; CHECK-NEXT: i32.const $push57=, 1 |
| 115 | +; CHECK-NEXT: i32.add $push31=, $pop30, $pop57 |
| 116 | +; CHECK-NEXT: i32.const $push56=, 1 |
| 117 | +; CHECK-NEXT: i32.shr_u $push32=, $pop31, $pop56 |
| 118 | +; CHECK-NEXT: i32.store8 0($pop59), $pop32 |
| 119 | +; CHECK-NEXT: i32.const $push55=, 2 |
| 120 | +; CHECK-NEXT: i32.add $6=, $6, $pop55 |
| 121 | +; CHECK-NEXT: i32.const $push54=, 2 |
| 122 | +; CHECK-NEXT: i32.add $7=, $7, $pop54 |
| 123 | +; CHECK-NEXT: i32.const $push53=, -2 |
| 124 | +; CHECK-NEXT: i32.add $push52=, $8, $pop53 |
| 125 | +; CHECK-NEXT: local.tee $push51=, $8=, $pop52 |
| 126 | +; CHECK-NEXT: br_if 0, $pop51 # 0: up to label5 |
| 127 | +; CHECK-NEXT: .LBB0_12: # %for.cond.cleanup |
| 128 | +; CHECK-NEXT: end_loop |
| 129 | +; CHECK-NEXT: end_block # label0: |
| 130 | +; CHECK-NEXT: return |
| 131 | +entry: ; skip all work when n <= 0 |
| 132 | + %cmp12 = icmp sgt i32 %n, 0 |
| 133 | + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup |
| 134 | + |
| 135 | +for.body.preheader: ; fewer than 16 elements: go straight to the scalar path |
| 136 | + %min.iters.check = icmp ult i32 %n, 16 |
| 137 | + br i1 %min.iters.check, label %for.body.preheader16, label %vector.memcheck |
| 138 | + |
| 139 | +vector.memcheck: ; runtime overlap check of [x, x+n) against [y, y+n); on overlap use the scalar path |
| 140 | + %scevgep = getelementptr i8, ptr %x, i32 %n |
| 141 | + %scevgep14 = getelementptr i8, ptr %y, i32 %n |
| 142 | + %bound0 = icmp ult ptr %x, %scevgep14 |
| 143 | + %bound1 = icmp ult ptr %y, %scevgep |
| 144 | + %found.conflict = and i1 %bound0, %bound1 |
| 145 | + br i1 %found.conflict, label %for.body.preheader16, label %vector.ph |
| 146 | + |
| 147 | +vector.ph: ; %n.vec = n with its low 4 bits cleared (mask 0x7FFFFFF0), the element count handled by the vector loop |
| 148 | + %n.vec = and i32 %n, 2147483632 |
| 149 | + br label %vector.body |
| 150 | + |
| 151 | +vector.body: ; 16 bytes per iteration, widened to i16 so x + 1 + y cannot wrap; CHECK lines expect (a | b) - ((a ^ b) >> 1) on v128 |
| 152 | + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| 153 | + %0 = getelementptr inbounds nuw i8, ptr %x, i32 %index |
| 154 | + %wide.load = load <16 x i8>, ptr %0, align 1 |
| 155 | + %1 = zext <16 x i8> %wide.load to <16 x i16> |
| 156 | + %2 = getelementptr inbounds nuw i8, ptr %y, i32 %index |
| 157 | + %wide.load15 = load <16 x i8>, ptr %2, align 1 |
| 158 | + %3 = zext <16 x i8> %wide.load15 to <16 x i16> |
| 159 | + %4 = add nuw nsw <16 x i16> %1, splat (i16 1) |
| 160 | + %5 = add nuw nsw <16 x i16> %4, %3 |
| 161 | + %6 = lshr <16 x i16> %5, splat (i16 1) |
| 162 | + %7 = trunc nuw <16 x i16> %6 to <16 x i8> |
| 163 | + store <16 x i8> %7, ptr %0, align 1 |
| 164 | + %index.next = add nuw i32 %index, 16 |
| 165 | + %8 = icmp eq i32 %index.next, %n.vec |
| 166 | + br i1 %8, label %middle.block, label %vector.body |
| 167 | + |
| 168 | +middle.block: ; done if the vector loop covered every element, otherwise finish the tail in scalar code |
| 169 | + %cmp.n = icmp eq i32 %n, %n.vec |
| 170 | + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader16 |
| 171 | + |
| 172 | +for.body.preheader16: ; scalar path entry: start index is 0 or %n.vec; an odd n peels one iteration into for.body.prol |
| 173 | + %i.013.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] |
| 174 | + %.neg = or disjoint i32 %i.013.ph, 1 |
| 175 | + %xtraiter = and i32 %n, 1 |
| 176 | + %lcmp.mod.not = icmp eq i32 %xtraiter, 0 |
| 177 | + br i1 %lcmp.mod.not, label %for.body.prol.loopexit, label %for.body.prol |
| 178 | + |
| 179 | +for.body.prol: ; single peeled scalar iteration at index %i.013.ph |
| 180 | + %arrayidx.prol = getelementptr inbounds nuw i8, ptr %x, i32 %i.013.ph |
| 181 | + %9 = load i8, ptr %arrayidx.prol, align 1 |
| 182 | + %conv.prol = zext i8 %9 to i16 |
| 183 | + %arrayidx1.prol = getelementptr inbounds nuw i8, ptr %y, i32 %i.013.ph |
| 184 | + %10 = load i8, ptr %arrayidx1.prol, align 1 |
| 185 | + %conv2.prol = zext i8 %10 to i16 |
| 186 | + %add.prol = add nuw nsw i16 %conv.prol, 1 |
| 187 | + %add3.prol = add nuw nsw i16 %add.prol, %conv2.prol |
| 188 | + %div11.prol = lshr i16 %add3.prol, 1 |
| 189 | + %conv4.prol = trunc nuw i16 %div11.prol to i8 |
| 190 | + store i8 %conv4.prol, ptr %arrayidx.prol, align 1 |
| 191 | + %inc.prol = or disjoint i32 %i.013.ph, 1 |
| 192 | + br label %for.body.prol.loopexit |
| 193 | + |
| 194 | +for.body.prol.loopexit: ; exits when n == (start index | 1), otherwise enters the 2x unrolled loop |
| 195 | + %i.013.unr = phi i32 [ %i.013.ph, %for.body.preheader16 ], [ %inc.prol, %for.body.prol ] |
| 196 | + %11 = icmp eq i32 %n, %.neg |
| 197 | + br i1 %11, label %for.cond.cleanup, label %for.body |
| 198 | + |
| 199 | +for.cond.cleanup: |
| 200 | + ret void |
| 201 | + |
| 202 | +for.body: ; scalar loop unrolled by two: handles indices %i.013 and %i.013 + 1 per iteration |
| 203 | + %i.013 = phi i32 [ %inc.1, %for.body ], [ %i.013.unr, %for.body.prol.loopexit ] |
| 204 | + %arrayidx = getelementptr inbounds nuw i8, ptr %x, i32 %i.013 |
| 205 | + %12 = load i8, ptr %arrayidx, align 1 |
| 206 | + %conv = zext i8 %12 to i16 |
| 207 | + %arrayidx1 = getelementptr inbounds nuw i8, ptr %y, i32 %i.013 |
| 208 | + %13 = load i8, ptr %arrayidx1, align 1 |
| 209 | + %conv2 = zext i8 %13 to i16 |
| 210 | + %add = add nuw nsw i16 %conv, 1 |
| 211 | + %add3 = add nuw nsw i16 %add, %conv2 |
| 212 | + %div11 = lshr i16 %add3, 1 |
| 213 | + %conv4 = trunc nuw i16 %div11 to i8 |
| 214 | + store i8 %conv4, ptr %arrayidx, align 1 |
| 215 | + %inc = add nuw nsw i32 %i.013, 1 |
| 216 | + %arrayidx.1 = getelementptr inbounds nuw i8, ptr %x, i32 %inc |
| 217 | + %14 = load i8, ptr %arrayidx.1, align 1 |
| 218 | + %conv.1 = zext i8 %14 to i16 |
| 219 | + %arrayidx1.1 = getelementptr inbounds nuw i8, ptr %y, i32 %inc |
| 220 | + %15 = load i8, ptr %arrayidx1.1, align 1 |
| 221 | + %conv2.1 = zext i8 %15 to i16 |
| 222 | + %add.1 = add nuw nsw i16 %conv.1, 1 |
| 223 | + %add3.1 = add nuw nsw i16 %add.1, %conv2.1 |
| 224 | + %div11.1 = lshr i16 %add3.1, 1 |
| 225 | + %conv4.1 = trunc nuw i16 %div11.1 to i8 |
| 226 | + store i8 %conv4.1, ptr %arrayidx.1, align 1 |
| 227 | + %inc.1 = add nuw nsw i32 %i.013, 2 |
| 228 | + %exitcond.not.1 = icmp eq i32 %inc.1, %n |
| 229 | + br i1 %exitcond.not.1, label %for.cond.cleanup, label %for.body |
| 230 | +} |
0 commit comments