|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | 2 | ; RUN: llc < %s -mtriple=riscv32 -mattr=+v \ |
3 | 3 | ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ |
4 | | -; RUN: | FileCheck %s --check-prefixes=CHECK,RV32V |
| 4 | +; RUN: | FileCheck %s --check-prefix=CHECK |
5 | 5 | ; RUN: llc < %s -mtriple=riscv64 -mattr=+v \ |
6 | 6 | ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ |
7 | | -; RUN: | FileCheck %s --check-prefixes=CHECK,RV64V |
| 7 | +; RUN: | FileCheck %s --check-prefix=CHECK |
8 | 8 |
|
9 | 9 | define void @deinterleave3_0_i8(ptr %in, ptr %out) { |
10 | 10 | ; CHECK-LABEL: deinterleave3_0_i8: |
@@ -285,6 +285,94 @@ entry: |
285 | 285 | store <8 x i8> %shuffle.i5, ptr %out, align 1 |
286 | 286 | ret void |
287 | 287 | } |
288 | | -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
289 | | -; RV32V: {{.*}} |
290 | | -; RV64V: {{.*}} |
| 288 | + |
| 289 | +; Exercise the high LMUL case: select elements 0, 7 and 14 (stride 7, offset 0) from a <16 x i64> load, which is loaded at m8 below; the remaining five result lanes are undef and the whole <8 x i64> result is stored. |
| 290 | +define void @deinterleave7_0_i64(ptr %in, ptr %out) { |
| 291 | +; CHECK-LABEL: deinterleave7_0_i64: |
| 292 | +; CHECK: # %bb.0: # %entry |
| 293 | +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma |
| 294 | +; CHECK-NEXT: vle64.v v8, (a0) |
| 295 | +; CHECK-NEXT: li a0, 129 |
| 296 | +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma |
| 297 | +; CHECK-NEXT: vmv.v.i v0, 4 |
| 298 | +; CHECK-NEXT: vmv.s.x v16, a0 |
| 299 | +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma |
| 300 | +; CHECK-NEXT: vcompress.vm v20, v8, v16 |
| 301 | +; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma |
| 302 | +; CHECK-NEXT: vslidedown.vi v8, v8, 8 |
| 303 | +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| 304 | +; CHECK-NEXT: vrgather.vi v20, v8, 6, v0.t |
| 305 | +; CHECK-NEXT: vse64.v v20, (a1) |
| 306 | +; CHECK-NEXT: ret |
| 307 | +entry: |
| 308 | + %0 = load <16 x i64>, ptr %in |
| 309 | + %shuffle.i5 = shufflevector <16 x i64> %0, <16 x i64> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> |
| 310 | + store <8 x i64> %shuffle.i5, ptr %out |
| 311 | + ret void |
| 312 | +} |
| 313 | + |
| 314 | +; Store back only the active subvector: select elements 0, 4, 8 and 12 (stride 4, offset 0) from a <16 x i8> load into a <4 x i8> result with no undef padding lanes; the expected lowering is two narrowing shifts. |
| 315 | +define void @deinterleave4_0_i8_subvec(ptr %in, ptr %out) { |
| 316 | +; CHECK-LABEL: deinterleave4_0_i8_subvec: |
| 317 | +; CHECK: # %bb.0: # %entry |
| 318 | +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma |
| 319 | +; CHECK-NEXT: vle8.v v8, (a0) |
| 320 | +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma |
| 321 | +; CHECK-NEXT: vnsrl.wi v8, v8, 0 |
| 322 | +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma |
| 323 | +; CHECK-NEXT: vnsrl.wi v8, v8, 0 |
| 324 | +; CHECK-NEXT: vse8.v v8, (a1) |
| 325 | +; CHECK-NEXT: ret |
| 326 | +entry: |
| 327 | + %0 = load <16 x i8>, ptr %in, align 1 |
| 328 | + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| 329 | + store <4 x i8> %shuffle.i5, ptr %out, align 1 |
| 330 | + ret void |
| 331 | +} |
| 332 | + |
| 333 | +; Store back only the active subvector: select elements 0, 7 and 14 (stride 7, offset 0) from a <16 x i32> load into a <3 x i32> result, so the final store is expected to use a VL of 3. |
| 334 | +define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) { |
| 335 | +; CHECK-LABEL: deinterleave7_0_i32_subvec: |
| 336 | +; CHECK: # %bb.0: # %entry |
| 337 | +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma |
| 338 | +; CHECK-NEXT: vle32.v v8, (a0) |
| 339 | +; CHECK-NEXT: li a0, 129 |
| 340 | +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma |
| 341 | +; CHECK-NEXT: vmv.v.i v0, 4 |
| 342 | +; CHECK-NEXT: vmv.s.x v12, a0 |
| 343 | +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma |
| 344 | +; CHECK-NEXT: vcompress.vm v14, v8, v12 |
| 345 | +; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma |
| 346 | +; CHECK-NEXT: vslidedown.vi v8, v8, 8 |
| 347 | +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| 348 | +; CHECK-NEXT: vrgather.vi v14, v8, 6, v0.t |
| 349 | +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma |
| 350 | +; CHECK-NEXT: vse32.v v14, (a1) |
| 351 | +; CHECK-NEXT: ret |
| 352 | +entry: |
| 353 | + %0 = load <16 x i32>, ptr %in |
| 354 | + %shuffle.i5 = shufflevector <16 x i32> %0, <16 x i32> poison, <3 x i32> <i32 0, i32 7, i32 14> |
| 355 | + store <3 x i32> %shuffle.i5, ptr %out |
| 356 | + ret void |
| 357 | +} |
| 358 | + |
| 359 | +; Store back only the active subvector: select elements 0 and 8 (stride 8, offset 0) from a <16 x i8> load into a <2 x i8> result; the expected lowering extracts both elements to scalars and rebuilds the two-element vector. |
| 360 | +define void @deinterleave8_0_i8_subvec(ptr %in, ptr %out) { |
| 361 | +; CHECK-LABEL: deinterleave8_0_i8_subvec: |
| 362 | +; CHECK: # %bb.0: # %entry |
| 363 | +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma |
| 364 | +; CHECK-NEXT: vle8.v v8, (a0) |
| 365 | +; CHECK-NEXT: vslidedown.vi v9, v8, 8 |
| 366 | +; CHECK-NEXT: vmv.x.s a0, v8 |
| 367 | +; CHECK-NEXT: vmv.x.s a2, v9 |
| 368 | +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma |
| 369 | +; CHECK-NEXT: vmv.v.x v8, a0 |
| 370 | +; CHECK-NEXT: vslide1down.vx v8, v8, a2 |
| 371 | +; CHECK-NEXT: vse8.v v8, (a1) |
| 372 | +; CHECK-NEXT: ret |
| 373 | +entry: |
| 374 | + %0 = load <16 x i8>, ptr %in, align 1 |
| 375 | + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <2 x i32> <i32 0, i32 8> |
| 376 | + store <2 x i8> %shuffle.i5, ptr %out, align 1 |
| 377 | + ret void |
| 378 | +} |
0 commit comments