@@ -312,3 +312,91 @@ define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range
312312 ret <4 x double > %5
313313}
314314
; Single-source permutation of a <16 x i32> vector with vscale fixed at 2
; (vscale_range(2,2)). Viewed as four groups of four lanes, every result group
; reads one lane from each of the four source groups (e.g. result lanes 0-3
; take source lanes 0, 5, 8, 12), so a per-group split would need a number of
; sub-shuffles that grows with the square of the group count.
; The checked output keeps this as one vrgatherei16.vv at e32/m4: the index
; vector is loaded from the constant pool with vl1r.v and widened to e16 with
; vsext.vf2 before the gather.
315+ define <16 x i32 > @m4_square_num_of_shuffles_in_chunks (<16 x i32 > %0 ) vscale_range(2 ,2 ) {
316+ ; CHECK-LABEL: m4_square_num_of_shuffles_in_chunks:
317+ ; CHECK: # %bb.0: # %entry
318+ ; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
319+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI17_0)
320+ ; CHECK-NEXT: vl1r.v v12, (a0)
321+ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
322+ ; CHECK-NEXT: vsext.vf2 v16, v12
323+ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
324+ ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
325+ ; CHECK-NEXT: vmv.v.v v8, v12
326+ ; CHECK-NEXT: ret
327+ entry:
328+ %1 = shufflevector <16 x i32 > %0 , <16 x i32 > poison, <16 x i32 > <i32 0 , i32 5 , i32 8 , i32 12 , i32 1 , i32 4 , i32 9 , i32 13 , i32 2 , i32 6 , i32 10 , i32 14 , i32 3 , i32 7 , i32 11 , i32 15 >
329+ ret <16 x i32 > %1
330+ }
331+
; Sparse counterpart of a full <16 x i32> permutation with vscale fixed at 2:
; the mask is poison everywhere except four lanes (result lanes 2, 3, 8 and 14
; read source lanes 8, 12, 2 and 11 respectively), so only a handful of
; group-to-group moves are actually required.
; The checked output still uses a single vrgatherei16.vv at e32/m4; here the
; e16 index vector is loaded directly from the constant pool with vl2re16.v,
; with no widening step.
332+ define <16 x i32 > @m4_linear_num_of_shuffles_in_chunks (<16 x i32 > %0 ) vscale_range(2 ,2 ) {
333+ ; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks:
334+ ; CHECK: # %bb.0: # %entry
335+ ; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
336+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0)
337+ ; CHECK-NEXT: vl2re16.v v16, (a0)
338+ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
339+ ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
340+ ; CHECK-NEXT: vmv.v.v v8, v12
341+ ; CHECK-NEXT: ret
342+ entry:
343+ %1 = shufflevector <16 x i32 > %0 , <16 x i32 > poison, <16 x i32 > <i32 poison, i32 poison, i32 8 , i32 12 , i32 poison, i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11 , i32 poison>
344+ ret <16 x i32 > %1
345+ }
346+
; Two chained two-source shuffles on <32 x i32> with vscale fixed at 8, where
; one operand of each shuffle is zeroinitializer:
;   %1 swaps adjacent i32 pairs of %0 in lanes 0-3, 8-11, 16-19 and 24-27 and
;      takes the remaining lanes from the zero vector (mask indices >= 32);
;   %2 mixes lanes of %1 (mask indices >= 32) with lanes of the zero vector.
; The results are OR-ed, lane 1 is extracted and sign-extended to i64.
; Expected code, per the check lines: the pair swap is done as a rotate by 32
; on e64 elements (vsrl/vsll + vor), zero lanes are merged in with a mask
; built from lui 61681 / addi -241 (0x0F0F0F0F), and the second shuffle is a
; vrgather.vi with index 2. RV32 builds the shift amounts as vectors and
; materialises the high result word with srai a1, a0, 31; RV64 uses the
; scalar-shift forms and returns the value in a0 only.
; NOTE(review): looks like a reduced reproducer for a shuffle spanning
; several vector registers — confirm against the originating commit.
347+ define i64 @multi_chunks_shuffle (<32 x i32 > %0 ) vscale_range(8 ,8 ) {
348+ ; RV32-LABEL: multi_chunks_shuffle:
349+ ; RV32: # %bb.0: # %entry
350+ ; RV32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
351+ ; RV32-NEXT: vmv.v.i v10, 0
352+ ; RV32-NEXT: li a0, 32
353+ ; RV32-NEXT: li a1, 63
354+ ; RV32-NEXT: vwsubu.vx v12, v10, a0
355+ ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
356+ ; RV32-NEXT: vmv.v.x v10, a0
357+ ; RV32-NEXT: lui a0, 61681
358+ ; RV32-NEXT: addi a0, a0, -241
359+ ; RV32-NEXT: vand.vx v12, v12, a1
360+ ; RV32-NEXT: vand.vx v10, v10, a1
361+ ; RV32-NEXT: vsrl.vv v12, v8, v12
362+ ; RV32-NEXT: vsll.vv v8, v8, v10
363+ ; RV32-NEXT: vmv.s.x v0, a0
364+ ; RV32-NEXT: vor.vv v8, v8, v12
365+ ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
366+ ; RV32-NEXT: vmv.v.i v10, 0
367+ ; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
368+ ; RV32-NEXT: vrgather.vi v10, v8, 2
369+ ; RV32-NEXT: vor.vv v8, v8, v10
370+ ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
371+ ; RV32-NEXT: vslidedown.vi v8, v8, 1
372+ ; RV32-NEXT: vmv.x.s a0, v8
373+ ; RV32-NEXT: srai a1, a0, 31
374+ ; RV32-NEXT: ret
375+ ;
376+ ; RV64-LABEL: multi_chunks_shuffle:
377+ ; RV64: # %bb.0: # %entry
378+ ; RV64-NEXT: li a0, 32
379+ ; RV64-NEXT: vsetivli zero, 16, e64, m2, ta, ma
380+ ; RV64-NEXT: vsrl.vx v10, v8, a0
381+ ; RV64-NEXT: vsll.vx v8, v8, a0
382+ ; RV64-NEXT: lui a0, 61681
383+ ; RV64-NEXT: addi a0, a0, -241
384+ ; RV64-NEXT: vor.vv v8, v8, v10
385+ ; RV64-NEXT: vmv.s.x v0, a0
386+ ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
387+ ; RV64-NEXT: vmv.v.i v10, 0
388+ ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
389+ ; RV64-NEXT: vrgather.vi v10, v8, 2
390+ ; RV64-NEXT: vor.vv v8, v8, v10
391+ ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
392+ ; RV64-NEXT: vslidedown.vi v8, v8, 1
393+ ; RV64-NEXT: vmv.x.s a0, v8
394+ ; RV64-NEXT: ret
395+ entry:
396+ %1 = shufflevector <32 x i32 > %0 , <32 x i32 > zeroinitializer , <32 x i32 > <i32 1 , i32 0 , i32 3 , i32 2 , i32 37 , i32 36 , i32 39 , i32 38 , i32 9 , i32 8 , i32 11 , i32 10 , i32 45 , i32 44 , i32 47 , i32 46 , i32 17 , i32 16 , i32 19 , i32 18 , i32 53 , i32 52 , i32 55 , i32 54 , i32 25 , i32 24 , i32 27 , i32 26 , i32 61 , i32 60 , i32 63 , i32 62 >
397+ %2 = shufflevector <32 x i32 > zeroinitializer , <32 x i32 > %1 , <32 x i32 > <i32 3 , i32 34 , i32 33 , i32 0 , i32 7 , i32 38 , i32 37 , i32 4 , i32 11 , i32 42 , i32 41 , i32 8 , i32 15 , i32 46 , i32 45 , i32 12 , i32 19 , i32 50 , i32 49 , i32 16 , i32 23 , i32 54 , i32 53 , i32 20 , i32 27 , i32 58 , i32 57 , i32 24 , i32 31 , i32 62 , i32 61 , i32 28 >
398+ %3 = or <32 x i32 > %1 , %2
399+ %4 = extractelement <32 x i32 > %3 , i64 1
400+ %conv199 = sext i32 %4 to i64
401+ ret i64 %conv199
402+ }
0 commit comments