@@ -1287,37 +1287,17 @@ define void @shuffle_i64_splat(ptr %p) nounwind {
12871287}
12881288
12891289define void @shuffle_i128_splat (ptr %p ) nounwind {
1290- ; RV32-LABEL: shuffle_i128_splat:
1291- ; RV32: # %bb.0:
1292- ; RV32-NEXT: lw a1, 0(a0)
1293- ; RV32-NEXT: lw a2, 4(a0)
1294- ; RV32-NEXT: lw a3, 8(a0)
1295- ; RV32-NEXT: lw a4, 12(a0)
1296- ; RV32-NEXT: sw a1, 48(a0)
1297- ; RV32-NEXT: sw a2, 52(a0)
1298- ; RV32-NEXT: sw a3, 56(a0)
1299- ; RV32-NEXT: sw a4, 60(a0)
1300- ; RV32-NEXT: sw a1, 16(a0)
1301- ; RV32-NEXT: sw a2, 20(a0)
1302- ; RV32-NEXT: sw a3, 24(a0)
1303- ; RV32-NEXT: sw a4, 28(a0)
1304- ; RV32-NEXT: sw a1, 32(a0)
1305- ; RV32-NEXT: sw a2, 36(a0)
1306- ; RV32-NEXT: sw a3, 40(a0)
1307- ; RV32-NEXT: sw a4, 44(a0)
1308- ; RV32-NEXT: ret
1309- ;
1310- ; RV64-LABEL: shuffle_i128_splat:
1311- ; RV64: # %bb.0:
1312- ; RV64-NEXT: ld a1, 0(a0)
1313- ; RV64-NEXT: ld a2, 8(a0)
1314- ; RV64-NEXT: sd a1, 48(a0)
1315- ; RV64-NEXT: sd a2, 56(a0)
1316- ; RV64-NEXT: sd a1, 16(a0)
1317- ; RV64-NEXT: sd a2, 24(a0)
1318- ; RV64-NEXT: sd a1, 32(a0)
1319- ; RV64-NEXT: sd a2, 40(a0)
1320- ; RV64-NEXT: ret
1290+ ; CHECK-LABEL: shuffle_i128_splat:
1291+ ; CHECK: # %bb.0:
1292+ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1293+ ; CHECK-NEXT: vle64.v v8, (a0)
1294+ ; CHECK-NEXT: lui a1, 16
1295+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1296+ ; CHECK-NEXT: vmv.v.x v12, a1
1297+ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1298+ ; CHECK-NEXT: vrgatherei16.vv v16, v8, v12
1299+ ; CHECK-NEXT: vse64.v v16, (a0)
1300+ ; CHECK-NEXT: ret
13211301 %a = load <4 x i128 >, ptr %p
13221302 %res = shufflevector <4 x i128 > %a , <4 x i128 > poison, <4 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 >
13231303 store <4 x i128 > %res , ptr %p
@@ -1327,58 +1307,32 @@ define void @shuffle_i128_splat(ptr %p) nounwind {
13271307define void @shuffle_i256_splat (ptr %p ) nounwind {
13281308; RV32-LABEL: shuffle_i256_splat:
13291309; RV32: # %bb.0:
1330- ; RV32-NEXT: lw a1, 0(a0)
1331- ; RV32-NEXT: lw a2, 4(a0)
1332- ; RV32-NEXT: lw a3, 8(a0)
1333- ; RV32-NEXT: lw a4, 12(a0)
1334- ; RV32-NEXT: lw a5, 16(a0)
1335- ; RV32-NEXT: lw a6, 20(a0)
1336- ; RV32-NEXT: lw a7, 24(a0)
1337- ; RV32-NEXT: lw t0, 28(a0)
1338- ; RV32-NEXT: sw a5, 112(a0)
1339- ; RV32-NEXT: sw a6, 116(a0)
1340- ; RV32-NEXT: sw a7, 120(a0)
1341- ; RV32-NEXT: sw t0, 124(a0)
1342- ; RV32-NEXT: sw a1, 96(a0)
1343- ; RV32-NEXT: sw a2, 100(a0)
1344- ; RV32-NEXT: sw a3, 104(a0)
1345- ; RV32-NEXT: sw a4, 108(a0)
1346- ; RV32-NEXT: sw a5, 80(a0)
1347- ; RV32-NEXT: sw a6, 84(a0)
1348- ; RV32-NEXT: sw a7, 88(a0)
1349- ; RV32-NEXT: sw t0, 92(a0)
1350- ; RV32-NEXT: sw a1, 64(a0)
1351- ; RV32-NEXT: sw a2, 68(a0)
1352- ; RV32-NEXT: sw a3, 72(a0)
1353- ; RV32-NEXT: sw a4, 76(a0)
1354- ; RV32-NEXT: sw a5, 48(a0)
1355- ; RV32-NEXT: sw a6, 52(a0)
1356- ; RV32-NEXT: sw a7, 56(a0)
1357- ; RV32-NEXT: sw t0, 60(a0)
1358- ; RV32-NEXT: sw a1, 32(a0)
1359- ; RV32-NEXT: sw a2, 36(a0)
1360- ; RV32-NEXT: sw a3, 40(a0)
1361- ; RV32-NEXT: sw a4, 44(a0)
1310+ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1311+ ; RV32-NEXT: vle64.v v8, (a0)
1312+ ; RV32-NEXT: lui a1, 12320
1313+ ; RV32-NEXT: addi a1, a1, 256
1314+ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1315+ ; RV32-NEXT: vmv.v.x v16, a1
1316+ ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1317+ ; RV32-NEXT: vsext.vf2 v18, v16
1318+ ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1319+ ; RV32-NEXT: vrgatherei16.vv v24, v8, v18
1320+ ; RV32-NEXT: vse64.v v24, (a0)
13621321; RV32-NEXT: ret
13631322;
13641323; RV64-LABEL: shuffle_i256_splat:
13651324; RV64: # %bb.0:
1366- ; RV64-NEXT: ld a1, 0(a0)
1367- ; RV64-NEXT: ld a2, 8(a0)
1368- ; RV64-NEXT: ld a3, 16(a0)
1369- ; RV64-NEXT: ld a4, 24(a0)
1370- ; RV64-NEXT: sd a1, 96(a0)
1371- ; RV64-NEXT: sd a2, 104(a0)
1372- ; RV64-NEXT: sd a3, 112(a0)
1373- ; RV64-NEXT: sd a4, 120(a0)
1374- ; RV64-NEXT: sd a1, 32(a0)
1375- ; RV64-NEXT: sd a2, 40(a0)
1376- ; RV64-NEXT: sd a3, 48(a0)
1377- ; RV64-NEXT: sd a4, 56(a0)
1378- ; RV64-NEXT: sd a1, 64(a0)
1379- ; RV64-NEXT: sd a2, 72(a0)
1380- ; RV64-NEXT: sd a3, 80(a0)
1381- ; RV64-NEXT: sd a4, 88(a0)
1325+ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1326+ ; RV64-NEXT: vle64.v v8, (a0)
1327+ ; RV64-NEXT: lui a1, 98305
1328+ ; RV64-NEXT: slli a1, a1, 5
1329+ ; RV64-NEXT: addi a1, a1, 1
1330+ ; RV64-NEXT: slli a1, a1, 16
1331+ ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1332+ ; RV64-NEXT: vmv.v.x v16, a1
1333+ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1334+ ; RV64-NEXT: vrgatherei16.vv v24, v8, v16
1335+ ; RV64-NEXT: vse64.v v24, (a0)
13821336; RV64-NEXT: ret
13831337 %a = load <4 x i256 >, ptr %p
13841338 %res = shufflevector <4 x i256 > %a , <4 x i256 > poison, <4 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 >
0 commit comments