@@ -1141,3 +1141,237 @@ define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
11411141 %out = shufflevector <16 x i32 > %splat , <16 x i32 > %w , <16 x i32 > <i32 11 , i32 15 , i32 7 , i32 3 , i32 26 , i32 30 , i32 22 , i32 18 , i32 9 , i32 13 , i32 5 , i32 1 , i32 24 , i32 28 , i32 20 , i32 16 >
11421142 ret <16 x i32 > %out
11431143}
1144+
; Test: shuffle a <4 x i128> argument with the single-source mask <0, 0, 3, 2>:
;   res[0] = a[0], res[1] = a[0], res[2] = a[3], res[3] = a[2]
; The second shuffle operand is poison and a[1] is never read (the expected
; code contains no load from byte offsets 16..31 of the source).
; For both the 32-bit and the 64-bit run the expected code is purely scalar:
; each 16-byte element is copied with word (lw/sw) or doubleword (ld/sd)
; accesses, reading the source through a1 and writing the result through a0.
; NOTE(review): the check lines below look tool-generated; regenerate them
; rather than editing by hand -- confirm against the file header, which is
; outside this view.
1145+ define <4 x i128 > @shuffle_i128 (<4 x i128 > %a ) {
1146+ ; RV32-LABEL: shuffle_i128:
1147+ ; RV32: # %bb.0:
1148+ ; RV32-NEXT: lw a2, 0(a1)
1149+ ; RV32-NEXT: lw a3, 4(a1)
1150+ ; RV32-NEXT: lw a4, 8(a1)
1151+ ; RV32-NEXT: lw a5, 12(a1)
1152+ ; RV32-NEXT: lw a6, 48(a1)
1153+ ; RV32-NEXT: lw a7, 52(a1)
1154+ ; RV32-NEXT: lw t0, 56(a1)
1155+ ; RV32-NEXT: lw t1, 60(a1)
1156+ ; RV32-NEXT: lw t2, 32(a1)
1157+ ; RV32-NEXT: lw t3, 36(a1)
1158+ ; RV32-NEXT: lw t4, 40(a1)
1159+ ; RV32-NEXT: lw a1, 44(a1)
1160+ ; RV32-NEXT: sw t2, 48(a0)
1161+ ; RV32-NEXT: sw t3, 52(a0)
1162+ ; RV32-NEXT: sw t4, 56(a0)
1163+ ; RV32-NEXT: sw a1, 60(a0)
1164+ ; RV32-NEXT: sw a6, 32(a0)
1165+ ; RV32-NEXT: sw a7, 36(a0)
1166+ ; RV32-NEXT: sw t0, 40(a0)
1167+ ; RV32-NEXT: sw t1, 44(a0)
1168+ ; RV32-NEXT: sw a2, 16(a0)
1169+ ; RV32-NEXT: sw a3, 20(a0)
1170+ ; RV32-NEXT: sw a4, 24(a0)
1171+ ; RV32-NEXT: sw a5, 28(a0)
1172+ ; RV32-NEXT: sw a2, 0(a0)
1173+ ; RV32-NEXT: sw a3, 4(a0)
1174+ ; RV32-NEXT: sw a4, 8(a0)
1175+ ; RV32-NEXT: sw a5, 12(a0)
1176+ ; RV32-NEXT: ret
1177+ ;
1178+ ; RV64-LABEL: shuffle_i128:
1179+ ; RV64: # %bb.0:
1180+ ; RV64-NEXT: ld a2, 48(a1)
1181+ ; RV64-NEXT: ld a3, 56(a1)
1182+ ; RV64-NEXT: ld a4, 0(a1)
1183+ ; RV64-NEXT: ld a5, 8(a1)
1184+ ; RV64-NEXT: ld a6, 32(a1)
1185+ ; RV64-NEXT: ld a1, 40(a1)
1186+ ; RV64-NEXT: sd a2, 32(a0)
1187+ ; RV64-NEXT: sd a3, 40(a0)
1188+ ; RV64-NEXT: sd a6, 48(a0)
1189+ ; RV64-NEXT: sd a1, 56(a0)
1190+ ; RV64-NEXT: sd a4, 0(a0)
1191+ ; RV64-NEXT: sd a5, 8(a0)
1192+ ; RV64-NEXT: sd a4, 16(a0)
1193+ ; RV64-NEXT: sd a5, 24(a0)
1194+ ; RV64-NEXT: ret
1195+ %res = shufflevector <4 x i128 > %a , <4 x i128 > poison, <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 2 >
1196+ ret <4 x i128 > %res
1197+ }
1198+
; Test: load a <4 x i128> from %p, shuffle it with mask <0, 0, 3, 2>
;   res[0] = a[0], res[1] = a[0], res[2] = a[3], res[3] = a[2]
; and store the result back to the same pointer %p.
; In the expected code a0 is the base for every load and every store. All
; loads are issued before the first store, which matters here because the
; source and destination overlap. res[0] equals the value already at offset 0,
; and the expected code contains no store to bytes 0..15; only bytes 16..63
; are rewritten. a[1] (bytes 16..31) is never loaded.
; NOTE(review): the check lines below look tool-generated; regenerate them
; rather than editing by hand -- confirm against the file header, which is
; outside this view.
1199+ define void @shuffle_i128_ldst (ptr %p ) {
1200+ ; RV32-LABEL: shuffle_i128_ldst:
1201+ ; RV32: # %bb.0:
1202+ ; RV32-NEXT: lw a1, 48(a0)
1203+ ; RV32-NEXT: lw a2, 52(a0)
1204+ ; RV32-NEXT: lw a3, 56(a0)
1205+ ; RV32-NEXT: lw a4, 60(a0)
1206+ ; RV32-NEXT: lw a5, 0(a0)
1207+ ; RV32-NEXT: lw a6, 4(a0)
1208+ ; RV32-NEXT: lw a7, 8(a0)
1209+ ; RV32-NEXT: lw t0, 12(a0)
1210+ ; RV32-NEXT: lw t1, 32(a0)
1211+ ; RV32-NEXT: lw t2, 36(a0)
1212+ ; RV32-NEXT: lw t3, 40(a0)
1213+ ; RV32-NEXT: lw t4, 44(a0)
1214+ ; RV32-NEXT: sw t1, 48(a0)
1215+ ; RV32-NEXT: sw t2, 52(a0)
1216+ ; RV32-NEXT: sw t3, 56(a0)
1217+ ; RV32-NEXT: sw t4, 60(a0)
1218+ ; RV32-NEXT: sw a5, 16(a0)
1219+ ; RV32-NEXT: sw a6, 20(a0)
1220+ ; RV32-NEXT: sw a7, 24(a0)
1221+ ; RV32-NEXT: sw t0, 28(a0)
1222+ ; RV32-NEXT: sw a1, 32(a0)
1223+ ; RV32-NEXT: sw a2, 36(a0)
1224+ ; RV32-NEXT: sw a3, 40(a0)
1225+ ; RV32-NEXT: sw a4, 44(a0)
1226+ ; RV32-NEXT: ret
1227+ ;
1228+ ; RV64-LABEL: shuffle_i128_ldst:
1229+ ; RV64: # %bb.0:
1230+ ; RV64-NEXT: ld a1, 0(a0)
1231+ ; RV64-NEXT: ld a2, 8(a0)
1232+ ; RV64-NEXT: ld a3, 32(a0)
1233+ ; RV64-NEXT: ld a4, 40(a0)
1234+ ; RV64-NEXT: ld a5, 48(a0)
1235+ ; RV64-NEXT: ld a6, 56(a0)
1236+ ; RV64-NEXT: sd a3, 48(a0)
1237+ ; RV64-NEXT: sd a4, 56(a0)
1238+ ; RV64-NEXT: sd a1, 16(a0)
1239+ ; RV64-NEXT: sd a2, 24(a0)
1240+ ; RV64-NEXT: sd a5, 32(a0)
1241+ ; RV64-NEXT: sd a6, 40(a0)
1242+ ; RV64-NEXT: ret
1243+ %a = load <4 x i128 >, ptr %p
1244+ %res = shufflevector <4 x i128 > %a , <4 x i128 > poison, <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 2 >
1245+ store <4 x i128 > %res , ptr %p
1246+ ret void
1247+ }
1248+
; Test: load a <4 x i256> from %p, shuffle it with mask <0, 0, 3, 2>
;   res[0] = a[0], res[1] = a[0], res[2] = a[3], res[3] = a[2]
; and store the result back to the same pointer %p. Elements are 32 bytes
; wide, so element k occupies byte offsets 32*k .. 32*k+31 from a0.
; As in the expected code below, all loads precede the first store, a[1]
; (bytes 32..63) is never loaded, and no store targets bytes 0..31 because
; res[0] is the value already there.
; On the 32-bit run the expected code loads 24 words into distinct registers
; before storing anything; it uses s0-s9 in addition to a/t registers, so the
; checks also cover a 48-byte stack frame that saves and restores s0-s9 along
; with the matching CFI directives. On the 64-bit run only a/t registers are
; used and no frame is set up.
; NOTE(review): the check lines below look tool-generated; regenerate them
; rather than editing by hand -- confirm against the file header, which is
; outside this view.
1249+ define void @shuffle_i256_ldst (ptr %p ) {
1250+ ; RV32-LABEL: shuffle_i256_ldst:
1251+ ; RV32: # %bb.0:
1252+ ; RV32-NEXT: addi sp, sp, -48
1253+ ; RV32-NEXT: .cfi_def_cfa_offset 48
1254+ ; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
1255+ ; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
1256+ ; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
1257+ ; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
1258+ ; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
1259+ ; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
1260+ ; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
1261+ ; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
1262+ ; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
1263+ ; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
1264+ ; RV32-NEXT: .cfi_offset s0, -4
1265+ ; RV32-NEXT: .cfi_offset s1, -8
1266+ ; RV32-NEXT: .cfi_offset s2, -12
1267+ ; RV32-NEXT: .cfi_offset s3, -16
1268+ ; RV32-NEXT: .cfi_offset s4, -20
1269+ ; RV32-NEXT: .cfi_offset s5, -24
1270+ ; RV32-NEXT: .cfi_offset s6, -28
1271+ ; RV32-NEXT: .cfi_offset s7, -32
1272+ ; RV32-NEXT: .cfi_offset s8, -36
1273+ ; RV32-NEXT: .cfi_offset s9, -40
1274+ ; RV32-NEXT: lw a1, 0(a0)
1275+ ; RV32-NEXT: lw a2, 4(a0)
1276+ ; RV32-NEXT: lw a3, 8(a0)
1277+ ; RV32-NEXT: lw a4, 12(a0)
1278+ ; RV32-NEXT: lw a5, 16(a0)
1279+ ; RV32-NEXT: lw a6, 20(a0)
1280+ ; RV32-NEXT: lw a7, 24(a0)
1281+ ; RV32-NEXT: lw t0, 28(a0)
1282+ ; RV32-NEXT: lw t1, 96(a0)
1283+ ; RV32-NEXT: lw t2, 100(a0)
1284+ ; RV32-NEXT: lw t3, 104(a0)
1285+ ; RV32-NEXT: lw t4, 108(a0)
1286+ ; RV32-NEXT: lw t5, 112(a0)
1287+ ; RV32-NEXT: lw t6, 116(a0)
1288+ ; RV32-NEXT: lw s0, 120(a0)
1289+ ; RV32-NEXT: lw s1, 124(a0)
1290+ ; RV32-NEXT: lw s2, 64(a0)
1291+ ; RV32-NEXT: lw s3, 68(a0)
1292+ ; RV32-NEXT: lw s4, 72(a0)
1293+ ; RV32-NEXT: lw s5, 76(a0)
1294+ ; RV32-NEXT: lw s6, 80(a0)
1295+ ; RV32-NEXT: lw s7, 84(a0)
1296+ ; RV32-NEXT: lw s8, 88(a0)
1297+ ; RV32-NEXT: lw s9, 92(a0)
1298+ ; RV32-NEXT: sw s6, 112(a0)
1299+ ; RV32-NEXT: sw s7, 116(a0)
1300+ ; RV32-NEXT: sw s8, 120(a0)
1301+ ; RV32-NEXT: sw s9, 124(a0)
1302+ ; RV32-NEXT: sw s2, 96(a0)
1303+ ; RV32-NEXT: sw s3, 100(a0)
1304+ ; RV32-NEXT: sw s4, 104(a0)
1305+ ; RV32-NEXT: sw s5, 108(a0)
1306+ ; RV32-NEXT: sw t5, 80(a0)
1307+ ; RV32-NEXT: sw t6, 84(a0)
1308+ ; RV32-NEXT: sw s0, 88(a0)
1309+ ; RV32-NEXT: sw s1, 92(a0)
1310+ ; RV32-NEXT: sw t1, 64(a0)
1311+ ; RV32-NEXT: sw t2, 68(a0)
1312+ ; RV32-NEXT: sw t3, 72(a0)
1313+ ; RV32-NEXT: sw t4, 76(a0)
1314+ ; RV32-NEXT: sw a5, 48(a0)
1315+ ; RV32-NEXT: sw a6, 52(a0)
1316+ ; RV32-NEXT: sw a7, 56(a0)
1317+ ; RV32-NEXT: sw t0, 60(a0)
1318+ ; RV32-NEXT: sw a1, 32(a0)
1319+ ; RV32-NEXT: sw a2, 36(a0)
1320+ ; RV32-NEXT: sw a3, 40(a0)
1321+ ; RV32-NEXT: sw a4, 44(a0)
1322+ ; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
1323+ ; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
1324+ ; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
1325+ ; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
1326+ ; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
1327+ ; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
1328+ ; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
1329+ ; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
1330+ ; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
1331+ ; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
1332+ ; RV32-NEXT: .cfi_restore s0
1333+ ; RV32-NEXT: .cfi_restore s1
1334+ ; RV32-NEXT: .cfi_restore s2
1335+ ; RV32-NEXT: .cfi_restore s3
1336+ ; RV32-NEXT: .cfi_restore s4
1337+ ; RV32-NEXT: .cfi_restore s5
1338+ ; RV32-NEXT: .cfi_restore s6
1339+ ; RV32-NEXT: .cfi_restore s7
1340+ ; RV32-NEXT: .cfi_restore s8
1341+ ; RV32-NEXT: .cfi_restore s9
1342+ ; RV32-NEXT: addi sp, sp, 48
1343+ ; RV32-NEXT: .cfi_def_cfa_offset 0
1344+ ; RV32-NEXT: ret
1345+ ;
1346+ ; RV64-LABEL: shuffle_i256_ldst:
1347+ ; RV64: # %bb.0:
1348+ ; RV64-NEXT: ld a1, 96(a0)
1349+ ; RV64-NEXT: ld a2, 104(a0)
1350+ ; RV64-NEXT: ld a3, 112(a0)
1351+ ; RV64-NEXT: ld a4, 120(a0)
1352+ ; RV64-NEXT: ld a5, 0(a0)
1353+ ; RV64-NEXT: ld a6, 8(a0)
1354+ ; RV64-NEXT: ld a7, 16(a0)
1355+ ; RV64-NEXT: ld t0, 24(a0)
1356+ ; RV64-NEXT: ld t1, 64(a0)
1357+ ; RV64-NEXT: ld t2, 72(a0)
1358+ ; RV64-NEXT: ld t3, 80(a0)
1359+ ; RV64-NEXT: ld t4, 88(a0)
1360+ ; RV64-NEXT: sd t1, 96(a0)
1361+ ; RV64-NEXT: sd t2, 104(a0)
1362+ ; RV64-NEXT: sd t3, 112(a0)
1363+ ; RV64-NEXT: sd t4, 120(a0)
1364+ ; RV64-NEXT: sd a5, 32(a0)
1365+ ; RV64-NEXT: sd a6, 40(a0)
1366+ ; RV64-NEXT: sd a7, 48(a0)
1367+ ; RV64-NEXT: sd t0, 56(a0)
1368+ ; RV64-NEXT: sd a1, 64(a0)
1369+ ; RV64-NEXT: sd a2, 72(a0)
1370+ ; RV64-NEXT: sd a3, 80(a0)
1371+ ; RV64-NEXT: sd a4, 88(a0)
1372+ ; RV64-NEXT: ret
1373+ %a = load <4 x i256 >, ptr %p
1374+ %res = shufflevector <4 x i256 > %a , <4 x i256 > poison, <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 2 >
1375+ store <4 x i256 > %res , ptr %p
1376+ ret void
1377+ }
0 commit comments