Skip to content

Commit 031f33c

Browse files
committed
[RISCV] Add tests for legalization of <N x i128> and <N x i256> shuffles
1 parent 222ff18 commit 031f33c

File tree

2 files changed

+414
-0
lines changed

2 files changed

+414
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 234 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1141,3 +1141,237 @@ define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
11411141
%out = shufflevector <16 x i32> %splat, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
11421142
ret <16 x i32> %out
11431143
}
1144+
1145+
; Shuffle of <4 x i128> with mask <0, 0, 3, 2>, taking the vector as an
; argument and returning it by value.
; The expected code contains no vector instructions: every i128 element is
; copied with scalar loads through a1 and scalar stores through a0 (four
; lw/sw per element in the RV32 checks, two ld/sd per element in the RV64
; checks). Source element 0 is loaded once and stored to both result slots
; 0 and 1.
define <4 x i128> @shuffle_i128(<4 x i128> %a) {
1146+
; RV32-LABEL: shuffle_i128:
1147+
; RV32: # %bb.0:
1148+
; RV32-NEXT: lw a2, 0(a1)
1149+
; RV32-NEXT: lw a3, 4(a1)
1150+
; RV32-NEXT: lw a4, 8(a1)
1151+
; RV32-NEXT: lw a5, 12(a1)
1152+
; RV32-NEXT: lw a6, 48(a1)
1153+
; RV32-NEXT: lw a7, 52(a1)
1154+
; RV32-NEXT: lw t0, 56(a1)
1155+
; RV32-NEXT: lw t1, 60(a1)
1156+
; RV32-NEXT: lw t2, 32(a1)
1157+
; RV32-NEXT: lw t3, 36(a1)
1158+
; RV32-NEXT: lw t4, 40(a1)
1159+
; RV32-NEXT: lw a1, 44(a1)
1160+
; RV32-NEXT: sw t2, 48(a0)
1161+
; RV32-NEXT: sw t3, 52(a0)
1162+
; RV32-NEXT: sw t4, 56(a0)
1163+
; RV32-NEXT: sw a1, 60(a0)
1164+
; RV32-NEXT: sw a6, 32(a0)
1165+
; RV32-NEXT: sw a7, 36(a0)
1166+
; RV32-NEXT: sw t0, 40(a0)
1167+
; RV32-NEXT: sw t1, 44(a0)
1168+
; RV32-NEXT: sw a2, 16(a0)
1169+
; RV32-NEXT: sw a3, 20(a0)
1170+
; RV32-NEXT: sw a4, 24(a0)
1171+
; RV32-NEXT: sw a5, 28(a0)
1172+
; RV32-NEXT: sw a2, 0(a0)
1173+
; RV32-NEXT: sw a3, 4(a0)
1174+
; RV32-NEXT: sw a4, 8(a0)
1175+
; RV32-NEXT: sw a5, 12(a0)
1176+
; RV32-NEXT: ret
1177+
;
1178+
; RV64-LABEL: shuffle_i128:
1179+
; RV64: # %bb.0:
1180+
; RV64-NEXT: ld a2, 48(a1)
1181+
; RV64-NEXT: ld a3, 56(a1)
1182+
; RV64-NEXT: ld a4, 0(a1)
1183+
; RV64-NEXT: ld a5, 8(a1)
1184+
; RV64-NEXT: ld a6, 32(a1)
1185+
; RV64-NEXT: ld a1, 40(a1)
1186+
; RV64-NEXT: sd a2, 32(a0)
1187+
; RV64-NEXT: sd a3, 40(a0)
1188+
; RV64-NEXT: sd a6, 48(a0)
1189+
; RV64-NEXT: sd a1, 56(a0)
1190+
; RV64-NEXT: sd a4, 0(a0)
1191+
; RV64-NEXT: sd a5, 8(a0)
1192+
; RV64-NEXT: sd a4, 16(a0)
1193+
; RV64-NEXT: sd a5, 24(a0)
1194+
; RV64-NEXT: ret
1195+
%res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
1196+
ret <4 x i128> %res
1197+
}
1198+
1199+
; Same <0, 0, 3, 2> shuffle of <4 x i128>, but the vector is loaded from %p
; and the result is stored back to %p, so the shuffle happens in place.
; The expected code addresses memory only through a0 and uses scalar lw/sw
; (RV32 checks) or ld/sd (RV64 checks). No store to result slot 0 is
; expected, because that slot already holds source element 0.
define void @shuffle_i128_ldst(ptr %p) {
1200+
; RV32-LABEL: shuffle_i128_ldst:
1201+
; RV32: # %bb.0:
1202+
; RV32-NEXT: lw a1, 48(a0)
1203+
; RV32-NEXT: lw a2, 52(a0)
1204+
; RV32-NEXT: lw a3, 56(a0)
1205+
; RV32-NEXT: lw a4, 60(a0)
1206+
; RV32-NEXT: lw a5, 0(a0)
1207+
; RV32-NEXT: lw a6, 4(a0)
1208+
; RV32-NEXT: lw a7, 8(a0)
1209+
; RV32-NEXT: lw t0, 12(a0)
1210+
; RV32-NEXT: lw t1, 32(a0)
1211+
; RV32-NEXT: lw t2, 36(a0)
1212+
; RV32-NEXT: lw t3, 40(a0)
1213+
; RV32-NEXT: lw t4, 44(a0)
1214+
; RV32-NEXT: sw t1, 48(a0)
1215+
; RV32-NEXT: sw t2, 52(a0)
1216+
; RV32-NEXT: sw t3, 56(a0)
1217+
; RV32-NEXT: sw t4, 60(a0)
1218+
; RV32-NEXT: sw a5, 16(a0)
1219+
; RV32-NEXT: sw a6, 20(a0)
1220+
; RV32-NEXT: sw a7, 24(a0)
1221+
; RV32-NEXT: sw t0, 28(a0)
1222+
; RV32-NEXT: sw a1, 32(a0)
1223+
; RV32-NEXT: sw a2, 36(a0)
1224+
; RV32-NEXT: sw a3, 40(a0)
1225+
; RV32-NEXT: sw a4, 44(a0)
1226+
; RV32-NEXT: ret
1227+
;
1228+
; RV64-LABEL: shuffle_i128_ldst:
1229+
; RV64: # %bb.0:
1230+
; RV64-NEXT: ld a1, 0(a0)
1231+
; RV64-NEXT: ld a2, 8(a0)
1232+
; RV64-NEXT: ld a3, 32(a0)
1233+
; RV64-NEXT: ld a4, 40(a0)
1234+
; RV64-NEXT: ld a5, 48(a0)
1235+
; RV64-NEXT: ld a6, 56(a0)
1236+
; RV64-NEXT: sd a3, 48(a0)
1237+
; RV64-NEXT: sd a4, 56(a0)
1238+
; RV64-NEXT: sd a1, 16(a0)
1239+
; RV64-NEXT: sd a2, 24(a0)
1240+
; RV64-NEXT: sd a5, 32(a0)
1241+
; RV64-NEXT: sd a6, 40(a0)
1242+
; RV64-NEXT: ret
1243+
%a = load <4 x i128>, ptr %p
1244+
%res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
1245+
store <4 x i128> %res, ptr %p
1246+
ret void
1247+
}
1248+
1249+
; In-place <0, 0, 3, 2> shuffle of <4 x i256>: the vector is loaded from %p
; and the result is stored back to %p.
; In the RV32 checks the three distinct source elements occupy 24 32-bit
; registers (a1-a7, t0-t6, s0-s9), so the expected code saves and restores
; s0-s9 in a 48-byte stack frame, with matching CFI directives.
; The RV64 checks use twelve 64-bit registers (a1-a7, t0-t4) and no stack
; frame.
; Nothing is stored to result slot 0, which already holds source element 0.
define void @shuffle_i256_ldst(ptr %p) {
1250+
; RV32-LABEL: shuffle_i256_ldst:
1251+
; RV32: # %bb.0:
1252+
; RV32-NEXT: addi sp, sp, -48
1253+
; RV32-NEXT: .cfi_def_cfa_offset 48
1254+
; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
1255+
; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
1256+
; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
1257+
; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
1258+
; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
1259+
; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
1260+
; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
1261+
; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
1262+
; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
1263+
; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
1264+
; RV32-NEXT: .cfi_offset s0, -4
1265+
; RV32-NEXT: .cfi_offset s1, -8
1266+
; RV32-NEXT: .cfi_offset s2, -12
1267+
; RV32-NEXT: .cfi_offset s3, -16
1268+
; RV32-NEXT: .cfi_offset s4, -20
1269+
; RV32-NEXT: .cfi_offset s5, -24
1270+
; RV32-NEXT: .cfi_offset s6, -28
1271+
; RV32-NEXT: .cfi_offset s7, -32
1272+
; RV32-NEXT: .cfi_offset s8, -36
1273+
; RV32-NEXT: .cfi_offset s9, -40
1274+
; RV32-NEXT: lw a1, 0(a0)
1275+
; RV32-NEXT: lw a2, 4(a0)
1276+
; RV32-NEXT: lw a3, 8(a0)
1277+
; RV32-NEXT: lw a4, 12(a0)
1278+
; RV32-NEXT: lw a5, 16(a0)
1279+
; RV32-NEXT: lw a6, 20(a0)
1280+
; RV32-NEXT: lw a7, 24(a0)
1281+
; RV32-NEXT: lw t0, 28(a0)
1282+
; RV32-NEXT: lw t1, 96(a0)
1283+
; RV32-NEXT: lw t2, 100(a0)
1284+
; RV32-NEXT: lw t3, 104(a0)
1285+
; RV32-NEXT: lw t4, 108(a0)
1286+
; RV32-NEXT: lw t5, 112(a0)
1287+
; RV32-NEXT: lw t6, 116(a0)
1288+
; RV32-NEXT: lw s0, 120(a0)
1289+
; RV32-NEXT: lw s1, 124(a0)
1290+
; RV32-NEXT: lw s2, 64(a0)
1291+
; RV32-NEXT: lw s3, 68(a0)
1292+
; RV32-NEXT: lw s4, 72(a0)
1293+
; RV32-NEXT: lw s5, 76(a0)
1294+
; RV32-NEXT: lw s6, 80(a0)
1295+
; RV32-NEXT: lw s7, 84(a0)
1296+
; RV32-NEXT: lw s8, 88(a0)
1297+
; RV32-NEXT: lw s9, 92(a0)
1298+
; RV32-NEXT: sw s6, 112(a0)
1299+
; RV32-NEXT: sw s7, 116(a0)
1300+
; RV32-NEXT: sw s8, 120(a0)
1301+
; RV32-NEXT: sw s9, 124(a0)
1302+
; RV32-NEXT: sw s2, 96(a0)
1303+
; RV32-NEXT: sw s3, 100(a0)
1304+
; RV32-NEXT: sw s4, 104(a0)
1305+
; RV32-NEXT: sw s5, 108(a0)
1306+
; RV32-NEXT: sw t5, 80(a0)
1307+
; RV32-NEXT: sw t6, 84(a0)
1308+
; RV32-NEXT: sw s0, 88(a0)
1309+
; RV32-NEXT: sw s1, 92(a0)
1310+
; RV32-NEXT: sw t1, 64(a0)
1311+
; RV32-NEXT: sw t2, 68(a0)
1312+
; RV32-NEXT: sw t3, 72(a0)
1313+
; RV32-NEXT: sw t4, 76(a0)
1314+
; RV32-NEXT: sw a5, 48(a0)
1315+
; RV32-NEXT: sw a6, 52(a0)
1316+
; RV32-NEXT: sw a7, 56(a0)
1317+
; RV32-NEXT: sw t0, 60(a0)
1318+
; RV32-NEXT: sw a1, 32(a0)
1319+
; RV32-NEXT: sw a2, 36(a0)
1320+
; RV32-NEXT: sw a3, 40(a0)
1321+
; RV32-NEXT: sw a4, 44(a0)
1322+
; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
1323+
; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
1324+
; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
1325+
; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
1326+
; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
1327+
; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
1328+
; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
1329+
; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
1330+
; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
1331+
; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
1332+
; RV32-NEXT: .cfi_restore s0
1333+
; RV32-NEXT: .cfi_restore s1
1334+
; RV32-NEXT: .cfi_restore s2
1335+
; RV32-NEXT: .cfi_restore s3
1336+
; RV32-NEXT: .cfi_restore s4
1337+
; RV32-NEXT: .cfi_restore s5
1338+
; RV32-NEXT: .cfi_restore s6
1339+
; RV32-NEXT: .cfi_restore s7
1340+
; RV32-NEXT: .cfi_restore s8
1341+
; RV32-NEXT: .cfi_restore s9
1342+
; RV32-NEXT: addi sp, sp, 48
1343+
; RV32-NEXT: .cfi_def_cfa_offset 0
1344+
; RV32-NEXT: ret
1345+
;
1346+
; RV64-LABEL: shuffle_i256_ldst:
1347+
; RV64: # %bb.0:
1348+
; RV64-NEXT: ld a1, 96(a0)
1349+
; RV64-NEXT: ld a2, 104(a0)
1350+
; RV64-NEXT: ld a3, 112(a0)
1351+
; RV64-NEXT: ld a4, 120(a0)
1352+
; RV64-NEXT: ld a5, 0(a0)
1353+
; RV64-NEXT: ld a6, 8(a0)
1354+
; RV64-NEXT: ld a7, 16(a0)
1355+
; RV64-NEXT: ld t0, 24(a0)
1356+
; RV64-NEXT: ld t1, 64(a0)
1357+
; RV64-NEXT: ld t2, 72(a0)
1358+
; RV64-NEXT: ld t3, 80(a0)
1359+
; RV64-NEXT: ld t4, 88(a0)
1360+
; RV64-NEXT: sd t1, 96(a0)
1361+
; RV64-NEXT: sd t2, 104(a0)
1362+
; RV64-NEXT: sd t3, 112(a0)
1363+
; RV64-NEXT: sd t4, 120(a0)
1364+
; RV64-NEXT: sd a5, 32(a0)
1365+
; RV64-NEXT: sd a6, 40(a0)
1366+
; RV64-NEXT: sd a7, 48(a0)
1367+
; RV64-NEXT: sd t0, 56(a0)
1368+
; RV64-NEXT: sd a1, 64(a0)
1369+
; RV64-NEXT: sd a2, 72(a0)
1370+
; RV64-NEXT: sd a3, 80(a0)
1371+
; RV64-NEXT: sd a4, 88(a0)
1372+
; RV64-NEXT: ret
1373+
%a = load <4 x i256>, ptr %p
1374+
%res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
1375+
store <4 x i256> %res, ptr %p
1376+
ret void
1377+
}

0 commit comments

Comments
 (0)