@@ -1141,3 +1141,237 @@ define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
1141
1141
%out = shufflevector <16 x i32 > %splat , <16 x i32 > %w , <16 x i32 > <i32 11 , i32 15 , i32 7 , i32 3 , i32 26 , i32 30 , i32 22 , i32 18 , i32 9 , i32 13 , i32 5 , i32 1 , i32 24 , i32 28 , i32 20 , i32 16 >
1142
1142
ret <16 x i32 > %out
1143
1143
}
1144
+
1145
; shuffle_i128: permutes the <4 x i128> argument %a with the mask <0, 0, 3, 2>,
; i.e. result lanes 0 and 1 both take element 0, lane 2 takes element 3 and
; lane 3 takes element 2.
; The RV32/RV64 check lines expect only scalar copies: lw/sw (RV32) or ld/sd
; (RV64) that load from offsets off a1 and store to offsets off a0, with the
; element-0 registers stored twice (bytes 0-15 and bytes 16-31).
; NOTE(review): the check lines look machine-generated (update_llc_test_checks
; style) - regenerate rather than hand-edit; confirm against the file header.
+ define <4 x i128 > @shuffle_i128 (<4 x i128 > %a ) {
1146
+ ; RV32-LABEL: shuffle_i128:
1147
+ ; RV32: # %bb.0:
1148
+ ; RV32-NEXT: lw a2, 0(a1)
1149
+ ; RV32-NEXT: lw a3, 4(a1)
1150
+ ; RV32-NEXT: lw a4, 8(a1)
1151
+ ; RV32-NEXT: lw a5, 12(a1)
1152
+ ; RV32-NEXT: lw a6, 48(a1)
1153
+ ; RV32-NEXT: lw a7, 52(a1)
1154
+ ; RV32-NEXT: lw t0, 56(a1)
1155
+ ; RV32-NEXT: lw t1, 60(a1)
1156
+ ; RV32-NEXT: lw t2, 32(a1)
1157
+ ; RV32-NEXT: lw t3, 36(a1)
1158
+ ; RV32-NEXT: lw t4, 40(a1)
1159
+ ; RV32-NEXT: lw a1, 44(a1)
1160
+ ; RV32-NEXT: sw t2, 48(a0)
1161
+ ; RV32-NEXT: sw t3, 52(a0)
1162
+ ; RV32-NEXT: sw t4, 56(a0)
1163
+ ; RV32-NEXT: sw a1, 60(a0)
1164
+ ; RV32-NEXT: sw a6, 32(a0)
1165
+ ; RV32-NEXT: sw a7, 36(a0)
1166
+ ; RV32-NEXT: sw t0, 40(a0)
1167
+ ; RV32-NEXT: sw t1, 44(a0)
1168
+ ; RV32-NEXT: sw a2, 16(a0)
1169
+ ; RV32-NEXT: sw a3, 20(a0)
1170
+ ; RV32-NEXT: sw a4, 24(a0)
1171
+ ; RV32-NEXT: sw a5, 28(a0)
1172
+ ; RV32-NEXT: sw a2, 0(a0)
1173
+ ; RV32-NEXT: sw a3, 4(a0)
1174
+ ; RV32-NEXT: sw a4, 8(a0)
1175
+ ; RV32-NEXT: sw a5, 12(a0)
1176
+ ; RV32-NEXT: ret
1177
+ ;
1178
+ ; RV64-LABEL: shuffle_i128:
1179
+ ; RV64: # %bb.0:
1180
+ ; RV64-NEXT: ld a2, 48(a1)
1181
+ ; RV64-NEXT: ld a3, 56(a1)
1182
+ ; RV64-NEXT: ld a4, 0(a1)
1183
+ ; RV64-NEXT: ld a5, 8(a1)
1184
+ ; RV64-NEXT: ld a6, 32(a1)
1185
+ ; RV64-NEXT: ld a1, 40(a1)
1186
+ ; RV64-NEXT: sd a2, 32(a0)
1187
+ ; RV64-NEXT: sd a3, 40(a0)
1188
+ ; RV64-NEXT: sd a6, 48(a0)
1189
+ ; RV64-NEXT: sd a1, 56(a0)
1190
+ ; RV64-NEXT: sd a4, 0(a0)
1191
+ ; RV64-NEXT: sd a5, 8(a0)
1192
+ ; RV64-NEXT: sd a4, 16(a0)
1193
+ ; RV64-NEXT: sd a5, 24(a0)
1194
+ ; RV64-NEXT: ret
1195
+ %res = shufflevector <4 x i128 > %a , <4 x i128 > poison, <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 2 >
1196
+ ret <4 x i128 > %res
1197
+ }
1198
+
1199
; shuffle_i128_ldst: loads a <4 x i128> from %p, permutes it with the mask
; <0, 0, 3, 2> and stores the result back to the same pointer.
; The check lines expect scalar lw/sw (RV32) or ld/sd (RV64) through a0 only:
; elements 0, 2 and 3 are read (bytes 0-15 and 32-63) and only bytes 16-63 are
; written. No store to bytes 0-15 is expected, since result lane 0 is element 0,
; which already lives there.
; NOTE(review): the check lines look machine-generated (update_llc_test_checks
; style) - regenerate rather than hand-edit; confirm against the file header.
+ define void @shuffle_i128_ldst (ptr %p ) {
1200
+ ; RV32-LABEL: shuffle_i128_ldst:
1201
+ ; RV32: # %bb.0:
1202
+ ; RV32-NEXT: lw a1, 48(a0)
1203
+ ; RV32-NEXT: lw a2, 52(a0)
1204
+ ; RV32-NEXT: lw a3, 56(a0)
1205
+ ; RV32-NEXT: lw a4, 60(a0)
1206
+ ; RV32-NEXT: lw a5, 0(a0)
1207
+ ; RV32-NEXT: lw a6, 4(a0)
1208
+ ; RV32-NEXT: lw a7, 8(a0)
1209
+ ; RV32-NEXT: lw t0, 12(a0)
1210
+ ; RV32-NEXT: lw t1, 32(a0)
1211
+ ; RV32-NEXT: lw t2, 36(a0)
1212
+ ; RV32-NEXT: lw t3, 40(a0)
1213
+ ; RV32-NEXT: lw t4, 44(a0)
1214
+ ; RV32-NEXT: sw t1, 48(a0)
1215
+ ; RV32-NEXT: sw t2, 52(a0)
1216
+ ; RV32-NEXT: sw t3, 56(a0)
1217
+ ; RV32-NEXT: sw t4, 60(a0)
1218
+ ; RV32-NEXT: sw a5, 16(a0)
1219
+ ; RV32-NEXT: sw a6, 20(a0)
1220
+ ; RV32-NEXT: sw a7, 24(a0)
1221
+ ; RV32-NEXT: sw t0, 28(a0)
1222
+ ; RV32-NEXT: sw a1, 32(a0)
1223
+ ; RV32-NEXT: sw a2, 36(a0)
1224
+ ; RV32-NEXT: sw a3, 40(a0)
1225
+ ; RV32-NEXT: sw a4, 44(a0)
1226
+ ; RV32-NEXT: ret
1227
+ ;
1228
+ ; RV64-LABEL: shuffle_i128_ldst:
1229
+ ; RV64: # %bb.0:
1230
+ ; RV64-NEXT: ld a1, 0(a0)
1231
+ ; RV64-NEXT: ld a2, 8(a0)
1232
+ ; RV64-NEXT: ld a3, 32(a0)
1233
+ ; RV64-NEXT: ld a4, 40(a0)
1234
+ ; RV64-NEXT: ld a5, 48(a0)
1235
+ ; RV64-NEXT: ld a6, 56(a0)
1236
+ ; RV64-NEXT: sd a3, 48(a0)
1237
+ ; RV64-NEXT: sd a4, 56(a0)
1238
+ ; RV64-NEXT: sd a1, 16(a0)
1239
+ ; RV64-NEXT: sd a2, 24(a0)
1240
+ ; RV64-NEXT: sd a5, 32(a0)
1241
+ ; RV64-NEXT: sd a6, 40(a0)
1242
+ ; RV64-NEXT: ret
1243
+ %a = load <4 x i128 >, ptr %p
1244
+ %res = shufflevector <4 x i128 > %a , <4 x i128 > poison, <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 2 >
1245
+ store <4 x i128 > %res , ptr %p
1246
+ ret void
1247
+ }
1248
+
1249
; shuffle_i256_ldst: same load / shuffle / store-back pattern as the i128
; variant's IR, but on <4 x i256> (32-byte elements), with mask <0, 0, 3, 2>.
; The check lines expect scalar copies through a0: elements 0, 2 and 3 are read
; (bytes 0-31 and 64-127) and only bytes 32-127 are written; no store to bytes
; 0-31 is expected.
; On RV32 the expected code needs 24 word registers, so it also expects a
; 48-byte stack frame that spills and reloads s0-s9, with matching .cfi_*
; directives. The RV64 sequence uses only a- and t-registers and has no frame.
; NOTE(review): the check lines look machine-generated (update_llc_test_checks
; style) - regenerate rather than hand-edit; confirm against the file header.
+ define void @shuffle_i256_ldst (ptr %p ) {
1250
+ ; RV32-LABEL: shuffle_i256_ldst:
1251
+ ; RV32: # %bb.0:
1252
+ ; RV32-NEXT: addi sp, sp, -48
1253
+ ; RV32-NEXT: .cfi_def_cfa_offset 48
1254
+ ; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
1255
+ ; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
1256
+ ; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
1257
+ ; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
1258
+ ; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
1259
+ ; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
1260
+ ; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
1261
+ ; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
1262
+ ; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
1263
+ ; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
1264
+ ; RV32-NEXT: .cfi_offset s0, -4
1265
+ ; RV32-NEXT: .cfi_offset s1, -8
1266
+ ; RV32-NEXT: .cfi_offset s2, -12
1267
+ ; RV32-NEXT: .cfi_offset s3, -16
1268
+ ; RV32-NEXT: .cfi_offset s4, -20
1269
+ ; RV32-NEXT: .cfi_offset s5, -24
1270
+ ; RV32-NEXT: .cfi_offset s6, -28
1271
+ ; RV32-NEXT: .cfi_offset s7, -32
1272
+ ; RV32-NEXT: .cfi_offset s8, -36
1273
+ ; RV32-NEXT: .cfi_offset s9, -40
1274
+ ; RV32-NEXT: lw a1, 0(a0)
1275
+ ; RV32-NEXT: lw a2, 4(a0)
1276
+ ; RV32-NEXT: lw a3, 8(a0)
1277
+ ; RV32-NEXT: lw a4, 12(a0)
1278
+ ; RV32-NEXT: lw a5, 16(a0)
1279
+ ; RV32-NEXT: lw a6, 20(a0)
1280
+ ; RV32-NEXT: lw a7, 24(a0)
1281
+ ; RV32-NEXT: lw t0, 28(a0)
1282
+ ; RV32-NEXT: lw t1, 96(a0)
1283
+ ; RV32-NEXT: lw t2, 100(a0)
1284
+ ; RV32-NEXT: lw t3, 104(a0)
1285
+ ; RV32-NEXT: lw t4, 108(a0)
1286
+ ; RV32-NEXT: lw t5, 112(a0)
1287
+ ; RV32-NEXT: lw t6, 116(a0)
1288
+ ; RV32-NEXT: lw s0, 120(a0)
1289
+ ; RV32-NEXT: lw s1, 124(a0)
1290
+ ; RV32-NEXT: lw s2, 64(a0)
1291
+ ; RV32-NEXT: lw s3, 68(a0)
1292
+ ; RV32-NEXT: lw s4, 72(a0)
1293
+ ; RV32-NEXT: lw s5, 76(a0)
1294
+ ; RV32-NEXT: lw s6, 80(a0)
1295
+ ; RV32-NEXT: lw s7, 84(a0)
1296
+ ; RV32-NEXT: lw s8, 88(a0)
1297
+ ; RV32-NEXT: lw s9, 92(a0)
1298
+ ; RV32-NEXT: sw s6, 112(a0)
1299
+ ; RV32-NEXT: sw s7, 116(a0)
1300
+ ; RV32-NEXT: sw s8, 120(a0)
1301
+ ; RV32-NEXT: sw s9, 124(a0)
1302
+ ; RV32-NEXT: sw s2, 96(a0)
1303
+ ; RV32-NEXT: sw s3, 100(a0)
1304
+ ; RV32-NEXT: sw s4, 104(a0)
1305
+ ; RV32-NEXT: sw s5, 108(a0)
1306
+ ; RV32-NEXT: sw t5, 80(a0)
1307
+ ; RV32-NEXT: sw t6, 84(a0)
1308
+ ; RV32-NEXT: sw s0, 88(a0)
1309
+ ; RV32-NEXT: sw s1, 92(a0)
1310
+ ; RV32-NEXT: sw t1, 64(a0)
1311
+ ; RV32-NEXT: sw t2, 68(a0)
1312
+ ; RV32-NEXT: sw t3, 72(a0)
1313
+ ; RV32-NEXT: sw t4, 76(a0)
1314
+ ; RV32-NEXT: sw a5, 48(a0)
1315
+ ; RV32-NEXT: sw a6, 52(a0)
1316
+ ; RV32-NEXT: sw a7, 56(a0)
1317
+ ; RV32-NEXT: sw t0, 60(a0)
1318
+ ; RV32-NEXT: sw a1, 32(a0)
1319
+ ; RV32-NEXT: sw a2, 36(a0)
1320
+ ; RV32-NEXT: sw a3, 40(a0)
1321
+ ; RV32-NEXT: sw a4, 44(a0)
1322
+ ; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
1323
+ ; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
1324
+ ; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
1325
+ ; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
1326
+ ; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
1327
+ ; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
1328
+ ; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
1329
+ ; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
1330
+ ; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
1331
+ ; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
1332
+ ; RV32-NEXT: .cfi_restore s0
1333
+ ; RV32-NEXT: .cfi_restore s1
1334
+ ; RV32-NEXT: .cfi_restore s2
1335
+ ; RV32-NEXT: .cfi_restore s3
1336
+ ; RV32-NEXT: .cfi_restore s4
1337
+ ; RV32-NEXT: .cfi_restore s5
1338
+ ; RV32-NEXT: .cfi_restore s6
1339
+ ; RV32-NEXT: .cfi_restore s7
1340
+ ; RV32-NEXT: .cfi_restore s8
1341
+ ; RV32-NEXT: .cfi_restore s9
1342
+ ; RV32-NEXT: addi sp, sp, 48
1343
+ ; RV32-NEXT: .cfi_def_cfa_offset 0
1344
+ ; RV32-NEXT: ret
1345
+ ;
1346
+ ; RV64-LABEL: shuffle_i256_ldst:
1347
+ ; RV64: # %bb.0:
1348
+ ; RV64-NEXT: ld a1, 96(a0)
1349
+ ; RV64-NEXT: ld a2, 104(a0)
1350
+ ; RV64-NEXT: ld a3, 112(a0)
1351
+ ; RV64-NEXT: ld a4, 120(a0)
1352
+ ; RV64-NEXT: ld a5, 0(a0)
1353
+ ; RV64-NEXT: ld a6, 8(a0)
1354
+ ; RV64-NEXT: ld a7, 16(a0)
1355
+ ; RV64-NEXT: ld t0, 24(a0)
1356
+ ; RV64-NEXT: ld t1, 64(a0)
1357
+ ; RV64-NEXT: ld t2, 72(a0)
1358
+ ; RV64-NEXT: ld t3, 80(a0)
1359
+ ; RV64-NEXT: ld t4, 88(a0)
1360
+ ; RV64-NEXT: sd t1, 96(a0)
1361
+ ; RV64-NEXT: sd t2, 104(a0)
1362
+ ; RV64-NEXT: sd t3, 112(a0)
1363
+ ; RV64-NEXT: sd t4, 120(a0)
1364
+ ; RV64-NEXT: sd a5, 32(a0)
1365
+ ; RV64-NEXT: sd a6, 40(a0)
1366
+ ; RV64-NEXT: sd a7, 48(a0)
1367
+ ; RV64-NEXT: sd t0, 56(a0)
1368
+ ; RV64-NEXT: sd a1, 64(a0)
1369
+ ; RV64-NEXT: sd a2, 72(a0)
1370
+ ; RV64-NEXT: sd a3, 80(a0)
1371
+ ; RV64-NEXT: sd a4, 88(a0)
1372
+ ; RV64-NEXT: ret
1373
+ %a = load <4 x i256 >, ptr %p
1374
+ %res = shufflevector <4 x i256 > %a , <4 x i256 > poison, <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 2 >
1375
+ store <4 x i256 > %res , ptr %p
1376
+ ret void
1377
+ }
0 commit comments