|
7 | 7 | ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9,GFX9-GISEL %s |
8 | 8 | ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG %s |
9 | 9 | ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL %s |
10 | | -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s |
11 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s |
| 10 | +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
| 11 | +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
| 12 | +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
| 13 | +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
12 | 14 |
|
13 | 15 | ; Test that add/sub with a constant is swapped to sub/add with negated |
14 | 16 | ; constant to minimize code size. |
@@ -1331,31 +1333,57 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp |
1331 | 1333 | ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1] |
1332 | 1334 | ; GFX10-GISEL-NEXT: s_endpgm |
1333 | 1335 | ; |
1334 | | -; GFX11-SDAG-LABEL: v_test_i16_x_sub_64: |
1335 | | -; GFX11-SDAG: ; %bb.0: |
1336 | | -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1337 | | -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1338 | | -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1339 | | -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1340 | | -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
1341 | | -; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] |
1342 | | -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1343 | | -; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 |
1344 | | -; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] |
1345 | | -; GFX11-SDAG-NEXT: s_endpgm |
1346 | | -; |
1347 | | -; GFX11-GISEL-LABEL: v_test_i16_x_sub_64: |
1348 | | -; GFX11-GISEL: ; %bb.0: |
1349 | | -; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1350 | | -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1351 | | -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1352 | | -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1353 | | -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1354 | | -; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] |
1355 | | -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1356 | | -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
1357 | | -; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] |
1358 | | -; GFX11-GISEL-NEXT: s_endpgm |
| 1336 | +; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64: |
| 1337 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 1338 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1339 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1340 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1341 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1342 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1343 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] |
| 1344 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1345 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 |
| 1346 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] |
| 1347 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 1348 | +; |
| 1349 | +; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64: |
| 1350 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 1351 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1352 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1353 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1354 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1355 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1356 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 1357 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1358 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 |
| 1359 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 1360 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 1361 | +; |
| 1362 | +; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64: |
| 1363 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 1364 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1365 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1366 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1367 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1368 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1369 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 1370 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1371 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, v1.l, 0xffc0 |
| 1372 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 1373 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
| 1374 | +; |
| 1375 | +; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64: |
| 1376 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 1377 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1378 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1379 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1380 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1381 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1382 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 1383 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1384 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
| 1385 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 1386 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
1359 | 1387 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
1360 | 1388 | %tid.ext = sext i32 %tid to i64 |
1361 | 1389 | %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext |
@@ -1491,37 +1519,69 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out |
1491 | 1519 | ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] |
1492 | 1520 | ; GFX10-GISEL-NEXT: s_endpgm |
1493 | 1521 | ; |
1494 | | -; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
1495 | | -; GFX11-SDAG: ; %bb.0: |
1496 | | -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1497 | | -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1498 | | -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1499 | | -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
1500 | | -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1501 | | -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
1502 | | -; GFX11-SDAG-NEXT: global_load_u16 v1, v1, s[2:3] |
1503 | | -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1504 | | -; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 |
1505 | | -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1506 | | -; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
1507 | | -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] |
1508 | | -; GFX11-SDAG-NEXT: s_endpgm |
1509 | | -; |
1510 | | -; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
1511 | | -; GFX11-GISEL: ; %bb.0: |
1512 | | -; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1513 | | -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1514 | | -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1515 | | -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
1516 | | -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1517 | | -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1518 | | -; GFX11-GISEL-NEXT: global_load_u16 v1, v1, s[2:3] |
1519 | | -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1520 | | -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
1521 | | -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1522 | | -; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
1523 | | -; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] |
1524 | | -; GFX11-GISEL-NEXT: s_endpgm |
| 1522 | +; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1523 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 1524 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1525 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 |
| 1526 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1527 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 |
| 1528 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| 1529 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1530 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3] |
| 1531 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1532 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 |
| 1533 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1534 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 1535 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 1536 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 1537 | +; |
| 1538 | +; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1539 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 1540 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1541 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1542 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1543 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1544 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1545 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1546 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3] |
| 1547 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1548 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 |
| 1549 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1550 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| 1551 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] |
| 1552 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 1553 | +; |
| 1554 | +; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1555 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 1556 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1557 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 |
| 1558 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1559 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 |
| 1560 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| 1561 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1562 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3] |
| 1563 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1564 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 0xffc0 |
| 1565 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1566 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 1567 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 1568 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
| 1569 | +; |
| 1570 | +; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1571 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 1572 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1573 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1574 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1575 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1576 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1577 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1578 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3] |
| 1579 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1580 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
| 1581 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1582 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| 1583 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] |
| 1584 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
1525 | 1585 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
1526 | 1586 | %tid.ext = sext i32 %tid to i64 |
1527 | 1587 | %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext |
@@ -1694,43 +1754,86 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, |
1694 | 1754 | ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 |
1695 | 1755 | ; GFX10-GISEL-NEXT: s_endpgm |
1696 | 1756 | ; |
1697 | | -; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_multi_use: |
1698 | | -; GFX11-SDAG: ; %bb.0: |
1699 | | -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1700 | | -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1701 | | -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1702 | | -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1703 | | -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
1704 | | -; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
1705 | | -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1706 | | -; GFX11-SDAG-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
1707 | | -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1708 | | -; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 |
1709 | | -; GFX11-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64 |
1710 | | -; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
1711 | | -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 |
1712 | | -; GFX11-SDAG-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
1713 | | -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 |
1714 | | -; GFX11-SDAG-NEXT: s_endpgm |
1715 | | -; |
1716 | | -; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_multi_use: |
1717 | | -; GFX11-GISEL: ; %bb.0: |
1718 | | -; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1719 | | -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1720 | | -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1721 | | -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1722 | | -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1723 | | -; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
1724 | | -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1725 | | -; GFX11-GISEL-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
1726 | | -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1727 | | -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
1728 | | -; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0 |
1729 | | -; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
1730 | | -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 |
1731 | | -; GFX11-GISEL-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
1732 | | -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 |
1733 | | -; GFX11-GISEL-NEXT: s_endpgm |
| 1757 | +; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1758 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 1759 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1760 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1761 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1762 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1763 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1764 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc |
| 1765 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1766 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc |
| 1767 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1768 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 |
| 1769 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l |
| 1770 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 1771 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l |
| 1772 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.h, v0.h, 64 |
| 1773 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1774 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h |
| 1775 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v2, s[0:1] dlc |
| 1776 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1777 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
| 1778 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1779 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 1780 | +; |
| 1781 | +; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1782 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 1783 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1784 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1785 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1786 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1787 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1788 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 1789 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1790 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
| 1791 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1792 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 |
| 1793 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v2, v2, 64 |
| 1794 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
| 1795 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1796 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
| 1797 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1798 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 1799 | +; |
| 1800 | +; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1801 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 1802 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1803 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1804 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1805 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1806 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1807 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 1808 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1809 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
| 1810 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1811 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, v1.l, 0xffc0 |
| 1812 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v2.l, v2.l, 0xffc0 |
| 1813 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
| 1814 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1815 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
| 1816 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1817 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
| 1818 | +; |
| 1819 | +; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1820 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 1821 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1822 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1823 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1824 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1825 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1826 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 1827 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1828 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
| 1829 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1830 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
| 1831 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v2, 0xffc0 |
| 1832 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
| 1833 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1834 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
| 1835 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1836 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
1734 | 1837 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
1735 | 1838 | %tid.ext = sext i32 %tid to i64 |
1736 | 1839 | %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext |
|
0 commit comments