11# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2- # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
2+ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
33
44# This test checks the fix for the failure "Bad machine code: Defining instruction does not modify register", which was caused by a corrupt lane mask.
55
@@ -14,15 +14,25 @@ body: |
1414 ; CHECK-NEXT: liveins: $sgpr4_sgpr5
1515 ; CHECK-NEXT: {{ $}}
1616 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
17- ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
18- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
17+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
18+ ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
19+ ; CHECK-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
20+ ; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
1921 ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
20- ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
22+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
23+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
2124 ; CHECK-NEXT: {{ $}}
2225 ; CHECK-NEXT: bb.1:
23- ; CHECK-NEXT: successors: %bb.2(0x80000000 )
26+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000 )
2427 ; CHECK-NEXT: {{ $}}
25- ; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_MOV_B32_]]
28+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
29+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
30+ ; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
31+ ; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
32+ ; CHECK-NEXT: $vcc_lo = COPY $exec_lo
33+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
34+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
35+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc_lo
2636 ; CHECK-NEXT: S_BRANCH %bb.2
2737 ; CHECK-NEXT: {{ $}}
2838 ; CHECK-NEXT: bb.2:
@@ -32,25 +42,35 @@ body: |
3242 liveins: $sgpr4_sgpr5
3343
3444 %0:sgpr_64 = COPY killed $sgpr4_sgpr5
35- %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
36- %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
37- %3:sreg_32 = S_MOV_B32 1
38- undef %4.sub0:sgpr_128 = COPY %3
39- %5:sgpr_128 = COPY %4
40- %5.sub1:sgpr_128 = COPY killed %2
41- %6:sgpr_128 = COPY %5
42- %6.sub2 :sgpr_128 = COPY killed %1
43- %7:sreg_32 = S_MOV_B32 0
44- undef %8.sub0:sgpr_256 = COPY %7
45- %9:sreg_32 = COPY %3
45+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
46+ %2:sreg_32 = S_MOV_B32 1
47+ undef %3.sub0:sgpr_128 = COPY %2
48+ %4:sreg_32 = S_MOV_B32 0
49+ undef %5.sub0:sgpr_256 = COPY %4
50+ TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
51+ %6:sgpr_128 = COPY killed %3
52+ %6.sub1 :sgpr_128 = COPY killed %1
53+ %7:sreg_32 = COPY $exec_lo
54+ %8:sreg_32 = COPY %2
55+ %9:sreg_32 = COPY %4
4656
4757 bb.1:
48- successors: %bb.2(0x80000000 )
58+ successors: %bb.1(0x40000000), %bb.2(0x40000000 )
4959
50- %10:sreg_32 = COPY killed %9
51- undef %11.sub0:sgpr_128 = COPY %3
60+ %10:sreg_32 = COPY killed %8
61+ undef %11.sub0:sgpr_128 = COPY %2
5262 %11.sub1:sgpr_128 = COPY killed %10
53- S_NOP 0, implicit %5, implicit %8
63+ %11.sub2:sgpr_128 = COPY %2
64+ %11.sub3:sgpr_128 = COPY %2
65+ TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
66+ %12:sreg_32 = COPY killed %9
67+ %13:sgpr_128 = COPY %6
68+ %13.sub2:sgpr_128 = COPY killed %12
69+ TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
70+ $vcc_lo = COPY %7
71+ %8:sreg_32 = COPY %4
72+ %9:sreg_32 = COPY %2
73+ S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc
5474 S_BRANCH %bb.2
5575
5676 bb.2:
0 commit comments