Skip to content

Commit e62c1e7

Browse files
committed
[DAG] isGuaranteedNotToBeUndefOrPoison - ISD::LOAD nodes are not poison if the LoadSDNode is known to be dereferenceable
Matches the behaviour in ValueTracking.cpp. Frozen ISD::LOAD nodes do become an issue when we more aggressively push freeze through a DAG, so we need to find more cases where we can safely unfreeze loads (e.g. constant pool?).
1 parent 3e746bd commit e62c1e7

File tree

7 files changed

+89
-91
lines changed

7 files changed

+89
-91
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5631,6 +5631,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
56315631
});
56325632
}
56335633

5634+
case ISD::LOAD:
5635+
return cast<LoadSDNode>(Op)->isDereferenceable();
5636+
56345637
// TODO: Search for noundef attributes from library functions.
56355638

56365639
// TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7791,7 +7791,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
77917791
;
77927792
; GFX6-LABEL: sdiv_i64_pow2_shl_denom:
77937793
; GFX6: ; %bb.0:
7794-
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
7794+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
77957795
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
77967796
; GFX6-NEXT: s_mov_b32 s3, 0xf000
77977797
; GFX6-NEXT: s_mov_b32 s2, -1
@@ -7960,7 +7960,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
79607960
;
79617961
; GFX9-LABEL: sdiv_i64_pow2_shl_denom:
79627962
; GFX9: ; %bb.0:
7963-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
7963+
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
79647964
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
79657965
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
79667966
; GFX9-NEXT: s_ashr_i32 s6, s1, 31
@@ -9059,7 +9059,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
90599059
;
90609060
; GFX6-LABEL: srem_i64_pow2_shl_denom:
90619061
; GFX6: ; %bb.0:
9062-
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
9062+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
90639063
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
90649064
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
90659065
; GFX6-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
@@ -9230,7 +9230,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
92309230
;
92319231
; GFX9-LABEL: srem_i64_pow2_shl_denom:
92329232
; GFX9: ; %bb.0:
9233-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
9233+
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
92349234
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
92359235
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
92369236
; GFX9-NEXT: s_ashr_i32 s2, s1, 31

llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,13 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x
1515
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1616
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
1717
; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]]
18-
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
19-
; GCN-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def dead $scc
20-
; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
21-
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY3]], implicit-def dead $scc
22-
; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
18+
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc
19+
; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
20+
; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $scc
21+
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
22+
; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc
2323
; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc
24-
; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
25-
; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_3]], implicit-def $scc
26-
; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
27-
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY5]], killed [[COPY4]], implicit-def dead $scc
24+
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY4]], killed [[COPY3]], implicit-def dead $scc
2825
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
2926
; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1)
3027
; GCN-NEXT: S_ENDPGM 0
@@ -68,16 +65,15 @@ define amdgpu_kernel void @uniform_trunc_i32_to_i1(ptr addrspace(1) %out, i32 %x
6865
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
6966
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
7067
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
71-
; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY killed [[S_LOAD_DWORDX2_IMM1]]
72-
; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
73-
; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
74-
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY5]], implicit-def dead $scc
68+
; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
69+
; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1
70+
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY4]], implicit-def dead $scc
7571
; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
76-
; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc
72+
; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
7773
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
78-
; GCN-NEXT: S_CMP_LT_I32 killed [[COPY4]], killed [[S_MOV_B32_2]], implicit-def $scc
79-
; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc
80-
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY7]], killed [[COPY6]], implicit-def dead $scc
74+
; GCN-NEXT: S_CMP_LT_I32 killed [[COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc
75+
; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc
76+
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY6]], killed [[COPY5]], implicit-def dead $scc
8177
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
8278
; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1)
8379
; GCN-NEXT: S_ENDPGM 0
@@ -126,14 +122,13 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x
126122
; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub3
127123
; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
128124
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY5]], %subreg.sub1
129-
; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
130-
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY7]], implicit-def dead $scc
125+
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LOAD_DWORD_IMM]], implicit-def dead $scc
131126
; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
132-
; GCN-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $scc
127+
; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc
133128
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
134-
; GCN-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
135-
; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
136-
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY8]], implicit-def dead $scc
129+
; GCN-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
130+
; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
131+
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY7]], implicit-def dead $scc
137132
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
138133
; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.2, addrspace 1)
139134
; GCN-NEXT: S_ENDPGM 0

llvm/test/CodeGen/AMDGPU/sra.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -830,33 +830,33 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
830830
define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
831831
; SI-LABEL: s_ashr_33_i64:
832832
; SI: ; %bb.0:
833-
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
833+
; SI-NEXT: s_load_dword s6, s[4:5], 0x14
834834
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
835835
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
836836
; SI-NEXT: s_mov_b32 s3, 0xf000
837837
; SI-NEXT: s_mov_b32 s2, -1
838838
; SI-NEXT: s_waitcnt lgkmcnt(0)
839-
; SI-NEXT: s_ashr_i32 s6, s7, 31
840-
; SI-NEXT: s_ashr_i32 s7, s7, 1
841-
; SI-NEXT: s_add_u32 s4, s7, s4
842-
; SI-NEXT: s_addc_u32 s5, s6, s5
839+
; SI-NEXT: s_ashr_i32 s7, s6, 31
840+
; SI-NEXT: s_ashr_i32 s6, s6, 1
841+
; SI-NEXT: s_add_u32 s4, s6, s4
842+
; SI-NEXT: s_addc_u32 s5, s7, s5
843843
; SI-NEXT: v_mov_b32_e32 v0, s4
844844
; SI-NEXT: v_mov_b32_e32 v1, s5
845845
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
846846
; SI-NEXT: s_endpgm
847847
;
848848
; VI-LABEL: s_ashr_33_i64:
849849
; VI: ; %bb.0:
850-
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
850+
; VI-NEXT: s_load_dword s6, s[4:5], 0x50
851851
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
852852
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
853853
; VI-NEXT: s_mov_b32 s3, 0xf000
854854
; VI-NEXT: s_mov_b32 s2, -1
855855
; VI-NEXT: s_waitcnt lgkmcnt(0)
856-
; VI-NEXT: s_ashr_i32 s6, s7, 31
857-
; VI-NEXT: s_ashr_i32 s7, s7, 1
858-
; VI-NEXT: s_add_u32 s4, s7, s4
859-
; VI-NEXT: s_addc_u32 s5, s6, s5
856+
; VI-NEXT: s_ashr_i32 s7, s6, 31
857+
; VI-NEXT: s_ashr_i32 s6, s6, 1
858+
; VI-NEXT: s_add_u32 s4, s6, s4
859+
; VI-NEXT: s_addc_u32 s5, s7, s5
860860
; VI-NEXT: v_mov_b32_e32 v0, s4
861861
; VI-NEXT: v_mov_b32_e32 v1, s5
862862
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -953,33 +953,33 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
953953
define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
954954
; SI-LABEL: s_ashr_62_i64:
955955
; SI: ; %bb.0:
956-
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
956+
; SI-NEXT: s_load_dword s6, s[4:5], 0x14
957957
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
958958
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
959959
; SI-NEXT: s_mov_b32 s3, 0xf000
960960
; SI-NEXT: s_mov_b32 s2, -1
961961
; SI-NEXT: s_waitcnt lgkmcnt(0)
962-
; SI-NEXT: s_ashr_i32 s6, s7, 31
963-
; SI-NEXT: s_ashr_i32 s7, s7, 30
964-
; SI-NEXT: s_add_u32 s4, s7, s4
965-
; SI-NEXT: s_addc_u32 s5, s6, s5
962+
; SI-NEXT: s_ashr_i32 s7, s6, 31
963+
; SI-NEXT: s_ashr_i32 s6, s6, 30
964+
; SI-NEXT: s_add_u32 s4, s6, s4
965+
; SI-NEXT: s_addc_u32 s5, s7, s5
966966
; SI-NEXT: v_mov_b32_e32 v0, s4
967967
; SI-NEXT: v_mov_b32_e32 v1, s5
968968
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
969969
; SI-NEXT: s_endpgm
970970
;
971971
; VI-LABEL: s_ashr_62_i64:
972972
; VI: ; %bb.0:
973-
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
973+
; VI-NEXT: s_load_dword s6, s[4:5], 0x50
974974
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
975975
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
976976
; VI-NEXT: s_mov_b32 s3, 0xf000
977977
; VI-NEXT: s_mov_b32 s2, -1
978978
; VI-NEXT: s_waitcnt lgkmcnt(0)
979-
; VI-NEXT: s_ashr_i32 s6, s7, 31
980-
; VI-NEXT: s_ashr_i32 s7, s7, 30
981-
; VI-NEXT: s_add_u32 s4, s7, s4
982-
; VI-NEXT: s_addc_u32 s5, s6, s5
979+
; VI-NEXT: s_ashr_i32 s7, s6, 31
980+
; VI-NEXT: s_ashr_i32 s6, s6, 30
981+
; VI-NEXT: s_add_u32 s4, s6, s4
982+
; VI-NEXT: s_addc_u32 s5, s7, s5
983983
; VI-NEXT: v_mov_b32_e32 v0, s4
984984
; VI-NEXT: v_mov_b32_e32 v1, s5
985985
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -1077,31 +1077,31 @@ define amdgpu_kernel void @v_ashr_62_i64(ptr addrspace(1) %out, ptr addrspace(1)
10771077
define amdgpu_kernel void @s_ashr_63_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
10781078
; SI-LABEL: s_ashr_63_i64:
10791079
; SI: ; %bb.0:
1080-
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
1081-
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x1d
1080+
; SI-NEXT: s_load_dword s8, s[4:5], 0x14
1081+
; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x1d
10821082
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
10831083
; SI-NEXT: s_mov_b32 s3, 0xf000
10841084
; SI-NEXT: s_mov_b32 s2, -1
10851085
; SI-NEXT: s_waitcnt lgkmcnt(0)
1086-
; SI-NEXT: s_ashr_i32 s5, s7, 31
1087-
; SI-NEXT: s_add_u32 s4, s5, s8
1088-
; SI-NEXT: s_addc_u32 s5, s5, s9
1086+
; SI-NEXT: s_ashr_i32 s5, s8, 31
1087+
; SI-NEXT: s_add_u32 s4, s5, s6
1088+
; SI-NEXT: s_addc_u32 s5, s5, s7
10891089
; SI-NEXT: v_mov_b32_e32 v0, s4
10901090
; SI-NEXT: v_mov_b32_e32 v1, s5
10911091
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
10921092
; SI-NEXT: s_endpgm
10931093
;
10941094
; VI-LABEL: s_ashr_63_i64:
10951095
; VI: ; %bb.0:
1096-
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
1097-
; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x74
1096+
; VI-NEXT: s_load_dword s8, s[4:5], 0x50
1097+
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x74
10981098
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
10991099
; VI-NEXT: s_mov_b32 s3, 0xf000
11001100
; VI-NEXT: s_mov_b32 s2, -1
11011101
; VI-NEXT: s_waitcnt lgkmcnt(0)
1102-
; VI-NEXT: s_ashr_i32 s5, s7, 31
1103-
; VI-NEXT: s_add_u32 s4, s5, s8
1104-
; VI-NEXT: s_addc_u32 s5, s5, s9
1102+
; VI-NEXT: s_ashr_i32 s5, s8, 31
1103+
; VI-NEXT: s_add_u32 s4, s5, s6
1104+
; VI-NEXT: s_addc_u32 s5, s5, s7
11051105
; VI-NEXT: v_mov_b32_e32 v0, s4
11061106
; VI-NEXT: v_mov_b32_e32 v1, s5
11071107
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0

llvm/test/CodeGen/AMDGPU/srem64.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,25 +1129,25 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
11291129
; GCN-IR-LABEL: s_test_srem33_64:
11301130
; GCN-IR: ; %bb.0: ; %_udiv-special-cases
11311131
; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1132-
; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
1132+
; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
11331133
; GCN-IR-NEXT: s_mov_b32 s13, 0
11341134
; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
1135-
; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 31
1136-
; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[4:5], 31
11371135
; GCN-IR-NEXT: s_ashr_i32 s4, s3, 31
1136+
; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[2:3], 31
11381137
; GCN-IR-NEXT: s_mov_b32 s5, s4
1139-
; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
1138+
; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[8:9], 31
1139+
; GCN-IR-NEXT: s_xor_b64 s[2:3], s[6:7], s[4:5]
11401140
; GCN-IR-NEXT: s_sub_u32 s6, s2, s4
11411141
; GCN-IR-NEXT: s_subb_u32 s7, s3, s4
11421142
; GCN-IR-NEXT: s_ashr_i32 s2, s9, 31
11431143
; GCN-IR-NEXT: s_mov_b32 s3, s2
1144-
; GCN-IR-NEXT: s_xor_b64 s[8:9], s[8:9], s[2:3]
1144+
; GCN-IR-NEXT: s_xor_b64 s[8:9], s[10:11], s[2:3]
11451145
; GCN-IR-NEXT: s_sub_u32 s8, s8, s2
11461146
; GCN-IR-NEXT: s_subb_u32 s9, s9, s2
1147-
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0
1148-
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[8:9], 0
1147+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[6:7], 0
1148+
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0
11491149
; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[8:9]
1150-
; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11]
1150+
; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[2:3]
11511151
; GCN-IR-NEXT: s_flbit_i32_b64 s20, s[6:7]
11521152
; GCN-IR-NEXT: s_sub_u32 s14, s12, s20
11531153
; GCN-IR-NEXT: s_subb_u32 s15, 0, 0

llvm/test/CodeGen/X86/oddsubvector.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,10 @@ define <16 x i32> @PR42819(ptr %a0) {
155155
define void @PR42833() {
156156
; SSE2-LABEL: PR42833:
157157
; SSE2: # %bb.0:
158+
; SSE2-NEXT: movl b(%rip), %eax
158159
; SSE2-NEXT: movdqa c+144(%rip), %xmm2
159160
; SSE2-NEXT: movdqa c+128(%rip), %xmm0
160-
; SSE2-NEXT: movd %xmm0, %eax
161-
; SSE2-NEXT: addl b(%rip), %eax
161+
; SSE2-NEXT: addl c+128(%rip), %eax
162162
; SSE2-NEXT: movd %eax, %xmm1
163163
; SSE2-NEXT: movd %eax, %xmm3
164164
; SSE2-NEXT: paddd %xmm0, %xmm3
@@ -191,10 +191,10 @@ define void @PR42833() {
191191
;
192192
; SSE42-LABEL: PR42833:
193193
; SSE42: # %bb.0:
194+
; SSE42-NEXT: movl b(%rip), %eax
194195
; SSE42-NEXT: movdqa c+144(%rip), %xmm1
195196
; SSE42-NEXT: movdqa c+128(%rip), %xmm0
196-
; SSE42-NEXT: movd %xmm0, %eax
197-
; SSE42-NEXT: addl b(%rip), %eax
197+
; SSE42-NEXT: addl c+128(%rip), %eax
198198
; SSE42-NEXT: movd %eax, %xmm2
199199
; SSE42-NEXT: paddd %xmm0, %xmm2
200200
; SSE42-NEXT: movdqa d+144(%rip), %xmm3

llvm/test/CodeGen/X86/pr38539.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,26 +23,26 @@ define void @f() nounwind {
2323
; X86-NEXT: pushl %esi
2424
; X86-NEXT: andl $-16, %esp
2525
; X86-NEXT: subl $160, %esp
26-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
26+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
27+
; X86-NEXT: shll $30, %ecx
28+
; X86-NEXT: movl %ecx, %edi
29+
; X86-NEXT: sarl $30, %edi
30+
; X86-NEXT: sarl $31, %ecx
2731
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
2832
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
2933
; X86-NEXT: movzbl (%eax), %eax
30-
; X86-NEXT: movzbl (%eax), %ecx
34+
; X86-NEXT: movzbl (%eax), %ebx
3135
; X86-NEXT: movzbl %al, %eax
32-
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
33-
; X86-NEXT: divb %cl
34-
; X86-NEXT: movl %edi, %eax
35-
; X86-NEXT: shll $30, %eax
36-
; X86-NEXT: movl %eax, %ecx
37-
; X86-NEXT: sarl $30, %ecx
38-
; X86-NEXT: sarl $31, %eax
39-
; X86-NEXT: xorl %eax, %edi
40-
; X86-NEXT: xorl %eax, %edx
41-
; X86-NEXT: shrdl $1, %eax, %ecx
42-
; X86-NEXT: xorl %ecx, %esi
43-
; X86-NEXT: subl %ecx, %esi
44-
; X86-NEXT: sbbl %eax, %edx
45-
; X86-NEXT: sbbl %eax, %edi
36+
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
37+
; X86-NEXT: divb %bl
38+
; X86-NEXT: xorl %ecx, %edx
39+
; X86-NEXT: movl %ecx, %eax
40+
; X86-NEXT: shldl $31, %edi, %eax
41+
; X86-NEXT: xorl %ecx, %edi
42+
; X86-NEXT: xorl %eax, %esi
43+
; X86-NEXT: subl %eax, %esi
44+
; X86-NEXT: sbbl %ecx, %edx
45+
; X86-NEXT: sbbl %ecx, %edi
4646
; X86-NEXT: movl %edi, %ecx
4747
; X86-NEXT: shldl $30, %edx, %ecx
4848
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -114,16 +114,16 @@ define void @f() nounwind {
114114
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
115115
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
116116
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
117+
; X86-NEXT: movl 112(%esp,%esi), %eax
117118
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
118-
; X86-NEXT: movl 112(%esp,%esi), %edi
119-
; X86-NEXT: movl 116(%esp,%esi), %eax
119+
; X86-NEXT: movl 116(%esp,%esi), %edi
120120
; X86-NEXT: movl 120(%esp,%esi), %esi
121-
; X86-NEXT: shldl %cl, %eax, %esi
121+
; X86-NEXT: shldl %cl, %edi, %esi
122122
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
123-
; X86-NEXT: shldl %cl, %edi, %eax
124-
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
125-
; X86-NEXT: shll %cl, %edi
123+
; X86-NEXT: shldl %cl, %eax, %edi
126124
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
125+
; X86-NEXT: shll %cl, %eax
126+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
127127
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
128128
; X86-NEXT: orl %edx, %eax
129129
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

0 commit comments

Comments
 (0)