Skip to content

Commit 8cbaffc

Browse files
committed
AMDGPU: Try to use zext to implement constant-32-bit addrspacecast
If the high bits are assumed to be 0 for the cast, use zext. Previously we would emit a build_vector and a bitcast with the high element as 0. The zext is more easily optimized. I'm less convinced this is good for GlobalISel, since you still need an inttoptr back to the original pointer type. The default value is 0, though I'm not sure if this is meaningful in the real world. The real uses might always override the high bits value with the attribute.
1 parent 882ea7a commit 8cbaffc

File tree

8 files changed

+95
-77
lines changed

8 files changed

+95
-77
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2560,8 +2560,14 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
25602560
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
25612561
uint32_t AddrHiVal = Info->get32BitAddressHighBits();
25622562
auto PtrLo = B.buildPtrToInt(S32, Src);
2563-
auto HighAddr = B.buildConstant(S32, AddrHiVal);
2564-
B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr});
2563+
if (AddrHiVal == 0) {
2564+
auto Zext = B.buildZExt(LLT::scalar(64), PtrLo);
2565+
B.buildIntToPtr(Dst, Zext);
2566+
} else {
2567+
auto HighAddr = B.buildConstant(S32, AddrHiVal);
2568+
B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr});
2569+
}
2570+
25652571
MI.eraseFromParent();
25662572
return true;
25672573
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8410,6 +8410,9 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
84108410
Op.getValueType() == MVT::i64) {
84118411
const SIMachineFunctionInfo *Info =
84128412
DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
8413+
if (Info->get32BitAddressHighBits() == 0)
8414+
return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, Src);
8415+
84138416
SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
84148417
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
84158418
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -428,9 +428,9 @@ body: |
428428
; GCN-NEXT: {{ $}}
429429
; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
430430
; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
431-
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
432-
; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
433-
; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
431+
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
432+
; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
433+
; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
434434
%0:_(p6) = COPY $vgpr0
435435
%1:_(p4) = G_ADDRSPACE_CAST %0
436436
$vgpr0_vgpr1 = COPY %1
@@ -485,9 +485,9 @@ body: |
485485
; GCN-NEXT: {{ $}}
486486
; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
487487
; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
488-
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
489-
; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
490-
; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
488+
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
489+
; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](s64)
490+
; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0)
491491
%0:_(p6) = COPY $vgpr0
492492
%1:_(p0) = G_ADDRSPACE_CAST %0
493493
$vgpr0_vgpr1 = COPY %1

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,24 @@ body: |
1212
; CI-NEXT: {{ $}}
1313
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
1414
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
15-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
16-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
17-
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
18-
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
19-
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C1]](s64)
15+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
16+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
17+
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6)
18+
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
19+
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C]](s64)
2020
; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6)
21-
; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
22-
; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C2]](s32)
21+
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
22+
; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
2323
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
24-
; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
25-
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C3]](s64)
24+
; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
25+
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C2]](s64)
2626
; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6)
27-
; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
27+
; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
2828
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6)
29-
; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32)
29+
; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
3030
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
31-
; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
32-
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C4]](s32)
31+
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
32+
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
3333
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
3434
; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
3535
%0:_(p6) = COPY $vgpr0
@@ -48,9 +48,9 @@ body: |
4848
; CI-NEXT: {{ $}}
4949
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
5050
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
51-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
52-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
53-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
51+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
52+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
53+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6)
5454
; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
5555
%0:_(p6) = COPY $vgpr0
5656
%1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ body: |
1313
; CI-NEXT: {{ $}}
1414
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
1515
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
16-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
17-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
18-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
16+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
17+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
18+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6)
1919
; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
2020
; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
2121
%0:_(p6) = COPY $sgpr0
@@ -34,9 +34,9 @@ body: |
3434
; CI-NEXT: {{ $}}
3535
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
3636
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
37-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
38-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
39-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
37+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
38+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
39+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6)
4040
; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
4141
; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
4242
%0:_(p6) = COPY $sgpr0
@@ -55,9 +55,9 @@ body: |
5555
; CI-NEXT: {{ $}}
5656
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
5757
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
58-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
59-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
60-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
58+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
59+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
60+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6)
6161
; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
6262
; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
6363
%0:_(p6) = COPY $sgpr0
@@ -76,9 +76,9 @@ body: |
7676
; CI-NEXT: {{ $}}
7777
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
7878
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
79-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
80-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
81-
; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
79+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
80+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
81+
; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6)
8282
; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
8383
%0:_(p6) = COPY $sgpr0
8484
%1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6)
@@ -96,9 +96,9 @@ body: |
9696
; CI-NEXT: {{ $}}
9797
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
9898
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
99-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
100-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
101-
; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
99+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
100+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
101+
; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6)
102102
; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
103103
%0:_(p6) = COPY $sgpr0
104104
%1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6)
@@ -116,9 +116,9 @@ body: |
116116
; CI-NEXT: {{ $}}
117117
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
118118
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
119-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
120-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
121-
; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
119+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
120+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
121+
; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6)
122122
; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
123123
%0:_(p6) = COPY $sgpr0
124124
%1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ body: |
1414
; CI-NEXT: {{ $}}
1515
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
1616
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
17-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
18-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
19-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
20-
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
21-
; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
17+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
18+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
19+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6)
20+
; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
21+
; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
2222
%0:_(p6) = COPY $sgpr0
2323
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6)
2424
$vgpr0_vgpr1 = COPY %1
@@ -35,11 +35,11 @@ body: |
3535
; CI-NEXT: {{ $}}
3636
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
3737
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
38-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
39-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
40-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
41-
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
42-
; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
38+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
39+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
40+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6)
41+
; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
42+
; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
4343
%0:_(p6) = COPY $sgpr0
4444
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6)
4545
$vgpr0_vgpr1 = COPY %1
@@ -56,11 +56,11 @@ body: |
5656
; CI-NEXT: {{ $}}
5757
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
5858
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
59-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
60-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
61-
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
62-
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
63-
; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
59+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
60+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
61+
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6)
62+
; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
63+
; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
6464
%0:_(p6) = COPY $sgpr0
6565
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6)
6666
$vgpr0_vgpr1 = COPY %1
@@ -77,9 +77,9 @@ body: |
7777
; CI-NEXT: {{ $}}
7878
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
7979
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
80-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
81-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
82-
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
80+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
81+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
82+
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6)
8383
; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
8484
%0:_(p6) = COPY $sgpr0
8585
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6)
@@ -97,9 +97,9 @@ body: |
9797
; CI-NEXT: {{ $}}
9898
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
9999
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
100-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
101-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
102-
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
100+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
101+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
102+
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6)
103103
; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
104104
%0:_(p6) = COPY $sgpr0
105105
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6)
@@ -117,9 +117,9 @@ body: |
117117
; CI-NEXT: {{ $}}
118118
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
119119
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
120-
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
121-
; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
122-
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
120+
; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
121+
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
122+
; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6)
123123
; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
124124
%0:_(p6) = COPY $sgpr0
125125
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6)

llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) {
1111
; GFX6-LABEL: load_constant32bit_vgpr_offset:
1212
; GFX6: ; %bb.0: ; %entry
1313
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
14-
; GFX6-NEXT: s_mov_b32 s2, 0
1514
; GFX6-NEXT: v_mov_b32_e32 v1, 0
15+
; GFX6-NEXT: s_mov_b32 s2, 0
1616
; GFX6-NEXT: s_mov_b32 s3, 0xf000
1717
; GFX6-NEXT: s_mov_b64 s[0:1], 0
1818
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
@@ -59,8 +59,8 @@ define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %ar
5959
; GFX6-LABEL: load_constant32bit_vgpr_v8f32:
6060
; GFX6: ; %bb.0: ; %entry
6161
; GFX6-NEXT: v_mov_b32_e32 v4, v0
62-
; GFX6-NEXT: s_mov_b32 s2, 0
6362
; GFX6-NEXT: v_mov_b32_e32 v5, 0
63+
; GFX6-NEXT: s_mov_b32 s2, 0
6464
; GFX6-NEXT: s_mov_b32 s3, 0xf000
6565
; GFX6-NEXT: s_mov_b64 s[0:1], 0
6666
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64

llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -474,15 +474,24 @@ define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) {
474474
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
475475
; OPT-NEXT: ret i32 [[LOAD]]
476476
;
477-
; ASM-LABEL: cast_private_to_flat_to_global:
478-
; ASM: ; %bb.0:
479-
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480-
; ASM-NEXT: v_mov_b32_e32 v1, 0
481-
; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
482-
; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
483-
; ASM-NEXT: ds_read_b32 v0, v0
484-
; ASM-NEXT: s_waitcnt lgkmcnt(0)
485-
; ASM-NEXT: s_setpc_b64 s[30:31]
477+
; DAGISEL-ASM-LABEL: cast_private_to_flat_to_global:
478+
; DAGISEL-ASM: ; %bb.0:
479+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480+
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
481+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
482+
; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0
483+
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
484+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
485+
;
486+
; GISEL-ASM-LABEL: cast_private_to_flat_to_global:
487+
; GISEL-ASM: ; %bb.0:
488+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 0
490+
; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
491+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
492+
; GISEL-ASM-NEXT: ds_read_b32 v0, v0
493+
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
494+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
486495
%flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr
487496
%local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3)
488497
%load = load volatile i32, ptr addrspace(3) %local.ptr

0 commit comments

Comments
 (0)