Skip to content

Commit 6bc272a

Browse files
authored
Merge branch 'main' into mbstowcs-implementation
2 parents e116c79 + 1031f14 commit 6bc272a

File tree

15 files changed

+1052
-49
lines changed

15 files changed

+1052
-49
lines changed

clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,12 @@ mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr *op,
194194
}
195195

196196
case CK_LValueToRValueBitCast: {
197-
cgf.cgm.errorNYI("ComplexExprEmitter::emitCast CK_LValueToRValueBitCast");
198-
return {};
197+
LValue sourceLVal = cgf.emitLValue(op);
198+
Address addr = sourceLVal.getAddress().withElementType(
199+
builder, cgf.convertTypeForMem(destTy));
200+
LValue destLV = cgf.makeAddrLValue(addr, destTy);
201+
assert(!cir::MissingFeatures::opTBAA());
202+
return emitLoadOfLValue(destLV, op->getExprLoc());
199203
}
200204

201205
case CK_BitCast:

clang/test/CIR/CodeGen/complex-cast.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,3 +326,33 @@ void complex_to_complex_cast() {
326326
// OGCG: store i32 %[[REAL_INT_CAST]], ptr {{.*}}, align 4
327327
// OGCG: store i32 %[[IMAG_INT_CAST]], ptr getelementptr inbounds nuw ({ i32, i32 }, ptr {{.*}}, i32 0, i32 1), align 4
328328

329+
// Aggregate of two doubles; used as the source operand of the
// __builtin_bit_cast in lvalue_to_rvalue_bitcast().
struct CX {
330+
double real;
331+
double imag;
332+
};
333+
334+
// Bit-casts a default-initialized CX object to double _Complex via
// __builtin_bit_cast. The CIR-BEFORE/CIR-AFTER/LLVM/OGCG lines that follow
// check the code emitted for this function.
void lvalue_to_rvalue_bitcast() {
335+
CX a;
336+
double _Complex b = __builtin_bit_cast(double _Complex, a);
337+
}
338+
339+
340+
// CIR-BEFORE: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
341+
342+
// CIR-AFTER: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
343+
344+
// LLVM: %[[PTR_ADDR:.*]] = alloca %struct.CX, i64 1, align 8
345+
// LLVM: %[[COMPLEX_ADDR:.*]] = alloca { double, double }, i64 1, align 8
346+
// LLVM: %[[PTR_TO_COMPLEX:.*]] = load { double, double }, ptr %[[PTR_ADDR]], align 8
347+
// LLVM: store { double, double } %[[PTR_TO_COMPLEX]], ptr %[[COMPLEX_ADDR]], align 8
348+
349+
// OGCG: %[[A_ADDR:.*]] = alloca %struct.CX, align 8
350+
// OGCG: %[[B_ADDR:.*]] = alloca { double, double }, align 8
351+
// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 0
352+
// OGCG: %[[A_REAL:.*]] = load double, ptr %[[A_REAL_PTR]], align 8
353+
// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 1
354+
// OGCG: %[[A_IMAG:.*]] = load double, ptr %[[A_IMAG_PTR]], align 8
355+
// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 0
356+
// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 1
357+
// OGCG: store double %[[A_REAL]], ptr %[[B_REAL_PTR]], align 8
358+
// OGCG: store double %[[A_IMAG]], ptr %[[B_IMAG_PTR]], align 8

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,12 @@ def FeatureSafeSmemPrefetch : SubtargetFeature<"safe-smem-prefetch",
280280
"SMEM prefetches do not fail on illegal address"
281281
>;
282282

283+
// Subtarget feature "safe-cu-prefetch": sets the HasSafeCUPrefetch field to
// "true". The flag is read through hasSafeCUPrefetch() by the prefetch scope
// computations (PrefetchLoc / renderPrefetchLoc), which only raise the scope
// to SCOPE_SE when it is not set.
def FeatureSafeCUPrefetch : SubtargetFeature<"safe-cu-prefetch",
284+
"HasSafeCUPrefetch",
285+
"true",
286+
"VMEM CU scope prefetches do not fail on illegal address"
287+
>;
288+
283289
def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
284290
"HasVcmpxExecWARHazard",
285291
"true",

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,5 +446,8 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
446446
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
447447
GISDNodeXFormEquiv<as_hw_round_mode>;
448448

449+
// GlobalISel counterpart of the PrefetchLoc SDNodeXForm: the operand is
// produced by the custom renderer named "renderPrefetchLoc".
def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
450+
GISDNodeXFormEquiv<PrefetchLoc>;
451+
449452
def gi_MFMALdScaleModifierOp : GICustomOperandRenderer<"renderScaledMAIIntrinsicOperand">,
450453
GISDNodeXFormEquiv<MFMALdScaleXForm>;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "SIModeRegisterDefaults.h"
2020
#include "llvm/Analysis/ValueTracking.h"
2121
#include "llvm/CodeGen/SelectionDAGISel.h"
22+
#include "llvm/Support/AMDGPUAddrSpace.h"
2223
#include "llvm/Target/TargetMachine.h"
2324

2425
namespace llvm {

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7068,6 +7068,17 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
70687068
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
70697069
}
70707070

7071+
/// Custom operand renderer for the scope immediate of a prefetch.
///
/// Reads the immediate stored in operand 2 of \p MI, converts it into a
/// cache-policy scope value, and appends that value to \p MIB as an immediate
/// operand. The arithmetic is the same as in the PrefetchLoc SDNodeXForm in
/// FLATInstructions.td.
///
/// \param MIB   Instruction builder that receives the rendered immediate.
/// \param MI    Instruction whose operand 2 supplies the input value.
/// \param OpIdx Not used by this renderer; the operand index is fixed at 2.
///
/// NOTE(review): the neighbouring renderRoundMode reads MI.getOperand(OpIdx);
/// confirm that hard-coding operand 2 here is intentional.
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
7072+
const MachineInstr &MI,
7073+
int OpIdx) const {
7074+
uint32_t V = MI.getOperand(2).getImm();
7075+
// Invert the masked input (SCOPE_MASK - value) and shift it into the scope
// field position.
V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK))
7076+
<< AMDGPU::CPol::SCOPE_SHIFT;
7077+
// Without the safe-CU-prefetch feature, never emit a scope below SCOPE_SE.
if (!Subtarget->hasSafeCUPrefetch())
7078+
V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
7079+
MIB.addImm(V);
7080+
}
7081+
70717082
/// Convert from 2-bit value to enum values used for op_sel* source modifiers.
70727083
void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
70737084
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,10 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
414414

415415
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
416416
int OpIdx) const;
417+
418+
// Custom operand renderer referenced by gi_prefetch_loc in AMDGPUGISel.td;
// appends the computed prefetch scope immediate to MIB.
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
419+
int OpIdx) const;
420+
417421
void renderScaledMAIIntrinsicOperand(MachineInstrBuilder &MIB,
418422
const MachineInstr &MI, int OpIdx) const;
419423

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3501,19 +3501,24 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
35013501
applyMappingMAD_64_32(B, OpdMapper);
35023502
return;
35033503
case AMDGPU::G_PREFETCH: {
3504-
if (!Subtarget.hasPrefetch() || !Subtarget.hasSafeSmemPrefetch()) {
3504+
if (!Subtarget.hasSafeSmemPrefetch() && !Subtarget.hasVmemPrefInsts()) {
35053505
MI.eraseFromParent();
35063506
return;
35073507
}
35083508
Register PtrReg = MI.getOperand(0).getReg();
35093509
unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);
3510-
if (PtrBank == AMDGPU::VGPRRegBankID) {
3510+
if (PtrBank == AMDGPU::VGPRRegBankID &&
3511+
(!Subtarget.hasVmemPrefInsts() || !MI.getOperand(3).getImm())) {
3512+
// Cannot do I$ prefetch with divergent pointer.
35113513
MI.eraseFromParent();
35123514
return;
35133515
}
35143516
unsigned AS = MRI.getType(PtrReg).getAddressSpace();
3515-
if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
3516-
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
3517+
if ((!AMDGPU::isFlatGlobalAddrSpace(AS) &&
3518+
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
3519+
(!Subtarget.hasSafeSmemPrefetch() &&
3520+
(AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
3521+
!MI.getOperand(3).getImm() /* I$ prefetch */))) {
35173522
MI.eraseFromParent();
35183523
return;
35193524
}

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,6 +2184,50 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f
21842184

21852185
} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
21862186

2187+
// Transforms the target-immediate operand of a prefetch into a cache-policy
// scope value and returns it as an i32 target constant. The GlobalISel
// equivalent is gi_prefetch_loc (AMDGPUInstructionSelector::renderPrefetchLoc).
def PrefetchLoc: SDNodeXForm<timm, [{
2188+
uint32_t V = N->getZExtValue();
2189+
// Invert the masked input (SCOPE_MASK - value) and shift it into the scope field.
V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) << AMDGPU::CPol::SCOPE_SHIFT;
2190+
// Without the safe-CU-prefetch feature, never use a scope below SCOPE_SE.
if (!Subtarget->hasSafeCUPrefetch())
2191+
V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
2192+
return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
2193+
}]>;
2194+
2195+
// Matches a prefetch node whose memory access is in the FLAT address space.
// The SelectionDAG predicate checks the MemSDNode's address space;
// GISelPredicateCode performs the same check on the first memory operand of MI.
def prefetch_flat : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2196+
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2197+
[{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]> {
2198+
let GISelPredicateCode = [{
2199+
return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
2200+
}];
2201+
}
2202+
2203+
// Matches a prefetch node whose memory access is in the GLOBAL address space,
// or in the CONSTANT address space when the subtarget does not report safe
// SMEM prefetch. GISelPredicateCode applies the same condition to the first
// memory operand of MI.
def prefetch_global : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2204+
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2205+
[{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2206+
(cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2207+
!Subtarget->hasSafeSmemPrefetch()); }]> {
2208+
let GISelPredicateCode = [{
2209+
return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2210+
((*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2211+
!Subtarget->hasSafeSmemPrefetch());
2212+
}];
2213+
}
2214+
2215+
// Selection patterns mapping a generic prefetch (matched through
// `prefetch_kind`, with the given `rw` operand and a type operand of
// i32imm_one) onto the FLAT pseudo named by `inst`. The $loc operand is passed
// through the PrefetchLoc transform in both patterns.
// NOTE(review): a type operand of one presumably selects a data (non-I$)
// prefetch -- confirm against the prefetch node definition.
multiclass FlatPrefetchPats<string inst, SDPatternOperator prefetch_kind, SDPatternOperator rw> {
2216+
// VGPR address + immediate offset form: selects `inst`.
def : GCNPat <
2217+
(prefetch_kind (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2218+
(!cast<FLAT_Pseudo>(inst) $vaddr, $offset, (i32 (PrefetchLoc $loc)))
2219+
> {
2220+
// Complexity is 0 when rw is i32imm_zero and 25 otherwise.
let AddedComplexity = !if(!eq(rw, i32imm_zero), 0, 25);
2221+
}
2222+
2223+
// SGPR base + VGPR offset + immediate offset form: selects `inst`_SADDR.
def : GCNPat <
2224+
(prefetch_kind (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2225+
(!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, (i32 (PrefetchLoc $loc)))
2226+
> {
2227+
// Complexity is 11 when rw is i32imm_zero and 30 otherwise, i.e. always
// higher than the VGPR-address pattern for the same rw.
let AddedComplexity = !if(!eq(rw, i32imm_zero), 11, 30);
2228+
}
2229+
}
2230+
21872231
multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
21882232
def : GCNPat <
21892233
(intr (FlatOffset i64:$vaddr, i32:$offset), timm:$cpol),
@@ -2198,6 +2242,14 @@ multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
21982242
}
21992243

22002244
let SubtargetPredicate = HasVmemPrefInsts in {
2245+
defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_zero>;
2246+
defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_zero>;
2247+
2248+
// Patterns for forced vector prefetch with rw = 1.
2249+
defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_one>;
2250+
defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_one>;
2251+
2252+
22012253
// Patterns for target intrinsics
22022254
defm : FlatIntrPrefetchPats<"FLAT_PREFETCH_B8", int_amdgcn_flat_prefetch>;
22032255
defm : FlatIntrPrefetchPats<"GLOBAL_PREFETCH_B8", int_amdgcn_global_prefetch>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
247247
bool HasInstFwdPrefetchBug = false;
248248
bool HasVmemPrefInsts = false;
249249
bool HasSafeSmemPrefetch = false;
250+
bool HasSafeCUPrefetch = false;
250251
bool HasVcmpxExecWARHazard = false;
251252
bool HasLdsBranchVmemWARHazard = false;
252253
bool HasNSAtoVMEMBug = false;
@@ -995,6 +996,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
995996

996997
bool hasSafeSmemPrefetch() const { return HasSafeSmemPrefetch; }
997998

999+
// Returns the HasSafeCUPrefetch subtarget flag.
bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1000+
9981001
// Has s_cmpk_* instructions.
9991002
bool hasSCmpK() const { return getGeneration() < GFX12; }
10001003

0 commit comments

Comments
 (0)