Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11481,6 +11481,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
// FIXME: There's no way to have a built-in with an rvalue ref arg.
case 'C':
Type = Type.withConst();
// adjust 'const char *' to 'const char __constant *' on OpenCL
if (Context.getLangOpts().OpenCL &&
Type.getTypePtr() == Context.CharTy.getTypePtr()) {
Type = Context.getAddrSpaceQualType(Type, LangAS::opencl_constant);
}
break;
case 'D':
Type = Context.getVolatileType(Type);
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/AST/Decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3600,8 +3600,10 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {

// OpenCL v1.2 s6.9.f - The library functions defined in
// the C99 standard headers are not available.
// EXCEPTION: printf is supported for AMDGPU
if (Context.getLangOpts().OpenCL &&
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID) &&
BuiltinID != Builtin::BIprintf)
return 0;

// CUDA does not have device-side standard library. printf and malloc are the
Expand Down
11 changes: 9 additions & 2 deletions clang/lib/CodeGen/CGGPUBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ bool containsNonScalarVarargs(CodeGenFunction *CGF, const CallArgList &Args) {
RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
llvm::Function *Decl, bool WithSizeArg) {
CodeGenModule &CGM = CGF->CGM;
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
CGBuilderTy &Builder = CGF->Builder;
assert(E->getBuiltinCallee() == Builtin::BIprintf);
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
Expand All @@ -155,9 +156,15 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,

auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
llvm::Value *BufferPtr = r.first;
llvm::Value *Fmt = Args[0].getRValue(*CGF).getScalarVal();

llvm::SmallVector<llvm::Value *, 3> Vec = {
Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
// For OpenCL, the default addrspace of 'format' argument is LangAS::opencl_constant,
// however, the 'vprintf' requires it to be unqualified 'ptr' type. Do pointer cast if
// it's the case.
if (CGM.getContext().getLangOpts().OpenCL)
Fmt = Builder.CreatePointerCast(Fmt, llvm::PointerType::getUnqual(Ctx));

llvm::SmallVector<llvm::Value *, 3> Vec = {Fmt, BufferPtr};
if (WithSizeArg) {
// Passing > 32bit of data as a local alloca doesn't work for nvptx or
// amdgpu
Expand Down
8 changes: 6 additions & 2 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7008,8 +7008,12 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
}

void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) {
if (Decl->getType().hasAddressSpace())
return;
// Address space is only meaningful for pointer type
if (Decl->getType()->isPointerType()) {
const PointerType *T = dyn_cast<PointerType>(Decl->getType().getTypePtr());
if (T->getPointeeType().hasAddressSpace())
return;
}
if (Decl->getType()->isDependentType())
return;
if (VarDecl *Var = dyn_cast<VarDecl>(Decl)) {
Expand Down
14 changes: 14 additions & 0 deletions clang/test/CodeGenOpenCL/test-printf-nvptx.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -cl-std=CL3.0 -triple nvptx-- -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NV %s

int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));

// NV-LABEL: define dso_local spir_kernel void @test_printf(
// NV-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 {
// NV-NEXT: entry:
// NV-NEXT: [[TMP0:%.*]] = call i32 @vprintf(ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr null)
// NV-NEXT: ret void
//
__kernel void test_printf() {
printf("hello, printf on nvptx.");
}
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,10 @@ class TargetTransformInfo {
// (set to UINT_MAX to disable). This does not apply in cases where the
// loop is being fully unrolled.
unsigned MaxCount;
/// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
/// to be overrided by a target gives more flexiblity on certain cases.
/// By default, MaxUpperBound uses UnrollMaxUpperBound which value is 8.
unsigned MaxUpperBound;
/// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
/// applies even if full unrolling is selected. This allows a target to fall
/// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Count = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.MaxUpperBound = UnrollMaxUpperBound;
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
UP.BEInsns = 2;
UP.Partial = false;
Expand Down Expand Up @@ -237,6 +238,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
if (UnrollMaxCount.getNumOccurrences() > 0)
UP.MaxCount = UnrollMaxCount;
if (UnrollMaxUpperBound.getNumOccurrences() > 0)
UP.MaxUpperBound = UnrollMaxUpperBound;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
Expand Down Expand Up @@ -777,7 +780,7 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
return TripCount;

if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
MaxTripCount <= UnrollMaxUpperBound)
MaxTripCount <= UP.MaxUpperBound)
return MaxTripCount;

// if didn't return until here, should continue to other priorties
Expand Down Expand Up @@ -952,7 +955,7 @@ bool llvm::computeUnrollCount(
// cost of exact full unrolling. As such, if we have an exact count and
// found it unprofitable, we'll never chose to bounded unroll.
if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
MaxTripCount <= UnrollMaxUpperBound) {
MaxTripCount <= UP.MaxUpperBound) {
UP.Count = MaxTripCount;
if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
MaxTripCount, UCE, UP)) {
Expand Down Expand Up @@ -1026,7 +1029,7 @@ bool llvm::computeUnrollCount(
}

// Don't unroll a small upper bound loop unless user or TTI asked to do so.
if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
UP.Count = 0;
return false;
}
Expand Down
Loading