Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
def TargetSPIRV : TargetArch<["spirv", "spirv32", "spirv64"]>;
def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
def TargetNativeCPU : TargetArch<["native_cpu"]>;
def TargetWindows : TargetSpec {
let OSes = ["Win32"];
}
Expand Down Expand Up @@ -4530,6 +4531,11 @@ def RISCVVLSCC: DeclOrTypeAttr, TargetSpecificAttr<TargetRISCV> {
let Documentation = [RISCVVLSCCDocs];
}

// Declaration-or-type attribute spelled `libclc_call`, restricted to the
// native_cpu target architecture via TargetNativeCPU.
// NOTE(review): this attribute is currently marked Undocumented; consider
// adding a documentation entry for it.
def NativeCPULibclcCall : DeclOrTypeAttr, TargetSpecificAttr<TargetNativeCPU> {
let Spellings = [Clang<"libclc_call", 0>];
let Documentation = [Undocumented];
}

def Target : InheritableAttr {
let Spellings = [GCC<"target">];
let Args = [StringArgument<"featuresStr">];
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,11 @@ class TargetInfo : public TransferrableTargetInfo,
return CC_C;
}

/// Gets the calling convention for libclc built-ins for the given target.
///
/// The base implementation forwards to getDefaultCallingConv(); a target
/// whose libclc built-ins use a different convention can override this.
virtual CallingConv getLibclcCallingConv() const {
return getDefaultCallingConv();
}

/// Get the default atomic options.
AtomicOptions getAtomicOpts() const { return AtomicOpts; }

Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4341,6 +4341,7 @@ bool AttributedType::isCallingConv() const {
case attr::PreserveNone:
case attr::RISCVVectorCC:
case attr::RISCVVLSCC:
case attr::NativeCPULibclcCall:
return true;
}
llvm_unreachable("invalid attr kind");
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/AST/TypePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2125,6 +2125,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
case attr::RISCVVLSCC:
OS << "riscv_vls_cc";
break;
case attr::NativeCPULibclcCall:
OS << "libclc_call";
break;
case attr::NoDeref:
OS << "noderef";
break;
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Basic/Targets/NativeCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ class LLVM_LIBRARY_VISIBILITY NativeCPUTargetInfo final : public TargetInfo {

void setSupportedOpenCLOpts() override { supportAllOpenCLOpts(); }

/// libclc built-ins on this target use the CC_SpirFunction calling convention.
CallingConv getLibclcCallingConv() const override { return CC_SpirFunction; }

CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
if (CC == CC_SpirFunction)
return CCCR_OK;

if (HostTarget)
return HostTarget->checkCallingConvention(CC);

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
}
}

if (D->hasAttr<NativeCPULibclcCallAttr>())
return CC_SpirFunction;

return CC_C;
}

Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/Targets/NativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class NativeCPUABIInfo : public DefaultABIInfo {
public:
NativeCPUABIInfo(CodeGen::CodeGenTypes &CGT, const ABIInfo *HostABIInfo)
: DefaultABIInfo(CGT), HostABIInfo(HostABIInfo) {}

void computeInfo(CGFunctionInfo &FI) const override;
};

class NativeCPUTargetCodeGenInfo : public TargetCodeGenInfo {
Expand All @@ -37,6 +39,17 @@ class NativeCPUTargetCodeGenInfo : public TargetCodeGenInfo {
};
} // namespace

/// Classifies the arguments and return value of \p FI.
///
/// When no host ABI info is available, or when the function uses the
/// SPIR_FUNC calling convention, the function is classified with
/// DefaultABIInfo and its effective calling convention is then set to C.
/// Every other function is handed to the host ABI info unchanged.
void NativeCPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
  // Guard: the default-ABI path. The null check comes first so that FI is
  // only queried when a host ABI is actually present.
  if (!HostABIInfo ||
      FI.getCallingConvention() == llvm::CallingConv::SPIR_FUNC) {
    DefaultABIInfo::computeInfo(FI);
    FI.setEffectiveCallingConvention(llvm::CallingConv::C);
    return;
  }

  // Everything else is lowered exactly as the host target would lower it.
  HostABIInfo->computeInfo(FI);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createNativeCPUTargetCodeGenInfo(
CodeGenModule &CGM,
std::unique_ptr<TargetCodeGenInfo> HostTargetCodeGenInfo) {
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5428,6 +5428,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) RISCVVLSCCAttr(S.Context, AL, VectorLength));
return;
}
case ParsedAttr::AT_NativeCPULibclcCall:
D->addAttr(::new (S.Context) NativeCPULibclcCallAttr(S.Context, AL));
return;
default:
llvm_unreachable("unexpected attribute kind");
}
Expand Down Expand Up @@ -5699,6 +5702,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
CC = CC_DeviceKernel;
break;
}
case ParsedAttr::AT_NativeCPULibclcCall:
CC = CC_SpirFunction;
break;
default: llvm_unreachable("unexpected attribute kind");
}

Expand Down Expand Up @@ -7700,6 +7706,7 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_PreserveNone:
case ParsedAttr::AT_RISCVVectorCC:
case ParsedAttr::AT_RISCVVLSCC:
case ParsedAttr::AT_NativeCPULibclcCall:
handleCallConvAttr(S, D, AL);
break;
case ParsedAttr::AT_DeviceKernel:
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaLookup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
Expand Down Expand Up @@ -788,7 +789,7 @@ static void GetProgModelBuiltinFctOverloads(
std::vector<QualType> &FunctionList, SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes, bool IsVariadic) {
FunctionProtoType::ExtProtoInfo PI(
Context.getDefaultCallingConvention(false, false, true));
Context.getTargetInfo().getLibclcCallingConv());
PI.Variadic = IsVariadic;
PI.ExceptionSpec = FunctionProtoType::ExceptionSpecInfo{EST_BasicNoexcept};

Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_M68kRTD: \
case ParsedAttr::AT_PreserveNone: \
case ParsedAttr::AT_RISCVVectorCC: \
case ParsedAttr::AT_RISCVVLSCC
case ParsedAttr::AT_RISCVVLSCC: \
case ParsedAttr::AT_NativeCPULibclcCall

// Function type attributes.
#define FUNCTION_TYPE_ATTRS_CASELIST \
Expand Down Expand Up @@ -7661,6 +7662,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {

return ::new (Ctx) RISCVVLSCCAttr(Ctx, Attr, ABIVLen);
}
case ParsedAttr::AT_NativeCPULibclcCall:
return createSimpleAttr<NativeCPULibclcCallAttr>(Ctx, Attr);
}
llvm_unreachable("unexpected attribute kind!");
}
Expand Down
31 changes: 31 additions & 0 deletions clang/test/CodeGen/callingconv-native_cpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: %clang_cc1 -triple native_cpu -aux-triple x86_64-unknown-linux-gnu -fsycl-is-device -emit-llvm -o - %s | FileCheck %s

// Small aggregate (4 bytes, 2-byte aligned per the expected IR below) used to
// observe how structs are passed and returned under each ABI.
struct S {
char c;
short s;
};
// 16-element vector of short; appears as <16 x i16> in the expected IR.
using short16 = short __attribute__((ext_vector_type(16)));

// Plain SYCL device function: calls to it are expected to follow the host ABI.
__attribute__((sycl_device))
S foo(short16, S);

// Same signature, but marked libclc_call: calls to it are expected to follow
// the libclc ABI instead.
__attribute__((sycl_device, libclc_call))
S bar(short16, S);

// CHECK: define noundef <16 x i16> @_Z3bazRDv16_sR1S(ptr noundef nonnull align 32 dereferenceable(32) %x, ptr noundef nonnull align 2 dereferenceable(4) %y)
__attribute__((sycl_device))
short16 baz(short16 &x, S &y) {
// Host ABI:
// short16 argument is passed by reference.
// S is passed by value.
// S is returned by value.
// CHECK: call i32 @_Z3fooDv16_s1S(ptr noundef byval(<16 x i16>) align 32 {{%.*}}, i32 {{%.*}})
y = foo(x, y);
// Libclc ABI:
// short16 is passed by value.
// S is passed by reference.
// S is returned by reference.
// CHECK: call void @_Z3barDv16_s1S(ptr dead_on_unwind writable sret(%struct.S) align 2 {{%.*}}, <16 x i16> noundef {{%.*}}, ptr noundef byval(%struct.S) align 2 {{%.*}})
y = bar(x, y);
return x;
}
4 changes: 4 additions & 0 deletions libclc/clc/include/clc/clcfunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#define _CLC_DEF
#elif defined(CLC_CLSPV)
#define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
#elif defined(CLC_NATIVE_CPU)
#define _CLC_DEF __attribute__((always_inline)) __attribute__((libclc_call))
#undef _CLC_DECL
#define _CLC_DECL __attribute__((libclc_call))
#else
#define _CLC_DEF __attribute__((always_inline))
#endif
Expand Down
5 changes: 3 additions & 2 deletions libdevice/nativecpu_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ using __nativecpu_state = native_cpu::state;

#undef DEVICE_EXTERNAL
#undef DEVICE_EXTERN_C
#define DEVICE_EXTERN_C extern "C" SYCL_EXTERNAL
#define DEVICE_EXTERN_C extern "C" SYCL_EXTERNAL __attribute__((libclc_call))
#define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline))
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline))
#define DEVICE_EXTERNAL \
SYCL_EXTERNAL __attribute__((always_inline, libclc_call))

// Several functions are used implicitly by WorkItemLoopsPass and
// PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Linker/IRMover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,13 @@ Error IRLinker::run() {
EnableTripleWarning = !SrcHasLibDeviceTriple;
EnableDLWarning = !(SrcHasLibDeviceTriple && SrcHasLibDeviceDL);
}
// Likewise, during SYCL Native CPU compilation we link against bitcode that
// uses a generic data layout, which is compatible with the concrete host data
// layout and the concrete host target that we use later on.
if (SrcTriple.isNativeCPU()) {
EnableDLWarning = false;
EnableTripleWarning = false;
}

if (EnableDLWarning && (SrcM->getDataLayout() != DstM.getDataLayout())) {
emitWarning("Linking two modules of different data layouts: '" +
Expand Down
5 changes: 5 additions & 0 deletions sycl/include/sycl/detail/defines_elementary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@
#endif // __SYCL_ALWAYS_INLINE

#ifdef SYCL_EXTERNAL
#ifdef __NativeCPU__
#define __DPCPP_SYCL_EXTERNAL SYCL_EXTERNAL __attribute__((__libclc_call__))
#define __DPCPP_SYCL_EXTERNAL_LIBC SYCL_EXTERNAL
#else
#define __DPCPP_SYCL_EXTERNAL SYCL_EXTERNAL
#endif
#else
#ifdef __SYCL_DEVICE_ONLY__
#define __DPCPP_SYCL_EXTERNAL __attribute__((sycl_device))
Expand Down
20 changes: 17 additions & 3 deletions sycl/include/sycl/group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,18 @@ template <int Dimensions = 1> class __SYCL_TYPE(group) group {

bool leader() const { return (get_local_linear_id() == 0); }

// Note: These signatures for parallel_for_work_item are intentionally
// non-conforming. The spec says this should take const WorkItemFunctionT &,
// but we take it by value, and rely on passing by value being done as passing
// a copy by reference (ptr byval) to ensure that the special handling in
// SYCLLowerWGScopePass to mutate the passed functor object works.
Comment on lines +178 to +182
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hierarchical parallelism is deprecated in SYCL 2020, so nobody cares, I guess.


template <typename WorkItemFunctionT>
void parallel_for_work_item(WorkItemFunctionT Func) const {
#ifdef __NativeCPU__
__attribute__((__libclc_call__))
#endif
void
parallel_for_work_item(WorkItemFunctionT Func) const {
// need barriers to enforce SYCL semantics for the work item loop -
// compilers are expected to optimize when possible
detail::workGroupBarrier();
Expand Down Expand Up @@ -227,8 +237,12 @@ template <int Dimensions = 1> class __SYCL_TYPE(group) group {
}

template <typename WorkItemFunctionT>
void parallel_for_work_item(range<Dimensions> flexibleRange,
WorkItemFunctionT Func) const {
#ifdef __NativeCPU__
__attribute__((__libclc_call__))
#endif
void
parallel_for_work_item(range<Dimensions> flexibleRange,
WorkItemFunctionT Func) const {
detail::workGroupBarrier();
#ifdef __SYCL_DEVICE_ONLY__
range<Dimensions> GlobalSize{
Expand Down
Loading