diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index d9dc8290b0e49..7318e7640478f 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -849,8 +849,10 @@ class ASTNodeTraverser void VisitSYCLKernelCallStmt(const SYCLKernelCallStmt *Node) { Visit(Node->getOriginalStmt()); - if (Traversal != TK_IgnoreUnlessSpelledInSource) + if (Traversal != TK_IgnoreUnlessSpelledInSource) { + Visit(Node->getKernelLaunchStmt()); Visit(Node->getOutlinedFunctionDecl()); + } } void VisitOMPExecutableDirective(const OMPExecutableDirective *Node) { diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 9d90280ec15d4..020fc1ed692da 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3031,11 +3031,20 @@ DEF_TRAVERSE_STMT(CapturedStmt, { TRY_TO(TraverseDecl(S->getCapturedDecl())); }) DEF_TRAVERSE_STMT(SYCLKernelCallStmt, { if (getDerived().shouldVisitImplicitCode()) { TRY_TO(TraverseStmt(S->getOriginalStmt())); + TRY_TO(TraverseStmt(S->getKernelLaunchStmt())); TRY_TO(TraverseDecl(S->getOutlinedFunctionDecl())); ShouldVisitChildren = false; } }) +DEF_TRAVERSE_STMT(UnresolvedSYCLKernelCallStmt, { + if (getDerived().shouldVisitImplicitCode()) { + TRY_TO(TraverseStmt(S->getOriginalStmt())); + TRY_TO(TraverseStmt(S->getKernelLaunchIdExpr())); + ShouldVisitChildren = false; + } +}) + DEF_TRAVERSE_STMT(CXXOperatorCallExpr, {}) DEF_TRAVERSE_STMT(CXXRewrittenBinaryOperator, { if (!getDerived().shouldVisitImplicitCode()) { diff --git a/clang/include/clang/AST/StmtSYCL.h b/clang/include/clang/AST/StmtSYCL.h index 28ace12d7916b..32ca0522ee8fa 100644 --- a/clang/include/clang/AST/StmtSYCL.h +++ b/clang/include/clang/AST/StmtSYCL.h @@ -28,35 +28,45 @@ namespace clang { /// of such a function specifies the statements to be executed on a SYCL device /// to invoke a SYCL kernel with a particular set of kernel arguments. The /// SYCLKernelCallStmt associates an original statement (the compound statement -/// that is the function body) with an OutlinedFunctionDecl that holds the -/// kernel parameters and the transformed body. During code generation, the -/// OutlinedFunctionDecl is used to emit an offload kernel entry point suitable -/// for invocation from a SYCL library implementation. If executed, the -/// SYCLKernelCallStmt behaves as a no-op; no code generation is performed for -/// it. +/// that is the function body) with a kernel launch statement to execute on a +/// SYCL host and an OutlinedFunctionDecl that holds the kernel parameters and +/// the transformed body to execute on a SYCL device. During code generation, +/// the OutlinedFunctionDecl is used to emit an offload kernel entry point +/// suitable for invocation from a SYCL library implementation. class SYCLKernelCallStmt : public Stmt { friend class ASTStmtReader; friend class ASTStmtWriter; private: Stmt *OriginalStmt = nullptr; + Stmt *KernelLaunchStmt = nullptr; OutlinedFunctionDecl *OFDecl = nullptr; public: /// Construct a SYCL kernel call statement. - SYCLKernelCallStmt(CompoundStmt *CS, OutlinedFunctionDecl *OFD) - : Stmt(SYCLKernelCallStmtClass), OriginalStmt(CS), OFDecl(OFD) {} + SYCLKernelCallStmt(CompoundStmt *CS, Stmt *S, OutlinedFunctionDecl *OFD) + : Stmt(SYCLKernelCallStmtClass), OriginalStmt(CS), KernelLaunchStmt(S), + OFDecl(OFD) {} /// Construct an empty SYCL kernel call statement. SYCLKernelCallStmt(EmptyShell Empty) : Stmt(SYCLKernelCallStmtClass, Empty) {} - /// Retrieve the model statement. + /// Retrieve the original statement. CompoundStmt *getOriginalStmt() { return cast(OriginalStmt); } const CompoundStmt *getOriginalStmt() const { return cast(OriginalStmt); } + + /// Set the original statement. void setOriginalStmt(CompoundStmt *CS) { OriginalStmt = CS; } + /// Retrieve the kernel launch statement. + Stmt *getKernelLaunchStmt() { return KernelLaunchStmt; } + const Stmt *getKernelLaunchStmt() const { return KernelLaunchStmt; } + + /// Set the kernel launch statement. + void setKernelLaunchStmt(Stmt *S) { KernelLaunchStmt = S; } + /// Retrieve the outlined function declaration. OutlinedFunctionDecl *getOutlinedFunctionDecl() { return OFDecl; } const OutlinedFunctionDecl *getOutlinedFunctionDecl() const { return OFDecl; } @@ -89,6 +99,58 @@ class SYCLKernelCallStmt : public Stmt { } }; +// UnresolvedSYCLKernelCallStmt represents a SYCL kernel entry point +// function for a kernel that has not been instantiated yet. This Stmt should be +// transformed to a SYCLKernelCallStmt once the kernel and its name is known. +class UnresolvedSYCLKernelCallStmt : public Stmt { + friend class ASTStmtReader; + Stmt *OriginalStmt = nullptr; + // KernelLaunchIdExpr stores an UnresolvedLookupExpr or UnresolvedMemberExpr + // corresponding to the SYCL kernel launch function for which a call + // will be synthesized during template instantiation. + Expr *KernelLaunchIdExpr = nullptr; + UnresolvedSYCLKernelCallStmt(CompoundStmt *CS, Expr *IdExpr) + : Stmt(UnresolvedSYCLKernelCallStmtClass), OriginalStmt(CS), + KernelLaunchIdExpr(IdExpr) {} + + void setKernelLaunchIdExpr(Expr *IdExpr) { KernelLaunchIdExpr = IdExpr; } + void setOriginalStmt(CompoundStmt *CS) { OriginalStmt = CS; } + +public: + static UnresolvedSYCLKernelCallStmt * + Create(const ASTContext &C, CompoundStmt *CS, Expr *IdExpr) { + return new (C) UnresolvedSYCLKernelCallStmt(CS, IdExpr); + } + + static UnresolvedSYCLKernelCallStmt *CreateEmpty(const ASTContext &C) { + return new (C) UnresolvedSYCLKernelCallStmt(nullptr, nullptr); + } + + Expr *getKernelLaunchIdExpr() const { return KernelLaunchIdExpr; } + CompoundStmt *getOriginalStmt() { return cast(OriginalStmt); } + const CompoundStmt *getOriginalStmt() const { + return cast(OriginalStmt); + } + + SourceLocation getBeginLoc() const LLVM_READONLY { + return getOriginalStmt()->getBeginLoc(); + } + + SourceLocation getEndLoc() const LLVM_READONLY { + return getOriginalStmt()->getEndLoc(); + } + static bool classof(const Stmt *T) { + return T->getStmtClass() == UnresolvedSYCLKernelCallStmtClass; + } + child_range children() { + return child_range(&OriginalStmt, &OriginalStmt + 1); + } + + const_child_range children() const { + return const_child_range(&OriginalStmt, &OriginalStmt + 1); + } +}; + } // end namespace clang #endif // LLVM_CLANG_AST_STMTSYCL_H diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 4ce1bb6b394f7..4101050dfa1f2 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -479,13 +479,13 @@ The SYCL kernel in the previous code sample meets these expectations. def SYCLKernelEntryPointDocs : Documentation { let Category = DocCatFunction; let Content = [{ -The ``sycl_kernel_entry_point`` attribute facilitates the generation of an -offload kernel entry point, sometimes called a SYCL kernel caller function, -suitable for invoking a SYCL kernel on an offload device. The attribute is -intended for use in the implementation of SYCL kernel invocation functions -like the ``single_task`` and ``parallel_for`` member functions of the -``sycl::handler`` class specified in section 4.9.4, "Command group ``handler`` -class", of the SYCL 2020 specification. +The ``sycl_kernel_entry_point`` attribute facilitates the launch of a SYCL +kernel and the generation of an offload kernel entry point, sometimes called +a SYCL kernel caller function, suitable for invoking a SYCL kernel on an +offload device. The attribute is intended for use in the implementation of +SYCL kernel invocation functions like the ``single_task`` and ``parallel_for`` +member functions of the ``sycl::handler`` class specified in section 4.9.4, +"Command group ``handler`` class", of the SYCL 2020 specification. The attribute requires a single type argument that specifies a class type that meets the requirements for a SYCL kernel name as described in section 5.2, @@ -497,7 +497,7 @@ The attribute only appertains to functions and only those that meet the following requirements. * Has a non-deduced ``void`` return type. -* Is not a non-static member function, constructor, or destructor. +* Is not a constructor or destructor. * Is not a C variadic function. * Is not a coroutine. * Is not defined as deleted or as defaulted. @@ -512,39 +512,43 @@ follows. namespace sycl { class handler { + template + void sycl_enqueue_kernel_launch(const char *KernelName, Ts...) { + // Call functions appropriate for the desired offload backend + // (OpenCL, CUDA, HIP, Level Zero, etc...) to enqueue kernel invocation. + } + template [[ clang::sycl_kernel_entry_point(KernelNameType) ]] - static void kernel_entry_point(KernelType kernel) { - kernel(); + void kernel_entry_point(KernelType Kernel) { + Kernel(); } public: template - void single_task(KernelType kernel) { - // Call kernel_entry_point() to trigger generation of an offload - // kernel entry point. - kernel_entry_point(kernel); - // Call functions appropriate for the desired offload backend - // (OpenCL, CUDA, HIP, Level Zero, etc...). + void single_task(KernelType Kernel) { + // Call kernel_entry_point() to launch the kernel and to trigger + // generation of an offload kernel entry point. + kernel_entry_point(Kernel); } }; } // namespace sycl -A SYCL kernel is a callable object of class type that is constructed on a host, -often via a lambda expression, and then passed to a SYCL kernel invocation -function to be executed on an offload device. A SYCL kernel invocation function -is responsible for copying the provided SYCL kernel object to an offload -device and initiating a call to it. The SYCL kernel object and its data members -constitute the parameters of an offload kernel. - -A SYCL kernel type is required to satisfy the device copyability requirements -specified in section 3.13.1, "Device copyable", of the SYCL 2020 specification. -Additionally, any data members of the kernel object type are required to satisfy -section 4.12.4, "Rules for parameter passing to kernels". For most types, these -rules require that the type is trivially copyable. However, the SYCL -specification mandates that certain special SYCL types, such as -``sycl::accessor`` and ``sycl::stream`` be device copyable even if they are not -trivially copyable. These types require special handling because they cannot +A SYCL kernel object is a callable object of class type that is constructed on +a host, often via a lambda expression, and then passed to a SYCL kernel +invocation function to be executed on an offload device. A SYCL kernel +invocation function is responsible for copying the provided SYCL kernel object +to an offload device and initiating a call to it. The SYCL kernel object and +its data members constitute the parameters of an offload kernel. + +A SYCL kernel object type is required to satisfy the device copyability +requirements specified in section 3.13.1, "Device copyable", of the SYCL 2020 +specification. Additionally, any data members of the kernel object type are +required to satisfy section 4.12.4, "Rules for parameter passing to kernels". +For most types, these rules require that the type is trivially copyable. +However, the SYCL specification mandates that certain special SYCL types, such +as ``sycl::accessor`` and ``sycl::stream`` be device copyable even if they are +not trivially copyable. These types require special handling because they cannot be copied to device memory as if by ``memcpy()``. Additionally, some offload backends, OpenCL for example, require objects of some of these types to be passed as individual arguments to the offload kernel. @@ -559,7 +563,7 @@ like OpenCL): #. Identifying the offload kernel entry point to be used for the SYCL kernel. -#. Deconstructing the SYCL kernel object, if necessary, to produce the set of +#. Decomposing the SYCL kernel object, if necessary, to produce the set of offload kernel arguments required by the offload kernel entry point. #. Copying the offload kernel arguments to device memory. @@ -568,17 +572,23 @@ like OpenCL): The offload kernel entry point for a SYCL kernel performs the following tasks: -#. Reconstituting the SYCL kernel object, if necessary, using the offload +#. Reconstructing the SYCL kernel object, if necessary, using the offload kernel parameters. -#. Calling the ``operator()`` member function of the (reconstituted) SYCL kernel +#. Calling the ``operator()`` member function of the (reconstructed) SYCL kernel object. -The ``sycl_kernel_entry_point`` attribute automates generation of an offload -kernel entry point that performs those latter tasks. The parameters and body of -a function declared with the ``sycl_kernel_entry_point`` attribute specify a -pattern from which the parameters and body of the entry point function are -derived. Consider the following call to a SYCL kernel invocation function. +The ``sycl_kernel_entry_point`` attribute facilitates or automates these tasks +by generating the offload kernel entry point, generating a unique symbol name +for it, synthesizing code for kernel argument decomposition and reconstruction, +and synthesizing a call to a ``sycl_enqueue_kernel_launch`` function template +with the kernel name type, kernel symbol name, and (decomposed) kernel arguments +passed as template or function arguments. + +A function declared with the ``sycl_kernel_entry_point`` attribute specifies +the parameters and body of the offload entry point function. Consider the +following call to the ``single_task()`` SYCL kernel invocation function assuming +an implementation similar to the one shown above. .. code-block:: c++ @@ -592,31 +602,33 @@ derived. Consider the following call to a SYCL kernel invocation function. The SYCL kernel object is the result of the lambda expression. It has two data members corresponding to the captures of ``sout`` and ``s``. Since one of these data members corresponds to a special SYCL type that must be passed -individually as an offload kernel parameter, it is necessary to decompose the -SYCL kernel object into its constituent parts; the offload kernel will have -two kernel parameters. Given a SYCL implementation that uses a -``sycl_kernel_entry_point`` attributed function like the one shown above, an +individually as an offload kernel argument, it is necessary to decompose the +SYCL kernel object into its constituent parts and pass them individually. An offload kernel entry point function will be generated that looks approximately as follows. .. code-block:: c++ void sycl-kernel-caller-for-KN(sycl::stream sout, S s) { - kernel-type kernel = { sout, s ); - kernel(); + kernel-type Kernel = { sout, s ); + Kernel(); } There are a few items worthy of note: #. The name of the generated function incorporates the SYCL kernel name, ``KN``, that was passed as the ``KernelNameType`` template parameter to - ``kernel_entry_point()`` and provided as the argument to the + ``single_task()`` and eventually provided as the argument to the ``sycl_kernel_entry_point`` attribute. There is a one-to-one correspondence between SYCL kernel names and offload kernel entry points. +#. The parameters and the call to ``Kernel()`` correspond to the definition of + ``kernel_entry_point()`` called by ``single_task()`` with the SYCL kernel + object argument decomposed and reconstructed. + #. The SYCL kernel is a lambda closure type and therefore has no name; ``kernel-type`` is substituted above and corresponds to the ``KernelType`` - template parameter deduced in the call to ``kernel_entry_point()``. + template parameter deduced in the call to ``single_task()``. Lambda types cannot be declared and initialized using the aggregate initialization syntax used above, but the intended behavior should be clear. @@ -630,24 +642,55 @@ There are a few items worthy of note: or more parameters depending on how the SYCL library implementation defines these types. -#. The call to ``kernel_entry_point()`` has no effect other than to trigger - emission of the entry point function. The statments that make up the body - of the function are not executed when the function is called; they are - only used in the generation of the entry point function. +The call to ``kernel_entry_point()`` by ``single_task()`` is effectively +replaced with synthesized code that looks approximately as follows. + +.. code-block:: c++ + + sycl::stream sout = Kernel.sout; + S s = Kernel.s; + sycl_enqueue_kernel_launch("kernel-symbol-name", sout, s); + +There are a few items worthy of note: + +#. The SYCL kernel object is a lambda closure type and its captures do not + have formal names and cannot be accessed using the member access syntax used + above, but the intended behavior should be clear. + +#. ``kernel-symbol-name`` is substituted for the actual symbol name that would + be generated; these names are implementation details subject to change. + +#. Lookup for the ``sycl_enqueue_kernel_launch()`` function template is + performed from the (possibly instantiated) location of the definition of + ``kernel_entry_point()``. If overload resolution fails, the program is + ill-formed. If the selected overload is a non-static member function, then + ``this`` is passed for the implicit object parameter. + +#. Function arguments passed to ``sycl_enqueue_kernel_launch()`` are passed + as if by ``std::forward(x)``. + +#. The ``sycl_enqueue_kernel_launch()`` function is expected to be provided by + the SYCL library implementation. It is responsible for scheduling execution + of the generated offload kernel entry point identified by + ``kernel-symbol-name`` and copying the (decomposed) kernel arguments to + device memory, presumably via an offload backend such as OpenCL. It is not necessary for a function declared with the ``sycl_kernel_entry_point`` attribute to be called for the offload kernel entry point to be emitted. For inline functions and function templates, any ODR-use will suffice. For other functions, an ODR-use is not required; the offload kernel entry point will be -emitted if the function is defined. +emitted if the function is defined. In any case, a call to the function is +required for the synthesized call to ``sycl_enqueue_kernel_launch()`` to occur. Functions declared with the ``sycl_kernel_entry_point`` attribute are not limited to the simple example shown above. They may have additional template parameters, declare additional function parameters, and have complex control -flow in the function body. Function parameter decomposition and reconstitution +flow in the function body. Function parameter decomposition and reconstruction is performed for all function parameters. The function must abide by the language feature restrictions described in section 5.4, "Language restrictions -for device functions" in the SYCL 2020 specification. +for device functions" in the SYCL 2020 specification. If the function is a +non-static member function, ``this`` shall not be used in a potentially +evaluated expression. }]; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6f9f7e2714cb1..0ba99e5f653f1 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13093,13 +13093,17 @@ def err_sycl_kernel_virtual_arg : Error< // SYCL kernel entry point diagnostics def err_sycl_entry_point_invalid : Error< "the %0 attribute cannot be applied to a" - " %select{non-static member function|variadic function|deleted function|" - "defaulted function|constexpr function|consteval function|" - "function declared with the 'noreturn' attribute|coroutine|" + " %select{variadic function|deleted function|defaulted function|" + "constructor|destructor|coroutine|" + "constexpr function|consteval function|" + "function declared with the 'noreturn' attribute|" "function defined with a function try block}1">; def err_sycl_entry_point_invalid_redeclaration : Error< "the %0 kernel name argument does not match prior" " declaration%diff{: $ vs $|}1,2">; +def err_sycl_entry_point_invalid_this : Error< + "'this' cannot be%select{| implicitly}0 used in a potentially evaluated" + " expression in the body of a function declared with the %1 attribute">; def err_sycl_kernel_name_conflict : Error< "the %0 kernel name argument conflicts with a previous declaration">; def warn_sycl_kernel_name_not_a_class_type : Warning< @@ -13115,6 +13119,15 @@ def err_sycl_entry_point_return_type : Error< def err_sycl_entry_point_deduced_return_type : Error< "the %0 attribute only applies to functions with a non-deduced 'void' return" " type">; +def err_sycl_host_no_launch_function : Error< + "unable to find suitable 'sycl_enqueue_kernel_launch' function for host code " + "synthesis">; +def warn_sycl_device_no_host_launch_function : Warning< + "unable to find suitable 'sycl_enqueue_kernel_launch' function for host code " + "synthesis">, + InGroup>; +def note_sycl_host_launch_function : Note< + "define 'sycl_enqueue_kernel_launch' function template to fix this problem">; def warn_cuda_maxclusterrank_sm_90 : Warning< "'maxclusterrank' requires sm_90 or higher, CUDA arch provided: %0, ignoring " diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 9b7bb00fb7684..fa1e838ca5f61 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -25,6 +25,7 @@ def CaseStmt : StmtNode; def DefaultStmt : StmtNode; def CapturedStmt : StmtNode; def SYCLKernelCallStmt : StmtNode; +def UnresolvedSYCLKernelCallStmt : StmtNode; // Statements that might produce a value (for example, as the last non-null // statement in a GNU statement-expression). diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h index 94b247a689c2d..ca0481c166eaa 100644 --- a/clang/include/clang/Sema/ScopeInfo.h +++ b/clang/include/clang/Sema/ScopeInfo.h @@ -245,6 +245,10 @@ class FunctionScopeInfo { /// The set of GNU address of label extension "&&label". llvm::SmallVector AddrLabels; + /// An unresolved identifier lookup expression for an implicit call + /// to a SYCL kernel launch function in a dependent context. + Expr *SYCLKernelLaunchIdExpr = nullptr; + public: /// Represents a simple identification of a weak object. /// diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index fede263619501..8ce56ec3ba34a 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -672,8 +672,11 @@ class SemaSYCL : public SemaBase { // Used to check whether the function represented by FD is a SYCL // free function kernel or not. bool isFreeFunction(const FunctionDecl *FD); - - StmtResult BuildSYCLKernelCallStmt(FunctionDecl *FD, CompoundStmt *Body); + + StmtResult BuildSYCLKernelCallStmt(FunctionDecl *FD, CompoundStmt *Body, + Expr *LaunchIdExpr); + ExprResult BuildSYCLKernelLaunchIdExpr(FunctionDecl *FD, QualType KNT); + StmtResult BuildUnresolvedSYCLKernelCallStmt(CompoundStmt *CS, Expr *IdExpr); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 17c421ef52b81..d9cfd32f11faf 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1620,6 +1620,9 @@ enum StmtCode { /// A SYCLKernelCallStmt record. STMT_SYCLKERNELCALL, + /// A SYCLKernelCallStmt record. + STMT_UNRESOLVED_SYCL_KERNEL_CALL, + /// A GCC-style AsmStmt record. STMT_GCCASM, diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index e71d0ac7cc7d5..9df4614bc74ee 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" +#include "clang/AST/StmtSYCL.h" #include "clang/Basic/ExceptionSpecificationType.h" #include "llvm/ADT/ArrayRef.h" diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 608c6dc9d2129..b420758b9168d 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -586,7 +586,7 @@ void StmtPrinter::VisitCapturedStmt(CapturedStmt *Node) { } void StmtPrinter::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *Node) { - PrintStmt(Node->getOutlinedFunctionDecl()->getBody()); + PrintStmt(Node->getOriginalStmt()); } void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) { @@ -1435,6 +1435,11 @@ void StmtPrinter::VisitSYCLUniqueStableIdExpr(SYCLUniqueStableIdExpr *Node) { OS << ")"; } +void StmtPrinter::VisitUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *Node) { + PrintStmt(Node->getOriginalStmt()); +} + void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) { OS << PredefinedExpr::getIdentKindName(Node->getIdentKind()); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index e05345451c385..d153a8b48a732 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1374,6 +1374,11 @@ void StmtProfiler::VisitSYCLUniqueStableNameExpr( VisitType(S->getTypeSourceInfo()->getType()); } +void StmtProfiler::VisitUnresolvedSYCLKernelCallStmt( + const UnresolvedSYCLKernelCallStmt *S) { + VisitStmt(S); +} + void StmtProfiler::VisitSYCLUniqueStableIdExpr( const SYCLUniqueStableIdExpr *S) { VisitExpr(S); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 031ef73214e76..9e26b145e0589 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtSYCL.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticSema.h" @@ -540,21 +541,7 @@ bool CodeGenFunction::EmitSimpleStmt(const Stmt *S, EmitSEHLeaveStmt(cast(*S)); break; case Stmt::SYCLKernelCallStmtClass: - // SYCL kernel call statements are generated as wrappers around the body - // of functions declared with the sycl_kernel_entry_point attribute. Such - // functions are used to specify how a SYCL kernel (a function object) is - // to be invoked; the SYCL kernel call statement contains a transformed - // variation of the function body and is used to generate a SYCL kernel - // caller function; a function that serves as the device side entry point - // used to execute the SYCL kernel. The sycl_kernel_entry_point attributed - // function is invoked by host code in order to trigger emission of the - // device side SYCL kernel caller function and to generate metadata needed - // by SYCL run-time library implementations; the function is otherwise - // intended to have no effect. As such, the function body is not evaluated - // as part of the invocation during host compilation (and the function - // should not be called or emitted during device compilation); the SYCL - // kernel call statement is thus handled as a null statement for the - // purpose of code generation. + EmitSYCLKernelCallStmt(cast(*S)); break; } return true; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 9c307f6c3bb54..6c471b8068ccb 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3641,6 +3641,8 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitCoyieldLValue(const CoyieldExpr *E); RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID); + void EmitSYCLKernelCallStmt(const SYCLKernelCallStmt &S); + void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); void ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); diff --git a/clang/lib/CodeGen/CodeGenSYCL.cpp b/clang/lib/CodeGen/CodeGenSYCL.cpp index 31e0ed9625750..d028ef6a185ec 100644 --- a/clang/lib/CodeGen/CodeGenSYCL.cpp +++ b/clang/lib/CodeGen/CodeGenSYCL.cpp @@ -17,6 +17,21 @@ using namespace clang; using namespace CodeGen; +void CodeGenFunction::EmitSYCLKernelCallStmt(const SYCLKernelCallStmt &S) { + if (getLangOpts().SYCLIsDevice) { + // A sycl_kernel_entry_point attributed function is unlikely to be emitted + // during device compilation, but might be if it is ODR-used from device + // code that is emitted. In these cases, the function is emitted with an + // empty body; the original body is emitted in the offload kernel entry + // point and the synthesized kernel launch code is only relevant for host + // compilation. + return; + } + + assert(getLangOpts().SYCLIsHost); + EmitStmt(S.getKernelLaunchStmt()); +} + static void SetDeviceKernelAttributes(llvm::Function *Fn, CodeGenFunction &CGF) { // SYCL 2020 device language restrictions require forward progress and // disallow recursion. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 50f1a337ed916..17c040c5ff0c9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -15872,7 +15872,6 @@ Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D, if (!Bases.empty()) OpenMP().ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(Dcl, Bases); - return Dcl; } @@ -16284,6 +16283,20 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D, maybeAddDeclWithEffects(FD); + if (FD && !FD->isInvalidDecl() && + FD->hasAttr() && FnBodyScope) { + // Building KernelLaunchIdExpr requires performing an unqualified lookup + // which can only be done correctly while the stack of parsing scopes is + // alive, so we do it here when we start parsing function body even if it is + // a templated function. + const auto *SKEPAttr = FD->getAttr(); + if (!SKEPAttr->isInvalidAttr()) { + ExprResult LaunchIdExpr = + SYCL().BuildSYCLKernelLaunchIdExpr(FD, SKEPAttr->getKernelName()); + getCurFunction()->SYCLKernelLaunchIdExpr = LaunchIdExpr.get(); + } + } + return D; } @@ -16473,25 +16486,37 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, FD->getAttr(); if (FD->isDefaulted()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*defaulted function*/ 3; + << SKEPAttr << /*defaulted function*/ 2; SKEPAttr->setInvalidAttr(); } else if (FD->isDeleted()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*deleted function*/ 2; + << SKEPAttr << /*deleted function*/ 1; SKEPAttr->setInvalidAttr(); } else if (FSI->isCoroutine()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*coroutine*/ 7; + << SKEPAttr << /*coroutine*/ 5; SKEPAttr->setInvalidAttr(); } else if (Body && isa(Body)) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*function defined with a function try block*/ 8; + << SKEPAttr << /*function defined with a function try block*/ 9; SKEPAttr->setInvalidAttr(); } - if (Body && !FD->isTemplated() && !SKEPAttr->isInvalidAttr()) { - StmtResult SR = - SYCL().BuildSYCLKernelCallStmt(FD, cast(Body)); + // We don't need to build SYCLKernelCallStmt for template instantiations + // since it was already created by template instantiator. + if (Body && !SKEPAttr->isInvalidAttr()) { + StmtResult SR; + if (FD->isTemplated()) { + SR = SYCL().BuildUnresolvedSYCLKernelCallStmt( + cast(Body), getCurFunction()->SYCLKernelLaunchIdExpr); + } else if (FD->isTemplateInstantiation()) { + assert(isa(Body)); + SR = Body; + } else { + SR = SYCL().BuildSYCLKernelCallStmt( + FD, cast(Body), + getCurFunction()->SYCLKernelLaunchIdExpr); + } if (SR.isInvalid()) return nullptr; Body = SR.get(); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 50a0be09c3186..16020db89e53b 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -15,6 +15,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtSYCL.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" @@ -1251,6 +1252,15 @@ CanThrowResult Sema::canThrow(const Stmt *S) { return CT; } + case Stmt::SYCLKernelCallStmtClass: { + auto *SKCS = cast(S); + if (getLangOpts().SYCLIsDevice) + return canSubStmtsThrow(*this, + SKCS->getOutlinedFunctionDecl()->getBody()); + assert(getLangOpts().SYCLIsHost); + return canSubStmtsThrow(*this, SKCS->getKernelLaunchStmt()); + } + // ObjC message sends are like function calls, but never have exception // specs. case Expr::ObjCMessageExprClass: @@ -1435,7 +1445,6 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::AttributedStmtClass: case Stmt::BreakStmtClass: case Stmt::CapturedStmtClass: - case Stmt::SYCLKernelCallStmtClass: case Stmt::CaseStmtClass: case Stmt::CompoundStmtClass: case Stmt::ContinueStmtClass: diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index dc16b824a883c..ccb3718c03116 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -7814,43 +7814,45 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) { } } - if (const auto *MD = dyn_cast(FD)) { - if (!MD->isStatic()) { - Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*non-static member function*/ 0; - SKEPAttr->setInvalidAttr(); - } + if (isa(FD)) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << SKEPAttr << /*constructor*/ 3; + SKEPAttr->setInvalidAttr(); + } + if (isa(FD)) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << SKEPAttr << /*destructor*/ 4; + SKEPAttr->setInvalidAttr(); } - if (FD->isVariadic()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*variadic function*/ 1; + << SKEPAttr << /*variadic function*/ 0; SKEPAttr->setInvalidAttr(); } if (FD->isDefaulted()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*defaulted function*/ 3; + << SKEPAttr << /*defaulted function*/ 2; SKEPAttr->setInvalidAttr(); } else if (FD->isDeleted()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*deleted function*/ 2; + << SKEPAttr << /*deleted function*/ 1; SKEPAttr->setInvalidAttr(); } if (FD->isConsteval()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*consteval function*/ 5; + << SKEPAttr << /*consteval function*/ 7; SKEPAttr->setInvalidAttr(); } else if (FD->isConstexpr()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*constexpr function*/ 4; + << SKEPAttr << /*constexpr function*/ 6; SKEPAttr->setInvalidAttr(); } if (FD->isNoReturn()) { Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) - << SKEPAttr << /*function declared with the 'noreturn' attribute*/ 6; + << SKEPAttr << /*function declared with the 'noreturn' attribute*/ 8; SKEPAttr->setInvalidAttr(); } @@ -7886,8 +7888,138 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) { } } +ExprResult SemaSYCL::BuildSYCLKernelLaunchIdExpr(FunctionDecl *FD, + QualType KNT) { + + ASTContext &Ctx = SemaRef.getASTContext(); + // Some routines need a valid source location to work correctly. + SourceLocation BodyLoc = + FD->getEndLoc().isValid() ? FD->getEndLoc() : FD->getLocation(); + + IdentifierInfo &LaunchFooName = + Ctx.Idents.get("sycl_enqueue_kernel_launch", tok::TokenKind::identifier); + + // Perform overload resolution for a call to an accessible (member) function + // template named 'sycl_enqueue_kernel_launch' "from within the definition of + // FD where": + // - The kernel name type is passed as the first template argument. + // - Any remaining template parameters are deduced from the function + // arguments or assigned by default template arguments. + // - 'this' is passed as the implicit function argument if 'FD' is a + // non-static member function. + // - The name of the kernel, expressed as a string literal, is passed as the + // first function argument. + // - The parameters of FD are forwarded as-if by 'std::forward()' as the + // remaining explicit function arguments. + // - Any remaining function arguments are initialized by default arguments. + LookupResult Result(SemaRef, &LaunchFooName, BodyLoc, + Sema::LookupOrdinaryName); + CXXScopeSpec SS; + SemaRef.LookupTemplateName(Result, SemaRef.getCurScope(), SS, + /*ObjectType=*/QualType(), + /*EnteringContext=*/false, BodyLoc); + + if (Result.empty() || Result.isAmbiguous()) { + SemaRef.Diag(BodyLoc, SemaRef.getLangOpts().SYCLIsHost + ? diag::err_sycl_host_no_launch_function + : diag::warn_sycl_device_no_host_launch_function); + SemaRef.Diag(BodyLoc, diag::note_sycl_host_launch_function); + + return ExprError(); + } + + TemplateArgumentListInfo TALI{BodyLoc, BodyLoc}; + TemplateArgument KNTA = TemplateArgument(KNT); + TemplateArgumentLoc TAL = + SemaRef.getTrivialTemplateArgumentLoc(KNTA, QualType(), BodyLoc); + TALI.addArgument(TAL); + ExprResult IdExpr; + if (SemaRef.isPotentialImplicitMemberAccess(SS, Result, + /*IsAddressOfOperand=*/false)) + // BuildPossibleImplicitMemberExpr creates UnresolvedMemberExpr. Using it + // allows to pass implicit/explicit this argument automatically. + IdExpr = SemaRef.BuildPossibleImplicitMemberExpr(SS, BodyLoc, Result, &TALI, + SemaRef.getCurScope()); + else + IdExpr = SemaRef.BuildTemplateIdExpr(SS, BodyLoc, Result, + /*RequiresADL=*/true, &TALI); + + // Can happen if SKEP attributed function is a static member, but the launcher + // is a regular member. Perhaps emit a note saying that we're in host code + // synthesis. + if (IdExpr.isInvalid()) + return ExprError(); + + return IdExpr; +} + +StmtResult SemaSYCL::BuildUnresolvedSYCLKernelCallStmt(CompoundStmt *CS, + Expr *IdExpr) { + return UnresolvedSYCLKernelCallStmt::Create(SemaRef.getASTContext(), CS, + IdExpr); +} + namespace { +void PrepareKernelArgumentsForKernelLaunch(SmallVectorImpl &Args, + const SYCLKernelInfo *SKI, + Sema &SemaRef, + SourceLocation Loc) { + assert(SKI && "Need a kernel!"); + ASTContext &Ctx = SemaRef.getASTContext(); + + // Prepare a string literal that contains the kernel name. + const std::string KernelName = SKI->GetKernelName(); + QualType KernelNameCharTy = Ctx.CharTy.withConst(); + llvm::APInt KernelNameSize(Ctx.getTypeSize(Ctx.getSizeType()), + KernelName.size() + 1); + QualType KernelNameArrayTy = Ctx.getConstantArrayType( + KernelNameCharTy, KernelNameSize, nullptr, ArraySizeModifier::Normal, 0); + Expr *KernelNameExpr = + StringLiteral::Create(Ctx, KernelName, StringLiteralKind::Ordinary, + /*Pascal*/ false, KernelNameArrayTy, Loc); + QualType FuncParamTy = Ctx.getPointerType(Ctx.CharTy.withConst()); + ImplicitCastExpr *KernelNameArrayDecayExpr = new (Ctx) ImplicitCastExpr( + ImplicitCastExpr::OnStack, FuncParamTy, CK_ArrayToPointerDecay, + KernelNameExpr, VK_PRValue, FPOptionsOverride()); + Args.push_back(KernelNameArrayDecayExpr); + + // Right now we simply forward the arguments of the skep-attributed function. + // With decomposition present there can be another logic. + // Make sure to use CurContext to avoid diagnostics that we're using a + // variable coming from another context. The function should be the same as in + // the kernel info though. + auto *FD = cast(SemaRef.CurContext); + assert(declaresSameEntity(FD, SKI->getKernelEntryPointDecl())); + for (ParmVarDecl *PVD : FD->parameters()) { + QualType ParamType = PVD->getOriginalType().getNonReferenceType(); + Expr *DRE = SemaRef.BuildDeclRefExpr(PVD, ParamType, VK_LValue, Loc); + assert(DRE); + Args.push_back(DRE); + } +} + +StmtResult BuildSYCLKernelLaunchStmt(Sema &SemaRef, + const SYCLKernelInfo *SKI, + Expr *IdExpr, SourceLocation Loc) { + SmallVector Stmts; + assert(SKI && "Need a Kernel!"); + + if (IdExpr) { + llvm::SmallVector Args; + PrepareKernelArgumentsForKernelLaunch(Args, SKI, SemaRef, Loc); + ExprResult LaunchResult = + SemaRef.BuildCallExpr(SemaRef.getCurScope(), IdExpr, Loc, Args, Loc); + if (LaunchResult.isInvalid()) + return StmtError(); + + Stmts.push_back(SemaRef.MaybeCreateExprWithCleanups(LaunchResult).get()); + } + + return CompoundStmt::Create(SemaRef.getASTContext(), Stmts, + FPOptionsOverride(), Loc, Loc); +} + // The body of a function declared with the [[sycl_kernel_entry_point]] // attribute is cloned and transformed to substitute references to the original // function parameters with references to replacement variables that stand in @@ -7898,9 +8030,10 @@ class OutlinedFunctionDeclBodyInstantiator public: using ParmDeclMap = llvm::DenseMap; - OutlinedFunctionDeclBodyInstantiator(Sema &S, ParmDeclMap &M) + OutlinedFunctionDeclBodyInstantiator(Sema &S, ParmDeclMap &M, + FunctionDecl *FD) : TreeTransform(S), SemaRef(S), - MapRef(M) {} + MapRef(M), FD(FD) {} // A new set of AST nodes is always required. bool AlwaysRebuild() { return true; } @@ -7926,18 +8059,61 @@ class OutlinedFunctionDeclBodyInstantiator return DRE; } + // Diagnose CXXThisExpr in a potentially evaluated expression. + ExprResult TransformCXXThisExpr(CXXThisExpr *CTE) { + if (SemaRef.currentEvaluationContext().isPotentiallyEvaluated()) { + SemaRef.Diag(CTE->getExprLoc(), diag::err_sycl_entry_point_invalid_this) + << (CTE->isImplicitCXXThis() ? /* implicit */ 1 : /* empty */ 0) + << FD->getAttr(); + } + return CTE; + } + private: Sema &SemaRef; ParmDeclMap &MapRef; + FunctionDecl *FD; }; +OutlinedFunctionDecl *BuildSYCLKernelEntryPointOutline(Sema &SemaRef, + FunctionDecl *FD, + CompoundStmt *Body) { + using ParmDeclMap = OutlinedFunctionDeclBodyInstantiator::ParmDeclMap; + ParmDeclMap ParmMap; + + OutlinedFunctionDecl *OFD = OutlinedFunctionDecl::Create( + SemaRef.getASTContext(), FD, FD->getNumParams()); + unsigned i = 0; + for (ParmVarDecl *PVD : FD->parameters()) { + ImplicitParamDecl *IPD = ImplicitParamDecl::Create( + SemaRef.getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(), + PVD->getType(), ImplicitParamKind::Other); + OFD->setParam(i, IPD); + ParmMap[PVD] = IPD; + ++i; + } + + OutlinedFunctionDeclBodyInstantiator OFDBodyInstantiator(SemaRef, ParmMap, + FD); + Stmt *OFDBody = OFDBodyInstantiator.TransformStmt(Body).get(); + OFD->setBody(OFDBody); + OFD->setNothrow(); + + return OFD; +} + } // unnamed namespace StmtResult SemaSYCL::BuildSYCLKernelCallStmt(FunctionDecl *FD, - CompoundStmt *Body) { + CompoundStmt *Body, + Expr *LaunchIdExpr) { assert(!FD->isInvalidDecl()); assert(!FD->isTemplated()); assert(FD->hasPrototype()); + // The current context must be the function definition context to ensure + // that name lookup and parameter and local variable creation are performed + // within the correct scope. + assert(SemaRef.CurContext == FD); const auto *SKEPAttr = FD->getAttr(); assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute"); @@ -7950,29 +8126,20 @@ StmtResult SemaSYCL::BuildSYCLKernelCallStmt(FunctionDecl *FD, getASTContext().getSYCLKernelInfo(SKEPAttr->getKernelName()); assert(declaresSameEntity(SKI.getKernelEntryPointDecl(), FD) && "SYCL kernel name conflict"); - (void)SKI; - using ParmDeclMap = OutlinedFunctionDeclBodyInstantiator::ParmDeclMap; - ParmDeclMap ParmMap; - - assert(SemaRef.CurContext == FD); + // Build the outline of the synthesized device entry point function. OutlinedFunctionDecl *OFD = - OutlinedFunctionDecl::Create(getASTContext(), FD, FD->getNumParams()); - unsigned i = 0; - for (ParmVarDecl *PVD : FD->parameters()) { - ImplicitParamDecl *IPD = ImplicitParamDecl::Create( - getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(), - PVD->getType(), ImplicitParamKind::Other); - OFD->setParam(i, IPD); - ParmMap[PVD] = IPD; - ++i; - } + BuildSYCLKernelEntryPointOutline(SemaRef, FD, Body); + assert(OFD); - OutlinedFunctionDeclBodyInstantiator OFDBodyInstantiator(SemaRef, ParmMap); - Stmt *OFDBody = OFDBodyInstantiator.TransformStmt(Body).get(); - OFD->setBody(OFDBody); - OFD->setNothrow(); - Stmt *NewBody = new (getASTContext()) SYCLKernelCallStmt(Body, OFD); + // Build host kernel launch stmt. + SourceLocation BodyLoc = + FD->getEndLoc().isValid() ? FD->getEndLoc() : FD->getLocation(); + StmtResult LaunchRes = + BuildSYCLKernelLaunchStmt(SemaRef, &SKI, LaunchIdExpr, BodyLoc); + + Stmt *NewBody = + new (getASTContext()) SYCLKernelCallStmt(Body, LaunchRes.get(), OFD); return NewBody; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 79d9f854839da..df68c4b8f7f63 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -12836,6 +12836,31 @@ ExprResult TreeTransform::TransformSYCLUniqueStableIdExpr( NewExpr.get()); } +template +StmtResult TreeTransform::TransformUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *S) { + auto *FD = cast(SemaRef.CurContext); + const auto *SKEPAttr = FD->getAttr(); + if (!SKEPAttr || SKEPAttr->isInvalidAttr()) + return StmtError(); + + ExprResult IdExpr = getDerived().TransformExpr(S->getKernelLaunchIdExpr()); + + if (IdExpr.isInvalid()) + return StmtError(); + + StmtResult Body = getDerived().TransformStmt(S->getOriginalStmt()); + if (Body.isInvalid()) + return StmtError(); + + StmtResult SR = SemaRef.SYCL().BuildSYCLKernelCallStmt( + cast(SemaRef.CurContext), cast(Body.get()), + IdExpr.get()); + + return SR; +} + + template ExprResult TreeTransform::TransformPredefinedExpr(PredefinedExpr *E) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 69009b1134686..29962c05a4650 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -527,6 +527,7 @@ void ASTStmtReader::VisitCapturedStmt(CapturedStmt *S) { void ASTStmtReader::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *S) { VisitStmt(S); S->setOriginalStmt(cast(Record.readSubStmt())); + S->setKernelLaunchStmt(cast(Record.readSubStmt())); S->setOutlinedFunctionDecl(readDeclAs()); } @@ -602,6 +603,14 @@ void ASTStmtReader::VisitSYCLUniqueStableIdExpr(SYCLUniqueStableIdExpr *E) { E->setExpr(Record.readSubExpr()); } +void ASTStmtReader::VisitUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *S) { + VisitStmt(S); + + S->setOriginalStmt(cast(Record.readSubStmt())); + S->setKernelLaunchIdExpr(Record.readExpr()); +} + void ASTStmtReader::VisitPredefinedExpr(PredefinedExpr *E) { VisitExpr(E); bool HasFunctionName = Record.readInt(); @@ -3197,6 +3206,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case EXPR_SYCL_UNIQUE_STABLE_ID: S = SYCLUniqueStableIdExpr::CreateEmpty(Context); + case STMT_UNRESOLVED_SYCL_KERNEL_CALL: + S = UnresolvedSYCLKernelCallStmt::CreateEmpty(Context); + break; + case EXPR_OPENACC_ASTERISK_SIZE: S = OpenACCAsteriskSizeExpr::CreateEmpty(Context); break; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index ecbb2cc9a868a..5c82a01f36246 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -611,6 +611,7 @@ void ASTStmtWriter::VisitCapturedStmt(CapturedStmt *S) { void ASTStmtWriter::VisitSYCLKernelCallStmt(SYCLKernelCallStmt *S) { VisitStmt(S); Record.AddStmt(S->getOriginalStmt()); + Record.AddStmt(S->getKernelLaunchStmt()); Record.AddDeclRef(S->getOutlinedFunctionDecl()); Code = serialization::STMT_SYCLKERNELCALL; @@ -680,6 +681,16 @@ void ASTStmtWriter::VisitSYCLUniqueStableIdExpr(SYCLUniqueStableIdExpr *E) { Code = serialization::EXPR_SYCL_UNIQUE_STABLE_ID; } +void ASTStmtWriter::VisitUnresolvedSYCLKernelCallStmt( + UnresolvedSYCLKernelCallStmt *S) { + VisitStmt(S); + + Record.AddStmt(S->getOriginalStmt()); + Record.AddStmt(S->getKernelLaunchIdExpr()); + + Code = serialization::STMT_UNRESOLVED_SYCL_KERNEL_CALL; +} + void ASTStmtWriter::VisitPredefinedExpr(PredefinedExpr *E) { VisitExpr(E); diff --git a/clang/test/AST/ast-print-sycl-kernel-call.cpp b/clang/test/AST/ast-print-sycl-kernel-call.cpp new file mode 100644 index 0000000000000..2243ee024be1a --- /dev/null +++ b/clang/test/AST/ast-print-sycl-kernel-call.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -fsycl-is-host -ast-print %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -ast-print %s -o - | FileCheck %s + +struct sycl_kernel_launcher { + template + void sycl_enqueue_kernel_launch(const char *, Ts...) {} + + template + void kernel_entry_point(KernelType kernel) { + kernel(); + } +// CHECK: template void kernel_entry_point(KernelType kernel) { +// CHECK-NEXT: kernel(); +// CHECK-NEXT: } +// CHECK: template<> void kernel_entry_point((lambda at {{.*}}) kernel) { +// CHECK-NEXT: kernel(); +// CHECK-NEXT: } +}; + +void f(sycl_kernel_launcher skl) { + skl.kernel_entry_point([]{}); +} diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp index 8e8e03c2451a0..9fb569064a540 100644 --- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp +++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp @@ -34,6 +34,8 @@ template struct K { void operator()(Ts...) const {} }; +template +void sycl_enqueue_kernel_launch(const char *, Ts...) {} [[clang::sycl_kernel_entry_point(KN<1>)]] void skep1() { @@ -41,6 +43,12 @@ void skep1() { // CHECK: |-FunctionDecl {{.*}} skep1 'void ()' // CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}} // CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' {{.*}} +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi1EE" // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<1> @@ -57,9 +65,10 @@ void skep2>(K<2>); // CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KT // CHECK-NEXT: | |-FunctionDecl {{.*}} skep2 'void (KT)' // CHECK-NEXT: | | |-ParmVarDecl {{.*}} k 'KT' -// CHECK-NEXT: | | |-CompoundStmt {{.*}} -// CHECK-NEXT: | | | `-CallExpr {{.*}} '' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' +// CHECK-NEXT: | | |-UnresolvedSYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} '' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' // CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} KNT // CHECK-NEXT: | `-FunctionDecl {{.*}} skep2 'void (K<2>)' explicit_instantiation_definition @@ -77,6 +86,15 @@ void skep2>(K<2>); // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<2>' lvalue // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<2>' lvalue ParmVar {{.*}} 'k' 'K<2>' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<2>)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi2EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'K<2>' 'void (const K<2> &) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<2>' lvalue ParmVar {{.*}} 'k' 'K<2>' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<2>' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -102,9 +120,10 @@ void skep3>(K<3> k) { // CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KT // CHECK-NEXT: | |-FunctionDecl {{.*}} skep3 'void (KT)' // CHECK-NEXT: | | |-ParmVarDecl {{.*}} k 'KT' -// CHECK-NEXT: | | |-CompoundStmt {{.*}} -// CHECK-NEXT: | | | `-CallExpr {{.*}} '' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' +// CHECK-NEXT: | | |-UnresolvedSYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} '' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' // CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} KNT // CHECK-NEXT: | `-Function {{.*}} 'skep3' 'void (K<3>)' @@ -123,6 +142,15 @@ void skep3>(K<3> k) { // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<3>' lvalue // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<3>' lvalue ParmVar {{.*}} 'k' 'K<3>' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, K<3>)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<3>)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' 'void (const char *, K<3>)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi3EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'K<3>' 'void (const K<3> &) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const K<3>' lvalue +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'K<3>' lvalue ParmVar {{.*}} 'k' 'K<3>' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<3>' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -152,6 +180,19 @@ void skep4(K<4> k, int p1, int p2) { // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p1' 'int' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p2' 'int' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, K<4>, int, int)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, K<4>, int, int)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' 'void (const char *, K<4>, int, int)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi4EE" +// CHECK-NEXT: | | | |-CXXConstructExpr {{.*}} 'K<4>' 'void (const K<4> &) noexcept' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'const K<4>' lvalue +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'K<4>' lvalue ParmVar {{.*}} 'k' 'K<4>' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p1' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p2' 'int' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<4>' // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used p1 'int' @@ -182,7 +223,24 @@ void skep5(int unused1, K<5> k, int unused2, int p, int unused3) { // CHECK-NEXT: | |-ParmVarDecl {{.*}} unused3 'int' // CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}} // CHECK-NEXT: | | |-CompoundStmt {{.*}} -// CHECK: | | `-OutlinedFunctionDecl {{.*}} +// CHECK: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, int, K<5>, int, int, int)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, int, K<5>, int, int, int)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' 'void (const char *, int, K<5>, int, int, int)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi5EE" +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused1' 'int' +// CHECK-NEXT: | | | |-CXXConstructExpr {{.*}} 'K<5>' 'void (const K<5> &) noexcept' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'const K<5>' lvalue +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'K<5>' lvalue ParmVar {{.*}} 'k' 'K<5>' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused2' 'int' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'p' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'unused3' 'int' +// CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit unused1 'int' // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'K<5>' // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit unused2 'int' @@ -227,6 +285,14 @@ void skep6(const S6 &k) { // CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)() const' // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S6)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S6)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' 'void (const char *, S6)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi6EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S6' 'void (const S6 &) noexcept' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'const S6 &' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -260,6 +326,15 @@ void skep7(S7 k) { // CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' // CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const S7' lvalue // CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S7' lvalue ParmVar {{.*}} 'k' 'S7' +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S7)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S7)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' 'void (const char *, S7)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi7EE" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S7' 'void (const S7 &) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const S7' lvalue +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S7' lvalue ParmVar {{.*}} 'k' 'S7' // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}} // CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} implicit used k 'S7' // CHECK-NEXT: | | `-CompoundStmt {{.*}} @@ -270,6 +345,112 @@ void skep7(S7 k) { // CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'S7' lvalue ImplicitParam {{.*}} 'k' 'S7' // CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<7> +// Symbol names generated for the kernel entry point function should be +// representable in the ordinary literal encoding even when the kernel name +// type is named with esoteric characters. +struct \u03b4\u03c4\u03c7; // Delta Tau Chi (δτχ) +struct S8 { + void operator()() const; +}; +[[clang::sycl_kernel_entry_point(\u03b4\u03c4\u03c7)]] +void skep8(S8 k) { + k(); +} +// CHECK: |-FunctionDecl {{.*}} skep8 'void (S8)' +// CHECK-NEXT: | |-ParmVarDecl {{.*}} used k 'S8' +// CHECK-NEXT: | |-SYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | |-CompoundStmt {{.*}} +// CHECK: | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CallExpr {{.*}} 'void' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'void (*)(const char *, S8)' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'void (const char *, S8)' lvalue Function {{.*}} 'sycl_enqueue_kernel_launch' 'void (const char *, S8)' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | `-StringLiteral {{.*}} 'const char[12]' lvalue "_ZTS6\316\264\317\204\317\207" +// CHECK-NEXT: | | | `-CXXConstructExpr {{.*}} 'S8' 'void (const S8 &) noexcept' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'const S8' lvalue +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'S8' lvalue ParmVar {{.*}} 'k' 'S8' +// CHECK: | | `-OutlinedFunctionDecl {{.*}} +// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} + +class Handler { +template +void sycl_enqueue_kernel_launch(const char *, Ts...) {} +public: +template +[[clang::sycl_kernel_entry_point(KNT)]] +void skep9(KT k, int a, int b) { + k(a, b); +} +}; +void foo() { + Handler H; + H.skep9>([=](int a, int b){return a+b;}, 1, 2); +} + +// CHECK: | |-FunctionTemplateDecl {{.*}} skep9 +// CHECK-NEXT: | | |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 KNT +// CHECK-NEXT: | | |-TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 1 KT +// CHECK-NEXT: | | |-CXXMethodDecl {{.*}} skep9 'void (KT, int, int)' implicit-inline +// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced k 'KT' +// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced a 'int' +// CHECK-NEXT: | | | |-ParmVarDecl {{.*}} referenced b 'int' +// CHECK-NEXT: | | | |-UnresolvedSYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | | `-CallExpr {{.*}} '' +// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'KT' lvalue ParmVar {{.*}} 'k' 'KT' +// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int' +// CHECK-NEXT: | | | `-SYCLKernelEntryPointAttr {{.*}} KNT +// CHECK-NEXT: | | `-CXXMethodDecl {{.*}} used skep9 {{.*}} implicit_instantiation implicit-inline +// CHECK-NEXT: | | |-TemplateArgument type 'KN<9>' +// CHECK-NEXT: | | | `-RecordType {{.*}} 'KN<9>' canonical +// CHECK-NEXT: | | | `-ClassTemplateSpecialization {{.*}}'KN' +// CHECK-NEXT: | | |-TemplateArgument type {{.*}} +// CHECK-NEXT: | | | `-RecordType {{.*}} +// CHECK-NEXT: | | | `-CXXRecord {{.*}} +// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used k {{.*}} +// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used a 'int' +// CHECK-NEXT: | | |-ParmVarDecl {{.*}} used b 'int' +// CHECK-NEXT: | | |-SYCLKernelCallStmt {{.*}} +// CHECK-NEXT: | | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | | `-CXXOperatorCallExpr {{.*}} 'int' '()' +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int (*)(int, int) const' +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int (int, int) const' lvalue CXXMethod {{.*}} 'operator()' 'int (int, int) const' +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} lvalue +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} lvalue ParmVar {{.*}} 'k' {{.*}} +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int' +// CHECK-NEXT: | | | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | | | `-CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: | | | | |-MemberExpr {{.*}} '' ->sycl_enqueue_kernel_launch {{.*}} +// CHECK-NEXT: | | | | | `-CXXThisExpr {{.*}} 'Handler *' implicit this +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'const char *' +// CHECK-NEXT: | | | | | `-StringLiteral {{.*}} 'const char[14]' lvalue "_ZTS2KNILi9EE" +// CHECK-NEXT: | | | | |-CXXConstructExpr {{.*}} +// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} lvalue +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} lvalue ParmVar {{.*}} 'k' {{.*}} +// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'a' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'b' 'int' +// CHECK-NEXT: | | | `-OutlinedFunctionDecl {{.*}} +// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used k {{.*}} +// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used a 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} implicit used b 'int' +// CHECK-NEXT: | | | `-CompoundStmt {{.*}} +// CHECK-NEXT: | | | `-CXXOperatorCallExpr {{.*}} 'int' '()' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int (*)(int, int) const' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int (int, int) const' lvalue CXXMethod {{.*}} 'operator()' 'int (int, int) const' +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} lvalue +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} lvalue ImplicitParam {{.*}} 'k' {{.*}} +// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ImplicitParam {{.*}} 'a' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ImplicitParam {{.*}} 'b' 'int' +// CHECK-NEXT: | | `-SYCLKernelEntryPointAttr {{.*}} struct KN<9> + void the_end() {} // CHECK: `-FunctionDecl {{.*}} the_end 'void ()' diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp index 1a82bdc1f5698..58daf9bca1c3c 100644 --- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp +++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp @@ -28,6 +28,9 @@ // A unique kernel name type is required for each declared kernel entry point. template struct KN; +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + [[clang::sycl_kernel_entry_point(KN<1>)]] void skep1() { } diff --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp index daaf2382679da..424cafcadc8aa 100644 --- a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp +++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp @@ -23,8 +23,13 @@ // Test the generation of SYCL kernel caller functions. These functions are // generated from functions declared with the sycl_kernel_entry_point attribute -// and emited during device compilation. They are not emitted during device -// compilation. +// and emited during device compilation. +// Test the generation of SYCL kernel launch statements during host compilation. +// These statements are calls to sycl_enqueus_kernel_launch functions or class +// members in case skep-attributed functions are also members of the same class. + +template +void sycl_enqueue_kernel_launch(const char *, KernelObj) {} struct single_purpose_kernel_name; struct single_purpose_kernel { @@ -42,19 +47,37 @@ void kernel_single_task(KernelType kernelFunc) { kernelFunc(42); } +// Exercise code gen with kernel name types named with esoteric characters. +struct \u03b4\u03c4\u03c7; // Delta Tau Chi (δτχ) + +class Handler { +template +void sycl_enqueue_kernel_launch(const char *, Ts...) {} +public: +template +[[clang::sycl_kernel_entry_point(KNT)]] +void skep(KT k, int a, int b) { + k(a, b); +} +}; + int main() { single_purpose_kernel obj; single_purpose_kernel_task(obj); int capture; auto lambda = [=](auto) { (void) capture; }; kernel_single_task(lambda); + kernel_single_task<\u03b4\u03c4\u03c7>([](int){}); + Handler H; + H.skep([=](int a, int b){return a+b;}, 1, 2); } // Verify that SYCL kernel caller functions are not emitted during host // compilation. // -// CHECK-HOST-NOT: _ZTS26single_purpose_kernel_name -// CHECK-HOST-NOT: _ZTSZ4mainE18lambda_kernel_name +// CHECK-HOST-NOT: define {{.*}} @_ZTS26single_purpose_kernel_name +// CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainEUlT_E_ +// CHECK-HOST-NOT: define {{.*}} @"_ZTS6\CE\B4\CF\84\CF\87" // Verify that sycl_kernel_entry_point attributed functions are not emitted // during device compilation. @@ -62,37 +85,115 @@ int main() { // CHECK-DEVICE-NOT: single_purpose_kernel_task // CHECK-DEVICE-NOT: kernel_single_task -// Verify that no code is generated for the bodies of sycl_kernel_entry_point -// attributed functions during host compilation. ODR-use of these functions may -// require them to be emitted, but they have no effect if called. +// Verify that kernel launch code is generated for sycl_kernel_entry_point +// attributed functions during host compilation. +// +// CHECK-HOST-LINUX: @.str = private unnamed_addr constant [33 x i8] c"_ZTS26single_purpose_kernel_name\00", align 1 +// CHECK-HOST-LINUX: @.str.1 = private unnamed_addr constant [18 x i8] c"_ZTSZ4mainEUlT_E_\00", align 1 +// CHECK-HOST-LINUX: @.str.2 = private unnamed_addr constant [12 x i8] c"_ZTS6\CE\B4\CF\84\CF\87\00", align 1 // // CHECK-HOST-LINUX: define dso_local void @_Z26single_purpose_kernel_task21single_purpose_kernel() #{{[0-9]+}} { // CHECK-HOST-LINUX-NEXT: entry: // CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-LINUX-NEXT: call void @_Z26sycl_enqueue_kernel_launchI26single_purpose_kernel_name21single_purpose_kernelEvPKcT0_(ptr noundef @.str) // CHECK-HOST-LINUX-NEXT: ret void // CHECK-HOST-LINUX-NEXT: } // // CHECK-HOST-LINUX: define internal void @_Z18kernel_single_taskIZ4mainEUlT_E_S1_EvT0_(i32 %kernelFunc.coerce) #{{[0-9]+}} { // CHECK-HOST-LINUX-NEXT: entry: // CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon, align 4 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon, align 4 // CHECK-HOST-LINUX-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0 // CHECK-HOST-LINUX-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %kernelFunc, i64 4, i1 false) +// CHECK-HOST-LINUX-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-LINUX-NEXT: %0 = load i32, ptr %coerce.dive1, align 4 +// CHECK-HOST-LINUX-NEXT: call void @_Z26sycl_enqueue_kernel_launchIZ4mainEUlT_E_S1_EvPKcT0_(ptr noundef @.str.1, i32 %0) // CHECK-HOST-LINUX-NEXT: ret void // CHECK-HOST-LINUX-NEXT: } // +// CHECK-HOST-LINUX: define internal void @"_Z18kernel_single_taskI6\CE\B4\CF\84\CF\87Z4mainEUliE_EvT0_"() #{{[0-9]+}} { +// CHECK-HOST-LINUX-NEXT: entry: +// CHECK-HOST-LINUX-NEXT: %kernelFunc = alloca %class.anon.0, align 1 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.0, align 1 +// CHECK-HOST-LINUX-NEXT: call void @"_Z26sycl_enqueue_kernel_launchI6\CE\B4\CF\84\CF\87Z4mainEUliE_EvPKcT0_"(ptr noundef @.str.2) +// CHECK-HOST-LINUX-NEXT: ret void +// CHECK-HOST-LINUX-NEXT: } + +// CHECK-HOST-LINUX: define internal void @_ZN7Handler4skepIZ4mainE22notaverygoodkernelnameZ4mainEUliiE_EEvT0_ii(ptr noundef nonnull align 1 dereferenceable(1) %this, i32 noundef %a, i32 noundef %b) #0 align 2 { +// CHECK-HOST-LINUX-NEXT: entry: +// CHECK-HOST-LINUX-NEXT: %k = alloca %class.anon.1, align 1 +// CHECK-HOST-LINUX-NEXT: %this.addr = alloca ptr, align 8 +// CHECK-HOST-LINUX-NEXT: %a.addr = alloca i32, align 4 +// CHECK-HOST-LINUX-NEXT: %b.addr = alloca i32, align 4 +// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.1, align 1 +// CHECK-HOST-LINUX-NEXT: store ptr %this, ptr %this.addr, align 8 +// CHECK-HOST-LINUX-NEXT: store i32 %a, ptr %a.addr, align 4 +// CHECK-HOST-LINUX-NEXT: store i32 %b, ptr %b.addr, align 4 +// CHECK-HOST-LINUX-NEXT: %this1 = load ptr, ptr %this.addr, align 8 +// CHECK-HOST-LINUX-NEXT: %0 = load i32, ptr %a.addr, align 4 +// CHECK-HOST-LINUX-NEXT: %1 = load i32, ptr %b.addr, align 4 +// CHECK-HOST-LINUX-NEXT: call void @_ZN7Handler26sycl_enqueue_kernel_launchIZ4mainE22notaverygoodkernelnameJZ4mainEUliiE_iiEEEvPKcDpT0_(ptr noundef nonnull align 1 dereferenceable(1) %this1, ptr noundef @.str.3, i32 noundef %0, i32 noundef %1) +// CHECK-HOST-LINUX-NEXT: ret void +// CHECK-HOST-LINUX-NEXT: } + // CHECK-HOST-WINDOWS: define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} { // CHECK-HOST-WINDOWS-NEXT: entry: // CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %struct.single_purpose_kernel, align 1 // CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %kernelFunc, i32 0, i32 0 // CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.single_purpose_kernel, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i8, ptr %coerce.dive1, align 1 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_enqueue_kernel_launch@Usingle_purpose_kernel_name@@Usingle_purpose_kernel@@@@YAXPEBDUsingle_purpose_kernel@@@Z"(ptr noundef @"??_C@_0CB@KFIJOMLB@_ZTS26single_purpose_kernel_name@", i8 %0) // CHECK-HOST-WINDOWS-NEXT: ret void // CHECK-HOST-WINDOWS-NEXT: } // // CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@V@?0??main@@9@V1?0??2@9@@@YAXV@?0??main@@9@@Z"(i32 %kernelFunc.coerce) #{{[0-9]+}} { // CHECK-HOST-WINDOWS-NEXT: entry: // CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon, align 4 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon, align 4 // CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %kernelFunc, i32 0, i32 0 // CHECK-HOST-WINDOWS-NEXT: store i32 %kernelFunc.coerce, ptr %coerce.dive, align 4 +// CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %kernelFunc, i64 4, i1 false) +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %coerce.dive1, align 4 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_enqueue_kernel_launch@V@?0??main@@9@V1?0??2@9@@@YAXPEBDV@?0??main@@9@@Z"(ptr noundef @"??_C@_0BC@NHCDOLAA@_ZTSZ4mainEUlT_E_?$AA@", i32 %0) +// +// CHECK-HOST-WINDOWS-NEXT: ret void +// CHECK-HOST-WINDOWS-NEXT: } +// +// CHECK-HOST-WINDOWS: define internal void @"??$kernel_single_task@U\CE\B4\CF\84\CF\87@@V@?0??main@@9@@@YAXV@?0??main@@9@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} { +// CHECK-HOST-WINDOWS-NEXT: entry: +// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %class.anon.0, align 1 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.0, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.0, ptr %kernelFunc, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: store i8 %kernelFunc.coerce, ptr %coerce.dive, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %class.anon.0, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i8, ptr %coerce.dive1, align 1 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_enqueue_kernel_launch@U\CE\B4\CF\84\CF\87@@V@?0??main@@9@@@YAXPEBDV@?0??main@@9@@Z"(ptr noundef @"??_C@_0M@BCGAEMBE@_ZTS6?N?$LE?O?$IE?O?$IH?$AA@", i8 %0) +// CHECK-HOST-WINDOWS-NEXT: ret void +// CHECK-HOST-WINDOWS-NEXT: } + +// CHECK-HOST-WINDOWS: define internal void @"??$skep@Vnotaverygoodkernelname@?1??main@@9@V@?0??2@9@@Handler@@QEAAXV@?0??main@@9@HH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this, i8 %k.coerce, i32 noundef %a, i32 noundef %b) #0 align 2 { +// CHECK-HOST-WINDOWS-NEXT: entry: +// CHECK-HOST-WINDOWS-NEXT: %k = alloca %class.anon.1, align 1 +// CHECK-HOST-WINDOWS-NEXT: %b.addr = alloca i32, align 4 +// CHECK-HOST-WINDOWS-NEXT: %a.addr = alloca i32, align 4 +// CHECK-HOST-WINDOWS-NEXT: %this.addr = alloca ptr, align 8 +// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.1, align 1 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.1, ptr %k, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: store i8 %k.coerce, ptr %coerce.dive, align 1 +// CHECK-HOST-WINDOWS-NEXT: store i32 %b, ptr %b.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: store i32 %a, ptr %a.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: store ptr %this, ptr %this.addr, align 8 +// CHECK-HOST-WINDOWS-NEXT: %this1 = load ptr, ptr %this.addr, align 8 +// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %b.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: %1 = load i32, ptr %a.addr, align 4 +// CHECK-HOST-WINDOWS-NEXT: %coerce.dive2 = getelementptr inbounds nuw %class.anon.1, ptr %agg.tmp, i32 0, i32 0 +// CHECK-HOST-WINDOWS-NEXT: %2 = load i8, ptr %coerce.dive2, align 1 +// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_enqueue_kernel_launch@Vnotaverygoodkernelname@?1??main@@9@V@?0??2@9@HH@Handler@@AEAAXPEBDV@?0??main@@9@HH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this1, ptr noundef @"??_C@_0CE@NJIGCEIA@_ZTSZ4mainE22notaverygoodkerneln@", i8 %2, i32 noundef %1, i32 noundef %0) // CHECK-HOST-WINDOWS-NEXT: ret void // CHECK-HOST-WINDOWS-NEXT: } @@ -174,6 +275,44 @@ int main() { // CHECK-SPIR-NEXT: } // CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_ +// IR for the SYCL kernel caller function generated for kernel_single_task with +// the Delta Tau Chi type as the SYCL kernel name type. +// +// CHECK-AMDGCN: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-AMDGCN-NEXT: define dso_local amdgpu_kernel void @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-AMDGCN-SAME: (ptr addrspace(4) noundef byref(%class.anon.0) align 1 %0) #[[AMDGCN_ATTR0]] { +// CHECK-AMDGCN-NEXT: entry: +// CHECK-AMDGCN-NEXT: %coerce = alloca %class.anon.0, align 1, addrspace(5) +// CHECK-AMDGCN-NEXT: %kernelFunc = addrspacecast ptr addrspace(5) %coerce to ptr +// CHECK-AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 1 %kernelFunc, ptr addrspace(4) align 1 %0, i64 1, i1 false) +// CHECK-AMDGCN-NEXT: call void @_ZZ4mainENKUliE_clEi +// CHECK-AMDGCN-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc, i32 noundef 42) #[[AMDGCN_ATTR1:[0-9]+]] +// CHECK-AMDGCN-NEXT: ret void +// CHECK-AMDGCN-NEXT: } +// CHECK-AMDGCN: define internal void @_ZZ4mainENKUliE_clEi +// +// CHECK-NVPTX: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-NVPTX-NEXT: define dso_local ptx_kernel void @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-NVPTX-SAME: (ptr noundef byval(%class.anon.0) align 1 %kernelFunc) #[[NVPTX_ATTR0:[0-9]+]] { +// CHECK-NVPTX-NEXT: entry: +// CHECK-NVPTX-NEXT: call void @_ZZ4mainENKUliE_clEi +// CHECK-NVPTX-SAME: (ptr noundef nonnull align 1 dereferenceable(1) %kernelFunc, i32 noundef 42) #[[NVPTX_ATTR1:[0-9]+]] +// CHECK-NVPTX-NEXT: ret void +// CHECK-NVPTX-NEXT: } +// CHECK-NVPTX: define internal void @_ZZ4mainENKUliE_clEi +// +// CHECK-SPIR: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-SPIR-NEXT: define {{[a-z_ ]*}}spir_kernel void @"_ZTS6\CE\B4\CF\84\CF\87" +// CHECK-SPIR-SAME: (ptr noundef byval(%class.anon.0) align 1 %kernelFunc) #[[SPIR_ATTR0:[0-9]+]] { +// CHECK-SPIR-NEXT: entry: +// CHECK-SPIR-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4) +// CHECK-SPIR-NEXT: call spir_func void @_ZZ4mainENKUliE_clEi +// CHECK-SPIR-SAME: (ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %kernelFunc.ascast, i32 noundef 42) #[[SPIR_ATTR1:[0-9]+]] +// CHECK-SPIR-NEXT: ret void +// CHECK-SPIR-NEXT: } +// CHECK-SPIR: define internal spir_func void @_ZZ4mainENKUliE_clEi + + // CHECK-AMDGCN: #[[AMDGCN_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-optlevel"="0" } // CHECK-AMDGCN: #[[AMDGCN_ATTR1]] = { convergent nounwind } // diff --git a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp index 2948b5a864336..6ee2a34efb73f 100644 --- a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp +++ b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp @@ -1,6 +1,8 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s '-D$ADDRSPACE=addrspace(1) ' // RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-host -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s '-D$ADDRSPACE=' +template +void sycl_enqueue_kernel_launch(const char *, KernelObj) {} template [[clang::sycl_kernel_entry_point(KN)]] void kernel(Func F){ diff --git a/clang/test/CodeGenSYCL/unnamed-types.cpp b/clang/test/CodeGenSYCL/unnamed-types.cpp index 9a409f9e3ad5d..1cc368da2248f 100644 --- a/clang/test/CodeGenSYCL/unnamed-types.cpp +++ b/clang/test/CodeGenSYCL/unnamed-types.cpp @@ -13,6 +13,11 @@ // RUN: -aux-triple x86_64-pc-windows-msvc -triple spir64-unknown--unknown \ // RUN: %s -o - | FileCheck %s --check-prefix=MSVC +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + namespace QL { auto dg1 = [] { return 1; }; inline auto dg_inline1 = [] { return 1; }; diff --git a/clang/test/SemaSYCL/sycl-host-kernel-launch.cpp b/clang/test/SemaSYCL/sycl-host-kernel-launch.cpp new file mode 100644 index 0000000000000..1d7c184c61692 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-host-kernel-launch.cpp @@ -0,0 +1,198 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify=host,expected %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -fcxx-exceptions -verify=device,expected %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify=host,expected %s + +// A unique kernel name type is required for each declared kernel entry point. +template struct KN; + +[[clang::sycl_kernel_entry_point(KN<1>)]] +void nolauncher() {} +// host-error@-1 {{unable to find suitable 'sycl_enqueue_kernel_launch' function for host code synthesis}} +// device-warning@-2 {{unable to find suitable 'sycl_enqueue_kernel_launch' function for host code synthesis}} +// expected-note@-3 {{define 'sycl_enqueue_kernel_launch' function template to fix}} + +void sycl_enqueue_kernel_launch(const char *, int arg); +// expected-note@-1 {{declared as a non-template here}} + +[[clang::sycl_kernel_entry_point(KN<2>)]] +void nontemplatel() {} +// host-error@-1 {{unable to find suitable 'sycl_enqueue_kernel_launch' function for host code synthesis}} +// device-warning@-2 {{unable to find suitable 'sycl_enqueue_kernel_launch' function for host code synthesis}} +// expected-note@-3 {{define 'sycl_enqueue_kernel_launch' function template to fix}} +// expected-error@-4 {{'sycl_enqueue_kernel_launch' following the 'template' keyword does not refer to a template}} + +template +void sycl_enqueue_kernel_launch(const char *, int arg); +// expected-note@-1 {{candidate function template not viable: requires 2 arguments, but 1 was provided}} +// expected-note@-2 2{{candidate function template not viable: no known conversion from 'Kern' to 'int' for 2nd argument}} + +[[clang::sycl_kernel_entry_point(KN<3>)]] +void notenoughargs() {} +// expected-error@-1 {{no matching function for call to 'sycl_enqueue_kernel_launch'}} +// FIXME: Should this also say "no suitable function for host code synthesis"? + + +template +void sycl_enqueue_kernel_launch(const char *, bool arg = 1); +// expected-note@-1 2{{candidate function template not viable: no known conversion from 'Kern' to 'bool' for 2nd argument}} + +[[clang::sycl_kernel_entry_point(KN<4>)]] +void enoughargs() {} + +namespace boop { +template +void sycl_enqueue_kernel_launch(const char *, KernelObj); + +template +[[clang::sycl_kernel_entry_point(KernName)]] +void iboop(KernelObj Kernel) { + Kernel(); +} +} + +template +[[clang::sycl_kernel_entry_point(KernName)]] +void idontboop(KernelObj Kernel) { + Kernel(); +} +// expected-error@-3 {{no matching function for call to 'sycl_enqueue_kernel_launch'}} + +struct Kern { + int a; + int *b; + Kern(int _a, int* _b) : a(_a), b(_b) {} + void operator()(){ *b = a;} +}; + +void foo() { + int *a; + Kern b(1, a); + idontboop>(b); + // expected-note@-1 {{in instantiation of function template specialization 'idontboop, Kern>' requested here}} + boop::iboop>(b); +} + +class MaybeHandler { + +template +void sycl_enqueue_kernel_launch(const char *); + +template +void sycl_enqueue_kernel_launch(const char *, Tys ...Args); + +public: + +template +[[clang::sycl_kernel_entry_point(KernName)]] +void entry(KernelObj Kernel) { + Kernel(); +} +}; + +class MaybeHandler2 { + +template +static void sycl_enqueue_kernel_launch(const char *, Tys ...Args); + +public: + +template +[[clang::sycl_kernel_entry_point(KernName)]] +void entry(KernelObj Kernel) { + Kernel(); +} +}; + +class MaybeHandler3 { + +template +static void sycl_enqueue_kernel_launch(const char *, Tys ...Args); + +public: + +template +[[clang::sycl_kernel_entry_point(KernName)]] +static void entry(KernelObj Kernel) { + Kernel(); +} +}; + +class MaybeHandler4 { + +template +void sycl_enqueue_kernel_launch(const char *, Tys ...Args); + +public: + +template +[[clang::sycl_kernel_entry_point(KernName)]] +static void entry(KernelObj Kernel) { + // expected-error@-1 {{call to non-static member function without an object argument}} + // FIXME: Should that be clearer? + Kernel(); +} +}; + +template +struct base_handler { + template + void sycl_enqueue_kernel_launch(const char*, Ts...) {} +}; +struct derived_handler : base_handler { + template + [[clang::sycl_kernel_entry_point(KNT)]] + void entry(KT k) { k(); } +}; + +template +struct derived_handler_t : base_handler> { + template +// FIXME this fails because accessing members of dependent bases requires +// explicit qualification. + [[clang::sycl_kernel_entry_point(KNT)]] + void entry(KT k) { k(); } + // expected-error@-1 {{no matching function for call to 'sycl_enqueue_kernel_launch'}} +}; + +template +struct kernel_launcher { + template + void operator()(const char*, Ts...) const {} +}; + +namespace var { +template +kernel_launcher sycl_enqueue_kernel_launch; + +struct handler { + template + [[clang::sycl_kernel_entry_point(KNT)]] + void entry(KT k) { k(); } +}; +} + + +void bar() { + int *a; + Kern b(1, a); + MaybeHandler H; + MaybeHandler2 H1; + MaybeHandler3 H2; + MaybeHandler4 H3; + H.entry>(b); + H1.entry>(b); + H2.entry>(b); + H3.entry>(b); + + derived_handler H5; + H5.entry>(b); + + derived_handler_t<13> H6; + H6.entry>(b); //expected-note {{in instantiation of function template specialization}} + + var::handler h; + h.entry>(b); +} + + + diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp index 4774c8ef545f8..f2c9bc2ef44d7 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp @@ -1,5 +1,8 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-host -fcxx-exceptions -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s // These tests validate appertainment for the sycl_kernel_entry_point attribute. @@ -37,6 +40,10 @@ struct coroutine_traits { // A unique kernel name type is required for each declared kernel entry point. template struct KN; +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} //////////////////////////////////////////////////////////////////////////////// // Valid declarations. @@ -131,6 +138,16 @@ struct S15 { static T ok15(); }; +struct S16 { + // Non-static member function declaration. + [[clang::sycl_kernel_entry_point(KN<16>)]] + void ok16(); +}; + +#if __cplusplus >= 202302L +auto ok17 = [] [[clang::sycl_kernel_entry_point(KN<17>)]] -> void {}; +#endif + //////////////////////////////////////////////////////////////////////////////// // Invalid declarations. @@ -163,13 +180,6 @@ struct B2 { static int bad2; }; -struct B3 { - // Non-static member function declaration. - // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} - [[clang::sycl_kernel_entry_point(BADKN<3>)]] - void bad3(); -}; - // expected-error@+1 {{'clang::sycl_kernel_entry_point' attribute only applies to functions}} namespace [[clang::sycl_kernel_entry_point(BADKN<4>)]] bad4 {} @@ -244,13 +254,13 @@ void bad19() { #endif struct B20 { - // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a constructor}} [[clang::sycl_kernel_entry_point(BADKN<20>)]] B20(); }; struct B21 { - // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a destructor}} [[clang::sycl_kernel_entry_point(BADKN<21>)]] ~B21(); }; @@ -337,11 +347,6 @@ struct B34 { [[noreturn]] friend void bad34() {} }; -#if __cplusplus >= 202302L -// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} -auto bad35 = [] [[clang::sycl_kernel_entry_point(BADKN<35>)]] -> void {}; -#endif - #if __cplusplus >= 202302L // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}} auto bad36 = [] [[clang::sycl_kernel_entry_point(BADKN<36>)]] static {}; diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp index 8f81fa218c171..4e249c763f503 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp @@ -1,4 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s // These tests validate parsing of the sycl_kernel_entry_point argument list @@ -8,6 +10,10 @@ template struct ST; // #ST-decl template using TTA = ST; // #TTA-decl +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} //////////////////////////////////////////////////////////////////////////////// // Valid declarations. diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp index 8788e147a2ae4..44e55c4958840 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp @@ -17,6 +17,11 @@ module M2 { header "m2.h" } #--- common.h template struct KN; +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + [[clang::sycl_kernel_entry_point(KN<1>)]] void common_test1() {} @@ -25,7 +30,6 @@ template void common_test2() {} template void common_test2>(); - #--- m1.h #include "common.h" diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp index 0575a7a5a67eb..78659f762f0f3 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp @@ -15,6 +15,11 @@ #--- pch.h template struct KN; +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + [[clang::sycl_kernel_entry_point(KN<1>)]] void pch_test1() {} // << expected previous declaration note here. @@ -26,11 +31,11 @@ template void pch_test2>(); #--- test.cpp // expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} -// expected-note@pch.h:4 {{previous declaration is here}} +// expected-note@pch.h:9 {{previous declaration is here}} [[clang::sycl_kernel_entry_point(KN<1>)]] void test1() {} // expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} -// expected-note@pch.h:8 {{previous declaration is here}} +// expected-note@pch.h:13 {{previous declaration is here}} [[clang::sycl_kernel_entry_point(KN<2>)]] void test2() {} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp index c7b83932fefe6..15c5dfde8fb2b 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp @@ -1,4 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s // These tests validate that the kernel name type argument provided to the @@ -7,6 +9,12 @@ // specification. struct S1; + +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + // expected-warning@+3 {{redundant 'clang::sycl_kernel_entry_point' attribute}} // expected-note@+1 {{previous attribute is here}} [[clang::sycl_kernel_entry_point(S1), diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp index 4c61570419629..d647215f35e95 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp @@ -1,4 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-host -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s // These tests are intended to validate that a sycl_kernel_entry_point attribute @@ -8,6 +10,11 @@ // attribute during instantiation of a specialization unless that specialization // is selected by overload resolution. +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + // FIXME: C++23 [temp.expl.spec]p12 states: // FIXME: ... Similarly, attributes appearing in the declaration of a template // FIXME: have no effect on an explicit specialization of that template. diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp new file mode 100644 index 0000000000000..faf7c40760a95 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-this.cpp @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++17 -fsycl-is-host -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++17 -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++20 -fsycl-is-host -verify -DCXX20 %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++20 -fsycl-is-device -verify -DCXX20 %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++23 -fsycl-is-host -verify -DCXX23 %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -std=c++23 -fsycl-is-device -verify -DCXX23 %s + +// These tests validate diagnostics for invalid use of 'this' in the body of +// a function declared with the sycl_kernel_entry_point attribute. + + +template struct remove_reference_t { + using type = T; +}; +template struct remove_reference_t { + using type = T; +}; + +namespace std { +struct type_info { + virtual ~type_info(); +}; +} // namespace std + +// A launcher function definition required for host code synthesis to silence +// complains. +template +void sycl_enqueue_kernel_launch(const char *, Tys &&...Args) {} + +//////////////////////////////////////////////////////////////////////////////// +// Valid declarations. +//////////////////////////////////////////////////////////////////////////////// +template struct KN; + +struct S1 { + [[clang::sycl_kernel_entry_point(KN<1>)]] void ok1() { + (void)sizeof(this); + } +}; + +struct S2 { + [[clang::sycl_kernel_entry_point(KN<2>)]] void ok2() { + (void)noexcept(this); + } +}; + +struct S3 { + [[clang::sycl_kernel_entry_point(KN<3>)]] void ok3() { + decltype(this) x = nullptr; + } +}; + +struct S4 { + static void smf(); + [[clang::sycl_kernel_entry_point(KN<4>)]] void ok4() { + remove_reference_t::type::smf(); + } +}; + +struct S5 { + int dm; + void mf(); + [[clang::sycl_kernel_entry_point(KN<5>)]] void ok5() { + (void)typeid(*this); // S5 is not abstract, so 'this' is not evaluated. + (void)typeid(dm); // 'int' is not an abstract class type; implicit 'this' is not evaluated. + (void)typeid(mf()); // 'void' is not an abstract class type; implicit 'this' is not evaluated. + } +}; + +template +struct S6 { + void mf() noexcept(B); + [[clang::sycl_kernel_entry_point(KN)]] void ok6() noexcept(noexcept(mf())) {} +}; +template void S6, false>::ok6(); +template void S6, true>::ok6(); + +template +struct S7 { + void mf() noexcept(B); + [[clang::sycl_kernel_entry_point(KN)]] void ok7() noexcept(noexcept(this->mf())) {} +}; +template void S7, false>::ok7(); +template void S7, true>::ok7(); + +#if defined(CXX20) +template +struct S8 { + void mf(T); + [[clang::sycl_kernel_entry_point(KN)]] void ok8() requires(requires { mf(1); }) {} +}; +template void S8, int>::ok8(); + +template +struct S9 { + void mf(T); + [[clang::sycl_kernel_entry_point(KN)]] void ok9() requires(requires { this->mf(1); }) {} +}; +template void S9, int>::ok9(); +#endif + +#if defined(CXX23) +struct S10 { + [[clang::sycl_kernel_entry_point(KN<10>)]] void ok10(this S10 self) { + (void)self; + } +}; +#endif + + +//////////////////////////////////////////////////////////////////////////////// +// Invalid declarations. +//////////////////////////////////////////////////////////////////////////////// + +template struct BADKN; + +// expected-error@+3 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B1 { + [[clang::sycl_kernel_entry_point(BADKN<1>)]] void bad1() { + (void)this; + } +}; + +// expected-error@+4 {{'this' cannot be implicitly used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B2 { + int dm; + [[clang::sycl_kernel_entry_point(BADKN<2>)]] void bad2() { + (void)dm; + } +}; + +// expected-error@+4 {{'this' cannot be implicitly used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B3 { + void mf(); + [[clang::sycl_kernel_entry_point(BADKN<3>)]] void bad3() { + (void)mf(); + } +}; + +// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B4 { + virtual void vmf() = 0; + [[clang::sycl_kernel_entry_point(BADKN<4>)]] void bad4() { + (void)typeid(*this); // B4 is abstract, so 'this' is evaluated. + } +}; + +// A diagnostic is not currently issued for uninstantiated definitions. In this +// case, a declaration is instantiated, but a definition isn't. A diagnostic +// will be issued if a definition is instantiated (as the next test exercises). +struct B5 { + template + [[clang::sycl_kernel_entry_point(KN)]] void bad5() { + (void)this; + } +}; +extern template void B5::bad5>(); + +// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +struct B6 { + template + [[clang::sycl_kernel_entry_point(KN)]] void bad6() { + (void)this; + } +}; +// expected-note@+1 {{in instantiation of function template specialization 'B6::bad6>' requested here}} +template void B6::bad6>(); + +// A diagnostic is not currently issued for uninstantiated definitions. In this +// case, a declaration is instantiated, but a definition isn't. A diagnostic +// will be issued if a definition is instantiated (as the next test exercises). +template +struct B7 { + [[clang::sycl_kernel_entry_point(KN)]] void bad7() { + (void)this; + } +}; +extern template void B7>::bad7(); + +// expected-error@+4 {{'this' cannot be used in a potentially evaluated expression in the body of a function declared with the 'clang::sycl_kernel_entry_point' attribute}} +template +struct B8 { + [[clang::sycl_kernel_entry_point(KN)]] void bad8() { + (void)this; + } +}; +// expected-note@+1 {{in instantiation of member function 'B8>::bad8' requested here}} +template void B8>::bad8(); diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index a80ddc9feb83f..6af1b2adacc1b 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -53,12 +53,14 @@ struct GetMergedKernelProperties< struct KernelWrapperHelperFuncs { #ifdef SYCL_LANGUAGE_VERSION +#define __SYCL_ENTRY_POINT_ATTR__(KernelName) [[clang::sycl_kernel_entry_point(KernelName)]] #ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel_entry_point(KernelName)]] #else #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]] #endif // __INTEL_SYCL_USE_INTEGRATION_HEADERS #else +#define __SYCL_ENTRY_POINT_ATTR__(KernelName) #define __SYCL_KERNEL_ATTR__ #endif // SYCL_LANGUAGE_VERSION diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index c610632e4c673..a8020a897502c 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -2727,8 +2727,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event single_task( const KernelType &KernelFunc, const detail::code_location &CodeLoc = detail::code_location::current()) { - return single_task( - ext::oneapi::experimental::empty_properties_t{}, KernelFunc, CodeLoc); + (void)CodeLoc; + return single_task_impl(KernelFunc); } /// single_task version with a kernel represented as a lambda. @@ -3808,6 +3808,70 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { TlsCodeLocCapture.isToplevel()); } + event remembered_event; + void set_std_layout_arg(int, const char *, size_t); + void remember_kernel_single_task(const char *); + void enqueue_remembered_kernel(); + void remember_range(range<3> R); + void remember_range(range<2> R); + void remember_range(range<1> R); + + template void setArgHelper(int ArgIndex, T &&Arg) { + set_std_layout_arg(ArgIndex, reinterpret_cast(&Arg), sizeof(T)); + } + + void setArgsHelper(int) {} + + template + void setArgsHelper(int ArgIndex, T &&Arg, Ts &&...Args) { + setArgHelper(ArgIndex, std::forward(Arg)); + setArgsHelper(++ArgIndex, std::forward(Args)...); + } + + template + void sycl_enqueue_kernel_launch(const char *KernelName, Args... args) { + remember_kernel_single_task(KernelName); + setArgsHelper(0, args...); + enqueue_remembered_kernel(); + // get kernel + // call set_args + // call enqueue + } + + template + __SYCL_ENTRY_POINT_ATTR__(KernelName) + void __kernel_single_task(const KernelType KernelFunc) { + KernelFunc(); + } + + template + __SYCL_ENTRY_POINT_ATTR__(KernelName) + void __kernel_parallel_for(const KernelType KernelFunc) { + // Builder is only defined for device +#ifdef __SYCL_DEVICE_ONLY__ + KernelFunc(detail::Builder::getElement(detail::declptr())); +#endif + (void)KernelFunc; + } + + /// special overload that does not accept properties or reductions (to + /// simplify a prototype) + template + event single_task_impl(KernelFunctor &&Functor) { + remember_range({1, 1, 1}); + __kernel_single_task(Functor); + return remembered_event; + } + + template + event parallel_for_impl(range R, KernelFunctor &&Functor) { + // save range to impl for parallel for + using KernelObjectArgTy = item; + remember_range(R); + __kernel_parallel_for(Functor); + return remembered_event; + } + /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. /// diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 0ea4ff2d051e6..c09dd57e5c0d6 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -96,8 +96,9 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { #endif #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - DeviceKernelInfo() = default; + //DeviceKernelInfo() = default; #endif + DeviceKernelInfo() = default; DeviceKernelInfo(const CompileTimeKernelInfoTy &Info); void init(KernelNameStrRefT KernelName); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 79769d8819000..59348b7a512b8 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -16,6 +16,7 @@ #include #include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION #include @@ -959,6 +960,78 @@ void queue_impl::verifyProps(const property_list &Props) const { CheckPropertiesWithData); } +void queue_impl::remember_range(range<3> R) { + this->RememberedNDRDesc = NDRDescT(R); +} +void queue_impl::remember_range(range<2> R) { + this->RememberedNDRDesc = NDRDescT(R); +} +void queue_impl::remember_range(range<1> R) { + this->RememberedNDRDesc = NDRDescT(R); +} + +void queue_impl::remember_kernel_single_task(const char *KernelName) { + + DeviceKernelInfo Info; + Info.Name = KernelName; + Info.init(KernelName); + std::cout << KernelName << std::endl; + FastKernelCacheValPtr KernelCacheVal = + detail::ProgramManager::getInstance().getOrCreateKernel( + getContextImpl(), getDeviceImpl(), Info, + RememberedNDRDesc); + UrKernel = KernelCacheVal->MKernelHandle; +} +void queue_impl::set_std_layout_arg(int ArgIndex, const char *ArgPtr, size_t ArgSize) { + adapter_impl &Adapter = getAdapter(); + Adapter.call(UrKernel, ArgIndex, ArgSize, + /*wtf*/nullptr, ArgPtr); +} +EventImplPtr queue_impl::enqueue_remembered_kernel() { + adapter_impl &Adapter = getAdapter(); + NDRDescT NDRDesc = RememberedNDRDesc ; + // COPY-PASTE from commands.cpp ON + // Remember this information before the range dimensions are reversed + const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); + + ReverseRangeDimensionsForKernel(NDRDesc); + + size_t RequiredWGSize[3] = {0, 0, 0}; + size_t *LocalSize = nullptr; + + if (HasLocalSize) + LocalSize = &NDRDesc.LocalSize[0]; + else { + Adapter.call( + UrKernel, getDeviceImpl().getHandleRef(), + UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), + RequiredWGSize, + /* pPropSizeRet = */ nullptr); + + const bool EnforcedLocalSize = + (RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 || + RequiredWGSize[2] != 0); + if (EnforcedLocalSize) + LocalSize = RequiredWGSize; + } + // COPY-PASTE from commands.cpp OFF + std::vector property_list; + auto ResEvent = detail::event_impl::create_device_event(*this); + ur_event_handle_t UREvent = nullptr; + ur_result_t Error = Adapter.call_nocheck( + getHandleRef(), UrKernel, NDRDesc.Dims, + /*HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr*/ + nullptr, &NDRDesc.GlobalSize[0], LocalSize, property_list.size(), + property_list.empty() ? nullptr : property_list.data(), + /*RawEvents.size()*/ 0, + /*RawEvents.empty() ? nullptr : &RawEvents[0]*/ nullptr, + &UREvent); + if (Error == UR_RESULT_SUCCESS) { + ResEvent->setHandle(UREvent); + } + return ResEvent; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c3d6748695423..639cdd329f9cf 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -245,6 +245,16 @@ class queue_impl : public std::enable_shared_from_this { private_tag{}); } + ur_kernel_handle_t UrKernel = nullptr; + //range<3> remembered_range; + NDRDescT RememberedNDRDesc; + void remember_range(range<1> R); + void remember_range(range<2> R); + void remember_range(range<3> R); + void remember_kernel_single_task(const char *); + void set_std_layout_arg(int ArgIndex, const char *ArgPtr, size_t ArgSize); + EventImplPtr enqueue_remembered_kernel(); + ~queue_impl() { try { #if XPTI_ENABLE_INSTRUMENTATION diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 64ed07032ddc9..8051838b5d2f9 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -412,6 +412,30 @@ bool queue::khr_empty() const { return impl->queue_empty(); } void queue::ext_oneapi_prod() { impl->flush(); } +void queue::set_std_layout_arg(int ArgIndex, const char *ArgPtr, + size_t ArgSize) { + impl->set_std_layout_arg(ArgIndex, ArgPtr, ArgSize); +} + +void queue::remember_kernel_single_task(const char *KernelName) { + impl->remember_kernel_single_task(KernelName); +} + +void queue::remember_range(range<3> R) { + impl->remember_range(R); +} +void queue::remember_range(range<2> R) { + impl->remember_range(R); +} +void queue::remember_range(range<1> R) { + impl->remember_range(R); +} + +void queue::enqueue_remembered_kernel() { + remembered_event = + detail::createSyclObjFromImpl(impl->enqueue_remembered_kernel()); +} + ur_native_handle_t queue::getNative(int32_t &NativeHandleDesc) const { return impl->getNative(NativeHandleDesc); }