diff --git a/clang/lib/DPCT/APINamesMemory.inc b/clang/lib/DPCT/APINamesMemory.inc index ed5576aee733..1c2d98e3047b 100644 --- a/clang/lib/DPCT/APINamesMemory.inc +++ b/clang/lib/DPCT/APINamesMemory.inc @@ -265,7 +265,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemPrefetchAsync", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(2)), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "prefetch", ARG(0), ARG(1))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( @@ -280,7 +280,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), @@ -289,7 +289,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))), CONDITIONAL_FACTORY_ENTRY( @@ -303,7 +303,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(3)), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), FEATURE_REQUEST_FACTORY( @@ -312,7 +312,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(3)), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 401e13b922eb..219e5514aa1f 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -10589,7 +10589,8 @@ void MemoryMigrationRule::prefetchMigration( ? "cpu_device()" : "get_device(" + StmtStrArg2 + ")"); requestFeature(HelperFeatureEnum::device_ext); - Replacement = Prefix + "." + DpctGlobalInfo::getDeviceQueueName() + "()" + + Replacement = Prefix + "." + + DpctGlobalInfo::getDefaultQueueMemFuncName() + "()" + (DpctGlobalInfo::useSYCLCompat() ? "->" : ".") + "prefetch(" + StmtStrArg0 + "," + StmtStrArg1 + ")"; } else { @@ -10791,7 +10792,7 @@ void MemoryMigrationRule::cudaMemAdvise(const MatchFinder::MatchResult &Result, std::ostringstream OS; if (getStmtSpelling(C->getArg(3)) == "cudaCpuDeviceId") { OS << MapNames::getDpctNamespace() + "cpu_device()." + - DpctGlobalInfo::getDeviceQueueName() + "()"; + DpctGlobalInfo::getDefaultQueueMemFuncName() + "()"; OS << (DpctGlobalInfo::useSYCLCompat() ? "->" : ".") << "mem_advise(" << Arg0Str << ", " << Arg1Str << ", " << Arg2Str << ")"; emplaceTransformation(new ReplaceStmt(C, OS.str())); @@ -10799,7 +10800,7 @@ void MemoryMigrationRule::cudaMemAdvise(const MatchFinder::MatchResult &Result, return; } OS << MapNames::getDpctNamespace() + "get_device(" << Arg3Str - << ")." + DpctGlobalInfo::getDeviceQueueName() + "()"; + << ")." + DpctGlobalInfo::getDefaultQueueMemFuncName() + "()"; OS << (DpctGlobalInfo::useSYCLCompat() ? "->" : ".") << "mem_advise(" << Arg0Str << ", " << Arg1Str << ", " << Arg2Str << ")"; emplaceTransformation(new ReplaceStmt(C, OS.str())); diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 335ef5ae105e..85d5783ca551 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -63,8 +63,7 @@ const std::string &getDefaultString(HelperFuncType HFT) { const static std::string DefaultQueue = DpctGlobalInfo::useNoQueueDevice() ? DpctGlobalInfo::getGlobalQueueName() - : buildString(MapNames::getDpctNamespace() + "get_" + - DpctGlobalInfo::getDeviceQueueName() + "()"); + : DpctGlobalInfo::getDefaultQueueFreeFuncCall(); return DefaultQueue; } case clang::dpct::HelperFuncType::HFT_DefaultQueuePtr: { @@ -74,8 +73,8 @@ const std::string &getDefaultString(HelperFuncType HFT) { : (DpctGlobalInfo::useSYCLCompat() ? buildString(MapNames::getDpctNamespace() + "get_current_device().default_queue()") - : buildString("&" + MapNames::getDpctNamespace() + "get_" + - DpctGlobalInfo::getDeviceQueueName() + "()")); + : buildString( + "&", DpctGlobalInfo::getDefaultQueueFreeFuncCall())); return DefaultQueue; } case clang::dpct::HelperFuncType::HFT_CurrentDevice: { @@ -269,6 +268,15 @@ void processTypeLoc(const TypeLoc &TL, ExprAnalysis &EA, } EA.applyAllSubExprRepl(); } +HelperFuncCatalog getQueueKind() { + if (DpctGlobalInfo::useSYCLCompat()) { + return HelperFuncCatalog::GetDefaultQueue; + } + if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted) { + return HelperFuncCatalog::GetInOrderQueue; + } + return HelperFuncCatalog::GetOutOfOrderQueue; +} ///// class FreeQueriesInfo ///// class FreeQueriesInfo { @@ -930,6 +938,11 @@ void DpctFileInfo::insertHeader(HeaderType Type, unsigned Offset, << CCLVerValue << getNL(); insertHeader(MigratedMacroDefinitionOS.str(), FileBeginOffset, InsertPosition::IP_AlwaysLeft); + for (const auto &File : + DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes()) { + insertHeader("#include \"" + File + +"\"" + getNL(), FirstIncludeOffset, + InsertPosition::IP_Right); + } return; // Because includes and @@ -1225,15 +1238,26 @@ std::string DpctGlobalInfo::getDefaultQueue(const Stmt *S) { return buildString(RegexPrefix, 'Q', Idx, RegexSuffix); } -const std::string &DpctGlobalInfo::getDeviceQueueName() { - static const std::string DeviceQueue = [&]() { +const std::string &DpctGlobalInfo::getDefaultQueueFreeFuncCall() { + static const std::string DefaultQueueFreeFuncCall = [&]() { + if (auto Iter = MapNames::CustomHelperFunctionMap.find(getQueueKind()); + Iter != MapNames::CustomHelperFunctionMap.end()) { + return Iter->second; + } + return MapNames::getDpctNamespace() + "get_" + + getDefaultQueueMemFuncName() + "()"; + }(); + return DefaultQueueFreeFuncCall; +} +const std::string &DpctGlobalInfo::getDefaultQueueMemFuncName() { + static const std::string DefaultQueueMemFuncName = [&]() { if (DpctGlobalInfo::useSYCLCompat()) return "default_queue"; if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) return "out_of_order_queue"; return "in_order_queue"; }(); - return DeviceQueue; + return DefaultQueueMemFuncName; } void DpctGlobalInfo::setContext(ASTContext &C) { Context = &C; @@ -1588,7 +1612,8 @@ void DpctGlobalInfo::buildReplacements() { QDecl << "&q_ct1 = "; if (DpctGlobalInfo::useSYCLCompat()) QDecl << '*'; - QDecl << "dev_ct1." << DpctGlobalInfo::getDeviceQueueName() << "();"; + QDecl << "dev_ct1." << DpctGlobalInfo::getDefaultQueueMemFuncName() + << "();"; } else { DevDecl << MapNames::getClNamespace() + "device dev_ct1;"; // Now the UsmLevel must not be UL_None here. @@ -2237,6 +2262,7 @@ void DpctGlobalInfo::resetInfo() { SpellingLocToDFIsMapForAssumeNDRange.clear(); DFIToSpellingLocsMapForAssumeNDRange.clear(); FreeQueriesInfo::reset(); + CustomHelperFunctionAddtionalIncludes.clear(); } void DpctGlobalInfo::updateSpellingLocDFIMaps( SourceLocation SL, std::shared_ptr DFI) { @@ -2454,6 +2480,8 @@ std::vector>> std::vector>> DpctGlobalInfo::CodePinDumpFuncDepsVec; std::unordered_set DpctGlobalInfo::NeedParenAPISet = {}; +std::unordered_set + DpctGlobalInfo::CustomHelperFunctionAddtionalIncludes = {}; ///// class DpctNameGenerator ///// void DpctNameGenerator::printName(const FunctionDecl *FD, llvm::raw_ostream &OS) { @@ -6070,6 +6098,7 @@ void KernelCallExpr::removeExtraIndent() { getFilePath(), getOffset() - LocInfo.Indent.length(), LocInfo.Indent.length(), "", nullptr)); } + void KernelCallExpr::addDevCapCheckStmt() { llvm::SmallVector AspectList; if (getVarMap().hasBF64()) { @@ -6079,17 +6108,28 @@ void KernelCallExpr::addDevCapCheckStmt() { AspectList.push_back(MapNames::getClNamespace() + "aspect::fp16"); } if (!AspectList.empty()) { - requestFeature(HelperFeatureEnum::device_ext); std::string Str; llvm::raw_string_ostream OS(Str); - OS << MapNames::getDpctNamespace() << "get_device("; - OS << MapNames::getDpctNamespace() << "get_device_id("; - printStreamBase(OS); - OS << "get_device())).has_capability_or_fail({" << AspectList.front(); - for (size_t i = 1; i < AspectList.size(); ++i) { - OS << ", " << AspectList[i]; - } - OS << "});"; + if (auto Iter = MapNames::CustomHelperFunctionMap.find(getQueueKind()); + Iter != MapNames::CustomHelperFunctionMap.end()) { + OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; + OS << Iter->second << ".get_device(), "; + OS << "{" << AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + OS << ", " << AspectList[i]; + } + OS << "});"; + } else { + requestFeature(HelperFeatureEnum::device_ext); + OS << MapNames::getDpctNamespace() << "get_device("; + OS << MapNames::getDpctNamespace() << "get_device_id("; + printStreamBase(OS); + OS << "get_device())).has_capability_or_fail({" << AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + OS << ", " << AspectList[i]; + } + OS << "});"; + } OuterStmts.OthersList.emplace_back(OS.str()); } } @@ -6139,8 +6179,7 @@ void KernelCallExpr::addStreamDecl() { buildString(MapNames::getClNamespace() + "stream ", DpctGlobalInfo::getStreamName(), "(64 * 1024, 80, cgh);")); if (getVarMap().hasSync()) { - auto DefaultQueue = buildString(MapNames::getDpctNamespace(), "get_", - DpctGlobalInfo::getDeviceQueueName(), "()"); + auto DefaultQueue = DpctGlobalInfo::getDefaultQueueFreeFuncCall(); if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) { OuterStmts.OthersList.emplace_back( buildString(MapNames::getDpctNamespace(), "global_memory<", diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index ec8ef3a87f4d..eeb86997ce83 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -663,16 +663,14 @@ class DpctGlobalInfo { : DefaultQueueCounter(DefaultQueueCounter), CurrentDeviceCounter(CurrentDeviceCounter), PlaceholderStr{ - "", - buildString(MapNames::getDpctNamespace(), "get_", - DpctGlobalInfo::getDeviceQueueName(), "()"), + "", DpctGlobalInfo::getDefaultQueueFreeFuncCall(), MapNames::getDpctNamespace() + "get_current_device()", (DpctGlobalInfo::useSYCLCompat() ? buildString(MapNames::getDpctNamespace() + "get_current_device().default_queue()") - : buildString("&" + MapNames::getDpctNamespace() + "get_" + - DpctGlobalInfo::getDeviceQueueName() + - "()"))} {} + : buildString( + "&", DpctGlobalInfo::getDefaultQueueFreeFuncCall()))} { + } int DefaultQueueCounter = 0; int CurrentDeviceCounter = 0; std::string PlaceholderStr[4]; @@ -750,7 +748,8 @@ class DpctGlobalInfo { static std::string getSubGroup(const Stmt *, const FunctionDecl *FD = nullptr); static std::string getDefaultQueue(const Stmt *); - static const std::string &getDeviceQueueName(); + static const std::string &getDefaultQueueFreeFuncCall(); + static const std::string &getDefaultQueueMemFuncName(); static const std::string &getStreamName() { const static std::string StreamName = "stream" + getCTFixedSuffix(); return StreamName; @@ -1329,6 +1328,10 @@ class DpctGlobalInfo { static bool useBFloat16() { return getUsingExtensionDE(DPCPPExtensionsDefaultEnabled::ExtDE_BFloat16); } + static std::unordered_set & + getCustomHelperFunctionAddtionalIncludes() { + return CustomHelperFunctionAddtionalIncludes; + } std::shared_ptr insertFile(const clang::tooling::UnifiedPath &FilePath) { return insertObject(FileMap, FilePath); @@ -1644,6 +1647,7 @@ class DpctGlobalInfo { static std::vector>> CodePinDumpFuncDepsVec; static std::unordered_set NeedParenAPISet; + static std::unordered_set CustomHelperFunctionAddtionalIncludes; }; /// Generate mangle name of FunctionDecl as key of DeviceFunctionInfo. diff --git a/clang/lib/DPCT/MapNames.cpp b/clang/lib/DPCT/MapNames.cpp index 06542fc724eb..180862377dae 100644 --- a/clang/lib/DPCT/MapNames.cpp +++ b/clang/lib/DPCT/MapNames.cpp @@ -4600,6 +4600,8 @@ MapNames::MapTy TextureRule::ResourceTypeNames{{"devPtr", "data_ptr"}, {"numChannels", "channel_num"}}; std::vector MapNames::PatternRewriters; +std::map + MapNames::CustomHelperFunctionMap; const MapNames::MapTy MemoryDataTypeRule::PitchMemberNames{ {"pitch", "pitch"}, {"ptr", "data_ptr"}, {"xsize", "x"}, {"ysize", "y"}}; diff --git a/clang/lib/DPCT/MapNames.h b/clang/lib/DPCT/MapNames.h index 6a5f3db5064e..56c9418aa609 100644 --- a/clang/lib/DPCT/MapNames.h +++ b/clang/lib/DPCT/MapNames.h @@ -20,6 +20,11 @@ namespace dpct { enum class KernelArgType; enum class HelperFileEnum : unsigned int; struct HelperFunc; +enum class HelperFuncCatalog { + GetDefaultQueue, + GetOutOfOrderQueue, + GetInOrderQueue, +}; } // namespace dpct } // namespace clang @@ -420,6 +425,9 @@ class MapNames { /// {Original API, {ToType, FromType}} static std::unordered_map> MathTypeCastingMap; + + static std::map + CustomHelperFunctionMap; }; class MigrationStatistics { diff --git a/clang/lib/DPCT/Rules.cpp b/clang/lib/DPCT/Rules.cpp index 1c128c47ea9a..1304f1e32acf 100644 --- a/clang/lib/DPCT/Rules.cpp +++ b/clang/lib/DPCT/Rules.cpp @@ -277,6 +277,32 @@ void registerPatternRewriterRule(MetaRuleObject &R) { R.BuildScriptSyntax, R.Priority)); } +void registerHelperFunctionRule(MetaRuleObject &R) { + static const std::unordered_map + String2HelperFuncCatalogMap{ + {"get_default_queue", dpct::HelperFuncCatalog::GetDefaultQueue}, + {"get_in_order_queue", dpct::HelperFuncCatalog::GetInOrderQueue}, + {"get_out_of_order_queue", + dpct::HelperFuncCatalog::GetOutOfOrderQueue}}; + if (R.Priority == RulePriority::Takeover) { + if (auto Iter = String2HelperFuncCatalogMap.find(R.In); + Iter != String2HelperFuncCatalogMap.end()) { + // This map is inited here. + // It saves the customized string which used for each kind of helper + // function call in the migrated code. + MapNames::CustomHelperFunctionMap.insert({Iter->second, R.Out}); + dpct::DpctGlobalInfo::setUsingDRYPattern(false); + dpct::DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes().insert( + R.Includes.begin(), R.Includes.end()); + } else { + llvm::outs() + << "Warning: The rule named " << R.RuleId + << " (Kind: HelperFunction) is ignored, as the API specified " + "in the \"In\" field is not supported for customization.\n"; + } + } +} + MetaRuleObject::PatternRewriter &MetaRuleObject::PatternRewriter::operator=( const MetaRuleObject::PatternRewriter &PR) { if (this != &PR) { @@ -366,6 +392,9 @@ void importRules(std::vector &RuleFiles) { case (RuleKind::CMakeRule): registerCmakeMigrationRule(*r); break; + case (RuleKind::HelperFunction): + registerHelperFunctionRule(*r); + break; case (RuleKind::PythonRule): registerPythonMigrationRule(*r); break; diff --git a/clang/lib/DPCT/Rules.h b/clang/lib/DPCT/Rules.h index e82f961e6119..bb0de5347626 100644 --- a/clang/lib/DPCT/Rules.h +++ b/clang/lib/DPCT/Rules.h @@ -25,6 +25,7 @@ enum RuleKind { DisableAPIMigration, PatternRewriter, CMakeRule, + HelperFunction, PythonRule }; @@ -210,6 +211,7 @@ template <> struct llvm::yaml::ScalarEnumerationTraits { Io.enumCase(Value, "DisableAPIMigration", RuleKind::DisableAPIMigration); Io.enumCase(Value, "PatternRewriter", RuleKind::PatternRewriter); Io.enumCase(Value, "CMakeRule", RuleKind::CMakeRule); + Io.enumCase(Value, "HelperFunction", RuleKind::HelperFunction); Io.enumCase(Value, "PythonRule", RuleKind::PythonRule); } }; diff --git a/clang/test/dpct/user_defined_rule_helper1.cu b/clang/test/dpct/user_defined_rule_helper1.cu new file mode 100644 index 000000000000..2b281552e1c7 --- /dev/null +++ b/clang/test/dpct/user_defined_rule_helper1.cu @@ -0,0 +1,47 @@ +// RUN: mkdir %T/user_defined_rule_helper1 +// RUN: dpct --out-root %T/user_defined_rule_helper1 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_1.yaml --format-range=none > %T/user_defined_rule_helper1/warning.txt 2>&1 +// RUN: FileCheck --input-file %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.cpp --match-full-lines %s +// RUN: FileCheck --input-file %T/user_defined_rule_helper1/warning.txt --match-full-lines %s -check-prefix=WARNING +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.cpp -o %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.o %} +// RUN: rm -rf %T/user_defined_rule_helper1 + +#ifndef NO_BUILD_TEST + +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include "xpu_helper1.h" +#include + +__global__ void foo1_kernel() {} +void foo1() { + // CHECK: static_cast(c10::xpu::getCurrentXPUStream1()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo1_kernel(); + // CHECK-NEXT: }); + foo1_kernel<<<1, 1>>>(); +} + +__global__ void foo2_kernel(double *d) {} + +void foo2() { + double *d; + // CHECK: d = sycl::malloc_device(1, static_cast(c10::xpu::getCurrentXPUStream1())); + // CHECK-NEXT: { + // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream1()).get_device(), {sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream1()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo2_kernel(d); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: dpct::dpct_free(d, static_cast(c10::xpu::getCurrentXPUStream1())); + cudaMalloc(&d, sizeof(double)); + foo2_kernel<<<1, 1>>>(d); + cudaFree(d); +} + +#endif + +// WARNING: Warning: The rule named rule2 (Kind: HelperFunction) is ignored, as the API specified in the "In" field is not supported for customization. diff --git a/clang/test/dpct/user_defined_rule_helper2.cu b/clang/test/dpct/user_defined_rule_helper2.cu new file mode 100644 index 000000000000..b6734650894a --- /dev/null +++ b/clang/test/dpct/user_defined_rule_helper2.cu @@ -0,0 +1,47 @@ +// RUN: dpct --out-root %T/user_defined_rule_helper2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_2.yaml --format-range=none --usm-level=none +// RUN: FileCheck --input-file %T/user_defined_rule_helper2/user_defined_rule_helper2.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper2/user_defined_rule_helper2.dp.cpp -o %T/user_defined_rule_helper2/user_defined_rule_helper2.dp.o %} + +#ifndef NO_BUILD_TEST + +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include "xpu_helper2.h" +#include + +__global__ void foo1_kernel() {} +void foo1() { + // CHECK: static_cast(c10::xpu::getCurrentXPUStream2()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo1_kernel(); + // CHECK-NEXT: }); + foo1_kernel<<<1, 1>>>(); +} + +__global__ void foo2_kernel(double *d) {} + +void foo2() { + double *d; + // CHECK: d = (double *)dpct::dpct_malloc(sizeof(double)); + // CHECK-NEXT: { + // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream2()).get_device(), {sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream2()).submit( + // CHECK-NEXT: [&](sycl::handler &cgh) { + // CHECK-NEXT: auto d_acc_ct0 = dpct::get_access(d, cgh); + // CHECK-EMPTY: + // CHECK-NEXT: cgh.parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo2_kernel(&d_acc_ct0[0]); + // CHECK-NEXT: }); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: dpct::dpct_free(d); + cudaMalloc(&d, sizeof(double)); + foo2_kernel<<<1, 1>>>(d); + cudaFree(d); +} + +#endif diff --git a/clang/test/dpct/user_defined_rule_helper3.cu b/clang/test/dpct/user_defined_rule_helper3.cu new file mode 100644 index 000000000000..1c87ca0fdc13 --- /dev/null +++ b/clang/test/dpct/user_defined_rule_helper3.cu @@ -0,0 +1,42 @@ +// RUN: dpct --out-root %T/user_defined_rule_helper3 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_3.yaml --format-range=none --use-syclcompat +// RUN: FileCheck --input-file %T/user_defined_rule_helper3/user_defined_rule_helper3.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper3/user_defined_rule_helper3.dp.cpp -o %T/user_defined_rule_helper3/user_defined_rule_helper3.dp.o %} + +#ifndef NO_BUILD_TEST + +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include "xpu_helper3.h" +#include + +__global__ void foo1_kernel() {} +void foo1() { + // CHECK: static_cast(c10::xpu::getCurrentXPUStream3()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo1_kernel(); + // CHECK-NEXT: }); + foo1_kernel<<<1, 1>>>(); +} + +__global__ void foo2_kernel(double *d) {} + +void foo2() { + double *d; + // CHECK: d = sycl::malloc_device(1, static_cast(c10::xpu::getCurrentXPUStream3())); + // CHECK-NEXT: { + // CHECK-NEXT: syclcompat::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream3()).get_device(), {sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream3()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo2_kernel(d); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: syclcompat::wait_and_free(d, static_cast(c10::xpu::getCurrentXPUStream3())); + cudaMalloc(&d, sizeof(double)); + foo2_kernel<<<1, 1>>>(d); + cudaFree(d); +} + +#endif diff --git a/clang/test/dpct/xpu_1.yaml b/clang/test/dpct/xpu_1.yaml new file mode 100644 index 000000000000..0bbf932311a6 --- /dev/null +++ b/clang/test/dpct/xpu_1.yaml @@ -0,0 +1,12 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: get_in_order_queue + Out: static_cast(c10::xpu::getCurrentXPUStream1()) + Includes: ["xpu_helper1.h"] +- Rule: rule2 + Kind: HelperFunction + Priority: Takeover + In: unknown_function + Out: unknown_function_2 diff --git a/clang/test/dpct/xpu_2.yaml b/clang/test/dpct/xpu_2.yaml new file mode 100644 index 000000000000..62a9b50ace6b --- /dev/null +++ b/clang/test/dpct/xpu_2.yaml @@ -0,0 +1,7 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: get_out_of_order_queue + Out: static_cast(c10::xpu::getCurrentXPUStream2()) + Includes: ["xpu_helper2.h"] diff --git a/clang/test/dpct/xpu_3.yaml b/clang/test/dpct/xpu_3.yaml new file mode 100644 index 000000000000..3d6d0c7a0df5 --- /dev/null +++ b/clang/test/dpct/xpu_3.yaml @@ -0,0 +1,7 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: get_default_queue + Out: static_cast(c10::xpu::getCurrentXPUStream3()) + Includes: ["xpu_helper3.h"]