From e2646d6e68257d600518c2007201e06a33469dd5 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Wed, 16 Oct 2024 09:03:22 +0800 Subject: [PATCH 01/11] [SYCLomatic] Custom the helper function usage in the migrated code by rule file Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/APINamesMemory.inc | 10 +-- clang/lib/DPCT/ASTTraversal.cpp | 7 +- clang/lib/DPCT/AnalysisInfo.cpp | 115 ++++++++++++++++++++------ clang/lib/DPCT/AnalysisInfo.h | 13 +-- clang/lib/DPCT/MapNames.cpp | 2 + clang/lib/DPCT/MapNames.h | 7 ++ clang/lib/DPCT/Rules.cpp | 16 ++++ clang/lib/DPCT/Rules.h | 4 +- clang/test/dpct/ipex_xpu.yaml | 13 +++ clang/test/dpct/user_defined_rule2.cu | 37 +++++++++ 10 files changed, 184 insertions(+), 40 deletions(-) create mode 100644 clang/test/dpct/ipex_xpu.yaml create mode 100644 clang/test/dpct/user_defined_rule2.cu diff --git a/clang/lib/DPCT/APINamesMemory.inc b/clang/lib/DPCT/APINamesMemory.inc index ed5576aee733..1c2d98e3047b 100644 --- a/clang/lib/DPCT/APINamesMemory.inc +++ b/clang/lib/DPCT/APINamesMemory.inc @@ -265,7 +265,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemPrefetchAsync", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(2)), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "prefetch", ARG(0), ARG(1))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( @@ -280,7 +280,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), @@ -289,7 +289,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))), CONDITIONAL_FACTORY_ENTRY( @@ -303,7 +303,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(3)), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), FEATURE_REQUEST_FACTORY( @@ -312,7 +312,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(3)), - false, DpctGlobalInfo::getDeviceQueueName()), + false, DpctGlobalInfo::getDefaultQueueMemFuncName()), DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 6224f1183290..902b5150905f 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -10570,7 +10570,8 @@ void MemoryMigrationRule::prefetchMigration( ? "cpu_device()" : "get_device(" + StmtStrArg2 + ")"); requestFeature(HelperFeatureEnum::device_ext); - Replacement = Prefix + "." + DpctGlobalInfo::getDeviceQueueName() + "()" + + Replacement = Prefix + "." + + DpctGlobalInfo::getDefaultQueueMemFuncName() + "()" + (DpctGlobalInfo::useSYCLCompat() ? "->" : ".") + "prefetch(" + StmtStrArg0 + "," + StmtStrArg1 + ")"; } else { @@ -10772,7 +10773,7 @@ void MemoryMigrationRule::cudaMemAdvise(const MatchFinder::MatchResult &Result, std::ostringstream OS; if (getStmtSpelling(C->getArg(3)) == "cudaCpuDeviceId") { OS << MapNames::getDpctNamespace() + "cpu_device()." + - DpctGlobalInfo::getDeviceQueueName() + "()"; + DpctGlobalInfo::getDefaultQueueMemFuncName() + "()"; OS << (DpctGlobalInfo::useSYCLCompat() ? "->" : ".") << "mem_advise(" << Arg0Str << ", " << Arg1Str << ", " << Arg2Str << ")"; emplaceTransformation(new ReplaceStmt(C, OS.str())); @@ -10780,7 +10781,7 @@ void MemoryMigrationRule::cudaMemAdvise(const MatchFinder::MatchResult &Result, return; } OS << MapNames::getDpctNamespace() + "get_device(" << Arg3Str - << ")." + DpctGlobalInfo::getDeviceQueueName() + "()"; + << ")." + DpctGlobalInfo::getDefaultQueueMemFuncName() + "()"; OS << (DpctGlobalInfo::useSYCLCompat() ? "->" : ".") << "mem_advise(" << Arg0Str << ", " << Arg1Str << ", " << Arg2Str << ")"; emplaceTransformation(new ReplaceStmt(C, OS.str())); diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index dbbb12ffbc93..104242d097e7 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -63,8 +63,8 @@ const std::string &getDefaultString(HelperFuncType HFT) { const static std::string DefaultQueue = DpctGlobalInfo::useNoQueueDevice() ? DpctGlobalInfo::getGlobalQueueName() - : buildString(MapNames::getDpctNamespace() + "get_" + - DpctGlobalInfo::getDeviceQueueName() + "()"); + : buildString(MapNames::getDpctNamespace() + + DpctGlobalInfo::getDefaultQueueFreeFuncCall()); return DefaultQueue; } case clang::dpct::HelperFuncType::HFT_DefaultQueuePtr: { @@ -74,8 +74,9 @@ const std::string &getDefaultString(HelperFuncType HFT) { : (DpctGlobalInfo::useSYCLCompat() ? buildString(MapNames::getDpctNamespace() + "get_current_device().default_queue()") - : buildString("&" + MapNames::getDpctNamespace() + "get_" + - DpctGlobalInfo::getDeviceQueueName() + "()")); + : buildString( + "&" + MapNames::getDpctNamespace() + + DpctGlobalInfo::getDefaultQueueFreeFuncCall())); return DefaultQueue; } case clang::dpct::HelperFuncType::HFT_CurrentDevice: { @@ -1225,15 +1226,30 @@ std::string DpctGlobalInfo::getDefaultQueue(const Stmt *S) { return buildString(RegexPrefix, 'Q', Idx, RegexSuffix); } -const std::string &DpctGlobalInfo::getDeviceQueueName() { - static const std::string DeviceQueue = [&]() { - if (DpctGlobalInfo::useSYCLCompat()) - return "default_queue"; - if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) - return "out_of_order_queue"; - return "in_order_queue"; - }(); - return DeviceQueue; +const std::string &DpctGlobalInfo::getDefaultQueueFreeFuncCall() { + static const std::string DefaultQueueFreeFunc = "get_default_queue()"; + static const std::string OutOfOrderQueueFreeFunc = "get_out_of_order_queue()"; + static const std::string InOrderQueueFreeFunc = "get_in_order_queue()"; + auto Iter = + MapNames::CustomHelperFunctionMap.find(HelperFuncCatalog::DefaultQueue); + if (Iter != MapNames::CustomHelperFunctionMap.end()) { + return Iter->second; + } + if (DpctGlobalInfo::useSYCLCompat()) + return DefaultQueueFreeFunc; + if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) + return OutOfOrderQueueFreeFunc; + return InOrderQueueFreeFunc; +} +const std::string &DpctGlobalInfo::getDefaultQueueMemFuncName() { + static const std::string DefaultQueueMemFunc = "default_queue"; + static const std::string OutOfOrderQueueMemFunc = "out_of_order_queue"; + static const std::string InOrderQueueMemFunc = "in_order_queue"; + if (DpctGlobalInfo::useSYCLCompat()) + return DefaultQueueMemFunc; + if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) + return OutOfOrderQueueMemFunc; + return InOrderQueueMemFunc; } void DpctGlobalInfo::setContext(ASTContext &C) { Context = &C; @@ -1588,7 +1604,8 @@ void DpctGlobalInfo::buildReplacements() { QDecl << "&q_ct1 = "; if (DpctGlobalInfo::useSYCLCompat()) QDecl << '*'; - QDecl << "dev_ct1." << DpctGlobalInfo::getDeviceQueueName() << "();"; + QDecl << "dev_ct1." << DpctGlobalInfo::getDefaultQueueMemFuncName() + << "();"; } else { DevDecl << MapNames::getClNamespace() + "device dev_ct1;"; // Now the UsmLevel must not be UL_None here. @@ -6060,6 +6077,38 @@ void KernelCallExpr::removeExtraIndent() { getFilePath(), getOffset() - LocInfo.Indent.length(), LocInfo.Indent.length(), "", nullptr)); } + +namespace { +void buildHasCapabilityOrFailStr(const std::string &Aspects, + llvm::raw_string_ostream &OS, + const OutputBuilder &OB) { + switch (OB.Kind) { + case (OutputBuilder::Kind::Top): + for (auto &ob : OB.SubBuilders) { + buildHasCapabilityOrFailStr(Aspects, OS, *ob); + } + return; + case (OutputBuilder::Kind::String): + OS << OB.Str; + return; + case (OutputBuilder::Kind::Arg): { + if (OB.ArgIndex > 1) { + OS << ""; + return; + } + OS << Aspects; + return; + } + default: { + DpctDebugs() << "[buildHasCapabilityOrFailStr OutputBuilder::Kind] " + "Unexpected value: " + << OB.Kind << "\n"; + assert(0); + } + } +} +} // namespace + void KernelCallExpr::addDevCapCheckStmt() { llvm::SmallVector AspectList; if (getVarMap().hasBF64()) { @@ -6069,17 +6118,32 @@ void KernelCallExpr::addDevCapCheckStmt() { AspectList.push_back(MapNames::getClNamespace() + "aspect::fp16"); } if (!AspectList.empty()) { - requestFeature(HelperFeatureEnum::device_ext); std::string Str; llvm::raw_string_ostream OS(Str); - OS << MapNames::getDpctNamespace() << "get_device("; - OS << MapNames::getDpctNamespace() << "get_device_id("; - printStreamBase(OS); - OS << "get_device())).has_capability_or_fail({" << AspectList.front(); - for (size_t i = 1; i < AspectList.size(); ++i) { - OS << ", " << AspectList[i]; - } - OS << "});"; + auto Iter = MapNames::CustomHelperFunctionMap.find( + HelperFuncCatalog::HasCapabilityOrFail); + if (Iter != MapNames::CustomHelperFunctionMap.end()) { + OutputBuilder OB; + OB.parse(Iter->second); + OB.Kind = OutputBuilder::Kind::Top; + std::string Aspects = "{" + AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + Aspects += AspectList[i]; + } + Aspects += "}"; + buildHasCapabilityOrFailStr(Aspects, OS, OB); + OS << ";"; + } else { + requestFeature(HelperFeatureEnum::device_ext); + OS << MapNames::getDpctNamespace() << "get_device("; + OS << MapNames::getDpctNamespace() << "get_device_id("; + printStreamBase(OS); + OS << "get_device())).has_capability_or_fail({" << AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + OS << ", " << AspectList[i]; + } + OS << "});"; + } OuterStmts.OthersList.emplace_back(OS.str()); } } @@ -6129,8 +6193,9 @@ void KernelCallExpr::addStreamDecl() { buildString(MapNames::getClNamespace() + "stream ", DpctGlobalInfo::getStreamName(), "(64 * 1024, 80, cgh);")); if (getVarMap().hasSync()) { - auto DefaultQueue = buildString(MapNames::getDpctNamespace(), "get_", - DpctGlobalInfo::getDeviceQueueName(), "()"); + auto DefaultQueue = + buildString(MapNames::getDpctNamespace(), + DpctGlobalInfo::getDefaultQueueFreeFuncCall()); if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) { OuterStmts.OthersList.emplace_back( buildString(MapNames::getDpctNamespace(), "global_memory<", diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index 59867e9b47ca..5034f9d4e06a 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -663,15 +663,15 @@ class DpctGlobalInfo { CurrentDeviceCounter(CurrentDeviceCounter), PlaceholderStr{ "", - buildString(MapNames::getDpctNamespace(), "get_", - DpctGlobalInfo::getDeviceQueueName(), "()"), + buildString(MapNames::getDpctNamespace(), + DpctGlobalInfo::getDefaultQueueFreeFuncCall()), MapNames::getDpctNamespace() + "get_current_device()", (DpctGlobalInfo::useSYCLCompat() ? buildString(MapNames::getDpctNamespace() + "get_current_device().default_queue()") - : buildString("&" + MapNames::getDpctNamespace() + "get_" + - DpctGlobalInfo::getDeviceQueueName() + - "()"))} {} + : buildString( + "&" + MapNames::getDpctNamespace() + + DpctGlobalInfo::getDefaultQueueFreeFuncCall()))} {} int DefaultQueueCounter = 0; int CurrentDeviceCounter = 0; std::string PlaceholderStr[4]; @@ -749,7 +749,8 @@ class DpctGlobalInfo { static std::string getSubGroup(const Stmt *, const FunctionDecl *FD = nullptr); static std::string getDefaultQueue(const Stmt *); - static const std::string &getDeviceQueueName(); + static const std::string &getDefaultQueueFreeFuncCall(); + static const std::string &getDefaultQueueMemFuncName(); static const std::string &getStreamName() { const static std::string StreamName = "stream" + getCTFixedSuffix(); return StreamName; diff --git a/clang/lib/DPCT/MapNames.cpp b/clang/lib/DPCT/MapNames.cpp index dd307b7d2f36..1e9300088045 100644 --- a/clang/lib/DPCT/MapNames.cpp +++ b/clang/lib/DPCT/MapNames.cpp @@ -4560,6 +4560,8 @@ MapNames::MapTy TextureRule::ResourceTypeNames{{"devPtr", "data_ptr"}, {"numChannels", "channel_num"}}; std::vector MapNames::PatternRewriters; +std::map + MapNames::CustomHelperFunctionMap; const MapNames::MapTy MemoryDataTypeRule::PitchMemberNames{ {"pitch", "pitch"}, {"ptr", "data_ptr"}, {"xsize", "x"}, {"ysize", "y"}}; diff --git a/clang/lib/DPCT/MapNames.h b/clang/lib/DPCT/MapNames.h index 6a5f3db5064e..45db010aa097 100644 --- a/clang/lib/DPCT/MapNames.h +++ b/clang/lib/DPCT/MapNames.h @@ -20,6 +20,10 @@ namespace dpct { enum class KernelArgType; enum class HelperFileEnum : unsigned int; struct HelperFunc; +enum class HelperFuncCatalog { + DefaultQueue, + HasCapabilityOrFail +}; } // namespace dpct } // namespace clang @@ -420,6 +424,9 @@ class MapNames { /// {Original API, {ToType, FromType}} static std::unordered_map> MathTypeCastingMap; + + static std::map + CustomHelperFunctionMap; }; class MigrationStatistics { diff --git a/clang/lib/DPCT/Rules.cpp b/clang/lib/DPCT/Rules.cpp index 712bcecbd9b8..ee1a37a3c942 100644 --- a/clang/lib/DPCT/Rules.cpp +++ b/clang/lib/DPCT/Rules.cpp @@ -276,6 +276,19 @@ void registerPatternRewriterRule(MetaRuleObject &R) { R.Priority)); } +void registerHelperFunctionRule(MetaRuleObject &R) { + if (R.In == "DefaultQueue" && R.Priority == RulePriority::Takeover) { + MapNames::CustomHelperFunctionMap.insert( + {dpct::HelperFuncCatalog::DefaultQueue, R.Out}); + dpct::DpctGlobalInfo::setUsingDRYPattern(false); + } else if (R.In == "HasCapabilityOrFail" && + R.Priority == RulePriority::Takeover) { + MapNames::CustomHelperFunctionMap.insert( + {dpct::HelperFuncCatalog::HasCapabilityOrFail, R.Out}); + dpct::DpctGlobalInfo::setUsingDRYPattern(false); + } +} + MetaRuleObject::PatternRewriter &MetaRuleObject::PatternRewriter::operator=( const MetaRuleObject::PatternRewriter &PR) { if (this != &PR) { @@ -365,6 +378,9 @@ void importRules(std::vector &RuleFiles) { case (RuleKind::CMakeRule): registerCmakeMigrationRule(*r); break; + case (RuleKind::HelperFunction): + registerHelperFunctionRule(*r); + break; default: break; } diff --git a/clang/lib/DPCT/Rules.h b/clang/lib/DPCT/Rules.h index 45f13c51bd30..1b5c36137b2a 100644 --- a/clang/lib/DPCT/Rules.h +++ b/clang/lib/DPCT/Rules.h @@ -24,7 +24,8 @@ enum RuleKind { Enum, DisableAPIMigration, PatternRewriter, - CMakeRule + CMakeRule, + HelperFunction }; enum RulePriority { Takeover, Default, Fallback }; @@ -209,6 +210,7 @@ template <> struct llvm::yaml::ScalarEnumerationTraits { Io.enumCase(Value, "DisableAPIMigration", RuleKind::DisableAPIMigration); Io.enumCase(Value, "PatternRewriter", RuleKind::PatternRewriter); Io.enumCase(Value, "CMakeRule", RuleKind::CMakeRule); + Io.enumCase(Value, "HelperFunction", RuleKind::HelperFunction); } }; diff --git a/clang/test/dpct/ipex_xpu.yaml b/clang/test/dpct/ipex_xpu.yaml new file mode 100644 index 000000000000..5989fde259d5 --- /dev/null +++ b/clang/test/dpct/ipex_xpu.yaml @@ -0,0 +1,13 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: DefaultQueue + Out: static_cast(c10::xpu::getCurrentXPUStream()) + Includes: ["c10/xpu/XPUStream.h"] +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: HasCapabilityOrFail + Out: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream()).get_device(), $1) + Includes: ["c10/xpu/XPUStream.h"] diff --git a/clang/test/dpct/user_defined_rule2.cu b/clang/test/dpct/user_defined_rule2.cu new file mode 100644 index 000000000000..7affdd6bb464 --- /dev/null +++ b/clang/test/dpct/user_defined_rule2.cu @@ -0,0 +1,37 @@ +// RUN: dpct --out-root %T/user_defined_rule2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/ipex_xpu.yaml --format-range=none +// RUN: FileCheck --input-file %T/user_defined_rule2/user_defined_rule2.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/user_defined_rule2/user_defined_rule2.dp.cpp -o %T/user_defined_rule2/user_defined_rule2.dp.o %} + +#ifndef BUILD_TEST + +__global__ void foo1_kernel() {} +void foo1() { + // CHECK: dpct::static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo1_kernel(); + // CHECK-NEXT: }); + foo1_kernel<<<1, 1>>>(); +} + +__global__ void foo2_kernel(double *d) {} + +void foo2() { + double *d; + // CHECK: d = sycl::malloc_device(1, dpct::static_cast(c10::xpu::getCurrentXPUStream())); + // CHECK-NEXT: { + // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream()).get_device(), {sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: dpct::static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo2_kernel(d); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: dpct::dpct_free(d, dpct::static_cast(c10::xpu::getCurrentXPUStream())); + cudaMalloc(&d, sizeof(double)); + foo2_kernel<<<1, 1>>>(d); + cudaFree(d); +} + +#endif From 857295b769b2c0dda14d12e8424668a595d03021 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Wed, 16 Oct 2024 09:46:28 +0800 Subject: [PATCH 02/11] Fix Signed-off-by: Jiang, Zhiwei --- clang/test/dpct/ipex_xpu.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/dpct/ipex_xpu.yaml b/clang/test/dpct/ipex_xpu.yaml index 5989fde259d5..9cc18eb390c5 100644 --- a/clang/test/dpct/ipex_xpu.yaml +++ b/clang/test/dpct/ipex_xpu.yaml @@ -4,10 +4,10 @@ Priority: Takeover In: DefaultQueue Out: static_cast(c10::xpu::getCurrentXPUStream()) - Includes: ["c10/xpu/XPUStream.h"] + Includes: [""] - Rule: rule1 Kind: HelperFunction Priority: Takeover In: HasCapabilityOrFail Out: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream()).get_device(), $1) - Includes: ["c10/xpu/XPUStream.h"] + Includes: [""] From 15a3c43432c272b1328a47beab457a41b296c672 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Wed, 16 Oct 2024 17:42:45 +0800 Subject: [PATCH 03/11] Fix Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 68 ++++++++++++--------------- clang/lib/DPCT/AnalysisInfo.h | 8 ++-- clang/lib/DPCT/MapNames.h | 3 +- clang/lib/DPCT/Rules.cpp | 5 -- clang/test/dpct/ipex_xpu.yaml | 6 --- clang/test/dpct/user_defined_rule2.cu | 8 ++-- 6 files changed, 37 insertions(+), 61 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 171f19485843..661c5537e529 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -63,8 +63,7 @@ const std::string &getDefaultString(HelperFuncType HFT) { const static std::string DefaultQueue = DpctGlobalInfo::useNoQueueDevice() ? DpctGlobalInfo::getGlobalQueueName() - : buildString(MapNames::getDpctNamespace() + - DpctGlobalInfo::getDefaultQueueFreeFuncCall()); + : DpctGlobalInfo::getDefaultQueueFreeFuncCall(); return DefaultQueue; } case clang::dpct::HelperFuncType::HFT_DefaultQueuePtr: { @@ -75,8 +74,7 @@ const std::string &getDefaultString(HelperFuncType HFT) { ? buildString(MapNames::getDpctNamespace() + "get_current_device().default_queue()") : buildString( - "&" + MapNames::getDpctNamespace() + - DpctGlobalInfo::getDefaultQueueFreeFuncCall())); + "&", DpctGlobalInfo::getDefaultQueueFreeFuncCall())); return DefaultQueue; } case clang::dpct::HelperFuncType::HFT_CurrentDevice: { @@ -1227,29 +1225,26 @@ std::string DpctGlobalInfo::getDefaultQueue(const Stmt *S) { return buildString(RegexPrefix, 'Q', Idx, RegexSuffix); } const std::string &DpctGlobalInfo::getDefaultQueueFreeFuncCall() { - static const std::string DefaultQueueFreeFunc = "get_default_queue()"; - static const std::string OutOfOrderQueueFreeFunc = "get_out_of_order_queue()"; - static const std::string InOrderQueueFreeFunc = "get_in_order_queue()"; - auto Iter = - MapNames::CustomHelperFunctionMap.find(HelperFuncCatalog::DefaultQueue); - if (Iter != MapNames::CustomHelperFunctionMap.end()) { - return Iter->second; - } - if (DpctGlobalInfo::useSYCLCompat()) - return DefaultQueueFreeFunc; - if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) - return OutOfOrderQueueFreeFunc; - return InOrderQueueFreeFunc; + static const std::string DefaultQueueFreeFuncCall = [&]() { + auto Iter = + MapNames::CustomHelperFunctionMap.find(HelperFuncCatalog::DefaultQueue); + if (Iter != MapNames::CustomHelperFunctionMap.end()) { + return Iter->second; + } + return MapNames::getDpctNamespace() + "get_" + + getDefaultQueueMemFuncName() + "()"; + }(); + return DefaultQueueFreeFuncCall; } const std::string &DpctGlobalInfo::getDefaultQueueMemFuncName() { - static const std::string DefaultQueueMemFunc = "default_queue"; - static const std::string OutOfOrderQueueMemFunc = "out_of_order_queue"; - static const std::string InOrderQueueMemFunc = "in_order_queue"; - if (DpctGlobalInfo::useSYCLCompat()) - return DefaultQueueMemFunc; - if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) - return OutOfOrderQueueMemFunc; - return InOrderQueueMemFunc; + static const std::string DefaultQueueMemFuncName = [&]() { + if (DpctGlobalInfo::useSYCLCompat()) + return "default_queue"; + if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) + return "out_of_order_queue"; + return "in_order_queue"; + }(); + return DefaultQueueMemFuncName; } void DpctGlobalInfo::setContext(ASTContext &C) { Context = &C; @@ -6130,19 +6125,16 @@ void KernelCallExpr::addDevCapCheckStmt() { if (!AspectList.empty()) { std::string Str; llvm::raw_string_ostream OS(Str); - auto Iter = MapNames::CustomHelperFunctionMap.find( - HelperFuncCatalog::HasCapabilityOrFail); - if (Iter != MapNames::CustomHelperFunctionMap.end()) { - OutputBuilder OB; - OB.parse(Iter->second); - OB.Kind = OutputBuilder::Kind::Top; - std::string Aspects = "{" + AspectList.front(); + if (auto Iter = MapNames::CustomHelperFunctionMap.find( + HelperFuncCatalog::DefaultQueue); + Iter != MapNames::CustomHelperFunctionMap.end()) { + OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; + OS << Iter->second << ".get_device(), "; + OS << "{" << AspectList.front(); for (size_t i = 1; i < AspectList.size(); ++i) { - Aspects += AspectList[i]; + OS << ", " << AspectList[i]; } - Aspects += "}"; - buildHasCapabilityOrFailStr(Aspects, OS, OB); - OS << ";"; + OS << "});"; } else { requestFeature(HelperFeatureEnum::device_ext); OS << MapNames::getDpctNamespace() << "get_device("; @@ -6203,9 +6195,7 @@ void KernelCallExpr::addStreamDecl() { buildString(MapNames::getClNamespace() + "stream ", DpctGlobalInfo::getStreamName(), "(64 * 1024, 80, cgh);")); if (getVarMap().hasSync()) { - auto DefaultQueue = - buildString(MapNames::getDpctNamespace(), - DpctGlobalInfo::getDefaultQueueFreeFuncCall()); + auto DefaultQueue = DpctGlobalInfo::getDefaultQueueFreeFuncCall(); if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) { OuterStmts.OthersList.emplace_back( buildString(MapNames::getDpctNamespace(), "global_memory<", diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index bb38d7d4d220..7cec5d2c02ec 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -662,16 +662,14 @@ class DpctGlobalInfo { : DefaultQueueCounter(DefaultQueueCounter), CurrentDeviceCounter(CurrentDeviceCounter), PlaceholderStr{ - "", - buildString(MapNames::getDpctNamespace(), - DpctGlobalInfo::getDefaultQueueFreeFuncCall()), + "", DpctGlobalInfo::getDefaultQueueFreeFuncCall(), MapNames::getDpctNamespace() + "get_current_device()", (DpctGlobalInfo::useSYCLCompat() ? buildString(MapNames::getDpctNamespace() + "get_current_device().default_queue()") : buildString( - "&" + MapNames::getDpctNamespace() + - DpctGlobalInfo::getDefaultQueueFreeFuncCall()))} {} + "&", DpctGlobalInfo::getDefaultQueueFreeFuncCall()))} { + } int DefaultQueueCounter = 0; int CurrentDeviceCounter = 0; std::string PlaceholderStr[4]; diff --git a/clang/lib/DPCT/MapNames.h b/clang/lib/DPCT/MapNames.h index 45db010aa097..83a57ac6930d 100644 --- a/clang/lib/DPCT/MapNames.h +++ b/clang/lib/DPCT/MapNames.h @@ -21,8 +21,7 @@ enum class KernelArgType; enum class HelperFileEnum : unsigned int; struct HelperFunc; enum class HelperFuncCatalog { - DefaultQueue, - HasCapabilityOrFail + DefaultQueue }; } // namespace dpct } // namespace clang diff --git a/clang/lib/DPCT/Rules.cpp b/clang/lib/DPCT/Rules.cpp index ee1a37a3c942..77b556d48ff8 100644 --- a/clang/lib/DPCT/Rules.cpp +++ b/clang/lib/DPCT/Rules.cpp @@ -281,11 +281,6 @@ void registerHelperFunctionRule(MetaRuleObject &R) { MapNames::CustomHelperFunctionMap.insert( {dpct::HelperFuncCatalog::DefaultQueue, R.Out}); dpct::DpctGlobalInfo::setUsingDRYPattern(false); - } else if (R.In == "HasCapabilityOrFail" && - R.Priority == RulePriority::Takeover) { - MapNames::CustomHelperFunctionMap.insert( - {dpct::HelperFuncCatalog::HasCapabilityOrFail, R.Out}); - dpct::DpctGlobalInfo::setUsingDRYPattern(false); } } diff --git a/clang/test/dpct/ipex_xpu.yaml b/clang/test/dpct/ipex_xpu.yaml index 9cc18eb390c5..05585504ceee 100644 --- a/clang/test/dpct/ipex_xpu.yaml +++ b/clang/test/dpct/ipex_xpu.yaml @@ -5,9 +5,3 @@ In: DefaultQueue Out: static_cast(c10::xpu::getCurrentXPUStream()) Includes: [""] -- Rule: rule1 - Kind: HelperFunction - Priority: Takeover - In: HasCapabilityOrFail - Out: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream()).get_device(), $1) - Includes: [""] diff --git a/clang/test/dpct/user_defined_rule2.cu b/clang/test/dpct/user_defined_rule2.cu index 7affdd6bb464..e5c059d5b357 100644 --- a/clang/test/dpct/user_defined_rule2.cu +++ b/clang/test/dpct/user_defined_rule2.cu @@ -6,7 +6,7 @@ __global__ void foo1_kernel() {} void foo1() { - // CHECK: dpct::static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( + // CHECK: static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: foo1_kernel(); @@ -18,17 +18,17 @@ __global__ void foo2_kernel(double *d) {} void foo2() { double *d; - // CHECK: d = sycl::malloc_device(1, dpct::static_cast(c10::xpu::getCurrentXPUStream())); + // CHECK: d = sycl::malloc_device(1, static_cast(c10::xpu::getCurrentXPUStream())); // CHECK-NEXT: { // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream()).get_device(), {sycl::aspect::fp64}); // CHECK-EMPTY: - // CHECK-NEXT: dpct::static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: foo2_kernel(d); // CHECK-NEXT: }); // CHECK-NEXT: } - // CHECK-NEXT: dpct::dpct_free(d, dpct::static_cast(c10::xpu::getCurrentXPUStream())); + // CHECK-NEXT: dpct::dpct_free(d, static_cast(c10::xpu::getCurrentXPUStream())); cudaMalloc(&d, sizeof(double)); foo2_kernel<<<1, 1>>>(d); cudaFree(d); From 12ac8342281494fe8b6c610399ec833c985e26de Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 18 Oct 2024 14:06:35 +0800 Subject: [PATCH 04/11] Update Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 7 ++ clang/lib/DPCT/AnalysisInfo.h | 5 + clang/lib/DPCT/Rules.cpp | 2 + clang/runtime/dpct-rt/include/dpct/dpct.hpp | 6 +- clang/runtime/dpct-rt/include/dpct/math.hpp | 112 ++++++++++---------- clang/test/dpct/user_defined_rule2.cu | 7 +- clang/test/dpct/{ipex_xpu.yaml => xpu.yaml} | 2 +- 7 files changed, 80 insertions(+), 61 deletions(-) rename clang/test/dpct/{ipex_xpu.yaml => xpu.yaml} (83%) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 661c5537e529..5ea2ce66110c 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -929,6 +929,11 @@ void DpctFileInfo::insertHeader(HeaderType Type, unsigned Offset, << CCLVerValue << getNL(); insertHeader(MigratedMacroDefinitionOS.str(), FileBeginOffset, InsertPosition::IP_AlwaysLeft); + for (const auto &File : + DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes()) { + insertHeader("#include \"" + File + +"\"" + getNL(), FirstIncludeOffset, + InsertPosition::IP_Right); + } return; // Because includes and @@ -2466,6 +2471,8 @@ std::vector>> std::vector>> DpctGlobalInfo::CodePinDumpFuncDepsVec; std::unordered_set DpctGlobalInfo::NeedParenAPISet = {}; +std::unordered_set + DpctGlobalInfo::CustomHelperFunctionAddtionalIncludes = {}; ///// class DpctNameGenerator ///// void DpctNameGenerator::printName(const FunctionDecl *FD, llvm::raw_ostream &OS) { diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index 7cec5d2c02ec..d16bc572a660 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1327,6 +1327,10 @@ class DpctGlobalInfo { static bool useBFloat16() { return getUsingExtensionDE(DPCPPExtensionsDefaultEnabled::ExtDE_BFloat16); } + static std::unordered_set & + getCustomHelperFunctionAddtionalIncludes() { + return CustomHelperFunctionAddtionalIncludes; + } std::shared_ptr insertFile(const clang::tooling::UnifiedPath &FilePath) { return insertObject(FileMap, FilePath); @@ -1642,6 +1646,7 @@ class DpctGlobalInfo { static std::vector>> CodePinDumpFuncDepsVec; static std::unordered_set NeedParenAPISet; + static std::unordered_set CustomHelperFunctionAddtionalIncludes; }; /// Generate mangle name of FunctionDecl as key of DeviceFunctionInfo. diff --git a/clang/lib/DPCT/Rules.cpp b/clang/lib/DPCT/Rules.cpp index 77b556d48ff8..30276a162193 100644 --- a/clang/lib/DPCT/Rules.cpp +++ b/clang/lib/DPCT/Rules.cpp @@ -281,6 +281,8 @@ void registerHelperFunctionRule(MetaRuleObject &R) { MapNames::CustomHelperFunctionMap.insert( {dpct::HelperFuncCatalog::DefaultQueue, R.Out}); dpct::DpctGlobalInfo::setUsingDRYPattern(false); + dpct::DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes().insert( + R.Includes.begin(), R.Includes.end()); } } diff --git a/clang/runtime/dpct-rt/include/dpct/dpct.hpp b/clang/runtime/dpct-rt/include/dpct/dpct.hpp index d2559174cf24..a78a994d5c1a 100644 --- a/clang/runtime/dpct-rt/include/dpct/dpct.hpp +++ b/clang/runtime/dpct-rt/include/dpct/dpct.hpp @@ -19,14 +19,14 @@ template class dpct_kernel_scalar; #include "atomic.hpp" #include "device.hpp" -#include "image.hpp" +//#include "image.hpp" #include "kernel.hpp" #include "math.hpp" #include "memory.hpp" #include "util.hpp" -#include "bindless_images.hpp" -#include "graph.hpp" +//#include "bindless_images.hpp" +//#include "graph.hpp" #define USE_DPCT_HELPER 1 diff --git a/clang/runtime/dpct-rt/include/dpct/math.hpp b/clang/runtime/dpct-rt/include/dpct/math.hpp index e14408fbf5d5..a583126b24b6 100644 --- a/clang/runtime/dpct-rt/include/dpct/math.hpp +++ b/clang/runtime/dpct-rt/include/dpct/math.hpp @@ -2170,62 +2170,62 @@ template struct matrix_size_traits { static constexpr int cols = n; }; -// A class that wraps the syclex::matrix::joint_matrix class and provides -// copy constructor and assignment operator. -template > -class joint_matrix { - using joint_matrix_type = syclex::matrix::joint_matrix< - sycl::sub_group, T, use::value, matrix_size_traits::rows, - matrix_size_traits::cols, layout::value>; - - static inline decltype(auto) get_wi_data(joint_matrix_type &matrix) { - return sycl::ext::oneapi::detail::get_wi_data( - sycl::ext::oneapi::this_work_item::get_sub_group(), matrix); - } - -public: - joint_matrix() - : matrix(), x(matrix), num_elements(get_wi_data(matrix).length()) {} - joint_matrix(joint_matrix &other) - : x(matrix), num_elements(get_wi_data(matrix).length()) { - syclex::matrix::joint_matrix_copy( - sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), - matrix); - } - joint_matrix &operator=(joint_matrix &other) { - if (this != &other) { - syclex::matrix::joint_matrix_copy( - sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), - matrix); - } - return *this; - } - - joint_matrix_type &get() { return matrix; } - - const joint_matrix_type &get() const { return matrix; } - - class matrix_accessor { - friend joint_matrix; - joint_matrix_type &matrix; - matrix_accessor(joint_matrix_type &matrix) : matrix(matrix) {} - - public: - decltype(auto) operator[](unsigned I) { return get_wi_data(matrix)[I]; } - decltype(auto) operator[](unsigned I) const { - return get_wi_data(matrix)[I]; - } - }; - -private: - joint_matrix_type matrix; - -public: - matrix_accessor x; - const size_t num_elements; -}; +//// A class that wraps the syclex::matrix::joint_matrix class and provides +//// copy constructor and assignment operator. +//template > +//class joint_matrix { +// using joint_matrix_type = syclex::matrix::joint_matrix< +// sycl::sub_group, T, use::value, matrix_size_traits::rows, +// matrix_size_traits::cols, layout::value>; +// +// static inline decltype(auto) get_wi_data(joint_matrix_type &matrix) { +// return sycl::ext::oneapi::detail::get_wi_data( +// sycl::ext::oneapi::this_work_item::get_sub_group(), matrix); +// } +// +//public: +// joint_matrix() +// : matrix(), x(matrix), num_elements(get_wi_data(matrix).length()) {} +// joint_matrix(joint_matrix &other) +// : x(matrix), num_elements(get_wi_data(matrix).length()) { +// syclex::matrix::joint_matrix_copy( +// sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), +// matrix); +// } +// joint_matrix &operator=(joint_matrix &other) { +// if (this != &other) { +// syclex::matrix::joint_matrix_copy( +// sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), +// matrix); +// } +// return *this; +// } +// +// joint_matrix_type &get() { return matrix; } +// +// const joint_matrix_type &get() const { return matrix; } +// +// class matrix_accessor { +// friend joint_matrix; +// joint_matrix_type &matrix; +// matrix_accessor(joint_matrix_type &matrix) : matrix(matrix) {} +// +// public: +// decltype(auto) operator[](unsigned I) { return get_wi_data(matrix)[I]; } +// decltype(auto) operator[](unsigned I) const { +// return get_wi_data(matrix)[I]; +// } +// }; +// +//private: +// joint_matrix_type matrix; +// +//public: +// matrix_accessor x; +// const size_t num_elements; +//}; } // namespace matrix } // namespace experimental diff --git a/clang/test/dpct/user_defined_rule2.cu b/clang/test/dpct/user_defined_rule2.cu index e5c059d5b357..b9c8236e8694 100644 --- a/clang/test/dpct/user_defined_rule2.cu +++ b/clang/test/dpct/user_defined_rule2.cu @@ -1,9 +1,14 @@ -// RUN: dpct --out-root %T/user_defined_rule2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/ipex_xpu.yaml --format-range=none +// RUN: dpct --out-root %T/user_defined_rule2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu.yaml --format-range=none // RUN: FileCheck --input-file %T/user_defined_rule2/user_defined_rule2.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/user_defined_rule2/user_defined_rule2.dp.cpp -o %T/user_defined_rule2/user_defined_rule2.dp.o %} #ifndef BUILD_TEST +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include "xpu_helper.h" +#include + __global__ void foo1_kernel() {} void foo1() { // CHECK: static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( diff --git a/clang/test/dpct/ipex_xpu.yaml b/clang/test/dpct/xpu.yaml similarity index 83% rename from clang/test/dpct/ipex_xpu.yaml rename to clang/test/dpct/xpu.yaml index 05585504ceee..ef994c9801ce 100644 --- a/clang/test/dpct/ipex_xpu.yaml +++ b/clang/test/dpct/xpu.yaml @@ -4,4 +4,4 @@ Priority: Takeover In: DefaultQueue Out: static_cast(c10::xpu::getCurrentXPUStream()) - Includes: [""] + Includes: ["xpu_helper.h"] From 04e2251ffdf32a1aeb3d6deb0c1fb2de7fe4801c Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 18 Oct 2024 14:52:41 +0800 Subject: [PATCH 05/11] Fix Signed-off-by: Jiang, Zhiwei --- clang/runtime/dpct-rt/include/dpct/dpct.hpp | 6 +- clang/runtime/dpct-rt/include/dpct/math.hpp | 112 ++++++++++---------- 2 files changed, 59 insertions(+), 59 deletions(-) diff --git a/clang/runtime/dpct-rt/include/dpct/dpct.hpp b/clang/runtime/dpct-rt/include/dpct/dpct.hpp index a78a994d5c1a..d2559174cf24 100644 --- a/clang/runtime/dpct-rt/include/dpct/dpct.hpp +++ b/clang/runtime/dpct-rt/include/dpct/dpct.hpp @@ -19,14 +19,14 @@ template class dpct_kernel_scalar; #include "atomic.hpp" #include "device.hpp" -//#include "image.hpp" +#include "image.hpp" #include "kernel.hpp" #include "math.hpp" #include "memory.hpp" #include "util.hpp" -//#include "bindless_images.hpp" -//#include "graph.hpp" +#include "bindless_images.hpp" +#include "graph.hpp" #define USE_DPCT_HELPER 1 diff --git a/clang/runtime/dpct-rt/include/dpct/math.hpp b/clang/runtime/dpct-rt/include/dpct/math.hpp index a583126b24b6..e14408fbf5d5 100644 --- a/clang/runtime/dpct-rt/include/dpct/math.hpp +++ b/clang/runtime/dpct-rt/include/dpct/math.hpp @@ -2170,62 +2170,62 @@ template struct matrix_size_traits { static constexpr int cols = n; }; -//// A class that wraps the syclex::matrix::joint_matrix class and provides -//// copy constructor and assignment operator. -//template > -//class joint_matrix { -// using joint_matrix_type = syclex::matrix::joint_matrix< -// sycl::sub_group, T, use::value, matrix_size_traits::rows, -// matrix_size_traits::cols, layout::value>; -// -// static inline decltype(auto) get_wi_data(joint_matrix_type &matrix) { -// return sycl::ext::oneapi::detail::get_wi_data( -// sycl::ext::oneapi::this_work_item::get_sub_group(), matrix); -// } -// -//public: -// joint_matrix() -// : matrix(), x(matrix), num_elements(get_wi_data(matrix).length()) {} -// joint_matrix(joint_matrix &other) -// : x(matrix), num_elements(get_wi_data(matrix).length()) { -// syclex::matrix::joint_matrix_copy( -// sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), -// matrix); -// } -// joint_matrix &operator=(joint_matrix &other) { -// if (this != &other) { -// syclex::matrix::joint_matrix_copy( -// sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), -// matrix); -// } -// return *this; -// } -// -// joint_matrix_type &get() { return matrix; } -// -// const joint_matrix_type &get() const { return matrix; } -// -// class matrix_accessor { -// friend joint_matrix; -// joint_matrix_type &matrix; -// matrix_accessor(joint_matrix_type &matrix) : matrix(matrix) {} -// -// public: -// decltype(auto) operator[](unsigned I) { return get_wi_data(matrix)[I]; } -// decltype(auto) operator[](unsigned I) const { -// return get_wi_data(matrix)[I]; -// } -// }; -// -//private: -// joint_matrix_type matrix; -// -//public: -// matrix_accessor x; -// const size_t num_elements; -//}; +// A class that wraps the syclex::matrix::joint_matrix class and provides +// copy constructor and assignment operator. +template > +class joint_matrix { + using joint_matrix_type = syclex::matrix::joint_matrix< + sycl::sub_group, T, use::value, matrix_size_traits::rows, + matrix_size_traits::cols, layout::value>; + + static inline decltype(auto) get_wi_data(joint_matrix_type &matrix) { + return sycl::ext::oneapi::detail::get_wi_data( + sycl::ext::oneapi::this_work_item::get_sub_group(), matrix); + } + +public: + joint_matrix() + : matrix(), x(matrix), num_elements(get_wi_data(matrix).length()) {} + joint_matrix(joint_matrix &other) + : x(matrix), num_elements(get_wi_data(matrix).length()) { + syclex::matrix::joint_matrix_copy( + sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), + matrix); + } + joint_matrix &operator=(joint_matrix &other) { + if (this != &other) { + syclex::matrix::joint_matrix_copy( + sycl::ext::oneapi::this_work_item::get_sub_group(), other.get(), + matrix); + } + return *this; + } + + joint_matrix_type &get() { return matrix; } + + const joint_matrix_type &get() const { return matrix; } + + class matrix_accessor { + friend joint_matrix; + joint_matrix_type &matrix; + matrix_accessor(joint_matrix_type &matrix) : matrix(matrix) {} + + public: + decltype(auto) operator[](unsigned I) { return get_wi_data(matrix)[I]; } + decltype(auto) operator[](unsigned I) const { + return get_wi_data(matrix)[I]; + } + }; + +private: + joint_matrix_type matrix; + +public: + matrix_accessor x; + const size_t num_elements; +}; } // namespace matrix } // namespace experimental From 296ce70c9c869ce09bd2327ffa7f3f2446140a0c Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 18 Oct 2024 15:35:39 +0800 Subject: [PATCH 06/11] Update lit Signed-off-by: Jiang, Zhiwei --- clang/test/dpct/user_defined_rule2.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/dpct/user_defined_rule2.cu b/clang/test/dpct/user_defined_rule2.cu index b9c8236e8694..fcb47b93e92d 100644 --- a/clang/test/dpct/user_defined_rule2.cu +++ b/clang/test/dpct/user_defined_rule2.cu @@ -1,8 +1,8 @@ // RUN: dpct --out-root %T/user_defined_rule2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu.yaml --format-range=none // RUN: FileCheck --input-file %T/user_defined_rule2/user_defined_rule2.dp.cpp --match-full-lines %s -// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/user_defined_rule2/user_defined_rule2.dp.cpp -o %T/user_defined_rule2/user_defined_rule2.dp.o %} +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule2/user_defined_rule2.dp.cpp -o %T/user_defined_rule2/user_defined_rule2.dp.o %} -#ifndef BUILD_TEST +#ifndef NO_BUILD_TEST // CHECK: #include // CHECK-NEXT: #include From dfba80678b80ad87857c32b242205187cb2e6caa Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Fri, 18 Oct 2024 16:20:59 +0800 Subject: [PATCH 07/11] update Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 51 ++++--------------------- clang/test/dpct/macro_test.cu | 3 +- clang/test/dpct/mf-test.cu | 2 +- clang/test/dpct/template-kernel-call.cu | 2 +- clang/test/dpct/tm-complex-profiling.cu | 6 +-- 5 files changed, 13 insertions(+), 51 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 5ea2ce66110c..27ef34e0f6cb 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -6090,37 +6090,6 @@ void KernelCallExpr::removeExtraIndent() { LocInfo.Indent.length(), "", nullptr)); } -namespace { -void buildHasCapabilityOrFailStr(const std::string &Aspects, - llvm::raw_string_ostream &OS, - const OutputBuilder &OB) { - switch (OB.Kind) { - case (OutputBuilder::Kind::Top): - for (auto &ob : OB.SubBuilders) { - buildHasCapabilityOrFailStr(Aspects, OS, *ob); - } - return; - case (OutputBuilder::Kind::String): - OS << OB.Str; - return; - case (OutputBuilder::Kind::Arg): { - if (OB.ArgIndex > 1) { - OS << ""; - return; - } - OS << Aspects; - return; - } - default: { - DpctDebugs() << "[buildHasCapabilityOrFailStr OutputBuilder::Kind] " - "Unexpected value: " - << OB.Kind << "\n"; - assert(0); - } - } -} -} // namespace - void KernelCallExpr::addDevCapCheckStmt() { llvm::SmallVector AspectList; if (getVarMap().hasBF64()) { @@ -6132,27 +6101,21 @@ void KernelCallExpr::addDevCapCheckStmt() { if (!AspectList.empty()) { std::string Str; llvm::raw_string_ostream OS(Str); + OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; if (auto Iter = MapNames::CustomHelperFunctionMap.find( HelperFuncCatalog::DefaultQueue); Iter != MapNames::CustomHelperFunctionMap.end()) { - OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; OS << Iter->second << ".get_device(), "; - OS << "{" << AspectList.front(); - for (size_t i = 1; i < AspectList.size(); ++i) { - OS << ", " << AspectList[i]; - } - OS << "});"; } else { requestFeature(HelperFeatureEnum::device_ext); - OS << MapNames::getDpctNamespace() << "get_device("; - OS << MapNames::getDpctNamespace() << "get_device_id("; printStreamBase(OS); - OS << "get_device())).has_capability_or_fail({" << AspectList.front(); - for (size_t i = 1; i < AspectList.size(); ++i) { - OS << ", " << AspectList[i]; - } - OS << "});"; + OS << "get_device(), "; + } + OS << "{" << AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + OS << ", " << AspectList[i]; } + OS << "});"; OuterStmts.OthersList.emplace_back(OS.str()); } } diff --git a/clang/test/dpct/macro_test.cu b/clang/test/dpct/macro_test.cu index d4fdff578f42..9290f8d9c13a 100644 --- a/clang/test/dpct/macro_test.cu +++ b/clang/test/dpct/macro_test.cu @@ -1102,8 +1102,7 @@ template __global__ void foo31(); #define FOO31(DIMS) foo31<<<1,1>>>(); //CHECK: { -//CHECK-NEXT: dpct::get_device(dpct::get_device_id(q_ct1.get_device())) -//CHECK-NEXT: .has_capability_or_fail({sycl::aspect::fp64}); +//CHECK-NEXT: dpct::has_capability_or_fail(q_ct1.get_device(), {sycl::aspect::fp64}); //CHECK-EMPTY: //CHECK-NEXT: q_ct1.submit([&](sycl::handler &cgh) { //CHECK-NEXT: /* diff --git a/clang/test/dpct/mf-test.cu b/clang/test/dpct/mf-test.cu index b1da1260dbe6..68581ed28e42 100644 --- a/clang/test/dpct/mf-test.cu +++ b/clang/test/dpct/mf-test.cu @@ -68,7 +68,7 @@ void test() { kernel_extern<<<1,1>>>(); // CHECK: { - // CHECK-NEXT: dpct::get_device(dpct::get_device_id(q_ct1.get_device())).has_capability_or_fail({sycl::aspect::fp64, sycl::aspect::fp16}); + // CHECK-NEXT: dpct::has_capability_or_fail(q_ct1.get_device(), {sycl::aspect::fp64, sycl::aspect::fp16}); // CHECK-EMPTY: // CHECK-NEXT: q_ct1.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), diff --git a/clang/test/dpct/template-kernel-call.cu b/clang/test/dpct/template-kernel-call.cu index 326a046413aa..c16355d4e56c 100644 --- a/clang/test/dpct/template-kernel-call.cu +++ b/clang/test/dpct/template-kernel-call.cu @@ -256,7 +256,7 @@ __global__ void convert_kernel(T b){ // CHECK-NEXT:void convert(){ // CHECK-NEXT: T b; // CHECK-NEXT: { -// CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_out_of_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); +// CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_out_of_order_queue().get_device(), {sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_out_of_order_queue().submit( // CHECK-NEXT: [&](sycl::handler &cgh) { diff --git a/clang/test/dpct/tm-complex-profiling.cu b/clang/test/dpct/tm-complex-profiling.cu index 43dd6f663168..37d4667b16c0 100644 --- a/clang/test/dpct/tm-complex-profiling.cu +++ b/clang/test/dpct/tm-complex-profiling.cu @@ -356,7 +356,7 @@ void foo_test_4() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: { - // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); + // CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(dimGrid * dimBlock, dimBlock), @@ -377,7 +377,7 @@ void foo_test_4() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: { - // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); + // CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(dimGrid * dimBlock, dimBlock), @@ -399,7 +399,7 @@ void foo_test_4() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: { - // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); + // CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(dimGrid * dimBlock, dimBlock), From fde6c39c3cb2ac8f1469754a069cc362788e1dd2 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 21 Oct 2024 11:03:31 +0800 Subject: [PATCH 08/11] Update lit Signed-off-by: Jiang, Zhiwei --- clang/test/dpct/syclcompat_test2.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/test/dpct/syclcompat_test2.cu b/clang/test/dpct/syclcompat_test2.cu index c760d1487e00..8fa890d16fb4 100644 --- a/clang/test/dpct/syclcompat_test2.cu +++ b/clang/test/dpct/syclcompat_test2.cu @@ -55,7 +55,10 @@ __global__ void k3() { } void f3() { +// TODO: The SYCLCompat PR has been merged https://github.com/intel/llvm/pull/15717. Enable the build test after the test complier upgrade. +#ifndef NO_BUILD_TEST k3<<<1, 1>>>(); +#endif } void f4() { From 9c3c73fe309d2ce93c16c8c70cc205f531bee20e Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Tue, 22 Oct 2024 16:01:51 +0800 Subject: [PATCH 09/11] Revert the change of has_capability_or_fail Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 20 +++++++++++++------- clang/test/dpct/macro_test.cu | 3 ++- clang/test/dpct/mf-test.cu | 2 +- clang/test/dpct/syclcompat_test2.cu | 3 --- clang/test/dpct/template-kernel-call.cu | 2 +- clang/test/dpct/tm-complex-profiling.cu | 6 +++--- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 27ef34e0f6cb..f8468d3437a9 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -6101,21 +6101,27 @@ void KernelCallExpr::addDevCapCheckStmt() { if (!AspectList.empty()) { std::string Str; llvm::raw_string_ostream OS(Str); - OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; if (auto Iter = MapNames::CustomHelperFunctionMap.find( HelperFuncCatalog::DefaultQueue); Iter != MapNames::CustomHelperFunctionMap.end()) { + OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; OS << Iter->second << ".get_device(), "; + OS << "{" << AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + OS << ", " << AspectList[i]; + } + OS << "});"; } else { requestFeature(HelperFeatureEnum::device_ext); + OS << MapNames::getDpctNamespace() << "get_device("; + OS << MapNames::getDpctNamespace() << "get_device_id("; printStreamBase(OS); - OS << "get_device(), "; - } - OS << "{" << AspectList.front(); - for (size_t i = 1; i < AspectList.size(); ++i) { - OS << ", " << AspectList[i]; + OS << "get_device())).has_capability_or_fail({" << AspectList.front(); + for (size_t i = 1; i < AspectList.size(); ++i) { + OS << ", " << AspectList[i]; + } + OS << "});"; } - OS << "});"; OuterStmts.OthersList.emplace_back(OS.str()); } } diff --git a/clang/test/dpct/macro_test.cu b/clang/test/dpct/macro_test.cu index 9290f8d9c13a..d4fdff578f42 100644 --- a/clang/test/dpct/macro_test.cu +++ b/clang/test/dpct/macro_test.cu @@ -1102,7 +1102,8 @@ template __global__ void foo31(); #define FOO31(DIMS) foo31<<<1,1>>>(); //CHECK: { -//CHECK-NEXT: dpct::has_capability_or_fail(q_ct1.get_device(), {sycl::aspect::fp64}); +//CHECK-NEXT: dpct::get_device(dpct::get_device_id(q_ct1.get_device())) +//CHECK-NEXT: .has_capability_or_fail({sycl::aspect::fp64}); //CHECK-EMPTY: //CHECK-NEXT: q_ct1.submit([&](sycl::handler &cgh) { //CHECK-NEXT: /* diff --git a/clang/test/dpct/mf-test.cu b/clang/test/dpct/mf-test.cu index 68581ed28e42..b1da1260dbe6 100644 --- a/clang/test/dpct/mf-test.cu +++ b/clang/test/dpct/mf-test.cu @@ -68,7 +68,7 @@ void test() { kernel_extern<<<1,1>>>(); // CHECK: { - // CHECK-NEXT: dpct::has_capability_or_fail(q_ct1.get_device(), {sycl::aspect::fp64, sycl::aspect::fp16}); + // CHECK-NEXT: dpct::get_device(dpct::get_device_id(q_ct1.get_device())).has_capability_or_fail({sycl::aspect::fp64, sycl::aspect::fp16}); // CHECK-EMPTY: // CHECK-NEXT: q_ct1.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), diff --git a/clang/test/dpct/syclcompat_test2.cu b/clang/test/dpct/syclcompat_test2.cu index 8fa890d16fb4..c760d1487e00 100644 --- a/clang/test/dpct/syclcompat_test2.cu +++ b/clang/test/dpct/syclcompat_test2.cu @@ -55,10 +55,7 @@ __global__ void k3() { } void f3() { -// TODO: The SYCLCompat PR has been merged https://github.com/intel/llvm/pull/15717. Enable the build test after the test complier upgrade. -#ifndef NO_BUILD_TEST k3<<<1, 1>>>(); -#endif } void f4() { diff --git a/clang/test/dpct/template-kernel-call.cu b/clang/test/dpct/template-kernel-call.cu index c16355d4e56c..326a046413aa 100644 --- a/clang/test/dpct/template-kernel-call.cu +++ b/clang/test/dpct/template-kernel-call.cu @@ -256,7 +256,7 @@ __global__ void convert_kernel(T b){ // CHECK-NEXT:void convert(){ // CHECK-NEXT: T b; // CHECK-NEXT: { -// CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_out_of_order_queue().get_device(), {sycl::aspect::fp64}); +// CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_out_of_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_out_of_order_queue().submit( // CHECK-NEXT: [&](sycl::handler &cgh) { diff --git a/clang/test/dpct/tm-complex-profiling.cu b/clang/test/dpct/tm-complex-profiling.cu index 37d4667b16c0..43dd6f663168 100644 --- a/clang/test/dpct/tm-complex-profiling.cu +++ b/clang/test/dpct/tm-complex-profiling.cu @@ -356,7 +356,7 @@ void foo_test_4() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: { - // CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64}); + // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(dimGrid * dimBlock, dimBlock), @@ -377,7 +377,7 @@ void foo_test_4() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: { - // CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64}); + // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(dimGrid * dimBlock, dimBlock), @@ -399,7 +399,7 @@ void foo_test_4() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: { - // CHECK-NEXT: dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64}); + // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(dimGrid * dimBlock, dimBlock), From b3497977d33f993f55be6646d7c32e862dcb2484 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Wed, 23 Oct 2024 08:31:33 +0800 Subject: [PATCH 10/11] Refine Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 17 +++++-- clang/lib/DPCT/MapNames.h | 4 +- clang/lib/DPCT/Rules.cpp | 15 ++++-- ..._rule2.cu => user_defined_rule_helper1.cu} | 18 +++---- clang/test/dpct/user_defined_rule_helper2.cu | 47 +++++++++++++++++++ clang/test/dpct/user_defined_rule_helper3.cu | 42 +++++++++++++++++ clang/test/dpct/xpu.yaml | 7 --- clang/test/dpct/xpu_1.yaml | 7 +++ clang/test/dpct/xpu_2.yaml | 7 +++ clang/test/dpct/xpu_3.yaml | 7 +++ 10 files changed, 146 insertions(+), 25 deletions(-) rename clang/test/dpct/{user_defined_rule2.cu => user_defined_rule_helper1.cu} (65%) create mode 100644 clang/test/dpct/user_defined_rule_helper2.cu create mode 100644 clang/test/dpct/user_defined_rule_helper3.cu delete mode 100644 clang/test/dpct/xpu.yaml create mode 100644 clang/test/dpct/xpu_1.yaml create mode 100644 clang/test/dpct/xpu_2.yaml create mode 100644 clang/test/dpct/xpu_3.yaml diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index f8468d3437a9..3873fb9c29df 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -268,6 +268,15 @@ void processTypeLoc(const TypeLoc &TL, ExprAnalysis &EA, } EA.applyAllSubExprRepl(); } +HelperFuncCatalog getQueueKind() { + if (DpctGlobalInfo::useSYCLCompat()) { + return HelperFuncCatalog::GetDefaultQueue; + } + if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted) { + return HelperFuncCatalog::GetInOrderQueue; + } + return HelperFuncCatalog::GetOutOfOrderQueue; +} ///// class FreeQueriesInfo ///// class FreeQueriesInfo { @@ -1231,9 +1240,8 @@ std::string DpctGlobalInfo::getDefaultQueue(const Stmt *S) { } const std::string &DpctGlobalInfo::getDefaultQueueFreeFuncCall() { static const std::string DefaultQueueFreeFuncCall = [&]() { - auto Iter = - MapNames::CustomHelperFunctionMap.find(HelperFuncCatalog::DefaultQueue); - if (Iter != MapNames::CustomHelperFunctionMap.end()) { + if (auto Iter = MapNames::CustomHelperFunctionMap.find(getQueueKind()); + Iter != MapNames::CustomHelperFunctionMap.end()) { return Iter->second; } return MapNames::getDpctNamespace() + "get_" + @@ -6101,8 +6109,7 @@ void KernelCallExpr::addDevCapCheckStmt() { if (!AspectList.empty()) { std::string Str; llvm::raw_string_ostream OS(Str); - if (auto Iter = MapNames::CustomHelperFunctionMap.find( - HelperFuncCatalog::DefaultQueue); + if (auto Iter = MapNames::CustomHelperFunctionMap.find(getQueueKind()); Iter != MapNames::CustomHelperFunctionMap.end()) { OS << MapNames::getDpctNamespace() << "has_capability_or_fail("; OS << Iter->second << ".get_device(), "; diff --git a/clang/lib/DPCT/MapNames.h b/clang/lib/DPCT/MapNames.h index 83a57ac6930d..56c9418aa609 100644 --- a/clang/lib/DPCT/MapNames.h +++ b/clang/lib/DPCT/MapNames.h @@ -21,7 +21,9 @@ enum class KernelArgType; enum class HelperFileEnum : unsigned int; struct HelperFunc; enum class HelperFuncCatalog { - DefaultQueue + GetDefaultQueue, + GetOutOfOrderQueue, + GetInOrderQueue, }; } // namespace dpct } // namespace clang diff --git a/clang/lib/DPCT/Rules.cpp b/clang/lib/DPCT/Rules.cpp index fad85fc6e961..7cba694e0d49 100644 --- a/clang/lib/DPCT/Rules.cpp +++ b/clang/lib/DPCT/Rules.cpp @@ -278,9 +278,18 @@ void registerPatternRewriterRule(MetaRuleObject &R) { } void registerHelperFunctionRule(MetaRuleObject &R) { - if (R.In == "DefaultQueue" && R.Priority == RulePriority::Takeover) { - MapNames::CustomHelperFunctionMap.insert( - {dpct::HelperFuncCatalog::DefaultQueue, R.Out}); + if ((R.In == "get_default_queue" || R.In == "get_in_order_queue" || + R.In == "get_out_of_order_queue") && + R.Priority == RulePriority::Takeover) { + if (R.In == "get_default_queue") + MapNames::CustomHelperFunctionMap.insert( + {dpct::HelperFuncCatalog::GetDefaultQueue, R.Out}); + else if (R.In == "get_in_order_queue") + MapNames::CustomHelperFunctionMap.insert( + {dpct::HelperFuncCatalog::GetInOrderQueue, R.Out}); + else + MapNames::CustomHelperFunctionMap.insert( + {dpct::HelperFuncCatalog::GetOutOfOrderQueue, R.Out}); dpct::DpctGlobalInfo::setUsingDRYPattern(false); dpct::DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes().insert( R.Includes.begin(), R.Includes.end()); diff --git a/clang/test/dpct/user_defined_rule2.cu b/clang/test/dpct/user_defined_rule_helper1.cu similarity index 65% rename from clang/test/dpct/user_defined_rule2.cu rename to clang/test/dpct/user_defined_rule_helper1.cu index fcb47b93e92d..d9c50f57780a 100644 --- a/clang/test/dpct/user_defined_rule2.cu +++ b/clang/test/dpct/user_defined_rule_helper1.cu @@ -1,17 +1,17 @@ -// RUN: dpct --out-root %T/user_defined_rule2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu.yaml --format-range=none -// RUN: FileCheck --input-file %T/user_defined_rule2/user_defined_rule2.dp.cpp --match-full-lines %s -// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule2/user_defined_rule2.dp.cpp -o %T/user_defined_rule2/user_defined_rule2.dp.o %} +// RUN: dpct --out-root %T/user_defined_rule_helper1 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_1.yaml --format-range=none +// RUN: FileCheck --input-file %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.cpp -o %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.o %} #ifndef NO_BUILD_TEST // CHECK: #include // CHECK-NEXT: #include -// CHECK-NEXT: #include "xpu_helper.h" +// CHECK-NEXT: #include "xpu_helper1.h" #include __global__ void foo1_kernel() {} void foo1() { - // CHECK: static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( + // CHECK: static_cast(c10::xpu::getCurrentXPUStream1()).parallel_for( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: foo1_kernel(); @@ -23,17 +23,17 @@ __global__ void foo2_kernel(double *d) {} void foo2() { double *d; - // CHECK: d = sycl::malloc_device(1, static_cast(c10::xpu::getCurrentXPUStream())); + // CHECK: d = sycl::malloc_device(1, static_cast(c10::xpu::getCurrentXPUStream1())); // CHECK-NEXT: { - // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream()).get_device(), {sycl::aspect::fp64}); + // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream1()).get_device(), {sycl::aspect::fp64}); // CHECK-EMPTY: - // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream()).parallel_for( + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream1()).parallel_for( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: foo2_kernel(d); // CHECK-NEXT: }); // CHECK-NEXT: } - // CHECK-NEXT: dpct::dpct_free(d, static_cast(c10::xpu::getCurrentXPUStream())); + // CHECK-NEXT: dpct::dpct_free(d, static_cast(c10::xpu::getCurrentXPUStream1())); cudaMalloc(&d, sizeof(double)); foo2_kernel<<<1, 1>>>(d); cudaFree(d); diff --git a/clang/test/dpct/user_defined_rule_helper2.cu b/clang/test/dpct/user_defined_rule_helper2.cu new file mode 100644 index 000000000000..b6734650894a --- /dev/null +++ b/clang/test/dpct/user_defined_rule_helper2.cu @@ -0,0 +1,47 @@ +// RUN: dpct --out-root %T/user_defined_rule_helper2 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_2.yaml --format-range=none --usm-level=none +// RUN: FileCheck --input-file %T/user_defined_rule_helper2/user_defined_rule_helper2.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper2/user_defined_rule_helper2.dp.cpp -o %T/user_defined_rule_helper2/user_defined_rule_helper2.dp.o %} + +#ifndef NO_BUILD_TEST + +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include "xpu_helper2.h" +#include + +__global__ void foo1_kernel() {} +void foo1() { + // CHECK: static_cast(c10::xpu::getCurrentXPUStream2()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo1_kernel(); + // CHECK-NEXT: }); + foo1_kernel<<<1, 1>>>(); +} + +__global__ void foo2_kernel(double *d) {} + +void foo2() { + double *d; + // CHECK: d = (double *)dpct::dpct_malloc(sizeof(double)); + // CHECK-NEXT: { + // CHECK-NEXT: dpct::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream2()).get_device(), {sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream2()).submit( + // CHECK-NEXT: [&](sycl::handler &cgh) { + // CHECK-NEXT: auto d_acc_ct0 = dpct::get_access(d, cgh); + // CHECK-EMPTY: + // CHECK-NEXT: cgh.parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo2_kernel(&d_acc_ct0[0]); + // CHECK-NEXT: }); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: dpct::dpct_free(d); + cudaMalloc(&d, sizeof(double)); + foo2_kernel<<<1, 1>>>(d); + cudaFree(d); +} + +#endif diff --git a/clang/test/dpct/user_defined_rule_helper3.cu b/clang/test/dpct/user_defined_rule_helper3.cu new file mode 100644 index 000000000000..1c87ca0fdc13 --- /dev/null +++ b/clang/test/dpct/user_defined_rule_helper3.cu @@ -0,0 +1,42 @@ +// RUN: dpct --out-root %T/user_defined_rule_helper3 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_3.yaml --format-range=none --use-syclcompat +// RUN: FileCheck --input-file %T/user_defined_rule_helper3/user_defined_rule_helper3.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper3/user_defined_rule_helper3.dp.cpp -o %T/user_defined_rule_helper3/user_defined_rule_helper3.dp.o %} + +#ifndef NO_BUILD_TEST + +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include "xpu_helper3.h" +#include + +__global__ void foo1_kernel() {} +void foo1() { + // CHECK: static_cast(c10::xpu::getCurrentXPUStream3()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo1_kernel(); + // CHECK-NEXT: }); + foo1_kernel<<<1, 1>>>(); +} + +__global__ void foo2_kernel(double *d) {} + +void foo2() { + double *d; + // CHECK: d = sycl::malloc_device(1, static_cast(c10::xpu::getCurrentXPUStream3())); + // CHECK-NEXT: { + // CHECK-NEXT: syclcompat::has_capability_or_fail(static_cast(c10::xpu::getCurrentXPUStream3()).get_device(), {sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: static_cast(c10::xpu::getCurrentXPUStream3()).parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: foo2_kernel(d); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: syclcompat::wait_and_free(d, static_cast(c10::xpu::getCurrentXPUStream3())); + cudaMalloc(&d, sizeof(double)); + foo2_kernel<<<1, 1>>>(d); + cudaFree(d); +} + +#endif diff --git a/clang/test/dpct/xpu.yaml b/clang/test/dpct/xpu.yaml deleted file mode 100644 index ef994c9801ce..000000000000 --- a/clang/test/dpct/xpu.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- -- Rule: rule1 - Kind: HelperFunction - Priority: Takeover - In: DefaultQueue - Out: static_cast(c10::xpu::getCurrentXPUStream()) - Includes: ["xpu_helper.h"] diff --git a/clang/test/dpct/xpu_1.yaml b/clang/test/dpct/xpu_1.yaml new file mode 100644 index 000000000000..9ad637f94958 --- /dev/null +++ b/clang/test/dpct/xpu_1.yaml @@ -0,0 +1,7 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: get_in_order_queue + Out: static_cast(c10::xpu::getCurrentXPUStream1()) + Includes: ["xpu_helper1.h"] diff --git a/clang/test/dpct/xpu_2.yaml b/clang/test/dpct/xpu_2.yaml new file mode 100644 index 000000000000..62a9b50ace6b --- /dev/null +++ b/clang/test/dpct/xpu_2.yaml @@ -0,0 +1,7 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: get_out_of_order_queue + Out: static_cast(c10::xpu::getCurrentXPUStream2()) + Includes: ["xpu_helper2.h"] diff --git a/clang/test/dpct/xpu_3.yaml b/clang/test/dpct/xpu_3.yaml new file mode 100644 index 000000000000..3d6d0c7a0df5 --- /dev/null +++ b/clang/test/dpct/xpu_3.yaml @@ -0,0 +1,7 @@ +--- +- Rule: rule1 + Kind: HelperFunction + Priority: Takeover + In: get_default_queue + Out: static_cast(c10::xpu::getCurrentXPUStream3()) + Includes: ["xpu_helper3.h"] From 59c75fc8d783c5c2e8886550b63efb2c787a2dfa Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Thu, 24 Oct 2024 10:49:36 +0800 Subject: [PATCH 11/11] Add comments and warning Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 1 + clang/lib/DPCT/Rules.cpp | 38 ++++++++++++-------- clang/test/dpct/user_defined_rule_helper1.cu | 7 +++- clang/test/dpct/xpu_1.yaml | 5 +++ 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 3873fb9c29df..85d5783ca551 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -2262,6 +2262,7 @@ void DpctGlobalInfo::resetInfo() { SpellingLocToDFIsMapForAssumeNDRange.clear(); DFIToSpellingLocsMapForAssumeNDRange.clear(); FreeQueriesInfo::reset(); + CustomHelperFunctionAddtionalIncludes.clear(); } void DpctGlobalInfo::updateSpellingLocDFIMaps( SourceLocation SL, std::shared_ptr DFI) { diff --git a/clang/lib/DPCT/Rules.cpp b/clang/lib/DPCT/Rules.cpp index 7cba694e0d49..1304f1e32acf 100644 --- a/clang/lib/DPCT/Rules.cpp +++ b/clang/lib/DPCT/Rules.cpp @@ -278,21 +278,28 @@ void registerPatternRewriterRule(MetaRuleObject &R) { } void registerHelperFunctionRule(MetaRuleObject &R) { - if ((R.In == "get_default_queue" || R.In == "get_in_order_queue" || - R.In == "get_out_of_order_queue") && - R.Priority == RulePriority::Takeover) { - if (R.In == "get_default_queue") - MapNames::CustomHelperFunctionMap.insert( - {dpct::HelperFuncCatalog::GetDefaultQueue, R.Out}); - else if (R.In == "get_in_order_queue") - MapNames::CustomHelperFunctionMap.insert( - {dpct::HelperFuncCatalog::GetInOrderQueue, R.Out}); - else - MapNames::CustomHelperFunctionMap.insert( - {dpct::HelperFuncCatalog::GetOutOfOrderQueue, R.Out}); - dpct::DpctGlobalInfo::setUsingDRYPattern(false); - dpct::DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes().insert( - R.Includes.begin(), R.Includes.end()); + static const std::unordered_map + String2HelperFuncCatalogMap{ + {"get_default_queue", dpct::HelperFuncCatalog::GetDefaultQueue}, + {"get_in_order_queue", dpct::HelperFuncCatalog::GetInOrderQueue}, + {"get_out_of_order_queue", + dpct::HelperFuncCatalog::GetOutOfOrderQueue}}; + if (R.Priority == RulePriority::Takeover) { + if (auto Iter = String2HelperFuncCatalogMap.find(R.In); + Iter != String2HelperFuncCatalogMap.end()) { + // This map is inited here. + // It saves the customized string which used for each kind of helper + // function call in the migrated code. + MapNames::CustomHelperFunctionMap.insert({Iter->second, R.Out}); + dpct::DpctGlobalInfo::setUsingDRYPattern(false); + dpct::DpctGlobalInfo::getCustomHelperFunctionAddtionalIncludes().insert( + R.Includes.begin(), R.Includes.end()); + } else { + llvm::outs() + << "Warning: The rule named " << R.RuleId + << " (Kind: HelperFunction) is ignored, as the API specified " + "in the \"In\" field is not supported for customization.\n"; + } } } @@ -387,6 +394,7 @@ void importRules(std::vector &RuleFiles) { break; case (RuleKind::HelperFunction): registerHelperFunctionRule(*r); + break; case (RuleKind::PythonRule): registerPythonMigrationRule(*r); break; diff --git a/clang/test/dpct/user_defined_rule_helper1.cu b/clang/test/dpct/user_defined_rule_helper1.cu index d9c50f57780a..2b281552e1c7 100644 --- a/clang/test/dpct/user_defined_rule_helper1.cu +++ b/clang/test/dpct/user_defined_rule_helper1.cu @@ -1,6 +1,9 @@ -// RUN: dpct --out-root %T/user_defined_rule_helper1 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_1.yaml --format-range=none +// RUN: mkdir %T/user_defined_rule_helper1 +// RUN: dpct --out-root %T/user_defined_rule_helper1 %s --cuda-include-path="%cuda-path/include" --rule-file %S/xpu_1.yaml --format-range=none > %T/user_defined_rule_helper1/warning.txt 2>&1 // RUN: FileCheck --input-file %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.cpp --match-full-lines %s +// RUN: FileCheck --input-file %T/user_defined_rule_helper1/warning.txt --match-full-lines %s -check-prefix=WARNING // RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.cpp -o %T/user_defined_rule_helper1/user_defined_rule_helper1.dp.o %} +// RUN: rm -rf %T/user_defined_rule_helper1 #ifndef NO_BUILD_TEST @@ -40,3 +43,5 @@ void foo2() { } #endif + +// WARNING: Warning: The rule named rule2 (Kind: HelperFunction) is ignored, as the API specified in the "In" field is not supported for customization. diff --git a/clang/test/dpct/xpu_1.yaml b/clang/test/dpct/xpu_1.yaml index 9ad637f94958..0bbf932311a6 100644 --- a/clang/test/dpct/xpu_1.yaml +++ b/clang/test/dpct/xpu_1.yaml @@ -5,3 +5,8 @@ In: get_in_order_queue Out: static_cast(c10::xpu::getCurrentXPUStream1()) Includes: ["xpu_helper1.h"] +- Rule: rule2 + Kind: HelperFunction + Priority: Takeover + In: unknown_function + Out: unknown_function_2