-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[clang-sycl-linker] Add AOT compilation support for Intel GPUs/CPUs #133194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
8f22fbe
5925538
abf2b4b
ff997fa
e64a417
9002f9a
649db25
727744f
0c3882f
15fc4c2
4265c08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| //===--- SYCL.h -------------------------------------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_CLANG_BASIC_SYCL_H | ||
| #define LLVM_CLANG_BASIC_SYCL_H | ||
|
|
||
| #include "clang/Basic/Cuda.h" | ||
|
|
||
| namespace llvm { | ||
| class StringRef; | ||
| template <unsigned InternalLen> class SmallString; | ||
| } // namespace llvm | ||
|
|
||
| namespace clang { | ||
// Intel CPU and Intel GPU architectures that support SYCL offloading.
//
// Enumerator order is significant: IsSYCLSupportedIntelCPUArch and
// IsSYCLSupportedIntelGPUArch classify a value with range comparisons, so the
// CPU entries must stay contiguous from SKYLAKEAVX512 to GRANITERAPIDS and the
// GPU entries contiguous from BDW to LNL_M.
enum class SYCLSupportedIntelArchs {
  // Neither a CPU nor a GPU; falls outside both ranges checked above.
  UNKNOWN,

  // Intel CPUs (first: SKYLAKEAVX512, last: GRANITERAPIDS).
  SKYLAKEAVX512, COREAVX2, COREI7AVX, COREI7,
  WESTMERE, SANDYBRIDGE, IVYBRIDGE, BROADWELL,
  COFFEELAKE, ALDERLAKE, SKYLAKE, SKX,
  CASCADELAKE, ICELAKECLIENT, ICELAKESERVER,
  SAPPHIRERAPIDS, GRANITERAPIDS,

  // Intel GPUs (first: BDW, last: LNL_M).
  BDW, SKL, KBL, CFL,
  APL, BXT, GLK, WHL, AML, CML,
  ICLLP, ICL, EHL, JSL,
  TGLLP, TGL, RKL,
  ADL_S, RPL_S, ADL_P, ADL_N,
  DG1,
  ACM_G10, DG2_G10, ACM_G11, DG2_G11, ACM_G12, DG2_G12,
  PVC, PVC_VG,
  MTL_U, MTL_S, ARL_U, ARL_S, MTL_H, ARL_H,
  BMG_G21, LNL_M,
};
|
|
||
| // Check if the given Arch value is a Generic AMD GPU. | ||
| // Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. | ||
| // This list is used to filter out GFX*_GENERIC AMD GPUs in | ||
| // `IsSYCLSupportedAMDGPUArch`. | ||
| static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { | ||
|
||
| return Arch == OffloadArch::GFX9_GENERIC || | ||
| Arch == OffloadArch::GFX10_1_GENERIC || | ||
| Arch == OffloadArch::GFX10_3_GENERIC || | ||
| Arch == OffloadArch::GFX11_GENERIC || | ||
| Arch == OffloadArch::GFX12_GENERIC; | ||
| } | ||
|
|
||
| // Check if the given Arch value is a valid SYCL supported AMD GPU. | ||
| static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { | ||
| return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && | ||
| !IsAMDGenericGPUArch(Arch); | ||
| } | ||
|
|
||
| // Check if the given Arch value is a valid SYCL supported NVidia GPU. | ||
| static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { | ||
| return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; | ||
| } | ||
|
|
||
| // Check if the given Arch value is a valid SYCL supported Intel CPU. | ||
| static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { | ||
| return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && | ||
| Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; | ||
| } | ||
|
|
||
| // Check if the given Arch value is a valid SYCL supported Intel GPU. | ||
| static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { | ||
| return Arch >= SYCLSupportedIntelArchs::BDW && | ||
| Arch <= SYCLSupportedIntelArchs::LNL_M; | ||
| } | ||
|
|
||
| // Check if the user provided value for --offload-arch is a valid | ||
| // SYCL supported Intel AOT target. | ||
| SYCLSupportedIntelArchs | ||
| StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); | ||
|
|
||
| // This is a mapping between the user provided --offload-arch value for Intel | ||
| // GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU | ||
| // AOT compiler). | ||
| llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName); | ||
| llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName); | ||
|
|
||
| } // namespace clang | ||
|
|
||
| #endif // LLVM_CLANG_BASIC_SYCL_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,226 @@ | ||
| #include "clang/Basic/SYCL.h" | ||
| #include "llvm/ADT/STLExtras.h" | ||
| #include "llvm/ADT/SmallString.h" | ||
| #include "llvm/ADT/StringRef.h" | ||
| #include "llvm/ADT/StringSwitch.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| namespace clang { | ||
|
|
||
| // Struct that relates an AOT target value with | ||
| // Intel CPUs and Intel GPUs. | ||
| struct StringToOffloadArchSYCLMap { | ||
| const char *ArchName; | ||
| SYCLSupportedIntelArchs IntelArch; | ||
| }; | ||
|
|
||
| // Mapping of supported SYCL offloading architectures. | ||
| static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { | ||
| // Intel CPU mapping. | ||
| {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, | ||
| {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, | ||
| {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, | ||
| {"corei7", SYCLSupportedIntelArchs::COREI7}, | ||
| {"westmere", SYCLSupportedIntelArchs::WESTMERE}, | ||
| {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, | ||
| {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, | ||
| {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, | ||
| {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, | ||
| {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, | ||
| {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, | ||
| {"skx", SYCLSupportedIntelArchs::SKX}, | ||
| {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, | ||
| {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, | ||
| {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, | ||
| {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, | ||
| {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, | ||
| // Intel GPU mapping. | ||
| {"bdw", SYCLSupportedIntelArchs::BDW}, | ||
| {"skl", SYCLSupportedIntelArchs::SKL}, | ||
| {"kbl", SYCLSupportedIntelArchs::KBL}, | ||
| {"cfl", SYCLSupportedIntelArchs::CFL}, | ||
| {"apl", SYCLSupportedIntelArchs::APL}, | ||
| {"bxt", SYCLSupportedIntelArchs::BXT}, | ||
| {"glk", SYCLSupportedIntelArchs::GLK}, | ||
| {"whl", SYCLSupportedIntelArchs::WHL}, | ||
| {"aml", SYCLSupportedIntelArchs::AML}, | ||
| {"cml", SYCLSupportedIntelArchs::CML}, | ||
| {"icllp", SYCLSupportedIntelArchs::ICLLP}, | ||
| {"icl", SYCLSupportedIntelArchs::ICL}, | ||
| {"ehl", SYCLSupportedIntelArchs::EHL}, | ||
| {"jsl", SYCLSupportedIntelArchs::JSL}, | ||
| {"tgllp", SYCLSupportedIntelArchs::TGLLP}, | ||
| {"tgl", SYCLSupportedIntelArchs::TGL}, | ||
| {"rkl", SYCLSupportedIntelArchs::RKL}, | ||
| {"adl_s", SYCLSupportedIntelArchs::ADL_S}, | ||
| {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, | ||
| {"adl_p", SYCLSupportedIntelArchs::ADL_P}, | ||
| {"adl_n", SYCLSupportedIntelArchs::ADL_N}, | ||
| {"dg1", SYCLSupportedIntelArchs::DG1}, | ||
| {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, | ||
| {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, | ||
| {"acm_g11", SYCLSupportedIntelArchs::ACM_G11}, | ||
| {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, | ||
| {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, | ||
| {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, | ||
| {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, | ||
| {"pvc", SYCLSupportedIntelArchs::PVC}, | ||
| {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, | ||
| {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, | ||
| {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, | ||
| {"arl_u", SYCLSupportedIntelArchs::ARL_U}, | ||
| {"arl_s", SYCLSupportedIntelArchs::ARL_S}, | ||
| {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, | ||
| {"arl_h", SYCLSupportedIntelArchs::ARL_H}, | ||
| {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, | ||
| {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; | ||
|
|
||
| // Check if the user provided value for --offload-arch is a valid | ||
| // SYCL supported Intel AOT target. | ||
| SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) { | ||
| auto result = | ||
| llvm::find_if(StringToArchNamesMap, | ||
| [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { | ||
| return ArchNameAsString == map.ArchName; | ||
| }); | ||
| if (result == std::end(StringToArchNamesMap)) | ||
| return SYCLSupportedIntelArchs::UNKNOWN; | ||
| return result->IntelArch; | ||
| } | ||
|
|
||
| // This is a mapping between the user provided --offload-arch value for Intel | ||
| // GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU | ||
| // AOT compiler). | ||
| StringRef mapIntelGPUArchName(StringRef ArchName) { | ||
| StringRef Arch; | ||
| Arch = llvm::StringSwitch<StringRef>(ArchName) | ||
| .Case("bdw", "bdw") | ||
| .Case("skl", "skl") | ||
| .Case("kbl", "kbl") | ||
| .Case("cfl", "cfl") | ||
| .Cases("apl", "bxt", "apl") | ||
| .Case("glk", "glk") | ||
| .Case("whl", "whl") | ||
| .Case("aml", "aml") | ||
| .Case("cml", "cml") | ||
| .Cases("icllp", "icl", "icllp") | ||
| .Cases("ehl", "jsl", "ehl") | ||
| .Cases("tgllp", "tgl", "tgllp") | ||
| .Case("rkl", "rkl") | ||
| .Cases("adl_s", "rpl_s", "adl_s") | ||
| .Case("adl_p", "adl_p") | ||
| .Case("adl_n", "adl_n") | ||
| .Case("dg1", "dg1") | ||
| .Cases("acm_g10", "dg2_g10", "acm_g10") | ||
| .Cases("acm_g11", "dg2_g11", "acm_g11") | ||
| .Cases("acm_g12", "dg2_g12", "acm_g12") | ||
| .Case("pvc", "pvc") | ||
| .Case("pvc_vg", "pvc_vg") | ||
| .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") | ||
| .Case("mtl_h", "mtl_h") | ||
| .Case("arl_h", "arl_h") | ||
| .Case("bmg_g21", "bmg_g21") | ||
| .Case("lnl_m", "lnl_m") | ||
| .Default(""); | ||
| return Arch; | ||
| } | ||
|
|
||
| SmallString<64> getGenDeviceMacro(StringRef DeviceName) { | ||
| SmallString<64> Macro; | ||
| StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName) | ||
| .Case("bdw", "INTEL_GPU_BDW") | ||
| .Case("skl", "INTEL_GPU_SKL") | ||
| .Case("kbl", "INTEL_GPU_KBL") | ||
| .Case("cfl", "INTEL_GPU_CFL") | ||
| .Case("apl", "INTEL_GPU_APL") | ||
| .Case("glk", "INTEL_GPU_GLK") | ||
| .Case("whl", "INTEL_GPU_WHL") | ||
| .Case("aml", "INTEL_GPU_AML") | ||
| .Case("cml", "INTEL_GPU_CML") | ||
| .Case("icllp", "INTEL_GPU_ICLLP") | ||
| .Case("ehl", "INTEL_GPU_EHL") | ||
| .Case("tgllp", "INTEL_GPU_TGLLP") | ||
| .Case("rkl", "INTEL_GPU_RKL") | ||
| .Case("adl_s", "INTEL_GPU_ADL_S") | ||
| .Case("adl_p", "INTEL_GPU_ADL_P") | ||
| .Case("adl_n", "INTEL_GPU_ADL_N") | ||
| .Case("dg1", "INTEL_GPU_DG1") | ||
| .Case("acm_g10", "INTEL_GPU_ACM_G10") | ||
| .Case("acm_g11", "INTEL_GPU_ACM_G11") | ||
| .Case("acm_g12", "INTEL_GPU_ACM_G12") | ||
| .Case("pvc", "INTEL_GPU_PVC") | ||
| .Case("pvc_vg", "INTEL_GPU_PVC_VG") | ||
| .Case("mtl_u", "INTEL_GPU_MTL_U") | ||
| .Case("mtl_h", "INTEL_GPU_MTL_H") | ||
| .Case("arl_h", "INTEL_GPU_ARL_H") | ||
| .Case("bmg_g21", "INTEL_GPU_BMG_G21") | ||
| .Case("lnl_m", "INTEL_GPU_LNL_M") | ||
| .Case("ptl_h", "INTEL_GPU_PTL_H") | ||
| .Case("ptl_u", "INTEL_GPU_PTL_U") | ||
| .Case("sm_50", "NVIDIA_GPU_SM_50") | ||
| .Case("sm_52", "NVIDIA_GPU_SM_52") | ||
| .Case("sm_53", "NVIDIA_GPU_SM_53") | ||
| .Case("sm_60", "NVIDIA_GPU_SM_60") | ||
| .Case("sm_61", "NVIDIA_GPU_SM_61") | ||
| .Case("sm_62", "NVIDIA_GPU_SM_62") | ||
| .Case("sm_70", "NVIDIA_GPU_SM_70") | ||
| .Case("sm_72", "NVIDIA_GPU_SM_72") | ||
| .Case("sm_75", "NVIDIA_GPU_SM_75") | ||
| .Case("sm_80", "NVIDIA_GPU_SM_80") | ||
| .Case("sm_86", "NVIDIA_GPU_SM_86") | ||
| .Case("sm_87", "NVIDIA_GPU_SM_87") | ||
| .Case("sm_89", "NVIDIA_GPU_SM_89") | ||
| .Case("sm_90", "NVIDIA_GPU_SM_90") | ||
| .Case("sm_90a", "NVIDIA_GPU_SM_90A") | ||
| .Case("gfx700", "AMD_GPU_GFX700") | ||
| .Case("gfx701", "AMD_GPU_GFX701") | ||
| .Case("gfx702", "AMD_GPU_GFX702") | ||
| .Case("gfx703", "AMD_GPU_GFX703") | ||
| .Case("gfx704", "AMD_GPU_GFX704") | ||
| .Case("gfx705", "AMD_GPU_GFX705") | ||
| .Case("gfx801", "AMD_GPU_GFX801") | ||
| .Case("gfx802", "AMD_GPU_GFX802") | ||
| .Case("gfx803", "AMD_GPU_GFX803") | ||
| .Case("gfx805", "AMD_GPU_GFX805") | ||
| .Case("gfx810", "AMD_GPU_GFX810") | ||
| .Case("gfx900", "AMD_GPU_GFX900") | ||
| .Case("gfx902", "AMD_GPU_GFX902") | ||
| .Case("gfx904", "AMD_GPU_GFX904") | ||
| .Case("gfx906", "AMD_GPU_GFX906") | ||
| .Case("gfx908", "AMD_GPU_GFX908") | ||
| .Case("gfx909", "AMD_GPU_GFX909") | ||
| .Case("gfx90a", "AMD_GPU_GFX90A") | ||
| .Case("gfx90c", "AMD_GPU_GFX90C") | ||
| .Case("gfx940", "AMD_GPU_GFX940") | ||
| .Case("gfx941", "AMD_GPU_GFX941") | ||
| .Case("gfx942", "AMD_GPU_GFX942") | ||
| .Case("gfx1010", "AMD_GPU_GFX1010") | ||
| .Case("gfx1011", "AMD_GPU_GFX1011") | ||
| .Case("gfx1012", "AMD_GPU_GFX1012") | ||
| .Case("gfx1013", "AMD_GPU_GFX1013") | ||
| .Case("gfx1030", "AMD_GPU_GFX1030") | ||
| .Case("gfx1031", "AMD_GPU_GFX1031") | ||
| .Case("gfx1032", "AMD_GPU_GFX1032") | ||
| .Case("gfx1033", "AMD_GPU_GFX1033") | ||
| .Case("gfx1034", "AMD_GPU_GFX1034") | ||
| .Case("gfx1035", "AMD_GPU_GFX1035") | ||
| .Case("gfx1036", "AMD_GPU_GFX1036") | ||
| .Case("gfx1100", "AMD_GPU_GFX1100") | ||
| .Case("gfx1101", "AMD_GPU_GFX1101") | ||
| .Case("gfx1102", "AMD_GPU_GFX1102") | ||
| .Case("gfx1103", "AMD_GPU_GFX1103") | ||
| .Case("gfx1150", "AMD_GPU_GFX1150") | ||
| .Case("gfx1151", "AMD_GPU_GFX1151") | ||
| .Case("gfx1200", "AMD_GPU_GFX1200") | ||
| .Case("gfx1201", "AMD_GPU_GFX1201") | ||
| .Default(""); | ||
| if (!Ext.empty()) { | ||
| Macro = "__SYCL_TARGET_"; | ||
| Macro += Ext; | ||
| Macro += "__"; | ||
| } | ||
| return Macro; | ||
| } | ||
|
|
||
| } // namespace clang |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any way we could unify this with the existing OffloadArch support?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added the Intel arches to the OffloadArch enum. Although, perhaps the file it is defined in (Cuda.h) should be renamed to something more appropriate? If so, I can change that in a follow-up PR to avoid unrelated changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1, it would probably make sense to move the non-CUDA stuff into an
`Offloading.h` file (there is SYCL, OpenMP, CUDA and HIP after all).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea, I moved the
`OffloadArch` enum into Offloading.cpp/h, kept the CUDA-specific stuff in Cuda.h, and included the new header in Cuda.h, so it looks like no follow-up PR to rename Cuda.h will be necessary.
Add Offloading.cpp/h