Skip to content

Commit 0493e45

Browse files
Use existing utilities for NVidia and AMD GPUs.
1 parent 1e7ca18 commit 0493e45

File tree

4 files changed

+100
-190
lines changed

4 files changed

+100
-190
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,11 +1338,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
13381338
Archs.insert(Arch);
13391339
}
13401340
for (StringRef Arch : Archs) {
1341-
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArchSYCL(
1341+
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch(
13421342
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
13431343
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
13441344
} else if (AMDTriple &&
1345-
IsSYCLSupportedAMDGPUArch(StringToOffloadArchSYCL(
1345+
IsSYCLSupportedAMDGPUArch(StringToOffloadArch(
13461346
getProcessorFromTargetID(*AMDTriple, Arch)))) {
13471347
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
13481348
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 61 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -28,139 +28,84 @@ using namespace clang;
2828
using namespace llvm::opt;
2929

3030
// Struct that relates an AOT target value with
31-
// Intel CPUs, Intel GPUs, AMD and NVidia GPUs.
31+
// Intel CPUs and Intel GPUs.
3232
struct StringToOffloadArchSYCLMap {
3333
const char *ArchName;
34-
SYCLSupportedOffloadArchs IntelArch;
34+
SYCLSupportedIntelArchs IntelArch;
3535
};
3636

3737
// Mapping of supported SYCL offloading architectures.
3838
static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
3939
// Intel CPU mapping.
40-
{"skylake-avx512", SYCLSupportedOffloadArchs::SKYLAKEAVX512},
41-
{"core-avx2", SYCLSupportedOffloadArchs::COREAVX2},
42-
{"corei7-avx", SYCLSupportedOffloadArchs::COREI7AVX},
43-
{"corei7", SYCLSupportedOffloadArchs::COREI7},
44-
{"westmere", SYCLSupportedOffloadArchs::WESTMERE},
45-
{"sandybridge", SYCLSupportedOffloadArchs::SANDYBRIDGE},
46-
{"ivybridge", SYCLSupportedOffloadArchs::IVYBRIDGE},
47-
{"broadwell", SYCLSupportedOffloadArchs::BROADWELL},
48-
{"coffeelake", SYCLSupportedOffloadArchs::COFFEELAKE},
49-
{"alderlake", SYCLSupportedOffloadArchs::ALDERLAKE},
50-
{"skylake", SYCLSupportedOffloadArchs::SKYLAKE},
51-
{"skx", SYCLSupportedOffloadArchs::SKX},
52-
{"cascadelake", SYCLSupportedOffloadArchs::CASCADELAKE},
53-
{"icelake-client", SYCLSupportedOffloadArchs::ICELAKECLIENT},
54-
{"icelake-server", SYCLSupportedOffloadArchs::ICELAKESERVER},
55-
{"sapphirerapids", SYCLSupportedOffloadArchs::SAPPHIRERAPIDS},
56-
{"graniterapids", SYCLSupportedOffloadArchs::GRANITERAPIDS},
40+
{"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
41+
{"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
42+
{"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
43+
{"corei7", SYCLSupportedIntelArchs::COREI7},
44+
{"westmere", SYCLSupportedIntelArchs::WESTMERE},
45+
{"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
46+
{"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
47+
{"broadwell", SYCLSupportedIntelArchs::BROADWELL},
48+
{"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
49+
{"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
50+
{"skylake", SYCLSupportedIntelArchs::SKYLAKE},
51+
{"skx", SYCLSupportedIntelArchs::SKX},
52+
{"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
53+
{"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
54+
{"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
55+
{"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
56+
{"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
5757
// Intel GPU mapping.
58-
{"bdw", SYCLSupportedOffloadArchs::BDW},
59-
{"skl", SYCLSupportedOffloadArchs::SKL},
60-
{"kbl", SYCLSupportedOffloadArchs::KBL},
61-
{"cfl", SYCLSupportedOffloadArchs::CFL},
62-
{"apl", SYCLSupportedOffloadArchs::APL},
63-
{"bxt", SYCLSupportedOffloadArchs::BXT},
64-
{"glk", SYCLSupportedOffloadArchs::GLK},
65-
{"whl", SYCLSupportedOffloadArchs::WHL},
66-
{"aml", SYCLSupportedOffloadArchs::AML},
67-
{"cml", SYCLSupportedOffloadArchs::CML},
68-
{"icllp", SYCLSupportedOffloadArchs::ICLLP},
69-
{"icl", SYCLSupportedOffloadArchs::ICL},
70-
{"ehl", SYCLSupportedOffloadArchs::EHL},
71-
{"jsl", SYCLSupportedOffloadArchs::JSL},
72-
{"tgllp", SYCLSupportedOffloadArchs::TGLLP},
73-
{"tgl", SYCLSupportedOffloadArchs::TGL},
74-
{"rkl", SYCLSupportedOffloadArchs::RKL},
75-
{"adl_s", SYCLSupportedOffloadArchs::ADL_S},
76-
{"rpl_s", SYCLSupportedOffloadArchs::RPL_S},
77-
{"adl_p", SYCLSupportedOffloadArchs::ADL_P},
78-
{"adl_n", SYCLSupportedOffloadArchs::ADL_N},
79-
{"dg1", SYCLSupportedOffloadArchs::DG1},
80-
{"acm_g10", SYCLSupportedOffloadArchs::ACM_G10},
81-
{"dg2_g10", SYCLSupportedOffloadArchs::DG2_G10},
82-
{"acm_g11", SYCLSupportedOffloadArchs::ACM_G11},
83-
{"dg2_g10", SYCLSupportedOffloadArchs::DG2_G10},
84-
{"dg2_g11", SYCLSupportedOffloadArchs::DG2_G11},
85-
{"acm_g12", SYCLSupportedOffloadArchs::ACM_G12},
86-
{"dg2_g12", SYCLSupportedOffloadArchs::DG2_G12},
87-
{"pvc", SYCLSupportedOffloadArchs::PVC},
88-
{"pvc_vg", SYCLSupportedOffloadArchs::PVC_VG},
89-
{"mtl_u", SYCLSupportedOffloadArchs::MTL_U},
90-
{"mtl_s", SYCLSupportedOffloadArchs::MTL_S},
91-
{"arl_u", SYCLSupportedOffloadArchs::ARL_U},
92-
{"arl_s", SYCLSupportedOffloadArchs::ARL_S},
93-
{"mtl_h", SYCLSupportedOffloadArchs::MTL_H},
94-
{"arl_h", SYCLSupportedOffloadArchs::ARL_H},
95-
{"bmg_g21", SYCLSupportedOffloadArchs::BMG_G21},
96-
{"lnl_m", SYCLSupportedOffloadArchs::LNL_M},
97-
// AMD GPU Mapping
98-
{"gfx700", SYCLSupportedOffloadArchs::GFX700},
99-
{"gfx701", SYCLSupportedOffloadArchs::GFX701},
100-
{"gfx702", SYCLSupportedOffloadArchs::GFX702},
101-
{"gfx801", SYCLSupportedOffloadArchs::GFX801},
102-
{"gfx802", SYCLSupportedOffloadArchs::GFX802},
103-
{"gfx803", SYCLSupportedOffloadArchs::GFX803},
104-
{"gfx805", SYCLSupportedOffloadArchs::GFX805},
105-
{"gfx810", SYCLSupportedOffloadArchs::GFX810},
106-
{"gfx900", SYCLSupportedOffloadArchs::GFX900},
107-
{"gfx902", SYCLSupportedOffloadArchs::GFX902},
108-
{"gfx904", SYCLSupportedOffloadArchs::GFX904},
109-
{"gfx906", SYCLSupportedOffloadArchs::GFX906},
110-
{"gfx908", SYCLSupportedOffloadArchs::GFX908},
111-
{"gfx909", SYCLSupportedOffloadArchs::GFX909},
112-
{"gfx90a", SYCLSupportedOffloadArchs::GFX90A},
113-
{"gfx90c", SYCLSupportedOffloadArchs::GFX90C},
114-
{"gfx940", SYCLSupportedOffloadArchs::GFX940},
115-
{"gfx941", SYCLSupportedOffloadArchs::GFX941},
116-
{"gfx942", SYCLSupportedOffloadArchs::GFX942},
117-
{"gfx1010", SYCLSupportedOffloadArchs::GFX1010},
118-
{"gfx1011", SYCLSupportedOffloadArchs::GFX1011},
119-
{"gfx1012", SYCLSupportedOffloadArchs::GFX1012},
120-
{"gfx1013", SYCLSupportedOffloadArchs::GFX1013},
121-
{"gfx1030", SYCLSupportedOffloadArchs::GFX1030},
122-
{"gfx1031", SYCLSupportedOffloadArchs::GFX1031},
123-
{"gfx1032", SYCLSupportedOffloadArchs::GFX1032},
124-
{"gfx1033", SYCLSupportedOffloadArchs::GFX1033},
125-
{"gfx1034", SYCLSupportedOffloadArchs::GFX1034},
126-
{"gfx1035", SYCLSupportedOffloadArchs::GFX1035},
127-
{"gfx1036", SYCLSupportedOffloadArchs::GFX1036},
128-
{"gfx1100", SYCLSupportedOffloadArchs::GFX1100},
129-
{"gfx1101", SYCLSupportedOffloadArchs::GFX1101},
130-
{"gfx1102", SYCLSupportedOffloadArchs::GFX1102},
131-
{"gfx1103", SYCLSupportedOffloadArchs::GFX1103},
132-
{"gfx1150", SYCLSupportedOffloadArchs::GFX1150},
133-
{"gfx1151", SYCLSupportedOffloadArchs::GFX1151},
134-
{"gfx1200", SYCLSupportedOffloadArchs::GFX1200},
135-
{"gfx1201", SYCLSupportedOffloadArchs::GFX1201},
136-
// NVidia GPU Mapping.
137-
{"sm_50", SYCLSupportedOffloadArchs::SM_50},
138-
{"sm_52", SYCLSupportedOffloadArchs::SM_52},
139-
{"sm_53", SYCLSupportedOffloadArchs::SM_53},
140-
{"sm_60", SYCLSupportedOffloadArchs::SM_60},
141-
{"sm_61", SYCLSupportedOffloadArchs::SM_61},
142-
{"sm_62", SYCLSupportedOffloadArchs::SM_62},
143-
{"sm_70", SYCLSupportedOffloadArchs::SM_70},
144-
{"sm_72", SYCLSupportedOffloadArchs::SM_72},
145-
{"sm_75", SYCLSupportedOffloadArchs::SM_75},
146-
{"sm_80", SYCLSupportedOffloadArchs::SM_80},
147-
{"sm_86", SYCLSupportedOffloadArchs::SM_86},
148-
{"sm_87", SYCLSupportedOffloadArchs::SM_87},
149-
{"sm_89", SYCLSupportedOffloadArchs::SM_89},
150-
{"sm_90", SYCLSupportedOffloadArchs::SM_90},
151-
{"sm_90a", SYCLSupportedOffloadArchs::SM_90A}};
58+
{"bdw", SYCLSupportedIntelArchs::BDW},
59+
{"skl", SYCLSupportedIntelArchs::SKL},
60+
{"kbl", SYCLSupportedIntelArchs::KBL},
61+
{"cfl", SYCLSupportedIntelArchs::CFL},
62+
{"apl", SYCLSupportedIntelArchs::APL},
63+
{"bxt", SYCLSupportedIntelArchs::BXT},
64+
{"glk", SYCLSupportedIntelArchs::GLK},
65+
{"whl", SYCLSupportedIntelArchs::WHL},
66+
{"aml", SYCLSupportedIntelArchs::AML},
67+
{"cml", SYCLSupportedIntelArchs::CML},
68+
{"icllp", SYCLSupportedIntelArchs::ICLLP},
69+
{"icl", SYCLSupportedIntelArchs::ICL},
70+
{"ehl", SYCLSupportedIntelArchs::EHL},
71+
{"jsl", SYCLSupportedIntelArchs::JSL},
72+
{"tgllp", SYCLSupportedIntelArchs::TGLLP},
73+
{"tgl", SYCLSupportedIntelArchs::TGL},
74+
{"rkl", SYCLSupportedIntelArchs::RKL},
75+
{"adl_s", SYCLSupportedIntelArchs::ADL_S},
76+
{"rpl_s", SYCLSupportedIntelArchs::RPL_S},
77+
{"adl_p", SYCLSupportedIntelArchs::ADL_P},
78+
{"adl_n", SYCLSupportedIntelArchs::ADL_N},
79+
{"dg1", SYCLSupportedIntelArchs::DG1},
80+
{"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
81+
{"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
82+
{"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
83+
{"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
84+
{"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
85+
{"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
86+
{"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
87+
{"pvc", SYCLSupportedIntelArchs::PVC},
88+
{"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
89+
{"mtl_u", SYCLSupportedIntelArchs::MTL_U},
90+
{"mtl_s", SYCLSupportedIntelArchs::MTL_S},
91+
{"arl_u", SYCLSupportedIntelArchs::ARL_U},
92+
{"arl_s", SYCLSupportedIntelArchs::ARL_S},
93+
{"mtl_h", SYCLSupportedIntelArchs::MTL_H},
94+
{"arl_h", SYCLSupportedIntelArchs::ARL_H},
95+
{"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
96+
{"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
15297

15398
// Check if the user provided value for --offload-arch is a valid
154-
// SYCL supported AOT target.
155-
SYCLSupportedOffloadArchs
99+
// SYCL supported Intel AOT target. Returns UNKNOWN if it is not.
100+
SYCLSupportedIntelArchs
156101
clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
157102
auto result = std::find_if(
158103
std::begin(StringToArchNamesMap), std::end(StringToArchNamesMap),
159104
[ArchNameAsString](const StringToOffloadArchSYCLMap &map) {
160105
return ArchNameAsString == map.ArchName;
161106
});
162107
if (result == std::end(StringToArchNamesMap))
163-
return SYCLSupportedOffloadArchs::UNKNOWN;
108+
return SYCLSupportedIntelArchs::UNKNOWN;
164109
return result->IntelArch;
165110
}
166111

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 28 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,17 @@
99
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
1010
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
1111

12+
#include "clang/Basic/Cuda.h"
1213
#include "clang/Driver/Options.h"
1314
#include "clang/Driver/Tool.h"
1415
#include "clang/Driver/ToolChain.h"
1516

1617
namespace clang {
1718
namespace driver {
1819

19-
// List of architectures (Intel CPU, Intel GPU, AMD GPU, NVidia GPU)
20+
// List of architectures (Intel CPUs and Intel GPUs)
2021
// that support SYCL offloading.
21-
enum class SYCLSupportedOffloadArchs {
22+
enum class SYCLSupportedIntelArchs {
2223
// Intel CPUs
2324
UNKNOWN,
2425
SKYLAKEAVX512,
@@ -77,91 +78,46 @@ enum class SYCLSupportedOffloadArchs {
7778
ARL_H,
7879
BMG_G21,
7980
LNL_M,
80-
// AMD GPUs
81-
GFX700,
82-
GFX701,
83-
GFX702,
84-
GFX801,
85-
GFX802,
86-
GFX803,
87-
GFX805,
88-
GFX810,
89-
GFX900,
90-
GFX902,
91-
GFX904,
92-
GFX906,
93-
GFX908,
94-
GFX909,
95-
GFX90A,
96-
GFX90C,
97-
GFX940,
98-
GFX941,
99-
GFX942,
100-
GFX1010,
101-
GFX1011,
102-
GFX1012,
103-
GFX1013,
104-
GFX1030,
105-
GFX1031,
106-
GFX1032,
107-
GFX1033,
108-
GFX1034,
109-
GFX1035,
110-
GFX1036,
111-
GFX1100,
112-
GFX1101,
113-
GFX1102,
114-
GFX1103,
115-
GFX1150,
116-
GFX1151,
117-
GFX1200,
118-
GFX1201,
119-
// NVidia GPUs.
120-
SM_50,
121-
SM_52,
122-
SM_53,
123-
SM_60,
124-
SM_61,
125-
SM_62,
126-
SM_70,
127-
SM_72,
128-
SM_75,
129-
SM_80,
130-
SM_86,
131-
SM_87,
132-
SM_89,
133-
SM_90,
134-
SM_90A
13581
};
13682

83+
// Check if the given Arch value is a Generic AMD GPU.
84+
// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading.
85+
// This check is used to filter out GFX*_GENERIC AMD GPUs in
86+
// `IsSYCLSupportedAMDGPUArch`.
87+
static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
88+
return Arch == OffloadArch::GFX9_GENERIC ||
89+
Arch == OffloadArch::GFX10_1_GENERIC ||
90+
Arch == OffloadArch::GFX10_3_GENERIC ||
91+
Arch == OffloadArch::GFX11_GENERIC ||
92+
Arch == OffloadArch::GFX12_GENERIC;
93+
}
94+
13795
// Check if the given Arch value is a valid SYCL supported AMD GPU.
138-
static inline bool IsSYCLSupportedAMDGPUArch(SYCLSupportedOffloadArchs Arch) {
139-
return Arch >= SYCLSupportedOffloadArchs::GFX700 &&
140-
Arch <= SYCLSupportedOffloadArchs::GFX1201;
96+
static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
97+
return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV &&
98+
!IsAMDGenericGPUArch(Arch);
14199
}
142100

143101
// Check if the given Arch value is a valid SYCL supported NVidia GPU.
144-
static inline bool
145-
IsSYCLSupportedNVidiaGPUArch(SYCLSupportedOffloadArchs Arch) {
146-
return Arch >= SYCLSupportedOffloadArchs::SM_50 &&
147-
Arch <= SYCLSupportedOffloadArchs::SM_90A;
102+
static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
103+
return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a;
148104
}
149105

150106
// Check if the given Arch value is a valid SYCL supported Intel CPU.
151-
static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedOffloadArchs Arch) {
152-
return Arch >= SYCLSupportedOffloadArchs::SKYLAKEAVX512 &&
153-
Arch <= SYCLSupportedOffloadArchs::GRANITERAPIDS;
107+
static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
108+
return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 &&
109+
Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS;
154110
}
155111

156112
// Check if the given Arch value is a valid SYCL supported Intel GPU.
157-
static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedOffloadArchs Arch) {
158-
return Arch >= SYCLSupportedOffloadArchs::BDW &&
159-
Arch <= SYCLSupportedOffloadArchs::LNL_M;
113+
static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
114+
return Arch >= SYCLSupportedIntelArchs::BDW &&
115+
Arch <= SYCLSupportedIntelArchs::LNL_M;
160116
}
161117

162118
// Check if the user provided value for --offload-arch is a valid
163-
// SYCL supported AOT target.
164-
SYCLSupportedOffloadArchs
119+
// SYCL supported Intel AOT target. Returns UNKNOWN if it is not.
120+
SYCLSupportedIntelArchs
165121
StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);
166122

167123
// This is a mapping between the user provided --offload-arch value for Intel

clang/test/Driver/sycl-offload-arch-amd-gpu.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,13 @@
120120
// TARGET-TRIPLE-AMD-GPU: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__"
121121
// CLANG-OFFLOAD-PACKAGER-AMD: clang-offload-packager{{.*}} "--image={{.*}}triple=amdgcn-amd-amdhsa,arch=[[DEV_STR]],kind=sycl"
122122

123+
// Tests for handling an architecture that is invalid for SYCL offloading.
124+
//
125+
// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch=gfx10_3_generic %s -### 2>&1 \
126+
// RUN: | FileCheck -check-prefix=ERROR %s
127+
// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch=gfx10_3_generic %s -### 2>&1 \
128+
// RUN: | FileCheck -check-prefix=ERROR %s
129+
130+
// ERROR: error: SYCL target is invalid: 'gfx10_3_generic'
131+
123132

0 commit comments

Comments
 (0)