Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -874,4 +874,12 @@ def warn_drv_openacc_without_cir
: Warning<"OpenACC directives will result in no runtime behavior; use "
"-fclangir to enable runtime effect">,
InGroup<SourceUsesOpenACC>;
def err_drv_sycl_offload_arch_missing_value :
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @srividya-sundaram

I will take a look in a bit.

Thanks

Copy link

@asudarsa asudarsa Jul 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these warnings SYCL specific?

Thanks

Error<"must pass in a valid cpu or gpu architecture string to '--offload-arch'">;

def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">;

def warn_drv_sycl_offload_target_duplicate : Warning<
"SYCL offloading target '%0' is similar to target '%1' already specified; "
"will be ignored">, InGroup<SyclTarget>;
}
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticGroups.td
Original file line number Diff line number Diff line change
Expand Up @@ -1751,3 +1751,8 @@ def ExplicitSpecializationStorageClass : DiagGroup<"explicit-specialization-stor

// A warning for options that enable a feature that is not yet complete
def ExperimentalOption : DiagGroup<"experimental-option">;

// SYCL Warnings
def SyclTarget : DiagGroup<"sycl-target">;


9 changes: 5 additions & 4 deletions clang/include/clang/Basic/OffloadArch.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,14 @@ enum class OffloadArch {
Generic, // A processor model named 'generic' if the target backend defines a
// public one.
// Intel CPUs
GRANITERAPIDS,
GRANITERAPIDS_CPU,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the format we will follow going forward? Processor name + "_" + CPU/GPU? I am ok with it.
Adding @bader for comment.

Thanks

// Intel GPUs
BMG_G21,
BMG_G21_GPU,
LAST,

CudaDefault = OffloadArch::SM_52,
HIPDefault = OffloadArch::GFX906,
SYCLDefault = OffloadArch::BMG_G21_GPU,
};

static inline bool IsNVIDIAOffloadArch(OffloadArch A) {
Expand All @@ -121,11 +122,11 @@ static inline bool IsAMDOffloadArch(OffloadArch A) {
}

static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) {
return Arch >= OffloadArch::GRANITERAPIDS && Arch < OffloadArch::BMG_G21;
return Arch >= OffloadArch::GRANITERAPIDS_CPU && Arch < OffloadArch::BMG_G21_GPU;
}

static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) {
return Arch >= OffloadArch::BMG_G21 && Arch < OffloadArch::LAST;
return Arch >= OffloadArch::BMG_G21_GPU && Arch < OffloadArch::LAST;
}

static inline bool IsIntelOffloadArch(OffloadArch Arch) {
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Driver/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,22 @@ class Driver {
/// Compute the default -fmodule-cache-path.
/// \return True if the system provides a default cache directory.
static bool getDefaultModuleCachePath(SmallVectorImpl<char> &Result);
/// Vector of macros that need to be added to the host compilation in a
/// SYCL-based offloading scenario. These macros are gathered during
/// construction of the device compilations.
mutable std::vector<std::string> SYCLTargetMacro;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mutable std::vector<std::string> SYCLTargetMacro;
mutable std::vector<std::string> SYCLTargetMacros;


/// addSYCLTargetMacro - Add the given macro to the vector of args to be
/// added to the host compilation step.
void addSYCLTargetMacro(const llvm::opt::ArgList &Args,
StringRef Macro) const {
SYCLTargetMacro.push_back(Args.MakeArgString(Macro));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize that this patch doesn't currently have the macro addition steps - but this may be a good opportunity to reduce macro duplication that is added to the host compilation by only adding unique macro values to the SYCLTargetMacro array.

}

/// getSYCLTargetMacro - Return a non-owning view of the macro arguments
/// gathered so far in SYCLTargetMacro.
llvm::ArrayRef<std::string> getSYCLTargetMacro() const {
  llvm::ArrayRef<std::string> GatheredMacros(SYCLTargetMacro);
  return GatheredMacros;
}
};

/// \return True if the last defined optimization level is -Ofast.
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Basic/OffloadArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ static const OffloadArchToStringMap ArchNames[] = {
GFX(1201), // gfx1201
{OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
// Intel CPUs
{OffloadArch::GRANITERAPIDS, "graniterapids", ""},
{OffloadArch::GRANITERAPIDS_CPU, "graniterapids", ""},
// Intel GPUs
{OffloadArch::BMG_G21, "bmg_g21", ""},
{OffloadArch::BMG_G21_GPU, "bmg_g21", ""},
{OffloadArch::Generic, "generic", ""},
// clang-format on
};
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::GFX1201:
case OffloadArch::AMDGCNSPIRV:
case OffloadArch::Generic:
case OffloadArch::GRANITERAPIDS:
case OffloadArch::BMG_G21:
case OffloadArch::GRANITERAPIDS_CPU:
case OffloadArch::BMG_G21_GPU:
case OffloadArch::LAST:
break;
case OffloadArch::UNKNOWN:
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,8 +2333,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::GFX1201:
case OffloadArch::AMDGCNSPIRV:
case OffloadArch::Generic:
case OffloadArch::GRANITERAPIDS:
case OffloadArch::BMG_G21:
case OffloadArch::GRANITERAPIDS_CPU:
case OffloadArch::BMG_G21_GPU:
case OffloadArch::UNUSED:
case OffloadArch::UNKNOWN:
break;
Expand Down
102 changes: 92 additions & 10 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,17 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
return std::nullopt;
}


static std::optional<llvm::Triple>
getINTELOffloadTargetTriple(const Driver &D, const ArgList &Args,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name doesn't seem to fit the triple being returned. The value here is a spirv value, so is this more of a 'default SYCL JIT' triple?

const llvm::Triple &HostTriple) {
if (!Args.hasArg(options::OPT_offload_EQ)) {
return llvm::Triple(HostTriple.isArch64Bit() ? "spirv64"
: "spirv32");
}
return std::nullopt;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we emit something if the user specifies -offload= for SYCL offloading? Or at least add an assert?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently we emit a diagnostic for empty --offload-arch

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This question was about -offload. What will happen if the user says '-fsycl -offload=abc'?

}

template <typename F> static bool usesInput(const ArgList &Args, F &&Fn) {
return llvm::any_of(Args, [&](Arg *A) {
return (A->getOption().matches(options::OPT_x) &&
Expand Down Expand Up @@ -918,8 +929,7 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
}

static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) {
SmallVector<StringRef, 5> SYCLAlias = {"spir", "spir64", "spirv", "spirv32",
"spirv64"};
SmallVector<StringRef, 5> SYCLAlias = {"spirv", "spirv32", "spirv64"};
if (llvm::is_contained(SYCLAlias, TargetArch)) {
llvm::Triple TargetTriple;
TargetTriple.setArchName(TargetArch);
Expand All @@ -932,16 +942,23 @@ static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) {

static bool addSYCLDefaultTriple(Compilation &C,
SmallVectorImpl<llvm::Triple> &SYCLTriples) {

llvm::Triple DefaultTriple = getSYCLDeviceTriple(
C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32"
: "spirv64");
for (const auto &SYCLTriple : SYCLTriples) {
if (SYCLTriple == DefaultTriple)
return false;
if(SYCLTriple.isSPIRV())
return false;
}
// Check current set of triples to see if the default has already been set.
for (const auto &SYCLTriple : SYCLTriples) {
if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch &&
SYCLTriple.isSPIROrSPIRV())
return false;
}
// Add the default triple as it was not found.
llvm::Triple DefaultTriple = getSYCLDeviceTriple(
C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32"
: "spirv64");

SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple);
return true;
}
Expand Down Expand Up @@ -1141,19 +1158,84 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
// -ffreestanding cannot be used with -fsycl
argSYCLIncompatible(options::OPT_ffreestanding);

llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
llvm::StringMap<StringRef> FoundNormalizedTriples;
std::multiset<StringRef> SYCLTriples;
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;

if (IsSYCL) {
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
!IsHIP && !IsCuda ) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
auto IntelTriple = getINTELOffloadTargetTriple(*this, C.getInputArgs(),
HostTC->getTriple());
// Attempt to deduce the offloading triple from the set of architectures.
// We need to temporarily create these toolchains so that we can access
// tools for inferring architectures.
llvm::DenseSet<StringRef> Archs;
for (const std::optional<llvm::Triple> &TT : {IntelTriple}) {
if (!TT)
continue;

auto &TC =
getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, *TT,
C.getDefaultToolChain().getTriple());
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, true))
Archs.insert(Arch);
}

for (StringRef Arch : Archs) {
if (IntelTriple && IsIntelOffloadArch(StringToOffloadArch(
getProcessorFromTargetID(*IntelTriple, Arch))) ) {
DerivedArchs[IntelTriple->getTriple()].insert(Arch);
}
else {
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
return;
}
}

// If the set is empty then we failed to find a native architecture.
if (Archs.empty()) {
Diag(clang::diag::err_drv_sycl_offload_arch_missing_value);
return;
}

for (const auto &TripleAndArchs : DerivedArchs)
SYCLTriples.insert(TripleAndArchs.first());

for (StringRef Val : SYCLTriples) {
llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val));
std::string NormalizedName = SYCLTargetTriple.normalize();

// Make sure we don't have a duplicate triple.
auto [TripleIt, Inserted] =
FoundNormalizedTriples.try_emplace(NormalizedName, Val);

if (!Inserted) {
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
<< Val << TripleIt->second;
continue;
}
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
}

addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);

} else
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);

// We'll need to use the SYCL and host triples as the key into
// getOffloadingDeviceToolChain, because the device toolchains we're
// getOffloadToolChain, because the device toolchains we're
// going to create will depend on both.
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
for (const auto &TT : UniqueSYCLTriplesVec) {
auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
HostTC->getTriple());
C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL);
if (DerivedArchs.contains(TT.getTriple()))
KnownArchs[SYCLTC] = DerivedArchs[TT.getTriple()];
}
}

Expand Down Expand Up @@ -4846,7 +4928,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
} else if (Kind == Action::OFK_HIP) {
Archs.insert(OffloadArchToString(OffloadArch::HIPDefault));
} else if (Kind == Action::OFK_SYCL) {
Archs.insert(StringRef());
Archs.insert(OffloadArchToString(OffloadArch::SYCLDefault));
} else if (Kind == Action::OFK_OpenMP) {
// Accept legacy `-march` device arguments for OpenMP.
if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)
Expand Down Expand Up @@ -6740,7 +6822,7 @@ const ToolChain &Driver::getOffloadToolChain(
if (Kind == Action::OFK_HIP)
TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target,
*HostTC, Args);
else if (Kind == Action::OFK_OpenMP)
else if ((Kind == Action::OFK_OpenMP) || (Kind == Action::OFK_SYCL))
TC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(*this, Target,
*HostTC, Args);
break;
Expand Down
14 changes: 7 additions & 7 deletions clang/unittests/Basic/OffloadArchTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ TEST(OffloadArchTest, basic) {
EXPECT_TRUE(IsAMDOffloadArch(OffloadArch::GFX1201));
EXPECT_TRUE(IsAMDOffloadArch(OffloadArch::GFX12_GENERIC));
EXPECT_TRUE(IsAMDOffloadArch(OffloadArch::AMDGCNSPIRV));
EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::GRANITERAPIDS));
EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::GRANITERAPIDS_CPU));

EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::GRANITERAPIDS));
EXPECT_TRUE(IsIntelCPUOffloadArch(OffloadArch::GRANITERAPIDS));
EXPECT_FALSE(IsIntelGPUOffloadArch(OffloadArch::GRANITERAPIDS));
EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::BMG_G21));
EXPECT_FALSE(IsIntelCPUOffloadArch(OffloadArch::BMG_G21));
EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::BMG_G21));
EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::GRANITERAPIDS_CPU));
EXPECT_TRUE(IsIntelCPUOffloadArch(OffloadArch::GRANITERAPIDS_CPU));
EXPECT_FALSE(IsIntelGPUOffloadArch(OffloadArch::GRANITERAPIDS_CPU));
EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::BMG_G21_GPU));
EXPECT_FALSE(IsIntelCPUOffloadArch(OffloadArch::BMG_G21_GPU));
EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::BMG_G21_GPU));

EXPECT_FALSE(IsNVIDIAOffloadArch(OffloadArch::Generic));
EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::Generic));
Expand Down