From 7b59746110456be554f2095119d059fa90f54bb6 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 22 Oct 2025 10:10:34 -0700 Subject: [PATCH] POC for intrinsic with underscore in name --- llvm/include/llvm/IR/Intrinsics.td | 3 +- .../TableGen/intrinsic-duplicate-enum-name.td | 9 ++ .../TableGen/Basic/CodeGenIntrinsics.cpp | 96 ++++++++++++++++--- llvm/utils/TableGen/Basic/CodeGenIntrinsics.h | 2 +- .../GlobalISel/GlobalISelMatchTable.cpp | 4 +- .../utils/TableGen/SearchableTableEmitter.cpp | 2 +- 6 files changed, 99 insertions(+), 17 deletions(-) create mode 100644 llvm/test/TableGen/intrinsic-duplicate-enum-name.td diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e6cce9a4eea1d..939e34b80bb8a 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1876,8 +1876,7 @@ def int_fptosi_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]> // Clear cache intrinsic, default to ignore (ie. emit nothing) // maps to void __clear_cache() on supporting platforms -def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], - [], "llvm.clear_cache">; +def int_clear__cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty]>; // Intrinsic to detect whether its argument is a constant. def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], diff --git a/llvm/test/TableGen/intrinsic-duplicate-enum-name.td b/llvm/test/TableGen/intrinsic-duplicate-enum-name.td new file mode 100644 index 0000000000000..b091f9cd588dc --- /dev/null +++ b/llvm/test/TableGen/intrinsic-duplicate-enum-name.td @@ -0,0 +1,9 @@ +// RUN: not llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS 2>&1 | FileCheck %s -DFILE=%s + +include "llvm/IR/Intrinsics.td" + +def int_x__y_z : Intrinsic<[llvm_anyint_ty], [], []>; + +// CHECK: [[FILE]]:[[@LINE+2]]:5: error: `Intrinsic::x_y_z` is already defined +// CHECK: [[FILE]]:[[@LINE-3]]:5: note: Previous definition here +def int_x_y_z : Intrinsic<[llvm_anyint_ty], [], []>; diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp index be7537c83da3a..cc3b48649f020 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp @@ -90,15 +90,44 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const { [](const CodeGenIntrinsic &Int1, const CodeGenIntrinsic &Int2) { return Int1.Name == Int2.Name; }); - if (I == Intrinsics.end()) - return; + if (I != Intrinsics.end()) { + // Found 2 intrinsics with same name. + const CodeGenIntrinsic &First = *I; + const CodeGenIntrinsic &Second = *(I + 1); + PrintError(Second.TheDef, + Twine("Intrinsic `") + First.Name + "` is already defined"); + PrintFatalNote(First.TheDef, "Previous definition here"); + } - // Found a duplicate intrinsics. - const CodeGenIntrinsic &First = *I; - const CodeGenIntrinsic &Second = *(I + 1); - PrintError(Second.TheDef, - Twine("Intrinsic `") + First.Name + "` is already defined"); - PrintFatalNote(First.TheDef, "Previous definition here"); + // Now detect intrinsics that may have the same enum name. For that, we first + // sort the intrinsics by their enum name. + std::vector SortedByEnumName; + SortedByEnumName.reserve(size()); + for (const CodeGenIntrinsic &Int : Intrinsics) + SortedByEnumName.push_back(&Int); + + llvm::sort(SortedByEnumName, [](const CodeGenIntrinsic *LHS, + const CodeGenIntrinsic *RHS) { + // To ensure deterministic sorted order when duplicates are + // present, use record ID as a tie-breaker + unsigned LhsID = LHS->TheDef->getID(); + unsigned RhsID = RHS->TheDef->getID(); + return std::tie(LHS->EnumName, LhsID) < std::tie(RHS->EnumName, RhsID); + }); + auto J = std::adjacent_find( + SortedByEnumName.begin(), SortedByEnumName.end(), + [](const CodeGenIntrinsic *Int1, const CodeGenIntrinsic *Int2) { + return Int1->EnumName == Int2->EnumName; + }); + + if (J != SortedByEnumName.end()) { + // Found 2 intrinsics with same enum name. + const CodeGenIntrinsic *First = *J; + const CodeGenIntrinsic *Second = *(J + 1); + PrintError(Second->TheDef, Twine("`Intrinsic::") + First->EnumName + + "` is already defined"); + PrintFatalNote(First->TheDef, "Previous definition here"); + } } // For target independent intrinsics, check that their second dotted component @@ -257,6 +286,24 @@ const CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) { return *Iter->second; } +// Sanitize the intrinsic name by replacing each _ pair with a single _ and +// optionally each single _ (in the original input string) with . +static void sanitizeName(std::string &Name, bool ReplaceSingleUnderscore) { + size_t Next = 0; + for (size_t I = 0, E = Name.size(); I < E; ++I) { + if (Name[I] == '_' && I + 1 < E && Name[I + 1] == '_') { + Name[Next++] = '_'; + // Skip over both the _s. + ++I; + } else if (ReplaceSingleUnderscore && Name[I] == '_') { + Name[Next++] = '.'; + } else { + Name[Next++] = Name[I]; + } + } + Name = Name.substr(0, Next); +} + CodeGenIntrinsic::CodeGenIntrinsic(const Record *R, const CodeGenIntrinsicContext &Ctx) : TheDef(R) { @@ -267,7 +314,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R, PrintFatalError(DefLoc, "Intrinsic '" + DefName + "' does not start with 'int_'!"); - EnumName = DefName.substr(4); + EnumName = DefName.substr(4).str(); // Ignore a missing ClangBuiltinName field. ClangBuiltinName = @@ -278,16 +325,43 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R, TargetPrefix = R->getValueAsString("TargetPrefix"); Name = R->getValueAsString("LLVMName").str(); + // Note, we only sanitize __ in intrinsic names and not their C++ enum names. + // The rationale is that if we sanitize enum names as well (by just replacing + // _ pairs with _) we may get conflicting enum names for different record + // names which is not desirable. For example: + // + // Enum Name Enum Name Intrinsic Name + // (Sanitized) (Original) + // + // int_x__y_z x_y_z x__y_z llvm.x_y.z + // int_x_y_z x_y_z x_y_z llvm.x.y.z + // + // So with no enum name sanitization, two different record names will not + // conflicts in both enum names and intrinsic names. The side-effect is that + // intrinsics like int_clear_cache will need to be named int_clear__cache to + // have their default name be "llvm.clear_cache" but then their intrisnic name + // will change to "Intrinsic::clear__cache". + + // Alternatively, we do sanitize the enum name (which preserved a lot of + // existing names), but then detect the cases where 2 different records may + // end up generating the same enum name. This/ can be done by extending + // CheckDuplicateIntrinsics() to detect duplicated enum names as well and + // fail if that happens. + // Note: (Implementing this option). + if (Name == "") { // If an explicit name isn't specified, derive one from the DefName. - Name = "llvm." + EnumName.str(); - llvm::replace(Name, '_', '.'); + Name = "llvm." + EnumName; + sanitizeName(Name, /*ReplaceSingleUnderscore*/ true); } else { // Verify it starts with "llvm.". if (!StringRef(Name).starts_with("llvm.")) PrintFatalError(DefLoc, "Intrinsic '" + DefName + "'s name does not start with 'llvm.'!"); } + // Sanitize the enum name by just replacing each pair of _ with a single _. + // That way, most existing intrinsic names stay the same. + sanitizeName(EnumName, /*ReplaceSingleUnderscore*/ false); // If TargetPrefix is specified, make sure that Name starts with // "llvm..". diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h index 2e86149514f46..d73aa89d076de 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h @@ -35,7 +35,7 @@ struct CodeGenIntrinsicContext { struct CodeGenIntrinsic { const Record *TheDef; // The actual record defining this intrinsic. std::string Name; // The name of the LLVM function "llvm.bswap.i32" - StringRef EnumName; // The name of the enum "bswap_i32" + std::string EnumName; // The name of the enum "bswap_i32" StringRef ClangBuiltinName; // Name of the corresponding GCC builtin, or "". StringRef MSBuiltinName; // Name of the corresponding MS builtin, or "". StringRef TargetPrefix; // Target prefix, e.g. "ppc" for t-s intrinsics. diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp index 5d49715879280..91a54aa933f6e 100644 --- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp @@ -1364,7 +1364,7 @@ void IntrinsicIDOperandMatcher::emitPredicateOpcodes(MatchTable &Table, Table << MatchTable::Opcode("GIM_CheckIntrinsicID") << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID) << MatchTable::Comment("Op") << MatchTable::ULEB128Value(OpIdx) - << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName.str()) + << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName) << MatchTable::LineBreak; } @@ -2180,7 +2180,7 @@ void IntrinsicIDRenderer::emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const { Table << MatchTable::Opcode("GIR_AddIntrinsicID") << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnID) - << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName.str()) + << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName) << MatchTable::LineBreak; } diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index d17d90b452bd7..295be9d54aef6 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -137,7 +137,7 @@ class SearchableTableEmitter { if (const auto *BI = dyn_cast(I)) return BI->getValue() ? "true" : "false"; if (Field.IsIntrinsic) - return "Intrinsic::" + getIntrinsic(I).EnumName.str(); + return "Intrinsic::" + getIntrinsic(I).EnumName; if (Field.IsInstruction) return I->getAsString(); if (Field.Enum) {