- 
                Notifications
    
You must be signed in to change notification settings  - Fork 15.1k
 
          [NFC][TableGen] Use Twine for Name argument in CodeGenHelpers
          #163581
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| 
          
 @llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-core Author: Rahul Joshi (jurahul) ChangesFull diff: https://github.com/llvm/llvm-project/pull/163581.diff 4 Files Affected: 
 diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h
index e22c6d4f6d390..d6fe36c8da4d3 100644
--- a/llvm/include/llvm/TableGen/CodeGenHelpers.h
+++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h
@@ -13,9 +13,8 @@
 #ifndef LLVM_TABLEGEN_CODEGENHELPERS_H
 #define LLVM_TABLEGEN_CODEGENHELPERS_H
 
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
@@ -23,7 +22,7 @@ namespace llvm {
 // Simple RAII helper for emitting ifdef-undef-endif scope.
 class IfDefEmitter {
 public:
-  IfDefEmitter(raw_ostream &OS, StringRef Name) : Name(Name.str()), OS(OS) {
+  IfDefEmitter(raw_ostream &OS, const Twine &Name) : Name(Name.str()), OS(OS) {
     OS << "#ifdef " << Name << "\n"
        << "#undef " << Name << "\n\n";
   }
@@ -37,7 +36,7 @@ class IfDefEmitter {
 // Simple RAII helper for emitting header include guard (ifndef-define-endif).
 class IncludeGuardEmitter {
 public:
-  IncludeGuardEmitter(raw_ostream &OS, StringRef Name)
+  IncludeGuardEmitter(raw_ostream &OS, const Twine &Name)
       : Name(Name.str()), OS(OS) {
     OS << "#ifndef " << Name << "\n"
        << "#define " << Name << "\n\n";
@@ -54,8 +53,8 @@ class IncludeGuardEmitter {
 // namespace scope.
 class NamespaceEmitter {
 public:
-  NamespaceEmitter(raw_ostream &OS, StringRef NameUntrimmed)
-      : Name(trim(NameUntrimmed).str()), OS(OS) {
+  NamespaceEmitter(raw_ostream &OS, const Twine &NameUntrimmed)
+      : Name(trim(NameUntrimmed)), OS(OS) {
     if (!Name.empty())
       OS << "namespace " << Name << " {\n";
   }
@@ -77,9 +76,11 @@ class NamespaceEmitter {
   // }
   //
   // and cannot use "namespace ::mlir::toy".
-  static StringRef trim(StringRef Name) {
-    Name.consume_front("::");
-    return Name;
+  static std::string trim(const Twine &NameUntrimmed) {
+    std::string Name = NameUntrimmed.str();
+    StringRef StrRef = Name;
+    StrRef.consume_front("::");
+    return StrRef.str();
   }
   std::string Name;
   raw_ostream &OS;
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index 3c6ff1132230b..bc9d281f8cbb9 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -266,7 +266,7 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
     return;
 
   StringRef Lang = DirLang.getName();
-  IncludeGuardEmitter IncGuard(OS, (Twine("LLVM_") + Lang + "_INC").str());
+  IncludeGuardEmitter IncGuard(OS, Twine("LLVM_") + Lang + "_INC");
 
   OS << "#include \"llvm/ADT/ArrayRef.h\"\n";
 
@@ -941,10 +941,8 @@ static void generateClauseSet(ArrayRef<const Record *> VerClauses,
 static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
                                         Frontend FE, raw_ostream &OS) {
 
-  std::string IfDefName{"GEN_"};
-  IfDefName += getFESpelling(FE).upper();
-  IfDefName += "_DIRECTIVE_CLAUSE_SETS";
-  IfDefEmitter Scope(OS, IfDefName);
+  IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() +
+                             "_DIRECTIVE_CLAUSE_SETS");
 
   StringRef Namespace =
       getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE);
@@ -985,10 +983,8 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
 // allowances (allowed, allowed once, allowed exclusive and required).
 static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang,
                                        Frontend FE, raw_ostream &OS) {
-  std::string IfDefName{"GEN_"};
-  IfDefName += getFESpelling(FE).upper();
-  IfDefName += "_DIRECTIVE_CLAUSE_MAP";
-  IfDefEmitter Scope(OS, IfDefName);
+  IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() +
+                             "_DIRECTIVE_CLAUSE_MAP");
 
   OS << "{\n";
 
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index daae3c79ffd43..ff610f46c5aa0 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -4938,10 +4938,7 @@ static void emitOpDefShard(const RecordKeeper &records,
                            ArrayRef<const Record *> defs,
                            const Dialect &dialect, unsigned shardIndex,
                            unsigned shardCount, raw_ostream &os) {
-  std::string shardGuard = "GET_OP_DEFS_";
-  std::string indexStr = std::to_string(shardIndex);
-  shardGuard += indexStr;
-  IfDefEmitter scope(os, shardGuard);
+  IfDefEmitter scope(os, Twine("GET_OP_DEFS_") + Twine(shardIndex));
 
   // Emit the op registration hook in the first shard.
   const char *const opRegistrationHook =
@@ -4968,7 +4965,7 @@ static void emitOpDefShard(const RecordKeeper &records,
   os << "}\n";
 
   // Generate the per-shard op definitions.
-  emitOpClassDefs(records, defs, os, indexStr);
+  emitOpClassDefs(records, defs, os, std::to_string(shardIndex));
 }
 
 /// Emit op definitions for all op records.
diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
index ab8d534a99f19..f74cc05790042 100644
--- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
@@ -442,8 +442,8 @@ void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) {
 }
 
 void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) {
-  auto cppNamespace = (interface.getCppNamespace() + "::detail").str();
-  llvm::NamespaceEmitter ns(os, cppNamespace);
+  llvm::NamespaceEmitter ns(os,
+                            Twine(interface.getCppNamespace()) + "::detail");
 
   StringRef interfaceName = interface.getName();
   auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str();
 | 
    
| 
          
 @llvm/pr-subscribers-tablegen Author: Rahul Joshi (jurahul) ChangesFull diff: https://github.com/llvm/llvm-project/pull/163581.diff 4 Files Affected: 
 diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h
index e22c6d4f6d390..d6fe36c8da4d3 100644
--- a/llvm/include/llvm/TableGen/CodeGenHelpers.h
+++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h
@@ -13,9 +13,8 @@
 #ifndef LLVM_TABLEGEN_CODEGENHELPERS_H
 #define LLVM_TABLEGEN_CODEGENHELPERS_H
 
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
@@ -23,7 +22,7 @@ namespace llvm {
 // Simple RAII helper for emitting ifdef-undef-endif scope.
 class IfDefEmitter {
 public:
-  IfDefEmitter(raw_ostream &OS, StringRef Name) : Name(Name.str()), OS(OS) {
+  IfDefEmitter(raw_ostream &OS, const Twine &Name) : Name(Name.str()), OS(OS) {
     OS << "#ifdef " << Name << "\n"
        << "#undef " << Name << "\n\n";
   }
@@ -37,7 +36,7 @@ class IfDefEmitter {
 // Simple RAII helper for emitting header include guard (ifndef-define-endif).
 class IncludeGuardEmitter {
 public:
-  IncludeGuardEmitter(raw_ostream &OS, StringRef Name)
+  IncludeGuardEmitter(raw_ostream &OS, const Twine &Name)
       : Name(Name.str()), OS(OS) {
     OS << "#ifndef " << Name << "\n"
        << "#define " << Name << "\n\n";
@@ -54,8 +53,8 @@ class IncludeGuardEmitter {
 // namespace scope.
 class NamespaceEmitter {
 public:
-  NamespaceEmitter(raw_ostream &OS, StringRef NameUntrimmed)
-      : Name(trim(NameUntrimmed).str()), OS(OS) {
+  NamespaceEmitter(raw_ostream &OS, const Twine &NameUntrimmed)
+      : Name(trim(NameUntrimmed)), OS(OS) {
     if (!Name.empty())
       OS << "namespace " << Name << " {\n";
   }
@@ -77,9 +76,11 @@ class NamespaceEmitter {
   // }
   //
   // and cannot use "namespace ::mlir::toy".
-  static StringRef trim(StringRef Name) {
-    Name.consume_front("::");
-    return Name;
+  static std::string trim(const Twine &NameUntrimmed) {
+    std::string Name = NameUntrimmed.str();
+    StringRef StrRef = Name;
+    StrRef.consume_front("::");
+    return StrRef.str();
   }
   std::string Name;
   raw_ostream &OS;
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index 3c6ff1132230b..bc9d281f8cbb9 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -266,7 +266,7 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
     return;
 
   StringRef Lang = DirLang.getName();
-  IncludeGuardEmitter IncGuard(OS, (Twine("LLVM_") + Lang + "_INC").str());
+  IncludeGuardEmitter IncGuard(OS, Twine("LLVM_") + Lang + "_INC");
 
   OS << "#include \"llvm/ADT/ArrayRef.h\"\n";
 
@@ -941,10 +941,8 @@ static void generateClauseSet(ArrayRef<const Record *> VerClauses,
 static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
                                         Frontend FE, raw_ostream &OS) {
 
-  std::string IfDefName{"GEN_"};
-  IfDefName += getFESpelling(FE).upper();
-  IfDefName += "_DIRECTIVE_CLAUSE_SETS";
-  IfDefEmitter Scope(OS, IfDefName);
+  IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() +
+                             "_DIRECTIVE_CLAUSE_SETS");
 
   StringRef Namespace =
       getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE);
@@ -985,10 +983,8 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
 // allowances (allowed, allowed once, allowed exclusive and required).
 static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang,
                                        Frontend FE, raw_ostream &OS) {
-  std::string IfDefName{"GEN_"};
-  IfDefName += getFESpelling(FE).upper();
-  IfDefName += "_DIRECTIVE_CLAUSE_MAP";
-  IfDefEmitter Scope(OS, IfDefName);
+  IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() +
+                             "_DIRECTIVE_CLAUSE_MAP");
 
   OS << "{\n";
 
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index daae3c79ffd43..ff610f46c5aa0 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -4938,10 +4938,7 @@ static void emitOpDefShard(const RecordKeeper &records,
                            ArrayRef<const Record *> defs,
                            const Dialect &dialect, unsigned shardIndex,
                            unsigned shardCount, raw_ostream &os) {
-  std::string shardGuard = "GET_OP_DEFS_";
-  std::string indexStr = std::to_string(shardIndex);
-  shardGuard += indexStr;
-  IfDefEmitter scope(os, shardGuard);
+  IfDefEmitter scope(os, Twine("GET_OP_DEFS_") + Twine(shardIndex));
 
   // Emit the op registration hook in the first shard.
   const char *const opRegistrationHook =
@@ -4968,7 +4965,7 @@ static void emitOpDefShard(const RecordKeeper &records,
   os << "}\n";
 
   // Generate the per-shard op definitions.
-  emitOpClassDefs(records, defs, os, indexStr);
+  emitOpClassDefs(records, defs, os, std::to_string(shardIndex));
 }
 
 /// Emit op definitions for all op records.
diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
index ab8d534a99f19..f74cc05790042 100644
--- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
@@ -442,8 +442,8 @@ void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) {
 }
 
 void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) {
-  auto cppNamespace = (interface.getCppNamespace() + "::detail").str();
-  llvm::NamespaceEmitter ns(os, cppNamespace);
+  llvm::NamespaceEmitter ns(os,
+                            Twine(interface.getCppNamespace()) + "::detail");
 
   StringRef interfaceName = interface.getName();
   auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str();
 | 
    
| 
           Would like to get this in first before #163820  | 
    
| class IfDefEmitter { | ||
| public: | ||
| IfDefEmitter(raw_ostream &OS, StringRef Name) : Name(Name.str()), OS(OS) { | ||
| IfDefEmitter(raw_ostream &OS, const Twine &Name) : Name(Name.str()), OS(OS) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't follow why doing this?
In general I believe we use Twine when the string is only conditionally used, because we save the time to serialize the string unnecessarily on the caller side.
Here this is on the contrary a pessimization: it'll serialize the string twice below!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intent here (and I think @s-barannikov can confirm as he also suggested using Twine) is easy string concatenation. So instead of concatenating a series of strings (either as concatenated Twines or std::string concatenation) and calling .str() externally this makes it convienent to pass a concatenated string as a Twine directly and avoids the .str(). Existing code that passed in a StringRef should not be pessimized.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So for example,
auto cppNamespace = (interface.getCppNamespace() + "::detail").str();
  llvm::NamespaceEmitter ns(os, cppNamespace);
is simplified to:
  llvm::NamespaceEmitter ns(os, interface.getCppNamespace() + "::detail");
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the twice-serializatioon, I am guessing we should use the std::string class member and not the input twine. Let me do that as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed the > 1 serialization issue you pointed out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I though most IRBuilder API take in a Twine and not a StringRef for this reason, so clients can easily pass concatenated strings as Twines, which do get serialized internally when that IR object is created. TableGen Error.h API also take in a Twine, but there you could argue that the Twine is just emitted to a stream and hence does not get serialized.
Just to be clear String -> Twine -> std::string (new code) has no pessimization compared to StringRef->std::string (old code).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new code for NamespaceEmiter does String -> Twine -> std::string -> StringRef -> std::string because of the trim function. The old code trimmed a StringRef pointing to memory owned by the caller and only converted to std::string after trimming the StringRef. So there is an extra copy now in the case that the caller provided a complete string.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, for the trim() case (for the namespace emitter). I think we can optimize it if it's a concern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like to fix this extra copy, we can use Twine::toStringRef API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
19fef5b    to
    01477a7      
    Compare
  
    | 
           Maybe I should go through https://discourse.llvm.org/t/correct-use-of-stringref-and-twine/20142  | 
    
          
  | 
    
          
 I think this deserves a Discourse post: it seems that 14y old discussion didn't lead to definitive conclusion, and the codebase does not seem to consistently prefer Twine to StringRef in general as far as I can tell (?). At least I probably used Twine only a handful of time in APIs for the last 6 years, I assumed StringRef to be the default and the pattern of   | 
    
          
 I was thinking the same. Let me post something to start a discussion.  | 
    
d0deaa8    to
    c520e59      
    Compare
  
    c520e59    to
    592e875      
    Compare
  
    | 
           @joker-eph does the discourse thread lend credence to my approach here (to use Twine)? I have addressed other issues related to double serialization and allocation. @topperc as well for another review.  | 
    
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Use
Twineinstead ofStringReffor various CodeGenHelper constructor arguments so that it's easy to pass in temporary concatenations.