Skip to content

Conversation

Michael137
Copy link
Member

@Michael137 Michael137 force-pushed the llvm/dwarf-language-attrs-2-part-1 branch from d434950 to 7affded Compare October 8, 2025 14:44
@Michael137 Michael137 force-pushed the llvm/dwarf-language-attrs-2-part-1 branch from 7affded to 84cf912 Compare October 8, 2025 17:33
@Michael137 Michael137 marked this pull request as ready for review October 8, 2025 17:37
@llvmbot llvmbot added the llvm:ir label Oct 8, 2025
@llvmbot
Copy link
Member

llvmbot commented Oct 8, 2025

@llvm/pr-subscribers-llvm-ir

Author: Michael Buch (Michael137)

Changes

Depends on:

Part of a patch series to support the DWARFv6 DW_AT_language_name/DW_AT_language_version attributes.


Full diff: https://github.com/llvm/llvm-project/pull/162445.diff

10 Files Affected:

  • (modified) llvm/include/llvm/AsmParser/LLToken.h (+22-21)
  • (modified) llvm/lib/AsmParser/LLLexer.cpp (+1)
  • (modified) llvm/lib/AsmParser/LLParser.cpp (+43-6)
  • (modified) llvm/lib/Bitcode/Reader/MetadataLoader.cpp (+11-5)
  • (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+7-1)
  • (modified) llvm/lib/IR/AsmWriter.cpp (+10-4)
  • (added) llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll (+4)
  • (added) llvm/test/Assembler/dicompileunit-invalid-language.ll (+22)
  • (modified) llvm/test/Assembler/invalid-dicompileunit-missing-language.ll (+1-1)
  • (added) llvm/test/Bitcode/dwarf-source-language-name.ll (+15)
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index d976d40e5e956..6de99fe182ad9 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -488,27 +488,28 @@ enum Kind {
   SummaryID,  // ^42
 
   // String valued tokens (StrVal).
-  LabelStr,         // foo:
-  GlobalVar,        // @foo @"foo"
-  ComdatVar,        // $foo
-  LocalVar,         // %foo %"foo"
-  MetadataVar,      // !foo
-  StringConstant,   // "foo"
-  DwarfTag,         // DW_TAG_foo
-  DwarfAttEncoding, // DW_ATE_foo
-  DwarfVirtuality,  // DW_VIRTUALITY_foo
-  DwarfLang,        // DW_LANG_foo
-  DwarfCC,          // DW_CC_foo
-  EmissionKind,     // lineTablesOnly
-  NameTableKind,    // GNU
-  FixedPointKind,   // Fixed point
-  DwarfOp,          // DW_OP_foo
-  DIFlag,           // DIFlagFoo
-  DISPFlag,         // DISPFlagFoo
-  DwarfMacinfo,     // DW_MACINFO_foo
-  ChecksumKind,     // CSK_foo
-  DbgRecordType,    // dbg_foo
-  DwarfEnumKind,    // DW_APPLE_ENUM_KIND_foo
+  LabelStr,            // foo:
+  GlobalVar,           // @foo @"foo"
+  ComdatVar,           // $foo
+  LocalVar,            // %foo %"foo"
+  MetadataVar,         // !foo
+  StringConstant,      // "foo"
+  DwarfTag,            // DW_TAG_foo
+  DwarfAttEncoding,    // DW_ATE_foo
+  DwarfVirtuality,     // DW_VIRTUALITY_foo
+  DwarfLang,           // DW_LANG_foo
+  DwarfSourceLangName, // DW_LNAME_foo
+  DwarfCC,             // DW_CC_foo
+  EmissionKind,        // lineTablesOnly
+  NameTableKind,       // GNU
+  FixedPointKind,      // Fixed point
+  DwarfOp,             // DW_OP_foo
+  DIFlag,              // DIFlagFoo
+  DISPFlag,            // DISPFlagFoo
+  DwarfMacinfo,        // DW_MACINFO_foo
+  ChecksumKind,        // CSK_foo
+  DbgRecordType,       // dbg_foo
+  DwarfEnumKind,       // DW_APPLE_ENUM_KIND_foo
 
   // Type valued tokens (TyVal).
   Type,
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index f6937d38eb38c..50d1d4730007a 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -982,6 +982,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   DWKEYWORD(ATE, DwarfAttEncoding);
   DWKEYWORD(VIRTUALITY, DwarfVirtuality);
   DWKEYWORD(LANG, DwarfLang);
+  DWKEYWORD(LNAME, DwarfSourceLangName);
   DWKEYWORD(CC, DwarfCC);
   DWKEYWORD(OP, DwarfOp);
   DWKEYWORD(MACINFO, DwarfMacinfo);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 55899660fa84a..380b19296a3c4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4740,6 +4740,10 @@ struct DwarfLangField : public MDUnsignedField {
   DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
 };
 
+struct DwarfSourceLangNameField : public MDUnsignedField {
+  DwarfSourceLangNameField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+
 struct DwarfCCField : public MDUnsignedField {
   DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {}
 };
@@ -4997,6 +5001,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
   return false;
 }
 
+template <>
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
+                            DwarfSourceLangNameField &Result) {
+  if (Lex.getKind() == lltok::APSInt)
+    return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+  if (Lex.getKind() != lltok::DwarfSourceLangName)
+    return tokError("expected DWARF source language name");
+
+  unsigned Lang = dwarf::getSourceLanguageName(Lex.getStrVal());
+  if (!Lang)
+    return tokError("invalid DWARF source language name" + Twine(" '") +
+                    Lex.getStrVal() + "'");
+  assert(Lang <= Result.Max && "Expected valid DWARF source language name");
+  Result.assign(Lang);
+  Lex.Lex();
+  return false;
+}
+
 template <>
 bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
   if (Lex.getKind() == lltok::APSInt)
@@ -5836,9 +5859,12 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   if (!IsDistinct)
     return tokError("missing 'distinct', required for !DICompileUnit");
 
+  LocTy Loc = Lex.getLoc();
+
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
-  REQUIRED(language, DwarfLangField, );                                        \
   REQUIRED(file, MDField, (/* AllowNull */ false));                            \
+  OPTIONAL(language, DwarfLangField, );                                        \
+  OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, );                    \
   OPTIONAL(producer, MDStringField, );                                         \
   OPTIONAL(isOptimized, MDBoolField, );                                        \
   OPTIONAL(flags, MDStringField, );                                            \
@@ -5860,12 +5886,23 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
+  if (!language.Seen && !sourceLanguageName.Seen)
+    return error(Loc, "missing one of 'language' or 'sourceLanguageName', "
+                      "required for !DICompileUnit");
+
+  if (language.Seen && sourceLanguageName.Seen)
+    return error(Loc, "can only specify one of 'language' and "
+                      "'sourceLanguageName' on !DICompileUnit");
+
   Result = DICompileUnit::getDistinct(
-      Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
-      isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
-      emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
-      macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
-      nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
+      Context,
+      language.Seen ? DISourceLanguageName(language.Val)
+                    : DISourceLanguageName(sourceLanguageName.Val, 0),
+      file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
+      splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
+      globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
+      debugInfoForProfiling.Val, nameTableKind.Val, rangesBaseAddress.Val,
+      sysroot.Val, sdk.Val);
   return false;
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index a4d1b8372dfac..cdcf7a80ffac7 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1867,12 +1867,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     // distinct.  It's always distinct.
     IsDistinct = true;
 
+    const auto LangVersionMask = (uint64_t(1) << 63);
+    const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+
     auto *CU = DICompileUnit::getDistinct(
-        Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
-        getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
-        getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
-        getMDOrNull(Record[10]), getMDOrNull(Record[12]),
-        getMDOrNull(Record[13]),
+        Context,
+        HasVersionedLanguage
+            ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+            : DISourceLanguageName(Record[1]),
+        getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
+        getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
+        getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+        getMDOrNull(Record[12]), getMDOrNull(Record[13]),
         Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
         Record.size() <= 14 ? 0 : Record[14],
         Record.size() <= 16 ? true : Record[16],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7ed140d392fca..0ca55a26f8013 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2108,7 +2108,13 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
   assert(N->isDistinct() && "Expected distinct compile units");
   Record.push_back(/* IsDistinct */ true);
 
-  Record.push_back(N->getSourceLanguage().getUnversionedName());
+  auto Lang = N->getSourceLanguage();
+  Record.push_back(Lang.getName());
+  // Set bit so the MetadataLoader can distinguish between versioned and
+  // unversioned names.
+  if (Lang.hasVersionedName())
+    Record.back() ^= (uint64_t(1) << 63);
+
   Record.push_back(VE.getMetadataOrNullID(N->getFile()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
   Record.push_back(N->isOptimized());
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd3902d..0bc877d1c4471 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2370,10 +2370,16 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
   Out << "!DICompileUnit(";
   MDFieldPrinter Printer(Out, WriterCtx);
 
-  Printer.printDwarfEnum("language",
-                         N->getSourceLanguage().getUnversionedName(),
-                         dwarf::LanguageString,
-                         /* ShouldSkipZero */ false);
+  auto Lang = N->getSourceLanguage();
+  if (Lang.hasVersionedName())
+    Printer.printDwarfEnum(
+        "sourceLanguageName",
+        static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+        dwarf::SourceLanguageNameString,
+        /* ShouldSkipZero */ false);
+  else
+    Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
+                           /* ShouldSkipZero */ false);
 
   Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
   Printer.printString("producer", N->getProducer());
diff --git a/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
new file mode 100644
index 0000000000000..3aad27ba8a87a
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:15: error: can only specify one of 'language' and 'sourceLanguageName' on !DICompileUnit
+!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageName: DW_LNAME_C, file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Assembler/dicompileunit-invalid-language.ll b/llvm/test/Assembler/dicompileunit-invalid-language.ll
new file mode 100644
index 0000000000000..da93c4f20e333
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-invalid-language.ll
@@ -0,0 +1,22 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as < %t/invalid_dw_lang.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG
+; RUN: not llvm-as < %t/invalid_dw_lang_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG_2
+; RUN: not llvm-as < %t/invalid_dw_lname.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME
+; RUN: not llvm-as < %t/invalid_dw_lname_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME_2
+
+; INVALID_DW_LANG:    invalid DWARF language 'DW_LANG_blah'
+; INVALID_DW_LANG_2:  expected DWARF language
+; INVALID_DW_LNAME:   invalid DWARF source language name 'DW_LNAME_blah'
+; INVALID_DW_LNAME_2: expected DWARF source language name
+
+;--- invalid_dw_lang.ll
+!0 = distinct !DICompileUnit(language: DW_LANG_blah)
+
+;--- invalid_dw_lang_2.ll
+!0 = distinct !DICompileUnit(language: DW_LNAME_C)
+
+;--- invalid_dw_lname.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_blah)
+
+;--- invalid_dw_lname_2.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LANG_C)
diff --git a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
index 8e4cb0261dbbf..ebc86e3410d66 100644
--- a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
+++ b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll
@@ -1,4 +1,4 @@
 ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
 
-; CHECK: <stdin>:[[@LINE+1]]:74: error: missing required field 'language'
+; CHECK: <stdin>:[[@LINE+1]]:15: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit
 !0 = distinct !DICompileUnit(file: !DIFile(filename: "a", directory: "b"))
diff --git a/llvm/test/Bitcode/dwarf-source-language-name.ll b/llvm/test/Bitcode/dwarf-source-language-name.ll
new file mode 100644
index 0000000000000..e9893638f61b5
--- /dev/null
+++ b/llvm/test/Bitcode/dwarf-source-language-name.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: sourceLanguageName: DW_LNAME_ObjC_plus_plus
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!1 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}

Copy link
Collaborator

@adrian-prantl adrian-prantl left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with a suggestion for an improved auto-upgrade test.

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}

!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to use a versioned language such as DW_LANG_C_plus_plus_17 to test the upgrade to name+version?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants