Skip to content

Commit 6cba572

Browse files
authored
[llvm][DebugInfo][NFC] Abstract DICompileUnit::SourceLanguage to allow alternate DWARF SourceLanguage encoding (#162255)
This patch sets up `DICompileUnit` to support the DWARFv6 `DW_AT_language_name` and `DW_AT_language_version` attributes (which are set to replace `DW_AT_language`). This patch changes the `DICompileUnit::SourceLanguage` field type to a `DISourceLanguageName` that encapsulates the notion of "versioned vs. unversioned name". A "versioned" name is one that has an associated version stored separately in `DISourceLanguageName::Version`. This patch just changes all the clients of the `getSourceLanguage` API to expect a `DISourceLanguageName`. Currently they all just `assert` (via `DISourceLanguageName::getUnversionedName`) that we're dealing with "unversioned names" (i.e., the pre-DWARFv6 language codes). In follow-up patches (e.g., draft is at #162261), when we start emitting versioned language codes, the `getUnversionedName` calls can then be adjusted to `getName`. **Implementation considerations** * We could have added a new member to `DICompileUnit` alongside the existing `SourceLanguage` field. I don't think this would have made the transition any simpler (clients would still need to be aware of "versioned" vs. "unversioned" language names). I felt that encapsulating this inside a `DISourceLanguageName` was easier to reason about for maintainers. * Currently DISourceLanguageName is a `12` byte structure. We could probably pack all the info inside a `uint64_t` (16-bits for the name, 32-bits for the version, 1-bit for answering the `hasVersionedName`). Just to keep the prototype simple I used a `std::optional`. But since the guts of the structure are hidden, we can always change the layout to a more compact representation instead. **How to review** * The new `DISourceLanguageName` structure is defined in `DebugInfoMetadata.h`. All the other changes fall out from changing the `DICompileUnit::SourceLanguage` from `unsigned` to `DISourceLanguageName`.
1 parent 0c087bd commit 6cba572

29 files changed

+211
-125
lines changed

clang/lib/CodeGen/CGDebugInfo.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,8 @@ void CGDebugInfo::CreateCompileUnit() {
787787

788788
// Create new compile unit.
789789
TheCU = DBuilder.createCompileUnit(
790-
LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "",
790+
llvm::DISourceLanguageName(LangTag), CUFile,
791+
CGOpts.EmitVersionIdentMetadata ? Producer : "",
791792
CGOpts.OptimizationLevel != 0 || CGOpts.PrepareForLTO ||
792793
CGOpts.PrepareForThinLTO,
793794
CGOpts.DwarfDebugFlags, RuntimeVers, CGOpts.SplitDwarfFile, EmissionKind,
@@ -1232,7 +1233,7 @@ llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
12321233

12331234
/// \return whether a C++ mangling exists for the type defined by TD.
12341235
static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
1235-
switch (TheCU->getSourceLanguage()) {
1236+
switch (TheCU->getSourceLanguage().getUnversionedName()) {
12361237
case llvm::dwarf::DW_LANG_C_plus_plus:
12371238
case llvm::dwarf::DW_LANG_C_plus_plus_11:
12381239
case llvm::dwarf::DW_LANG_C_plus_plus_14:
@@ -3211,8 +3212,8 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
32113212
if (!ID)
32123213
return nullptr;
32133214

3214-
auto RuntimeLang =
3215-
static_cast<llvm::dwarf::SourceLanguage>(TheCU->getSourceLanguage());
3215+
auto RuntimeLang = static_cast<llvm::dwarf::SourceLanguage>(
3216+
TheCU->getSourceLanguage().getUnversionedName());
32163217

32173218
// Return a forward declaration if this type was imported from a clang module,
32183219
// and this is not the compile unit with the implementation of the type (which
@@ -3348,7 +3349,8 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
33483349
ObjCInterfaceDecl *ID = Ty->getDecl();
33493350
llvm::DIFile *DefUnit = getOrCreateFile(ID->getLocation());
33503351
unsigned Line = getLineNumber(ID->getLocation());
3351-
unsigned RuntimeLang = TheCU->getSourceLanguage();
3352+
3353+
unsigned RuntimeLang = TheCU->getSourceLanguage().getUnversionedName();
33523354

33533355
// Bit size, align and offset of the type.
33543356
uint64_t Size = CGM.getContext().getTypeSize(Ty);

llvm/include/llvm/IR/DIBuilder.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ namespace llvm {
146146
/// \param SDK The SDK name. On Darwin, this is the last component
147147
/// of the sysroot.
148148
LLVM_ABI DICompileUnit *
149-
createCompileUnit(unsigned Lang, DIFile *File, StringRef Producer,
150-
bool isOptimized, StringRef Flags, unsigned RV,
151-
StringRef SplitName = StringRef(),
149+
createCompileUnit(DISourceLanguageName Lang, DIFile *File,
150+
StringRef Producer, bool isOptimized, StringRef Flags,
151+
unsigned RV, StringRef SplitName = StringRef(),
152152
DICompileUnit::DebugEmissionKind Kind =
153153
DICompileUnit::DebugEmissionKind::FullDebug,
154154
uint64_t DWOId = 0, bool SplitDebugInlining = true,

llvm/include/llvm/IR/DebugInfoMetadata.h

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,55 @@ namespace dwarf {
6666
enum Tag : uint16_t;
6767
}
6868

69+
/// Wrapper structure that holds a language name and its version.
70+
///
71+
/// Some debug-info formats, particularly DWARF, distinguish between
72+
/// language codes that include the version name and codes that don't.
73+
/// DISourceLanguageName may hold either of these.
74+
///
75+
class DISourceLanguageName {
76+
/// Language version. The version scheme is language
77+
/// dependent.
78+
uint32_t Version = 0;
79+
80+
/// Language name.
81+
/// If \ref HasVersion is \c true, then this name
82+
/// is version independent (i.e., doesn't include the language
83+
/// version in its name).
84+
uint16_t Name;
85+
86+
/// If \c true, then \ref Version is interpretable and \ref Name
87+
/// is a version independent name.
88+
bool HasVersion;
89+
90+
public:
91+
bool hasVersionedName() const { return HasVersion; }
92+
93+
/// Returns a versioned or unversioned language name.
94+
uint16_t getName() const { return Name; }
95+
96+
/// Transitional API for cases where we do not yet support
97+
/// versioned source language names. Use \ref getName instead.
98+
///
99+
/// FIXME: remove once all callers of this API account for versioned
100+
/// names.
101+
uint16_t getUnversionedName() const {
102+
assert(!hasVersionedName());
103+
return Name;
104+
}
105+
106+
/// Returns language version. Only valid for versioned language names.
107+
uint32_t getVersion() const {
108+
assert(hasVersionedName());
109+
return Version;
110+
}
111+
112+
DISourceLanguageName(uint16_t Lang, uint32_t Version)
113+
: Version(Version), Name(Lang), HasVersion(true) {};
114+
DISourceLanguageName(uint16_t Lang)
115+
: Version(0), Name(Lang), HasVersion(false) {};
116+
};
117+
69118
class DbgVariableRecord;
70119

71120
LLVM_ABI extern cl::opt<bool> EnableFSDiscriminator;
@@ -2003,7 +2052,7 @@ class DICompileUnit : public DIScope {
20032052
LLVM_ABI static const char *nameTableKindString(DebugNameTableKind PK);
20042053

20052054
private:
2006-
unsigned SourceLanguage;
2055+
DISourceLanguageName SourceLanguage;
20072056
unsigned RuntimeVersion;
20082057
uint64_t DWOId;
20092058
unsigned EmissionKind;
@@ -2013,16 +2062,17 @@ class DICompileUnit : public DIScope {
20132062
bool DebugInfoForProfiling;
20142063
bool RangesBaseAddress;
20152064

2016-
DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage,
2017-
bool IsOptimized, unsigned RuntimeVersion,
2018-
unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining,
2019-
bool DebugInfoForProfiling, unsigned NameTableKind,
2020-
bool RangesBaseAddress, ArrayRef<Metadata *> Ops);
2065+
DICompileUnit(LLVMContext &C, StorageType Storage,
2066+
DISourceLanguageName SourceLanguage, bool IsOptimized,
2067+
unsigned RuntimeVersion, unsigned EmissionKind, uint64_t DWOId,
2068+
bool SplitDebugInlining, bool DebugInfoForProfiling,
2069+
unsigned NameTableKind, bool RangesBaseAddress,
2070+
ArrayRef<Metadata *> Ops);
20212071
~DICompileUnit() = default;
20222072

20232073
static DICompileUnit *
2024-
getImpl(LLVMContext &Context, unsigned SourceLanguage, DIFile *File,
2025-
StringRef Producer, bool IsOptimized, StringRef Flags,
2074+
getImpl(LLVMContext &Context, DISourceLanguageName SourceLanguage,
2075+
DIFile *File, StringRef Producer, bool IsOptimized, StringRef Flags,
20262076
unsigned RuntimeVersion, StringRef SplitDebugFilename,
20272077
unsigned EmissionKind, DICompositeTypeArray EnumTypes,
20282078
DIScopeArray RetainedTypes,
@@ -2042,8 +2092,8 @@ class DICompileUnit : public DIScope {
20422092
getCanonicalMDString(Context, SDK), Storage, ShouldCreate);
20432093
}
20442094
LLVM_ABI static DICompileUnit *
2045-
getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
2046-
MDString *Producer, bool IsOptimized, MDString *Flags,
2095+
getImpl(LLVMContext &Context, DISourceLanguageName SourceLanguage,
2096+
Metadata *File, MDString *Producer, bool IsOptimized, MDString *Flags,
20472097
unsigned RuntimeVersion, MDString *SplitDebugFilename,
20482098
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
20492099
Metadata *GlobalVariables, Metadata *ImportedEntities,
@@ -2068,7 +2118,7 @@ class DICompileUnit : public DIScope {
20682118

20692119
DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
20702120
DICompileUnit,
2071-
(unsigned SourceLanguage, DIFile *File, StringRef Producer,
2121+
(DISourceLanguageName SourceLanguage, DIFile *File, StringRef Producer,
20722122
bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
20732123
StringRef SplitDebugFilename, DebugEmissionKind EmissionKind,
20742124
DICompositeTypeArray EnumTypes, DIScopeArray RetainedTypes,
@@ -2084,7 +2134,7 @@ class DICompileUnit : public DIScope {
20842134
SysRoot, SDK))
20852135
DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
20862136
DICompileUnit,
2087-
(unsigned SourceLanguage, Metadata *File, MDString *Producer,
2137+
(DISourceLanguageName SourceLanguage, Metadata *File, MDString *Producer,
20882138
bool IsOptimized, MDString *Flags, unsigned RuntimeVersion,
20892139
MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes,
20902140
Metadata *RetainedTypes, Metadata *GlobalVariables,
@@ -2099,7 +2149,7 @@ class DICompileUnit : public DIScope {
20992149

21002150
TempDICompileUnit clone() const { return cloneImpl(); }
21012151

2102-
unsigned getSourceLanguage() const { return SourceLanguage; }
2152+
DISourceLanguageName getSourceLanguage() const { return SourceLanguage; }
21032153
bool isOptimized() const { return IsOptimized; }
21042154
unsigned getRuntimeVersion() const { return RuntimeVersion; }
21052155
DebugEmissionKind getEmissionKind() const {

llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M,
4343
// filenames), so just print a few useful things.
4444
for (DICompileUnit *CU : Finder.compile_units()) {
4545
O << "Compile unit: ";
46-
auto Lang = dwarf::LanguageString(CU->getSourceLanguage());
46+
auto Lang =
47+
dwarf::LanguageString(CU->getSourceLanguage().getUnversionedName());
4748
if (!Lang.empty())
4849
O << Lang;
4950
else
50-
O << "unknown-language(" << CU->getSourceLanguage() << ")";
51+
O << "unknown-language(" << CU->getSourceLanguage().getUnversionedName()
52+
<< ")";
5153
printFile(O, CU->getFilename(), CU->getDirectory());
5254
O << '\n';
5355
}

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5861,11 +5861,11 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
58615861
#undef VISIT_MD_FIELDS
58625862

58635863
Result = DICompileUnit::getDistinct(
5864-
Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
5865-
runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
5866-
retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val,
5867-
splitDebugInlining.Val, debugInfoForProfiling.Val, nameTableKind.Val,
5868-
rangesBaseAddress.Val, sysroot.Val, sdk.Val);
5864+
Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
5865+
isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
5866+
emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
5867+
macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
5868+
nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
58695869
return false;
58705870
}
58715871

llvm/lib/Bitcode/Reader/MetadataLoader.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1866,11 +1866,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
18661866
// Ignore Record[0], which indicates whether this compile unit is
18671867
// distinct. It's always distinct.
18681868
IsDistinct = true;
1869+
18691870
auto *CU = DICompileUnit::getDistinct(
1870-
Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]),
1871-
Record[4], getMDString(Record[5]), Record[6], getMDString(Record[7]),
1872-
Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]),
1873-
getMDOrNull(Record[12]), getMDOrNull(Record[13]),
1871+
Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
1872+
getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
1873+
getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
1874+
getMDOrNull(Record[10]), getMDOrNull(Record[12]),
1875+
getMDOrNull(Record[13]),
18741876
Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
18751877
Record.size() <= 14 ? 0 : Record[14],
18761878
Record.size() <= 16 ? true : Record[16],

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2107,7 +2107,8 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
21072107
unsigned Abbrev) {
21082108
assert(N->isDistinct() && "Expected distinct compile units");
21092109
Record.push_back(/* IsDistinct */ true);
2110-
Record.push_back(N->getSourceLanguage());
2110+
2111+
Record.push_back(N->getSourceLanguage().getUnversionedName());
21112112
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
21122113
Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
21132114
Record.push_back(N->isOptimized());

llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -633,8 +633,8 @@ void CodeViewDebug::beginModule(Module *M) {
633633
Node = *CUs->operands().begin();
634634
}
635635
const auto *CU = cast<DICompileUnit>(Node);
636-
637-
CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
636+
CurrentSourceLanguage =
637+
MapDWLangToCVLang(CU->getSourceLanguage().getUnversionedName());
638638
if (!M->getCodeViewFlag() ||
639639
CU->getEmissionKind() == DICompileUnit::NoDebug) {
640640
Asm = nullptr;

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,7 +1040,8 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
10401040
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
10411041

10421042
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
1043-
DIUnit->getSourceLanguage());
1043+
DIUnit->getSourceLanguage().getUnversionedName());
1044+
10441045
NewCU.addString(Die, dwarf::DW_AT_name, FN);
10451046
StringRef SysRoot = DIUnit->getSysRoot();
10461047
if (!SysRoot.empty())
@@ -2930,10 +2931,9 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
29302931
case dwarf::DW_TAG_union_type:
29312932
case dwarf::DW_TAG_enumeration_type:
29322933
return dwarf::PubIndexEntryDescriptor(
2933-
dwarf::GIEK_TYPE,
2934-
dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
2935-
? dwarf::GIEL_EXTERNAL
2936-
: dwarf::GIEL_STATIC);
2934+
dwarf::GIEK_TYPE, dwarf::isCPlusPlus(CU->getSourceLanguage())
2935+
? dwarf::GIEL_EXTERNAL
2936+
: dwarf::GIEL_STATIC);
29372937
case dwarf::DW_TAG_typedef:
29382938
case dwarf::DW_TAG_base_type:
29392939
case dwarf::DW_TAG_subrange_type:
@@ -3926,7 +3926,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
39263926
TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
39273927

39283928
NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
3929-
CU.getLanguage());
3929+
CU.getSourceLanguage());
39303930

39313931
uint64_t Signature = makeTypeSignature(Identifier);
39323932
NewTU.setTypeSignature(Signature);

llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ DwarfUnit::~DwarfUnit() {
100100
}
101101

102102
int64_t DwarfUnit::getDefaultLowerBound() const {
103-
switch (getLanguage()) {
103+
switch (getSourceLanguage()) {
104104
default:
105105
break;
106106

@@ -704,12 +704,17 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
704704
addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
705705
}
706706

707+
llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const {
708+
return static_cast<llvm::dwarf::SourceLanguage>(
709+
getLanguage().getUnversionedName());
710+
}
711+
707712
std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
708713
if (!Context)
709714
return "";
710715

711716
// FIXME: Decide whether to implement this for non-C++ languages.
712-
if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
717+
if (!dwarf::isCPlusPlus(getSourceLanguage()))
713718
return "";
714719

715720
std::string CS;
@@ -940,7 +945,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
940945

941946
// Add prototype flag if we're dealing with a C language and the function has
942947
// been prototyped.
943-
if (isPrototyped && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
948+
if (isPrototyped && dwarf::isC(getSourceLanguage()))
944949
addFlag(Buffer, dwarf::DW_AT_prototyped);
945950

946951
// Add a DW_AT_calling_convention if this has an explicit convention.
@@ -1448,7 +1453,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
14481453

14491454
// Add the prototype if we have a prototype and we have a C like
14501455
// language.
1451-
if (SP->isPrototyped() && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
1456+
if (SP->isPrototyped() && dwarf::isC(getSourceLanguage()))
14521457
addFlag(SPDie, dwarf::DW_AT_prototyped);
14531458

14541459
if (SP->isObjCDirect())
@@ -1700,8 +1705,7 @@ DIE *DwarfUnit::getIndexTyDie() {
17001705
addString(*IndexTyDie, dwarf::DW_AT_name, Name);
17011706
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, std::nullopt, sizeof(int64_t));
17021707
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
1703-
dwarf::getArrayIndexTypeEncoding(
1704-
(dwarf::SourceLanguage)getLanguage()));
1708+
dwarf::getArrayIndexTypeEncoding(getSourceLanguage()));
17051709
DD->addAccelType(*this, CUNode->getNameTableKind(), Name, *IndexTyDie,
17061710
/*Flags*/ 0);
17071711
return IndexTyDie;

0 commit comments

Comments
 (0)