Skip to content

llvm-pretty-bc-parser inlines metadata nodes unnecessarily #260

@RyanGlScott

Description

@RyanGlScott

If you compile this program:

// test.c
int main(void) {
  int x = 0;
  return x;
}

Like so:

$ clang-14 test.c -emit-llvm -g -c && clang-14 test.c -emit-llvm -g -S

Then this is what the resulting test.ll file will look like:

Details
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 !dbg !10 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  call void @llvm.dbg.declare(metadata i32* %2, metadata !15, metadata !DIExpression()), !dbg !16
  store i32 0, i32* %2, align 4, !dbg !16
  %3 = load i32, i32* %2, align 4, !dbg !17
  ret i32 %3, !dbg !18
}

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
!llvm.ident = !{!9}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Ubuntu clang version 14.0.0-1ubuntu1.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "/home/ryanscott/Documents/Hacking/Haskell/llvm-pretty-bc-parser", checksumkind: CSK_MD5, checksum: "374aba39e8173c9c7f53dd059756dcd6")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 1}
!8 = !{i32 7, !"frame-pointer", i32 2}
!9 = !{!"Ubuntu clang version 14.0.0-1ubuntu1.1"}
!10 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !14)
!11 = !DISubroutineType(types: !12)
!12 = !{!13}
!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!14 = !{}
!15 = !DILocalVariable(name: "x", scope: !10, file: !1, line: 3, type: !13)
!16 = !DILocation(line: 3, column: 7, scope: !10)
!17 = !DILocation(line: 4, column: 10, scope: !10)
!18 = !DILocation(line: 4, column: 3, scope: !10)

So far, so good. Now let's see what happens when we round-trip this through llvm-pretty-bc-parser:

Details
λ> parseBitCodeFromFile "test.bc" >>= print . fmap (ppLLVM llvmVlatest ppModule)
Right source_filename = "test.c"
target triple = "x86_64-pc-linux-gnu-"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
declare default void @llvm.dbg.declare(metadata, metadata,
                                       metadata)
define default i32 @main() !dbg !10 {
; <label>: 0
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  call void @llvm.dbg.declare(metadata i32* %2,
                              metadata !DILocalVariable(scope: !10, name: "x",
                                                        file: !DIFile(filename: "test.c",
                                                                      directory: "/home/ryanscott/Documents/Hacking/Haskell/llvm-pretty-bc-parser"),
                                                        line: 3, type: !11, arg: 0, flags: 0,
                                                        align: 0),
                              metadata !DIExpression()), !dbg !DILocation(line: 3, column: 7,
                                                                          scope: !10)
  store i32 0, i32* %2, align 4, !dbg !DILocation(line: 3, column: 7,
                                                  scope: !10)
  %3 = load i32, i32* %2, align 4, !dbg !DILocation(line: 4,
                                                    column: 10, scope: !10)
  ret i32 %3, !dbg !DILocation(line: 4, column: 3, scope: !10)
}
!llvm.dbg.cu = !{!0}
!llvm.ident = !{!9}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
!0 =
distinct !DICompileUnit(language: 12, file: !1,
                        producer: "Ubuntu clang version 14.0.0-1ubuntu1.1",
                        isOptimized: false, flags: "", runtimeVersion: 0, emissionKind: 1,
                        dwoId: 0, splitDebugInlining: false, debugInfoForProfiling: false,
                        nameTableKind: 2, rangesBaseAddress: false)
!1 =
!DIFile(filename: "test.c",
        directory: "/home/ryanscott/Documents/Hacking/Haskell/llvm-pretty-bc-parser")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 1}
!8 = !{i32 7, !"frame-pointer", i32 2}
!9 = !{!"Ubuntu clang version 14.0.0-1ubuntu1.1"}
!10 =
distinct !DISubprogram(scope: !1, name: "main", file: !1, line: 2,
                       type: !13, isLocal: false, isDefinition: true, scopeLine: 2,
                       virtuality: 0, virtualIndex: 0, flags: 256, isOptimized: false,
                       unit: !0, retainedNodes: !14)
!11 =
!DIBasicType(tag: 36, name: "int", size: 32, align: 0,
             encoding: 5, flags: 0)
!12 = !{!11}
!13 = distinct !DISubroutineType(flags: 0, types: !12)
!14 = !{}
!15 =
!DILocalVariable(scope: !10, name: "x", file: !1, line: 3,
                 type: !11, arg: 0, flags: 0, align: 0)

The part that I want to draw attention to is the call void @llvm.dbg.declare(...) statement. In the original test.ll file, we have this:

  call void @llvm.dbg.declare(metadata i32* %2, metadata !15, metadata !DIExpression()), !dbg !16

But in the roundtripped code, we instead have this:

  call void @llvm.dbg.declare(metadata i32* %2,
                              metadata !DILocalVariable(scope: !10, name: "x",
                                                        file: !DIFile(filename: "test.c",
                                                                      directory: "/home/ryanscott/Documents/Hacking/Haskell/llvm-pretty-bc-parser"),
                                                        line: 3, type: !11, arg: 0, flags: 0,
                                                        align: 0),
                              metadata !DIExpression()), !dbg !DILocation(line: 3, column: 7,
                                                                          scope: !10)

Note how, instead of printing references to !15 and !16, the roundtripped version inlines their definitions entirely, resulting in much more verbose code.

Although strange, this is not wrong in this example, since both versions of the program are equivalent. This is not always the case, however. If you repeat this experiment with the test case in #258 (using Clang 17), you will see this in the original .ll file:

  call void @llvm.dbg.assign(metadata i1 undef, metadata !23, metadata !DIExpression(), metadata !24, metadata ptr undef, metadata !DIExpression()), !dbg !25

...

!24 = distinct !DIAssignID()

And this in the roundtripped version:

  call void @llvm.dbg.assign(metadata i1 undef,
                             metadata !DILocalVariable(scope: !17, name: "x",
                                                       file: !DIFile(filename: "test.c",
                                                                     directory: "/home/ryanscott/Documents/Hacking/Haskell/llvm-pretty-bc-parser"),
                                                       line: 4,
                                                       type: !DIBasicType(tag: 36, name: "int",
                                                                          size: 32, align: 0,
                                                                          encoding: 5, flags: 0),
                                                       arg: 0, flags: 0, align: 0),
                             metadata !DIExpression(), metadata !DIAssignID(),
                             metadata ptr undef,
                             metadata !DIExpression()), !dbg !DILocation(line: 0, column: 0,
                                                                         scope: !17)

Note that we are now inlining the operand metadata !DIAssignID(). This is invalid, as LLVM requires that all DIAssignID nodes be distinct. This is the case in the original .ll file, but the roundtripped version drops the distinct keyword after inlining. By that point, it is too late, as it is not possible to attach the distinct keyword to inline metadata nodes—the only way to do so is by putting the node in the top-level list of metadata nodes (e.g., !24 in the original .ll file).

In order to make the test case from #258 work, we will need to prevent llvm-pretty-bc-parser from performing this gratuitous inlining. This issue tracks that task.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions