@@ -127,7 +127,7 @@ var (
127
127
// readProgramSizeFromDWARF reads the source location for each line of code and
128
128
// each variable in the program, as far as this is stored in the DWARF debug
129
129
// information.
130
- func readProgramSizeFromDWARF (data * dwarf.Data , codeOffset uint64 ) ([]addressLine , error ) {
130
+ func readProgramSizeFromDWARF (data * dwarf.Data , codeOffset uint64 , skipTombstone bool ) ([]addressLine , error ) {
131
131
r := data .Reader ()
132
132
var lines []* dwarf.LineFile
133
133
var addresses []addressLine
@@ -169,7 +169,7 @@ func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLin
169
169
return nil , err
170
170
}
171
171
172
- if prevLineEntry .EndSequence && lineEntry .Address == 0 {
172
+ if prevLineEntry .EndSequence && lineEntry .Address == 0 && skipTombstone {
173
173
// Tombstone value. This symbol has been removed, for
174
174
// example by the --gc-sections linker flag. It is still
175
175
// here in the debug information because the linker can't
@@ -178,6 +178,10 @@ func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLin
178
178
// skipped.
179
179
// For more details, see (among others):
180
180
// https://reviews.llvm.org/D84825
181
+ // The value 0 can however really occur in object files,
182
+ // that typically start at address 0. So don't skip
183
+ // tombstone values in object files (like when parsing MachO
184
+ // files).
181
185
for {
182
186
err := lr .Next (& lineEntry )
183
187
if err != nil {
@@ -256,6 +260,65 @@ func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLin
256
260
return addresses , nil
257
261
}
258
262
263
+ // Read a MachO object file and return a line table.
264
+ // Also return an index from symbol name to start address in the line table.
265
+ func readMachOSymbolAddresses (path string ) (map [string ]int , []addressLine , error ) {
266
+ // Some constants from mach-o/nlist.h
267
+ // See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
268
+ const (
269
+ N_STAB = 0xe0
270
+ N_TYPE = 0x0e // bitmask for N_TYPE field
271
+ N_SECT = 0xe // one of the possible type in the N_TYPE field
272
+ )
273
+
274
+ // Read DWARF from the given object file.
275
+ file , err := macho .Open (path )
276
+ if err != nil {
277
+ return nil , nil , err
278
+ }
279
+ defer file .Close ()
280
+ dwarf , err := file .DWARF ()
281
+ if err != nil {
282
+ return nil , nil , err
283
+ }
284
+ lines , err := readProgramSizeFromDWARF (dwarf , 0 , false )
285
+ if err != nil {
286
+ return nil , nil , err
287
+ }
288
+
289
+ // Make a map from start addresses to indices in the line table (because the
290
+ // line table is a slice, not a map).
291
+ addressToLine := make (map [uint64 ]int , len (lines ))
292
+ for i , line := range lines {
293
+ if _ , ok := addressToLine [line .Address ]; ok {
294
+ addressToLine [line .Address ] = - 1
295
+ continue
296
+ }
297
+ addressToLine [line .Address ] = i
298
+ }
299
+
300
+ // Make a map that for each symbol gives the start index in the line table.
301
+ addresses := make (map [string ]int , len (addressToLine ))
302
+ for _ , symbol := range file .Symtab .Syms {
303
+ if symbol .Type & N_STAB != 0 {
304
+ continue // STABS entry, ignore
305
+ }
306
+ if symbol .Type & 0x0e != N_SECT {
307
+ continue // undefined symbol
308
+ }
309
+ if index , ok := addressToLine [symbol .Value ]; ok && index >= 0 {
310
+ if _ , ok := addresses [symbol .Name ]; ok {
311
+ // There is a duplicate. Mark it as unavailable.
312
+ addresses [symbol .Name ] = - 1
313
+ continue
314
+ }
315
+ addresses [symbol .Name ] = index
316
+ }
317
+ }
318
+
319
+ return addresses , lines , nil
320
+ }
321
+
259
322
// loadProgramSize calculates a program/data size breakdown of each package for a
260
323
// given ELF file.
261
324
// If the file doesn't contain DWARF debug information, the returned program
@@ -278,7 +341,7 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
278
341
// Read DWARF information. The error is intentionally ignored.
279
342
data , _ := file .DWARF ()
280
343
if data != nil {
281
- addresses , err = readProgramSizeFromDWARF (data , 0 )
344
+ addresses , err = readProgramSizeFromDWARF (data , 0 , true )
282
345
if err != nil {
283
346
// However, _do_ report an error here. Something must have gone
284
347
// wrong while trying to parse DWARF data.
@@ -370,11 +433,6 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
370
433
}
371
434
}
372
435
} else if file , err := macho .NewFile (f ); err == nil {
373
- // TODO: read DWARF information. On MacOS, DWARF debug information isn't
374
- // stored in the executable but stays in the object files. The
375
- // executable does however contain the object file paths that contain
376
- // debug information.
377
-
378
436
// Read segments, for use while reading through sections.
379
437
segments := map [string ]* macho.Segment {}
380
438
for _ , load := range file .Loads {
@@ -421,11 +479,86 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
421
479
})
422
480
}
423
481
}
482
+
483
+ // Read DWARF information.
484
+ // The data isn't stored directly in the binary as in most executable
485
+ // formats. Instead, it is left in the object files that were used as a
486
+ // basis for linking. The executable does however contain STABS debug
487
+ // information that points to the source object file and is used by
488
+ // debuggers.
489
+ // For more information:
490
+ // http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme
491
+ var objSymbolNames map [string ]int
492
+ var objAddresses []addressLine
493
+ var previousSymbol macho.Symbol
494
+ for _ , symbol := range file .Symtab .Syms {
495
+ // STABS constants, from mach-o/stab.h:
496
+ // https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html
497
+ const (
498
+ N_GSYM = 0x20
499
+ N_FUN = 0x24
500
+ N_STSYM = 0x26
501
+ N_SO = 0x64
502
+ N_OSO = 0x66
503
+ )
504
+ if symbol .Type == N_OSO {
505
+ // Found an object file. Now try to parse it.
506
+ objSymbolNames , objAddresses , err = readMachOSymbolAddresses (symbol .Name )
507
+ if err != nil && sizesDebug {
508
+ // Errors are normally ignored. If there is an error, it's
509
+ // simply treated as that the DWARF is not available.
510
+ fmt .Fprintf (os .Stderr , "could not read DWARF from file %s: %s\n " , symbol .Name , err )
511
+ }
512
+ } else if symbol .Type == N_FUN {
513
+ // Found a function.
514
+ // The way this is encoded is a bit weird. MachO symbols don't
515
+ // have a length. What I've found is that the length is encoded
516
+ // by first having a N_FUN symbol as usual, and then having a
517
+ // symbol with a zero-length name that has the value not set to
518
+ // the address of the symbol but to the length. So in order to
519
+ // get both the address and the length, we look for a symbol
520
+ // with a name followed by a symbol without a name.
521
+ if symbol .Name == "" && previousSymbol .Type == N_FUN && previousSymbol .Name != "" {
522
+ // Functions are encoded as many small chunks in the line
523
+ // table (one or a few instructions per source line). But
524
+ // the symbol length covers the whole symbols, over many
525
+ // lines and possibly including inlined functions. So we
526
+ // continue to iterate through the objAddresses slice until
527
+ // we've found all the source lines that are part of this
528
+ // symbol.
529
+ address := previousSymbol .Value
530
+ length := symbol .Value
531
+ if index , ok := objSymbolNames [previousSymbol .Name ]; ok && index >= 0 {
532
+ for length > 0 {
533
+ line := objAddresses [index ]
534
+ line .Address = address
535
+ if line .Length > length {
536
+ // Line extends beyond the end of the symbol?
537
+ // Weird, shouldn't happen.
538
+ break
539
+ }
540
+ addresses = append (addresses , line )
541
+ index ++
542
+ length -= line .Length
543
+ address += line .Length
544
+ }
545
+ }
546
+ }
547
+ } else if symbol .Type == N_GSYM || symbol .Type == N_STSYM {
548
+ // Global variables.
549
+ if index , ok := objSymbolNames [symbol .Name ]; ok {
550
+ address := objAddresses [index ]
551
+ address .Address = symbol .Value
552
+ addresses = append (addresses , address )
553
+ }
554
+ }
555
+ previousSymbol = symbol
556
+ }
424
557
} else if file , err := pe .NewFile (f ); err == nil {
425
558
// Read DWARF information. The error is intentionally ignored.
426
559
data , _ := file .DWARF ()
427
560
if data != nil {
428
- addresses , err = readProgramSizeFromDWARF (data , 0 )
561
+ addresses , err = readProgramSizeFromDWARF (data , 0 , true )
429
562
if err != nil {
430
563
// However, _do_ report an error here. Something must have gone
431
564
// wrong while trying to parse DWARF data.
@@ -495,9 +628,9 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
495
628
const codeOffset = 0x8000_0000_0000_0000
496
629
497
630
// Read DWARF information. The error is intentionally ignored.
498
- data , err := file .DWARF ()
631
+ data , _ := file .DWARF ()
499
632
if data != nil {
500
- addresses , err = readProgramSizeFromDWARF (data , codeOffset )
633
+ addresses , err = readProgramSizeFromDWARF (data , codeOffset , true )
501
634
if err != nil {
502
635
// However, _do_ report an error here. Something must have gone
503
636
// wrong while trying to parse DWARF data.
0 commit comments