Skip to content

Commit 77cf60e

Browse files
aykevl and deadprogram
authored and committed
darwin: print full size information for -size=full
The MachO file format is a bit weird and doesn't store the DWARF debug information directly in the file. Instead, it has to be looked up in the original object file. This makes reading the DWARF debug information for code size usage a bit more difficult. However, it works with this change.
1 parent a4e2e09 commit 77cf60e

File tree

1 file changed

+144
-11
lines changed

1 file changed

+144
-11
lines changed

builder/sizes.go

Lines changed: 144 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ var (
127127
// readProgramSizeFromDWARF reads the source location for each line of code and
128128
// each variable in the program, as far as this is stored in the DWARF debug
129129
// information.
130-
func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLine, error) {
130+
func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64, skipTombstone bool) ([]addressLine, error) {
131131
r := data.Reader()
132132
var lines []*dwarf.LineFile
133133
var addresses []addressLine
@@ -169,7 +169,7 @@ func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLin
169169
return nil, err
170170
}
171171

172-
if prevLineEntry.EndSequence && lineEntry.Address == 0 {
172+
if prevLineEntry.EndSequence && lineEntry.Address == 0 && skipTombstone {
173173
// Tombstone value. This symbol has been removed, for
174174
// example by the --gc-sections linker flag. It is still
175175
// here in the debug information because the linker can't
@@ -178,6 +178,10 @@ func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLin
178178
// skipped.
179179
// For more details, see (among others):
180180
// https://reviews.llvm.org/D84825
181+
// The value 0 can however really occur in object files,
182+
// which typically start at address 0. So don't skip
183+
// tombstone values in object files (like when parsing MachO
184+
// files).
181185
for {
182186
err := lr.Next(&lineEntry)
183187
if err != nil {
@@ -256,6 +260,65 @@ func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLin
256260
return addresses, nil
257261
}
258262

263+
// readMachOSymbolAddresses reads the MachO object file at path and returns the line table parsed from its DWARF debug information.
264+
// It also returns an index from symbol name to the position (slice index) in that line table of the entry that starts at the symbol's address. A name that qualifies more than once is mapped to -1 to mark it as unavailable. Any error from opening the file or reading its DWARF data is returned as-is.
265+
func readMachOSymbolAddresses(path string) (map[string]int, []addressLine, error) {
266+
// Some constants from mach-o/nlist.h
267+
// See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
268+
const (
269+
N_STAB = 0xe0
270+
N_TYPE = 0x0e // bitmask for N_TYPE field
271+
N_SECT = 0xe // one of the possible types in the N_TYPE field
272+
)
273+
274+
// Read DWARF from the given object file. Tombstone skipping is disabled (last argument false) because address 0 can be a real address in an object file.
275+
file, err := macho.Open(path)
276+
if err != nil {
277+
return nil, nil, err
278+
}
279+
defer file.Close()
280+
dwarf, err := file.DWARF()
281+
if err != nil {
282+
return nil, nil, err
283+
}
284+
lines, err := readProgramSizeFromDWARF(dwarf, 0, false)
285+
if err != nil {
286+
return nil, nil, err
287+
}
288+
289+
// Make a map from start addresses to indices in the line table (because the
290+
// line table is a slice, not a map). An address that occurs more than once is marked with -1.
291+
addressToLine := make(map[uint64]int, len(lines))
292+
for i, line := range lines {
293+
if _, ok := addressToLine[line.Address]; ok {
294+
addressToLine[line.Address] = -1
295+
continue
296+
}
297+
addressToLine[line.Address] = i
298+
}
299+
300+
// Make a map that for each symbol gives the start index in the line table. Symbols whose address has no unique line table entry are left out.
301+
addresses := make(map[string]int, len(addressToLine))
302+
for _, symbol := range file.Symtab.Syms {
303+
if symbol.Type&N_STAB != 0 {
304+
continue // STABS entry, ignore
305+
}
306+
if symbol.Type&0x0e != N_SECT {
307+
continue // type is not N_SECT, e.g. an undefined symbol (0x0e is the N_TYPE mask)
308+
}
309+
if index, ok := addressToLine[symbol.Value]; ok && index >= 0 {
310+
if _, ok := addresses[symbol.Name]; ok {
311+
// There is a duplicate symbol name. Mark it as unavailable.
312+
addresses[symbol.Name] = -1
313+
continue
314+
}
315+
addresses[symbol.Name] = index
316+
}
317+
}
318+
319+
return addresses, lines, nil
320+
}
321+
259322
// loadProgramSize calculates a program/data size breakdown of each package for a
260323
// given ELF file.
261324
// If the file doesn't contain DWARF debug information, the returned program
@@ -278,7 +341,7 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
278341
// Read DWARF information. The error is intentionally ignored.
279342
data, _ := file.DWARF()
280343
if data != nil {
281-
addresses, err = readProgramSizeFromDWARF(data, 0)
344+
addresses, err = readProgramSizeFromDWARF(data, 0, true)
282345
if err != nil {
283346
// However, _do_ report an error here. Something must have gone
284347
// wrong while trying to parse DWARF data.
@@ -370,11 +433,6 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
370433
}
371434
}
372435
} else if file, err := macho.NewFile(f); err == nil {
373-
// TODO: read DWARF information. On MacOS, DWARF debug information isn't
374-
// stored in the executable but stays in the object files. The
375-
// executable does however contain the object file paths that contain
376-
// debug information.
377-
378436
// Read segments, for use while reading through sections.
379437
segments := map[string]*macho.Segment{}
380438
for _, load := range file.Loads {
@@ -421,11 +479,86 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
421479
})
422480
}
423481
}
482+
483+
// Read DWARF information.
484+
// The data isn't stored directly in the binary as in most executable
485+
// formats. Instead, it is left in the object files that were used as a
486+
// basis for linking. The executable does however contain STABS debug
487+
// information that points to the source object file and is used by
488+
// debuggers.
489+
// For more information:
490+
// http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme
491+
var objSymbolNames map[string]int
492+
var objAddresses []addressLine
493+
var previousSymbol macho.Symbol
494+
for _, symbol := range file.Symtab.Syms {
495+
// STABS constants, from mach-o/stab.h:
496+
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html
497+
const (
498+
N_GSYM = 0x20
499+
N_FUN = 0x24
500+
N_STSYM = 0x26
501+
N_SO = 0x64
502+
N_OSO = 0x66
503+
)
504+
if symbol.Type == N_OSO {
505+
// Found an object file. Now try to parse it.
506+
objSymbolNames, objAddresses, err = readMachOSymbolAddresses(symbol.Name)
507+
if err != nil && sizesDebug {
508+
// Errors are normally ignored. If there is an error, it's
509+
// simply treated as that the DWARF is not available.
510+
fmt.Fprintf(os.Stderr, "could not read DWARF from file %s: %s\n", symbol.Name, err)
511+
}
512+
} else if symbol.Type == N_FUN {
513+
// Found a function.
514+
// The way this is encoded is a bit weird. MachO symbols don't
515+
// have a length. What I've found is that the length is encoded
516+
// by first having a N_FUN symbol as usual, and then having a
517+
// symbol with a zero-length name that has the value not set to
518+
// the address of the symbol but to the length. So in order to
519+
// get both the address and the length, we look for a symbol
520+
// with a name followed by a symbol without a name.
521+
if symbol.Name == "" && previousSymbol.Type == N_FUN && previousSymbol.Name != "" {
522+
// Functions are encoded as many small chunks in the line
523+
// table (one or a few instructions per source line). But
524+
// the symbol length covers the whole symbols, over many
525+
// lines and possibly including inlined functions. So we
526+
// continue to iterate through the objAddresses slice until
527+
// we've found all the source lines that are part of this
528+
// symbol.
529+
address := previousSymbol.Value
530+
length := symbol.Value
531+
if index, ok := objSymbolNames[previousSymbol.Name]; ok && index >= 0 {
532+
for length > 0 {
533+
line := objAddresses[index]
534+
line.Address = address
535+
if line.Length > length {
536+
// Line extends beyond the end of the symbol?
537+
// Weird, shouldn't happen.
538+
break
539+
}
540+
addresses = append(addresses, line)
541+
index++
542+
length -= line.Length
543+
address += line.Length
544+
}
545+
}
546+
}
547+
} else if symbol.Type == N_GSYM || symbol.Type == N_STSYM {
548+
// Global variables.
549+
if index, ok := objSymbolNames[symbol.Name]; ok {
550+
address := objAddresses[index]
551+
address.Address = symbol.Value
552+
addresses = append(addresses, address)
553+
}
554+
}
555+
previousSymbol = symbol
556+
}
424557
} else if file, err := pe.NewFile(f); err == nil {
425558
// Read DWARF information. The error is intentionally ignored.
426559
data, _ := file.DWARF()
427560
if data != nil {
428-
addresses, err = readProgramSizeFromDWARF(data, 0)
561+
addresses, err = readProgramSizeFromDWARF(data, 0, true)
429562
if err != nil {
430563
// However, _do_ report an error here. Something must have gone
431564
// wrong while trying to parse DWARF data.
@@ -495,9 +628,9 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz
495628
const codeOffset = 0x8000_0000_0000_0000
496629

497630
// Read DWARF information. The error is intentionally ignored.
498-
data, err := file.DWARF()
631+
data, _ := file.DWARF()
499632
if data != nil {
500-
addresses, err = readProgramSizeFromDWARF(data, codeOffset)
633+
addresses, err = readProgramSizeFromDWARF(data, codeOffset, true)
501634
if err != nil {
502635
// However, _do_ report an error here. Something must have gone
503636
// wrong while trying to parse DWARF data.

0 commit comments

Comments
 (0)