Skip to content

Commit 603fff7

Browse files
aykevl authored and deadprogram committed
all: add support for ThinLTO
ThinLTO optimizes across LLVM modules at link time. This means that optimizations (such as inlining and const-propagation) are possible between C and Go. This makes this change especially useful for CGo, but not just for CGo. By doing some optimizations at link time, the linker can discard some unused functions and this leads to a size reduction on average. It does increase code size in some cases, but that's true for most optimizations. I've excluded a number of targets for now (wasm, avr, xtensa, windows, macos). They can probably be supported with some more work, but that should be done in separate PRs. Overall, this change results in an average 3.24% size reduction over all the tinygo.org/x/drivers smoke tests. TODO: this commit runs part of the pass pipeline twice. We should set the PrepareForThinLTO flag in the PassManagerBuilder for even further reduced code size (0.7%) and improved compilation speed.
1 parent d4b1467 commit 603fff7

File tree

8 files changed

+121
-25
lines changed

8 files changed

+121
-25
lines changed

builder/build.go

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
105105
defer os.RemoveAll(dir)
106106
}
107107

108+
// Look up the build cache directory, which is used to speed up incremental
109+
// builds.
110+
cacheDir := goenv.Get("GOCACHE")
111+
if cacheDir == "off" {
112+
// Use temporary build directory instead, effectively disabling the
113+
// build cache.
114+
cacheDir = dir
115+
}
116+
108117
// Check for a libc dependency.
109118
// As a side effect, this also creates the headers for the given libc, if
110119
// the libc needs them.
@@ -238,12 +247,6 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
238247

239248
// Determine the path of the bitcode file (which is a serialized version
240249
// of a LLVM module).
241-
cacheDir := goenv.Get("GOCACHE")
242-
if cacheDir == "off" {
243-
// Use temporary build directory instead, effectively disabling the
244-
// build cache.
245-
cacheDir = dir
246-
}
247250
bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc")
248251
packageBitcodePaths[pkg.ImportPath] = bitcodePath
249252

@@ -416,7 +419,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
416419
// Load and link all the bitcode files. This does not yet optimize
417420
// anything, it only links the bitcode files together.
418421
ctx := llvm.NewContext()
419-
mod = ctx.NewModule("")
422+
mod = ctx.NewModule("main")
420423
for _, pkg := range lprogram.Sorted() {
421424
pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath])
422425
if err != nil {
@@ -512,8 +515,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
512515
}
513516
return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666)
514517
case ".bc":
515-
data := llvm.WriteBitcodeToMemoryBuffer(mod).Bytes()
516-
return ioutil.WriteFile(outpath, data, 0666)
518+
var buf llvm.MemoryBuffer
519+
if config.UseThinLTO() {
520+
buf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
521+
} else {
522+
buf = llvm.WriteBitcodeToMemoryBuffer(mod)
523+
}
524+
defer buf.Dispose()
525+
return ioutil.WriteFile(outpath, buf.Bytes(), 0666)
517526
case ".ll":
518527
data := []byte(mod.String())
519528
return ioutil.WriteFile(outpath, data, 0666)
@@ -533,10 +542,17 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
533542
dependencies: []*compileJob{programJob},
534543
result: objfile,
535544
run: func(*compileJob) error {
536-
llvmBuf, err := machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
537-
if err != nil {
538-
return err
545+
var llvmBuf llvm.MemoryBuffer
546+
if config.UseThinLTO() {
547+
llvmBuf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
548+
} else {
549+
var err error
550+
llvmBuf, err = machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
551+
if err != nil {
552+
return err
553+
}
539554
}
555+
defer llvmBuf.Dispose()
540556
return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666)
541557
},
542558
}
@@ -569,7 +585,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
569585
job := &compileJob{
570586
description: "compile extra file " + path,
571587
run: func(job *compileJob) error {
572-
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.Options.PrintCommands)
588+
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.UseThinLTO(), config.Options.PrintCommands)
573589
job.result = result
574590
return err
575591
},
@@ -587,7 +603,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
587603
job := &compileJob{
588604
description: "compile CGo file " + abspath,
589605
run: func(job *compileJob) error {
590-
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.Options.PrintCommands)
606+
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.UseThinLTO(), config.Options.PrintCommands)
591607
job.result = result
592608
return err
593609
},
@@ -656,6 +672,24 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
656672
if config.Options.PrintCommands != nil {
657673
config.Options.PrintCommands(config.Target.Linker, ldflags...)
658674
}
675+
if config.UseThinLTO() {
676+
ldflags = append(ldflags,
677+
"--thinlto-cache-dir="+filepath.Join(cacheDir, "thinlto"),
678+
"-plugin-opt=mcpu="+config.CPU(),
679+
"-plugin-opt=O"+strconv.Itoa(optLevel),
680+
"-plugin-opt=thinlto")
681+
if config.CodeModel() != "default" {
682+
ldflags = append(ldflags,
683+
"-mllvm", "-code-model="+config.CodeModel())
684+
}
685+
if sizeLevel >= 2 {
686+
// Workaround with roughly the same effect as
687+
// https://reviews.llvm.org/D119342.
688+
// Can hopefully be removed in LLVM 15.
689+
ldflags = append(ldflags,
690+
"-mllvm", "--rotation-max-header-size=0")
691+
}
692+
}
659693
err = link(config.Target.Linker, ldflags...)
660694
if err != nil {
661695
return &commandError{"failed to link", executable, err}
@@ -846,7 +880,7 @@ func optimizeProgram(mod llvm.Module, config *compileopts.Config) error {
846880
}
847881
}
848882

849-
if config.GOOS() != "darwin" {
883+
if config.GOOS() != "darwin" && !config.UseThinLTO() {
850884
transform.ApplyFunctionSections(mod) // -ffunction-sections
851885
}
852886

builder/cc.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ import (
5656
// depfile but without invalidating its name. For this reason, the depfile is
5757
// written on each new compilation (even when it seems unnecessary). However, it
5858
// could in rare cases lead to a stale file fetched from the cache.
59-
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) {
59+
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, thinlto bool, printCommands func(string, ...string)) (string, error) {
6060
// Hash input file.
6161
fileHash, err := hashFile(abspath)
6262
if err != nil {
@@ -67,6 +67,11 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
6767
unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock"))
6868
defer unlock()
6969

70+
ext := ".o"
71+
if thinlto {
72+
ext = ".bc"
73+
}
74+
7075
// Create cache key for the dependencies file.
7176
buf, err := json.Marshal(struct {
7277
Path string
@@ -99,7 +104,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
99104
}
100105

101106
// Obtain hashes of all the files listed as a dependency.
102-
outpath, err := makeCFileCachePath(dependencies, depfileNameHash)
107+
outpath, err := makeCFileCachePath(dependencies, depfileNameHash, ext)
103108
if err == nil {
104109
if _, err := os.Stat(outpath); err == nil {
105110
return outpath, nil
@@ -112,7 +117,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
112117
return "", err
113118
}
114119

115-
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o")
120+
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*"+ext)
116121
if err != nil {
117122
return "", err
118123
}
@@ -124,6 +129,9 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
124129
depTmpFile.Close()
125130
flags := append([]string{}, cflags...) // copy cflags
126131
flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies
132+
if thinlto {
133+
flags = append(flags, "-flto=thin")
134+
}
127135
flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath)
128136
if strings.ToLower(filepath.Ext(abspath)) == ".s" {
129137
// If this is an assembly file (.s or .S, lowercase or uppercase), then
@@ -181,7 +189,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
181189
}
182190

183191
// Move temporary object file to final location.
184-
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash)
192+
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash, ext)
185193
if err != nil {
186194
return "", err
187195
}
@@ -196,7 +204,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
196204
// Create a cache path (a path in GOCACHE) to store the output of a compiler
197205
// job. This path is based on the dep file name (which is a hash of metadata
198206
// including compiler flags) and the hash of all input files in the paths slice.
199-
func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) {
207+
func makeCFileCachePath(paths []string, depfileNameHash, ext string) (string, error) {
200208
// Hash all input files.
201209
fileHashes := make(map[string]string, len(paths))
202210
for _, path := range paths {
@@ -221,7 +229,7 @@ func makeCFileCachePath(paths []string, depfileNameHash string) (string, error)
221229
outFileNameBuf := sha512.Sum512_224(buf)
222230
cacheKey := hex.EncodeToString(outFileNameBuf[:])
223231

224-
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o")
232+
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+ext)
225233
return outpath, nil
226234
}
227235

compileopts/config.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,34 @@ func (c *Config) AutomaticStackSize() bool {
176176
return false
177177
}
178178

179+
// UseThinLTO returns whether ThinLTO should be used for the given target. Some
180+
// targets (such as wasm) are not yet supported.
181+
// We should try and remove as many exceptions as possible in the future, so
182+
// that this optimization can be applied in more places.
183+
func (c *Config) UseThinLTO() bool {
184+
parts := strings.Split(c.Triple(), "-")
185+
if parts[0] == "wasm32" {
186+
// wasm-ld doesn't seem to support ThinLTO yet.
187+
return false
188+
}
189+
if parts[0] == "avr" || parts[0] == "xtensa" {
190+
// These use external (GNU) linkers which might perhaps support ThinLTO
191+
// through a plugin, but it's too much hassle to set up.
192+
return false
193+
}
194+
if len(parts) >= 2 && strings.HasPrefix(parts[2], "macos") {
195+
// We use an external linker here at the moment.
196+
return false
197+
}
198+
if len(parts) >= 2 && parts[2] == "windows" {
199+
// Linker error (undefined runtime.trackedGlobalsBitmap) when linking
200+
// for Windows. Disable it for now until that's figured out and fixed.
201+
return false
202+
}
203+
// Other architectures support ThinLTO.
204+
return true
205+
}
206+
179207
// RP2040BootPatch returns whether the RP2040 boot patch should be applied that
180208
// calculates and patches in the checksum for the 2nd stage bootloader.
181209
func (c *Config) RP2040BootPatch() bool {

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ require (
1515
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
1616
golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9
1717
gopkg.in/yaml.v2 v2.4.0
18-
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3
18+
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1
1919
)

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,5 +80,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
8080
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
8181
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
8282
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
83-
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 h1:vQSFy0kNQegAfL/F6iyWQa4bF941Xc1gyJUkGy2m448=
84-
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=
83+
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 h1:6G8AxueDdqobCEqQrmHPLaEH1AZ1p6Y7rGElDNT7N98=
84+
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=

transform/optimizer.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config, optLevel, sizeLevel i
151151
funcPasses.FinalizeFunc()
152152

153153
// Run module passes.
154+
// TODO: somehow set the PrepareForThinLTO flag in the pass manager builder.
154155
modPasses := llvm.NewPassManager()
155156
defer modPasses.Dispose()
156157
builder.Populate(modPasses)

transform/stacksize.go

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ import (
1111
// modified after linking.
1212
func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string {
1313
functionMap := map[llvm.Value][]llvm.Value{}
14-
var functions []llvm.Value
14+
var functions []llvm.Value // ptrtoint values of functions
1515
var functionNames []string
16+
var functionValues []llvm.Value // direct references to functions
1617
for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) {
1718
if use.FirstUse().IsNil() {
1819
// Apparently this stack size isn't used.
@@ -23,6 +24,7 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
2324
if _, ok := functionMap[ptrtoint]; !ok {
2425
functions = append(functions, ptrtoint)
2526
functionNames = append(functionNames, ptrtoint.Operand(0).Name())
27+
functionValues = append(functionValues, ptrtoint.Operand(0))
2628
}
2729
functionMap[ptrtoint] = append(functionMap[ptrtoint], use)
2830
}
@@ -44,6 +46,9 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
4446
}
4547
stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes))
4648

49+
// Add all relevant values to llvm.used (for LTO).
50+
appendToUsedGlobals(mod, append([]llvm.Value{stackSizesGlobal}, functionValues...)...)
51+
4752
// Replace the calls with loads from the new global with stack sizes.
4853
irbuilder := mod.Context().NewBuilder()
4954
defer irbuilder.Dispose()
@@ -62,3 +67,22 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
6267

6368
return functionNames
6469
}
70+
71+
// Append the given values to the llvm.used array. The values can be any pointer
72+
// type, they will be bitcast to i8*.
73+
func appendToUsedGlobals(mod llvm.Module, values ...llvm.Value) {
74+
if !mod.NamedGlobal("llvm.used").IsNil() {
75+
// Sanity check. TODO: we don't emit such a global at the moment, but
76+
// when we do we should append to it instead.
77+
panic("todo: append to existing llvm.used")
78+
}
79+
i8ptrType := llvm.PointerType(mod.Context().Int8Type(), 0)
80+
var castValues []llvm.Value
81+
for _, value := range values {
82+
castValues = append(castValues, llvm.ConstBitCast(value, i8ptrType))
83+
}
84+
usedInitializer := llvm.ConstArray(i8ptrType, castValues)
85+
used := llvm.AddGlobal(mod, usedInitializer.Type(), "llvm.used")
86+
used.SetInitializer(usedInitializer)
87+
used.SetLinkage(llvm.AppendingLinkage)
88+
}

transform/testdata/stacksize.out.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
22
target triple = "armv7m-none-eabi"
33

44
@"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes"
5+
@llvm.used = appending global [2 x i8*] [i8* bitcast ([1 x i32]* @"internal/task.stackSizes" to i8*), i8* bitcast (void (i8*)* @"runtime.run$1$gowrapper" to i8*)]
56

67
declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*)
78

0 commit comments

Comments
 (0)