|
# Targets that name actions rather than files on disk.
.PHONY: top clean clean-profiles

# Root of the Julia source tree, two directory levels above this makefile's
# working directory.
JULIA_ROOT := $(CURDIR)/../..

# One out-of-tree build directory per stage.
STAGE0_BUILD := $(CURDIR)/stage0.build
STAGE1_BUILD := $(CURDIR)/stage1.build
STAGE2_BUILD := $(CURDIR)/stage2.build

# Tool directory inside the stage0 build. The trailing slash is part of the
# value, so tool names are appended to it directly.
STAGE0_TOOLS := $(STAGE0_BUILD)/usr/tools/

LLVM_CXXFILT := $(STAGE0_TOOLS)llvm-cxxfilt
LLVM_PROFDATA := $(STAGE0_TOOLS)llvm-profdata
LLVM_OBJCOPY := $(STAGE0_TOOLS)llvm-objcopy

# Profile data locations. PROFRAW_FILES is expanded once, while the makefile
# is being read, so it lists only the *.profraw files that exist at that time.
PROFILE_DIR := $(CURDIR)/profiles
PROFILE_FILE := $(PROFILE_DIR)/merged.prof
PROFRAW_FILES := $(wildcard $(PROFILE_DIR)/*.profraw)

# When building a single libLLVM.so, -vp-counters-per-site has to be
# increased significantly.
COUNTERS_PER_SITE := 6
| 21 | + |
# Hint echoed at the end of the stage1 recipe. The value is one single-quoted
# shell word; each '\'' sequence embeds a literal single quote in it. Every
# backslash-newline (with the indentation that follows) collapses to a single
# space, so the message is emitted as one line.
AFTER_STAGE1_MESSAGE := 'Run `make clean-profiles` to start with a clean slate. Then run Julia \
    to collect realistic profile data, for example: \
    `$(STAGE1_BUILD)/julia -O3 -e \
    '\''using Pkg; Pkg.add("LoopVectorization"); Pkg.test("LoopVectorization")'\''`. \
    This should produce about 15MB of data in $(PROFILE_DIR). Note that running \
    extensive scripts may result in counter overflows, which can be detected by \
    running `make top`. Afterwards run `make stage2`.'
| 28 | +
|
# Command-line variable assignments handed to the per-stage sub-make.
# This must stay a recursively expanded variable: PGO_CFLAGS, PGO_CXXFLAGS and
# PGO_LDFLAGS are target-specific, so they only have a value when a stage
# recipe expands TOOLCHAIN_FLAGS.
TOOLCHAIN_FLAGS = "CC=$(STAGE0_TOOLS)clang"
TOOLCHAIN_FLAGS += "CXX=$(STAGE0_TOOLS)clang++"
TOOLCHAIN_FLAGS += "LD=$(STAGE0_TOOLS)ld.lld"
TOOLCHAIN_FLAGS += "AR=$(STAGE0_TOOLS)llvm-ar"
TOOLCHAIN_FLAGS += "RANLIB=$(STAGE0_TOOLS)llvm-ranlib"
TOOLCHAIN_FLAGS += "CFLAGS+=$(PGO_CFLAGS)"
TOOLCHAIN_FLAGS += "CXXFLAGS+=$(PGO_CXXFLAGS)"
TOOLCHAIN_FLAGS += "LDFLAGS+=$(PGO_LDFLAGS)"
| 38 | +
|
# Each stage build directory is produced by running the `configure` goal of
# the makefile in $(JULIA_ROOT), with O set to the directory being made.
stage_build_dirs := $(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD)

$(stage_build_dirs):
	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
| 41 | +
|
# stage0: install the clang/llvm/lld toolchain from the stage0 build's deps
# makefile, then leave a `stage0` stamp file behind.
#
# After the install, the [cd]tors sections of libclang_rt.profile-*.a are
# renamed to init/fini_array sections, since lld doesn't do that, and
# otherwise the profile constructor is not executed.
#
# The search root is spelled out as $(STAGE0_BUILD): this rule has only an
# order-only prerequisite, so `$<` expands to nothing here and `find` would be
# run without a starting directory.
stage0: export USE_BINARYBUILDER_LLVM=1
stage0: | $(STAGE0_BUILD)
	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools && \
	find $(STAGE0_BUILD) -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
	touch $@
| 49 | +
|
# stage1: build Julia in $(STAGE1_BUILD) with profile instrumentation, writing
# raw profiles to $(PROFILE_DIR), then leave a `stage1` stamp file and print
# the follow-up instructions.

# Instrumentation flags shared by the C and C++ compilers.
stage1_instr_flags := -fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)

# The stage1 build directory is only configured once stage0 is done.
$(STAGE1_BUILD): stage0

stage1: export USE_BINARYBUILDER_LLVM=0
stage1: PGO_CFLAGS := $(stage1_instr_flags)
stage1: PGO_CXXFLAGS := $(stage1_instr_flags)
stage1: PGO_LDFLAGS := -fuse-ld=lld -flto=thin -fprofile-generate=$(PROFILE_DIR)
stage1: | $(STAGE1_BUILD)
	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
	@echo $(AFTER_STAGE1_MESSAGE)
| 58 | +
|
# stage2: build Julia in $(STAGE2_BUILD) using the merged profile, then leave
# a `stage2` stamp file. The merged profile is a normal prerequisite; the
# build directory is order-only.

# Profile-use flag shared by the C and C++ compilers.
stage2_use_flags := -fprofile-use=$(PROFILE_FILE)

stage2: export USE_BINARYBUILDER_LLVM=0
stage2: PGO_CFLAGS := $(stage2_use_flags)
stage2: PGO_CXXFLAGS := $(stage2_use_flags)
stage2: PGO_LDFLAGS := -fuse-ld=lld -flto=thin -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe
stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)
	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
| 65 | +
|
# install: run the `install` goal of the stage2 build once stage2 is complete.
# The recipe never creates a file called `install`, so the target is declared
# phony; otherwise a stray file of that name could make it look up to date
# and silently skip the installation.
.PHONY: install
install: stage2
	$(MAKE) -C $(STAGE2_BUILD) USE_BINARYBUILDER_LLVM=0 install
| 68 | +
|
# Merge the raw profiles into $(PROFILE_FILE). Prerequisites are the stage1
# stamp plus every raw profile in PROFRAW_FILES; only the *.profraw
# prerequisites are passed to llvm-profdata as inputs.
$(PROFILE_FILE): stage1 $(PROFRAW_FILES)
	$(LLVM_PROFDATA) merge -output=$@ $(filter %.profraw,$^)
| 71 | +
|
# Show the top 50 functions of the merged profile, with the output piped
# through llvm-cxxfilt.
top: $(PROFILE_FILE)
	$(LLVM_PROFDATA) show --topn=50 $(PROFILE_FILE) | $(LLVM_CXXFILT)
| 75 | +
|
# Delete the whole profile directory: the raw profiles and the merged profile
# stored inside it.
clean-profiles:
	rm -rf $(PROFILE_DIR)
| 78 | +
|
# Remove the per-stage stamp files and the merged profile. The stage build
# directories and the raw profiles are not touched by this recipe.
stage_stamps := stage0 stage1 stage2

clean:
	rm -f $(stage_stamps) $(PROFILE_FILE)
0 commit comments