Skip to content

Commit 3a8e337

Browse files
committed
The embedded bitcode should always be prepared for LTO/ThinLTO
1 parent 9c87288 commit 3a8e337

File tree

14 files changed

+259
-70
lines changed

14 files changed

+259
-70
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ fn produce_final_output_artifacts(
210210
// to get rid of it.
211211
for output_type in crate_output.outputs.keys() {
212212
match *output_type {
213-
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
213+
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
214214
// Cranelift doesn't have bitcode
215215
// user_wants_bitcode = true;
216216
// // Copy to .bc, but always keep the .0.bc. There is a later

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::ffi::{CStr, CString};
33
use std::fs::File;
44
use std::mem::ManuallyDrop;
55
use std::path::Path;
6+
use std::ptr::NonNull;
67
use std::sync::Arc;
78
use std::{io, iter, slice};
89

@@ -610,7 +611,7 @@ pub(crate) fn run_pass_manager(
610611
let first_run = true;
611612
debug!("running llvm pm opt pipeline");
612613
unsafe {
613-
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
614+
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, first_run)?;
614615
}
615616
debug!("lto done");
616617
Ok(())
@@ -670,6 +671,11 @@ impl ThinBuffer {
670671
ThinBuffer(buffer)
671672
}
672673
}
674+
675+
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
676+
let mut ptr = NonNull::new(ptr).unwrap();
677+
ThinBuffer(unsafe { ptr.as_mut() })
678+
}
673679
}
674680

675681
impl ThinBufferMethods for ThinBuffer {

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 83 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::ffi::{CStr, CString};
22
use std::io::{self, Write};
33
use std::path::{Path, PathBuf};
4+
use std::ptr::null_mut;
45
use std::sync::Arc;
56
use std::{fs, slice, str};
67

@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
1516
TargetMachineFactoryFn,
1617
};
1718
use rustc_codegen_ssa::traits::*;
18-
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
19+
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
1920
use rustc_data_structures::profiling::SelfProfilerRef;
2021
use rustc_data_structures::small_c_str::SmallCStr;
2122
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -534,6 +535,7 @@ pub(crate) unsafe fn llvm_optimize(
534535
cgcx: &CodegenContext<LlvmCodegenBackend>,
535536
dcx: DiagCtxtHandle<'_>,
536537
module: &ModuleCodegen<ModuleLlvm>,
538+
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
537539
config: &ModuleConfig,
538540
opt_level: config::OptLevel,
539541
opt_stage: llvm::OptStage,
@@ -566,7 +568,17 @@ pub(crate) unsafe fn llvm_optimize(
566568
vectorize_loop = config.vectorize_loop;
567569
}
568570
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
569-
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
571+
if thin_lto_buffer.is_some() {
572+
assert!(
573+
matches!(
574+
opt_stage,
575+
llvm::OptStage::PreLinkNoLTO
576+
| llvm::OptStage::PreLinkFatLTO
577+
| llvm::OptStage::PreLinkThinLTO
578+
),
579+
"the bitcode for LTO can only be obtained at the pre-link stage"
580+
);
581+
}
570582
let pgo_gen_path = get_pgo_gen_path(config);
571583
let pgo_use_path = get_pgo_use_path(config);
572584
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -626,7 +638,9 @@ pub(crate) unsafe fn llvm_optimize(
626638
config.no_prepopulate_passes,
627639
config.verify_llvm_ir,
628640
config.lint_llvm_ir,
629-
using_thin_buffers,
641+
thin_lto_buffer,
642+
config.emit_thin_lto,
643+
config.emit_thin_lto_summary,
630644
config.merge_functions,
631645
unroll_loops,
632646
vectorize_slp,
@@ -686,17 +700,56 @@ pub(crate) unsafe fn optimize(
686700

687701
// If we know that we will later run AD, then we disable vectorization and loop unrolling
688702
let skip_size_increasing_opts = cfg!(llvm_enzyme);
689-
return unsafe {
703+
// The embedded bitcode is used to run LTO/ThinLTO.
704+
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
705+
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
706+
// this point.
707+
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
708+
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
709+
|| config.emit_thin_lto_summary
710+
{
711+
Some(null_mut())
712+
} else {
713+
None
714+
};
715+
unsafe {
690716
llvm_optimize(
691717
cgcx,
692718
dcx,
693719
module,
720+
thin_lto_buffer.as_mut(),
694721
config,
695722
opt_level,
696723
opt_stage,
697724
skip_size_increasing_opts,
698725
)
699-
};
726+
}?;
727+
if let Some(thin_lto_buffer) = thin_lto_buffer {
728+
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
729+
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
730+
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
731+
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
732+
}
733+
let bc_summary_out =
734+
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
735+
if config.emit_thin_lto_summary
736+
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
737+
{
738+
let summary_data = thin_lto_buffer.thin_link_data();
739+
cgcx.prof.artifact_size(
740+
"llvm_bitcode_summary",
741+
thin_link_bitcode_filename.to_string_lossy(),
742+
summary_data.len() as u64,
743+
);
744+
let _timer = cgcx.prof.generic_activity_with_arg(
745+
"LLVM_module_codegen_emit_bitcode_summary",
746+
&*module.name,
747+
);
748+
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
749+
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
750+
}
751+
}
752+
}
700753
}
701754
Ok(())
702755
}
@@ -777,59 +830,47 @@ pub(crate) unsafe fn codegen(
777830
// asm from LLVM and use `gcc` to create the object file.
778831

779832
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
780-
let bc_summary_out =
781-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
782833
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
783834

784835
if config.bitcode_needed() {
785-
let _timer = cgcx
786-
.prof
787-
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
788-
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
789-
let data = thin.data();
790-
791-
if let Some(bitcode_filename) = bc_out.file_name() {
792-
cgcx.prof.artifact_size(
793-
"llvm_bitcode",
794-
bitcode_filename.to_string_lossy(),
795-
data.len() as u64,
796-
);
797-
}
798-
799-
if config.emit_thin_lto_summary
800-
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
801-
{
802-
let summary_data = thin.thin_link_data();
803-
cgcx.prof.artifact_size(
804-
"llvm_bitcode_summary",
805-
thin_link_bitcode_filename.to_string_lossy(),
806-
summary_data.len() as u64,
807-
);
808-
809-
let _timer = cgcx.prof.generic_activity_with_arg(
810-
"LLVM_module_codegen_emit_bitcode_summary",
811-
&*module.name,
812-
);
813-
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
814-
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
815-
}
816-
}
817-
818836
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
837+
let thin = {
838+
let _timer = cgcx.prof.generic_activity_with_arg(
839+
"LLVM_module_codegen_make_bitcode",
840+
&*module.name,
841+
);
842+
ThinBuffer::new(llmod, config.emit_thin_lto, false)
843+
};
844+
let data = thin.data();
819845
let _timer = cgcx
820846
.prof
821847
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
848+
if let Some(bitcode_filename) = bc_out.file_name() {
849+
cgcx.prof.artifact_size(
850+
"llvm_bitcode",
851+
bitcode_filename.to_string_lossy(),
852+
data.len() as u64,
853+
);
854+
}
822855
if let Err(err) = fs::write(&bc_out, data) {
823856
dcx.emit_err(WriteBytecode { path: &bc_out, err });
824857
}
825858
}
826859

827-
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
860+
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
861+
&& module.kind == ModuleKind::Regular
862+
{
828863
let _timer = cgcx
829864
.prof
830865
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
866+
let thin_bc_out =
867+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
868+
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
869+
let data = fs::read(&thin_bc_out).unwrap();
870+
debug!("removing embed bitcode file {:?}", thin_bc_out);
871+
ensure_removed(dcx, &thin_bc_out);
831872
unsafe {
832-
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
873+
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
833874
}
834875
}
835876
}

compiler/rustc_codegen_llvm/src/builder/autodiff.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ pub(crate) fn differentiate<'ll, 'tcx>(
331331
cgcx,
332332
diag_handler.handle(),
333333
module,
334+
None,
334335
config,
335336
opt_level,
336337
opt_stage,

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2246,7 +2246,9 @@ unsafe extern "C" {
22462246
NoPrepopulatePasses: bool,
22472247
VerifyIR: bool,
22482248
LintIR: bool,
2249-
UseThinLTOBuffers: bool,
2249+
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
2250+
EmitThinLTO: bool,
2251+
EmitThinLTOSummary: bool,
22502252
MergeFunctions: bool,
22512253
UnrollLoops: bool,
22522254
SLPVectorize: bool,

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,9 @@ fn produce_final_output_artifacts(
613613
// them for making an rlib.
614614
copy_if_one_unit(OutputType::Bitcode, true);
615615
}
616+
OutputType::ThinBitcode => {
617+
copy_if_one_unit(OutputType::ThinBitcode, true);
618+
}
616619
OutputType::ThinLinkBitcode => {
617620
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
618621
}

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "llvm/Analysis/Lint.h"
88
#include "llvm/Analysis/TargetLibraryInfo.h"
99
#include "llvm/Bitcode/BitcodeWriter.h"
10+
#include "llvm/Bitcode/BitcodeWriterPass.h"
1011
#include "llvm/CodeGen/CommandFlags.h"
1112
#include "llvm/IR/AssemblyAnnotationWriter.h"
1213
#include "llvm/IR/AutoUpgrade.h"
@@ -36,6 +37,7 @@
3637
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
3738
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
3839
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
40+
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
3941
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
4042
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
4143
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -194,6 +196,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
194196
GEN_SUBTARGETS
195197
#undef SUBTARGET
196198

199+
// This struct and various functions are sort of a hack right now, but the
200+
// problem is that we've got in-memory LLVM modules after we generate and
201+
// optimize all codegen-units for one compilation in rustc. To be compatible
202+
// with the LTO support above we need to serialize the modules plus their
203+
// ThinLTO summary into memory.
204+
//
205+
// This structure is basically an owned version of a serialized module, with
206+
// a ThinLTO summary attached.
207+
struct LLVMRustThinLTOBuffer {
208+
std::string data;
209+
std::string thin_link_data;
210+
};
211+
197212
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
198213
const char *Feature) {
199214
TargetMachine *Target = unwrap(TM);
@@ -692,7 +707,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
692707
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
693708
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
694709
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
695-
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
710+
bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
711+
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
696712
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
697713
bool EmitLifetimeMarkers, LLVMRustSanitizerOptions *SanitizerOptions,
698714
const char *PGOGenPath, const char *PGOUsePath, bool InstrumentCoverage,
@@ -939,7 +955,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
939955
}
940956

941957
ModulePassManager MPM;
942-
bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
958+
bool NeedThinLTOBufferPasses = EmitThinLTO;
959+
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
960+
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
961+
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
943962
if (!NoPrepopulatePasses) {
944963
// The pre-link pipelines don't support O0 and require using
945964
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -963,7 +982,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
963982

964983
switch (OptStage) {
965984
case LLVMRustOptStage::PreLinkNoLTO:
966-
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
985+
if (ThinLTOBufferRef) {
986+
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
987+
// bitcode for embedding is obtained after performing
988+
// `ThinLTOPreLinkDefaultPipeline`.
989+
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
990+
if (EmitThinLTO) {
991+
MPM.addPass(ThinLTOBitcodeWriterPass(
992+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
993+
} else {
994+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
995+
}
996+
*ThinLTOBufferRef = ThinLTOBuffer.release();
997+
MPM.addPass(PB.buildModuleOptimizationPipeline(
998+
OptLevel, ThinOrFullLTOPhase::None));
999+
MPM.addPass(
1000+
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
1001+
} else {
1002+
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
1003+
}
9671004
break;
9681005
case LLVMRustOptStage::PreLinkThinLTO:
9691006
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -1009,6 +1046,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
10091046
MPM.addPass(CanonicalizeAliasesPass());
10101047
MPM.addPass(NameAnonGlobalPass());
10111048
}
1049+
// For `-Copt-level=0`, ThinLTO, or LTO.
1050+
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
1051+
if (EmitThinLTO) {
1052+
MPM.addPass(ThinLTOBitcodeWriterPass(
1053+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
1054+
} else {
1055+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1056+
}
1057+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1058+
}
10121059

10131060
// Upgrade all calls to old intrinsics first.
10141061
for (Module::iterator I = TheModule->begin(), E = TheModule->end(); I != E;)
@@ -1475,19 +1522,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
14751522
return true;
14761523
}
14771524

1478-
// This struct and various functions are sort of a hack right now, but the
1479-
// problem is that we've got in-memory LLVM modules after we generate and
1480-
// optimize all codegen-units for one compilation in rustc. To be compatible
1481-
// with the LTO support above we need to serialize the modules plus their
1482-
// ThinLTO summary into memory.
1483-
//
1484-
// This structure is basically an owned version of a serialize module, with
1485-
// a ThinLTO summary attached.
1486-
struct LLVMRustThinLTOBuffer {
1487-
std::string data;
1488-
std::string thin_link_data;
1489-
};
1490-
14911525
extern "C" LLVMRustThinLTOBuffer *
14921526
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
14931527
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

0 commit comments

Comments
 (0)