Skip to content

Commit b31f4ed

Browse files
committed
The embedded bitcode should always be prepared for LTO/ThinLTO
1 parent d53f0b1 commit b31f4ed

File tree

11 files changed

+249
-63
lines changed

11 files changed

+249
-63
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ fn produce_final_output_artifacts(
204204
// to get rid of it.
205205
for output_type in crate_output.outputs.keys() {
206206
match *output_type {
207-
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
207+
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
208208
// Cranelift doesn't have bitcode
209209
// user_wants_bitcode = true;
210210
// // Copy to .bc, but always keep the .0.bc. There is a later

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::ffi::{CStr, CString};
33
use std::fs::File;
44
use std::mem::ManuallyDrop;
55
use std::path::Path;
6+
use std::ptr::NonNull;
67
use std::sync::Arc;
78
use std::{io, iter, slice};
89

@@ -604,7 +605,7 @@ pub(crate) fn run_pass_manager(
604605
debug!("running the pass manager");
605606
let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
606607
let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
607-
unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
608+
unsafe { write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage) }?;
608609
debug!("lto done");
609610
Ok(())
610611
}
@@ -663,6 +664,11 @@ impl ThinBuffer {
663664
ThinBuffer(buffer)
664665
}
665666
}
667+
668+
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
669+
let mut ptr = NonNull::new(ptr).unwrap();
670+
ThinBuffer(unsafe { ptr.as_mut() })
671+
}
666672
}
667673

668674
impl ThinBufferMethods for ThinBuffer {

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 82 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::ffi::{CStr, CString};
22
use std::io::{self, Write};
33
use std::path::{Path, PathBuf};
4+
use std::ptr::null_mut;
45
use std::sync::Arc;
56
use std::{fs, slice, str};
67

@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
1516
TargetMachineFactoryFn,
1617
};
1718
use rustc_codegen_ssa::traits::*;
18-
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
19+
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
1920
use rustc_data_structures::profiling::SelfProfilerRef;
2021
use rustc_data_structures::small_c_str::SmallCStr;
2122
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -41,7 +42,7 @@ use crate::errors::{
4142
WithLlvmError, WriteBytecode,
4243
};
4344
use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
44-
use crate::llvm::{self, DiagnosticInfo, PassManager};
45+
use crate::llvm::{self, DiagnosticInfo, PassManager, ThinLTOBuffer};
4546
use crate::type_::Type;
4647
use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
4748

@@ -514,13 +515,21 @@ pub(crate) unsafe fn llvm_optimize(
514515
cgcx: &CodegenContext<LlvmCodegenBackend>,
515516
dcx: DiagCtxtHandle<'_>,
516517
module: &ModuleCodegen<ModuleLlvm>,
518+
thin_lto_buffer: Option<&mut *mut ThinLTOBuffer>,
517519
config: &ModuleConfig,
518520
opt_level: config::OptLevel,
519521
opt_stage: llvm::OptStage,
520522
) -> Result<(), FatalError> {
523+
if thin_lto_buffer.is_some() {
524+
assert!(matches!(
525+
opt_stage,
526+
llvm::OptStage::PreLinkNoLTO
527+
| llvm::OptStage::PreLinkFatLTO
528+
| llvm::OptStage::PreLinkThinLTO
529+
));
530+
}
521531
let unroll_loops =
522532
opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
523-
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
524533
let pgo_gen_path = get_pgo_gen_path(config);
525534
let pgo_use_path = get_pgo_use_path(config);
526535
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -580,7 +589,9 @@ pub(crate) unsafe fn llvm_optimize(
580589
config.no_prepopulate_passes,
581590
config.verify_llvm_ir,
582591
config.lint_llvm_ir,
583-
using_thin_buffers,
592+
thin_lto_buffer,
593+
config.emit_thin_lto,
594+
config.emit_thin_lto_summary,
584595
config.merge_functions,
585596
unroll_loops,
586597
config.vectorize_slp,
@@ -635,7 +646,47 @@ pub(crate) unsafe fn optimize(
635646
_ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
636647
_ => llvm::OptStage::PreLinkNoLTO,
637648
};
638-
return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
649+
// The embedded bitcode is used to run LTO/ThinLTO.
650+
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
651+
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
652+
// this point.
653+
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
654+
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
655+
|| config.emit_thin_lto_summary
656+
{
657+
Some(null_mut())
658+
} else {
659+
None
660+
};
661+
unsafe {
662+
llvm_optimize(cgcx, dcx, module, thin_lto_buffer.as_mut(), config, opt_level, opt_stage)
663+
}?;
664+
if let Some(thin_lto_buffer) = thin_lto_buffer {
665+
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
666+
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
667+
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
668+
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
669+
}
670+
let bc_summary_out =
671+
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
672+
if config.emit_thin_lto_summary
673+
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
674+
{
675+
let summary_data = thin_lto_buffer.thin_link_data();
676+
cgcx.prof.artifact_size(
677+
"llvm_bitcode_summary",
678+
thin_link_bitcode_filename.to_string_lossy(),
679+
summary_data.len() as u64,
680+
);
681+
let _timer = cgcx.prof.generic_activity_with_arg(
682+
"LLVM_module_codegen_emit_bitcode_summary",
683+
&*module.name,
684+
);
685+
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
686+
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
687+
}
688+
}
689+
}
639690
}
640691
Ok(())
641692
}
@@ -714,61 +765,49 @@ pub(crate) unsafe fn codegen(
714765
// requested.
715766
// - If we don't have the integrated assembler then we need to emit
716767
// asm from LLVM and use `gcc` to create the object file.
717-
718768
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
719-
let bc_summary_out =
720-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
721769
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
722770

723771
if config.bitcode_needed() {
724-
let _timer = cgcx
725-
.prof
726-
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
727-
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
728-
let data = thin.data();
729-
730-
if let Some(bitcode_filename) = bc_out.file_name() {
731-
cgcx.prof.artifact_size(
732-
"llvm_bitcode",
733-
bitcode_filename.to_string_lossy(),
734-
data.len() as u64,
735-
);
736-
}
737-
738-
if config.emit_thin_lto_summary
739-
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
740-
{
741-
let summary_data = thin.thin_link_data();
742-
cgcx.prof.artifact_size(
743-
"llvm_bitcode_summary",
744-
thin_link_bitcode_filename.to_string_lossy(),
745-
summary_data.len() as u64,
746-
);
747-
748-
let _timer = cgcx.prof.generic_activity_with_arg(
749-
"LLVM_module_codegen_emit_bitcode_summary",
750-
&*module.name,
751-
);
752-
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
753-
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
754-
}
755-
}
756-
772+
// Open question: what happens when performing LTO in Rust if the target spec's object files are bitcode?
757773
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
774+
let thin = {
775+
let _timer = cgcx.prof.generic_activity_with_arg(
776+
"LLVM_module_codegen_make_bitcode",
777+
&*module.name,
778+
);
779+
ThinBuffer::new(llmod, config.emit_thin_lto, false)
780+
};
781+
let data = thin.data();
758782
let _timer = cgcx
759783
.prof
760784
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
785+
if let Some(bitcode_filename) = bc_out.file_name() {
786+
cgcx.prof.artifact_size(
787+
"llvm_bitcode",
788+
bitcode_filename.to_string_lossy(),
789+
data.len() as u64,
790+
);
791+
}
761792
if let Err(err) = fs::write(&bc_out, data) {
762793
dcx.emit_err(WriteBytecode { path: &bc_out, err });
763794
}
764795
}
765796

766-
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
797+
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
798+
&& module.kind == ModuleKind::Regular
799+
{
767800
let _timer = cgcx
768801
.prof
769802
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
803+
let thin_bc_out =
804+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
805+
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
806+
let data = fs::read(&thin_bc_out).unwrap();
807+
debug!("removing embed bitcode file {:?}", thin_bc_out);
808+
ensure_removed(dcx, &thin_bc_out);
770809
unsafe {
771-
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
810+
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
772811
}
773812
}
774813
}

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2250,7 +2250,9 @@ unsafe extern "C" {
22502250
NoPrepopulatePasses: bool,
22512251
VerifyIR: bool,
22522252
LintIR: bool,
2253-
UseThinLTOBuffers: bool,
2253+
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
2254+
EmitThinLTO: bool,
2255+
EmitThinLTOSummary: bool,
22542256
MergeFunctions: bool,
22552257
UnrollLoops: bool,
22562258
SLPVectorize: bool,

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,9 @@ fn produce_final_output_artifacts(
616616
// them for making an rlib.
617617
copy_if_one_unit(OutputType::Bitcode, true);
618618
}
619+
OutputType::ThinBitcode => {
620+
copy_if_one_unit(OutputType::ThinBitcode, true);
621+
}
619622
OutputType::ThinLinkBitcode => {
620623
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
621624
}

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "llvm/Analysis/Lint.h"
88
#include "llvm/Analysis/TargetLibraryInfo.h"
99
#include "llvm/Bitcode/BitcodeWriter.h"
10+
#include "llvm/Bitcode/BitcodeWriterPass.h"
1011
#include "llvm/CodeGen/CommandFlags.h"
1112
#include "llvm/IR/AssemblyAnnotationWriter.h"
1213
#include "llvm/IR/AutoUpgrade.h"
@@ -36,6 +37,7 @@
3637
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
3738
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
3839
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
40+
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
3941
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
4042
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
4143
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -194,6 +196,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
194196
GEN_SUBTARGETS
195197
#undef SUBTARGET
196198

199+
// This struct and various functions are sort of a hack right now, but the
200+
// problem is that we've got in-memory LLVM modules after we generate and
201+
// optimize all codegen-units for one compilation in rustc. To be compatible
202+
// with the LTO support above we need to serialize the modules plus their
203+
// ThinLTO summary into memory.
204+
//
205+
// This structure is basically an owned version of a serialized module, with
206+
// a ThinLTO summary attached.
207+
struct LLVMRustThinLTOBuffer {
208+
std::string data;
209+
std::string thin_link_data;
210+
};
211+
197212
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
198213
const char *Feature) {
199214
TargetMachine *Target = unwrap(TM);
@@ -676,7 +691,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
676691
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
677692
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
678693
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
679-
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
694+
bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
695+
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
680696
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
681697
bool EmitLifetimeMarkers, LLVMRustSanitizerOptions *SanitizerOptions,
682698
const char *PGOGenPath, const char *PGOUsePath, bool InstrumentCoverage,
@@ -923,7 +939,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
923939
}
924940

925941
ModulePassManager MPM;
926-
bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
942+
bool NeedThinLTOBufferPasses = EmitThinLTO;
943+
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
944+
auto ThinLTODataOS = raw_string_ostream(ThinLTOBuffer->data);
945+
auto ThinLinkDataOS = raw_string_ostream(ThinLTOBuffer->thin_link_data);
927946
if (!NoPrepopulatePasses) {
928947
// The pre-link pipelines don't support O0 and require using
929948
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -947,7 +966,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
947966

948967
switch (OptStage) {
949968
case LLVMRustOptStage::PreLinkNoLTO:
950-
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
969+
if (ThinLTOBufferRef) {
970+
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
971+
// bitcode for embedding is obtained after performing
972+
// `ThinLTOPreLinkDefaultPipeline`.
973+
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
974+
if (EmitThinLTO) {
975+
MPM.addPass(ThinLTOBitcodeWriterPass(
976+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
977+
} else {
978+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
979+
}
980+
*ThinLTOBufferRef = ThinLTOBuffer.release();
981+
MPM.addPass(PB.buildModuleOptimizationPipeline(
982+
OptLevel, ThinOrFullLTOPhase::None));
983+
MPM.addPass(
984+
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
985+
} else {
986+
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
987+
}
951988
break;
952989
case LLVMRustOptStage::PreLinkThinLTO:
953990
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -993,6 +1030,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
9931030
MPM.addPass(CanonicalizeAliasesPass());
9941031
MPM.addPass(NameAnonGlobalPass());
9951032
}
1033+
// For -Copt-level=0
1034+
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
1035+
if (EmitThinLTO) {
1036+
MPM.addPass(ThinLTOBitcodeWriterPass(
1037+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
1038+
} else {
1039+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1040+
}
1041+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1042+
}
9961043

9971044
// Upgrade all calls to old intrinsics first.
9981045
for (Module::iterator I = TheModule->begin(), E = TheModule->end(); I != E;)
@@ -1465,19 +1512,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
14651512
return true;
14661513
}
14671514

1468-
// This struct and various functions are sort of a hack right now, but the
1469-
// problem is that we've got in-memory LLVM modules after we generate and
1470-
// optimize all codegen-units for one compilation in rustc. To be compatible
1471-
// with the LTO support above we need to serialize the modules plus their
1472-
// ThinLTO summary into memory.
1473-
//
1474-
// This structure is basically an owned version of a serialize module, with
1475-
// a ThinLTO summary attached.
1476-
struct LLVMRustThinLTOBuffer {
1477-
std::string data;
1478-
std::string thin_link_data;
1479-
};
1480-
14811515
extern "C" LLVMRustThinLTOBuffer *
14821516
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
14831517
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

0 commit comments

Comments
 (0)