diff --git a/README.md b/README.md index 8a98972a..0c08ab69 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ files with embedded bitcode (.o), optionally stored inside ar archives (.a). The linker requires LLVM 21. It can use the same LLVM used by the rust compiler, or it can use an external LLVM installation. -If your target is `aarch64-unknown-linux-gnu` (i.e. Linux on Apple Silicon) you +If your target is `aarch64-unknown-linux-gnu` (i.e. Linux on Apple Silicon) you will have to use the *external LLVM* method. ### Using LLVM provided by rustc diff --git a/src/bin/bpf-linker.rs b/src/bin/bpf-linker.rs index 635f8a3a..b598814a 100644 --- a/src/bin/bpf-linker.rs +++ b/src/bin/bpf-linker.rs @@ -14,7 +14,7 @@ use std::{ feature = "rust-llvm-21" ))] use aya_rustc_llvm_proxy as _; -use bpf_linker::{Cpu, Linker, LinkerOptions, OptLevel, OutputType}; +use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType}; use clap::{ builder::{PathBufValueParser, TypedValueParser as _}, error::ErrorKind, @@ -212,7 +212,6 @@ fn main() -> anyhow::Result<()> { emit, btf, allow_bpf_trap, - libs, optimize, export_symbols, log_file, @@ -227,6 +226,7 @@ fn main() -> anyhow::Result<()> { export, fatal_errors, _debug, + libs: _libs, // NOTE: not used right now } = match Parser::try_parse_from(args) { Ok(command_line) => command_line, Err(err) => match err.kind() { @@ -294,23 +294,25 @@ fn main() -> anyhow::Result<()> { target, cpu, cpu_features, - inputs, - output, - output_type, - libs, optimize, - export_symbols, unroll_loops, ignore_inline_never, - dump_module, llvm_args, disable_expand_memcpy_in_order, disable_memory_builtins, btf, allow_bpf_trap, - }); + })?; + + if let Some(path) = dump_module { + linker.set_dump_module_path(path); + } + + let inputs = inputs + .iter() + .map(|p| LinkerInput::new_from_file(p.as_path())); - linker.link()?; + linker.link_to_file(inputs, &output, output_type, &export_symbols)?; if fatal_errors && linker.has_errors() { return Err(anyhow::anyhow!( diff --git a/src/linker.rs b/src/linker.rs index 9962dd57..0ed9e804 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -3,29 +3,22 @@ use std::{ collections::HashSet, ffi::{CStr, CString, OsStr}, fs::File, - io::{self, Read, Seek as _}, + io::{self, Read, Seek}, + ops::Deref, os::unix::ffi::OsStrExt as _, path::{Path, PathBuf}, - pin::Pin, - ptr, str::{self, FromStr}, }; use ar::Archive; use llvm_sys::{ - bit_writer::LLVMWriteBitcodeToFile, - core::{ - LLVMContextCreate, LLVMContextDispose, LLVMContextSetDiagnosticHandler, LLVMDisposeModule, - LLVMGetTarget, - }, error_handling::{LLVMEnablePrettyStackTrace, LLVMInstallFatalErrorHandler}, - prelude::{LLVMContextRef, LLVMModuleRef}, - target_machine::{LLVMCodeGenFileType, LLVMDisposeTargetMachine, LLVMTargetMachineRef}, + target_machine::LLVMCodeGenFileType, }; use thiserror::Error; use tracing::{debug, error, info, warn}; -use crate::llvm; +use crate::llvm::{self, LLVMContext, LLVMModule, LLVMTargetMachine, MemoryBuffer}; /// Linker error #[derive(Debug, Error)] @@ -77,6 +70,10 @@ pub enum LinkerError { /// The input object file does not have embedded bitcode. #[error("no bitcode section found in {0}")] MissingBitcodeSection(PathBuf), + + /// LLVM cannot create a module for linking. + #[error("failed to create module")] + CreateModuleError, } /// BPF Cpu type @@ -145,6 +142,80 @@ pub enum OptLevel { SizeMin, } +pub struct FileInput<'a> { + path: &'a Path, +} + +pub struct BufferInput<'a> { + name: &'a str, + bytes: &'a [u8], +} + +pub enum LinkerInput<'a> { + File(FileInput<'a>), + Buffer(BufferInput<'a>), +} + +impl<'a> LinkerInput<'a> { + pub fn new_from_file(path: &'a Path) -> Self { + LinkerInput::File(FileInput { path }) + } + + pub fn new_from_buffer(name: &'a str, bytes: &'a [u8]) -> Self { + LinkerInput::Buffer(BufferInput { name, bytes }) + } +} + +enum InputReader<'a> { + File { + path: &'a Path, + file: File, + }, + Buffer { + name: &'a str, + cursor: io::Cursor<&'a [u8]>, + }, +} + +impl<'a> TryFrom> for InputReader<'a> { + type Error = LinkerError; + + fn try_from(value: LinkerInput<'a>) -> Result { + match value { + LinkerInput::File(file_input) => { + let file = File::open(file_input.path) + .map_err(|err| LinkerError::IoError(file_input.path.to_owned(), err))?; + Ok(InputReader::File { + path: file_input.path, + file, + }) + } + LinkerInput::Buffer(buffer_input) => Ok(InputReader::Buffer { + name: buffer_input.name, + cursor: io::Cursor::new(buffer_input.bytes), + }), + } + } +} + +impl Seek for InputReader<'_> { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + match self { + InputReader::File { file, .. } => file.seek(pos), + InputReader::Buffer { cursor, .. } => cursor.seek(pos), + } + } +} + +impl Read for InputReader<'_> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + InputReader::File { file, .. } => file.read(buf), + InputReader::Buffer { cursor, .. } => cursor.read(buf), + } + } +} + /// Linker input type #[derive(Clone, Copy, Debug, PartialEq)] enum InputType { @@ -196,24 +267,13 @@ pub struct LinkerOptions { pub cpu: Cpu, /// Cpu features. pub cpu_features: CString, - /// Input files. Can be bitcode, object files with embedded bitcode or archive files. - pub inputs: Vec, - /// Where to save the output. - pub output: PathBuf, - /// The format to output. - pub output_type: OutputType, - pub libs: Vec, /// Optimization level. pub optimize: OptLevel, - /// Set of symbol names to export. - pub export_symbols: HashSet>, /// Whether to aggressively unroll loops. Useful for older kernels that don't support loops. pub unroll_loops: bool, /// Remove `noinline` attributes from functions. Useful for kernels before 5.8 that don't /// support function calls. pub ignore_inline_never: bool, - /// Write the linked module IR before and after optimization. - pub dump_module: Option, /// Extra command line args to pass to LLVM. pub llvm_args: Vec, /// Disable passing --bpf-expand-memcpy-in-order to LLVM. @@ -232,366 +292,205 @@ pub struct LinkerOptions { /// BPF Linker pub struct Linker { options: LinkerOptions, - context: LLVMContextRef, - module: LLVMModuleRef, - target_machine: LLVMTargetMachineRef, - diagnostic_handler: Pin>, + context: LLVMContext, + diagnostic_handler: llvm::InstalledDiagnosticHandler, + dump_module: Option, } impl Linker { /// Create a new linker instance with the given options. - pub fn new(options: LinkerOptions) -> Self { - Self { - options, - context: ptr::null_mut(), - module: ptr::null_mut(), - target_machine: ptr::null_mut(), - diagnostic_handler: Box::pin(DiagnosticHandler::default()), - } - } - - /// Link and generate the output code. - pub fn link(&mut self) -> Result<(), LinkerError> { - self.llvm_init(); - self.link_modules()?; - self.create_target_machine()?; - if let Some(path) = &self.options.dump_module { - std::fs::create_dir_all(path).map_err(|err| LinkerError::IoError(path.clone(), err))?; - } - if let Some(path) = &self.options.dump_module { - // dump IR before optimization - let path = path.join("pre-opt.ll"); - let path = CString::new(path.as_os_str().as_bytes()).unwrap(); - self.write_ir(&path)?; - }; - self.optimize()?; - if let Some(path) = &self.options.dump_module { - // dump IR before optimization - let path = path.join("post-opt.ll"); - let path = CString::new(path.as_os_str().as_bytes()).unwrap(); - self.write_ir(&path)?; - }; - self.codegen()?; - Ok(()) - } + pub fn new(options: LinkerOptions) -> Result { + let (context, diagnostic_handler) = llvm_init(&options); - pub fn has_errors(&self) -> bool { - self.diagnostic_handler.has_errors + Ok(Self { + options, + context, + diagnostic_handler, + dump_module: None, + }) } - fn link_modules(&mut self) -> Result<(), LinkerError> { - // buffer used to perform file type detection - let mut buf = [0u8; 8]; - for path in self.options.inputs.clone() { - let mut file = File::open(&path).map_err(|e| LinkerError::IoError(path.clone(), e))?; - - // determine whether the input is bitcode, ELF with embedded bitcode, an archive file - // or an invalid file - file.read_exact(&mut buf) - .map_err(|e| LinkerError::IoError(path.clone(), e))?; - file.rewind() - .map_err(|e| LinkerError::IoError(path.clone(), e))?; - let in_type = detect_input_type(&buf) - .ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?; - - match in_type { - InputType::Archive => { - info!("linking archive {:?}", path); - - // Extract the archive and call link_reader() for each item. - let mut archive = Archive::new(file); - while let Some(Ok(item)) = archive.next_entry() { - let name = PathBuf::from(OsStr::from_bytes(item.header().identifier())); - info!("linking archive item {:?}", name); - - match self.link_reader(&name, item, None) { - Ok(_) => continue, - Err(LinkerError::InvalidInputType(_)) => { - info!("ignoring archive item {:?}: invalid type", name); - continue; - } - Err(LinkerError::MissingBitcodeSection(_)) => { - warn!("ignoring archive item {:?}: no embedded bitcode", name); - continue; - } - Err(_) => return Err(LinkerError::LinkArchiveModuleError(path, name)), - }; - } - } - ty => { - info!("linking file {:?} type {}", path, ty); - match self.link_reader(&path, file, Some(ty)) { - Ok(_) => {} - Err(LinkerError::InvalidInputType(_)) => { - info!("ignoring file {:?}: invalid type", path); - continue; - } - Err(LinkerError::MissingBitcodeSection(_)) => { - warn!("ignoring file {:?}: no embedded bitcode", path); - } - err => return err, - } - } - } - } - - Ok(()) + /// Set the directory where the linker will dump the linked LLVM IR before and after + /// optimization, for debugging and inspection purposes. + /// + /// When set: + /// - The directory is created if it does not already exist. + /// - A "pre-opt.ll" file is written with the IR before optimization. + /// - A "post-opt.ll" file is written with the IR after optimization. + pub fn set_dump_module_path(&mut self, path: impl AsRef) { + self.dump_module = Some(path.as_ref().to_path_buf()) } - // link in a `Read`-er, which can be a file or an archive item - fn link_reader( - &mut self, - path: &Path, - mut reader: impl Read, - in_type: Option, + /// Link and generate the output code to file. + /// + /// # Example + /// + /// ```rust,no_run + /// # use std::{collections::HashSet, path::Path, borrow::Cow}; + /// # use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType}; + /// # fn main() -> Result<(), Box> { + /// let path = Path::new("/path/to/object-or-bitcode"); + /// let bytes: &[u8] = &[]; // An in memory object/bitcode + /// # let options = LinkerOptions { + /// # target: None, + /// # cpu: Cpu::Generic, + /// # cpu_features: String::new(), + /// # optimize: OptLevel::Default, + /// # unroll_loops: false, + /// # ignore_inline_never: false, + /// # llvm_args: vec![], + /// # disable_expand_memcpy_in_order: false, + /// # disable_memory_builtins: false, + /// # allow_bpf_trap: false, + /// # btf: false, + /// # }; + /// # let linker = Linker::new(options)?; + /// + /// let export_symbols = ["my_sym_1", "my_sym_2"] + /// .into_iter() + /// .map(Cow::Borrowed) + /// .collect(); + /// + /// // Link to a file + /// linker.link_to_file( + /// [ + /// LinkerInput::new_from_file(path), + /// LinkerInput::new_from_buffer("my buffer", bytes), // In memory buffer needs a name + /// ], + /// Path::new("/path/to/output"), + /// OutputType::Object, + /// &export_symbols, + /// None, + /// )?; + /// # Ok(()) + /// # } + /// ``` + #[allow(single_use_lifetimes)] + pub fn link_to_file<'i>( + &self, + inputs: impl IntoIterator>, + output: &Path, + output_type: OutputType, + export_symbols: &HashSet>, ) -> Result<(), LinkerError> { - let mut data = Vec::new(); - let _: usize = reader - .read_to_end(&mut data) - .map_err(|e| LinkerError::IoError(path.to_owned(), e))?; - // in_type is unknown when we're linking an item from an archive file - let in_type = in_type - .or_else(|| detect_input_type(&data)) - .ok_or_else(|| LinkerError::InvalidInputType(path.to_owned()))?; - - let bitcode = match in_type { - InputType::Bitcode => data, - InputType::Elf => match llvm::find_embedded_bitcode(self.context, &data) { - Ok(Some(bitcode)) => bitcode, - Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())), - Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)), - }, - // we need to handle this here since archive files could contain - // mach-o files, eg somecrate.rlib containing lib.rmeta which is - // mach-o on macos - InputType::MachO => return Err(LinkerError::InvalidInputType(path.to_owned())), - // this can't really happen - InputType::Archive => panic!("nested archives not supported duh"), - }; - - if !llvm::link_bitcode_buffer(self.context, self.module, &bitcode) { - return Err(LinkerError::LinkModuleError(path.to_owned())); - } - - Ok(()) - } - - fn create_target_machine(&mut self) -> Result<(), LinkerError> { - let Self { - options: - LinkerOptions { - target, - cpu, - cpu_features, - .. - }, - module, - target_machine, - .. - } = self; - // Here's how the output target is selected: - // - // 1) rustc with builtin BPF support: cargo build --target=bpf[el|eb]-unknown-none - // the input modules are already configured for the correct output target - // - // 2) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker -C link-arg=--target=bpf[el|eb] - // the input modules are configured for the *host* target, and the output target - // is configured with the `--target` linker argument - // - // 3) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker - // the input modules are configured for the *host* target, the output target isn't - // set via `--target`, so default to `bpf` (bpfel or bpfeb depending on the host - // endianness) - let (triple, target) = match target { - // case 1 - Some(c_triple) => (c_triple.as_c_str(), llvm::target_from_triple(c_triple)), - None => { - let c_triple = unsafe { LLVMGetTarget(*module) }; - let c_triple = unsafe { CStr::from_ptr(c_triple) }; - if c_triple.to_bytes().starts_with(b"bpf") { - // case 2 - (c_triple, llvm::target_from_module(*module)) - } else { - // case 3. - info!("detected non-bpf input target {:?} and no explicit output --target specified, selecting `bpf'", c_triple); - let c_triple = c"bpf"; - (c_triple, llvm::target_from_triple(c_triple)) - } - } - }; - let target = target - .map_err(|_msg| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; - - debug!( - "creating target machine: triple: {} cpu: {} features: {}", - triple.to_string_lossy(), - cpu, - cpu_features.to_string_lossy(), - ); - - *target_machine = llvm::create_target_machine(target, triple, cpu.as_c_str(), cpu_features) - .ok_or_else(|| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; - - Ok(()) - } - - fn optimize(&mut self) -> Result<(), LinkerError> { - if !self.options.disable_memory_builtins { - self.options.export_symbols.extend( - ["memcpy", "memmove", "memset", "memcmp", "bcmp"] - .into_iter() - .map(Into::into), - ); - }; - debug!( - "linking exporting symbols {:?}, opt level {:?}", - self.options.export_symbols, self.options.optimize - ); - // run optimizations. Will optionally remove noinline attributes, intern all non exported - // programs and maps and remove dead code. - - let export_symbols = self - .options - .export_symbols - .iter() - .map(|s| s.as_bytes().into()) - .collect(); - - if self.options.btf { - // if we want to emit BTF, we need to sanitize the debug information - llvm::DISanitizer::new(self.context, self.module).run(&export_symbols); - } else { - // if we don't need BTF emission, we can strip DI - let ok = llvm::strip_debug_info(self.module); - debug!("Stripping DI, changed={}", ok); - } - - llvm::optimize( - self.target_machine, - self.module, - self.options.optimize, - self.options.ignore_inline_never, - &export_symbols, - ) - .map_err(LinkerError::OptimizeError)?; - + // Catch non existing files + let inputs = inputs + .into_iter() + .map(InputReader::try_from) + .collect::, _>>()?; + + let (linked_module, target_machine) = self.link(inputs, export_symbols)?; + codegen_to_file(&linked_module, &target_machine, output, output_type)?; Ok(()) } - fn codegen(&mut self) -> Result<(), LinkerError> { - let output = CString::new(self.options.output.as_os_str().as_bytes()).unwrap(); - match self.options.output_type { - OutputType::Bitcode => self.write_bitcode(&output), - OutputType::LlvmAssembly => self.write_ir(&output), - OutputType::Assembly => self.emit(&output, LLVMCodeGenFileType::LLVMAssemblyFile), - OutputType::Object => self.emit(&output, LLVMCodeGenFileType::LLVMObjectFile), - } + /// Link and generate the output code to an in-memory buffer. + /// + /// # Example + /// + /// ```rust,no_run + /// # use std::{collections::HashSet, path::Path, borrow::Cow}; + /// # use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType}; + /// # fn main() -> Result<(), Box> { + /// let path = Path::new("/path/to/object-or-bitcode"); + /// let bytes: &[u8] = &[]; // An in memory object/bitcode + /// # let options = LinkerOptions { + /// # target: None, + /// # cpu: Cpu::Generic, + /// # cpu_features: String::new(), + /// # optimize: OptLevel::Default, + /// # unroll_loops: false, + /// # ignore_inline_never: false, + /// # llvm_args: vec![], + /// # disable_expand_memcpy_in_order: false, + /// # disable_memory_builtins: false, + /// # allow_bpf_trap: false, + /// # btf: false, + /// # }; + /// # let linker = Linker::new(options)?; + /// + /// let export_symbols = ["my_sym_1", "my_sym_2"] + /// .into_iter() + /// .map(Cow::Borrowed) + /// .collect(); + /// + /// // Link into an in-memory buffer. + /// let out_buf = linker.link_to_buffer( + /// [ + /// LinkerInput::new_from_file(path), + /// LinkerInput::new_from_buffer("my buffer", bytes), // In memory buffer needs a name + /// ], + /// OutputType::Bitcode, + /// &export_symbols, + /// None, + /// )?; + /// + /// // Use the buffer as slice of u8 + /// let bytes = out_buf.as_slice(); + /// println!("Linked {} bytes into memory)", bytes.len()); + /// + /// # Ok(()) + /// # } + /// ``` + #[allow(single_use_lifetimes)] + pub fn link_to_buffer<'i>( + &self, + inputs: impl IntoIterator>, + output_type: OutputType, + export_symbols: &HashSet>, + ) -> Result { + // Catch non existing files + let inputs = inputs + .into_iter() + .map(InputReader::try_from) + .collect::, _>>()?; + + let (linked_module, target_machine) = self.link(inputs, export_symbols)?; + codegen_to_buffer(&linked_module, &target_machine, output_type) } - fn write_bitcode(&mut self, output: &CStr) -> Result<(), LinkerError> { - info!("writing bitcode to {:?}", output); - - if unsafe { LLVMWriteBitcodeToFile(self.module, output.as_ptr()) } == 1 { - return Err(LinkerError::WriteBitcodeError); + /// Link and generate the output code. + #[allow(single_use_lifetimes)] + fn link<'ctx, 'i>( + &'ctx self, + inputs: impl IntoIterator>, + export_symbols: &HashSet>, + ) -> Result<(LLVMModule<'ctx>, LLVMTargetMachine), LinkerError> { + let mut module = link_modules(&self.context, inputs)?; + + let target_machine = create_target_machine(&self.options, &module)?; + + if let Some(path) = &self.dump_module { + std::fs::create_dir_all(path) + .map_err(|err| LinkerError::IoError(path.to_owned(), err))?; } + if let Some(path) = &self.dump_module { + // dump IR before optimization + let path = path.join("pre-opt.ll"); + module + .write_ir_to_path(path) + .map_err(LinkerError::WriteIRError)?; + }; + optimize( + &self.options, + &self.context, + &target_machine, + &mut module, + export_symbols, + )?; + if let Some(path) = &self.dump_module { + // dump IR before optimization + let path = path.join("post-opt.ll"); + module + .write_ir_to_path(&path) + .map_err(LinkerError::WriteIRError)?; + }; - Ok(()) - } - - fn write_ir(&mut self, output: &CStr) -> Result<(), LinkerError> { - info!("writing IR to {:?}", output); - - llvm::write_ir(self.module, output).map_err(LinkerError::WriteIRError) - } - - fn emit(&mut self, output: &CStr, output_type: LLVMCodeGenFileType) -> Result<(), LinkerError> { - info!("emitting {:?} to {:?}", output_type, output); - - llvm::codegen(self.target_machine, self.module, output, output_type) - .map_err(LinkerError::EmitCodeError) + Ok((module, target_machine)) } - fn llvm_init(&mut self) { - let mut args = Vec::>::new(); - args.push(c"bpf-linker".into()); - // Disable cold call site detection. Many accessors in aya-ebpf return Result - // where the layout is larger than 64 bits, but the LLVM BPF target only supports - // up to 64 bits return values. Since the accessors are tiny in terms of code, we - // avoid the issue by annotating them with #[inline(always)]. If they are classified - // as cold though - and they often are starting from LLVM17 - #[inline(always)] - // is ignored and the BPF target fails codegen. - args.push(c"--cold-callsite-rel-freq=0".into()); - if self.options.unroll_loops { - // setting cmdline arguments is the only way to customize the unroll pass with the - // C API. - args.extend([ - c"--unroll-runtime".into(), - c"--unroll-runtime-multi-exit".into(), - CString::new(format!("--unroll-max-upperbound={}", u32::MAX)) - .unwrap() - .into(), - CString::new(format!("--unroll-threshold={}", u32::MAX)) - .unwrap() - .into(), - ]); - } - if !self.options.disable_expand_memcpy_in_order { - args.push(c"--bpf-expand-memcpy-in-order".into()); - } - if !self.options.allow_bpf_trap { - // TODO: Remove this once ksyms support is guaranteed. - // LLVM introduces __bpf_trap calls at points where __builtin_trap would normally be - // emitted. This is currently not supported by aya because __bpf_trap requires a .ksyms - // section, but this is not trivial to support. In the meantime, using this flag - // returns LLVM to the old behaviour, which did not introduce these calls and therefore - // does not require the .ksyms section. - args.push(c"--bpf-disable-trap-unreachable".into()); - } - args.extend(self.options.llvm_args.iter().map(Into::into)); - info!("LLVM command line: {:?}", args); - llvm::init(args.as_slice(), c"BPF linker"); - - let context = unsafe { LLVMContextCreate() }; - self.context = context; - - unsafe { - let handler_ptr = { - // SAFETY: `diagnostic_handler` is pinned for the lifetime of `Linker`, and we use - // the mutable reference only to obtain a stable raw pointer for LLVM’s callback. - let handler = self.diagnostic_handler.as_mut().get_unchecked_mut(); - ptr::from_mut(handler).cast() - }; - LLVMContextSetDiagnosticHandler( - context, - Some(llvm::diagnostic_handler::), - handler_ptr, - ); - LLVMInstallFatalErrorHandler(Some(llvm::fatal_error)); - LLVMEnablePrettyStackTrace(); - } - self.module = llvm::create_module( - CString::new(self.options.output.file_stem().unwrap().as_bytes()) - .unwrap() - .as_c_str(), - context, - ) - .unwrap(); - } -} - -impl Drop for Linker { - fn drop(&mut self) { - unsafe { - if !self.target_machine.is_null() { - LLVMDisposeTargetMachine(self.target_machine); - } - if !self.module.is_null() { - LLVMDisposeModule(self.module); - } - if !self.context.is_null() { - LLVMContextDispose(self.context); - } - } + pub fn has_errors(&self) -> bool { + self.diagnostic_handler.with_view(|h| h.has_errors) } } @@ -655,3 +554,357 @@ fn detect_input_type(data: &[u8]) -> Option { } } } + +fn llvm_init( + options: &LinkerOptions, +) -> ( + LLVMContext, + llvm::InstalledDiagnosticHandler, +) { + let mut args = Vec::>::new(); + args.push(c"bpf-linker".into()); + // Disable cold call site detection. Many accessors in aya-ebpf return Result + // where the layout is larger than 64 bits, but the LLVM BPF target only supports + // up to 64 bits return values. Since the accessors are tiny in terms of code, we + // avoid the issue by annotating them with #[inline(always)]. If they are classified + // as cold though - and they often are starting from LLVM17 - #[inline(always)] + // is ignored and the BPF target fails codegen. + args.push(c"--cold-callsite-rel-freq=0".into()); + if options.unroll_loops { + // setting cmdline arguments is the only way to customize the unroll pass with the + // C API. + args.extend([ + c"--unroll-runtime".into(), + c"--unroll-runtime-multi-exit".into(), + CString::new(format!("--unroll-max-upperbound={}", u32::MAX)) + .unwrap() + .into(), + CString::new(format!("--unroll-threshold={}", u32::MAX)) + .unwrap() + .into(), + ]); + } + if !options.disable_expand_memcpy_in_order { + args.push(c"--bpf-expand-memcpy-in-order".into()); + } + if !options.allow_bpf_trap { + // TODO: Remove this once ksyms support is guaranteed. + // LLVM introduces __bpf_trap calls at points where __builtin_trap would normally be + // emitted. This is currently not supported by aya because __bpf_trap requires a .ksyms + // section, but this is not trivial to support. In the meantime, using this flag + // returns LLVM to the old behaviour, which did not introduce these calls and therefore + // does not require the .ksyms section. + args.push(c"--bpf-disable-trap-unreachable".into()); + } + args.extend(options.llvm_args.iter().map(Into::into)); + info!("LLVM command line: {:?}", args); + + llvm::init(&args, c"BPF linker"); + + let mut context = LLVMContext::new(); + + let diagnostic_handler = context.set_diagnostic_handler(DiagnosticHandler::default()); + + unsafe { + LLVMInstallFatalErrorHandler(Some(llvm::fatal_error)); + LLVMEnablePrettyStackTrace(); + } + + (context, diagnostic_handler) +} + +fn create_target_machine( + options: &LinkerOptions, + module: &LLVMModule<'_>, +) -> Result { + let LinkerOptions { + target, + cpu, + cpu_features, + .. + } = options; + // Here's how the output target is selected: + // + // 1) rustc with builtin BPF support: cargo build --target=bpf[el|eb]-unknown-none + // the input modules are already configured for the correct output target + // + // 2) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker -C link-arg=--target=bpf[el|eb] + // the input modules are configured for the *host* target, and the output target + // is configured with the `--target` linker argument + // + // 3) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker + // the input modules are configured for the *host* target, the output target isn't + // set via `--target`, so default to `bpf` (bpfel or bpfeb depending on the host + // endianness) + let (triple, target) = match target { + // case 1 + Some(c_triple) => (c_triple.as_c_str(), llvm::target_from_triple(c_triple)), + None => { + let c_triple = module.get_target(); + let c_triple = unsafe { CStr::from_ptr(c_triple) }; + if c_triple.to_bytes().starts_with(b"bpf") { + // case 2 + (c_triple, llvm::target_from_module(module)) + } else { + // case 3. + info!("detected non-bpf input target {:?} and no explicit output --target specified, selecting `bpf'", c_triple); + let c_triple = c"bpf"; + (c_triple, llvm::target_from_triple(c_triple)) + } + } + }; + let target = + target.map_err(|_msg| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; + + debug!( + "creating target machine: triple: {} cpu: {} features: {}", + triple.to_string_lossy(), + cpu, + cpu_features.to_string_lossy(), + ); + + let target_machine = LLVMTargetMachine::new(target, triple, cpu.as_c_str(), cpu_features) + .ok_or_else(|| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; + + Ok(target_machine) +} + +fn codegen_to_file( + module: &LLVMModule<'_>, + target_machine: &LLVMTargetMachine, + output: &Path, + output_type: OutputType, +) -> Result<(), LinkerError> { + match output_type { + OutputType::Bitcode => { + info!("writing bitcode to {:?}", output); + module + .write_bitcode_to_path(output) + .map_err(|_| LinkerError::WriteBitcodeError) + } + OutputType::LlvmAssembly => { + info!("writing IR to {:?}", output); + module + .write_ir_to_path(output) + .map_err(LinkerError::WriteIRError) + } + OutputType::Assembly => { + info!("emitting {:?} to {:?}", output_type, output); + + target_machine + .emit_to_file(module, output, LLVMCodeGenFileType::LLVMAssemblyFile) + .map_err(LinkerError::EmitCodeError) + } + OutputType::Object => { + info!("emitting {:?} to {:?}", output_type, output); + + target_machine + .emit_to_file(module, output, LLVMCodeGenFileType::LLVMObjectFile) + .map_err(LinkerError::EmitCodeError) + } + } +} + +#[allow(single_use_lifetimes)] +fn link_modules<'ctx, 'i>( + context: &'ctx LLVMContext, + inputs: impl IntoIterator>, +) -> Result, LinkerError> { + let mut module = context + .create_module(c"linked_module") + .ok_or(LinkerError::CreateModuleError)?; + + // buffer used to perform file type detection + let mut buf = [0u8; 8]; + for mut input in inputs { + let path = match input { + InputReader::File { path, .. } => path.into(), + InputReader::Buffer { name, .. } => PathBuf::from(format!("in_memory::{}", name)), + }; + + // determine whether the input is bitcode, ELF with embedded bitcode, an archive file + // or an invalid file + input + .read_exact(&mut buf) + .map_err(|e| LinkerError::IoError(path.clone(), e))?; + input + .rewind() + .map_err(|e| LinkerError::IoError(path.clone(), e))?; + let in_type = + detect_input_type(&buf).ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?; + + match in_type { + InputType::Archive => { + info!("linking archive {:?}", path); + + // Extract the archive and call link_reader() for each item. + let mut archive = Archive::new(input); + while let Some(Ok(item)) = archive.next_entry() { + let name = PathBuf::from(OsStr::from_bytes(item.header().identifier())); + info!("linking archive item {:?}", name); + + match link_reader(context, &mut module, &name, item, None) { + Ok(_) => continue, + Err(LinkerError::InvalidInputType(_)) => { + info!("ignoring archive item {:?}: invalid type", name); + continue; + } + Err(LinkerError::MissingBitcodeSection(_)) => { + warn!("ignoring archive item {:?}: no embedded bitcode", name); + continue; + } + Err(_) => { + return Err(LinkerError::LinkArchiveModuleError(path.clone(), name)) + } + }; + } + } + ty => { + info!("linking file {:?} type {}", path, ty); + match link_reader(context, &mut module, &path, input, Some(ty)) { + Ok(_) => {} + Err(LinkerError::InvalidInputType(_)) => { + info!("ignoring file {:?}: invalid type", path); + continue; + } + Err(LinkerError::MissingBitcodeSection(_)) => { + warn!("ignoring file {:?}: no embedded bitcode", path); + } + Err(err) => return Err(err), + } + } + } + } + + Ok(module) +} + +// link in a `Read`-er, which can be a file or an archive item +fn link_reader<'ctx>( + context: &'ctx LLVMContext, + module: &mut LLVMModule<'ctx>, + path: &Path, + mut reader: impl Read, + in_type: Option, +) -> Result<(), LinkerError> { + let mut data = Vec::new(); + let _: usize = reader + .read_to_end(&mut data) + .map_err(|e| LinkerError::IoError(path.to_owned(), e))?; + // in_type is unknown when we're linking an item from an archive file + let in_type = in_type + .or_else(|| detect_input_type(&data)) + .ok_or_else(|| LinkerError::InvalidInputType(path.to_owned()))?; + + let bitcode = match in_type { + InputType::Bitcode => data, + InputType::Elf => match llvm::find_embedded_bitcode(context, &data) { + Ok(Some(bitcode)) => bitcode, + Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())), + Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)), + }, + // we need to handle this here since archive files could contain + // mach-o files, eg somecrate.rlib containing lib.rmeta which is + // mach-o on macos + InputType::MachO => return Err(LinkerError::InvalidInputType(path.to_owned())), + // this can't really happen + InputType::Archive => panic!("nested archives not supported duh"), + }; + + if !llvm::link_bitcode_buffer(context, module, &bitcode) { + return Err(LinkerError::LinkModuleError(path.to_owned())); + } + + Ok(()) +} + +fn optimize<'ctx>( + options: &LinkerOptions, + context: &'ctx LLVMContext, + target_machine: &LLVMTargetMachine, + module: &mut LLVMModule<'ctx>, + export_symbols: &HashSet>, +) -> Result<(), LinkerError> { + let mut export_symbols = export_symbols.clone(); + + if !options.disable_memory_builtins { + export_symbols.extend( + ["memcpy", "memmove", "memset", "memcmp", "bcmp"] + .into_iter() + .map(Into::into), + ); + }; + debug!( + "linking exporting symbols {:?}, opt level {:?}", + export_symbols, options.optimize + ); + // run optimizations. Will optionally remove noinline attributes, intern all non exported + // programs and maps and remove dead code. + + let export_symbols = export_symbols.iter().map(|s| s.as_bytes().into()).collect(); + + if options.btf { + // if we want to emit BTF, we need to sanitize the debug information + llvm::DISanitizer::new(context, module).run(&export_symbols); + } else { + // if we don't need BTF emission, we can strip DI + let ok = module.strip_debug_info(); + debug!("Stripping DI, changed={}", ok); + } + + llvm::optimize( + target_machine, + module, + options.optimize, + options.ignore_inline_never, + &export_symbols, + ) + .map_err(LinkerError::OptimizeError)?; + + Ok(()) +} + +fn codegen_to_buffer( + module: &LLVMModule<'_>, + target_machine: &LLVMTargetMachine, + output_type: OutputType, +) -> Result { + let memory_buffer = match output_type { + OutputType::Bitcode => module.write_bitcode_to_memory(), + OutputType::LlvmAssembly => module.write_ir_to_memory(), + OutputType::Assembly => target_machine + .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMAssemblyFile) + .map_err(LinkerError::EmitCodeError)?, + OutputType::Object => target_machine + .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMObjectFile) + .map_err(LinkerError::EmitCodeError)?, + }; + + Ok(LinkerOutput { + inner: memory_buffer, + }) +} + +pub struct LinkerOutput { + inner: MemoryBuffer, +} + +impl LinkerOutput { + pub fn as_slice(&self) -> &[u8] { + self.inner.as_slice() + } +} + +impl AsRef<[u8]> for LinkerOutput { + fn as_ref(&self) -> &[u8] { + self.as_slice() + } +} + +impl Deref for LinkerOutput { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.as_slice() + } +} diff --git a/src/llvm/di.rs b/src/llvm/di.rs index 21505a3c..90c05945 100644 --- a/src/llvm/di.rs +++ b/src/llvm/di.rs @@ -3,6 +3,7 @@ use std::{ collections::{hash_map::DefaultHasher, HashMap, HashSet}, hash::Hasher as _, io::Write as _, + marker::PhantomData, ptr, }; @@ -14,20 +15,21 @@ use super::types::{ di::DIType, ir::{Function, MDNode, Metadata, Value}, }; -use crate::llvm::{iter::*, types::di::DISubprogram}; +use crate::llvm::{iter::*, types::di::DISubprogram, LLVMContext, LLVMModule}; // KSYM_NAME_LEN from linux kernel intentionally set // to lower value found across kernel versions to ensure // backward compatibility const MAX_KSYM_NAME_LEN: usize = 128; -pub(crate) struct DISanitizer { +pub(crate) struct DISanitizer<'ctx> { context: LLVMContextRef, module: LLVMModuleRef, builder: LLVMDIBuilderRef, visited_nodes: HashSet, replace_operands: HashMap, skipped_types_lossy: Vec, + _marker: PhantomData>, } // Sanitize Rust type names to be valid C type names. @@ -55,15 +57,16 @@ fn sanitize_type_name(name: &[u8]) -> Vec { sanitized } -impl DISanitizer { - pub(crate) fn new(context: LLVMContextRef, module: LLVMModuleRef) -> Self { - Self { - context, - module, - builder: unsafe { LLVMCreateDIBuilder(module) }, +impl<'ctx> DISanitizer<'ctx> { + pub(crate) fn new(context: &'ctx LLVMContext, module: &mut LLVMModule<'ctx>) -> Self { + DISanitizer { + context: context.as_mut_ptr(), + module: module.as_mut_ptr(), + builder: unsafe { LLVMCreateDIBuilder(module.as_mut_ptr()) }, visited_nodes: HashSet::new(), replace_operands: HashMap::new(), skipped_types_lossy: Vec::new(), + _marker: PhantomData, } } diff --git a/src/llvm/mod.rs b/src/llvm/mod.rs index 84436853..e304502d 100644 --- a/src/llvm/mod.rs +++ b/src/llvm/mod.rs @@ -1,11 +1,12 @@ mod di; mod iter; + mod types; use std::{ borrow::Cow, collections::HashSet, - ffi::{c_void, CStr, CString}, + ffi::{CStr, CString}, os::raw::c_char, ptr, slice, str, }; @@ -16,12 +17,10 @@ use llvm_sys::{ bit_reader::LLVMParseBitcodeInContext2, core::{ LLVMCreateMemoryBufferWithMemoryRange, LLVMDisposeMemoryBuffer, LLVMDisposeMessage, - LLVMGetDiagInfoDescription, LLVMGetDiagInfoSeverity, LLVMGetEnumAttributeKindForName, - LLVMGetMDString, LLVMGetModuleInlineAsm, LLVMGetTarget, LLVMGetValueName2, - LLVMModuleCreateWithNameInContext, LLVMPrintModuleToFile, LLVMRemoveEnumAttributeAtIndex, - LLVMSetLinkage, LLVMSetModuleInlineAsm2, LLVMSetVisibility, + LLVMGetEnumAttributeKindForName, LLVMGetMDString, LLVMGetModuleInlineAsm, LLVMGetTarget, + LLVMGetValueName2, LLVMRemoveEnumAttributeAtIndex, LLVMSetLinkage, LLVMSetModuleInlineAsm2, + LLVMSetVisibility, }, - debuginfo::LLVMStripModuleDebugInfo, error::{ LLVMDisposeErrorMessage, LLVMGetErrorMessage, LLVMGetErrorTypeId, LLVMGetStringErrorTypeId, }, @@ -31,23 +30,25 @@ use llvm_sys::{ LLVMGetSectionName, LLVMGetSectionSize, LLVMMoveToNextSection, LLVMObjectFileCopySectionIterator, LLVMObjectFileIsSectionIteratorAtEnd, }, - prelude::{LLVMContextRef, LLVMDiagnosticInfoRef, LLVMModuleRef, LLVMValueRef}, + prelude::{LLVMModuleRef, LLVMValueRef}, support::LLVMParseCommandLineOptions, target::{ LLVMInitializeBPFAsmParser, LLVMInitializeBPFAsmPrinter, LLVMInitializeBPFDisassembler, LLVMInitializeBPFTarget, LLVMInitializeBPFTargetInfo, LLVMInitializeBPFTargetMC, }, - target_machine::{ - LLVMCodeGenFileType, LLVMCodeGenOptLevel, LLVMCodeModel, LLVMCreateTargetMachine, - LLVMGetTargetFromTriple, LLVMRelocMode, LLVMTargetMachineEmitToFile, LLVMTargetMachineRef, - LLVMTargetRef, - }, + target_machine::{LLVMGetTargetFromTriple, LLVMTargetRef}, transforms::pass_builder::{ LLVMCreatePassBuilderOptions, LLVMDisposePassBuilderOptions, LLVMRunPasses, }, LLVMAttributeFunctionIndex, LLVMLinkage, LLVMVisibility, }; use tracing::{debug, error}; +pub(crate) use types::{ + context::{InstalledDiagnosticHandler, LLVMContext}, + memory_buffer::MemoryBuffer, + module::LLVMModule, + target_machine::LLVMTargetMachine, +}; use crate::OptLevel; @@ -65,18 +66,8 @@ pub(crate) fn init(args: &[Cow<'_, CStr>], overview: &CStr) { unsafe { LLVMParseCommandLineOptions(c_ptrs.len() as i32, c_ptrs.as_ptr(), overview.as_ptr()) }; } -pub(crate) fn create_module(name: &CStr, context: LLVMContextRef) -> Option { - let module = unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), context) }; - - if module.is_null() { - return None; - } - - Some(module) -} - pub(crate) fn find_embedded_bitcode( - context: LLVMContextRef, + context: &LLVMContext, data: &[u8], ) -> Result>, String> { let buffer_name = c"mem_buffer"; @@ -90,7 +81,7 @@ pub(crate) fn find_embedded_bitcode( }; let (bin, message) = - Message::with(|message| unsafe { LLVMCreateBinary(buffer, context, message) }); + Message::with(|message| unsafe { LLVMCreateBinary(buffer, context.as_mut_ptr(), message) }); if bin.is_null() { return Err(message.as_string_lossy().to_string()); } @@ -118,9 +109,9 @@ pub(crate) fn find_embedded_bitcode( } #[must_use] -pub(crate) fn link_bitcode_buffer( - context: LLVMContextRef, - module: LLVMModuleRef, +pub(crate) fn link_bitcode_buffer<'ctx>( + context: &'ctx LLVMContext, + module: &mut LLVMModule<'ctx>, buffer: &[u8], ) -> bool { let mut linked = false; @@ -136,8 +127,8 @@ pub(crate) fn link_bitcode_buffer( let mut temp_module = ptr::null_mut(); - if unsafe { LLVMParseBitcodeInContext2(context, buffer, &mut temp_module) } == 0 { - linked = unsafe { LLVMLinkModules2(module, temp_module) } == 0; + if unsafe { LLVMParseBitcodeInContext2(context.as_mut_ptr(), buffer, &mut temp_module) } == 0 { + linked = unsafe { LLVMLinkModules2(module.as_mut_ptr(), temp_module) } == 0; } unsafe { LLVMDisposeMemoryBuffer(buffer) }; @@ -157,54 +148,30 @@ pub(crate) fn target_from_triple(triple: &CStr) -> Result } } -pub(crate) fn target_from_module(module: LLVMModuleRef) -> Result { - let triple = unsafe { LLVMGetTarget(module) }; +pub(crate) fn target_from_module(module: &LLVMModule<'_>) -> Result { + let triple = unsafe { LLVMGetTarget(module.as_mut_ptr()) }; unsafe { target_from_triple(CStr::from_ptr(triple)) } } -pub(crate) fn create_target_machine( - target: LLVMTargetRef, - triple: &CStr, - cpu: &CStr, - features: &CStr, -) -> Option { - let tm = unsafe { - LLVMCreateTargetMachine( - target, - triple.as_ptr(), - cpu.as_ptr(), - features.as_ptr(), - LLVMCodeGenOptLevel::LLVMCodeGenLevelAggressive, - LLVMRelocMode::LLVMRelocDefault, - LLVMCodeModel::LLVMCodeModelDefault, - ) - }; - if tm.is_null() { - None - } else { - Some(tm) - } -} - pub(crate) fn optimize( - tm: LLVMTargetMachineRef, - module: LLVMModuleRef, + tm: &LLVMTargetMachine, + module: &mut LLVMModule<'_>, opt_level: OptLevel, ignore_inline_never: bool, export_symbols: &HashSet>, ) -> Result<(), String> { - if module_asm_is_probestack(module) { - unsafe { LLVMSetModuleInlineAsm2(module, ptr::null_mut(), 0) }; + if module_asm_is_probestack(module.as_mut_ptr()) { + unsafe { LLVMSetModuleInlineAsm2(module.as_mut_ptr(), ptr::null_mut(), 0) }; } - for sym in module.globals_iter() { + for sym in module.as_mut_ptr().globals_iter() { internalize(sym, symbol_name(sym), export_symbols); } - for sym in module.global_aliases_iter() { + for sym in module.as_mut_ptr().global_aliases_iter() { internalize(sym, symbol_name(sym), export_symbols); } - for function in module.functions_iter() { + for function in module.as_mut_ptr().functions_iter() { let name = symbol_name(function); if !name.starts_with(b"llvm.") { if ignore_inline_never { @@ -235,7 +202,14 @@ pub(crate) fn optimize( debug!("running passes: {passes}"); let passes = CString::new(passes).unwrap(); let options = unsafe { LLVMCreatePassBuilderOptions() }; - let error = unsafe { LLVMRunPasses(module, passes.as_ptr(), tm, options) }; + let error = unsafe { + LLVMRunPasses( + module.as_mut_ptr(), + passes.as_ptr(), + tm.as_mut_ptr(), + options, + ) + }; unsafe { LLVMDisposePassBuilderOptions(options) }; // Handle the error and print it to stderr. if !error.is_null() { @@ -253,11 +227,6 @@ pub(crate) fn optimize( Ok(()) } -/// strips debug information, returns true if DI got stripped -pub(crate) fn strip_debug_info(module: LLVMModuleRef) -> bool { - unsafe { LLVMStripModuleDebugInfo(module) != 0 } -} - pub(crate) fn module_asm_is_probestack(module: LLVMModuleRef) -> bool { let mut len = 0; let ptr = unsafe { LLVMGetModuleInlineAsm(module, &mut len) }; @@ -281,32 +250,6 @@ pub(crate) fn remove_attribute(function: *mut llvm_sys::LLVMValue, name: &str) { unsafe { LLVMRemoveEnumAttributeAtIndex(function, LLVMAttributeFunctionIndex, attr_kind) }; } -pub(crate) fn write_ir(module: LLVMModuleRef, output: &CStr) -> Result<(), String> { - let (ret, message) = - Message::with(|message| unsafe { LLVMPrintModuleToFile(module, output.as_ptr(), message) }); - if ret == 0 { - Ok(()) - } else { - Err(message.as_string_lossy().to_string()) - } -} - -pub(crate) fn codegen( - tm: LLVMTargetMachineRef, - module: LLVMModuleRef, - output: &CStr, - output_type: LLVMCodeGenFileType, -) -> Result<(), String> { - let (ret, message) = Message::with(|message| unsafe { - LLVMTargetMachineEmitToFile(tm, module, output.as_ptr().cast_mut(), output_type, message) - }); - if ret == 0 { - Ok(()) - } else { - Err(message.as_string_lossy().to_string()) - } -} - pub(crate) fn internalize( value: LLVMValueRef, name: &[u8], @@ -326,18 +269,6 @@ pub(crate) trait LLVMDiagnosticHandler { ); } -pub(crate) extern "C" fn diagnostic_handler( - info: LLVMDiagnosticInfoRef, - handler: *mut c_void, -) { - let severity = unsafe { LLVMGetDiagInfoSeverity(info) }; - let message = Message { - ptr: unsafe { LLVMGetDiagInfoDescription(info) }, - }; - let handler = handler.cast::(); - unsafe { &mut *handler }.handle_diagnostic(severity, message.as_string_lossy()); -} - pub(crate) extern "C" fn fatal_error(reason: *const c_char) { error!("fatal error: {:?}", unsafe { CStr::from_ptr(reason) }) } diff --git a/src/llvm/types/context.rs b/src/llvm/types/context.rs new file mode 100644 index 00000000..e3ed6bbb --- /dev/null +++ b/src/llvm/types/context.rs @@ -0,0 +1,111 @@ +use std::{ + any::Any, + ffi::{c_void, CStr}, + marker::PhantomData, + pin::Pin, + ptr, + rc::Rc, +}; + +use llvm_sys::{ + core::{ + LLVMContextCreate, LLVMContextDispose, LLVMContextSetDiagnosticHandler, + LLVMGetDiagInfoDescription, LLVMGetDiagInfoSeverity, LLVMModuleCreateWithNameInContext, + }, + prelude::{LLVMContextRef, LLVMDiagnosticInfoRef}, +}; + +use crate::llvm::{types::module::LLVMModule, LLVMDiagnosticHandler, Message}; + +pub(crate) struct LLVMContext { + pub(super) context: LLVMContextRef, + diagnostic_handler: Option, +} + +impl LLVMContext { + pub(crate) fn new() -> Self { + let context = unsafe { LLVMContextCreate() }; + Self { + context, + diagnostic_handler: None, + } + } + + /// Returns an unsafe mutable pointer to the LLVM context. + /// + /// The caller must ensure that the [LLVMContext] outlives the pointer this + /// function returns, or else it will end up dangling. + pub(in crate::llvm) const fn as_mut_ptr(&self) -> LLVMContextRef { + self.context + } + + pub(crate) fn create_module<'ctx>(&'ctx self, name: &CStr) -> Option> { + let module = unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), self.context) }; + + if module.is_null() { + return None; + } + + Some(LLVMModule { + module, + _marker: PhantomData, + }) + } + + pub(crate) fn set_diagnostic_handler(&mut self, handler: T) -> InstalledDiagnosticHandler + where + T: LLVMDiagnosticHandler + 'static, + { + let pinrc = Rc::pin(handler); + self.diagnostic_handler = Some(StoredHandler { + _handler: pinrc.clone(), + }); + + let handler_ptr = ptr::from_ref(Pin::as_ref(&pinrc).get_ref()) as *mut c_void; + + unsafe { + LLVMContextSetDiagnosticHandler( + self.context, + Some(diagnostic_handler::), + handler_ptr, + ) + }; + + InstalledDiagnosticHandler { inner: pinrc } + } +} + +impl Drop for LLVMContext { + fn drop(&mut self) { + unsafe { + LLVMContextDispose(self.context); + } + } +} + +struct StoredHandler { + _handler: Pin>, +} + +extern "C" fn diagnostic_handler( + info: LLVMDiagnosticInfoRef, + handler: *mut c_void, +) { + let severity = unsafe { LLVMGetDiagInfoSeverity(info) }; + let message = Message { + ptr: unsafe { LLVMGetDiagInfoDescription(info) }, + }; + let handler = handler.cast::(); + unsafe { &mut *handler }.handle_diagnostic(severity, message.as_string_lossy()); +} + +#[derive(Clone)] +pub(crate) struct InstalledDiagnosticHandler { + inner: Pin>, +} + +impl InstalledDiagnosticHandler { + pub(crate) fn with_view R>(&self, f: F) -> R { + f(Pin::as_ref(&self.inner).get_ref()) + } +} diff --git a/src/llvm/types/memory_buffer.rs b/src/llvm/types/memory_buffer.rs new file mode 100644 index 00000000..a9b150e4 --- /dev/null +++ b/src/llvm/types/memory_buffer.rs @@ -0,0 +1,34 @@ +use core::slice; + +use llvm_sys::{ + core::{LLVMDisposeMemoryBuffer, LLVMGetBufferSize, LLVMGetBufferStart}, + prelude::LLVMMemoryBufferRef, +}; + +pub(crate) struct MemoryBuffer { + pub(super) memory_buffer: LLVMMemoryBufferRef, +} + +impl MemoryBuffer { + /// Gets a byte slice of this `MemoryBuffer`. + pub(crate) fn as_slice(&self) -> &[u8] { + unsafe { + let start = LLVMGetBufferStart(self.memory_buffer); + + slice::from_raw_parts(start.cast(), self.get_size()) + } + } + + /// Gets the byte size of this `MemoryBuffer`. + pub(crate) fn get_size(&self) -> usize { + unsafe { LLVMGetBufferSize(self.memory_buffer) } + } +} + +impl Drop for MemoryBuffer { + fn drop(&mut self) { + unsafe { + LLVMDisposeMemoryBuffer(self.memory_buffer); + } + } +} diff --git a/src/llvm/types/mod.rs b/src/llvm/types/mod.rs index 783868ba..f585bb04 100644 --- a/src/llvm/types/mod.rs +++ b/src/llvm/types/mod.rs @@ -1,2 +1,6 @@ +pub(super) mod context; pub(super) mod di; pub(super) mod ir; +pub(super) mod memory_buffer; +pub(super) mod module; +pub(super) mod target_machine; diff --git a/src/llvm/types/module.rs b/src/llvm/types/module.rs new file mode 100644 index 00000000..c3e1e6c4 --- /dev/null +++ b/src/llvm/types/module.rs @@ -0,0 +1,100 @@ +use std::{ + ffi::{CStr, CString}, + marker::PhantomData, + path::Path, +}; + +use libc::c_char; +use llvm_sys::{ + bit_writer::LLVMWriteBitcodeToFile, + core::{ + LLVMCreateMemoryBufferWithMemoryRangeCopy, LLVMDisposeMessage, LLVMDisposeModule, + LLVMGetTarget, LLVMPrintModuleToFile, LLVMPrintModuleToString, + }, + debuginfo::LLVMStripModuleDebugInfo, + prelude::LLVMModuleRef, +}; + +use crate::llvm::{types::context::LLVMContext, MemoryBuffer, Message}; + +pub(crate) struct LLVMModule<'ctx> { + pub(super) module: LLVMModuleRef, + pub(super) _marker: PhantomData<&'ctx LLVMContext>, +} + +impl LLVMModule<'_> { + /// Returns an unsafe mutable pointer to the LLVM module. + /// + /// The caller must ensure that the [LLVMModule] outlives the pointer this + /// function returns, or else it will end up dangling. + pub(in crate::llvm) const fn as_mut_ptr(&self) -> LLVMModuleRef { + self.module + } + + pub(crate) fn get_target(&self) -> *const c_char { + unsafe { LLVMGetTarget(self.module) } + } + + pub(crate) fn write_bitcode_to_path(&self, path: impl AsRef) -> Result<(), String> { + let path = CString::new(path.as_ref().as_os_str().as_encoded_bytes()).unwrap(); + + if unsafe { LLVMWriteBitcodeToFile(self.module, path.as_ptr()) } == 1 { + return Err("failed to write bitcode".to_string()); + } + + Ok(()) + } + + pub(crate) fn write_bitcode_to_memory(&self) -> MemoryBuffer { + let buf = unsafe { llvm_sys::bit_writer::LLVMWriteBitcodeToMemoryBuffer(self.module) }; + + MemoryBuffer { memory_buffer: buf } + } + + pub(crate) fn write_ir_to_path(&self, path: impl AsRef) -> Result<(), String> { + let path = CString::new(path.as_ref().as_os_str().as_encoded_bytes()).unwrap(); + + let (ret, message) = unsafe { + Message::with(|message| LLVMPrintModuleToFile(self.module, path.as_ptr(), message)) + }; + + if ret == 0 { + Ok(()) + } else { + Err(message.as_string_lossy().to_string()) + } + } + + pub(crate) fn write_ir_to_memory(&self) -> MemoryBuffer { + // Format the module to a string, then copy into a MemoryBuffer. We do the extra copy to keep the + // internal API simpler, as all the other codegen methods output a MemoryBuffer. + unsafe { + let ptr = LLVMPrintModuleToString(self.module); + let cstr = CStr::from_ptr(ptr); + let bytes = cstr.to_bytes(); + + let buffer_name = c"mem_buffer"; + + // Copy bytes into a new LLVMMemoryBuffer so we can safely dispose the message. + let memory_buffer = LLVMCreateMemoryBufferWithMemoryRangeCopy( + bytes.as_ptr().cast(), + bytes.len(), + buffer_name.as_ptr(), + ); + LLVMDisposeMessage(ptr); + + MemoryBuffer { memory_buffer } + } + } + + /// strips debug information, returns true if DI got stripped + pub(crate) fn strip_debug_info(&mut self) -> bool { + unsafe { LLVMStripModuleDebugInfo(self.module) != 0 } + } +} + +impl Drop for LLVMModule<'_> { + fn drop(&mut self) { + unsafe { LLVMDisposeModule(self.module) }; + } +} diff --git a/src/llvm/types/target_machine.rs b/src/llvm/types/target_machine.rs new file mode 100644 index 00000000..0c22db12 --- /dev/null +++ b/src/llvm/types/target_machine.rs @@ -0,0 +1,108 @@ +use std::{ + ffi::{CStr, CString}, + path::Path, +}; + +use llvm_sys::target_machine::{ + LLVMCodeGenFileType, LLVMCodeGenOptLevel, LLVMCodeModel, LLVMCreateTargetMachine, + LLVMDisposeTargetMachine, LLVMRelocMode, LLVMTargetMachineEmitToFile, + LLVMTargetMachineEmitToMemoryBuffer, LLVMTargetMachineRef, LLVMTargetRef, +}; + +use crate::llvm::{types::module::LLVMModule, MemoryBuffer, Message}; + +pub(crate) struct LLVMTargetMachine { + pub(super) target_machine: LLVMTargetMachineRef, +} + +impl LLVMTargetMachine { + pub(crate) fn new( + target: LLVMTargetRef, + triple: &CStr, + cpu: &CStr, + features: &CStr, + ) -> Option { + let tm = unsafe { + LLVMCreateTargetMachine( + target, + triple.as_ptr(), + cpu.as_ptr(), + features.as_ptr(), + LLVMCodeGenOptLevel::LLVMCodeGenLevelAggressive, + LLVMRelocMode::LLVMRelocDefault, + LLVMCodeModel::LLVMCodeModelDefault, + ) + }; + if tm.is_null() { + None + } else { + Some(Self { target_machine: tm }) + } + } + + /// Returns an unsafe mutable pointer to the LLVM target machine. + /// + /// The caller must ensure that the [LLVMTargetMachine] outlives the pointer this + /// function returns, or else it will end up dangling. + pub(in crate::llvm) const fn as_mut_ptr(&self) -> LLVMTargetMachineRef { + self.target_machine + } + + pub(crate) fn emit_to_file( + &self, + module: &LLVMModule<'_>, + path: impl AsRef, + output_type: LLVMCodeGenFileType, + ) -> Result<(), String> { + let path = CString::new(path.as_ref().as_os_str().as_encoded_bytes()).unwrap(); + + let (ret, message) = unsafe { + Message::with(|message| { + LLVMTargetMachineEmitToFile( + self.target_machine, + module.module, + path.as_ptr(), + output_type, + message, + ) + }) + }; + if ret == 0 { + Ok(()) + } else { + Err(message.as_string_lossy().to_string()) + } + } + + pub(crate) fn emit_to_memory_buffer( + &self, + module: &LLVMModule<'_>, + output_type: LLVMCodeGenFileType, + ) -> Result { + let mut out_buf = std::ptr::null_mut(); + let (ret, message) = Message::with(|message| unsafe { + LLVMTargetMachineEmitToMemoryBuffer( + self.target_machine, + module.module, + output_type, + message, + &mut out_buf, + ) + }); + if ret != 0 { + return Err(message.as_string_lossy().to_string()); + } + + Ok(MemoryBuffer { + memory_buffer: out_buf, + }) + } +} + +impl Drop for LLVMTargetMachine { + fn drop(&mut self) { + unsafe { + LLVMDisposeTargetMachine(self.target_machine); + } + } +}