Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ tracing = "0.1"
compiletest_rs = { version = "0.11.0" }
regex = { version = "1.11.1", default-features = false }
rustc-build-sysroot = { workspace = true }
tempfile = { version = "3.13" }
which = { version = "8.0.0", default-features = false, features = ["real-sys", "regex"] }

[lints]
Expand Down
79 changes: 58 additions & 21 deletions src/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{
collections::HashSet,
ffi::{CStr, CString, OsStr},
fs::File,
io::{self, Read, Seek},
io::{self, BufRead, BufReader, Read, Seek},
ops::Deref,
os::unix::ffi::OsStrExt as _,
path::{Path, PathBuf},
Expand Down Expand Up @@ -206,6 +206,8 @@ enum InputType {
MachO,
/// Archive file. (.a)
Archive,
/// IR file (.ll)
Ir,
}

impl std::fmt::Display for InputType {
Expand All @@ -218,6 +220,7 @@ impl std::fmt::Display for InputType {
Self::Elf => "elf",
Self::MachO => "Mach-O",
Self::Archive => "archive",
Self::Ir => "ir",
}
)
}
Expand Down Expand Up @@ -507,24 +510,27 @@ where
.create_module(c"linked_module")
.ok_or(LinkerError::CreateModuleError)?;

// buffer used to perform file type detection
let mut buf = [0u8; 8];
for mut input in inputs {
for input in inputs {
let path = match input {
InputReader::File { path, .. } => path.into(),
InputReader::Buffer { name, .. } => PathBuf::from(format!("in_memory::{}", name)),
};

// determine whether the input is bitcode, ELF with embedded bitcode, an archive file
// or an invalid file
input
.read_exact(&mut buf)
let mut buf = BufReader::new(input);

// Peek at the buffer to determine file type
let preview = buf
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is still an arbitrary size buffer (the size of the internal BufReader
buffer which is 4096 by default)

You don't need BufReader here. What you want to do is pass input to
detect_input_type instead of passing &[u8]

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, I just saw this in the BufReader documentation:

"BufReader can improve the speed of programs that make small and repeated read calls to the same file or network socket. It does not help when reading very large amounts at once, or reading just one or a few times. It also provides no advantage when reading from a source that is already in memory, like a Vec<u8>."

.fill_buf()
.map_err(|e| LinkerError::IoError(path.clone(), e))?;

let in_type = detect_input_type(preview)
.ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?;

// Get back the inner reader to rewind it
let mut input = buf.into_inner();
input
.rewind()
.map_err(|e| LinkerError::IoError(path.clone(), e))?;
let in_type =
detect_input_type(&buf).ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?;

match in_type {
InputType::Archive => {
Expand Down Expand Up @@ -587,13 +593,30 @@ fn link_reader<'ctx>(
.or_else(|| detect_input_type(&data))
.ok_or_else(|| LinkerError::InvalidInputType(path.to_owned()))?;

let bitcode = match in_type {
InputType::Bitcode => data,
InputType::Elf => match llvm::find_embedded_bitcode(context, &data) {
Ok(Some(bitcode)) => bitcode,
Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())),
Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)),
},
match in_type {
InputType::Bitcode => {
if !llvm::link_bitcode_buffer(context, module, &data) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}
}
InputType::Ir => {
let data = CString::new(data).unwrap();
if !llvm::link_ir_buffer(context, module, &data)
.map_err(|_| LinkerError::LinkModuleError(path.to_owned()))?
{
return Err(LinkerError::LinkModuleError(path.to_owned()));
}
}
InputType::Elf => {
let bitcode = match llvm::find_embedded_bitcode(context, &data) {
Ok(Some(bitcode)) => bitcode,
Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())),
Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)),
};
if !llvm::link_bitcode_buffer(context, module, &bitcode) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}
}
// we need to handle this here since archive files could contain
// mach-o files, eg somecrate.rlib containing lib.rmeta which is
// mach-o on macos
Expand All @@ -602,10 +625,6 @@ fn link_reader<'ctx>(
InputType::Archive => panic!("nested archives not supported duh"),
};

if !llvm::link_bitcode_buffer(context, module, &bitcode) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}

Ok(())
}

Expand Down Expand Up @@ -882,13 +901,31 @@ fn detect_input_type(data: &[u8]) -> Option<InputType> {
_ => {
if &data[..8] == b"!<arch>\x0A" {
Some(InputType::Archive)
} else if is_llvm_ir(data) {
Some(InputType::Ir)
} else {
None
}
}
}
}

/// Heuristic check for textual LLVM IR (`.ll`) content.
///
/// Leading ASCII whitespace is skipped, after which the remaining bytes must
/// begin with one of a fixed set of tokens for the function to return `true`.
fn is_llvm_ir(data: &[u8]) -> bool {
    // Tokens accepted at the start of the (whitespace-trimmed) input.
    const IR_MARKERS: &[&[u8]] = &[
        b"; ModuleID",
        b"target triple",
        b"target datalayout",
        b"source_filename",
        b"target ",
        b"define",
        b"!llvm",
    ];

    let body = data.trim_ascii_start();
    for marker in IR_MARKERS {
        if body.starts_with(marker) {
            return true;
        }
    }
    false
}

pub struct LinkerOutput {
inner: MemoryBuffer,
}
Expand Down
35 changes: 35 additions & 0 deletions src/llvm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use llvm_sys::{
error::{
LLVMDisposeErrorMessage, LLVMGetErrorMessage, LLVMGetErrorTypeId, LLVMGetStringErrorTypeId,
},
ir_reader::LLVMParseIRInContext,
linker::LLVMLinkModules2,
object::{
LLVMCreateBinary, LLVMDisposeBinary, LLVMDisposeSectionIterator, LLVMGetSectionContents,
Expand Down Expand Up @@ -141,6 +142,40 @@ pub(crate) fn link_bitcode_buffer<'ctx>(
linked
}

/// Parses `buffer` as LLVM IR inside `context` and links the parsed module
/// into `module`.
///
/// Returns `Ok(true)` when `LLVMLinkModules2` returns 0, `Ok(false)` when it
/// returns anything else, and `Err` with the parser's message when
/// `LLVMParseIRInContext` returns non-zero.
pub(crate) fn link_ir_buffer<'ctx>(
context: &'ctx LLVMContext,
module: &mut LLVMModule<'ctx>,
buffer: &CStr,
) -> Result<bool, String> {
let buffer_name = c"ir_buffer";
// `CStr::to_bytes` yields the contents without the trailing NUL; the NUL
// itself still sits right after the slice in the caller's `CStr`.
let buffer = buffer.to_bytes();
// Wrap the caller's bytes in an LLVM memory buffer without copying them.
let mem_buffer = unsafe {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're leaking this you need to call LLVMDisposeMemoryBuffer before returning

LLVMCreateMemoryBufferWithMemoryRange(
buffer.as_ptr().cast(),
buffer.len(),
buffer_name.as_ptr(),
1, // LLVM internally sets RequiresTerminator=true
)
};

// NOTE(review): the LLVM-C IRReader API is understood to take ownership of
// `mem_buffer` in LLVMParseIRInContext — confirm against the LLVM docs before
// adding an explicit LLVMDisposeMemoryBuffer, which could double-free.
let mut temp_module = ptr::null_mut();
let (ret, message) = Message::with(|error_msg| unsafe {
LLVMParseIRInContext(
context.as_mut_ptr(),
mem_buffer,
&mut temp_module,
error_msg,
)
});

// A zero return from the parser means `temp_module` was produced; link it in
// and report whether the link step itself returned 0.
if ret == 0 {
let linked = unsafe { LLVMLinkModules2(module.as_mut_ptr(), temp_module) } == 0;
Ok(linked)
} else {
Err(message.as_string_lossy().to_string())
}
}

pub(crate) fn target_from_triple(triple: &CStr) -> Result<LLVMTargetRef, String> {
let mut target = ptr::null_mut();
let (ret, message) = Message::with(|message| unsafe {
Expand Down
102 changes: 102 additions & 0 deletions tests/ir_file_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#![expect(unused_crate_dependencies, reason = "used in lib/bin")]

use std::{
env, fs,
path::{Path, PathBuf},
process::Command,
};

/// Path of the `bpf-linker` binary under test, taken from the
/// `CARGO_BIN_EXE_bpf-linker` variable that is read at compile time.
fn linker_path() -> PathBuf {
    env!("CARGO_BIN_EXE_bpf-linker").into()
}

/// Writes a small LLVM IR module to `<dir>/<name>.ll` and returns that path.
///
/// The module targets `bpf` and defines a single function `@test_<name>`.
/// Panics if the file cannot be written.
fn create_test_ir_file(dir: &Path, name: &str) -> PathBuf {
    // Render the IR text first; `name` is substituted into the module id,
    // the source filename and the function symbol.
    let ir_text = format!(
        r#"; ModuleID = '{module}'
source_filename = "{module}"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "bpf"
define i32 @test_{module}(i32 %x) #0 {{
entry:
%result = add i32 %x, 1
ret i32 %result
}}
attributes #0 = {{ noinline nounwind optnone }}
!llvm.module.flags = !{{!0}}
!0 = !{{i32 1, !"wchar_size", i32 4}}
"#,
        module = name
    );

    let mut target = dir.to_path_buf();
    target.push(format!("{name}.ll"));
    fs::write(&target, ir_text).expect("Failed to write test IR file");
    target
}

/// End-to-end check: runs the `bpf-linker` binary on a generated `.ll` file
/// and expects it to succeed and leave a non-empty output file behind.
#[test]
fn test_link_ir_file() {
    let name = "alessandro";
    let workdir = tempfile::tempdir().expect("Failed to create temp dir");
    let input_ll = create_test_ir_file(workdir.path(), name);
    let output_file = workdir.path().join("output.o");

    let mut cmd = Command::new(linker_path());
    cmd.arg("--export")
        .arg(format!("test_{}", name))
        .arg(&input_ll)
        .arg("-o")
        .arg(&output_file);
    let output = cmd.output().expect("Failed to execute bpf-linker");

    // On failure, dump what the linker printed before aborting the test so
    // the cause is visible in the test log.
    let succeeded = output.status.success();
    if !succeeded {
        let stdout = String::from_utf8_lossy(&output.stdout);
        let stderr = String::from_utf8_lossy(&output.stderr);
        eprintln!("stdout: {}", stdout);
        eprintln!("stderr: {}", stderr);
        panic!("bpf-linker failed with status: {}", output.status);
    }

    assert!(
        output_file.exists(),
        "Output file should exist: {:?}",
        output_file
    );
    let size = output_file.metadata().unwrap().len();
    assert!(size > 0, "Output file should not be empty");
}

/// Runs `bpf-linker` on a deliberately corrupted `.ll` file and asserts that
/// the process exits unsuccessfully.
#[test]
fn test_invalid_ir_file() {
let temp_dir = tempfile::tempdir().expect("Failed to create temp dir");

// Start from the known-good IR produced by the shared helper.
let valid_ir_file = create_test_ir_file(temp_dir.path(), "alessandro");

let valid_content = fs::read_to_string(valid_ir_file).expect("Failed to read valid IR file");

// Corrupting IR content
// NOTE(review): the third replacement rewrites the first line to
// `: ModuleXX ...`, which no longer starts with any prefix `is_llvm_ir`
// accepts — so this may fail at input-type detection rather than in the IR
// parser. Confirm which failure path the test is meant to cover.
let invalid_content = valid_content
.replace("define", "defXne")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to corrupt 3 things, pick one :D

.replace("add i32", "adX i32")
.replace("; ModuleID = 'alessandro'", ": ModuleXX = 'corrupted'");

let invalid_ir_file = temp_dir.path().join("corrupted.ll");

fs::write(&invalid_ir_file, invalid_content).expect("Failed to write invalid IR file");

let output_file = temp_dir.path().join("output.o");

// No `--export` is passed here; only the input and the output path.
let output = Command::new(linker_path())
.arg(&invalid_ir_file)
.arg("-o")
.arg(&output_file)
.output()
.expect("Failed to execute bpf-linker");

// Should fail with corrupted IR
assert!(
!output.status.success(),
"bpf-linker should fail with corrupted IR. stderr: {}",
String::from_utf8_lossy(&output.stderr)
);
}
Loading