Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ compiletest_rs = { version = "0.11.0" }
regex = { version = "1.11.1", default-features = false }
rustc-build-sysroot = { workspace = true }
which = { version = "8.0.0", default-features = false, features = ["real-sys", "regex"] }
tempfile = "3.13"

[lints]
workspace = true
Expand Down
66 changes: 50 additions & 16 deletions src/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ enum InputType {
MachO,
/// Archive file. (.a)
Archive,
/// IR file (.ll)
Ir,
}

impl std::fmt::Display for InputType {
Expand All @@ -218,6 +220,7 @@ impl std::fmt::Display for InputType {
Self::Elf => "elf",
Self::MachO => "Mach-O",
Self::Archive => "archive",
Self::Ir => "ir",
}
)
}
Expand Down Expand Up @@ -508,7 +511,7 @@ where
.ok_or(LinkerError::CreateModuleError)?;

// buffer used to perform file type detection
let mut buf = [0u8; 8];
let mut buf = [0u8; 1024];
for mut input in inputs {
let path = match input {
InputReader::File { path, .. } => path.into(),
Expand All @@ -517,14 +520,14 @@ where

// determine whether the input is bitcode, ELF with embedded bitcode, an archive file
// or an invalid file
input
.read_exact(&mut buf)
let bytes_read = input
.read(&mut buf)
.map_err(|e| LinkerError::IoError(path.clone(), e))?;
input
.rewind()
.map_err(|e| LinkerError::IoError(path.clone(), e))?;
let in_type =
detect_input_type(&buf).ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?;
let in_type = detect_input_type(&buf[..bytes_read])
.ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?;

match in_type {
InputType::Archive => {
Expand Down Expand Up @@ -587,13 +590,29 @@ fn link_reader<'ctx>(
.or_else(|| detect_input_type(&data))
.ok_or_else(|| LinkerError::InvalidInputType(path.to_owned()))?;

let bitcode = match in_type {
InputType::Bitcode => data,
InputType::Elf => match llvm::find_embedded_bitcode(context, &data) {
Ok(Some(bitcode)) => bitcode,
Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())),
Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)),
},
match in_type {
InputType::Bitcode => {
if !llvm::link_bitcode_buffer(context, module, &data) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}
}
InputType::Ir => {
data.push(0); // force push null terminator
let data = CStr::from_bytes_with_nul(&data).unwrap();
if !llvm::link_ir_buffer(context, module, data) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}
}
InputType::Elf => {
let bitcode = match llvm::find_embedded_bitcode(context, &data) {
Ok(Some(bitcode)) => bitcode,
Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())),
Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)),
};
if !llvm::link_bitcode_buffer(context, module, &bitcode) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}
}
// we need to handle this here since archive files could contain
// mach-o files, eg somecrate.rlib containing lib.rmeta which is
// mach-o on macos
Expand All @@ -602,10 +621,6 @@ fn link_reader<'ctx>(
InputType::Archive => panic!("nested archives not supported duh"),
};

if !llvm::link_bitcode_buffer(context, module, &bitcode) {
return Err(LinkerError::LinkModuleError(path.to_owned()));
}

Ok(())
}

Expand Down Expand Up @@ -882,13 +897,32 @@ fn detect_input_type(data: &[u8]) -> Option<InputType> {
_ => {
if &data[..8] == b"!<arch>\x0A" {
Some(InputType::Archive)
} else if is_llvm_ir(data) {
Some(InputType::Ir)
} else {
None
}
}
}
}

/// Heuristically decides whether `data` looks like the start of a textual
/// LLVM IR (`.ll`) file.
///
/// Leading ASCII whitespace is ignored. The remainder must begin with one of
/// a fixed set of prefixes; input that is empty or consists only of
/// whitespace is never treated as IR.
fn is_llvm_ir(data: &[u8]) -> bool {
    // Prefixes accepted at the start of the (whitespace-trimmed) input.
    const HEADER_PREFIXES: [&[u8]; 7] = [
        b"; ModuleID",
        b"target triple",
        b"target datalayout",
        b"source_filename",
        b"target ",
        b"define",
        b"!llvm",
    ];

    // Drop leading ASCII whitespace one byte at a time.
    let mut rest = data;
    while let [first, tail @ ..] = rest {
        if !first.is_ascii_whitespace() {
            break;
        }
        rest = tail;
    }

    // An empty remainder matches no prefix, so all-whitespace input yields `false`.
    HEADER_PREFIXES
        .iter()
        .any(|prefix| rest.starts_with(prefix))
}

pub struct LinkerOutput {
inner: MemoryBuffer,
}
Expand Down
49 changes: 46 additions & 3 deletions src/llvm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ use llvm_sys::{
bit_reader::LLVMParseBitcodeInContext2,
core::{
LLVMCreateMemoryBufferWithMemoryRange, LLVMDisposeMemoryBuffer, LLVMDisposeMessage,
LLVMGetEnumAttributeKindForName, LLVMGetMDString, LLVMGetModuleInlineAsm, LLVMGetTarget,
LLVMGetValueName2, LLVMRemoveEnumAttributeAtIndex, LLVMSetLinkage, LLVMSetModuleInlineAsm2,
LLVMSetVisibility,
LLVMDisposeModule, LLVMGetEnumAttributeKindForName, LLVMGetMDString,
LLVMGetModuleInlineAsm, LLVMGetTarget, LLVMGetValueName2, LLVMRemoveEnumAttributeAtIndex,
LLVMSetLinkage, LLVMSetModuleInlineAsm2, LLVMSetVisibility,
},
error::{
LLVMDisposeErrorMessage, LLVMGetErrorMessage, LLVMGetErrorTypeId, LLVMGetStringErrorTypeId,
},
ir_reader::LLVMParseIRInContext,
linker::LLVMLinkModules2,
object::{
LLVMCreateBinary, LLVMDisposeBinary, LLVMDisposeSectionIterator, LLVMGetSectionContents,
Expand Down Expand Up @@ -140,6 +141,48 @@ pub(crate) fn link_bitcode_buffer<'ctx>(

linked
}
#[must_use]
pub(crate) fn link_ir_buffer<'ctx>(
context: &'ctx LLVMContext,
module: &mut LLVMModule<'ctx>,
buffer: &CStr,
) -> bool {
let mut linked = false;
let buffer_name = c"ir_buffer";
let buffer = buffer.to_bytes();
let mem_buffer = unsafe {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're leaking this you need to call LLVMDisposeMemoryBuffer before returning

LLVMCreateMemoryBufferWithMemoryRange(
buffer.as_ptr().cast(),
buffer.len(),
buffer_name.as_ptr(),
1,
)
};

let mut temp_module = ptr::null_mut();
let mut error_msg = ptr::null_mut();

if unsafe {
LLVMParseIRInContext(
context.as_mut_ptr(),
mem_buffer,
&mut temp_module,
&mut error_msg,
)
} == 0
{
linked = unsafe { LLVMLinkModules2(module.as_mut_ptr(), temp_module) } == 0;
} else {
if !error_msg.is_null() {
unsafe { LLVMDisposeMessage(error_msg) };
}
if !temp_module.is_null() {
unsafe { LLVMDisposeModule(temp_module) };
}
}

linked
}

pub(crate) fn target_from_triple(triple: &CStr) -> Result<LLVMTargetRef, String> {
let mut target = ptr::null_mut();
Expand Down
103 changes: 103 additions & 0 deletions tests/ir_file_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#![expect(unused_crate_dependencies, reason = "used in lib/bin")]

use std::{
env, fs,
path::{Path, PathBuf},
process::Command,
};

fn linker_path() -> PathBuf {
PathBuf::from(env!("CARGO_BIN_EXE_bpf-linker"))
}

/// Writes a small textual LLVM IR module named `name` to `<dir>/<name>.ll`
/// and returns the path of the file that was written.
///
/// The module defines a single function `test_<name>` for the `bpf` target.
///
/// # Panics
///
/// Panics if the file cannot be written.
fn create_test_ir_file(dir: &Path, name: &str) -> PathBuf {
    // Build the module text first; `{{`/`}}` are literal braces in the IR.
    let module_text = format!(
        r#"; ModuleID = '{name}'
source_filename = "{name}"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "bpf"

define i32 @test_{name}(i32 %x) #0 {{
entry:
%result = add i32 %x, 1
ret i32 %result
}}

attributes #0 = {{ noinline nounwind optnone }}

!llvm.module.flags = !{{!0}}
!0 = !{{i32 1, !"wchar_size", i32 4}}
"#
    );

    let target = dir.join(format!("{name}.ll"));
    fs::write(&target, module_text).expect("Failed to write test IR file");
    target
}

/// Linking a single well-formed `.ll` input must succeed and produce a
/// non-empty output file.
#[test]
fn test_link_ir_file() {
    let name = "alessandro";
    let workdir = tempfile::tempdir().expect("Failed to create temp dir");
    let input = create_test_ir_file(workdir.path(), name);
    let linked = workdir.path().join("output.o");

    // Export the one function the generated module defines.
    let mut command = Command::new(linker_path());
    command
        .arg("--export")
        .arg(format!("test_{name}"))
        .arg(&input)
        .arg("-o")
        .arg(&linked);
    let result = command.output().expect("Failed to execute bpf-linker");

    if !result.status.success() {
        // Surface the linker's own output before failing the test.
        eprintln!("stdout: {}", String::from_utf8_lossy(&result.stdout));
        eprintln!("stderr: {}", String::from_utf8_lossy(&result.stderr));
        panic!("bpf-linker failed with status: {}", result.status);
    }

    assert!(linked.exists(), "Output file should exist: {:?}", linked);

    let size = linked.metadata().unwrap().len();
    assert!(size > 0, "Output file should not be empty");
}

/// Linking must fail when the input looks like textual LLVM IR but its body
/// does not parse.
#[test]
fn test_invalid_ir_file() {
    let temp_dir = tempfile::tempdir().expect("Failed to create temp dir");

    let valid_ir_file = create_test_ir_file(temp_dir.path(), "alessandro");
    let valid_content = fs::read_to_string(valid_ir_file).expect("Failed to read valid IR file");

    // Corrupt exactly one instruction. The leading `; ModuleID` header is
    // left intact on purpose: rewriting it would make the file stop looking
    // like IR, so the linker would reject it during input-type detection and
    // the IR parse-failure path would never be exercised.
    let invalid_content = valid_content.replace("add i32", "adX i32");
    assert_ne!(
        invalid_content, valid_content,
        "test fixture no longer contains the instruction being corrupted"
    );

    let invalid_ir_file = temp_dir.path().join("corrupted.ll");
    fs::write(&invalid_ir_file, invalid_content).expect("Failed to write invalid IR file");

    let output_file = temp_dir.path().join("output.o");

    let output = Command::new(linker_path())
        .arg(&invalid_ir_file)
        .arg("-o")
        .arg(&output_file)
        .output()
        .expect("Failed to execute bpf-linker");

    // Should fail with corrupted IR
    assert!(
        !output.status.success(),
        "bpf-linker should fail with corrupted IR. stderr: {}",
        String::from_utf8_lossy(&output.stderr)
    );
}

Loading