From b046f1b8d69fd480be7354a98fc0ff7049dee21d Mon Sep 17 00:00:00 2001 From: Michal R Date: Wed, 12 Nov 2025 22:15:08 +0100 Subject: [PATCH 1/3] ci: Drop Rust 1.86.0, add 1.91.0 Rust 1.91.0 is the newest stable version that uses LLVM 21. Rust 1.86.0 makes it impossible to honor the recent clippy lints. --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e668e96..2d18632e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,14 +50,14 @@ jobs: fail-fast: false matrix: include: - - rust: 1.86.0 - llvm-version: 19 - llvm-from: apt - exclude-features: default,llvm-20,llvm-21,rust-llvm-20,rust-llvm-21 - - rust: 1.89.0 + - rust: 1.90.0 llvm-version: 20 llvm-from: apt exclude-features: default,llvm-19,llvm-21,rust-llvm-19,rust-llvm-21 + - rust: 1.91.0 + llvm-version: 21 + llvm-from: apt + exclude-features: default,llvm-19,llvm-20,rust-llvm-19,rust-llvm-20 - rust: beta llvm-version: 21 llvm-from: apt From 7f7b2b62056d6d338c1a3ee7c2bc1b0e00bf1296 Mon Sep 17 00:00:00 2001 From: Michal R Date: Thu, 13 Nov 2025 06:09:13 +0100 Subject: [PATCH 2/3] ci: Install clang in version that matches LLVM version LLVM bitcode is not backwards compatible, and linking bitcode objects produced by different LLVM versions, even when it does not trigger an error in the backend, can lead to miscompilation (e.g. https://github.com/aya-rs/bpf-linker/issues/318). Using clang 15 for a compile test was "working" only by luck, because the program was simple enough not to cause a miscompilation. 
But given that it's wrong, ensure that the clang version matches the LLVM version. --- .github/workflows/ci.yml | 42 ++++++++++++++++++++++++++---------------- tests/tests.rs | 8 ++++++++ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d18632e..59531deb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,8 @@ jobs: env: RUST_BACKTRACE: full - LLVM_FEATURES: llvm-${{ matrix.llvm-version }},llvm-sys-${{ matrix.llvm-version }}/force-dynamic + LLVM_FEATURES_DYNAMIC: llvm-${{ matrix.llvm-version }},llvm-sys-${{ matrix.llvm-version }}/force-dynamic + LLVM_FEATURES_STATIC: llvm-${{ matrix.llvm-version }},llvm-sys-${{ matrix.llvm-version }}/force-static steps: - uses: actions/checkout@v5 @@ -96,28 +97,28 @@ jobs: run: cargo install btfdump - name: Install prerequisites - # ubuntu-22.04 comes with clang 13-15[0]; support for signed and 64bit - # enum values was added in clang 15[1] which isn't in `$PATH`. - # # gcc-multilib provides at least <asm/types.h> which is referenced by libbpf. - # - # [0] https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2204-Readme.md - # - # [1] https://github.com/llvm/llvm-project/commit/dc1c43d run: | set -euxo pipefail sudo apt update sudo apt -y install gcc-multilib - echo /usr/lib/llvm-15/bin >> $GITHUB_PATH - - name: Install LLVM - if: matrix.llvm-from == 'apt' + - name: Install clang + # We use clang in compiletests to test linking of bitcode produced by + # both C and Rust. The major version of clang must match the version of + # LLVM that bpf-linker is using. 
run: | set -euxo pipefail wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc echo -e deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${{ matrix.llvm-version }} main | sudo tee /etc/apt/sources.list.d/llvm.list sudo apt update + sudo apt -y install clang-${{ matrix.llvm-version }} + + - name: Install LLVM libraries and headers + if: matrix.llvm-from == 'apt' + run: | + set -euxo pipefail # TODO(vadorovsky): Remove the requirement of libpolly. # # Packages from apt.llvm.org are being built all at once, with one @@ -165,13 +166,13 @@ jobs: run: | cargo hack check --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} - name: Build run: | cargo hack build --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} # Toolchains provided by rustup include standard library artifacts # only for Tier 1 targets, which do not include BPF targets. @@ -185,7 +186,7 @@ jobs: run: | RUSTC_BOOTSTRAP=1 cargo hack test --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} # To make things easier for package maintainers, the step of building a # custom sysroot can be skipped by setting the `BPFEL_SYSROOT_DIR` @@ -209,7 +210,7 @@ jobs: BPFEL_SYSROOT_DIR="$BPFEL_SYSROOT_DIR" cargo hack test --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} - uses: actions/checkout@v5 if: matrix.rust == 'nightly' @@ -220,7 +221,16 @@ jobs: - name: Install if: matrix.rust == 'nightly' - run: cargo install --path . 
--no-default-features --features ${{ env.LLVM_FEATURES }} + # `LD_LIBRARY_PATH` set to a custom LLVM build breaks clang, + # which is linked to a system-wide LLVM with a larger configuration than our + # custom build, causing errors like: + # /usr/bin/clang-21: symbol lookup error: /usr/lib/llvm-21/bin/../lib/libclang-cpp.so.21.1: + # undefined symbol: _ZTIN4llvm5MachO13RecordVisitorE, version + # Unset the variable and link the custom LLVM statically. + run: | + set -euxo pipefail + echo "LD_LIBRARY_PATH=" >> $GITHUB_ENV + cargo install --path . --no-default-features --features ${{ env.LLVM_FEATURES_STATIC }} - name: Run aya integration tests if: matrix.rust == 'nightly' diff --git a/tests/tests.rs b/tests/tests.rs index 45fa4e08..f739ac8f 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -84,6 +84,14 @@ where .arg("-o") .arg(dst.as_ref()) .arg(src.as_ref()) + // `LD_LIBRARY_PATH` set to a custom LLVM build might break + // clang if it's dynamically linked to a libLLVM that was built with + // a larger configuration than the custom one, causing errors like: + // ``` + // /usr/bin/clang-21: symbol lookup error: /usr/lib/llvm-21/bin/../lib/libclang-cpp.so.21.1: + // undefined symbol: _ZTIN4llvm5MachO13RecordVisitorE, version + // ``` + .env_remove("LD_LIBRARY_PATH") .output() .expect("failed to execute clang"); From ad3cbcf5468c945721b4a86b275f79e72565d422 Mon Sep 17 00:00:00 2001 From: Michal R Date: Thu, 30 Oct 2025 15:15:14 +0100 Subject: [PATCH 3/3] Ensure that the bitcode was compiled with the supported LLVM version Linking bitcode produced by different LLVM versions can lead either to: - The backend failing to lower the IR to bytecode (an easier-to-debug scenario that throws a descriptive error). - "Successful" compilation but with broken BTF (DI sanitization modifies DI operands, which have incompatibilities across versions). To prevent both cases, always throw an error when the bitcode is incompatible. 
Fixes: #319 --- Cargo.lock | 7 + Cargo.toml | 1 + src/linker.rs | 63 +++++- src/llvm/bitcode.rs | 517 ++++++++++++++++++++++++++++++++++++++++++++ src/llvm/mod.rs | 108 ++++++++- 5 files changed, 687 insertions(+), 9 deletions(-) create mode 100644 src/llvm/bitcode.rs diff --git a/Cargo.lock b/Cargo.lock index 61de611a..c2bc8c5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d67af77d68a931ecd5cbd8a3b5987d63a1d1d1278f7f6a60ae33db485cdebb69" +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "autocfg" version = "1.5.0" @@ -108,6 +114,7 @@ version = "0.9.15" dependencies = [ "anyhow", "ar", + "assert_matches", "aya-rustc-llvm-proxy", "clap", "compiletest_rs", diff --git a/Cargo.toml b/Cargo.toml index 1a506ce9..2f3df2d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ thiserror = { version = "2.0.12" } tracing = "0.1" [dev-dependencies] +assert_matches = "1.5.0" compiletest_rs = { version = "0.11.0" } regex = { version = "1.11.1", default-features = false } rustc-build-sysroot = { workspace = true } diff --git a/src/linker.rs b/src/linker.rs index faea046b..7beb1c44 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -18,7 +18,29 @@ use llvm_sys::{ use thiserror::Error; use tracing::{debug, error, info, warn}; -use crate::llvm::{self, LLVMContext, LLVMModule, LLVMTargetMachine, MemoryBuffer}; +use crate::llvm::{ + self, LLVMContext, LLVMModule, LLVMTargetMachine, LlvmVersionDetectionError, MemoryBuffer, +}; + +#[cfg(feature = "llvm-19")] +const SUPPORTED_LLVM_MAJOR: u32 = 19; +#[cfg(feature = "llvm-20")] +const SUPPORTED_LLVM_MAJOR: u32 = 20; +#[cfg(feature = "llvm-21")] +const SUPPORTED_LLVM_MAJOR: u32 = 21; + +#[cfg(any( + all(feature = "llvm-19", not(feature = "rust-llvm-19")), + 
all(feature = "llvm-20", not(feature = "rust-llvm-20")), + all(feature = "llvm-21", not(feature = "rust-llvm-21")), +))] +const SUGGESTED_FEATURE_PREFIX: &str = "llvm-"; +#[cfg(any( + feature = "rust-llvm-19", + feature = "rust-llvm-20", + feature = "rust-llvm-21", +))] +const SUGGESTED_FEATURE_PREFIX: &str = "rust-llvm-"; /// Linker error #[derive(Debug, Error)] @@ -74,6 +96,27 @@ pub enum LinkerError { /// LLVM cannot create a module for linking. #[error("failed to create module")] CreateModuleError, + + /// The LLVM version embedded in the input bitcode is not supported. + #[error( + "bitcode {path} was built with LLVM {bitcode_version}, but this bpf-linker +supports LLVM {linker_version}; please re-install bpf-linker with +`cargo install --force bpf-linker --no-default-features --features +{SUGGESTED_FEATURE_PREFIX}{bitcode_version}`" + )] + LlvmVersionMismatch { + path: PathBuf, + bitcode_version: String, + linker_version: u32, + }, + + /// Failed to determine the LLVM version for a bitcode input. + #[error("failed to determine LLVM version for `{path}`: {kind}")] + LlvmVersionDetectionError { + path: PathBuf, + #[source] + kind: LlvmVersionDetectionError, + }, } /// BPF Cpu type @@ -602,11 +645,21 @@ fn link_reader<'ctx>( InputType::Archive => panic!("nested archives not supported duh"), }; - if !llvm::link_bitcode_buffer(context, module, &bitcode) { - return Err(LinkerError::LinkModuleError(path.to_owned())); + match llvm::link_bitcode_buffer(context, module, &bitcode, Some(SUPPORTED_LLVM_MAJOR)) { + Ok(true) => Ok(()), + Ok(false) => Err(LinkerError::LinkModuleError(path.to_owned())), + Err(LlvmVersionDetectionError::VersionMismatch { + bitcode_version, .. 
+ }) => Err(LinkerError::LlvmVersionMismatch { + path: path.to_owned(), + bitcode_version, + linker_version: SUPPORTED_LLVM_MAJOR, + }), + Err(kind) => Err(LinkerError::LlvmVersionDetectionError { + path: path.to_owned(), + kind, + }), } - - Ok(()) } fn create_target_machine( diff --git a/src/llvm/bitcode.rs b/src/llvm/bitcode.rs new file mode 100644 index 00000000..b79e6496 --- /dev/null +++ b/src/llvm/bitcode.rs @@ -0,0 +1,517 @@ +use std::collections::HashMap; + +#[expect(missing_copy_implementations, reason = "not needed")] +#[derive(Debug, thiserror::Error)] +pub enum BitcodeError { + #[error("bitcode has invalid size, expected at least 8 bytes, got {0}")] + InvalidSize(usize), + #[error("bitcode is not 32-bit aligned")] + Misaligned, + #[error("missing bitcode magic header")] + MissingMagicHeader, + #[error("bitcode cursor seek out of bounds")] + CursorOutOfBounds, + #[error("unexpected end of bitcode")] + UnexpectedEnd, + #[error("unsupported abbreviation encoding: {0}")] + UnsupportedAbbreviationEncoding(usize), + #[error("unsupported abbreviated record ID: {0}")] + UnsupportedAbbreviatedRecordID(usize), + #[error("abbreviation {0} referenced before definition")] + UnknownAbbreviation(usize), + #[error("array abbreviation missing element encoding")] + MissingArrayElementEncoding, + #[error("array element encoding must be non-literal")] + InvalidArrayElementEncoding, + #[error("abbreviated record missing leading code")] + MissingRecordCode, + #[error("invalid CHAR6 value {0}")] + InvalidChar6(u64), + #[error("BLOCKINFO block missing target id before defining abbreviation")] + BlockInfoMissingTarget, + #[error("BLOCKINFO SETBID record missing operand")] + BlockInfoMissingSetBidOperand, + #[error("BLOCKINFO SETBID value {0} does not fit in u32")] + BlockInfoInvalidSetBid(u64), + #[error("mising identification string")] + MissingIdentificationString, + #[error("value {0} exceeds supported range for u32")] + ValueOutOfRangeU32(u64), + #[error("value {0} exceeds 
supported range for usize")] + ValueOutOfRangeUsize(u64), + #[error("identification string contains invalid byte value {0}")] + InvalidIdentificationByte(u64), +} + +pub(crate) fn identification_string(buffer: &[u8]) -> Result { + if buffer.len() < 8 { + return Err(BitcodeError::InvalidSize(buffer.len())); + } + if !buffer.len().is_multiple_of(4) { + return Err(BitcodeError::Misaligned); + } + + let mut words = Vec::with_capacity(buffer.len() / 4); + for chunk in buffer.chunks_exact(4) { + words.push(u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])); + } + + const BITCODE_MAGIC: u32 = 0xdec0_4342; + if words.first().copied() != Some(BITCODE_MAGIC) { + return Err(BitcodeError::MissingMagicHeader); + } + + let mut cursor = BitCursor::new(&words); + cursor.seek_to_bit(32)?; + + let mut block_infos: HashMap> = HashMap::new(); + let mut current_blockinfo_target = None; + let mut blocks = vec![BlockState::root()]; + + while blocks.last().is_some() { + if cursor.is_eof() { + break; + } + + let (code_size, block_id) = { + let state = blocks.last().expect("block stack not empty"); + (state.code_size, state.block_id) + }; + let abbrev_id = cursor.read_bits(code_size)?; + match abbrev_id { + ABBREV_ID_END_BLOCK => { + cursor.align32()?; + if let Some(popped) = blocks.pop() + && popped.block_id == Some(BLOCKINFO_BLOCK_ID) + { + current_blockinfo_target = None; + } + if blocks.is_empty() { + break; + } + } + ABBREV_ID_ENTER_SUBBLOCK => { + let block_id = cursor.read_vbr_u32(SUBBLOCK_ID_VBR_WIDTH)?; + let new_code_size = cursor.read_vbr_usize(SUBBLOCK_CODE_SIZE_VBR_WIDTH)?; + cursor.align32()?; + let _len_in_words = cursor.read_bits(32)?; + let inherited = block_infos.get(&block_id).cloned().unwrap_or_default(); + blocks.push(BlockState::new(block_id, new_code_size, inherited)); + if block_id == BLOCKINFO_BLOCK_ID { + current_blockinfo_target = None; + } + } + ABBREV_ID_DEFINE_ABBREV => { + let abbrev = read_abbrev(&mut cursor)?; + if block_id == 
Some(BLOCKINFO_BLOCK_ID) { + let target = + current_blockinfo_target.ok_or(BitcodeError::BlockInfoMissingTarget)?; + block_infos.entry(target).or_default().push(abbrev); + } else { + let state = blocks.last_mut().expect("block stack not empty"); + state.abbrevs.push(abbrev); + } + } + ABBREV_ID_UNABBREV_RECORD => { + let record = read_unabbrev_record(&mut cursor)?; + if let Some(string) = + process_record(record, block_id, &mut current_blockinfo_target)? + { + return Ok(string); + } + } + other => { + if other < ABBREV_ID_UNABBREV_RECORD + 1 { + return Err(BitcodeError::UnsupportedAbbreviatedRecordID(other)); + } + let abbrev_index = other - (ABBREV_ID_UNABBREV_RECORD + 1); + let state = blocks.last_mut().expect("block stack not empty"); + let abbrev = state + .abbrevs + .get(abbrev_index) + .ok_or(BitcodeError::UnknownAbbreviation(other))?; + let record = read_abbrev_record(&mut cursor, abbrev)?; + if let Some(string) = + process_record(record, block_id, &mut current_blockinfo_target)? + { + return Ok(string); + } + } + } + } + + Err(BitcodeError::MissingIdentificationString) +} + +const ABBREV_ID_END_BLOCK: usize = 0; +const ABBREV_ID_ENTER_SUBBLOCK: usize = 1; +const ABBREV_ID_DEFINE_ABBREV: usize = 2; +const ABBREV_ID_UNABBREV_RECORD: usize = 3; + +const BLOCKINFO_BLOCK_ID: u32 = 0; +const BLOCKINFO_CODE_SETBID: u32 = 1; +const IDENTIFICATION_BLOCK_ID: u32 = 13; +const IDENTIFICATION_CODE_STRING: u32 = 1; + +/// VBR width used when decoding block IDs inside `ENTER_SUBBLOCK` records. +const SUBBLOCK_ID_VBR_WIDTH: usize = 8; +/// VBR width that encodes a subblock's local abbreviation bit width. +const SUBBLOCK_CODE_SIZE_VBR_WIDTH: usize = 4; +/// VBR width for unabbreviated record codes. +const RECORD_CODE_VBR_WIDTH: usize = 6; +/// VBR width for the number of operands in unabbreviated records. +const RECORD_NUM_OPERANDS_VBR_WIDTH: usize = 6; +/// VBR width for each operand within an unabbreviated record. 
+const RECORD_OPERAND_VBR_WIDTH: usize = 6; +/// VBR width that encodes how many ops a `DEFINE_ABBREV` entry has. +const ABBREV_NUM_OPERANDS_VBR_WIDTH: usize = 5; +/// VBR width for literal values inside `DEFINE_ABBREV`. +const LITERAL_VBR_WIDTH: usize = 8; +/// VBR width for data attached to certain abbrev encodings (`Array`/`Char6`). +const ABBREV_ENCODING_DATA_VBR_WIDTH: usize = 5; +/// VBR width used for array/blob lengths in abbreviated records. +const LENGTH_VBR_WIDTH: usize = 6; + +struct BlockState { + block_id: Option, + code_size: usize, + abbrevs: Vec, +} + +impl BlockState { + fn root() -> Self { + Self { + block_id: None, + code_size: 2, + abbrevs: Vec::new(), + } + } + + fn new(block_id: u32, code_size: usize, abbrevs: Vec) -> Self { + Self { + block_id: Some(block_id), + code_size, + abbrevs, + } + } +} + +#[derive(Clone)] +struct Abbrev { + ops: Vec, +} + +#[derive(Clone)] +enum AbbrevOp { + Literal(u64), + Encoding(AbbrevEncoding), +} + +#[derive(Clone)] +enum AbbrevEncoding { + Fixed(usize), + Vbr(usize), + Char6, + Array(Box), + Blob, +} + +/// Bit-level reader over 32-bit word slices. +/// Tracks the current bit offset and supports arbitrary-width bitcode fields. +struct BitCursor<'a> { + words: &'a [u32], + bit_len: usize, + bit_pos: usize, +} + +impl<'a> BitCursor<'a> { + fn new(words: &'a [u32]) -> Self { + Self { + words, + bit_len: words.len() * 32, + bit_pos: 0, + } + } + + fn seek_to_bit(&mut self, bit: usize) -> Result<(), BitcodeError> { + if bit > self.bit_len { + return Err(BitcodeError::CursorOutOfBounds); + } + self.bit_pos = bit; + Ok(()) + } + + fn is_eof(&self) -> bool { + self.bit_pos >= self.bit_len + } + + /// Reads `n` bits from the current position, stitching across word + /// boundaries when needed, and advances the cursor by that many bits. 
+ fn read_bits(&mut self, n: usize) -> Result { + if n == 0 { + return Ok(0); + } + if self.bit_pos + n > self.bit_len { + return Err(BitcodeError::UnexpectedEnd); + } + + let mut result = 0usize; + let mut read = 0; + + while read < n { + let word_index = self.bit_pos >> 5; + let bit_index = self.bit_pos & 31; + let bits_available = 32 - bit_index; + let take = std::cmp::min(bits_available, n - read); + let mask = if take == 32 { + usize::MAX + } else { + (1usize << take) - 1 + }; + let chunk = ((self.words[word_index] as usize) >> bit_index) & mask; + result |= chunk << read; + self.bit_pos += take; + read += take; + } + + Ok(result) + } + + /// Reads an LLVM variable-bit-rate (VBR) integer. + /// Each `width`-bit chunk uses the MSB as a continuation flag, with the + /// remaining bits appended LSB-first until a chunk clears the flag. + fn read_vbr(&mut self, width: usize) -> Result { + let mut result = 0u64; + let mut shift = 0; + loop { + let piece = self.read_bits(width)? as u64; + let continue_bit = 1u64 << (width - 1); + let value = piece & (continue_bit - 1); + result |= value << shift; + if piece & continue_bit == 0 { + break; + } + shift += width - 1; + } + Ok(result) + } + + fn read_vbr_u32(&mut self, width: usize) -> Result { + let value = self.read_vbr(width)?; + value + .try_into() + .map_err(|_| BitcodeError::ValueOutOfRangeU32(value)) + } + + fn read_vbr_usize(&mut self, width: usize) -> Result { + let value = self.read_vbr(width)?; + value + .try_into() + .map_err(|_| BitcodeError::ValueOutOfRangeUsize(value)) + } + + /// Skips padding so the cursor advances to the next 32-bit boundary. + /// LLVM blocks require subsequent contents to start on word-aligned offsets. + fn align32(&mut self) -> Result<(), BitcodeError> { + let remainder = self.bit_pos & 31; + if remainder != 0 { + let to_skip = 32 - remainder; + let _ = self.read_bits(to_skip)?; + } + Ok(()) + } +} + +/// Unabbreviated LLVM.ident record containing the opcode and raw operand payload. 
+struct Record { + code: u32, + operands: Vec, +} + +fn read_unabbrev_record(cursor: &mut BitCursor<'_>) -> Result { + let code = cursor.read_vbr_u32(RECORD_CODE_VBR_WIDTH)?; + let num_ops = cursor.read_vbr_usize(RECORD_NUM_OPERANDS_VBR_WIDTH)?; + let mut operands = Vec::with_capacity(num_ops); + for _ in 0..num_ops { + operands.push(cursor.read_vbr(RECORD_OPERAND_VBR_WIDTH)?); + } + Ok(Record { code, operands }) +} + +fn process_record( + record: Record, + block_id: Option, + current_blockinfo_target: &mut Option, +) -> Result, BitcodeError> { + if block_id == Some(BLOCKINFO_BLOCK_ID) { + handle_blockinfo_record(&record, current_blockinfo_target)?; + return Ok(None); + } + if block_id == Some(IDENTIFICATION_BLOCK_ID) && record.code == IDENTIFICATION_CODE_STRING { + let string = record_to_ident_string(record)?; + return Ok(Some(string)); + } + Ok(None) +} + +fn record_to_ident_string(record: Record) -> Result { + let bytes = record + .operands + .into_iter() + .map(|op| u8::try_from(op).map_err(|_| BitcodeError::InvalidIdentificationByte(op))) + .collect::, _>>()?; + Ok(String::from_utf8_lossy(&bytes).into_owned()) +} + +fn read_abbrev(cursor: &mut BitCursor<'_>) -> Result { + let mut remaining = cursor.read_vbr_usize(ABBREV_NUM_OPERANDS_VBR_WIDTH)?; + let mut ops = Vec::with_capacity(remaining); + while remaining > 0 { + ops.push(read_abbrev_op(cursor, &mut remaining)?); + } + Ok(Abbrev { ops }) +} + +fn read_abbrev_op( + cursor: &mut BitCursor<'_>, + remaining: &mut usize, +) -> Result { + *remaining -= 1; + let is_literal = cursor.read_bits(1)? 
!= 0; + if is_literal { + let literal = cursor.read_vbr(LITERAL_VBR_WIDTH)?; + Ok(AbbrevOp::Literal(literal)) + } else { + let encoding = read_abbrev_encoding(cursor, remaining)?; + Ok(AbbrevOp::Encoding(encoding)) + } +} + +fn read_abbrev_encoding( + cursor: &mut BitCursor<'_>, + remaining: &mut usize, +) -> Result { + let encoding_kind = cursor.read_bits(3)?; + match encoding_kind { + 1 => { + let width = cursor.read_vbr_usize(ABBREV_ENCODING_DATA_VBR_WIDTH)?; + Ok(AbbrevEncoding::Fixed(width)) + } + 2 => { + let width = cursor.read_vbr_usize(ABBREV_ENCODING_DATA_VBR_WIDTH)?; + Ok(AbbrevEncoding::Vbr(width)) + } + 3 => { + if *remaining == 0 { + return Err(BitcodeError::MissingArrayElementEncoding); + } + let element = read_abbrev_op(cursor, remaining)?; + match element { + AbbrevOp::Literal(_) => Err(BitcodeError::InvalidArrayElementEncoding), + AbbrevOp::Encoding(enc) => Ok(AbbrevEncoding::Array(Box::new(enc))), + } + } + 4 => Ok(AbbrevEncoding::Char6), + 5 => Ok(AbbrevEncoding::Blob), + other => Err(BitcodeError::UnsupportedAbbreviationEncoding(other)), + } +} + +fn read_abbrev_record(cursor: &mut BitCursor<'_>, abbrev: &Abbrev) -> Result { + let mut code = None; + let mut operands = Vec::new(); + for op in &abbrev.ops { + match op { + AbbrevOp::Literal(value) => push_value(*value, &mut code, &mut operands)?, + AbbrevOp::Encoding(encoding) => { + read_encoded_values(cursor, encoding, &mut code, &mut operands)? + } + } + } + let code = code.ok_or(BitcodeError::MissingRecordCode)?; + Ok(Record { code, operands }) +} + +fn read_encoded_values( + cursor: &mut BitCursor<'_>, + encoding: &AbbrevEncoding, + code: &mut Option, + operands: &mut Vec, +) -> Result<(), BitcodeError> { + match encoding { + AbbrevEncoding::Fixed(width) => { + let value = cursor.read_bits(*width)? 
as u64; + push_value(value, code, operands)?; + } + AbbrevEncoding::Vbr(width) => { + let value = cursor.read_vbr(*width)?; + push_value(value, code, operands)?; + } + AbbrevEncoding::Char6 => { + let raw = cursor.read_bits(6)? as u64; + let ch = decode_char6(raw)?; + push_value(u64::from(ch), code, operands)?; + } + AbbrevEncoding::Array(element) => { + let len = cursor.read_vbr_usize(LENGTH_VBR_WIDTH)?; + for _ in 0..len { + read_encoded_values(cursor, element, code, operands)?; + } + } + AbbrevEncoding::Blob => { + let len = cursor.read_vbr_usize(LENGTH_VBR_WIDTH)?; + cursor.align32()?; + for _ in 0..len { + let byte = cursor.read_bits(8)? as u64; + push_value(byte, code, operands)?; + } + cursor.align32()?; + } + } + Ok(()) +} + +fn push_value( + value: u64, + code: &mut Option, + operands: &mut Vec, +) -> Result<(), BitcodeError> { + if code.is_some() { + operands.push(value); + } else { + let record_code = + u32::try_from(value).map_err(|_| BitcodeError::ValueOutOfRangeU32(value))?; + *code = Some(record_code); + } + Ok(()) +} + +fn decode_char6(value: u64) -> Result { + #[expect( + clippy::cast_possible_truncation, + reason = "values are checked before casting" + )] + let ch = match value { + 0..=25 => b'a' + value as u8, + 26..=51 => b'A' + (value as u8 - 26), + 52..=61 => b'0' + (value as u8 - 52), + 62 => b'.', + 63 => b'_', + _ => return Err(BitcodeError::InvalidChar6(value)), + }; + Ok(ch) +} + +fn handle_blockinfo_record(record: &Record, current: &mut Option) -> Result<(), BitcodeError> { + if record.code == BLOCKINFO_CODE_SETBID { + let value = record + .operands + .first() + .copied() + .ok_or(BitcodeError::BlockInfoMissingSetBidOperand)?; + let block_id = + u32::try_from(value).map_err(|_| BitcodeError::BlockInfoInvalidSetBid(value))?; + *current = Some(block_id); + } + Ok(()) +} diff --git a/src/llvm/mod.rs b/src/llvm/mod.rs index 5e644084..3dfcd837 100644 --- a/src/llvm/mod.rs +++ b/src/llvm/mod.rs @@ -1,3 +1,4 @@ +mod bitcode; mod di; mod iter; 
mod types; @@ -49,7 +50,71 @@ pub(crate) use types::{ target_machine::LLVMTargetMachine, }; -use crate::OptLevel; +use crate::{OptLevel, llvm::bitcode::BitcodeError}; + +#[derive(Debug, thiserror::Error)] +pub enum LlvmVersionDetectionError { + #[error("failed to retrieve LLVM version from bitcode: {0}")] + Bitcode(#[from] BitcodeError), + #[error("unexpected bitcode producer string `{producer}`")] + UnexpectedProducerString { producer: String }, + #[error("invalid major version `{major}`")] + InvalidMajor { + major: String, + #[source] + source: std::num::ParseIntError, + }, + #[error( + "bitcode built with LLVM {bitcode_major} ({bitcode_version}), expected LLVM {expected_major}" + )] + VersionMismatch { + bitcode_major: u32, + bitcode_version: String, + expected_major: u32, + }, +} + +/// Parses the `llvm.ident` record to return its `(major, full_version)` tuple. +pub(crate) fn bitcode_llvm_version( + buffer: &[u8], +) -> Result<(u32, String), LlvmVersionDetectionError> { + let producer = bitcode::identification_string(buffer)?; + parse_llvm_version_from_producer(&producer) +} + +fn parse_llvm_version_from_producer( + producer: &str, +) -> Result<(u32, String), LlvmVersionDetectionError> { + let remainder = producer.strip_prefix("LLVM").ok_or_else(|| { + LlvmVersionDetectionError::UnexpectedProducerString { + producer: producer.to_owned(), + } + })?; + + let remainder = remainder.trim_start_matches(' '); + let version: String = remainder + .chars() + .take_while(|c| c.is_ascii_digit() || *c == '.') + .collect(); + + if version.is_empty() { + return Err(LlvmVersionDetectionError::UnexpectedProducerString { + producer: producer.to_owned(), + }); + } + + let major_part = version.split('.').next().unwrap_or(&version); + + let major = + major_part + .parse::() + .map_err(|source| LlvmVersionDetectionError::InvalidMajor { + major: major_part.to_owned(), + source, + })?; + + Ok((major, version)) +} pub(crate) fn init(args: &[Cow<'_, CStr>], overview: &CStr) { unsafe { 
@@ -113,12 +178,23 @@ pub(crate) fn find_embedded_bitcode( Ok(ret) } -#[must_use] pub(crate) fn link_bitcode_buffer<'ctx>( context: &'ctx LLVMContext, module: &mut LLVMModule<'ctx>, buffer: &[u8], -) -> bool { + expected_major: Option, +) -> Result { + if let Some(expected_major) = expected_major { + let (major, version) = bitcode_llvm_version(buffer)?; + if major != expected_major { + return Err(LlvmVersionDetectionError::VersionMismatch { + bitcode_major: major, + bitcode_version: version, + expected_major, + }); + } + } + let mut linked = false; let buffer_name = c"mem_buffer"; let buffer = unsafe { @@ -138,7 +214,7 @@ pub(crate) fn link_bitcode_buffer<'ctx>( unsafe { LLVMDisposeMemoryBuffer(buffer) }; - linked + Ok(linked) } pub(crate) fn target_from_triple(triple: &CStr) -> Result { @@ -314,3 +390,27 @@ impl Drop for Message { } } } + +#[cfg(test)] +mod tests { + use assert_matches::assert_matches; + + use super::{LlvmVersionDetectionError, parse_llvm_version_from_producer}; + + #[test] + fn parses_major_minor_patch_version() { + let parsed = parse_llvm_version_from_producer("LLVM21.1.4+libcxx").unwrap(); + assert_eq!(parsed, (21, String::from("21.1.4"))); + } + + #[test] + fn errors_when_prefix_missing() { + let err = parse_llvm_version_from_producer("rustc version 1.90") + .expect_err("expected producer parsing error"); + assert_matches!( + err, + LlvmVersionDetectionError::UnexpectedProducerString { producer } + if producer == "rustc version 1.90" + ); + } +}