diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e668e96..59531deb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,14 +50,14 @@ jobs: fail-fast: false matrix: include: - - rust: 1.86.0 - llvm-version: 19 - llvm-from: apt - exclude-features: default,llvm-20,llvm-21,rust-llvm-20,rust-llvm-21 - - rust: 1.89.0 + - rust: 1.90.0 llvm-version: 20 llvm-from: apt exclude-features: default,llvm-19,llvm-21,rust-llvm-19,rust-llvm-21 + - rust: 1.91.0 + llvm-version: 21 + llvm-from: apt + exclude-features: default,llvm-19,llvm-20,rust-llvm-19,rust-llvm-20 - rust: beta llvm-version: 21 llvm-from: apt @@ -75,7 +75,8 @@ jobs: env: RUST_BACKTRACE: full - LLVM_FEATURES: llvm-${{ matrix.llvm-version }},llvm-sys-${{ matrix.llvm-version }}/force-dynamic + LLVM_FEATURES_DYNAMIC: llvm-${{ matrix.llvm-version }},llvm-sys-${{ matrix.llvm-version }}/force-dynamic + LLVM_FEATURES_STATIC: llvm-${{ matrix.llvm-version }},llvm-sys-${{ matrix.llvm-version }}/force-static steps: - uses: actions/checkout@v5 @@ -96,28 +97,28 @@ jobs: run: cargo install btfdump - name: Install prerequisites - # ubuntu-22.04 comes with clang 13-15[0]; support for signed and 64bit - # enum values was added in clang 15[1] which isn't in `$PATH`. - # # gcc-multilib provides at least which is referenced by libbpf. - # - # [0] https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2204-Readme.md - # - # [1] https://github.com/llvm/llvm-project/commit/dc1c43d run: | set -euxo pipefail sudo apt update sudo apt -y install gcc-multilib - echo /usr/lib/llvm-15/bin >> $GITHUB_PATH - - name: Install LLVM - if: matrix.llvm-from == 'apt' + - name: Install clang + # We use clang in compiletests to test linking of bitcode produced by + # both C and Rust. The major version of clang must match the version of + # LLVM that bpf-linker is using. run: | set -euxo pipefail wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc echo -e deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${{ matrix.llvm-version }} main | sudo tee /etc/apt/sources.list.d/llvm.list sudo apt update + sudo apt -y install clang-${{ matrix.llvm-version }} + + - name: Install LLVM libraries and headers + if: matrix.llvm-from == 'apt' + run: | + set -euxo pipefail # TODO(vadorovsky): Remove the requirement of libpolly. # # Packages from apt.llvm.org are being built all at once, with one @@ -165,13 +166,13 @@ jobs: run: | cargo hack check --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} - name: Build run: | cargo hack build --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} # Toolchains provided by rustup include standard library artifacts # only for Tier 1 targets, which do not include BPF targets. @@ -185,7 +186,7 @@ jobs: run: | RUSTC_BOOTSTRAP=1 cargo hack test --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} # To make things easier for package maintainers, the step of building a # custom sysroot can be skipped by setting the `BPFEL_SYSROOT_DIR` @@ -209,7 +210,7 @@ jobs: BPFEL_SYSROOT_DIR="$BPFEL_SYSROOT_DIR" cargo hack test --feature-powerset \ --exclude-features ${{ matrix.exclude-features }} \ - --features ${{ env.LLVM_FEATURES }} + --features ${{ env.LLVM_FEATURES_DYNAMIC }} - uses: actions/checkout@v5 if: matrix.rust == 'nightly' @@ -220,7 +221,16 @@ jobs: - name: Install if: matrix.rust == 'nightly' - run: cargo install --path . --no-default-features --features ${{ env.LLVM_FEATURES }} + # `LD_LIBRARY_PATH` set to a custom LLVM build messes up with clang, + # that is linked to system-wide LLVM with larger configuration than our + # custom build, causing errors like: + # /usr/bin/clang-21: symbol lookup error: /usr/lib/llvm-21/bin/../lib/libclang-cpp.so.21.1: + # undefined symbol: _ZTIN4llvm5MachO13RecordVisitorE, version + # Unset the variable and link the custom LLVM statically. + run: | + set -euxo pipefail + echo "LD_LIBRARY_PATH=" >> $GITHUB_ENV + cargo install --path . --no-default-features --features ${{ env.LLVM_FEATURES_STATIC }} - name: Run aya integration tests if: matrix.rust == 'nightly' diff --git a/Cargo.lock b/Cargo.lock index 61de611a..c2bc8c5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d67af77d68a931ecd5cbd8a3b5987d63a1d1d1278f7f6a60ae33db485cdebb69" +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "autocfg" version = "1.5.0" @@ -108,6 +114,7 @@ version = "0.9.15" dependencies = [ "anyhow", "ar", + "assert_matches", "aya-rustc-llvm-proxy", "clap", "compiletest_rs", diff --git a/Cargo.toml b/Cargo.toml index 1a506ce9..2f3df2d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ thiserror = { version = "2.0.12" } tracing = "0.1" [dev-dependencies] +assert_matches = "1.5.0" compiletest_rs = { version = "0.11.0" } regex = { version = "1.11.1", default-features = false } rustc-build-sysroot = { workspace = true } diff --git a/src/linker.rs b/src/linker.rs index faea046b..7beb1c44 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -18,7 +18,29 @@ use llvm_sys::{ use thiserror::Error; use tracing::{debug, error, info, warn}; -use crate::llvm::{self, LLVMContext, LLVMModule, LLVMTargetMachine, MemoryBuffer}; +use crate::llvm::{ + self, LLVMContext, LLVMModule, LLVMTargetMachine, LlvmVersionDetectionError, MemoryBuffer, +}; + +#[cfg(feature = "llvm-19")] +const SUPPORTED_LLVM_MAJOR: u32 = 19; +#[cfg(feature = "llvm-20")] +const SUPPORTED_LLVM_MAJOR: u32 = 20; +#[cfg(feature = "llvm-21")] +const SUPPORTED_LLVM_MAJOR: u32 = 21; + +#[cfg(any( + all(feature = "llvm-19", not(feature = "rust-llvm-19")), + all(feature = "llvm-20", not(feature = "rust-llvm-20")), + all(feature = "llvm-21", not(feature = "rust-llvm-21")), +))] +const SUGGESTED_FEATURE_PREFIX: &str = "llvm-"; +#[cfg(any( + feature = "rust-llvm-19", + feature = "rust-llvm-20", + feature = "rust-llvm-21", +))] +const SUGGESTED_FEATURE_PREFIX: &str = "rust-llvm-"; /// Linker error #[derive(Debug, Error)] @@ -74,6 +96,27 @@ pub enum LinkerError { /// LLVM cannot create a module for linking. #[error("failed to create module")] CreateModuleError, + + /// The LLVM version embedded in the input bitcode is not supported. + #[error( + "bitcode {path} was built with LLVM {bitcode_version}, but this bpf-linker +supports LLVM {linker_version}; please re-install bpf-linker with +`cargo install --force bpf-linker --no-default-features --features +{SUGGESTED_FEATURE_PREFIX}{bitcode_version}`" + )] + LlvmVersionMismatch { + path: PathBuf, + bitcode_version: String, + linker_version: u32, + }, + + /// Failed to determine the LLVM version for a bitcode input. + #[error("failed to determine LLVM version for `{path}`: {kind}")] + LlvmVersionDetectionError { + path: PathBuf, + #[source] + kind: LlvmVersionDetectionError, + }, } /// BPF Cpu type @@ -602,11 +645,21 @@ fn link_reader<'ctx>( InputType::Archive => panic!("nested archives not supported duh"), }; - if !llvm::link_bitcode_buffer(context, module, &bitcode) { - return Err(LinkerError::LinkModuleError(path.to_owned())); + match llvm::link_bitcode_buffer(context, module, &bitcode, Some(SUPPORTED_LLVM_MAJOR)) { + Ok(true) => Ok(()), + Ok(false) => Err(LinkerError::LinkModuleError(path.to_owned())), + Err(LlvmVersionDetectionError::VersionMismatch { + bitcode_version, .. + }) => Err(LinkerError::LlvmVersionMismatch { + path: path.to_owned(), + bitcode_version, + linker_version: SUPPORTED_LLVM_MAJOR, + }), + Err(kind) => Err(LinkerError::LlvmVersionDetectionError { + path: path.to_owned(), + kind, + }), } - - Ok(()) } fn create_target_machine( diff --git a/src/llvm/bitcode.rs b/src/llvm/bitcode.rs new file mode 100644 index 00000000..b79e6496 --- /dev/null +++ b/src/llvm/bitcode.rs @@ -0,0 +1,517 @@ +use std::collections::HashMap; + +#[expect(missing_copy_implementations, reason = "not needed")] +#[derive(Debug, thiserror::Error)] +pub enum BitcodeError { + #[error("bitcode has invalid size, expected at least 8 bytes, got {0}")] + InvalidSize(usize), + #[error("bitcode is not 32-bit aligned")] + Misaligned, + #[error("missing bitcode magic header")] + MissingMagicHeader, + #[error("bitcode cursor seek out of bounds")] + CursorOutOfBounds, + #[error("unexpected end of bitcode")] + UnexpectedEnd, + #[error("unsupported abbreviation encoding: {0}")] + UnsupportedAbbreviationEncoding(usize), + #[error("unsupported abbreviated record ID: {0}")] + UnsupportedAbbreviatedRecordID(usize), + #[error("abbreviation {0} referenced before definition")] + UnknownAbbreviation(usize), + #[error("array abbreviation missing element encoding")] + MissingArrayElementEncoding, + #[error("array element encoding must be non-literal")] + InvalidArrayElementEncoding, + #[error("abbreviated record missing leading code")] + MissingRecordCode, + #[error("invalid CHAR6 value {0}")] + InvalidChar6(u64), + #[error("BLOCKINFO block missing target id before defining abbreviation")] + BlockInfoMissingTarget, + #[error("BLOCKINFO SETBID record missing operand")] + BlockInfoMissingSetBidOperand, + #[error("BLOCKINFO SETBID value {0} does not fit in u32")] + BlockInfoInvalidSetBid(u64), + #[error("mising identification string")] + MissingIdentificationString, + #[error("value {0} exceeds supported range for u32")] + ValueOutOfRangeU32(u64), + #[error("value {0} exceeds supported range for usize")] + ValueOutOfRangeUsize(u64), + #[error("identification string contains invalid byte value {0}")] + InvalidIdentificationByte(u64), +} + +pub(crate) fn identification_string(buffer: &[u8]) -> Result { + if buffer.len() < 8 { + return Err(BitcodeError::InvalidSize(buffer.len())); + } + if !buffer.len().is_multiple_of(4) { + return Err(BitcodeError::Misaligned); + } + + let mut words = Vec::with_capacity(buffer.len() / 4); + for chunk in buffer.chunks_exact(4) { + words.push(u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])); + } + + const BITCODE_MAGIC: u32 = 0xdec0_4342; + if words.first().copied() != Some(BITCODE_MAGIC) { + return Err(BitcodeError::MissingMagicHeader); + } + + let mut cursor = BitCursor::new(&words); + cursor.seek_to_bit(32)?; + + let mut block_infos: HashMap> = HashMap::new(); + let mut current_blockinfo_target = None; + let mut blocks = vec![BlockState::root()]; + + while blocks.last().is_some() { + if cursor.is_eof() { + break; + } + + let (code_size, block_id) = { + let state = blocks.last().expect("block stack not empty"); + (state.code_size, state.block_id) + }; + let abbrev_id = cursor.read_bits(code_size)?; + match abbrev_id { + ABBREV_ID_END_BLOCK => { + cursor.align32()?; + if let Some(popped) = blocks.pop() + && popped.block_id == Some(BLOCKINFO_BLOCK_ID) + { + current_blockinfo_target = None; + } + if blocks.is_empty() { + break; + } + } + ABBREV_ID_ENTER_SUBBLOCK => { + let block_id = cursor.read_vbr_u32(SUBBLOCK_ID_VBR_WIDTH)?; + let new_code_size = cursor.read_vbr_usize(SUBBLOCK_CODE_SIZE_VBR_WIDTH)?; + cursor.align32()?; + let _len_in_words = cursor.read_bits(32)?; + let inherited = block_infos.get(&block_id).cloned().unwrap_or_default(); + blocks.push(BlockState::new(block_id, new_code_size, inherited)); + if block_id == BLOCKINFO_BLOCK_ID { + current_blockinfo_target = None; + } + } + ABBREV_ID_DEFINE_ABBREV => { + let abbrev = read_abbrev(&mut cursor)?; + if block_id == Some(BLOCKINFO_BLOCK_ID) { + let target = + current_blockinfo_target.ok_or(BitcodeError::BlockInfoMissingTarget)?; + block_infos.entry(target).or_default().push(abbrev); + } else { + let state = blocks.last_mut().expect("block stack not empty"); + state.abbrevs.push(abbrev); + } + } + ABBREV_ID_UNABBREV_RECORD => { + let record = read_unabbrev_record(&mut cursor)?; + if let Some(string) = + process_record(record, block_id, &mut current_blockinfo_target)? + { + return Ok(string); + } + } + other => { + if other < ABBREV_ID_UNABBREV_RECORD + 1 { + return Err(BitcodeError::UnsupportedAbbreviatedRecordID(other)); + } + let abbrev_index = other - (ABBREV_ID_UNABBREV_RECORD + 1); + let state = blocks.last_mut().expect("block stack not empty"); + let abbrev = state + .abbrevs + .get(abbrev_index) + .ok_or(BitcodeError::UnknownAbbreviation(other))?; + let record = read_abbrev_record(&mut cursor, abbrev)?; + if let Some(string) = + process_record(record, block_id, &mut current_blockinfo_target)? + { + return Ok(string); + } + } + } + } + + Err(BitcodeError::MissingIdentificationString) +} + +const ABBREV_ID_END_BLOCK: usize = 0; +const ABBREV_ID_ENTER_SUBBLOCK: usize = 1; +const ABBREV_ID_DEFINE_ABBREV: usize = 2; +const ABBREV_ID_UNABBREV_RECORD: usize = 3; + +const BLOCKINFO_BLOCK_ID: u32 = 0; +const BLOCKINFO_CODE_SETBID: u32 = 1; +const IDENTIFICATION_BLOCK_ID: u32 = 13; +const IDENTIFICATION_CODE_STRING: u32 = 1; + +/// VBR width used when decoding block IDs inside `ENTER_SUBBLOCK` records. +const SUBBLOCK_ID_VBR_WIDTH: usize = 8; +/// VBR width that encodes a subblock's local abbreviation bit width. +const SUBBLOCK_CODE_SIZE_VBR_WIDTH: usize = 4; +/// VBR width for unabbreviated record codes. +const RECORD_CODE_VBR_WIDTH: usize = 6; +/// VBR width for the number of operands in unabbreviated records. +const RECORD_NUM_OPERANDS_VBR_WIDTH: usize = 6; +/// VBR width for each operand within an unabbreviated record. +const RECORD_OPERAND_VBR_WIDTH: usize = 6; +/// VBR width that encodes how many ops a `DEFINE_ABBREV` entry has. +const ABBREV_NUM_OPERANDS_VBR_WIDTH: usize = 5; +/// VBR width for literal values inside `DEFINE_ABBREV`. +const LITERAL_VBR_WIDTH: usize = 8; +/// VBR width for data attached to certain abbrev encodings (`Array`/`Char6`). +const ABBREV_ENCODING_DATA_VBR_WIDTH: usize = 5; +/// VBR width used for array/blob lengths in abbreviated records. +const LENGTH_VBR_WIDTH: usize = 6; + +struct BlockState { + block_id: Option, + code_size: usize, + abbrevs: Vec, +} + +impl BlockState { + fn root() -> Self { + Self { + block_id: None, + code_size: 2, + abbrevs: Vec::new(), + } + } + + fn new(block_id: u32, code_size: usize, abbrevs: Vec) -> Self { + Self { + block_id: Some(block_id), + code_size, + abbrevs, + } + } +} + +#[derive(Clone)] +struct Abbrev { + ops: Vec, +} + +#[derive(Clone)] +enum AbbrevOp { + Literal(u64), + Encoding(AbbrevEncoding), +} + +#[derive(Clone)] +enum AbbrevEncoding { + Fixed(usize), + Vbr(usize), + Char6, + Array(Box), + Blob, +} + +/// Bit-level reader over 32-bit word slices. +/// Tracks the current bit offset and supports arbitrary-width bitcode fields. +struct BitCursor<'a> { + words: &'a [u32], + bit_len: usize, + bit_pos: usize, +} + +impl<'a> BitCursor<'a> { + fn new(words: &'a [u32]) -> Self { + Self { + words, + bit_len: words.len() * 32, + bit_pos: 0, + } + } + + fn seek_to_bit(&mut self, bit: usize) -> Result<(), BitcodeError> { + if bit > self.bit_len { + return Err(BitcodeError::CursorOutOfBounds); + } + self.bit_pos = bit; + Ok(()) + } + + fn is_eof(&self) -> bool { + self.bit_pos >= self.bit_len + } + + /// Reads `n` bits from the current position, stitching across word + /// boundaries when needed, and advances the cursor by that many bits. + fn read_bits(&mut self, n: usize) -> Result { + if n == 0 { + return Ok(0); + } + if self.bit_pos + n > self.bit_len { + return Err(BitcodeError::UnexpectedEnd); + } + + let mut result = 0usize; + let mut read = 0; + + while read < n { + let word_index = self.bit_pos >> 5; + let bit_index = self.bit_pos & 31; + let bits_available = 32 - bit_index; + let take = std::cmp::min(bits_available, n - read); + let mask = if take == 32 { + usize::MAX + } else { + (1usize << take) - 1 + }; + let chunk = ((self.words[word_index] as usize) >> bit_index) & mask; + result |= chunk << read; + self.bit_pos += take; + read += take; + } + + Ok(result) + } + + /// Reads an LLVM variable-bit-rate (VBR) integer. + /// Each `width`-bit chunk uses the MSB as a continuation flag, with the + /// remaining bits appended LSB-first until a chunk clears the flag. + fn read_vbr(&mut self, width: usize) -> Result { + let mut result = 0u64; + let mut shift = 0; + loop { + let piece = self.read_bits(width)? as u64; + let continue_bit = 1u64 << (width - 1); + let value = piece & (continue_bit - 1); + result |= value << shift; + if piece & continue_bit == 0 { + break; + } + shift += width - 1; + } + Ok(result) + } + + fn read_vbr_u32(&mut self, width: usize) -> Result { + let value = self.read_vbr(width)?; + value + .try_into() + .map_err(|_| BitcodeError::ValueOutOfRangeU32(value)) + } + + fn read_vbr_usize(&mut self, width: usize) -> Result { + let value = self.read_vbr(width)?; + value + .try_into() + .map_err(|_| BitcodeError::ValueOutOfRangeUsize(value)) + } + + /// Skips padding so the cursor advances to the next 32-bit boundary. + /// LLVM blocks require subsequent contents to start on word-aligned offsets. + fn align32(&mut self) -> Result<(), BitcodeError> { + let remainder = self.bit_pos & 31; + if remainder != 0 { + let to_skip = 32 - remainder; + let _ = self.read_bits(to_skip)?; + } + Ok(()) + } +} + +/// Unabbreviated LLVM.ident record containing the opcode and raw operand payload. +struct Record { + code: u32, + operands: Vec, +} + +fn read_unabbrev_record(cursor: &mut BitCursor<'_>) -> Result { + let code = cursor.read_vbr_u32(RECORD_CODE_VBR_WIDTH)?; + let num_ops = cursor.read_vbr_usize(RECORD_NUM_OPERANDS_VBR_WIDTH)?; + let mut operands = Vec::with_capacity(num_ops); + for _ in 0..num_ops { + operands.push(cursor.read_vbr(RECORD_OPERAND_VBR_WIDTH)?); + } + Ok(Record { code, operands }) +} + +fn process_record( + record: Record, + block_id: Option, + current_blockinfo_target: &mut Option, +) -> Result, BitcodeError> { + if block_id == Some(BLOCKINFO_BLOCK_ID) { + handle_blockinfo_record(&record, current_blockinfo_target)?; + return Ok(None); + } + if block_id == Some(IDENTIFICATION_BLOCK_ID) && record.code == IDENTIFICATION_CODE_STRING { + let string = record_to_ident_string(record)?; + return Ok(Some(string)); + } + Ok(None) +} + +fn record_to_ident_string(record: Record) -> Result { + let bytes = record + .operands + .into_iter() + .map(|op| u8::try_from(op).map_err(|_| BitcodeError::InvalidIdentificationByte(op))) + .collect::, _>>()?; + Ok(String::from_utf8_lossy(&bytes).into_owned()) +} + +fn read_abbrev(cursor: &mut BitCursor<'_>) -> Result { + let mut remaining = cursor.read_vbr_usize(ABBREV_NUM_OPERANDS_VBR_WIDTH)?; + let mut ops = Vec::with_capacity(remaining); + while remaining > 0 { + ops.push(read_abbrev_op(cursor, &mut remaining)?); + } + Ok(Abbrev { ops }) +} + +fn read_abbrev_op( + cursor: &mut BitCursor<'_>, + remaining: &mut usize, +) -> Result { + *remaining -= 1; + let is_literal = cursor.read_bits(1)? != 0; + if is_literal { + let literal = cursor.read_vbr(LITERAL_VBR_WIDTH)?; + Ok(AbbrevOp::Literal(literal)) + } else { + let encoding = read_abbrev_encoding(cursor, remaining)?; + Ok(AbbrevOp::Encoding(encoding)) + } +} + +fn read_abbrev_encoding( + cursor: &mut BitCursor<'_>, + remaining: &mut usize, +) -> Result { + let encoding_kind = cursor.read_bits(3)?; + match encoding_kind { + 1 => { + let width = cursor.read_vbr_usize(ABBREV_ENCODING_DATA_VBR_WIDTH)?; + Ok(AbbrevEncoding::Fixed(width)) + } + 2 => { + let width = cursor.read_vbr_usize(ABBREV_ENCODING_DATA_VBR_WIDTH)?; + Ok(AbbrevEncoding::Vbr(width)) + } + 3 => { + if *remaining == 0 { + return Err(BitcodeError::MissingArrayElementEncoding); + } + let element = read_abbrev_op(cursor, remaining)?; + match element { + AbbrevOp::Literal(_) => Err(BitcodeError::InvalidArrayElementEncoding), + AbbrevOp::Encoding(enc) => Ok(AbbrevEncoding::Array(Box::new(enc))), + } + } + 4 => Ok(AbbrevEncoding::Char6), + 5 => Ok(AbbrevEncoding::Blob), + other => Err(BitcodeError::UnsupportedAbbreviationEncoding(other)), + } +} + +fn read_abbrev_record(cursor: &mut BitCursor<'_>, abbrev: &Abbrev) -> Result { + let mut code = None; + let mut operands = Vec::new(); + for op in &abbrev.ops { + match op { + AbbrevOp::Literal(value) => push_value(*value, &mut code, &mut operands)?, + AbbrevOp::Encoding(encoding) => { + read_encoded_values(cursor, encoding, &mut code, &mut operands)? + } + } + } + let code = code.ok_or(BitcodeError::MissingRecordCode)?; + Ok(Record { code, operands }) +} + +fn read_encoded_values( + cursor: &mut BitCursor<'_>, + encoding: &AbbrevEncoding, + code: &mut Option, + operands: &mut Vec, +) -> Result<(), BitcodeError> { + match encoding { + AbbrevEncoding::Fixed(width) => { + let value = cursor.read_bits(*width)? as u64; + push_value(value, code, operands)?; + } + AbbrevEncoding::Vbr(width) => { + let value = cursor.read_vbr(*width)?; + push_value(value, code, operands)?; + } + AbbrevEncoding::Char6 => { + let raw = cursor.read_bits(6)? as u64; + let ch = decode_char6(raw)?; + push_value(u64::from(ch), code, operands)?; + } + AbbrevEncoding::Array(element) => { + let len = cursor.read_vbr_usize(LENGTH_VBR_WIDTH)?; + for _ in 0..len { + read_encoded_values(cursor, element, code, operands)?; + } + } + AbbrevEncoding::Blob => { + let len = cursor.read_vbr_usize(LENGTH_VBR_WIDTH)?; + cursor.align32()?; + for _ in 0..len { + let byte = cursor.read_bits(8)? as u64; + push_value(byte, code, operands)?; + } + cursor.align32()?; + } + } + Ok(()) +} + +fn push_value( + value: u64, + code: &mut Option, + operands: &mut Vec, +) -> Result<(), BitcodeError> { + if code.is_some() { + operands.push(value); + } else { + let record_code = + u32::try_from(value).map_err(|_| BitcodeError::ValueOutOfRangeU32(value))?; + *code = Some(record_code); + } + Ok(()) +} + +fn decode_char6(value: u64) -> Result { + #[expect( + clippy::cast_possible_truncation, + reason = "values are checked before casting" + )] + let ch = match value { + 0..=25 => b'a' + value as u8, + 26..=51 => b'A' + (value as u8 - 26), + 52..=61 => b'0' + (value as u8 - 52), + 62 => b'.', + 63 => b'_', + _ => return Err(BitcodeError::InvalidChar6(value)), + }; + Ok(ch) +} + +fn handle_blockinfo_record(record: &Record, current: &mut Option) -> Result<(), BitcodeError> { + if record.code == BLOCKINFO_CODE_SETBID { + let value = record + .operands + .first() + .copied() + .ok_or(BitcodeError::BlockInfoMissingSetBidOperand)?; + let block_id = + u32::try_from(value).map_err(|_| BitcodeError::BlockInfoInvalidSetBid(value))?; + *current = Some(block_id); + } + Ok(()) +} diff --git a/src/llvm/mod.rs b/src/llvm/mod.rs index 5e644084..3dfcd837 100644 --- a/src/llvm/mod.rs +++ b/src/llvm/mod.rs @@ -1,3 +1,4 @@ +mod bitcode; mod di; mod iter; mod types; @@ -49,7 +50,71 @@ pub(crate) use types::{ target_machine::LLVMTargetMachine, }; -use crate::OptLevel; +use crate::{OptLevel, llvm::bitcode::BitcodeError}; + +#[derive(Debug, thiserror::Error)] +pub enum LlvmVersionDetectionError { + #[error("failed to retrieve LLVM version from bitcode: {0}")] + Bitcode(#[from] BitcodeError), + #[error("unexpected bitcode producer string `{producer}`")] + UnexpectedProducerString { producer: String }, + #[error("invalid major version `{major}`")] + InvalidMajor { + major: String, + #[source] + source: std::num::ParseIntError, + }, + #[error( + "bitcode built with LLVM {bitcode_major} ({bitcode_version}), expected LLVM {expected_major}" + )] + VersionMismatch { + bitcode_major: u32, + bitcode_version: String, + expected_major: u32, + }, +} + +/// Parses the `llvm.ident` record to return its `(major, full_version)` tuple. +pub(crate) fn bitcode_llvm_version( + buffer: &[u8], +) -> Result<(u32, String), LlvmVersionDetectionError> { + let producer = bitcode::identification_string(buffer)?; + parse_llvm_version_from_producer(&producer) +} + +fn parse_llvm_version_from_producer( + producer: &str, +) -> Result<(u32, String), LlvmVersionDetectionError> { + let remainder = producer.strip_prefix("LLVM").ok_or_else(|| { + LlvmVersionDetectionError::UnexpectedProducerString { + producer: producer.to_owned(), + } + })?; + + let remainder = remainder.trim_start_matches(' '); + let version: String = remainder + .chars() + .take_while(|c| c.is_ascii_digit() || *c == '.') + .collect(); + + if version.is_empty() { + return Err(LlvmVersionDetectionError::UnexpectedProducerString { + producer: producer.to_owned(), + }); + } + + let major_part = version.split('.').next().unwrap_or(&version); + + let major = + major_part + .parse::() + .map_err(|source| LlvmVersionDetectionError::InvalidMajor { + major: major_part.to_owned(), + source, + })?; + + Ok((major, version)) +} pub(crate) fn init(args: &[Cow<'_, CStr>], overview: &CStr) { unsafe { @@ -113,12 +178,23 @@ pub(crate) fn find_embedded_bitcode( Ok(ret) } -#[must_use] pub(crate) fn link_bitcode_buffer<'ctx>( context: &'ctx LLVMContext, module: &mut LLVMModule<'ctx>, buffer: &[u8], -) -> bool { + expected_major: Option, +) -> Result { + if let Some(expected_major) = expected_major { + let (major, version) = bitcode_llvm_version(buffer)?; + if major != expected_major { + return Err(LlvmVersionDetectionError::VersionMismatch { + bitcode_major: major, + bitcode_version: version, + expected_major, + }); + } + } + let mut linked = false; let buffer_name = c"mem_buffer"; let buffer = unsafe { @@ -138,7 +214,7 @@ pub(crate) fn link_bitcode_buffer<'ctx>( unsafe { LLVMDisposeMemoryBuffer(buffer) }; - linked + Ok(linked) } pub(crate) fn target_from_triple(triple: &CStr) -> Result { @@ -314,3 +390,27 @@ impl Drop for Message { } } } + +#[cfg(test)] +mod tests { + use assert_matches::assert_matches; + + use super::{LlvmVersionDetectionError, parse_llvm_version_from_producer}; + + #[test] + fn parses_major_minor_patch_version() { + let parsed = parse_llvm_version_from_producer("LLVM21.1.4+libcxx").unwrap(); + assert_eq!(parsed, (21, String::from("21.1.4"))); + } + + #[test] + fn errors_when_prefix_missing() { + let err = parse_llvm_version_from_producer("rustc version 1.90") + .expect_err("expected producer parsing error"); + assert_matches!( + err, + LlvmVersionDetectionError::UnexpectedProducerString { producer } + if producer == "rustc version 1.90" + ); + } +} diff --git a/tests/tests.rs b/tests/tests.rs index 45fa4e08..f739ac8f 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -84,6 +84,14 @@ where .arg("-o") .arg(dst.as_ref()) .arg(src.as_ref()) + // `LD_LIBRARY_PATH` set to a custom LLVM build might mess up with + // clang, if it's dynamically linked to libLLVM that was build with + // larger configuration than the custom one, causing errors like: + // ``` + // /usr/bin/clang-21: symbol lookup error: /usr/lib/llvm-21/bin/../lib/libclang-cpp.so.21.1: + // undefined symbol: _ZTIN4llvm5MachO13RecordVisitorE, version + // ``` + .env_remove("LD_LIBRARY_PATH") .output() .expect("failed to execute clang");