From 8f7fc3c8626ee81c7546b77d28b12ffd48ccc965 Mon Sep 17 00:00:00 2001 From: qjerome Date: Wed, 26 Feb 2025 19:04:00 +0100 Subject: [PATCH 1/2] add(test)!: c_void enum test WARNING: this commit only is supposed to fail the tests --- tests/btf/assembly/c_void_enum.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/btf/assembly/c_void_enum.rs diff --git a/tests/btf/assembly/c_void_enum.rs b/tests/btf/assembly/c_void_enum.rs new file mode 100644 index 00000000..fb84c830 --- /dev/null +++ b/tests/btf/assembly/c_void_enum.rs @@ -0,0 +1,19 @@ +// assembly-output: bpf-linker +// no-prefer-dynamic +// compile-flags: --crate-type bin -C link-arg=--emit=obj -C link-arg=--btf -C debuginfo=2 + +#![no_std] +#![no_main] + +use core::ffi::c_void; + +#[no_mangle] +static mut FOO: *mut c_void = core::ptr::null_mut(); + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! { + loop {} +} + +// We check the BTF dump out of btfdump +// CHECK-NOT: 'c_void' From f81942306fa294aa962fb004d9cba09060295d11 Mon Sep 17 00:00:00 2001 From: qjerome Date: Wed, 26 Feb 2025 19:29:00 +0100 Subject: [PATCH 2/2] fix: c_void enum issue --- src/llvm/di.rs | 115 ++++++++++++++++++++++++++++--- src/llvm/types/di.rs | 157 +++++++++++++++++++++++++++++++++++++++++-- src/llvm/types/ir.rs | 8 ++- 3 files changed, 263 insertions(+), 17 deletions(-) diff --git a/src/llvm/di.rs b/src/llvm/di.rs index c4a939d4..770dc6b1 100644 --- a/src/llvm/di.rs +++ b/src/llvm/di.rs @@ -6,12 +6,14 @@ use std::{ ptr, }; -use gimli::{DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part}; +use gimli::{ + DW_TAG_enumeration_type, DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part, +}; use llvm_sys::{core::*, debuginfo::*, prelude::*}; use tracing::{span, trace, warn, Level}; use super::types::{ - di::DIType, + di::{DIBasicType, DIBasicTypeKind, DICompileUnit, DIType}, ir::{Function, MDNode, Metadata, Value}, }; use crate::llvm::{iter::*, 
types::di::DISubprogram}; @@ -21,13 +23,14 @@ use crate::llvm::{iter::*, types::di::DISubprogram}; // backward compatibility const MAX_KSYM_NAME_LEN: usize = 128; -pub struct DISanitizer { +pub struct DISanitizer<'ctx> { context: LLVMContextRef, module: LLVMModuleRef, builder: LLVMDIBuilderRef, visited_nodes: HashSet, replace_operands: HashMap, skipped_types: Vec, + basic_types: HashMap>, } // Sanitize Rust type names to be valid C type names. @@ -58,8 +61,13 @@ fn sanitize_type_name>(name: T) -> String { n } -impl DISanitizer { - pub fn new(context: LLVMContextRef, module: LLVMModuleRef) -> DISanitizer { +/// [`LLVMGetMetadataKind`] wrapper +fn kind(m: LLVMMetadataRef) -> LLVMMetadataKind { + unsafe { LLVMGetMetadataKind(m) } +} + +impl<'ctx> DISanitizer<'_> { + pub fn new(context: LLVMContextRef, module: LLVMModuleRef) -> DISanitizer<'ctx> { DISanitizer { context, module, @@ -67,15 +75,42 @@ impl DISanitizer { visited_nodes: HashSet::new(), replace_operands: HashMap::new(), skipped_types: Vec::new(), + basic_types: HashMap::new(), } } - fn visit_mdnode(&mut self, mdnode: MDNode) { + /// Returns a [`DIBasicType`] given a [`DIBasicTypeKind`]. 
+ fn di_basic_type(&'ctx mut self, di_bt: DIBasicTypeKind) -> &'ctx DIBasicType<'ctx> { + self.basic_types + .entry(di_bt) + .or_insert_with(|| DIBasicType::llvm_create(self.context, self.builder, di_bt)) + } + + fn visit_mdnode_item(&mut self, item: &mut Item) { + let Some(mdnode) = item.as_mdnode() else { + return; + }; + match mdnode.try_into().expect("MDNode is not Metadata") { Metadata::DICompositeType(mut di_composite_type) => { #[allow(clippy::single_match)] #[allow(non_upper_case_globals)] match di_composite_type.tag() { + DW_TAG_enumeration_type => { + if let Some(name) = di_composite_type.name() { + // we found the c_void enum + if name == c"c_void" && di_composite_type.size_in_bits() == 8 { + if let Item::Operand(ref mut op) = item { + // get i8 DIBasicType + let i8_bt = self.di_basic_type(DIBasicTypeKind::I8); + op.replace(i8_bt.value_ref); + } else { + // c_void enum is not an Item::Operand so we cannot replace it + warn!("failed at replacing c_void enum, it might result in BTF parsing errors in kernels < 5.4") + } + } + } + } DW_TAG_structure_type => { let names = match di_composite_type.name() { Some(name) => { @@ -247,9 +282,7 @@ impl DISanitizer { return; } - if let Value::MDNode(mdnode) = value.clone() { - self.visit_mdnode(mdnode) - } + self.visit_mdnode_item(&mut item); if let Some(operands) = value.operands() { for (index, operand) in operands.enumerate() { @@ -306,6 +339,8 @@ impl DISanitizer { ); } + self.fix_di_compile_units(); + unsafe { LLVMDisposeDIBuilder(self.builder) }; } @@ -404,6 +439,58 @@ impl DISanitizer { replace } + + fn di_compile_units(&self) -> Vec { + let compile_unit_name = c"llvm.dbg.cu"; + + // Get the number of DICompileUnit. + let num_di_cu = + unsafe { LLVMGetNamedMetadataNumOperands(self.module, compile_unit_name.as_ptr()) }; + + // Create a vector to hold them all. 
+ let mut di_cus: Vec = vec![core::ptr::null_mut(); num_di_cu as usize]; + + unsafe { + LLVMGetNamedMetadataOperands( + self.module, + compile_unit_name.as_ptr(), + di_cus.as_mut_ptr(), + ) + }; + + di_cus + .into_iter() + .map(|v| unsafe { DICompileUnit::from_value_ref(v) }) + .collect() + } + + /// Removes replaced `c_void` Rust enum from all [`DICompileUnit`]. After + /// replacing `c_void` enum with a [`DIBasicType`] the [`DICompileUnit`] still + /// holds a reference to the enum and still believes it is a [`DICompositeType`] + /// which triggers a casting assertion in LLVM. + fn fix_di_compile_units(&mut self) { + for mut di_cu in self.di_compile_units() { + let tmp_cu = di_cu.clone(); + let need_replace = tmp_cu.enum_types().any(|ct| { + matches!( + kind(ct.metadata_ref), + LLVMMetadataKind::LLVMDIBasicTypeMetadataKind + ) + }); + + if need_replace { + di_cu.replace_enum_types( + self.builder, + tmp_cu.enum_types().filter(|ct| { + !matches!( + kind(ct.metadata_ref), + LLVMMetadataKind::LLVMDIBasicTypeMetadataKind + ) + }), + ); + } + } + } } #[derive(Clone, Debug, Eq, PartialEq)] @@ -438,6 +525,16 @@ impl Operand { } impl Item { + /// Returns the [`Item`] as [`MDNode`] only if [`Item::is_mdnode`] is `true` else `None`. 
+ fn as_mdnode(&self) -> Option> { + let is_mdnode = unsafe { !LLVMIsAMDNode(self.value_ref()).is_null() }; + if is_mdnode { + Some(unsafe { MDNode::from_value_ref(self.value_ref()) }) + } else { + None + } + } + fn value_ref(&self) -> LLVMValueRef { match self { Item::GlobalVariable(value) diff --git a/src/llvm/types/di.rs b/src/llvm/types/di.rs index 0edc4c61..5063697b 100644 --- a/src/llvm/types/di.rs +++ b/src/llvm/types/di.rs @@ -7,13 +7,17 @@ use std::{ use gimli::DwTag; use llvm_sys::{ - core::{LLVMGetNumOperands, LLVMGetOperand, LLVMReplaceMDNodeOperandWith, LLVMValueAsMetadata}, + core::{ + LLVMGetNumOperands, LLVMGetOperand, LLVMMetadataAsValue, LLVMReplaceMDNodeOperandWith, + LLVMValueAsMetadata, + }, debuginfo::{ - LLVMDIFileGetFilename, LLVMDIFlags, LLVMDIScopeGetFile, LLVMDISubprogramGetLine, - LLVMDITypeGetFlags, LLVMDITypeGetLine, LLVMDITypeGetName, LLVMDITypeGetOffsetInBits, - LLVMGetDINodeTag, + LLVMDIBuilderCreateBasicType, LLVMDIBuilderGetOrCreateTypeArray, LLVMDIFileGetFilename, + LLVMDIFlags, LLVMDIScopeGetFile, LLVMDISubprogramGetLine, LLVMDITypeGetFlags, + LLVMDITypeGetLine, LLVMDITypeGetName, LLVMDITypeGetOffsetInBits, LLVMDITypeGetSizeInBits, + LLVMDWARFTypeEncoding, LLVMGetDINodeTag, }, - prelude::{LLVMContextRef, LLVMMetadataRef, LLVMValueRef}, + prelude::{LLVMContextRef, LLVMDIBuilderRef, LLVMMetadataRef, LLVMValueRef}, }; use crate::llvm::{ @@ -221,7 +225,7 @@ enum DICompositeTypeOperand { /// Composite type is a kind of type that can include other types, such as /// structures, enums, unions, etc. pub struct DICompositeType<'ctx> { - metadata_ref: LLVMMetadataRef, + pub(crate) metadata_ref: LLVMMetadataRef, value_ref: LLVMValueRef, _marker: PhantomData<&'ctx ()>, } @@ -308,6 +312,11 @@ impl DICompositeType<'_> { pub fn tag(&self) -> DwTag { unsafe { di_node_tag(self.metadata_ref) } } + + /// Returns the size in bits of the composite type. 
+ pub fn size_in_bits(&self) -> u64 { + unsafe { LLVMDITypeGetSizeInBits(LLVMValueAsMetadata(self.value_ref)) } + } } /// Represents the operands for a [`DISubprogram`]. The enum values correspond @@ -430,3 +439,139 @@ impl DISubprogram<'_> { }; } } + +/// Represents the operands for a [`DICompileUnit`]. The enum values correspond +/// to the operand indices within metadata nodes. +#[repr(u32)] +enum DICompileUnitOperand { + EnumTypes = 4, +} + +/// Represents the debug information for a compile unit in LLVM IR. +#[derive(Clone)] +pub struct DICompileUnit<'ctx> { + value_ref: LLVMValueRef, + _marker: PhantomData<&'ctx ()>, +} + +impl<'ctx> DICompileUnit<'ctx> { + /// Constructs a new [`DICompileUnit`] from the given `value_ref`. + /// + /// # Safety + /// + /// This method assumes that the provided `value_ref` corresponds to a valid + /// instance of [LLVM `DICompileUnit`](https://llvm.org/doxygen/classllvm_1_1DICompileUnit.html). + /// It's the caller's responsibility to ensure this invariant, as this + /// method doesn't perform any validation checks. 
+ pub(crate) unsafe fn from_value_ref(value_ref: LLVMValueRef) -> Self { + Self { + value_ref, + _marker: PhantomData, + } + } + + pub fn enum_types(&self) -> impl Iterator { + let llvm_enum_types = + unsafe { LLVMGetOperand(self.value_ref, DICompileUnitOperand::EnumTypes as u32) }; + + let llvm_enum_types_len = if llvm_enum_types.is_null() { + 0 + } else { + unsafe { LLVMGetNumOperands(llvm_enum_types) } + }; + + (0..llvm_enum_types_len).map(move |i| unsafe { + let enum_type = LLVMGetOperand(llvm_enum_types, i as u32); + DICompositeType::from_value_ref(enum_type) + }) + } + + pub fn replace_enum_types(&mut self, builder: LLVMDIBuilderRef, rep: I) + where + I: IntoIterator>, + { + let mut rep: Vec<_> = rep.into_iter().map(|dct| dct.metadata_ref).collect(); + + unsafe { + let enum_array = + LLVMDIBuilderGetOrCreateTypeArray(builder, rep.as_mut_ptr(), rep.len()); + LLVMReplaceMDNodeOperandWith( + self.value_ref, + DICompileUnitOperand::EnumTypes as u32, + enum_array, + ); + } + } +} + +/// Represents [`DIBasicType`] kinds. +#[derive(Hash, PartialEq, Eq, Clone, Copy)] +pub enum DIBasicTypeKind { + I8, +} + +impl DIBasicTypeKind { + fn name(&self) -> &'static str { + match self { + Self::I8 => "i8", + } + } + + fn size_in_bits(&self) -> u64 { + match self { + Self::I8 => 8, + } + } + + // DWARF encoding https://llvm.org/docs/LangRef.html#dibasictype + fn dwarf_type_encoding(&self) -> LLVMDWARFTypeEncoding { + match self { + // DW_ATE_signed + Self::I8 => gimli::DW_ATE_signed.0.into(), + } + } +} + +/// Represents the debug information for a basic type in LLVM IR. +pub struct DIBasicType<'ctx> { + pub(crate) value_ref: LLVMValueRef, + _marker: PhantomData<&'ctx ()>, +} + +impl DIBasicType<'_> { + /// Constructs a new [`DIBasicType`] from the given `value_ref`. + /// + /// # Safety + /// + /// This method assumes that the provided `value_ref` corresponds to a valid + /// instance of [LLVM `DIBasicType`](https://llvm.org/doxygen/classllvm_1_1DIBasicType.html). 
+ /// It's the caller's responsibility to ensure this invariant, as this + /// method doesn't perform any validation checks. + pub(crate) unsafe fn from_value_ref(value_ref: LLVMValueRef) -> Self { + Self { + value_ref, + _marker: PhantomData, + } + } + + /// Creates a new [`DIBasicType`] of `kind` given a `context` and a `builder`. + pub fn llvm_create( + ctx: LLVMContextRef, + builder: LLVMDIBuilderRef, + kind: DIBasicTypeKind, + ) -> Self { + let name = kind.name(); + let metadata_ref = unsafe { + LLVMDIBuilderCreateBasicType( + builder, + name.as_ptr() as *const _, + name.len(), + kind.size_in_bits(), + kind.dwarf_type_encoding(), + 0, + ) + }; + + unsafe { Self::from_value_ref(LLVMMetadataAsValue(ctx, metadata_ref)) } + } +} diff --git a/src/llvm/types/ir.rs b/src/llvm/types/ir.rs index e68cd43a..32c14864 100644 --- a/src/llvm/types/ir.rs +++ b/src/llvm/types/ir.rs @@ -22,7 +22,7 @@ use llvm_sys::{ use crate::llvm::{ iter::IterBasicBlocks as _, symbol_name, - types::di::{DICompositeType, DIDerivedType, DISubprogram, DIType}, + types::di::{DICompileUnit, DICompositeType, DIDerivedType, DISubprogram, DIType}, Message, }; @@ -112,6 +112,7 @@ pub enum Metadata<'ctx> { DICompositeType(DICompositeType<'ctx>), DIDerivedType(DIDerivedType<'ctx>), DISubprogram(DISubprogram<'ctx>), + DICompileUnit(#[allow(dead_code)] DICompileUnit<'ctx>), Other(#[allow(dead_code)] LLVMValueRef), } @@ -140,6 +141,10 @@ impl Metadata<'_> { let di_subprogram = unsafe { DISubprogram::from_value_ref(value) }; Metadata::DISubprogram(di_subprogram) } + LLVMMetadataKind::LLVMDICompileUnitMetadataKind => { + let di_compile_unit = unsafe { DICompileUnit::from_value_ref(value) }; + Metadata::DICompileUnit(di_compile_unit) + } LLVMMetadataKind::LLVMDIGlobalVariableMetadataKind | LLVMMetadataKind::LLVMDICommonBlockMetadataKind | LLVMMetadataKind::LLVMMDStringMetadataKind @@ -156,7 +161,6 @@ impl Metadata<'_> { | LLVMMetadataKind::LLVMDIBasicTypeMetadataKind | 
LLVMMetadataKind::LLVMDISubroutineTypeMetadataKind | LLVMMetadataKind::LLVMDIFileMetadataKind - | LLVMMetadataKind::LLVMDICompileUnitMetadataKind | LLVMMetadataKind::LLVMDILexicalBlockMetadataKind | LLVMMetadataKind::LLVMDILexicalBlockFileMetadataKind | LLVMMetadataKind::LLVMDINamespaceMetadataKind