|
| 1 | +use core::slice; |
1 | 2 | use std::{ |
2 | 3 | borrow::Cow, |
3 | 4 | collections::{HashMap, HashSet, hash_map::DefaultHasher}, |
| 5 | + ffi::CStr, |
4 | 6 | hash::Hasher as _, |
5 | 7 | io::Write as _, |
6 | 8 | marker::PhantomData, |
7 | 9 | ptr, |
8 | 10 | }; |
9 | 11 |
|
10 | 12 | use gimli::{DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part}; |
11 | | -use llvm_sys::{core::*, debuginfo::*, prelude::*}; |
| 13 | +use llvm_sys::{LLVMTypeKind, core::*, debuginfo::*, prelude::*}; |
| 14 | +use log::debug; |
12 | 15 | use tracing::{Level, span, trace, warn}; |
13 | 16 |
|
14 | 17 | use super::types::{ |
15 | 18 | di::DIType, |
16 | 19 | ir::{Function, MDNode, Metadata, Value}, |
17 | 20 | }; |
18 | | -use crate::llvm::{LLVMContext, LLVMModule, iter::*, types::di::DISubprogram}; |
| 21 | +use crate::llvm::{LLVMContext, LLVMModule, iter::*, symbol_name, types::{di::DISubprogram, ir::MetadataEntries}}; |
19 | 22 |
|
20 | 23 | // KSYM_NAME_LEN from linux kernel intentionally set |
21 | 24 | // to lower value found across kernel versions to ensure |
@@ -268,6 +271,9 @@ impl<'ctx> DISanitizer<'ctx> { |
268 | 271 | pub(crate) fn run(mut self, exported_symbols: &HashSet<Cow<'_, [u8]>>) { |
269 | 272 | let module = self.module; |
270 | 273 |
|
| 274 | + // Create debug info for extern functions first |
| 275 | + self.create_extern_debug_info(); |
| 276 | + |
271 | 277 | self.replace_operands = self.fix_subprogram_linkage(exported_symbols); |
272 | 278 |
|
273 | 279 | for value in module.globals_iter() { |
@@ -319,6 +325,11 @@ impl<'ctx> DISanitizer<'ctx> { |
319 | 325 | continue; |
320 | 326 | } |
321 | 327 |
|
| 328 | + let num_blocks = unsafe { LLVMCountBasicBlocks(function.value_ref) }; |
| 329 | + if num_blocks == 0 { |
| 330 | + continue; |
| 331 | + } |
| 332 | + |
322 | 333 | // Skip functions that don't have subprograms. |
323 | 334 | let Some(mut subprogram) = function.subprogram(self.context) else { |
324 | 335 | continue; |
@@ -389,6 +400,243 @@ impl<'ctx> DISanitizer<'ctx> { |
389 | 400 |
|
390 | 401 | replace |
391 | 402 | } |
| 403 | + |
| 404 | + fn create_extern_debug_info(&mut self) { |
| 405 | + let Some((_, di_file)) = self.get_compile_unit_and_file() else { |
| 406 | + warn!("No compile unit found, skipping extern debug info creation"); |
| 407 | + return; |
| 408 | + }; |
| 409 | + |
| 410 | + let functions: Vec<LLVMValueRef> = self.module.functions_iter().collect(); |
| 411 | + |
| 412 | + for function in functions { |
| 413 | + let mut func = unsafe { Function::from_value_ref(function) }; |
| 414 | + |
| 415 | + if func.subprogram(self.context).is_some() { |
| 416 | + continue; |
| 417 | + } |
| 418 | + |
| 419 | + // Check if it's an extern (0 basic blocks) |
| 420 | + let num_blocks = unsafe { LLVMCountBasicBlocks(function) }; |
| 421 | + if num_blocks > 0 { |
| 422 | + continue; |
| 423 | + } |
| 424 | + |
| 425 | + let name = func.name(); |
| 426 | + |
| 427 | + // Get function type |
| 428 | + let func_type = unsafe { LLVMGlobalGetValueType(function) }; |
| 429 | + let return_type = unsafe { LLVMGetReturnType(func_type) }; |
| 430 | + let return_type_kind = unsafe { LLVMGetTypeKind(return_type) }; |
| 431 | + let param_count = unsafe { LLVMCountParamTypes(func_type) }; |
| 432 | + |
| 433 | + if !return_type.is_null() { |
| 434 | + warn!("Return type kind: {:?}", return_type_kind); |
| 435 | + } |
| 436 | + // Create DITypes for return and params |
| 437 | + let mut di_types = Vec::new(); |
| 438 | + |
| 439 | + // Add return type as first element |
| 440 | + let di_return = self.create_di_type_from_llvm_type(return_type, di_file); |
| 441 | + di_types.push(di_return); |
| 442 | + |
| 443 | + // Add parameter types and collect them for later |
| 444 | + let mut param_di_types = Vec::new(); |
| 445 | + if param_count > 0 { |
| 446 | + let mut param_types = vec![ptr::null_mut(); param_count as usize]; |
| 447 | + unsafe { LLVMGetParamTypes(func_type, param_types.as_mut_ptr()) }; |
| 448 | + |
| 449 | + for param_type in param_types { |
| 450 | + let di_param = self.create_di_type_from_llvm_type(param_type, di_file); |
| 451 | + di_types.push(di_param); |
| 452 | + param_di_types.push(di_param); |
| 453 | + } |
| 454 | + } |
| 455 | + |
| 456 | + // Create DISubroutineType |
| 457 | + let di_subroutine_type = unsafe { |
| 458 | + LLVMDIBuilderCreateSubroutineType( |
| 459 | + self.builder, |
| 460 | + di_file, |
| 461 | + di_types.as_mut_ptr(), |
| 462 | + di_types.len() as u32, |
| 463 | + 0, |
| 464 | + ) |
| 465 | + }; |
| 466 | + |
| 467 | + // Create DISubprogram for extern (declaration, not definition) |
| 468 | + let subprogram = unsafe { |
| 469 | + LLVMDIBuilderCreateFunction( |
| 470 | + self.builder, |
| 471 | + di_file, |
| 472 | + name.as_ptr().cast(), |
| 473 | + name.len(), |
| 474 | + name.as_ptr().cast(), |
| 475 | + name.len(), |
| 476 | + di_file, |
| 477 | + 0, |
| 478 | + di_subroutine_type, |
| 479 | + 0, |
| 480 | + 0, |
| 481 | + 0, |
| 482 | + LLVMDIFlagPrototyped, |
| 483 | + 1, |
| 484 | + ) |
| 485 | + }; |
| 486 | + |
| 487 | + let mut di_subprogram = unsafe { |
| 488 | + DISubprogram::from_value_ref(LLVMMetadataAsValue(self.context, subprogram)) |
| 489 | + }; |
| 490 | + |
| 491 | + // Create parameter debug info for retained nodes |
| 492 | + if !param_di_types.is_empty() { |
| 493 | + let mut param_vars = Vec::new(); |
| 494 | + |
| 495 | + for (idx, di_param_type) in param_di_types.iter().enumerate() { |
| 496 | + let arg_idx = (idx + 1) as u32; |
| 497 | + let param_name = format!("arg{}", idx); |
| 498 | + |
| 499 | + let di_param_var = unsafe { |
| 500 | + LLVMDIBuilderCreateParameterVariable( |
| 501 | + self.builder, |
| 502 | + subprogram, // scope |
| 503 | + param_name.as_ptr().cast(), |
| 504 | + param_name.len(), |
| 505 | + arg_idx, |
| 506 | + di_file, |
| 507 | + 0, // line |
| 508 | + *di_param_type, |
| 509 | + 1, // always preserve |
| 510 | + 0, // flags |
| 511 | + ) |
| 512 | + }; |
| 513 | + param_vars.push(di_param_var); |
| 514 | + } |
| 515 | + |
| 516 | + // Create retained nodes metadata |
| 517 | + let retained_nodes = unsafe { |
| 518 | + LLVMMDNodeInContext2(self.context, param_vars.as_mut_ptr(), param_vars.len()) |
| 519 | + }; |
| 520 | + di_subprogram.set_retained_nodes(retained_nodes); |
| 521 | + } |
| 522 | + |
| 523 | + unsafe { LLVMDIBuilderFinalizeSubprogram(self.builder, subprogram) }; |
| 524 | + |
| 525 | + func.set_subprogram(&di_subprogram); |
| 526 | + } |
| 527 | + } |
| 528 | + fn create_di_type_from_llvm_type( |
| 529 | + &mut self, |
| 530 | + llvm_type: LLVMTypeRef, |
| 531 | + di_file: LLVMMetadataRef, |
| 532 | + ) -> LLVMMetadataRef { |
| 533 | + unsafe { |
| 534 | + let type_kind = LLVMGetTypeKind(llvm_type); |
| 535 | + |
| 536 | + match type_kind { |
| 537 | + LLVMTypeKind::LLVMVoidTypeKind => { |
| 538 | + LLVMDIBuilderCreateBasicType(self.builder, c"void".as_ptr(), 4, 0, 0, 0) |
| 539 | + } |
| 540 | + LLVMTypeKind::LLVMIntegerTypeKind => { |
| 541 | + self.create_di_basic_int(llvm_type, di_file) |
| 542 | + } |
| 543 | + LLVMTypeKind::LLVMPointerTypeKind => { |
| 544 | + // Create void* for simplicity |
| 545 | + let pointee = |
| 546 | + LLVMDIBuilderCreateBasicType(self.builder, c"void".as_ptr(), 4, 0, 0, 0); |
| 547 | + LLVMDIBuilderCreatePointerType( |
| 548 | + self.builder, |
| 549 | + pointee, |
| 550 | + 64, // BPF is 64-bit |
| 551 | + 0, // align |
| 552 | + 0, // address space |
| 553 | + c"".as_ptr(), |
| 554 | + 0, |
| 555 | + ) |
| 556 | + } |
| 557 | + LLVMTypeKind::LLVMStructTypeKind => { |
| 558 | + // Create opaque struct type for extern function parameters |
| 559 | + // We don't need full layout for externs |
| 560 | + let struct_name = { |
| 561 | + let name_ptr = LLVMGetStructName(llvm_type); |
| 562 | + if name_ptr.is_null() { |
| 563 | + c"struct" |
| 564 | + } else { |
| 565 | + CStr::from_ptr(name_ptr) |
| 566 | + } |
| 567 | + }; |
| 568 | + |
| 569 | + LLVMDIBuilderCreateStructType( |
| 570 | + self.builder, |
| 571 | + ptr::null_mut(), // scope |
| 572 | + struct_name.as_ptr(), |
| 573 | + struct_name.to_bytes().len(), |
| 574 | + di_file, |
| 575 | + 0, // line |
| 576 | + 0, // size (opaque) |
| 577 | + 0, // align |
| 578 | + LLVMDIFlagFwdDecl, // forward decl |
| 579 | + ptr::null_mut(), // derived from |
| 580 | + ptr::null_mut(), // elements |
| 581 | + 0, // element count |
| 582 | + 0, // runtime lang |
| 583 | + ptr::null_mut(), // vtable |
| 584 | + c"".as_ptr(), |
| 585 | + 0, |
| 586 | + ) |
| 587 | + } |
| 588 | + // For any other type, default to void |
| 589 | + _ => LLVMDIBuilderCreateBasicType(self.builder, c"void".as_ptr(), 4, 0, 0, 0), |
| 590 | + } |
| 591 | + } |
| 592 | + } |
| 593 | + |
| 594 | + fn create_di_basic_int( |
| 595 | + &mut self, |
| 596 | + llvm_type: LLVMTypeRef, |
| 597 | + _di_file: LLVMMetadataRef, |
| 598 | + ) -> LLVMMetadataRef { |
| 599 | + unsafe { |
| 600 | + let width = LLVMGetIntTypeWidth(llvm_type); |
| 601 | + |
| 602 | + // DWARF encoding values |
| 603 | + const DW_ATE_BOOLEAN: u32 = 0x02; |
| 604 | + const DW_ATE_SIGNED: u32 = 0x05; |
| 605 | + const DW_ATE_UNSIGNED: u32 = 0x07; |
| 606 | + |
| 607 | + let (name, encoding) = match width { |
| 608 | + 1 => (c"bool", DW_ATE_BOOLEAN), |
| 609 | + 8 => (c"u8", DW_ATE_UNSIGNED), |
| 610 | + 16 => (c"u16", DW_ATE_UNSIGNED), |
| 611 | + 32 => (c"i32", DW_ATE_SIGNED), |
| 612 | + 64 => (c"u64", DW_ATE_UNSIGNED), |
| 613 | + _ => (c"int", DW_ATE_SIGNED), |
| 614 | + }; |
| 615 | + |
| 616 | + LLVMDIBuilderCreateBasicType( |
| 617 | + self.builder, |
| 618 | + name.as_ptr(), |
| 619 | + name.to_bytes().len(), |
| 620 | + width as u64, |
| 621 | + encoding, |
| 622 | + 0, // flags |
| 623 | + ) |
| 624 | + } |
| 625 | + } |
| 626 | + |
| 627 | + fn get_compile_unit_and_file(&self) -> Option<(LLVMMetadataRef, LLVMMetadataRef)> { |
| 628 | + for function in self.module.functions_iter() { |
| 629 | + let func = unsafe { Function::from_value_ref(function) }; |
| 630 | + |
| 631 | + if let Some(subprogram) = func.subprogram(self.context) { |
| 632 | + if let Some(unit) = subprogram.unit() { |
| 633 | + let file = subprogram.file(); |
| 634 | + return Some((unit, file)); |
| 635 | + } |
| 636 | + } |
| 637 | + } |
| 638 | + None |
| 639 | + } |
392 | 640 | } |
393 | 641 |
|
394 | 642 | #[derive(Clone, Debug, Eq, PartialEq)] |
|
0 commit comments