|
| 1 | +use crate::custom_insts::{self, CustomOp}; |
1 | 2 | use rspirv::binary::Assemble;
|
2 | 3 | use rspirv::dr::{Instruction, Module, Operand};
|
3 | 4 | use rspirv::spirv::{Op, Word};
|
4 | 5 | use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
5 | 6 | use rustc_middle::bug;
|
| 7 | +use smallvec::SmallVec; |
6 | 8 | use std::collections::hash_map;
|
| 9 | +use std::mem; |
7 | 10 |
|
8 | 11 | // FIXME(eddyb) consider deduplicating the `OpString` and `OpSource` created for
|
9 | 12 | // file-level debuginfo (but using SPIR-T for linking might be better?).
|
@@ -269,19 +272,272 @@ pub fn remove_duplicate_types(module: &mut Module) {
|
269 | 272 | });
|
270 | 273 | }
|
271 | 274 |
|
272 |
| -pub fn remove_duplicate_lines(module: &mut Module) { |
| 275 | +pub fn remove_duplicate_debuginfo(module: &mut Module) { |
| 276 | + // FIXME(eddyb) avoid repeating this across different passes/helpers. |
| 277 | + let custom_ext_inst_set_import = module |
| 278 | + .ext_inst_imports |
| 279 | + .iter() |
| 280 | + .find(|inst| { |
| 281 | + assert_eq!(inst.class.opcode, Op::ExtInstImport); |
| 282 | + inst.operands[0].unwrap_literal_string() == &custom_insts::CUSTOM_EXT_INST_SET[..] |
| 283 | + }) |
| 284 | + .map(|inst| inst.result_id.unwrap()); |
| 285 | + |
273 | 286 | for func in &mut module.functions {
|
274 | 287 | for block in &mut func.blocks {
|
275 |
| - block.instructions.dedup_by(|a, b| { |
276 |
| - if a.class.opcode == Op::Line && b.class.opcode == Op::Line { |
277 |
| - // dedup_by removes the *second* element in a pair of duplicates. We want to |
278 |
| - // remove the *first* (so the last OpLine is kept). So, swap them! :D |
279 |
| - std::mem::swap(a, b); |
280 |
| - true |
281 |
| - } else { |
282 |
| - false |
| 288 | + // Ignore the terminator, it's effectively "outside" debuginfo. |
| 289 | + let (_, insts) = block.instructions.split_last_mut().unwrap(); |
| 290 | + |
| 291 | + // HACK(eddyb) to make random access easier, we first replace unused |
| 292 | + // instructions with `OpNop`, and then remove all the `OpNop`s. |
| 293 | + |
| 294 | + #[derive(Clone)] |
| 295 | + struct DbgLocInst { |
| 296 | + inst_idx: usize, |
| 297 | + used: bool, |
| 298 | + } |
| 299 | + |
| 300 | + fn nop() -> Instruction { |
| 301 | + Instruction::new(Op::Nop, None, None, vec![]) |
| 302 | + } |
| 303 | + impl DbgLocInst { |
| 304 | + fn nop_if_unused(&self, insts: &mut [Instruction]) { |
| 305 | + if !self.used { |
| 306 | + insts[self.inst_idx] = nop(); |
| 307 | + } |
| 308 | + } |
| 309 | + } |
| 310 | + |
| 311 | + #[derive(Clone, Default)] |
| 312 | + struct DbgState { |
| 313 | + loc: Option<DbgLocInst>, |
| 314 | + has_semantic_insts: bool, |
| 315 | + } |
| 316 | + let mut dbg = DbgState::default(); |
| 317 | + |
| 318 | + struct Frame { |
| 319 | + call_dbg: DbgState, |
| 320 | + push_inst_idx: usize, |
| 321 | + } |
| 322 | + let mut inlined_frames = SmallVec::<[Frame; 8]>::new(); |
| 323 | + |
| 324 | + // HACK(eddyb) `PopInlinedCallFrame` moves `inlined_frames.last()` |
| 325 | + // `fusable_freshly_popped_inlined_frames.last()`, so a sequence of |
| 326 | + // N pops will reverse the N last entries of `inlined_frames` into |
| 327 | + // this vector (and go from outside-in, to inside-out), which allows |
| 328 | + // *fusing* a pop with a push (of an identical inlined frame), when |
| 329 | + // no interverning instructions prevent it (such instructions will |
| 330 | + // clear this vector to indicate the pops are "fully committed"). |
| 331 | + struct PoppedFrame { |
| 332 | + frame: Frame, |
| 333 | + callee_has_semantic_insts: bool, |
| 334 | + pop_inst_idx: usize, |
| 335 | + } |
| 336 | + let mut fusable_freshly_popped_inlined_frames = SmallVec::<[PoppedFrame; 8]>::new(); |
| 337 | + |
| 338 | + for inst_idx in 0..insts.len() { |
| 339 | + let inst = &insts[inst_idx]; |
| 340 | + let custom_op = match inst.class.opcode { |
| 341 | + Op::ExtInst |
| 342 | + if Some(inst.operands[0].unwrap_id_ref()) == custom_ext_inst_set_import => |
| 343 | + { |
| 344 | + Some(CustomOp::decode_from_ext_inst(inst)) |
| 345 | + } |
| 346 | + _ => None, |
| 347 | + }; |
| 348 | + |
| 349 | + fn inst_eq_key(inst: &Instruction) -> impl PartialEq + '_ { |
| 350 | + (inst.class.opcode, &inst.operands) |
283 | 351 | }
|
284 |
| - }); |
| 352 | + |
| 353 | + // NOTE(eddyb) `fusable_freshly_popped_inlined_frames`-preserving |
| 354 | + // cases must all use `if can_continue { continue; }` to skip the |
| 355 | + // draining logic (`can_continue` is only `false` at the very end). |
| 356 | + let can_continue = inst_idx < insts.len() - 1; |
| 357 | + let prev_dbg_loc_snapshot = dbg.loc.clone(); |
| 358 | + match (inst.class.opcode, custom_op) { |
| 359 | + (Op::Line | Op::NoLine, _) |
| 360 | + | (_, Some(CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc)) => { |
| 361 | + // HACK(eddyb) prefer keeping older active `DbgLocInst`s, |
| 362 | + // if all the details are the same (it helps with fusion). |
| 363 | + if dbg.loc.as_ref().is_some_and(|old_dbg_loc| { |
| 364 | + inst_eq_key(inst) == inst_eq_key(&insts[old_dbg_loc.inst_idx]) |
| 365 | + }) { |
| 366 | + insts[inst_idx] = nop(); |
| 367 | + if can_continue { |
| 368 | + continue; |
| 369 | + } |
| 370 | + } else { |
| 371 | + dbg.loc = Some(DbgLocInst { |
| 372 | + inst_idx, |
| 373 | + used: false, |
| 374 | + }); |
| 375 | + } |
| 376 | + } |
| 377 | + (_, Some(CustomOp::PushInlinedCallFrame)) => { |
| 378 | + // HACK(eddyb) attempt fusing this push with the last pop. |
| 379 | + let fuse_with_last_pop = fusable_freshly_popped_inlined_frames |
| 380 | + .last() |
| 381 | + .is_some_and(|last_popped| { |
| 382 | + // HACK(eddyb) updating `dbg.loc` deduplicates eagerly, |
| 383 | + // so here it suffices to check the (deduped) indices. |
| 384 | + let dbg_loc_inst_idx = |
| 385 | + |dbg: &DbgState| dbg.loc.as_ref().map(|d| d.inst_idx); |
| 386 | + dbg_loc_inst_idx(&last_popped.frame.call_dbg) |
| 387 | + == dbg_loc_inst_idx(&dbg) |
| 388 | + && inst_eq_key(inst) |
| 389 | + == inst_eq_key(&insts[last_popped.frame.push_inst_idx]) |
| 390 | + }); |
| 391 | + if fuse_with_last_pop { |
| 392 | + let PoppedFrame { |
| 393 | + frame, |
| 394 | + callee_has_semantic_insts, |
| 395 | + pop_inst_idx, |
| 396 | + } = fusable_freshly_popped_inlined_frames.pop().unwrap(); |
| 397 | + |
| 398 | + insts[pop_inst_idx] = nop(); |
| 399 | + |
| 400 | + // Can't make entering an inlined function a nop, |
| 401 | + // as it needs to reset callee-side `DbgLocInst`, |
| 402 | + // but we can replace it in-place and hope later |
| 403 | + // it get nop'd out by some real `DbgLocInst`. |
| 404 | + insts[inst_idx].operands.splice( |
| 405 | + 1.., |
| 406 | + [Operand::LiteralExtInstInteger( |
| 407 | + CustomOp::ClearDebugSrcLoc as u32, |
| 408 | + )], |
| 409 | + ); |
| 410 | + dbg = DbgState { |
| 411 | + loc: Some(DbgLocInst { |
| 412 | + inst_idx, |
| 413 | + used: false, |
| 414 | + }), |
| 415 | + has_semantic_insts: callee_has_semantic_insts, |
| 416 | + }; |
| 417 | + |
| 418 | + inlined_frames.push(frame); |
| 419 | + |
| 420 | + // Allow further fusing to occur. |
| 421 | + if can_continue { |
| 422 | + continue; |
| 423 | + } |
| 424 | + } else { |
| 425 | + // HACK(eddyb) the actual push to `inlined_frames` is |
| 426 | + // done at the very end of the loop body, to be able |
| 427 | + // to process any pending updates on the previous state. |
| 428 | + } |
| 429 | + } |
| 430 | + (_, Some(CustomOp::PopInlinedCallFrame)) => { |
| 431 | + // Leaving an inlined function doesn't use `DbgLocInst`. |
| 432 | + if let Some(dbg_loc) = dbg.loc.take() { |
| 433 | + // HACK(eddyb) only treat as "definitely unused" |
| 434 | + // instructions that are too "recent" to have been |
| 435 | + // used by a `PushInlinedCallFrame` with a still |
| 436 | + // uncommitted `PopInlinedCallFrame`. |
| 437 | + let min_safe_inst_idx_to_nop = fusable_freshly_popped_inlined_frames |
| 438 | + .last() |
| 439 | + .map_or(0, |last_popped| last_popped.pop_inst_idx); |
| 440 | + if dbg_loc.inst_idx > min_safe_inst_idx_to_nop { |
| 441 | + dbg_loc.nop_if_unused(insts); |
| 442 | + } |
| 443 | + } |
| 444 | + if let Some(frame) = inlined_frames.pop() { |
| 445 | + let callee_has_semantic_insts = dbg.has_semantic_insts; |
| 446 | + dbg = frame.call_dbg.clone(); |
| 447 | + dbg.has_semantic_insts |= callee_has_semantic_insts; |
| 448 | + |
| 449 | + // HACK(eddyb) inform future `PushInlinedCallFrame`s |
| 450 | + // of potential fusion, by saving a copy of the frame. |
| 451 | + fusable_freshly_popped_inlined_frames.push(PoppedFrame { |
| 452 | + frame, |
| 453 | + callee_has_semantic_insts, |
| 454 | + pop_inst_idx: inst_idx, |
| 455 | + }); |
| 456 | + } else { |
| 457 | + // FIXME(eddyb) this may indicate a bug elsewhere. |
| 458 | + insts[inst_idx] = nop(); |
| 459 | + } |
| 460 | + if can_continue { |
| 461 | + continue; |
| 462 | + } |
| 463 | + } |
| 464 | + _ => { |
| 465 | + if let Some(dbg_loc) = &mut dbg.loc { |
| 466 | + dbg_loc.used = true; |
| 467 | + } |
| 468 | + dbg.has_semantic_insts = true; |
| 469 | + } |
| 470 | + } |
| 471 | + |
| 472 | + // NOTE(eddyb) mutable so that it may be marked as used below. |
| 473 | + let mut freshly_replaced_dbg_loc = prev_dbg_loc_snapshot.filter(|prev_dbg_loc| { |
| 474 | + dbg.loc.as_ref().map(|d| d.inst_idx) != Some(prev_dbg_loc.inst_idx) |
| 475 | + }); |
| 476 | + |
| 477 | + // NOTE(eddyb) the iteration order doesn't matter, as this is |
| 478 | + // effectively a set of `PopInlinedCallFrame`s which have had |
| 479 | + // all their other side-effects processed, and didn't get a |
| 480 | + // chance to be fused away, so they're getting committed. |
| 481 | + for popped in fusable_freshly_popped_inlined_frames.drain(..) { |
| 482 | + let PoppedFrame { |
| 483 | + mut frame, |
| 484 | + callee_has_semantic_insts, |
| 485 | + pop_inst_idx, |
| 486 | + } = popped; |
| 487 | + |
| 488 | + // HACK(eddyb) this popped frame's `call_dbg.loc` may still |
| 489 | + // be used elsewhere, in which case that use takes precedence, |
| 490 | + // and is effectively the new "owner" of the `DbgLocInst`. |
| 491 | + let call_dbg_loc_used_elsewhere = |
| 492 | + frame.call_dbg.loc.as_ref().and_then(|call_dbg_loc| { |
| 493 | + [dbg.loc.as_mut(), freshly_replaced_dbg_loc.as_mut()] |
| 494 | + .into_iter() |
| 495 | + .flatten() |
| 496 | + .find(|dbg_loc| dbg_loc.inst_idx == call_dbg_loc.inst_idx) |
| 497 | + }); |
| 498 | + if call_dbg_loc_used_elsewhere.is_some() { |
| 499 | + frame.call_dbg.loc = None; |
| 500 | + } |
| 501 | + |
| 502 | + if callee_has_semantic_insts { |
| 503 | + // The `PushInlinedCallFrame` being kept requires its |
| 504 | + // original `DbgLocInst` to also be kept around. |
| 505 | + if let Some(call_dbg_loc) = call_dbg_loc_used_elsewhere { |
| 506 | + call_dbg_loc.used = true; |
| 507 | + } |
| 508 | + } else { |
| 509 | + // If the entire inlined call is all `OpNop`s now, |
| 510 | + // entering/leaving it can also become `OpNop`s. |
| 511 | + if let Some(call_dbg_loc) = &mut frame.call_dbg.loc { |
| 512 | + call_dbg_loc.nop_if_unused(insts); |
| 513 | + } |
| 514 | + insts[frame.push_inst_idx] = nop(); |
| 515 | + insts[pop_inst_idx] = nop(); |
| 516 | + } |
| 517 | + } |
| 518 | + |
| 519 | + // Only remove a replaced `DbgLocInst` after it had a chance to |
| 520 | + // be marked as used above (for e.g. a `PushInlinedCallFrame`). |
| 521 | + if let Some(old_dbg_loc) = freshly_replaced_dbg_loc { |
| 522 | + old_dbg_loc.nop_if_unused(insts); |
| 523 | + } |
| 524 | + |
| 525 | + // HACK(eddyb) the actual push to `inlined_frames` is |
| 526 | + // done at the very end of the loop body, to be able |
| 527 | + // to process any pending updates on the previous state. |
| 528 | + if custom_op == Some(CustomOp::PushInlinedCallFrame) { |
| 529 | + inlined_frames.push(Frame { |
| 530 | + call_dbg: mem::take(&mut dbg), |
| 531 | + push_inst_idx: inst_idx, |
| 532 | + }); |
| 533 | + } |
| 534 | + } |
| 535 | + |
| 536 | + assert!(fusable_freshly_popped_inlined_frames.is_empty()); |
| 537 | + |
| 538 | + block |
| 539 | + .instructions |
| 540 | + .retain(|inst| inst.class.opcode != Op::Nop); |
285 | 541 | }
|
286 | 542 | }
|
287 | 543 | }
|
0 commit comments