Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libs/@local/hashql/mir/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"fix:clippy": "just clippy --fix",
"lint:clippy": "just clippy",
"test:codspeed": "cargo codspeed run -p hashql-mir",
"test:miri": "cargo miri nextest run -- changed_bitor",
"test:unit": "mise run test:unit @rust/hashql-mir"
},
"dependencies": {
Expand Down
40 changes: 37 additions & 3 deletions libs/@local/hashql/mir/src/pass/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,21 @@ impl Changed {
}
}

const fn from_u8(value: u8) -> Self {
/// Convert from a `u8` value.
///
/// # Safety
///
/// The caller must ensure that the value is either `0`, `1`, or `3`.
#[expect(unsafe_code)]
const unsafe fn from_u8_unchecked(value: u8) -> Self {
debug_assert!(value == 0 || value == 1 || value == 3);

match value {
0 => Self::No,
1 => Self::Unknown,
3 => Self::Yes,
_ => unreachable!(),
// SAFETY: caller guarantees that the value is valid.
_ => unsafe { core::hint::unreachable_unchecked() },
}
}

Expand All @@ -128,12 +137,21 @@ impl Changed {
impl BitOr for Changed {
    type Output = Self;

    /// Combines two change states by OR-ing their `u8` encodings.
    #[inline]
    #[expect(unsafe_code)]
    fn bitor(self, rhs: Self) -> Self::Output {
        let result = self.into_u8() | rhs.into_u8();

        // We use `from_u8_unchecked` here because the safe version prevents LLVM from vectorizing
        // loops that use `|=` on slices of `Changed` values.
        // SAFETY: Both operands have valid discriminants (0, 1, or 3). The bitwise OR of any
        // combination of these values produces only 0, 1, or 3, which are all valid discriminants.
        unsafe { Self::from_u8_unchecked(result) }
    }
}

impl BitOrAssign for Changed {
#[inline]
fn bitor_assign(&mut self, rhs: Self) {
    // Delegates to the `BitOr` impl so both operators share a single definition.
    let merged = *self | rhs;
    *self = merged;
}
Expand Down Expand Up @@ -294,6 +312,22 @@ impl<'ctx> GlobalTransformState<'ctx> {
pub fn mark(&mut self, id: DefId, changed: Changed) {
    // OR-ing (rather than assigning) accumulates with whatever is already recorded for `id`.
    let entry = &mut self.changed[id];
    *entry |= changed;
}

/// Merges the per-body change flags in `other` into this state.
///
/// Every entry of `other` is combined with the entry at the same position in this state using
/// `|=`. This is useful when the results of several passes have been tracked in separate
/// slices and need to be folded into a single state.
///
/// # Panics
///
/// Panics if `other` does not have the same length as this state.
pub fn overlay(&mut self, other: &DefIdSlice<Changed>) {
    // Checked up front so the zipped loop below cannot silently ignore a trailing remainder.
    assert_eq!(self.changed.len(), other.len());

    let pairs = self.changed.iter_mut().zip(other);
    for (slot, &incoming) in pairs {
        *slot |= incoming;
    }
}
}

/// A global transformation pass over MIR.
Expand Down
295 changes: 295 additions & 0 deletions libs/@local/hashql/mir/src/pass/transform/canonicalization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
//! MIR canonicalization pass.
//!
//! This module contains the [`Canonicalization`] pass, which runs a fixpoint loop of local and
//! global transformations to simplify MIR bodies into a canonical form.

use core::alloc::Allocator;

use hashql_core::{heap::BumpAllocator, id::bit_vec::DenseBitSet};

use super::{
AdministrativeReduction, CfgSimplify, DeadStoreElimination, ForwardSubstitution, InstSimplify,
};
use crate::{
body::Body,
context::MirContext,
def::{DefId, DefIdSlice},
pass::{
Changed, GlobalTransformPass, GlobalTransformState, TransformPass,
transform::CopyPropagation,
},
};

/// Tuning knobs for the canonicalization fixpoint loop.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct CanonicalizationConfig {
    /// Upper bound on the number of fixpoint iterations; the loop stops once this many
    /// iterations have run, even if bodies are still changing.
    pub max_iterations: usize,
}

impl Default for CanonicalizationConfig {
    /// Returns a configuration that caps the fixpoint loop at 16 iterations.
    fn default() -> Self {
        let max_iterations = 16;

        Self { max_iterations }
    }
}

/// MIR canonicalization driver.
///
/// This pass orchestrates a sequence of local and global transformations in a fixpoint loop,
/// simplifying MIR bodies into a canonical form. Canonicalization reduces redundancy, propagates
/// values, and eliminates dead code to produce cleaner, more uniform MIR.
///
/// The loop ends when an iteration reports no change, when no body remains unstable, or when
/// [`CanonicalizationConfig::max_iterations`] iterations have run.
///
/// # Pass Ordering
///
/// The pass ordering is carefully chosen so each pass feeds the next with new opportunities:
///
/// 1. **Administrative reduction** - Removes structural clutter and normalizes shape
/// 2. **Instruction simplification** - Constant folding and algebraic simplification
/// 3. **Value propagation** (FS/CP alternating) - Propagates values through the code
/// 4. **Dead store elimination** - Removes stores made dead by propagation
/// 5. **CFG simplification** - Cleans up control flow after local changes
pub struct Canonicalization<A: Allocator> {
/// Allocator from which every sub-pass is constructed, one `scoped` region per invocation.
alloc: A,
/// Limits for the fixpoint loop (currently only the iteration cap).
config: CanonicalizationConfig,
}

impl<A: BumpAllocator> Canonicalization<A> {
    /// Builds a canonicalization driver from a configuration and an allocator.
    ///
    /// Every sub-pass is constructed inside its own `scoped` region of `alloc`, so the allocator
    /// only backs temporary data that lives for a single sub-pass invocation.
    pub const fn new_in(config: CanonicalizationConfig, alloc: A) -> Self {
        Self { config, alloc }
    }

    /// Applies a local transform pass to every body that is currently unstable.
    ///
    /// Bodies whose id is not in `unstable` are skipped. For each processed body the pass result
    /// is OR-ed into `state[id]`; the return value is the OR of all individual results.
    fn run_local_pass<'env, 'heap>(
        context: &mut MirContext<'env, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        mut pass: impl TransformPass<'env, 'heap>,
        unstable: &DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        let mut overall = Changed::No;

        for (id, body) in bodies.iter_enumerated_mut() {
            if unstable.contains(id) {
                let outcome = pass.run(context, body);

                overall |= outcome;
                state[id] |= outcome;
            }
        }

        overall
    }

    /// Applies a global transform pass to the full set of bodies.
    ///
    /// The pass receives all bodies at once together with a [`GlobalTransformState`] wrapping
    /// `state`, through which it records the bodies it modified.
    fn run_global_pass<'env, 'heap>(
        context: &mut MirContext<'env, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        mut pass: impl GlobalTransformPass<'env, 'heap>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        let mut tracker = GlobalTransformState::new(state);

        pass.run(context, &mut tracker, bodies)
    }

    /// Runs [`CopyPropagation`] on the unstable bodies inside a fresh allocator scope.
    fn copy_propagation<'heap>(
        &mut self,
        context: &mut MirContext<'_, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        unstable: &DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        self.alloc.scoped(|scope| {
            Self::run_local_pass(
                context,
                bodies,
                CopyPropagation::new_in(scope),
                unstable,
                state,
            )
        })
    }

    /// Runs [`CfgSimplify`] on the unstable bodies inside a fresh allocator scope.
    fn cfg_simplify<'heap>(
        &mut self,
        context: &mut MirContext<'_, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        unstable: &DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        self.alloc.scoped(|scope| {
            Self::run_local_pass(
                context,
                bodies,
                CfgSimplify::new_in(scope),
                unstable,
                state,
            )
        })
    }

    /// Runs [`InstSimplify`] on the unstable bodies inside a fresh allocator scope.
    fn inst_simplify<'heap>(
        &mut self,
        context: &mut MirContext<'_, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        unstable: &DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        self.alloc.scoped(|scope| {
            Self::run_local_pass(
                context,
                bodies,
                InstSimplify::new_in(scope),
                unstable,
                state,
            )
        })
    }

    /// Runs [`ForwardSubstitution`] on the unstable bodies inside a fresh allocator scope.
    fn forward_substitution<'heap>(
        &mut self,
        context: &mut MirContext<'_, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        unstable: &DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        self.alloc.scoped(|scope| {
            Self::run_local_pass(
                context,
                bodies,
                ForwardSubstitution::new_in(scope),
                unstable,
                state,
            )
        })
    }

    /// Runs [`AdministrativeReduction`] over all bodies and re-queues every body it touched.
    ///
    /// Because this is a global pass it may modify bodies that were previously stable; any body
    /// whose entry in `state` is not [`Changed::No`] afterwards is inserted into `unstable`.
    fn administrative_reduction<'heap>(
        &mut self,
        context: &mut MirContext<'_, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        unstable: &mut DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        let outcome: Changed = self.alloc.scoped(|scope| {
            Self::run_global_pass(
                context,
                bodies,
                AdministrativeReduction::new_in(scope),
                state,
            )
        });

        // Nothing was touched, so the unstable set is already accurate.
        if outcome == Changed::No {
            return outcome;
        }

        // Re-queue modified bodies right away so the remaining passes of this iteration see
        // them, which lets changes propagate earlier and can save whole iterations.
        for (id, &body_changed) in state.iter_enumerated() {
            if body_changed != Changed::No {
                unstable.insert(id);
            }
        }

        outcome
    }

    /// Runs [`DeadStoreElimination`] on the unstable bodies inside a fresh allocator scope.
    fn dse<'heap>(
        &mut self,
        context: &mut MirContext<'_, 'heap>,
        bodies: &mut DefIdSlice<Body<'heap>>,
        unstable: &DenseBitSet<DefId>,
        state: &mut DefIdSlice<Changed>,
    ) -> Changed {
        self.alloc.scoped(|scope| {
            Self::run_local_pass(
                context,
                bodies,
                DeadStoreElimination::new_in(scope),
                unstable,
                state,
            )
        })
    }
}

impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap> for Canonicalization<A> {
    /// Drives the canonicalization pipeline until it stabilizes or the iteration cap is reached.
    ///
    /// Returns the OR of the results of the pre-pass and of every iteration. Per-body results
    /// are merged into `state` through [`GlobalTransformState::overlay`].
    #[expect(clippy::integer_division_remainder_used)]
    fn run(
        &mut self,
        context: &mut MirContext<'env, 'heap>,
        state: &mut GlobalTransformState<'_>,
        bodies: &mut DefIdSlice<Body<'heap>>,
    ) -> Changed {
        // The caller-visible state; it is only ever written through `overlay`.
        let global = state;

        // The per-round scratch state lives on the heap rather than in a bump scope: bump scopes
        // require `&mut` access across iterations, and our generic allocator can't express the
        // necessary lifetime bounds cleanly (limitation of the underlying bump-scope crate).
        // Acceptable since this meta-pass runs once and the data is a single byte per body.
        let round = {
            let uninit = context.heap.allocate_slice_uninit(bodies.len());

            DefIdSlice::from_raw_mut(uninit.write_filled(Changed::No))
        };
        // Every body starts out unstable, so the first round visits all of them.
        let mut unstable = DenseBitSet::new_filled(bodies.len());

        // Pre-pass: a single CP + CFG sweep ahead of the fixpoint loop.
        //
        // Both passes are cheap and effective on obvious cases such as
        // `if true { ... } else { ... }`: CP exposes constant conditions, CFG then prunes
        // unreachable blocks and merges straight-line code. The more expensive passes in the
        // loop therefore start from smaller, cleaner bodies.
        let mut global_changed = Changed::No;
        global_changed |= self.copy_propagation(context, bodies, &unstable, round);
        global_changed |= self.cfg_simplify(context, bodies, &unstable, round);

        for iter in 0..self.config.max_iterations {
            // Publish what the previous round (or the pre-pass) recorded, then start this round
            // from a clean slate so `round` only reflects changes made in this iteration.
            global.overlay(round);
            round.as_raw_mut().fill(Changed::No);

            // Each pass is ordered so that it creates opportunities for the next one:
            //
            // 1. AR strips structural clutter (unnecessary wrappers, trivial blocks/calls) and
            //    normalizes shape, exposing simpler instructions.
            // 2. IS folds constants and applies algebraic simplifications on the cleaner
            //    structure, producing canonical right-hand sides that propagate well.
            // 3. FS / CP propagate values and eliminate temporaries, leaving many stores unused.
            // 4. DSE deletes the stores made dead by propagation, which often empties blocks.
            // 5. CS tidies the CFG (empty blocks, unconditional edges) so the next round starts
            //    from a minimal graph.
            let mut progress = Changed::No;
            progress |= self.administrative_reduction(context, bodies, &mut unstable, round);
            progress |= self.inst_simplify(context, bodies, &unstable, round);

            // ForwardSubstitution is the stronger but costlier propagation; CopyPropagation is
            // cheaper and weaker. Even rounds (starting with round 0, when redundancy is
            // highest) use FS, odd rounds use CP to keep propagation going cheaply.
            let use_forward_substitution = iter % 2 == 0;
            progress |= if use_forward_substitution {
                self.forward_substitution(context, bodies, &unstable, round)
            } else {
                self.copy_propagation(context, bodies, &unstable, round)
            };

            progress |= self.dse(context, bodies, &unstable, round);
            progress |= self.cfg_simplify(context, bodies, &unstable, round);

            global_changed |= progress;
            if progress == Changed::No {
                break;
            }

            // Rebuild the unstable set from this round's results: bodies that did not change
            // drop out, bodies that did change stay in or, if a global pass touched a
            // previously stable body, re-enter.
            for (id, &body_changed) in round.iter_enumerated() {
                if body_changed == Changed::No {
                    unstable.remove(id);
                } else {
                    unstable.insert(id);
                }
            }

            if unstable.is_empty() {
                break;
            }
        }

        // Publish whatever the final round (or the pre-pass, if no round ran) recorded.
        global.overlay(round);
        global_changed
    }
}
2 changes: 2 additions & 0 deletions libs/@local/hashql/mir/src/pass/transform/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod administrative_reduction;
mod canonicalization;
mod cfg_simplify;
mod copy_propagation;
mod dbe;
Expand All @@ -13,6 +14,7 @@ mod ssa_repair;

pub use self::{
administrative_reduction::AdministrativeReduction,
canonicalization::{Canonicalization, CanonicalizationConfig},
cfg_simplify::CfgSimplify,
copy_propagation::CopyPropagation,
dbe::DeadBlockElimination,
Expand Down
Loading
Loading