Skip to content

Commit cacf908

Browse files
authored
Add a third register class (#126)
For machines with completely separate vector registers it is useful to have a third register class.
1 parent 244ede8 commit cacf908

File tree

5 files changed

+44
-32
lines changed

5 files changed

+44
-32
lines changed

src/fuzzing/func.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl Function for Func {
146146
fn spillslot_size(&self, regclass: RegClass) -> usize {
147147
match regclass {
148148
RegClass::Int => 1,
149-
RegClass::Float => 2,
149+
RegClass::Float | RegClass::Vector => 2,
150150
}
151151
}
152152
}
@@ -659,9 +659,9 @@ pub fn machine_env() -> MachineEnv {
659659
fn regs(r: core::ops::Range<usize>) -> Vec<PReg> {
660660
r.map(|i| PReg::new(i, RegClass::Int)).collect()
661661
}
662-
let preferred_regs_by_class: [Vec<PReg>; 2] = [regs(0..24), vec![]];
663-
let non_preferred_regs_by_class: [Vec<PReg>; 2] = [regs(24..32), vec![]];
664-
let scratch_by_class: [Option<PReg>; 2] = [None, None];
662+
let preferred_regs_by_class: [Vec<PReg>; 3] = [regs(0..24), vec![], vec![]];
663+
let non_preferred_regs_by_class: [Vec<PReg>; 3] = [regs(24..32), vec![], vec![]];
664+
let scratch_by_class: [Option<PReg>; 3] = [None, None, None];
665665
let fixed_stack_slots = regs(32..63);
666666
// Register 63 is reserved for use as a fixed non-allocatable register.
667667
MachineEnv {

src/ion/data_structures.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,8 @@ pub struct Env<'a, F: Function> {
404404
pub spillslots: Vec<SpillSlotData>,
405405
pub slots_by_size: Vec<SpillSlotList>,
406406

407-
pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 2],
408-
pub preferred_victim_by_class: [PReg; 2],
407+
pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3],
408+
pub preferred_victim_by_class: [PReg; 3],
409409

410410
// When multiple fixed-register constraints are present on a
411411
// single VReg at a single program point (this can happen for,

src/ion/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ impl<'a, F: Function> Env<'a, F> {
7070
slots_by_size: vec![],
7171
allocated_bundle_count: 0,
7272

73-
extra_spillslots_by_class: [smallvec![], smallvec![]],
74-
preferred_victim_by_class: [PReg::invalid(), PReg::invalid()],
73+
extra_spillslots_by_class: [smallvec![], smallvec![], smallvec![]],
74+
preferred_victim_by_class: [PReg::invalid(), PReg::invalid(), PReg::invalid()],
7575

7676
multi_fixed_reg_fixups: vec![],
7777
inserted_moves: vec![],

src/ion/moves.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -812,14 +812,15 @@ impl<'a, F: Function> Env<'a, F> {
812812
redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
813813
last_pos = pos_prio.pos;
814814

815-
// Gather all the moves with Int class and Float class
816-
// separately. These cannot interact, so it is safe to
817-
// have two separate ParallelMove instances. They need to
818-
// be separate because moves between the two classes are
819-
// impossible. (We could enhance ParallelMoves to
820-
// understand register classes, but this seems simpler.)
815+
// Gather all the moves in each RegClass separately.
816+
// These cannot interact, so it is safe to have separate
817+
// ParallelMove instances. They need to be separate because
818+
// moves between the classes are impossible. (We could
819+
// enhance ParallelMoves to understand register classes, but
820+
// this seems simpler.)
821821
let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
822822
let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
823+
let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
823824

824825
for m in moves {
825826
if m.from_alloc == m.to_alloc {
@@ -832,12 +833,17 @@ impl<'a, F: Function> Env<'a, F> {
832833
RegClass::Float => {
833834
float_moves.push(m.clone());
834835
}
836+
RegClass::Vector => {
837+
vec_moves.push(m.clone());
838+
}
835839
}
836840
}
837841

838-
for &(regclass, moves) in
839-
&[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)]
840-
{
842+
for &(regclass, moves) in &[
843+
(RegClass::Int, &int_moves),
844+
(RegClass::Float, &float_moves),
845+
(RegClass::Vector, &vec_moves),
846+
] {
841847
// All moves in `moves` semantically happen in
842848
// parallel. Let's resolve these to a sequence of moves
843849
// that can be done one at a time.

src/lib.rs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,18 @@ use serde::{Deserialize, Serialize};
6868
/// class; i.e., they are disjoint.
6969
///
7070
/// For tight bit-packing throughout our data structures, we support
71-
/// only two classes, "int" and "float". This will usually be enough
72-
/// on modern machines, as they have one class of general-purpose
71+
/// only three classes, "int", "float" and "vector". Usually two will
72+
/// be enough on modern machines, as they have one class of general-purpose
7373
/// integer registers of machine width (e.g. 64 bits), and another
7474
/// class of float/vector registers used both for FP and for vector
75-
/// operations. If needed, we could adjust bitpacking to allow for
76-
/// more classes in the future.
75+
/// operations. Additionally, for machines with totally separate vector
76+
/// registers, a third class is provided.
7777
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
7878
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
7979
pub enum RegClass {
8080
Int = 0,
8181
Float = 1,
82+
Vector = 2,
8283
}
8384

8485
/// A physical register. Contains a physical register number and a class.
@@ -104,7 +105,7 @@ pub struct PReg {
104105
impl PReg {
105106
pub const MAX_BITS: usize = 6;
106107
pub const MAX: usize = (1 << Self::MAX_BITS) - 1;
107-
pub const NUM_INDEX: usize = 1 << (Self::MAX_BITS + 1); // including RegClass bit
108+
pub const NUM_INDEX: usize = 1 << (Self::MAX_BITS + 2); // including RegClass bits
108109

109110
/// Create a new PReg. The `hw_enc` range is 6 bits.
110111
#[inline(always)]
@@ -124,10 +125,11 @@ impl PReg {
124125
/// The register class.
125126
#[inline(always)]
126127
pub const fn class(self) -> RegClass {
127-
if self.bits & (1 << Self::MAX_BITS) == 0 {
128-
RegClass::Int
129-
} else {
130-
RegClass::Float
128+
match (self.bits >> Self::MAX_BITS) & 0b11 {
129+
0 => RegClass::Int,
130+
1 => RegClass::Float,
131+
2 => RegClass::Vector,
132+
_ => unreachable!(),
131133
}
132134
}
133135

@@ -172,6 +174,7 @@ impl core::fmt::Display for PReg {
172174
let class = match self.class() {
173175
RegClass::Int => "i",
174176
RegClass::Float => "f",
177+
RegClass::Vector => "v",
175178
};
176179
write!(f, "p{}{}", self.hw_enc(), class)
177180
}
@@ -299,21 +302,22 @@ impl VReg {
299302
pub const fn new(virt_reg: usize, class: RegClass) -> Self {
300303
debug_assert!(virt_reg <= VReg::MAX);
301304
VReg {
302-
bits: ((virt_reg as u32) << 1) | (class as u8 as u32),
305+
bits: ((virt_reg as u32) << 2) | (class as u8 as u32),
303306
}
304307
}
305308

306309
#[inline(always)]
307310
pub const fn vreg(self) -> usize {
308-
let vreg = (self.bits >> 1) as usize;
311+
let vreg = (self.bits >> 2) as usize;
309312
vreg
310313
}
311314

312315
#[inline(always)]
313316
pub const fn class(self) -> RegClass {
314-
match self.bits & 1 {
317+
match self.bits & 0b11 {
315318
0 => RegClass::Int,
316319
1 => RegClass::Float,
320+
2 => RegClass::Vector,
317321
_ => unreachable!(),
318322
}
319323
}
@@ -734,6 +738,7 @@ impl Operand {
734738
match class_field {
735739
0 => RegClass::Int,
736740
1 => RegClass::Float,
741+
2 => RegClass::Vector,
737742
_ => unreachable!(),
738743
}
739744
}
@@ -832,6 +837,7 @@ impl core::fmt::Display for Operand {
832837
match self.class() {
833838
RegClass::Int => "i",
834839
RegClass::Float => "f",
840+
RegClass::Vector => "v",
835841
},
836842
self.constraint()
837843
)
@@ -1337,7 +1343,7 @@ pub struct MachineEnv {
13371343
///
13381344
/// If an explicit scratch register is provided in `scratch_by_class` then
13391345
/// it must not appear in this list.
1340-
pub preferred_regs_by_class: [Vec<PReg>; 2],
1346+
pub preferred_regs_by_class: [Vec<PReg>; 3],
13411347

13421348
/// Non-preferred physical registers for each class. These are the
13431349
/// registers that will be allocated if a preferred register is
@@ -1346,7 +1352,7 @@ pub struct MachineEnv {
13461352
///
13471353
/// If an explicit scratch register is provided in `scratch_by_class` then
13481354
/// it must not appear in this list.
1349-
pub non_preferred_regs_by_class: [Vec<PReg>; 2],
1355+
pub non_preferred_regs_by_class: [Vec<PReg>; 3],
13501356

13511357
/// Optional dedicated scratch register per class. This is needed to perform
13521358
/// moves between registers when cyclic move patterns occur. The
@@ -1363,7 +1369,7 @@ pub struct MachineEnv {
13631369
/// If a scratch register is not provided then the register allocator will
13641370
/// automatically allocate one as needed, spilling a value to the stack if
13651371
/// necessary.
1366-
pub scratch_by_class: [Option<PReg>; 2],
1372+
pub scratch_by_class: [Option<PReg>; 3],
13671373

13681374
/// Some `PReg`s can be designated as locations on the stack rather than
13691375
/// actual registers. These can be used to tell the register allocator about

0 commit comments

Comments
 (0)