@@ -5,6 +5,7 @@ use crate::asm::arm64::*;
 use crate::codegen::split_patch_point;
 use crate::cruby::*;
 use crate::backend::lir::*;
+use crate::hir;
 use crate::options::asm_dump;
 use crate::stats::CompileError;
 use crate::virtualmem::CodePtr;
@@ -690,11 +691,52 @@ impl Assembler {
         asm_local
     }
 
+    /// Resolve ParallelMov instructions without using scratch registers.
+    /// This is used for trampolines that don't allow scratch registers.
+    /// Linearizes all blocks into a single giant block.
+    fn resolve_parallel_mov_pass(self) -> Assembler {
+        let mut asm_local = Assembler::new();
+        asm_local.accept_scratch_reg = self.accept_scratch_reg;
+        asm_local.stack_base_idx = self.stack_base_idx;
+        asm_local.label_names = self.label_names.clone();
+        asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None });
+
+        // Create one giant block to linearize everything into
+        asm_local.new_block(hir::BlockId(usize::MAX), true, true);
+
+        // Get linearized instructions with branch parameters expanded into ParallelMov
+        let linearized_insns = self.linearize_instructions();
+
+        // Process each linearized instruction
+        for insn in linearized_insns {
+            match insn {
+                Insn::ParallelMov { moves } => {
+                    // Resolve parallel moves without scratch register
+                    if let Some(resolved_moves) = Assembler::resolve_parallel_moves(&moves, None) {
+                        for (dst, src) in resolved_moves {
+                            asm_local.mov(dst, src);
+                        }
+                    } else {
+                        unreachable!("ParallelMov requires scratch register but scratch_reg is not allowed");
+                    }
+                }
+                _ => {
+                    // Strip branch args from jumps since everything is now in one block
+                    let insn = Self::strip_branch_args(&self, insn);
+                    asm_local.push_insn(insn);
+                }
+            }
+        }
+
+        asm_local
+    }
+
     /// Split instructions using scratch registers. To maximize the use of the register pool for
     /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions
     /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so this
     /// splits them and uses scratch registers for it.
-    fn arm64_scratch_split(mut self) -> Assembler {
+    /// Linearizes all blocks into a single giant block.
+    fn arm64_scratch_split(self) -> Assembler {
         /// If opnd is Opnd::Mem with a too large disp, make the disp smaller using lea.
         fn split_large_disp(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
             match opnd {
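
A note on resolve_parallel_moves(&moves, None) in the new pass above: its actual implementation lives elsewhere in the backend, but the behavior the pass depends on can be sketched in isolation. The following is a standalone, hypothetical sketch (plain integer register ids instead of the real Opnd type, and a made-up function name): it emits moves in an order that never clobbers a still-needed source and returns None when the moves form a cycle, which is exactly the case the pass treats as unreachable because no scratch register is available to break it.

/// Hypothetical sketch of scratch-free parallel-move resolution.
/// Moves are (dst, src) pairs over plain register ids.
fn resolve_parallel_moves_sketch(moves: &[(u8, u8)]) -> Option<Vec<(u8, u8)>> {
    let mut pending: Vec<(u8, u8)> = moves.to_vec();
    let mut ordered = Vec::with_capacity(pending.len());
    while !pending.is_empty() {
        // A move is safe to emit if its destination is not a source of any other pending move.
        let safe = pending.iter().position(|&(dst, _)| {
            !pending.iter().any(|&(other_dst, other_src)| other_dst != dst && other_src == dst)
        });
        match safe {
            Some(i) => ordered.push(pending.swap_remove(i)),
            // Every remaining destination is still live as a source: a cycle that
            // would need a scratch register (or a swap) to break.
            None => return None,
        }
    }
    Some(ordered)
}

fn main() {
    // r1 <- r0, r2 <- r1: emit r2 <- r1 first so r1 is not clobbered, then r1 <- r0.
    assert_eq!(resolve_parallel_moves_sketch(&[(1, 0), (2, 1)]), Some(vec![(2, 1), (1, 0)]));
    // r0 <- r1, r1 <- r0: a cycle, unresolvable without a scratch register.
    assert_eq!(resolve_parallel_moves_sketch(&[(0, 1), (1, 0)]), None);
}
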
@@ -750,12 +792,23 @@ impl Assembler {
         // Prepare StackState to lower MemBase::Stack
         let stack_state = StackState::new(self.stack_base_idx);
 
-        let mut asm_local = Assembler::new_with_asm(&self);
+        let mut asm_local = Assembler::new();
+        asm_local.accept_scratch_reg = true;
+        asm_local.stack_base_idx = self.stack_base_idx;
+        asm_local.label_names = self.label_names.clone();
+        asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None });
+
+        // Create one giant block to linearize everything into
+        asm_local.new_block(hir::BlockId(usize::MAX), true, true);
+
         let asm = &mut asm_local;
-        asm.accept_scratch_reg = true;
-        let iterator = &mut self.instruction_iterator();
 
-        while let Some((_, mut insn)) = iterator.next(asm) {
+        // Get linearized instructions with branch parameters expanded into ParallelMov
+        let linearized_insns = self.linearize_instructions();
+
+        // Process each linearized instruction
+        for (idx, insn) in linearized_insns.iter().enumerate() {
+            let mut insn = insn.clone();
             match &mut insn {
                 Insn::Add { left, right, out } |
                 Insn::Sub { left, right, out } |
@@ -795,7 +848,7 @@ impl Assembler {
                     };
 
                     // If the next instruction is JoMul
-                    if matches!(iterator.peek(), Some((_, Insn::JoMul(_)))) {
+                    if idx + 1 < linearized_insns.len() && matches!(linearized_insns[idx + 1], Insn::JoMul(_)) {
                         // Produce a register that is all zeros or all ones
                         // Based on the sign bit of the 64-bit mul result
                         asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: reg_out, shift: Opnd::UImm(63) });
@@ -907,6 +960,8 @@ impl Assembler {
                     split_patch_point(asm, target, invariant, version);
                 }
                 _ => {
+                    // Strip branch args from jumps since everything is now in one block
+                    let insn = Self::strip_branch_args(&self, insn);
                     asm.push_insn(insn);
                 }
             }
@@ -915,6 +970,28 @@ impl Assembler {
         asm_local
     }
 
+    /// Strip branch arguments from a jump instruction, converting Target::Block(edge) with args
+    /// into Target::Label. This is needed after linearization since all blocks are merged into one.
+    fn strip_branch_args(asm: &Assembler, insn: Insn) -> Insn {
+        match insn {
+            Insn::Jmp(Target::Block(edge)) => Insn::Jmp(Target::Label(asm.block_label(edge.target))),
+            Insn::Jz(Target::Block(edge)) => Insn::Jz(Target::Label(asm.block_label(edge.target))),
+            Insn::Jnz(Target::Block(edge)) => Insn::Jnz(Target::Label(asm.block_label(edge.target))),
+            Insn::Je(Target::Block(edge)) => Insn::Je(Target::Label(asm.block_label(edge.target))),
+            Insn::Jne(Target::Block(edge)) => Insn::Jne(Target::Label(asm.block_label(edge.target))),
+            Insn::Jl(Target::Block(edge)) => Insn::Jl(Target::Label(asm.block_label(edge.target))),
+            Insn::Jg(Target::Block(edge)) => Insn::Jg(Target::Label(asm.block_label(edge.target))),
+            Insn::Jge(Target::Block(edge)) => Insn::Jge(Target::Label(asm.block_label(edge.target))),
+            Insn::Jbe(Target::Block(edge)) => Insn::Jbe(Target::Label(asm.block_label(edge.target))),
+            Insn::Jb(Target::Block(edge)) => Insn::Jb(Target::Label(asm.block_label(edge.target))),
+            Insn::Jo(Target::Block(edge)) => Insn::Jo(Target::Label(asm.block_label(edge.target))),
+            Insn::JoMul(Target::Block(edge)) => Insn::JoMul(Target::Label(asm.block_label(edge.target))),
+            Insn::Joz(opnd, Target::Block(edge)) => Insn::Joz(opnd, Target::Label(asm.block_label(edge.target))),
+            Insn::Jonz(opnd, Target::Block(edge)) => Insn::Jonz(opnd, Target::Label(asm.block_label(edge.target))),
+            _ => insn,
+        }
+    }
+
     /// Emit platform-specific machine code
     /// Returns a list of GC offsets. Can return failure to signal caller to retry.
     fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Option<Vec<CodePtr>> {
@@ -1112,8 +1189,11 @@ impl Assembler {
         let (_hook, mut hook_insn_idx) = AssemblerPanicHook::new(self, 0);
 
         // For each instruction
+        // NOTE: At this point, the assembler should have been linearized into a single giant block
+        // by either resolve_parallel_mov_pass() or arm64_scratch_split().
         let mut insn_idx: usize = 0;
-        let insns = self.linearize_instructions();
+        assert_eq!(self.basic_blocks.len(), 1, "Assembler should be linearized into a single block before arm64_emit");
+        let insns = &self.basic_blocks[0].insns;
 
         while let Some(insn) = insns.get(insn_idx) {
             // Update insn_idx that is shown on panic
@@ -1610,7 +1690,7 @@ impl Assembler {
     /// Optimize and compile the stored instructions
     pub fn compile_with_regs(mut self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Result<(CodePtr, Vec<CodePtr>), CompileError> {
         // The backend is allowed to use scratch registers only if it has not accepted them so far.
-        let use_scratch_reg = !self.accept_scratch_reg;
+        let scratch_reg_allowed = !self.accept_scratch_reg;
 
         // Initialize block labels before any processing
         self.init_block_labels();
@@ -1619,16 +1699,23 @@ impl Assembler {
         let asm = self.arm64_split();
         asm_dump!(asm, split);
 
+        // Linearize before here?
         let mut asm = asm.alloc_regs(regs)?;
         asm_dump!(asm, alloc_regs);
 
         // We put compile_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
         asm.compile_exits();
         asm_dump!(asm, compile_exits);
 
-        if use_scratch_reg {
+        // linearize
+        //
+        if scratch_reg_allowed {
             asm = asm.arm64_scratch_split();
             asm_dump!(asm, scratch_split);
+        } else {
+            // For trampolines that use scratch registers, resolve ParallelMov without scratch_reg.
+            asm = asm.resolve_parallel_mov_pass();
+            asm_dump!(asm, resolve_parallel_mov);
         }
 
         // Create label instances in the code block
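
For readers unfamiliar with the linearization both new passes rely on: judging by the comments in this diff, linearize_instructions() flattens the basic blocks into one instruction stream and expands each branch's block parameters into an explicit ParallelMov ahead of the jump. The sketch below is purely illustrative and uses a made-up mini-IR (MiniInsn, MiniBlock, linearize are hypothetical names, not the real LIR types); it only shows that shape.

// Hypothetical mini-IR, only to illustrate "linearize blocks + expand branch args into ParallelMov".
use std::collections::HashMap;

#[derive(Debug)]
enum MiniInsn {
    Label(usize),               // start of a (former) basic block
    ParallelMov(Vec<(u8, u8)>), // (dst_reg, src_reg) pairs that move "at once"
    Jmp(usize),                 // jump to a block label; edge arguments already stripped
    Ret(u8),
}

struct MiniBlock {
    id: usize,
    params: Vec<u8>,                  // registers holding this block's parameters
    insns: Vec<MiniInsn>,
    branch: Option<(usize, Vec<u8>)>, // terminator edge: (target block, argument registers)
}

/// Flatten blocks into one stream: each edge's arguments become a ParallelMov into the
/// target block's parameter registers, and the branch itself keeps only a label.
fn linearize(blocks: Vec<MiniBlock>) -> Vec<MiniInsn> {
    let params: HashMap<usize, Vec<u8>> =
        blocks.iter().map(|b| (b.id, b.params.clone())).collect();
    let mut out = Vec::new();
    for block in blocks {
        out.push(MiniInsn::Label(block.id));
        out.extend(block.insns);
        if let Some((target, args)) = block.branch {
            let moves: Vec<(u8, u8)> = params[&target].iter().copied().zip(args).collect();
            out.push(MiniInsn::ParallelMov(moves));
            out.push(MiniInsn::Jmp(target));
        }
    }
    out
}

fn main() {
    let blocks = vec![
        MiniBlock { id: 0, params: vec![], insns: vec![], branch: Some((1, vec![3, 4])) },
        MiniBlock { id: 1, params: vec![0, 1], insns: vec![MiniInsn::Ret(0)], branch: None },
    ];
    // Prints: Label(0), ParallelMov([(0, 3), (1, 4)]), Jmp(1), Label(1), Ret(0)
    for insn in linearize(blocks) {
        println!("{:?}", insn);
    }
}

After this flattening, jump targets only need labels, which is why strip_branch_args can drop the edge arguments; the remaining work is resolving each ParallelMov, with a scratch register in arm64_scratch_split or without one in resolve_parallel_mov_pass.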