Skip to content

Commit 2a93583

Browse files
committed
riscv32: fix several backend bugs
Signed-off-by: Paul Guyot <[email protected]>
1 parent d9e5f33 commit 2a93583

File tree

4 files changed

+307
-306
lines changed

4 files changed

+307
-306
lines changed

libs/jit/src/jit_riscv32.erl

Lines changed: 81 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -468,19 +468,13 @@ update_branches(
468468
>>,
469469
<<DirectBranch/binary, Nops/binary>>;
470470
true ->
471-
% Keep far branch sequence: auipc + lw + jalr + data
472-
% RISC-V far branch is always 16 bytes
473-
case Size of
474-
16 ->
475-
% 16-byte sequence: auipc + lw + jalr + data
476-
I1 = jit_riscv32_asm:auipc(TempReg, 0),
477-
I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8),
478-
I3 = jit_riscv32_asm:jalr(zero, TempReg, 0),
479-
% Calculate absolute target address
480-
TargetAddress = LabelOffset,
481-
I4 = <<TargetAddress:32/little>>,
482-
<<I1/binary, I2/binary, I3/binary, I4/binary>>
483-
end
471+
% Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes)
472+
% Split the relative offset into upper 20 bits and lower 12 bits
473+
Hi20 = (Rel + 16#800) bsr 12,
474+
Lo12 = Rel - (Hi20 bsl 12),
475+
I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
476+
I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
477+
<<I1/binary, I2/binary>>
484478
end;
485479
jump_table_auipc_jalr ->
486480
% Calculate PC-relative offset from AUIPC instruction to target
@@ -679,7 +673,8 @@ return_if_not_equal_to_ctx(
679673
end,
680674
I3 = jit_riscv32_asm:ret(),
681675
% Branch if equal (skip the return)
682-
I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, byte_size(I2) + byte_size(I3)),
676+
% Offset must account for the beq instruction itself (4 bytes) plus I2 and I3
677+
I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)),
683678
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
684679
{AvailableRegs1, UsedRegs1} = free_reg(
685680
AvailableRegs0, UsedRegs0, Reg
@@ -754,38 +749,43 @@ branch_to_offset_code(_State, Offset, TargetOffset) when
754749
Rel = TargetOffset - Offset,
755750
jit_riscv32_asm:j(Rel);
756751
branch_to_offset_code(
757-
#state{available_regs = [TempReg | _]}, _Offset, TargetOffset
752+
#state{available_regs = [TempReg | _]}, Offset, TargetOffset
758753
) ->
759-
% Far branch: use auipc + lw + jalr sequence (RISC-V)
760-
% This creates a PC-relative load sequence - always 16 bytes (4-byte aligned)
754+
% Far branch: use auipc + jalr sequence for PC-relative addressing
755+
% This computes: PC + Immediate and jumps to it
761756

762-
% TempReg = PC
763-
I1 = jit_riscv32_asm:auipc(TempReg, 0),
764-
% TempReg = *(PC+8)
765-
I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8),
766-
% Jump to TempReg
767-
I3 = jit_riscv32_asm:jalr(zero, TempReg, 0),
768-
% The literal value is the absolute target offset
769-
I4 = <<TargetOffset:32/little>>,
770-
<<I1/binary, I2/binary, I3/binary, I4/binary>>.
757+
Rel = TargetOffset - Offset,
758+
% Split the relative offset into upper 20 bits and lower 12 bits
759+
% RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12)
760+
% Since jalr's imm12 is sign-extended, Lo12 is negative when bit 11 of Rel is set;
761+
% adding 0x800 before the shift rounds Hi20 up by one in that case to compensate
762+
Hi20 = (Rel + 16#800) bsr 12,
763+
Lo12Unsigned = Rel band 16#FFF,
764+
% Convert to signed 12-bit value: if bit 11 is set, subtract 4096
765+
Lo12 = if
766+
Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000;
767+
true -> Lo12Unsigned
768+
end,
769+
770+
% TempReg = PC + (Hi20 << 12)
771+
I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
772+
% Jump to TempReg + sign_extend(Lo12)
773+
I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
774+
<<I1/binary, I2/binary>>.
771775

772776
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
773777
CodeBlock = branch_to_offset_code(State, Offset, LabelOffset),
774778
{State, CodeBlock};
775779
branch_to_label_code(
776780
#state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false
777781
) ->
778-
% RISC-V: Far branch sequence - always 16 bytes (4-byte aligned)
782+
% RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes)
779783

780-
% Load PC into temp
784+
% Placeholder: auipc TempReg, 0
781785
I1 = jit_riscv32_asm:auipc(TempReg, 0),
782-
% Load offset from PC+8
783-
I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8),
784-
% Jump to address
785-
I3 = jit_riscv32_asm:jalr(zero, TempReg, 0),
786-
% Placeholder offset
787-
I4 = <<0:32/little>>,
788-
CodeBlock = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
786+
% Placeholder: jalr zero, TempReg, 0
787+
I2 = jit_riscv32_asm:jalr(zero, TempReg, 0),
788+
CodeBlock = <<I1/binary, I2/binary>>,
789789
SequenceSize = byte_size(CodeBlock),
790790
% Add relocation entry
791791
Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}},
@@ -795,17 +795,13 @@ branch_to_label_code(
795795
#state{available_regs = [], branches = Branches} = State0, Offset, Label, false
796796
) ->
797797
% RISC-V: Use t6 as scratch (caller-saved, safe to clobber)
798-
% Same sequence as when we have available regs - always 16 bytes (4-byte aligned)
798+
% Far branch sequence using PC-relative auipc + jalr (8 bytes)
799799

800-
% Load PC into t6
800+
% Placeholder: auipc t6, 0
801801
I1 = jit_riscv32_asm:auipc(t6, 0),
802-
% Load offset from PC+8
803-
I2 = jit_riscv32_asm:lw(t6, t6, 8),
804-
% Jump to address
805-
I3 = jit_riscv32_asm:jalr(zero, t6, 0),
806-
% Placeholder offset
807-
I4 = <<0:32/little>>,
808-
CodeBlock = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
802+
% Placeholder: jalr zero, t6, 0
803+
I2 = jit_riscv32_asm:jalr(zero, t6, 0),
804+
CodeBlock = <<I1/binary, I2/binary>>,
809805
SequenceSize = byte_size(CodeBlock),
810806
% Add relocation entry
811807
Reloc = {Label, Offset, {far_branch, SequenceSize, t6}},
@@ -1528,9 +1524,17 @@ call_func_ptr(
15281524
% Calculate stack offset: find register index in SavedRegs * 4 bytes
15291525
ResultReg = element(2, FuncPtrTuple),
15301526
RegIndex = index_of(ResultReg, SavedRegs),
1531-
StoreResultStackOffset = RegIndex * 4,
1532-
StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset),
1533-
{StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
1527+
case RegIndex >= 0 of
1528+
true ->
1529+
StoreResultStackOffset = RegIndex * 4,
1530+
StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset),
1531+
{StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
1532+
false ->
1533+
% FuncPtrReg was not in SavedRegs, use an available register
1534+
[ResultReg1 | _] = AvailableRegs1 -- SavedRegs,
1535+
MoveResult = jit_riscv32_asm:mv(ResultReg1, a0),
1536+
{StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]}
1537+
end;
15341538
_ ->
15351539
% Use any free register that is not in SavedRegs
15361540
[ResultReg | _] = AvailableRegs1 -- SavedRegs,
@@ -1632,8 +1636,8 @@ parameter_regs0([], _, Acc) ->
16321636
lists:reverse(Acc);
16331637
parameter_regs0([{avm_int64_t, _} | T], [a0, a1 | Rest], Acc) ->
16341638
parameter_regs0(T, Rest, [a1, a0 | Acc]);
1635-
parameter_regs0([{avm_int64_t, _} | T], [a1, a2, a3 | Rest], Acc) ->
1636-
parameter_regs0(T, Rest, [a3, a2 | Acc]);
1639+
parameter_regs0([{avm_int64_t, _} | T], [a1, a2 | Rest], Acc) ->
1640+
parameter_regs0(T, Rest, [a2, a1 | Acc]);
16371641
parameter_regs0([{avm_int64_t, _} | T], [a2, a3 | Rest], Acc) ->
16381642
parameter_regs0(T, Rest, [a3, a2 | Acc]);
16391643
parameter_regs0([_Other | T], [Reg | Rest], Acc) ->
@@ -2637,7 +2641,9 @@ decrement_reductions_and_maybe_schedule_next(
26372641
I4 = jit_riscv32_asm:bne(Temp, zero, 0),
26382642
% Set continuation to the next instruction
26392643
ADROffset = BNEOffset + byte_size(I4),
2640-
I5 = pc_relative_address(Temp, 0),
2644+
% Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address
2645+
% This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes)
2646+
I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
26412647
I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
26422648
% Append the instructions to the stream
26432649
Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
@@ -2647,7 +2653,16 @@ decrement_reductions_and_maybe_schedule_next(
26472653
#state{stream = Stream3} = State2,
26482654
NewOffset = StreamModule:offset(Stream3),
26492655
NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset),
2650-
NewI5 = pc_relative_address(Temp, NewOffset - ADROffset),
2656+
NewI5Offset = NewOffset - ADROffset,
2657+
% Generate the new pc_relative_address instruction, padding with NOP if needed
2658+
NewI5 = case pc_relative_address(Temp, NewI5Offset) of
2659+
I when byte_size(I) =:= 4 ->
2660+
% Only auipc, pad with NOP
2661+
<<I/binary, (jit_riscv32_asm:nop())/binary>>;
2662+
I when byte_size(I) =:= 8 ->
2663+
% auipc + addi, no padding needed
2664+
I
2665+
end,
26512666
Stream4 = StreamModule:replace(
26522667
Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
26532668
),
@@ -2753,17 +2768,12 @@ set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State
27532768
% Reserve space for offset load instruction
27542769
% li can generate 1 instruction (4 bytes) for small immediates (< 2048)
27552770
% or 2 instructions (8 bytes) for large immediates
2756-
% Since we use (offset bsl 2), threshold is when offset >= 512 bytes
2757-
% To be safe, use same threshold as AArch64 relative to instruction encoding limits
2758-
{I2, I3} =
2759-
if
2760-
Offset >= 512 ->
2761-
% Need 2 instructions (lui + addi) for large offsets
2762-
{jit_riscv32_asm:nop(), jit_riscv32_asm:nop()};
2763-
true ->
2764-
% Need 1 instruction (addi) for small offsets
2765-
{jit_riscv32_asm:nop(), <<>>}
2766-
end,
2771+
% Since we don't know the final CP value yet (it depends on code size),
2772+
% we must always reserve 2 instructions (8 bytes) to be safe
2773+
% The final CP value is (final_offset << 2), and final_offset is unknown
2774+
% Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0)
2775+
I2 = <<16#FFFFFFFF:32/little>>,
2776+
I3 = <<16#FFFFFFFF:32/little>>,
27672777
MOVOffset = Offset + byte_size(I1),
27682778
% OR the module index with the offset (loaded in temp register)
27692779
I4 = jit_riscv32_asm:or_(Reg, TempReg),
@@ -2783,8 +2793,15 @@ rewrite_cp_offset(
27832793
TempReg
27842794
) ->
27852795
NewOffset = StreamModule:offset(Stream0) - CodeOffset,
2786-
NewMoveInstr = jit_riscv32_asm:li(TempReg, NewOffset bsl 2),
2787-
Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr),
2796+
CPValue = NewOffset bsl 2,
2797+
NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue),
2798+
% We reserved 8 bytes (2 instructions) for the CP value
2799+
% If li generates only 4 bytes, pad with a NOP to maintain alignment
2800+
PaddedInstr = case byte_size(NewMoveInstr) of
2801+
4 -> <<NewMoveInstr/binary, (jit_riscv32_asm:nop())/binary>>;
2802+
8 -> NewMoveInstr
2803+
end,
2804+
Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr),
27882805
State0#state{stream = Stream1}.
27892806

27902807
set_bs(

src/libAtomVM/jit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
#include <math.h>
4040
#include <stddef.h>
4141

42-
#define ENABLE_TRACE
42+
//#define ENABLE_TRACE
4343
#include "trace.h"
4444

4545
// Verify matching atom index in default_atoms.hrl

src/libAtomVM/opcodesswitch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#include "stacktrace.h"
4444
#endif
4545

46-
#define ENABLE_TRACE
46+
//#define ENABLE_TRACE
4747
#include "trace.h"
4848

4949
// These constants can be used to reduce the size of the VM for a specific

0 commit comments

Comments
 (0)