Skip to content

Commit 3df60e0

Browse files
committed
fix: pass algebra test_complex case
1 parent 0dac69d commit 3df60e0

File tree

1 file changed

+50
-4
lines changed

1 file changed

+50
-4
lines changed

crates/toolchain/openvm/src/memcpy.s

Lines changed: 50 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -207,9 +207,55 @@
207207
.file "musl_memcpy.c"
208208

209209
# Define memcpy_loop macro for custom instruction (U-type)
210-
# memcpy_loop <shift>
# Emits one raw 32-bit word instead of a mnemonic: the constant 0x72 OR-ed
# with \shift moved left by 12 bits. No range check or masking is applied to
# \shift, so the caller is responsible for passing a value that fits.
.macro memcpy_loop shift
211-
.word 0x00000072 | (\shift << 12) # opcode 0x72 + shift in immediate field (bits 12-31)
212-
.endm
210+
#-----------------------------------------------------------------------
# memcpy_loop shift
#
# Software bulk-copy loop: while a2 >= 16, copy 16 bytes from 0(a4) to
# 0(a3), advance both pointers by 16 and subtract 16 from a2.
#
# In:     a2 = bytes remaining
#         a3 = destination pointer
#         a4 = source pointer
# Out:    a2 < 16 (tail left for the caller), a3/a4 advanced past the
#         bytes that were copied
# Clobb:  t0 always; t2 when a2 >= 16 on entry; t3-t6 on the word path;
#         t1 on the byte path
# shift:  accepted so existing `memcpy_loop <n>` call sites still
#         assemble; this implementation does not use it.
#
# NOTE(review): call sites are outside this view -- confirm that none of
# t0-t6 is live across an expansion and that a2/a3/a4 really hold
# len/dst/src at every call site.
#
# Numeric local labels (1/2/3) are used so the macro can be expanded
# several times in the same file without symbol clashes.
#-----------------------------------------------------------------------
.macro memcpy_loop shift
        li      t0, 16                  # chunk size; loop-invariant, loaded once
        bltu    a2, t0, 3f              # fewer than 16 bytes: nothing to do

        # Both pointers only ever move in steps of 16, so their low two
        # bits never change: deciding word vs. byte path once is enough.
        or      t2, a3, a4
        andi    t2, t2, 3
        bnez    t2, 2f                  # either pointer not 4-byte aligned

1:      # Word path: 16 bytes as four aligned lw/sw pairs.
        # Invariant: a2 >= 16, a3 and a4 are 4-byte aligned.
        lw      t3, 0(a4)
        lw      t4, 4(a4)
        lw      t5, 8(a4)
        lw      t6, 12(a4)
        sw      t3, 0(a3)
        sw      t4, 4(a3)
        sw      t5, 8(a3)
        sw      t6, 12(a3)
        addi    a4, a4, 16
        addi    a3, a3, 16
        addi    a2, a2, -16
        bgeu    a2, t0, 1b              # another full chunk left?
        j       3f

2:      # Byte path: alignment-safe copy of one 16-byte chunk.
        # Invariant: a2 >= 16. Only the low byte of t1 is stored, so the
        # sign extension done by lb is irrelevant.
        lb      t1, 0(a4)
        sb      t1, 0(a3)
        lb      t1, 1(a4)
        sb      t1, 1(a3)
        lb      t1, 2(a4)
        sb      t1, 2(a3)
        lb      t1, 3(a4)
        sb      t1, 3(a3)
        lb      t1, 4(a4)
        sb      t1, 4(a3)
        lb      t1, 5(a4)
        sb      t1, 5(a3)
        lb      t1, 6(a4)
        sb      t1, 6(a3)
        lb      t1, 7(a4)
        sb      t1, 7(a3)
        lb      t1, 8(a4)
        sb      t1, 8(a3)
        lb      t1, 9(a4)
        sb      t1, 9(a3)
        lb      t1, 10(a4)
        sb      t1, 10(a3)
        lb      t1, 11(a4)
        sb      t1, 11(a3)
        lb      t1, 12(a4)
        sb      t1, 12(a3)
        lb      t1, 13(a4)
        sb      t1, 13(a3)
        lb      t1, 14(a4)
        sb      t1, 14(a3)
        lb      t1, 15(a4)
        sb      t1, 15(a3)
        addi    a4, a4, 16
        addi    a3, a3, 16
        addi    a2, a2, -16
        bgeu    a2, t0, 2b              # another full chunk left?

3:      # Done: a2 < 16.
.endm
258+
213259
.globl memcpy
214260
.p2align 2
215261
.type memcpy,@function
@@ -416,4 +462,4 @@ memcpy:
416462

417463
.ident "Ubuntu clang version 14.0.6-++20220622053131+f28c006a5895-1~exp1~20220622173215.157"
418464
.section ".note.GNU-stack","",@progbits
419-
.addrsig
465+
.addrsig

0 commit comments

Comments
 (0)