Skip to content

Commit dd2e642

Browse files
committed
chore: execution runs without runtime errors; failing at verification stages
1 parent a065c11 commit dd2e642

File tree

8 files changed

+225
-116
lines changed

8 files changed

+225
-116
lines changed

benchmarks/guest/fibonacci/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ edition.workspace = true
77
openvm = { workspace = true, features = ["std"] }
88

99
[features]
10-
default = []
10+
default = ["custom-memcpy"]
11+
custom-memcpy = []

benchmarks/guest/fibonacci/src/main.rs

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,45 @@ use core::ptr;
22

33
openvm::entry!(main);
44

5-
/// Moves all the elements of `src` into `dst`, leaving `src` empty.
6-
#[no_mangle]
7-
pub fn append<T>(dst: &mut [T], src: &mut [T], shift: usize) {
5+
// Explicitly use the C memcpy function to ensure we're using custom memcpy
6+
extern "C" {
7+
// in rust, u8 is 1 byte of memory
8+
fn memcpy(dst: *mut u8, src: *const u8, n: usize) -> *mut u8;
9+
}
10+
11+
/// Test function that explicitly calls memcpy to verify custom implementation
12+
pub fn test_custom_memcpy(dst: &mut [u8], src: &[u8], shift: usize) {
813
let src_len = src.len();
914
let dst_len = dst.len();
1015

16+
// Bounds checking
17+
if shift + src_len > dst_len {
18+
return; // Just return on bounds error
19+
}
20+
1121
unsafe {
12-
// The call to add is always safe because `Vec` will never
13-
// allocate more than `isize::MAX` bytes.
14-
let dst_ptr = dst.as_mut_ptr().wrapping_add(shift);
22+
let dst_ptr = dst.as_mut_ptr().add(shift);
1523
let src_ptr = src.as_ptr();
16-
println!("dst_ptr: {:?}", dst_ptr);
17-
println!("src_ptr: {:?}", src_ptr);
18-
println!("src_len: {:?}", src_len);
19-
20-
// The two regions cannot overlap because mutable references do
21-
// not alias, and two different vectors cannot own the same
22-
// memory.
23-
ptr::copy_nonoverlapping(src_ptr, dst_ptr, src_len);
24+
25+
// This will definitely use our custom memcpy implementation
26+
memcpy(dst_ptr, src_ptr, src_len);
2427
}
2528
}
2629

2730
pub fn main() {
2831
let mut a: [u8; 1000] = [1; 1000];
32+
for i in 0..1000 {
33+
a[i] = 1 as u8;
34+
}
2935
let mut b: [u8; 500] = [2; 500];
36+
for i in 0..500 {
37+
b[i] = 2 as u8;
38+
}
39+
40+
let shift: usize = 3;
3041

31-
let shift: usize = 0;
32-
append(&mut a, &mut b, shift);
42+
// Test the custom memcpy
43+
test_custom_memcpy(&mut a, &b, shift);
3344

3445
for i in 0..1000 {
3546
if i < shift || i >= shift + b.len() {
@@ -42,3 +53,7 @@ pub fn main() {
4253
println!("a: {:?}", a);
4354
println!("b: {:?}", b);
4455
}
56+
/*
57+
ok what lolll
58+
memcpy works (??)
59+
*/

crates/toolchain/openvm/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ num-bigint.workspace = true
2828
chrono = { version = "0.4", default-features = false, features = ["serde"] }
2929

3030
[features]
31-
default = ["getrandom-unsupported"]
31+
default = ["getrandom-unsupported", "custom-memcpy"]
3232
# Defines a custom getrandom backend that always errors. This feature should be enabled if you are sure getrandom is never used but it is pulled in as a compilation dependency.
3333
getrandom-unsupported = ["dep:getrandom", "dep:getrandom-v02"]
3434
# The zkVM uses a bump-pointer heap allocator by default which does not free
3535
# memory. This will use a slower linked-list heap allocator to reclaim memory.
3636
heap-embedded-alloc = ["openvm-platform/heap-embedded-alloc"]
3737
std = ["serde/std", "openvm-platform/std"]
38+
# Enable custom memcpy implementation with specialized instructions
39+
custom-memcpy = []
3840

3941
[package.metadata.cargo-shear]
4042
ignored = ["openvm-custom-insn", "getrandom"]

extensions/memcpy/circuit/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,7 @@ derive_more = { workspace = true, features = ["from"] }
2222
serde.workspace = true
2323
strum = { workspace = true }
2424
tracing.workspace = true
25+
26+
[features]
27+
default = ["custom-memcpy"]
28+
custom-memcpy = []

extensions/memcpy/circuit/src/iteration.rs

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ impl<F: Field> BaseAirWithPublicValues<F> for MemcpyIterAir {}
9393
impl<F: Field> PartitionedBaseAir<F> for MemcpyIterAir {}
9494

9595
impl<AB: InteractionBuilder> Air<AB> for MemcpyIterAir {
96+
// assertions for AIR constraints
9697
fn eval(&self, builder: &mut AB) {
9798
let main = builder.main();
9899
let (prev, local) = (main.row_slice(0), main.row_slice(1));
@@ -377,6 +378,8 @@ impl<'a> CustomBorrow<'a, MemcpyIterRecordMut<'a>, MemcpyIterLayout> for [u8] {
377378
unsafe fn extract_layout(&self) -> MemcpyIterLayout {
378379
let header: &MemcpyIterRecordHeader = self.borrow();
379380
let num_rows = ((header.len - header.shift as u32) >> 4) as usize + 1;
381+
MultiRowLayout::new(MemcpyIterMetadata { num_rows });
382+
let num_rows = ((header.len - header.shift as u32) >> 4) as usize + 1;
380383
MultiRowLayout::new(MemcpyIterMetadata { num_rows })
381384
}
382385
}
@@ -420,7 +423,7 @@ where
420423
) -> Result<(), ExecutionError> {
421424
let Instruction { opcode, c, .. } = instruction;
422425
debug_assert_eq!(*opcode, Rv32MemcpyOpcode::MEMCPY_LOOP.global_opcode());
423-
let shift = c.as_canonical_u32() as u8;
426+
let shift = c.as_canonical_u32() as u8; // written into c slot
424427
debug_assert!([0, 1, 2, 3].contains(&shift));
425428

426429
let mut dest = read_rv32_register(
@@ -440,17 +443,25 @@ where
440443
} as u32,
441444
);
442445
let mut len = read_rv32_register(state.memory.data(), A2_REGISTER_PTR as u32);
446+
debug_assert!(
447+
shift == 0 || (dest % 4 == 0),
448+
"dest must be 4-byte aligned in MEMCPY_LOOP"
449+
);
450+
debug_assert!(len >= shift as u32);
443451

444452
// Create a record sized to the exact number of 16-byte iterations (header + iterations)
445453
// This calculation must match extract_layout and fill_trace
446454

447-
// FIX 1: prevent underflow when len < shift
448-
let effective_len = len.saturating_sub(shift as u32);
455+
let effective_len = len.saturating_sub(shift as u32); // n >= 16 + shift
449456
let num_iters = (effective_len >> 4) as usize;
450-
eprintln!("num_iters = {:?}", num_iters);
451-
// eprintln!("state.pc = {:?}", state.pc);
452-
// eprintln!("state.memory.timestamp = {:?}", state.memory.timestamp);
453-
// eprintln!("state.memory.data() = {:?}", state.memory.data());
457+
// eprintln!(
458+
// "PREFLIGHT: len={}, shift={}, effective_len={}, num_iters={}, allocated_rows={}",
459+
// len,
460+
// shift,
461+
// effective_len,
462+
// num_iters,
463+
// num_iters + 1
464+
// );
454465
let record: MemcpyIterRecordMut<'_> =
455466
state.ctx.alloc(MultiRowLayout::new(MemcpyIterMetadata {
456467
//allocating based on number of rows needed
@@ -464,15 +475,16 @@ where
464475
record.inner.dest = dest;
465476
record.inner.source = source;
466477
record.inner.len = len;
467-
eprintln!(
468-
"shift = {:?}, len = {:?}, source = {:?}, source%16 = {:?}, dest = {:?}, dest%16 = {:?}",
469-
shift, len, source, source % 16, dest, dest % 16
470-
);
478+
// eprintln!(
479+
// "shift = {:?}, len = {:?}, source = {:?}, source%16 = {:?}, dest = {:?}, dest%16 = {:?}",
480+
// shift, len, source, source % 16, dest, dest % 16
481+
// );
471482

472483
// Fill record.var for the first row of iteration trace
473484
// FIX 2: read source-4 (the word ending at s[-1]); zero if out-of-bounds.
474485
if shift != 0 {
475486
if source >= 4 {
487+
// read the previous word from memory
476488
record.var[0].data[3] = tracing_read(
477489
state.memory,
478490
RV32_MEMORY_AS,
@@ -494,13 +506,14 @@ where
494506
source + 4 * i as u32,
495507
&mut record.var[idx].read_aux[i].prev_timestamp,
496508
);
509+
//use shifted data, to construct the write data for each given word
497510
let write_data: [u8; MEMCPY_LOOP_NUM_LIMBS] = array::from_fn(|j| {
498511
if j < shift as usize {
499512
if i > 0 {
500-
// First s bytes come from previous 4-byte word tail
513+
// First s bytes come from previous 4-byte word tail, take from previous word, in our 16 byte chunk
501514
record.var[idx].data[i - 1][j + (4 - shift as usize)]
502515
} else {
503-
// For i == 0, take from previous chunk's last word tail
516+
// For i == 0, take from previous chunk's last word tail; otherwise, take last word of previous chunk
504517
record.var[idx - 1].data[3][j + (4 - shift as usize)]
505518
}
506519
} else {
@@ -570,7 +583,7 @@ where
570583
debug_assert_eq!(record.inner.len, u32::from_le_bytes(len_data));
571584

572585
*state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
573-
586+
eprintln!("PREFLIGHT: done");
574587
Ok(())
575588
}
576589
}
@@ -936,6 +949,7 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
936949
) -> u32 {
937950
let shift = pre_compute.c;
938951
let mut height = 1;
952+
eprintln!("RUNTIME: Starting with height={}, shift={}", height, shift);
939953
// Read dest and source from registers
940954
let (dest, source) = if shift == 0 {
941955
(
@@ -955,6 +969,11 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
955969
let mut source = u32::from_le_bytes(source) - 12 * (shift != 0) as u32;
956970
let mut len = u32::from_le_bytes(len);
957971

972+
eprintln!(
973+
"RUNTIME: Initial values: dest={}, source={}, len={}",
974+
dest, source, len
975+
);
976+
958977
// Check address ranges are valid
959978
debug_assert!(dest < (1 << POINTER_MAX_BITS));
960979
debug_assert!((source - 4 * (shift != 0) as u32) < (1 << POINTER_MAX_BITS));
@@ -994,28 +1013,8 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
9941013
height += 1;
9951014
}
9961015

997-
// Handle remaining bytes (len in [0, 15]) so that total `copy_len == original len`.
998-
if len > 0 {
999-
let remaining_words = ((len + 3) >> 2) as usize;
1000-
for i in 0..remaining_words.min(4) {
1001-
let data = vm_state.vm_read::<u8, 4>(RV32_MEMORY_AS, source + 4 * i as u32);
1002-
let write_data: [u8; 4] = array::from_fn(|j| {
1003-
if j < shift as usize {
1004-
prev_data[j + (4 - shift as usize)]
1005-
} else {
1006-
data[j - shift as usize]
1007-
}
1008-
});
1009-
vm_state.vm_write(RV32_MEMORY_AS, dest + 4 * i as u32, &write_data);
1010-
prev_data = data;
1011-
}
1012-
// Advance pointers to reflect bytes written
1013-
let advanced = (remaining_words as u32) << 2;
1014-
source += advanced;
1015-
dest += advanced;
1016-
len = 0;
1017-
height += 1;
1018-
}
1016+
// Note: remaining bytes (len in [0, 15]) are handled by surrounding code,
1017+
// not by this executor. The height calculation must match the trace exactly.
10191018

10201019
// Write the result back to memory
10211020
if shift == 0 {
@@ -1046,6 +1045,7 @@ unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
10461045

10471046
*vm_state.pc_mut() = vm_state.pc().wrapping_add(DEFAULT_PC_STEP);
10481047
*vm_state.instret_mut() = vm_state.instret() + 1;
1048+
eprintln!("RUNTIME: Returning height={}", height);
10491049
height
10501050
}
10511051

extensions/memcpy/circuit/src/loops.rs

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ pub struct MemcpyLoopCols<T> {
6060
pub write_aux: [MemoryBaseAuxCols<T>; 3],
6161
pub source_minus_twelve_carry: T,
6262
pub to_source_minus_twelve_carry: T,
63+
// When true, indicates we should saturate (clamp) source-12 to 0 (zero-padding case)
64+
pub is_source_small: T,
65+
// When true, indicates we should saturate (clamp) to_source-12 to 0 (zero-padding case)
66+
pub is_to_source_small: T,
6367
}
6468

6569
pub const NUM_MEMCPY_LOOP_COLS: usize = size_of::<MemcpyLoopCols<u8>>();
@@ -124,6 +128,8 @@ impl<AB: InteractionBuilder> Air<AB> for MemcpyLoopAir {
124128
local.shift.iter().for_each(|x| builder.assert_bool(*x));
125129
builder.assert_bool(local.source_minus_twelve_carry);
126130
builder.assert_bool(local.to_source_minus_twelve_carry);
131+
builder.assert_bool(local.is_source_small);
132+
builder.assert_bool(local.is_to_source_small);
127133

128134
let mut shift_zero_when = builder.when(is_shift_zero.clone());
129135
shift_zero_when.assert_zero(local.source_minus_twelve_carry);
@@ -184,6 +190,8 @@ impl<AB: InteractionBuilder> Air<AB> for MemcpyLoopAir {
184190
// dest, to_dest, source - 12 * is_shift_non_zero, to_source - 12 * is_shift_non_zero
185191
let dest_u16_limbs = u8_word_to_u16(local.dest);
186192
let to_dest_u16_limbs = u8_word_to_u16(local.to_dest);
193+
// Limb computation for (source - 12 * is_shift_non_zero), with zero-padding when low limb < 12
194+
// If is_source_small is true, we clamp subtraction to 0 by setting carry appropriately.
187195
let source_u16_limbs = [
188196
local.source[0]
189197
+ local.source[1] * AB::F::from_canonical_u32(1 << MEMCPY_LOOP_LIMB_BITS)
@@ -356,17 +364,19 @@ impl MemcpyLoopChip {
356364
let to_source = source + num_copies;
357365

358366
let word_to_u16 = |data: u32| [data & 0x0ffff, data >> 16];
359-
debug_assert!(source >= 12 * (shift != 0) as u32);
360-
debug_assert!(to_source >= 12 * (shift != 0) as u32);
367+
368+
// Relax: allow small sources; zero-pad semantics like iteration.rs
361369
debug_assert!(dest % 4 == 0);
362370
debug_assert!(to_dest % 4 == 0);
363371
debug_assert!(source % 4 == 0);
364372
debug_assert!(to_source % 4 == 0);
373+
let safe_source = source.saturating_sub(12 * (shift != 0) as u32);
374+
let safe_to_source = to_source.saturating_sub(12 * (shift != 0) as u32);
365375
let range_check_data = [
366376
word_to_u16(dest),
367-
word_to_u16(source - 12 * (shift != 0) as u32),
377+
word_to_u16(safe_source),
368378
word_to_u16(to_dest),
369-
word_to_u16(to_source - 12 * (shift != 0) as u32),
379+
word_to_u16(safe_to_source),
370380
];
371381

372382
range_check_data.iter().for_each(|data| {
@@ -437,19 +447,19 @@ impl MemcpyLoopChip {
437447
cols.source_minus_twelve_carry = F::from_bool((record.source & 0x0ffff) < 12);
438448
cols.to_source_minus_twelve_carry = F::from_bool((to_source & 0x0ffff) < 12);
439449

440-
// tracing::info!("timestamp: {:?}, pc: {:?}, dest: {:?}, source: {:?}, len: {:?}, shift: {:?}, is_valid: {:?}, to_timestamp: {:?}, to_dest: {:?}, to_source: {:?}, to_len: {:?}, write_aux: {:?}",
441-
// cols.from_state.timestamp.as_canonical_u32(),
442-
// cols.from_state.pc.as_canonical_u32(),
443-
// u32::from_le_bytes(cols.dest.map(|x| x.as_canonical_u32() as u8)),
444-
// u32::from_le_bytes(cols.source.map(|x| x.as_canonical_u32() as u8)),
445-
// u32::from_le_bytes(cols.len.map(|x| x.as_canonical_u32() as u8)),
446-
// cols.shift[1].as_canonical_u32() * 2 + cols.shift[0].as_canonical_u32(),
447-
// cols.is_valid.as_canonical_u32(),
448-
// cols.to_timestamp.as_canonical_u32(),
449-
// u32::from_le_bytes(cols.to_dest.map(|x| x.as_canonical_u32() as u8)),
450-
// u32::from_le_bytes(cols.to_source.map(|x| x.as_canonical_u32() as u8)),
451-
// cols.to_len.as_canonical_u32(),
452-
// cols.write_aux.map(|x| x.prev_timestamp.as_canonical_u32()).to_vec());
450+
tracing::info!("timestamp: {:?}, pc: {:?}, dest: {:?}, source: {:?}, len: {:?}, shift: {:?}, is_valid: {:?}, to_timestamp: {:?}, to_dest: {:?}, to_source: {:?}, to_len: {:?}, write_aux: {:?}",
451+
cols.from_state.timestamp.as_canonical_u32(),
452+
cols.from_state.pc.as_canonical_u32(),
453+
u32::from_le_bytes(cols.dest.map(|x| x.as_canonical_u32() as u8)),
454+
u32::from_le_bytes(cols.source.map(|x| x.as_canonical_u32() as u8)),
455+
u32::from_le_bytes(cols.len.map(|x| x.as_canonical_u32() as u8)),
456+
cols.shift[1].as_canonical_u32() * 2 + cols.shift[0].as_canonical_u32(),
457+
cols.is_valid.as_canonical_u32(),
458+
cols.to_timestamp.as_canonical_u32(),
459+
u32::from_le_bytes(cols.to_dest.map(|x| x.as_canonical_u32() as u8)),
460+
u32::from_le_bytes(cols.to_source.map(|x| x.as_canonical_u32() as u8)),
461+
cols.to_len.as_canonical_u32(),
462+
cols.write_aux.map(|x| x.prev_timestamp.as_canonical_u32()).to_vec());
453463
}
454464
RowMajorMatrix::new(rows, NUM_MEMCPY_LOOP_COLS)
455465
}

extensions/memcpy/tests/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@ test-case.workspace = true
2424
tracing.workspace = true
2525

2626
[features]
27-
default = ["parallel"]
27+
default = ["parallel", "custom-memcpy"]
2828
parallel = ["openvm-circuit/parallel"]
29+
custom-memcpy = []

0 commit comments

Comments
 (0)