@@ -416,6 +416,8 @@ pub unsafe extern "C" fn start_rust(a0: usize, a1: usize, a2: usize) -> ! {
416416 // Initialize RAM
417417 // 1. Copy over .data from flash to RAM
418418 // 2. Zero out .bss
419+
420+ #[ cfg( target_arch = "riscv32" ) ]
419421 core:: arch:: asm!(
420422 "
421423 // Copy over .data
@@ -455,6 +457,100 @@ pub unsafe extern "C" fn start_rust(a0: usize, a1: usize, a2: usize) -> ! {
455457 a = out( reg) _,
456458 ) ;
457459
// RV64 RAM initialization: copy `.data` from its load address (`_sidata`) to RAM
// (`_sdata.._edata`), then zero `.bss` (`_sbss.._ebss`).
//
// The main loops move one double word (`ld`/`sd`) per iteration. When a region
// boundary is only 4-byte aligned, a single word (`lw`/`sw`) is handled before
// and/or after the loop, so every double-word access is 8-byte aligned.
//
// This code assumes every linker symbol used here is at least 4-byte aligned and
// that `_sdata` and `_sidata` have the same offset modulo 8 (per the note in the
// assembly below, this is asserted in `link.x`).
//
// All scratch registers are zeroed before leaving the block.
#[cfg(target_arch = "riscv64")]
core::arch::asm!(
    "
    // Copy over .data
    la      {start},_sdata
    la      {end},_edata
    la      {input},_sidata

    // Nothing to copy if the region is empty.
    bgeu    {start},{end},3f

    // If _sdata and _sidata are not 8-byte aligned, copy one word before the main loop. This way,
    // in the main loop, `start` and `input` are guaranteed to be 8-byte aligned, which is needed
    // to safely perform load/store double-word instructions.
    //
    // NOTE: We assert in the `link.x` file that _sdata and _sidata have the same alignment
    // modulo 8. This is needed for the main loop here.
    andi    {b},{start},4
    beqz    {b},0f
    lw      {a},0({input})
    addi    {input},{input},4
    sw      {a},0({start})
    addi    {start},{start},4

0: // .data main loop initialization
    // b = FLOOR_ALIGN(_edata, 8): clear the three low bits of the end address.
    andi    {b},{end},-8

    bgeu    {start},{b},2f
1: // .data main loop: one double word per iteration
    ld      {a},0({input})
    addi    {input},{input},8
    sd      {a},0({start})
    addi    {start},{start},8
    bltu    {start},{b},1b

2: // .data end align
    // If _edata is not 8-byte aligned, copy one more word after the main loop. This way all the
    // data is copied even if _edata is only 4-byte aligned.
    andi    {b},{end},4
    beqz    {b},3f
    lw      {a},0({input})
    addi    {input},{input},4
    sw      {a},0({start})
    addi    {start},{start},4

3: // .data zero registers
    li      {a},0
    li      {input},0

4: // zero out .bss start
    la      {start},_sbss
    la      {end},_ebss

    // Nothing to zero if the region is empty.
    bgeu    {start},{end},8f

    // If _sbss is not 8-byte aligned, zero one word before the main loop. This way, in the main
    // loop, `start` is guaranteed to be 8-byte aligned, which is needed to safely perform the
    // store double-word instruction.
    andi    {b},{start},4
    beqz    {b},5f
    sw      zero,0({start})
    addi    {start},{start},4

5: // .bss main loop initialization
    // b = FLOOR_ALIGN(_ebss, 8): clear the three low bits of the end address.
    andi    {b},{end},-8

    bgeu    {start},{b},7f
6: // .bss main loop: one double word per iteration
    sd      zero,0({start})
    addi    {start},{start},8
    bltu    {start},{b},6b

7: // .bss end align
    // If _ebss is not 8-byte aligned, zero one more word after the main loop.
    andi    {b},{end},4
    beqz    {b},8f
    sw      zero,0({start})

8: // .bss zero registers
    // Zero out used registers
    li      {b},0
    li      {start},0
    li      {end},0
    ",
    start = out(reg) _,
    end = out(reg) _,
    input = out(reg) _,
    a = out(reg) _,
    b = out(reg) _,
);

553+
458554 compiler_fence ( Ordering :: SeqCst ) ;
459555 }
460556
0 commit comments