Skip to content

On the matter of shared mutable access to linear memory for the threading proposal #135

@wucke13

Description

@wucke13

I drafted a simple implementation of a linear memory which allows shared mutable access without invoking UB (beyond being racy, of course). It does however currently rely on RwLock, which is not available in libcore. So we would have to create our own RwLock before this can be incorporated into the code. Also, it still needs some love on the LittleEndianBytes trait.

use std::{cell::UnsafeCell, io::IntoInnerError, sync::RwLock};

/// A linear memory that tolerates shared mutable access from multiple threads.
///
/// # Unsafe Note
///
/// Raw pointer access is required, because concurrent mutation of the linear memory might happen
/// (consider the threading proposal for WASM, where multiple WASM threads access the same linear
/// memory at the same time). The inherent race condition results in UB w/r/t the state of the `u8`s
/// in the inner data. However, this is tolerable, e.g. avoiding race conditions on the state of the
/// linear memory can not be the task of the interpreter, but has to be fulfilled by the interpreted
/// bytecode itself.
pub struct LinearMemory {
    // The `RwLock` guards the Vec itself (length/allocation, e.g. during `grow`),
    // while the per-byte `UnsafeCell`s permit racy byte-level access through raw
    // pointers while only the read lock is held.
    inner_data: RwLock<Vec<UnsafeCell<u8>>>,
}

impl LinearMemory {
    // pub const PAGE_SIZE: usize = 1 << 16; // 64 KiB
    /// Size of one memory page in bytes (shrunk from the WASM-standard 64 KiB
    /// to 4 KiB for debugging).
    pub const PAGE_SIZE: usize = 1 << 12; // 4 KiB for debugging

    /// Create a new, empty [`LinearMemory`]
    pub fn new() -> Self {
        Self {
            inner_data: RwLock::new(Vec::new()),
        }
    }

    /// Create a new [`LinearMemory`] spanning `pages` zero-initialized pages.
    ///
    /// # Panics
    ///
    /// Panics if `pages` is negative. (Previously a negative `i16` was cast
    /// straight to `usize`, requesting an enormous allocation.)
    pub fn new_with_capacity(pages: i16) -> Self {
        assert!(pages >= 0, "page count must be non-negative");
        let size_bytes = Self::PAGE_SIZE * pages as usize;
        let mut data = Vec::with_capacity(size_bytes);
        data.resize_with(size_bytes, || UnsafeCell::new(0));

        Self {
            inner_data: RwLock::new(data),
        }
    }

    /// Grow the linear memory by `pages_to_add` zero-initialized pages.
    ///
    /// Takes the write lock, so growth cannot race with concurrent
    /// `store`/`load` calls (which hold the read lock).
    ///
    /// # Panics
    ///
    /// Panics if `pages_to_add` is negative or the lock is poisoned.
    pub fn grow(&self, pages_to_add: i16) {
        assert!(pages_to_add >= 0, "page count must be non-negative");
        let mut lock_guard = self.inner_data.write().unwrap();
        let prior_length_bytes = lock_guard.len();
        let new_length_bytes = prior_length_bytes + Self::PAGE_SIZE * pages_to_add as usize;
        lock_guard.resize_with(new_length_bytes, || UnsafeCell::new(0));
    }

    /// Store `value` as its little-endian byte representation, starting at
    /// byte offset `index`.
    ///
    /// # Panics
    ///
    /// Panics if the value would not fit into the linear memory at `index`.
    pub fn store<T: LittleEndianBytes>(&self, index: i32, value: T) {
        let byte_repr = value.to_le_bytes();
        let bytes = byte_repr.as_ref();
        // BUG FIX: this was hard-coded to `size_of::<i32>()`, which copied the
        // wrong number of bytes for every non-4-byte `T`: it read past the end
        // of the 1-/2-byte stack array for `i8`/`i16` (UB) and silently
        // truncated `i64`/`u64`/`f64` to their low 4 bytes.
        let value_size = bytes.len();

        let lock_guard = self.inner_data.read().unwrap();

        // A value must fit into the linear memory
        assert!(
            value_size <= lock_guard.len(),
            "value does not fit into linear memory"
        );

        // The following statement must be true
        // `index + value_size <= lock_guard.len()`
        // This check verifies it, while avoiding the possible overflow. The subtraction can not
        // underflow because of the previous assert.
        assert!(
            (index as usize) <= lock_guard.len() - value_size,
            "value write would extend beyond the end of the linear_memory"
        );

        // Index only after the bounds asserts, so an out-of-range `index`
        // triggers the descriptive panics above instead of a bare `unwrap`.
        let ptr = lock_guard.get(index as usize).unwrap().get();

        // Safety argument:
        //
        // - nonoverlapping is guaranteed, because `src` is a pointer to a stack allocated array,
        //   while the destination is heap allocated Vec
        // - the first assert above guarantees that `src` fits into the destination
        // - the second assert above guarantees that even with the offset in `index`, `src` does not
        //   extend beyond the destinations last `UnsafeCell<u8>`
        // - the use of `UnsafeCell` avoids any `&` or `&mut` to ever be created on any of the `u8`s
        //   contained in the `UnsafeCell`s, so no UB is created through the existence of unsound
        //   references
        unsafe { ptr.copy_from_nonoverlapping(bytes.as_ptr(), value_size) }
    }

    /// Load a `T` from the `N` little-endian bytes starting at byte offset
    /// `index`.
    ///
    /// # Panics
    ///
    /// Panics if the read would extend beyond the end of the linear memory.
    pub fn load<const N: usize, T: LittleEndianBytes2<N>>(&self, index: i32) -> T {
        // BUG FIX: this was `size_of::<i32>()` together with an
        // `assert_eq!(value_size, N)`, which made every non-4-byte load panic.
        // `N` *is* the value size by construction of `LittleEndianBytes2<N>`.
        let value_size = N;

        let lock_guard = self.inner_data.read().unwrap();
        let mut bytes = [0; N];

        // A value must fit into the linear memory
        assert!(
            value_size <= lock_guard.len(),
            "value does not fit into linear memory"
        );

        // The following statement must be true
        // `index + value_size <= lock_guard.len()`
        // This check verifies it, while avoiding the possible overflow. The subtraction can not
        // underflow because of the previous assert.
        assert!(
            (index as usize) <= lock_guard.len() - value_size,
            "value read would extend beyond the end of the linear_memory"
        );

        // Index only after the bounds asserts (see `store`).
        let ptr = lock_guard.get(index as usize).unwrap().get();

        // Safety argument:
        //
        // - nonoverlapping is guaranteed, because `dest` is a pointer to a stack allocated array,
        //   while the source is heap allocated Vec
        // - the first assert above guarantees that the source is at least as big as `dest`
        // - the second assert above guarantees that even with the offset in `index`, `dest` does
        //   not extend beyond the source's last `UnsafeCell<u8>`
        // - the use of `UnsafeCell` avoids any `&` or `&mut` to ever be created on any of the `u8`s
        //   contained in the `UnsafeCell`s, so no UB is created through the existence of unsound
        //   references
        unsafe { ptr.copy_to_nonoverlapping(bytes.as_mut_ptr(), bytes.len()) };
        T::from_le_bytes(bytes)
    }
}

impl core::fmt::Debug for LinearMemory {
    /// Renders the memory as `LinearMemory { inner_data: [ b0, b1, … ] }`,
    /// printing every byte in order under the read lock.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "LinearMemory {{ inner_data: [ ")?;
        let guard = self.inner_data.read().unwrap();

        for (position, cell) in guard.iter().enumerate() {
            // Safety argument:
            //
            // The raw pointer obtained from `UnsafeCell::get` is only used to
            // copy the `u8` out; no `&`/`&mut` to the cell's contents is ever
            // created, matching the access discipline of `store`/`load`.
            // NOTE(review): a concurrent racy write may still make the value
            // read here arbitrary — acceptable for a debug dump.
            let byte = unsafe { *cell.get() };

            if position > 0 {
                write!(f, ", ")?;
            }
            write!(f, "{byte}")?;
        }
        write!(f, " ] }}")
    }
}

impl Default for LinearMemory {
    fn default() -> Self {
        Self::new()
    }
}

/// Conversion of a value into its little-endian byte representation.
///
/// Counterpart of [`LittleEndianBytes2`]; used by [`LinearMemory::store`].
// NOTE(review): the issue text says this trait "still needs some love" —
// presumably it should be merged with `LittleEndianBytes2` into a single
// trait with an associated byte count; verify before relying on the split.
pub trait LittleEndianBytes {
    fn to_le_bytes(self) -> impl AsRef<[u8]>;
}

/// Construction of a value from its `N` little-endian bytes.
///
/// Counterpart of [`LittleEndianBytes`]; used by [`LinearMemory::load`],
/// where the const generic `N` carries the value's byte size.
pub trait LittleEndianBytes2<const N: usize> {
    fn from_le_bytes(bytes: [u8; N]) -> Self;
}

/// Decodes an `i8` from its single little-endian byte.
impl LittleEndianBytes2<1> for i8 {
    fn from_le_bytes(bytes: [u8; 1]) -> Self {
        // Not infinite recursion: inherent associated functions take
        // precedence over trait methods in path resolution, so this resolves
        // to the inherent `i8::from_le_bytes`.
        Self::from_le_bytes(bytes)
    }
}

impl LittleEndianBytes for i8 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}
impl LittleEndianBytes for i16 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}
impl LittleEndianBytes for i32 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}

impl LittleEndianBytes for i64 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}

impl LittleEndianBytes for u8 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}
impl LittleEndianBytes for u16 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}
impl LittleEndianBytes for u32 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}

impl LittleEndianBytes for u64 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}

impl LittleEndianBytes for f32 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}

impl LittleEndianBytes for f64 {
    fn to_le_bytes(self) -> impl AsRef<[u8]> {
        self.to_le_bytes()
    }
}

/// Demonstration driver: contrasts several unsound "naive" ways of writing
/// into an `UnsafeCell<Box<[u8]>>` with the [`LinearMemory`] API, ending in a
/// deliberate bounds-check panic.
fn main() {
    let linear_mem: UnsafeCell<Box<[u8]>> = UnsafeCell::new(vec![0; 1 << 5].into_boxed_slice());

    unsafe {
        // naive try 1
        // UB, the `UnsafeCell` only secures the Box, not its contents
        (&raw mut (*linear_mem.get())[6]).write_unaligned(13);

        // naive try 2
        // UB, the `UnsafeCell` only secures the Box, not its contents
        let raw_ptr_to_linear_mem = linear_mem.get();
        (&raw mut (*raw_ptr_to_linear_mem)[1] as *mut [u8; std::mem::size_of::<i32>()])
            .write_unaligned((-1i32).to_le_bytes());

        // naive try 3
        // this is UB, it overwrites the box, not the box's contents
        // linear_mem
        //     .get()
        //     .byte_offset(9)
        //     .cast::<[u8; std::mem::size_of::<i16>()]>()
        //     .write(5i16.to_le_bytes());

        // naive try 4
        // again, this is UB
        // linear_mem
        //     .get()
        //     .byte_offset(9)
        //     .copy_from_nonoverlapping(5i16.to_le_bytes().as_ptr() as _, std::mem::size_of::<i16>());

        // BUG FIX: message previously read "linear_memoyr".
        println!("linear_memory: {:?}", linear_mem.get());

        // this might actually be sound
        let linear_memory = LinearMemory::new_with_capacity(1);

        linear_memory.store(1, 3i16);
        linear_memory.store(7, 3i32);
        linear_memory.store((LinearMemory::PAGE_SIZE - 4) as i32, -1i32);

        // causes a controlled panic, as it is unsound
        linear_memory.store((LinearMemory::PAGE_SIZE - 3) as i32, i32::MIN + 1);

        println!("{linear_memory:?}");
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions