Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 32 additions & 23 deletions crates/lib/src/core/db/merkle_node/merkle_node_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ For example, data for a vnode of hash 1234 with two children:
{dir data node}
*/

use rmp_serde::Serializer;
use serde::Serialize;
use std::fmt::Debug;
use std::fmt::Display;
use std::fs::File;
use std::io::Read;
use std::io::Seek;
Expand Down Expand Up @@ -223,10 +219,14 @@ impl MerkleNodeDB {
}

pub fn node(&self) -> Result<EMerkleTreeNode, OxenError> {
Self::to_node(self.dtype, &self.data())
let node = Self::to_node(self.dtype, &self.data())?;
Ok(node)
}

pub fn to_node(dtype: MerkleTreeNodeType, data: &[u8]) -> Result<EMerkleTreeNode, OxenError> {
pub fn to_node(
dtype: MerkleTreeNodeType,
data: &[u8],
) -> Result<EMerkleTreeNode, rmp_serde::decode::Error> {
match dtype {
MerkleTreeNodeType::Commit => {
Ok(EMerkleTreeNode::Commit(CommitNode::deserialize(data)?))
Expand Down Expand Up @@ -254,11 +254,14 @@ impl MerkleNodeDB {
Self::open(path, true)
}

pub fn open_read_write_if_not_exists(
pub fn open_read_write_if_not_exists<N: TMerkleTreeNode>(
repo: &LocalRepository,
node: &impl TMerkleTreeNode,
node: &N,
parent_id: Option<MerkleHash>,
) -> Result<Option<Self>, OxenError> {
) -> Result<Option<Self>, OxenError>
where
OxenError: From<N::SerializationError>,
{
if Self::exists(repo, &node.hash()) {
let db_path = node_db_path(repo, &node.hash());
log::debug!(
Expand All @@ -271,11 +274,14 @@ impl MerkleNodeDB {
}
}

pub fn open_read_write(
pub fn open_read_write<N: TMerkleTreeNode>(
repo: &LocalRepository,
node: &impl TMerkleTreeNode,
node: &N,
parent_id: Option<MerkleHash>,
) -> Result<Self, OxenError> {
) -> Result<Self, OxenError>
where
OxenError: From<N::SerializationError>,
{
let path = node_db_path(repo, &node.hash());
if !path.exists() {
util::fs::create_dir_all(&path)?;
Expand Down Expand Up @@ -324,7 +330,7 @@ impl MerkleNodeDB {

let dtype = lookup
.as_ref()
.map(|l| MerkleTreeNodeType::from_u8(l.data_type))
.map(|l| MerkleTreeNodeType::from_u8_unwrap(l.data_type))
.unwrap_or(MerkleTreeNodeType::Commit);
Comment on lines 331 to 334
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Return invalid node-type bytes as Err, not panics.

These bytes come straight from the node files, so from_u8_unwrap() means one corrupt record—or a repo written with a newer node variant—can abort open()/map(). This PR already introduced a fallible from_u8() for exactly this case, and these methods already return Result, so the read path should propagate the error instead of crashing.

💡 Suggested fix
-        let dtype = lookup
-            .as_ref()
-            .map(|l| MerkleTreeNodeType::from_u8_unwrap(l.data_type))
-            .unwrap_or(MerkleTreeNodeType::Commit);
+        let dtype = lookup
+            .as_ref()
+            .map(|l| MerkleTreeNodeType::from_u8(l.data_type))
+            .transpose()?
+            .unwrap_or(MerkleTreeNodeType::Commit);
-        let data_type = MerkleTreeNodeType::from_u8_unwrap(lookup.data_type);
+        let data_type = MerkleTreeNodeType::from_u8(lookup.data_type)?;
-            let dtype = MerkleTreeNodeType::from_u8_unwrap(*dtype);
+            let dtype = MerkleTreeNodeType::from_u8(*dtype)?;

Also applies to: 510-510, 529-529

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/lib/src/core/db/merkle_node/merkle_node_db.rs` around lines 331 - 334,
The code currently uses MerkleTreeNodeType::from_u8_unwrap on bytes read from
disk (e.g., in the lookup mapping shown) which will panic on invalid bytes;
change these sites (the occurrence in the lookup mapping and the similar uses at
the other noted locations) to call the fallible MerkleTreeNodeType::from_u8 and
propagate the error instead of unwrapping so open()/map() return Err for invalid
node-type bytes; update the surrounding logic in the functions handling disk
reads (e.g., open(), map()) to propagate the Result from from_u8 rather than
assuming a valid value.

let parent_id = lookup.as_ref().map(|l| l.parent_id);
Ok(Self {
Expand Down Expand Up @@ -364,11 +370,14 @@ impl MerkleNodeDB {
}

/// Write the base node info.
fn write_node<N: TMerkleTreeNode + Serialize + Debug + Display>(
fn write_node<N: TMerkleTreeNode>(
&mut self,
node: &N,
parent_id: Option<MerkleHash>,
) -> Result<(), OxenError> {
) -> Result<(), OxenError>
where
OxenError: From<N::SerializationError>,
{
if self.read_only {
return Err(OxenError::basic_str("Cannot write to read-only db"));
}
Expand All @@ -393,8 +402,7 @@ impl MerkleNodeDB {
}

// Write data length
let mut buf = Vec::new();
node.serialize(&mut Serializer::new(&mut buf)).unwrap();
let buf = node.to_msgpack_bytes()?;
let data_len = buf.len() as u32;
node_file.write_all(&data_len.to_le_bytes())?;
// log::debug!("write_node Wrote data length {}", data_len);
Expand All @@ -413,7 +421,10 @@ impl MerkleNodeDB {
Ok(())
}

pub fn add_child<N: TMerkleTreeNode>(&mut self, item: &N) -> Result<(), OxenError> {
pub fn add_child<N: TMerkleTreeNode>(&mut self, item: &N) -> Result<(), OxenError>
where
OxenError: From<N::SerializationError>,
{
if self.read_only {
return Err(OxenError::basic_str("Cannot write to read-only db"));
}
Expand All @@ -425,9 +436,7 @@ impl MerkleNodeDB {
return Err(OxenError::basic_str("Must call open() before writing"));
};

// TODO: Abstract and re-use in write_all
let mut buf = Vec::new();
item.serialize(&mut Serializer::new(&mut buf)).unwrap();
let buf = item.to_msgpack_bytes()?;
let data_len = buf.len() as u64;
// log::debug!("--add_child-- node_file {:?}", node_file);
// log::debug!("--add_child-- dtype {:?}", item.dtype());
Expand Down Expand Up @@ -498,7 +507,7 @@ impl MerkleNodeDB {
};

// Parse the node parent id
let data_type = MerkleTreeNodeType::from_u8(lookup.data_type);
let data_type = MerkleTreeNodeType::from_u8_unwrap(lookup.data_type);
let parent_id = MerkleTreeNode::deserialize_id(&lookup.data, data_type)?;

let mut file_data = Vec::new();
Expand All @@ -517,7 +526,7 @@ impl MerkleNodeDB {
cursor.seek(SeekFrom::Start(*offset))?;
let mut data = vec![0; *len as usize];
cursor.read_exact(&mut data)?;
let dtype = MerkleTreeNodeType::from_u8(*dtype);
let dtype = MerkleTreeNodeType::from_u8_unwrap(*dtype);
let node = MerkleTreeNode {
parent_id: Some(parent_id),
hash: MerkleHash::new(*hash),
Expand Down
13 changes: 13 additions & 0 deletions crates/lib/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::model::ParsedResource;
use crate::model::RepoNew;
use crate::model::Schema;
use crate::model::Workspace;
use crate::model::merkle_tree::node_type::InvalidMerkleTreeNodeType;

pub mod path_buf_error;
pub mod string_error;
Expand Down Expand Up @@ -143,6 +144,14 @@ pub enum OxenError {
#[error("{0}")]
CommitEntryNotFound(StringError),

//
// Merkle Tree Operations
//
/// A failure during serialization or deserialization of a merkle tree node: it has an unknown
/// u8 marker for its node type.
#[error("{0}")]
MerkleTreeError(#[from] InvalidMerkleTreeNodeType),

//
// Schema (dataframes)
//
Expand Down Expand Up @@ -303,6 +312,10 @@ pub enum OxenError {
#[error("Invalid integer: {0}")]
ParseIntError(#[from] ParseIntError),

/// Wraps any error that we get from encoding message pack data.
#[error("Encode error: {0}")]
RmpEncodeError(#[from] rmp_serde::encode::Error),

/// Wraps any error that we get from decoding message pack data.
#[error("Decode error: {0}")]
RmpDecodeError(#[from] rmp_serde::decode::Error),
Expand Down
6 changes: 2 additions & 4 deletions crates/lib/src/model/merkle_tree/node/commit_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::core::v_old::v0_19_0::model::merkle_tree::node::commit_node::CommitNo
use crate::core::versions::MinOxenVersion;
use crate::error::OxenError;
use crate::model::{Commit, LocalRepository};
use crate::model::{MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode};
use crate::model::{MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType};

pub trait TCommitNode {
fn node_type(&self) -> &MerkleTreeNodeType;
Expand Down Expand Up @@ -124,7 +124,7 @@ impl CommitNode {
}
}

pub fn deserialize(data: &[u8]) -> Result<CommitNode, OxenError> {
pub fn deserialize(data: &[u8]) -> Result<CommitNode, rmp_serde::decode::Error> {
// In order to support versions that didn't have the enum,
// if it fails we will fall back to the old struct, then populate the enum
let commit: CommitNode = match rmp_serde::from_slice(data) {
Expand Down Expand Up @@ -204,8 +204,6 @@ impl MerkleTreeNodeIdType for CommitNode {
}
}

impl TMerkleTreeNode for CommitNode {}

/// Debug is used for verbose multi-line output with println!("{:?}", node)
impl fmt::Debug for CommitNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down
8 changes: 2 additions & 6 deletions crates/lib/src/model/merkle_tree/node/dir_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ use crate::core::v_latest::model::merkle_tree::node::dir_node::DirNodeData as Di
use crate::core::v_old::v0_19_0::model::merkle_tree::node::dir_node::DirNodeData as DirNodeDataV0_19_0;
use crate::core::versions::MinOxenVersion;
use crate::error::OxenError;
use crate::model::{
LocalRepository, MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode,
};
use crate::model::{LocalRepository, MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType};
use crate::view::DataTypeCount;

pub trait TDirNode {
Expand Down Expand Up @@ -120,7 +118,7 @@ impl DirNode {
}
}

pub fn deserialize(data: &[u8]) -> Result<DirNode, OxenError> {
pub fn deserialize(data: &[u8]) -> Result<DirNode, rmp_serde::decode::Error> {
// In order to support versions that didn't have the enum,
// if it fails we will fall back to the old struct, then populate the enum
let dir_node: DirNode = match rmp_serde::from_slice(data) {
Expand Down Expand Up @@ -262,8 +260,6 @@ impl MerkleTreeNodeIdType for DirNode {
}
}

impl TMerkleTreeNode for DirNode {}

/// Debug is used for verbose multi-line output with println!("{:?}", node)
impl fmt::Debug for DirNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down
8 changes: 2 additions & 6 deletions crates/lib/src/model/merkle_tree/node/file_chunk_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

use serde::{Deserialize, Serialize};

use crate::error::OxenError;
use crate::model::{MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode};
use crate::model::{MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType};

use std::fmt;

Expand All @@ -17,9 +16,8 @@ pub struct FileChunkNode {
}

impl FileChunkNode {
pub fn deserialize(data: &[u8]) -> Result<FileChunkNode, OxenError> {
pub fn deserialize(data: &[u8]) -> Result<FileChunkNode, rmp_serde::decode::Error> {
rmp_serde::from_slice(data)
.map_err(|e| OxenError::basic_str(format!("Error deserializing file chunk node: {e}")))
}
}

Expand All @@ -43,8 +41,6 @@ impl MerkleTreeNodeIdType for FileChunkNode {
}
}

impl TMerkleTreeNode for FileChunkNode {}

/// Debug is used for verbose multi-line output with println!("{:?}", node)
impl fmt::Debug for FileChunkNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down
5 changes: 1 addition & 4 deletions crates/lib/src/model/merkle_tree/node/file_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use crate::model::merkle_tree::node::file_node_types::{FileChunkType, FileStorag
use crate::model::metadata::generic_metadata::GenericMetadata;
use crate::model::{
EntryDataType, LocalRepository, MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType,
TMerkleTreeNode,
};
use serde::{Deserialize, Serialize};
use std::fmt;
Expand Down Expand Up @@ -95,7 +94,7 @@ impl FileNode {
}
}

pub fn deserialize(data: &[u8]) -> Result<FileNode, OxenError> {
pub fn deserialize(data: &[u8]) -> Result<FileNode, rmp_serde::decode::Error> {
let file_node: FileNode = match rmp_serde::from_slice(data) {
Ok(file_node) => file_node,
Err(_) => {
Expand Down Expand Up @@ -263,8 +262,6 @@ impl Hash for FileNode {
}
}

impl TMerkleTreeNode for FileNode {}

/// Debug is used for verbose multi-line output with println!("{:?}", node)
impl fmt::Debug for FileNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down
5 changes: 4 additions & 1 deletion crates/lib/src/model/merkle_tree/node/merkle_tree_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,10 @@ impl MerkleTreeNode {
self.node.clone()
}

pub fn deserialize_id(data: &[u8], dtype: MerkleTreeNodeType) -> Result<MerkleHash, OxenError> {
pub fn deserialize_id(
data: &[u8],
dtype: MerkleTreeNodeType,
) -> Result<MerkleHash, rmp_serde::decode::Error> {
match dtype {
MerkleTreeNodeType::Commit => {
CommitNode::deserialize(data).map(|commit| *commit.hash())
Expand Down
8 changes: 2 additions & 6 deletions crates/lib/src/model/merkle_tree/node/vnode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ use crate::core::v_latest::model::merkle_tree::node::vnode::VNodeData as VNodeIm
use crate::core::v_old::v0_19_0::model::merkle_tree::node::vnode::VNodeData as VNodeImplV0_19_0;
use crate::core::versions::MinOxenVersion;
use crate::error::OxenError;
use crate::model::{
LocalRepository, MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode,
};
use crate::model::{LocalRepository, MerkleHash, MerkleTreeNodeIdType, MerkleTreeNodeType};

pub trait TVNode {
fn node_type(&self) -> &MerkleTreeNodeType;
Expand Down Expand Up @@ -57,7 +55,7 @@ impl VNode {
}
}

pub fn deserialize(data: &[u8]) -> Result<VNode, OxenError> {
pub fn deserialize(data: &[u8]) -> Result<VNode, rmp_serde::decode::Error> {
// In order to support versions that didn't have the enum,
// if it fails we will fall back to the old struct, then populate the enum
let vnode: VNode = match rmp_serde::from_slice(data) {
Expand Down Expand Up @@ -153,5 +151,3 @@ impl fmt::Display for VNode {
write!(f, "")
}
}

impl TMerkleTreeNode for VNode {}
Loading
Loading