diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 5343d4359f..f91d299a45 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -1,10 +1,7 @@ use std::{ io, path::{Path, PathBuf}, - sync::{ - atomic::{self, AtomicU64, AtomicUsize}, - Arc, - }, + sync::{atomic::AtomicUsize, Arc}, }; use raphtory_api::core::{ @@ -25,22 +22,24 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::{Config, PersistentStrategy}, + persist::strategy::{PersistenceConfig, PersistenceStrategy}, resolver::GIDResolverOps, - wal::{GraphWal, TransactionID, Wal}, - Extension, GIDResolver, Layer, ReadLockedLayer, WalImpl, ES, GS, NS, + transaction::TransactionManager, + wal::Wal, + Extension, GIDResolver, Layer, ReadLockedLayer, WalType, ES, GS, NS, }; use tempfile::TempDir; +mod replay; + #[derive(Debug)] -pub struct TemporalGraph { +pub struct TemporalGraph { // mapping between logical and physical ids pub logical_to_physical: Arc, pub node_count: AtomicUsize, storage: Arc>, graph_dir: Option, pub transaction_manager: Arc, - pub wal: Arc, } #[derive(Debug)] @@ -84,47 +83,15 @@ impl<'a> From<&'a Path> for GraphDir { } } -#[derive(Debug)] -pub struct TransactionManager { - last_transaction_id: AtomicU64, - wal: Arc, -} - -impl TransactionManager { - const STARTING_TRANSACTION_ID: TransactionID = 1; - - pub fn new(wal: Arc) -> Self { - Self { - last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), - wal, - } - } - - pub fn load(self, last_transaction_id: TransactionID) { - self.last_transaction_id - .store(last_transaction_id, atomic::Ordering::SeqCst) - } - - pub fn begin_transaction(&self) -> TransactionID { - let transaction_id = self - .last_transaction_id - .fetch_add(1, atomic::Ordering::SeqCst); - self.wal.log_begin_transaction(transaction_id).unwrap(); - transaction_id - } - - pub fn end_transaction(&self, transaction_id: TransactionID) { - self.wal.log_end_transaction(transaction_id).unwrap(); - } -} - impl Default for TemporalGraph { 
fn default() -> Self { - Self::new(Extension::default()).unwrap() + let config = PersistenceConfig::default(); + let wal = Arc::new(WalType::new(None).unwrap()); + Self::new(Extension::new(config, wal)).unwrap() } } -impl, ES = ES, GS = GS>> TemporalGraph { +impl, ES = ES, GS = GS>> TemporalGraph { pub fn new(ext: EXT) -> Result { let node_meta = Meta::new_for_nodes(); let edge_meta = Meta::new_for_edges(); @@ -147,27 +114,6 @@ impl, ES = ES, GS = GS>> Temporal ) } - pub fn load_from_path(path: impl AsRef) -> Result { - let path = path.as_ref(); - let storage = Layer::load(path)?; - let id_type = storage.nodes().id_type(); - - let gid_resolver_dir = path.join("gid_resolver"); - let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; - let node_count = AtomicUsize::new(storage.nodes().num_nodes()); - let wal_dir = path.join("wal"); - let wal = Arc::new(WalImpl::new(Some(wal_dir))?); - - Ok(Self { - graph_dir: Some(path.into()), - logical_to_physical: resolver.into(), - node_count, - storage: Arc::new(storage), - transaction_manager: Arc::new(TransactionManager::new(wal.clone())), - wal, - }) - } - pub fn new_with_meta( graph_dir: Option, node_meta: Meta, @@ -207,16 +153,30 @@ impl, ES = ES, GS = GS>> Temporal ext, ); - let wal_dir = graph_dir.as_ref().map(|dir| dir.wal_dir()); - let wal = Arc::new(WalImpl::new(wal_dir)?); - Ok(Self { graph_dir, logical_to_physical, node_count: AtomicUsize::new(0), storage: Arc::new(storage), - transaction_manager: Arc::new(TransactionManager::new(wal.clone())), - wal, + transaction_manager: Arc::new(TransactionManager::new()), + }) + } + + pub fn load_from_path(path: impl AsRef, ext: EXT) -> Result { + let path = path.as_ref(); + let storage = Layer::load(path, ext)?; + let id_type = storage.nodes().id_type(); + + let gid_resolver_dir = path.join("gid_resolver"); + let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; + let node_count = AtomicUsize::new(storage.nodes().num_nodes()); + + Ok(Self { + 
graph_dir: Some(path.into()), + logical_to_physical: resolver.into(), + node_count, + storage: Arc::new(storage), + transaction_manager: Arc::new(TransactionManager::new()), }) } @@ -229,6 +189,10 @@ impl, ES = ES, GS = GS>> Temporal self.storage().extension() } + pub fn wal(&self) -> &EXT::WalType { + self.storage().extension().wal() + } + pub fn read_event_counter(&self) -> usize { self.storage().read_event_id() } @@ -251,10 +215,12 @@ impl, ES = ES, GS = GS>> Temporal .get_str(string) .or_else(|| self.logical_to_physical.get_u64(string.id())), }?; + // VIDs in the resolver may not be initialised yet, need to double-check the node actually exists! let nodes = self.storage().nodes(); let (page_id, pos) = nodes.resolve_pos(vid); let node_page = nodes.segments().get(page_id)?; + if pos.0 < node_page.num_nodes() { Some(vid) } else { @@ -389,9 +355,10 @@ impl, ES = ES, GS = GS>> Temporal } } +/// Holds write locks across all segments in the graph for fast bulk ingestion. pub struct WriteLockedGraph<'a, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, { pub nodes: WriteLockedNodePages<'a, storage::NS>, pub edges: WriteLockedEdgePages<'a, storage::ES>, @@ -399,7 +366,7 @@ where pub graph: &'a TemporalGraph, } -impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> +impl<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> WriteLockedGraph<'a, EXT> { pub fn new(graph: &'a TemporalGraph) -> Self { diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs new file mode 100644 index 0000000000..8398420052 --- /dev/null +++ b/db4-graph/src/replay.rs @@ -0,0 +1,188 @@ +//! Implements WAL replay for a `WriteLockedGraph`. +//! Allows for fast replay by making use of one-time lock acquisition for +//! all the segments in the graph. 
+ +use crate::WriteLockedGraph; +use raphtory_api::core::{ + entities::{ + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + EID, GID, VID, + }, + storage::timeindex::TimeIndexEntry, +}; +use raphtory_core::entities::GidRef; +use storage::{ + api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps}, + error::StorageError, + pages::resolve_pos, + persist::strategy::PersistenceStrategy, + resolver::GIDResolverOps, + wal::{GraphReplay, TransactionID, LSN}, + ES, GS, NS, +}; + +impl GraphReplay for WriteLockedGraph<'_, EXT> +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, +{ + fn replay_add_edge( + &mut self, + lsn: LSN, + transaction_id: TransactionID, + t: TimeIndexEntry, + src_name: GID, + src_id: VID, + dst_name: GID, + dst_id: VID, + eid: EID, + layer_name: Option, + layer_id: usize, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + let temporal_graph = self.graph(); + let node_max_page_len = temporal_graph.storage().nodes().max_page_len(); + let edge_max_page_len = temporal_graph.storage().edges().max_page_len(); + + // 1. Insert prop ids into edge meta. + // No need to validate props again since they are already validated before + // being logged to the WAL. + let edge_meta = temporal_graph.edge_meta(); + let mut prop_ids_and_values = Vec::new(); + + for (prop_name, prop_id, prop_value) in props.into_iter() { + let prop_mapper = edge_meta.temporal_prop_mapper(); + + prop_mapper.set_id_and_dtype(prop_name, prop_id, prop_value.dtype()); + prop_ids_and_values.push((prop_id, prop_value)); + } + + // 2. Insert node ids into resolver. + temporal_graph + .logical_to_physical + .set(GidRef::from(&src_name), src_id)?; + temporal_graph + .logical_to_physical + .set(GidRef::from(&dst_name), dst_id)?; + + // 3. Insert layer id into the layer meta of both edge and node. 
+ let node_meta = temporal_graph.node_meta(); + + edge_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + node_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + + // 4. Grab src writer and add edge data. + let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); + let num_nodes = src_id.index() + 1; + self.resize_chunks_to_num_nodes(num_nodes); // Create enough segments. + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(src_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name)); + + let is_new_edge_static = src_writer + .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + let is_new_edge_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); + + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_static { + src_writer.add_static_outbound_edge(src_pos, dst_id, eid); + } + + // Add the edge to the layer if it doesn't already exist, else just record the timestamp. + if is_new_edge_layer { + src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); + } else { + src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); + } + + src_writer.mut_segment.set_lsn(lsn); + + // Release the writer for mutable access to dst_writer. + drop(src_writer); + } + + // 5. Grab dst writer and add edge data. 
+ let (dst_segment_id, dst_pos) = resolve_pos(dst_id, node_max_page_len); + let num_nodes = dst_id.index() + 1; + self.resize_chunks_to_num_nodes(num_nodes); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(dst_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name)); + + let is_new_edge_static = dst_writer + .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + let is_new_edge_layer = dst_writer.get_inb_edge(dst_pos, src_id, layer_id).is_none(); + + if is_new_edge_static { + dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); + } + + if is_new_edge_layer { + dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); + } else { + dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); + } + + dst_writer.mut_segment.set_lsn(lsn); + + drop(dst_writer); + } + + // 6. Grab edge writer and add temporal props & metadata. + let (edge_segment_id, edge_pos) = resolve_pos(eid, edge_max_page_len); + let num_edges = eid.index() + 1; + self.resize_chunks_to_num_edges(num_edges); + + let segment = self + .graph() + .storage() + .edges() + .get_or_create_segment(edge_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); + + let is_new_edge_static = edge_writer + .get_edge(STATIC_GRAPH_LAYER_ID, edge_pos) + .is_none(); + + // Add edge into the static graph if it doesn't already exist. + if is_new_edge_static { + let already_counted = false; + edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); + } + + // Add edge into the specified layer with timestamp and props. 
+ edge_writer.add_edge(t, edge_pos, src_id, dst_id, prop_ids_and_values, layer_id); + + edge_writer.writer.set_lsn(lsn); + } + + Ok(()) + } +} diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs index 61136444cd..b504d43372 100644 --- a/db4-storage/src/api/edges.rs +++ b/db4-storage/src/api/edges.rs @@ -11,7 +11,7 @@ use std::{ sync::{Arc, atomic::AtomicU32}, }; -use crate::{LocalPOS, error::StorageError, segments::edge::segment::MemEdgeSegment}; +use crate::{LocalPOS, error::StorageError, segments::edge::segment::MemEdgeSegment, wal::LSN}; pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { type Extension; @@ -58,8 +58,7 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn try_head_mut(&self) -> Option>; - /// mark segment as dirty without triggering a write - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); /// notify that an edge was added (might need to write to disk) fn notify_write( @@ -98,6 +97,9 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { &self, locked_head: impl DerefMut, ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. 
+ fn immut_lsn(&self) -> LSN; } pub trait LockedESegment: Send + Sync + std::fmt::Debug { diff --git a/db4-storage/src/api/graph_props.rs b/db4-storage/src/api/graph_props.rs index 768aa8b123..a06ab76acc 100644 --- a/db4-storage/src/api/graph_props.rs +++ b/db4-storage/src/api/graph_props.rs @@ -29,7 +29,7 @@ where fn est_size(&self) -> usize; - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); fn notify_write( &self, diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index ebea776c8a..c05a6d42fd 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -30,6 +30,7 @@ use crate::{ gen_ts::LayerIter, segments::node::segment::MemNodeSegment, utils::{Iter2, Iter3, Iter4}, + wal::LSN, }; pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { @@ -94,7 +95,7 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { head_lock: impl DerefMut, ) -> Result<(), StorageError>; - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); fn check_node(&self, pos: LocalPOS, layer_id: usize) -> bool; @@ -128,6 +129,9 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { &self, locked_head: impl DerefMut, ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. 
+ fn immut_lsn(&self) -> LSN; } pub trait LockedNSSegment: std::fmt::Debug + Send + Sync { diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index ebe5bf708b..4888a0186c 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -14,7 +14,7 @@ use crate::{ GraphStore, ReadLockedGraphStore, edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage, }, - persist::strategy::NoOpStrategy, + persist::strategy::{NoOpStrategy, PersistenceStrategy}, resolver::mapping_resolver::MappingResolver, segments::{ edge::{ @@ -27,7 +27,6 @@ use crate::{ segment::NodeSegmentView, }, }, - wal::no_wal::NoWal, }; use parking_lot::RwLock; use raphtory_api::core::entities::{EID, VID}; @@ -43,6 +42,7 @@ pub mod persist; pub mod properties; pub mod resolver; pub mod segments; +pub mod transaction; pub mod utils; pub mod wal; @@ -52,7 +52,7 @@ pub type ES

<P> = EdgeSegmentView<P>;
 pub type GS<P> = GraphPropSegmentView<P>;
 pub type Layer<P> = GraphStore<NS<P>, ES<P>, GS<P>, P>;
-pub type WalImpl = NoWal;
+pub type WalType = ::WalType;
 pub type GIDResolver = MappingResolver;
 pub type ReadLockedLayer<P> = ReadLockedGraphStore<NS<P>, ES<P>, GS<P>
, P>; diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index 8578e5619c..320a660f4a 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -2,8 +2,15 @@ use crate::{ LocalPOS, api::edges::EdgeSegmentOps, error::StorageError, pages::layer_counter::GraphStats, segments::edge::segment::MemEdgeSegment, }; -use raphtory_api::core::entities::{VID, properties::prop::Prop}; -use raphtory_core::storage::timeindex::{AsTime, TimeIndexEntry}; +use arrow_array::{ArrayRef, BooleanArray}; +use raphtory_api::core::entities::{ + VID, + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, +}; +use raphtory_core::{ + entities::EID, + storage::timeindex::{AsTime, TimeIndexEntry}, +}; use std::ops::DerefMut; pub struct EdgeWriter< @@ -41,17 +48,20 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen dst: VID, props: impl IntoIterator, layer_id: usize, - lsn: u64, ) -> LocalPOS { - let existing_edge = self + let is_new_edge = !self .page .contains_edge(edge_pos, layer_id, self.writer.deref()); - if !existing_edge { + + if is_new_edge { self.increment_layer_num_edges(layer_id); } + self.graph_stats.update_time(t.t()); + self.writer - .insert_edge_internal(t, edge_pos, src, dst, layer_id, props, lsn); + .insert_edge_internal(t, edge_pos, src, dst, layer_id, props); + edge_pos } @@ -87,7 +97,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen src: VID, dst: VID, layer_id: usize, - lsn: u64, ) { let existing_edge = self .page @@ -97,27 +106,30 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen } self.graph_stats.update_time(t.t()); self.writer - .delete_edge_internal(t, edge_pos, src, dst, layer_id, lsn); + .delete_edge_internal(t, edge_pos, src, dst, layer_id); } + /// Adds a static edge to the graph. + /// + /// If `edge_pos` is `None`, a new position is allocated. If `Some`, the provided position + /// is used. 
+ /// Set `already_counted` to `true` when bulk loading to avoid double-counting statistics. pub fn add_static_edge( &mut self, edge_pos: Option, src: impl Into, dst: impl Into, - lsn: u64, - exist: bool, // used when edge_pos is Some but the is not counted, this is used in the bulk loader + already_counted: bool, ) -> LocalPOS { - let layer_id = 0; // assuming layer_id 0 for static edges, adjust as needed - - if edge_pos.is_some() && !exist { + if edge_pos.is_some() && !already_counted { self.page.increment_num_edges(); - self.increment_layer_num_edges(layer_id); + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); } - let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(layer_id)); + let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(STATIC_GRAPH_LAYER_ID)); self.writer - .insert_static_edge_internal(edge_pos, src, dst, layer_id, lsn); + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); + edge_pos } @@ -127,26 +139,26 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen edge_pos: LocalPOS, src: VID, dst: VID, - exists: bool, + edge_exists: bool, layer_id: usize, c_props: impl IntoIterator, t_props: impl IntoIterator, - lsn: u64, ) { - if !exists { - self.increment_layer_num_edges(0); + if !edge_exists { + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); self.increment_layer_num_edges(layer_id); + + self.writer + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); } - self.writer - .insert_static_edge_internal(edge_pos, src, dst, 0, lsn); + self.graph_stats.update_time(t.t()); self.writer .update_const_properties(edge_pos, src, dst, layer_id, c_props); - self.graph_stats.update_time(t.t()); self.writer - .insert_edge_internal(t, edge_pos, src, dst, layer_id, t_props, lsn); + .insert_edge_internal(t, edge_pos, src, dst, layer_id, t_props); } pub fn segment_id(&self) -> usize { @@ -157,10 +169,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen 
self.graph_stats.increment(layer_id); } - pub fn contains_edge(&self, pos: LocalPOS, layer_id: usize) -> bool { - self.page.contains_edge(pos, layer_id, self.writer.deref()) - } - pub fn get_edge(&self, layer_id: usize, edge_pos: LocalPOS) -> Option<(VID, VID)> { self.page.get_edge(edge_pos, layer_id, self.writer.deref()) } diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 71ed0d1be3..b2fa36fadd 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -13,11 +13,14 @@ use crate::{ layer_counter::GraphStats, locked::edges::{LockedEdgePage, WriteLockedEdgePages}, }, - persist::strategy::Config, + persist::strategy::PersistenceStrategy, segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; -use raphtory_api::core::entities::{EID, VID, properties::meta::Meta}; +use raphtory_api::core::entities::{ + EID, VID, + properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, +}; use raphtory_core::{ entities::{ELID, LayerIds}, storage::timeindex::{AsTime, TimeIndexEntry}, @@ -42,7 +45,7 @@ pub struct ReadLockedEdgeStorage, EXT> { locked_pages: Box<[ES::ArcLockedSegment]>, } -impl, EXT: Config> ReadLockedEdgeStorage { +impl, EXT: PersistenceStrategy> ReadLockedEdgeStorage { pub fn storage(&self) -> &EdgeStorageInner { &self.storage } @@ -96,7 +99,7 @@ impl, EXT: Config> ReadLockedEdgeStorage, EXT: Config> EdgeStorageInner { +impl, EXT: PersistenceStrategy> EdgeStorageInner { pub fn locked(self: &Arc) -> ReadLockedEdgeStorage { let locked_pages = self .segments @@ -117,6 +120,10 @@ impl, EXT: Config> EdgeStorageInner &self.layer_counter } + pub fn segments(&self) -> &boxcar::Vec> { + &self.segments + } + pub fn new_with_meta(edges_path: Option, edge_meta: Arc, ext: EXT) -> Self { let free_pages = (0..N).map(RwLock::new).collect::>(); let empty = Self { @@ -130,21 +137,25 @@ impl, EXT: Config> EdgeStorageInner let layer_mapper = empty.edge_meta().layer_meta(); let prop_mapper 
= empty.edge_meta().temporal_prop_mapper(); let metadata_mapper = empty.edge_meta().metadata_mapper(); + if layer_mapper.num_fields() > 0 || prop_mapper.num_fields() > 0 || metadata_mapper.num_fields() > 0 { - let segment = empty.get_or_create_segment(0); + let segment = empty.get_or_create_segment(STATIC_GRAPH_LAYER_ID); let mut head = segment.head_mut(); + for layer in layer_mapper.ids() { head.get_or_create_layer(layer); } + if prop_mapper.num_fields() > 0 { head.get_or_create_layer(0) .properties_mut() .set_has_properties() } - segment.mark_dirty(); + + segment.set_dirty(true); } empty } @@ -185,7 +196,7 @@ impl, EXT: Config> EdgeStorageInner pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len; let meta = Arc::new(Meta::new_for_edges()); @@ -330,9 +341,11 @@ impl, EXT: Config> EdgeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } + let count = self.segments.count(); + if count > segment_id { - // something has allocated the segment, wait for it to be added + // Something has allocated the segment, wait for it to be added. loop { if let Some(segment) = self.segments.get(segment_id) { return segment; @@ -342,7 +355,7 @@ impl, EXT: Config> EdgeStorageInner } } } else { - // we need to create the segment + // We need to create the segment. self.segments.reserve(segment_id + 1 - count); loop { @@ -360,7 +373,7 @@ impl, EXT: Config> EdgeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. 
std::thread::yield_now(); } } @@ -371,7 +384,7 @@ impl, EXT: Config> EdgeStorageInner #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.max_edge_page_len() + self.ext.config().max_edge_page_len } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/graph_prop_page/writer.rs b/db4-storage/src/pages/graph_prop_page/writer.rs index 50485d47c8..612a1be9cc 100644 --- a/db4-storage/src/pages/graph_prop_page/writer.rs +++ b/db4-storage/src/pages/graph_prop_page/writer.rs @@ -31,10 +31,9 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { lsn: u64, ) { let add = self.mem_segment.add_properties(t, props); - self.mem_segment.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.graph_props.increment_est_size(add); - self.graph_props.mark_dirty(); + self.graph_props.set_dirty(true); } pub fn check_metadata(&self, props: &[(usize, Prop)]) -> Result<(), StorageError> { @@ -43,10 +42,9 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { pub fn update_metadata(&mut self, props: impl IntoIterator, lsn: u64) { let add = self.mem_segment.update_metadata(props); - self.mem_segment.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.graph_props.increment_est_size(add); - self.graph_props.mark_dirty(); + self.graph_props.set_dirty(true); } } diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs index 6e958182c3..105895db5d 100644 --- a/db4-storage/src/pages/graph_prop_store.rs +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -7,7 +7,7 @@ use crate::{ graph_prop_page::writer::GraphPropWriter, locked::graph_props::{LockedGraphPropPage, WriteLockedGraphPropPages}, }, - persist::strategy::Config, + persist::strategy::PersistenceStrategy, }; use std::{ @@ -31,7 +31,9 @@ pub struct GraphPropStorageInner { ext: EXT, } -impl, EXT: Config> GraphPropStorageInner { +impl, EXT: PersistenceStrategy> + GraphPropStorageInner +{ pub fn 
new_with_meta(path: Option<&Path>, meta: Arc, ext: EXT) -> Self { let page = Arc::new(GS::new(meta.clone(), path, ext.clone())); @@ -66,6 +68,10 @@ impl, EXT: Config> GraphPropStorageInne self.page.entry() } + pub fn segment(&self) -> &Arc { + &self.page + } + pub fn writer(&self) -> GraphPropWriter<'_, GS> { let head = self.page.head_mut(); let graph_props = &self.page; diff --git a/db4-storage/src/pages/locked/edges.rs b/db4-storage/src/pages/locked/edges.rs index a07f03147b..ff01546c1d 100644 --- a/db4-storage/src/pages/locked/edges.rs +++ b/db4-storage/src/pages/locked/edges.rs @@ -79,6 +79,11 @@ impl<'a, ES: EdgeSegmentOps> WriteLockedEdgePages<'a, ES> { Self { writers } } + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedEdgePage<'a, ES>> { + self.writers.get_mut(segment_id) + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedEdgePage<'a, ES>> { self.writers.par_iter_mut() } diff --git a/db4-storage/src/pages/locked/graph_props.rs b/db4-storage/src/pages/locked/graph_props.rs index 5ef775dfdb..87d41dc222 100644 --- a/db4-storage/src/pages/locked/graph_props.rs +++ b/db4-storage/src/pages/locked/graph_props.rs @@ -27,10 +27,9 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { lsn: u64, ) { let add = self.lock.add_properties(t, props); - self.lock.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.page.increment_est_size(add); - self.page.mark_dirty(); + self.page.set_dirty(true); } /// Add metadata (constant properties) to the graph @@ -41,10 +40,9 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { /// Update metadata (constant properties) on the graph pub fn update_metadata(&mut self, props: impl IntoIterator, lsn: u64) { let add = self.lock.update_metadata(props); - self.lock.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.page.increment_est_size(add); - self.page.mark_dirty(); + self.page.set_dirty(true); } } diff --git 
a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs index 48b4fd7f10..78aed9dbd5 100644 --- a/db4-storage/src/pages/locked/nodes.rs +++ b/db4-storage/src/pages/locked/nodes.rs @@ -11,7 +11,7 @@ use rayon::prelude::*; use std::ops::DerefMut; pub struct LockedNodePage<'a, NS> { - page_id: usize, + segment_id: usize, max_page_len: u32, layer_counter: &'a GraphStats, page: &'a NS, @@ -20,14 +20,14 @@ pub struct LockedNodePage<'a, NS> { impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { pub fn new( - page_id: usize, + segment_id: usize, layer_counter: &'a GraphStats, max_page_len: u32, page: &'a NS, lock: RwLockWriteGuard<'a, MemNodeSegment>, ) -> Self { Self { - page_id, + segment_id, layer_counter, max_page_len, page, @@ -49,14 +49,15 @@ impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { } #[inline(always)] - pub fn page_id(&self) -> usize { - self.page_id + pub fn segment_id(&self) -> usize { + self.segment_id } #[inline(always)] pub fn resolve_pos(&self, node_id: VID) -> Option { let (page, pos) = resolve_pos(node_id, self.max_page_len); - if page == self.page_id { + + if page == self.segment_id { Some(pos) } else { None @@ -86,6 +87,15 @@ impl<'a, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> { Self { writers } } + pub fn len(&self) -> usize { + self.writers.len() + } + + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedNodePage<'a, NS>> { + self.writers.get_mut(segment_id) + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedNodePage<'a, NS>> { self.writers.par_iter_mut() } @@ -104,10 +114,6 @@ impl<'a, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> { } } - pub fn len(&self) -> usize { - self.writers.len() - } - pub fn vacuum(&mut self) -> Result<(), StorageError> { for LockedNodePage { page, lock, .. 
} in &mut self.writers { page.vacuum(lock.deref_mut())?; diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index dc23557405..80d55f859a 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -3,14 +3,14 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, - persist::strategy::{Config, PersistentStrategy}, + persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; use graph_prop_store::GraphPropStorageInner; -use node_page::writer::{NodeWriter, WriterPair}; +use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ @@ -48,13 +48,13 @@ pub mod test_utils; // graph // (node/edges) // segment // layer_ids (0, 1, 2, ...) 
// actual graphy bits #[derive(Debug)] -pub struct GraphStore { +pub struct GraphStore { nodes: Arc>, edges: Arc>, graph_props: Arc>, graph_dir: Option, event_id: AtomicUsize, - _ext: EXT, + ext: EXT, } #[derive(Debug)] @@ -62,7 +62,7 @@ pub struct ReadLockedGraphStore< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: Config, + EXT: PersistenceStrategy, > { pub nodes: Arc>, pub edges: Arc>, @@ -73,89 +73,15 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > GraphStore { - pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { - let nodes = self.nodes.locked().into(); - let edges = self.edges.locked().into(); - - ReadLockedGraphStore { - nodes, - edges, - graph: self.clone(), - } - } - - pub fn extension(&self) -> &EXT { - &self._ext - } - - pub fn nodes(&self) -> &Arc> { - &self.nodes - } - - pub fn edges(&self) -> &Arc> { - &self.edges - } - - pub fn graph_props(&self) -> &Arc> { - &self.graph_props - } - - pub fn edge_meta(&self) -> &Meta { - self.edges.edge_meta() - } - - pub fn node_meta(&self) -> &Meta { - self.nodes.prop_meta() - } - - pub fn graph_props_meta(&self) -> &Meta { - self.graph_props.meta() - } - - pub fn earliest(&self) -> i64 { - self.nodes - .stats() - .earliest() - .min(self.edges.stats().earliest()) - } - - pub fn latest(&self) -> i64 { - self.nodes.stats().latest().max(self.edges.stats().latest()) - } - - pub fn load(graph_dir: impl AsRef) -> Result { - let nodes_path = graph_dir.as_ref().join("nodes"); - let edges_path = graph_dir.as_ref().join("edges"); - let graph_props_path = graph_dir.as_ref().join("graph_props"); - - let ext = read_graph_config::(graph_dir.as_ref())?; - - let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); - let edge_meta = edge_storage.edge_meta().clone(); - let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); - let node_meta = 
node_storage.prop_meta(); - - // Load graph temporal properties and metadata - let graph_props_storage = - Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - - for node_type in ext.node_types().iter() { - node_meta.get_or_create_node_type_id(node_type); - } - - let t_len = edge_storage.t_len(); + pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { + let node_meta = Meta::new_for_nodes(); + let edge_meta = Meta::new_for_edges(); + let graph_props_meta = Meta::new_for_graph_props(); - Ok(Self { - nodes: node_storage, - edges: edge_storage, - graph_props: graph_props_storage, - event_id: AtomicUsize::new(t_len), - graph_dir: Some(graph_dir.as_ref().to_path_buf()), - _ext: ext, - }) + Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) } pub fn new_with_meta( @@ -184,33 +110,106 @@ impl< edge_meta, ext.clone(), )); - let graph_storage = Arc::new(GraphPropStorageInner::new_with_meta( + let graph_prop_storage = Arc::new(GraphPropStorageInner::new_with_meta( graph_props_path.as_deref(), graph_props_meta, ext.clone(), )); if let Some(graph_dir) = graph_dir { - write_graph_config(graph_dir, &ext) + ext.config() + .save_to_dir(graph_dir) .expect("Unrecoverable! 
Failed to write graph config"); } Self { nodes: node_storage, edges: edge_storage, - graph_props: graph_storage, + graph_props: graph_prop_storage, event_id: AtomicUsize::new(0), graph_dir: graph_dir.map(|p| p.to_path_buf()), - _ext: ext, + ext, } } - pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { - let node_meta = Meta::new_for_nodes(); - let edge_meta = Meta::new_for_edges(); - let graph_props_meta = Meta::new_for_graph_props(); + pub fn load(graph_dir: impl AsRef, ext: EXT) -> Result { + let nodes_path = graph_dir.as_ref().join("nodes"); + let edges_path = graph_dir.as_ref().join("edges"); + let graph_props_path = graph_dir.as_ref().join("graph_props"); - Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) + let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); + let edge_meta = edge_storage.edge_meta().clone(); + let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); + let node_meta = node_storage.prop_meta(); + + // Load graph temporal properties and metadata. 
+ let graph_prop_storage = + Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); + + for node_type in ext.config().node_types().iter() { + node_meta.get_or_create_node_type_id(node_type); + } + + let t_len = edge_storage.t_len(); + + Ok(Self { + nodes: node_storage, + edges: edge_storage, + graph_props: graph_prop_storage, + event_id: AtomicUsize::new(t_len), + graph_dir: Some(graph_dir.as_ref().to_path_buf()), + ext, + }) + } + + pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { + let nodes = self.nodes.locked().into(); + let edges = self.edges.locked().into(); + + ReadLockedGraphStore { + nodes, + edges, + graph: self.clone(), + } + } + + pub fn extension(&self) -> &EXT { + &self.ext + } + + pub fn nodes(&self) -> &Arc> { + &self.nodes + } + + pub fn edges(&self) -> &Arc> { + &self.edges + } + + pub fn graph_props(&self) -> &Arc> { + &self.graph_props + } + + pub fn edge_meta(&self) -> &Meta { + self.edges.edge_meta() + } + + pub fn node_meta(&self) -> &Meta { + self.nodes.prop_meta() + } + + pub fn graph_props_meta(&self) -> &Meta { + self.graph_props.meta() + } + + pub fn earliest(&self) -> i64 { + self.nodes + .stats() + .earliest() + .min(self.edges.stats().earliest()) + } + + pub fn latest(&self) -> i64 { + self.nodes.stats().latest().max(self.edges.stats().latest()) } pub fn add_edge( @@ -247,10 +246,11 @@ impl< let src = src.into(); let dst = dst.into(); let mut session = self.write_session(src, dst, None); + session.set_lsn(lsn); let elid = session - .add_static_edge(src, dst, lsn) + .add_static_edge(src, dst) .map(|eid| eid.with_layer(0)); - session.add_edge_into_layer(t, src, dst, elid, lsn, props); + session.add_edge_into_layer(t, src, dst, elid, props); Ok(elid) } @@ -318,7 +318,7 @@ impl< let (segment, node_pos) = self.nodes.resolve_pos(node); let mut node_writer = self.nodes.writer(segment); let prop_writer = PropsMetaWriter::constant(self.node_meta(), props.into_iter())?; - node_writer.update_c_props(node_pos, layer_id, 
prop_writer.into_props_const()?, 0); // TODO: LSN + node_writer.update_c_props(node_pos, layer_id, prop_writer.into_props_const()?); Ok(()) } @@ -336,7 +336,7 @@ impl< let mut node_writer = self.nodes.writer(segment); let prop_writer = PropsMetaWriter::temporal(self.node_meta(), props.into_iter())?; - node_writer.add_props(t, node_pos, layer_id, prop_writer.into_props_temporal()?, 0); // TODO: LSN + node_writer.add_props(t, node_pos, layer_id, prop_writer.into_props_temporal()?); Ok(()) } @@ -349,26 +349,36 @@ impl< let (src_chunk, _) = self.nodes.resolve_pos(src); let (dst_chunk, _) = self.nodes.resolve_pos(dst); + // Acquire locks in consistent order (lower chunk ID first) to prevent deadlocks. let node_writers = if src_chunk < dst_chunk { - let src_writer = self.node_writer(src_chunk); - let dst_writer = self.node_writer(dst_chunk); - WriterPair::Different { - src_writer, - dst_writer, + let src = self.node_writer(src_chunk); + let dst = self.node_writer(dst_chunk); + + NodeWriters { + src, + dst: Some(dst), } } else if src_chunk > dst_chunk { - let dst_writer = self.node_writer(dst_chunk); - let src_writer = self.node_writer(src_chunk); - WriterPair::Different { - src_writer, - dst_writer, + let dst = self.node_writer(dst_chunk); + let src = self.node_writer(src_chunk); + + NodeWriters { + src, + dst: Some(dst), } } else { - let writer = self.node_writer(src_chunk); - WriterPair::Same { writer } + let src = self.node_writer(src_chunk); + + NodeWriters { src, dst: None } }; - let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); + let (_, src_pos) = self.nodes.resolve_pos(src); + let existing_eid = node_writers.src.get_out_edge(src_pos, dst, 0); + + let edge_writer = match e_id.or(existing_eid) { + Some(e_id) => self.edge_writer(e_id), + None => self.get_free_writer(), + }; WriteSession::new(node_writers, edge_writer, self) } @@ -386,22 +396,32 @@ impl< self.nodes().get_or_create_segment(src_chunk); self.nodes().get_or_create_segment(dst_chunk); + // FIXME: 
This can livelock due to inconsistent lock acquisition order. loop { if let Some(src_writer) = self.nodes().try_writer(src_chunk) { if let Some(dst_writer) = self.nodes().try_writer(dst_chunk) { - break WriterPair::Different { - src_writer, - dst_writer, + break NodeWriters { + src: src_writer, + dst: Some(dst_writer), }; } } } } else { let writer = self.node_writer(src_chunk); - WriterPair::Same { writer } + NodeWriters { + src: writer, + dst: None, + } }; - let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); + let (_, src_pos) = self.nodes.resolve_pos(src); + let existing_eid = node_writers.src.get_out_edge(src_pos, dst, 0); + + let edge_writer = match e_id.or(existing_eid) { + Some(e_id) => self.edge_writer(e_id), + None => self.get_free_writer(), + }; WriteSession::new(node_writers, edge_writer, self) } @@ -433,38 +453,19 @@ impl< } } -impl Drop for GraphStore { +impl Drop for GraphStore { fn drop(&mut self) { let node_types = self.nodes.prop_meta().get_all_node_types(); - self._ext.set_node_types(node_types); + self.ext.config_mut().set_node_types(node_types); + if let Some(graph_dir) = self.graph_dir.as_ref() { - if write_graph_config(graph_dir, &self._ext).is_err() { + if self.ext.config().save_to_dir(graph_dir).is_err() { eprintln!("Unrecoverable! 
Failed to write graph meta"); } } } } -fn write_graph_config( - graph_dir: impl AsRef, - config: &EXT, -) -> Result<(), StorageError> { - let config_file = graph_dir.as_ref().join("graph_config.json"); - let config_file = std::fs::File::create(&config_file)?; - - serde_json::to_writer_pretty(config_file, config)?; - Ok(()) -} - -fn read_graph_config( - graph_dir: impl AsRef, -) -> Result { - let config_file = graph_dir.as_ref().join("graph_config.json"); - let config_file = std::fs::File::open(config_file)?; - let config = serde_json::from_reader(config_file)?; - Ok(config) -} - #[inline(always)] pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, LocalPOS) { let i = i.into(); @@ -484,11 +485,14 @@ mod test { check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, }, + persist::strategy::{DEFAULT_MAX_MEMORY_BYTES, PersistenceConfig, PersistenceStrategy}, + wal::no_wal::NoWal, }; use chrono::DateTime; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; + use std::sync::Arc; fn check_edges(edges: Vec<(impl Into, impl Into)>, chunk_size: u32, par_load: bool) { // Set optional layer_id to None @@ -499,7 +503,12 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - Layer::new(Some(graph_dir), Extension::new(chunk_size, chunk_size)) + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + chunk_size, + chunk_size, + ); + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -509,7 +518,12 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - Layer::new(Some(graph_dir), Extension::new(chunk_size, chunk_size)) + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + chunk_size, + chunk_size, + ); + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -581,7 
+595,11 @@ mod test { #[test] fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); - let g = Layer::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + let g = Layer::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_edge(4, 7, 3).unwrap(); assert_eq!(g.nodes().num_nodes(), 2); } @@ -589,7 +607,11 @@ mod test { #[test] fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); - let g = GraphStore::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + let g = GraphStore::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_edge(4, 7, 3).unwrap(); let check = |g: &Layer| { @@ -631,7 +653,11 @@ mod test { #[test] fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); - let g = Layer::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + let g = Layer::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_node_props::(1, 0, 0, vec![]) .expect("Failed to add node props"); g.add_node_props::(2, 0, 0, vec![]) @@ -1434,13 +1460,23 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - Layer::new(Some(path), Extension::new(node_page_len, edge_page_len)) + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + node_page_len, + edge_page_len, + ); + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - Layer::new(Some(path), Extension::new(node_page_len, 
edge_page_len)) + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + node_page_len, + edge_page_len, + ); + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } } diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index 882c97d05e..981a9b9ad2 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -37,9 +37,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: impl Into, dst: impl Into, e_id: impl Into, - lsn: u64, ) { - self.add_outbound_edge_inner(t, src_pos, dst, e_id, lsn); + self.add_outbound_edge_inner(t, src_pos, dst, e_id); } pub fn add_static_outbound_edge( @@ -47,10 +46,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: LocalPOS, dst: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); - self.add_outbound_edge_inner::(None, src_pos, dst, e_id.with_layer(0), lsn); + self.add_outbound_edge_inner::(None, src_pos, dst, e_id.with_layer(0)); } fn add_outbound_edge_inner( @@ -59,7 +57,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: impl Into, dst: impl Into, e_id: impl Into, - lsn: u64, ) { let src_pos = src_pos.into(); let dst = dst.into(); @@ -69,9 +66,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri let e_id = e_id.into(); let layer_id = e_id.layer(); - let (is_new_node, add) = self - .mut_segment - .add_outbound_edge(t, src_pos, dst, e_id, lsn); + let (is_new_node, add) = self.mut_segment.add_outbound_edge(t, src_pos, dst, e_id); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(src_pos, layer_id) { @@ -85,9 +80,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) { - self.add_inbound_edge_inner(t, dst_pos, src, e_id, lsn); + self.add_inbound_edge_inner(t, dst_pos, src, e_id); } pub fn add_static_inbound_edge( @@ -95,10 +89,9 @@ 
impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: LocalPOS, src: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); - self.add_inbound_edge_inner::(None, dst_pos, src, e_id.with_layer(0), lsn); + self.add_inbound_edge_inner::(None, dst_pos, src, e_id.with_layer(0)); } fn add_inbound_edge_inner( @@ -107,7 +100,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); let src = src.into(); @@ -116,9 +108,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri } let layer = e_id.layer(); let dst_pos = dst_pos.into(); - let (is_new_node, add) = self - .mut_segment - .add_inbound_edge(t, dst_pos, src, e_id, lsn); + let (is_new_node, add) = self.mut_segment.add_inbound_edge(t, dst_pos, src, e_id); self.page.increment_est_size(add); @@ -133,11 +123,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri pos: LocalPOS, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { self.l_counter.update_time(t.t()); let (is_new_node, add) = self.mut_segment.add_props(t, pos, layer_id, props); - self.mut_segment.as_mut()[layer_id].set_lsn(lsn); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(pos, layer_id) { self.l_counter.increment(layer_id); @@ -158,10 +146,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri pos: LocalPOS, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { let (is_new_node, add) = self.mut_segment.update_metadata(pos, layer_id, props); - self.mut_segment.as_mut()[layer_id].set_lsn(lsn); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(pos, layer_id) { self.l_counter.increment(layer_id); @@ -172,9 +158,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.mut_segment.get_metadata(pos, layer_id, prop_id) } - pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, e_id: ELID, lsn: u64) { + pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, 
e_id: ELID) { self.l_counter.update_time(t.t()); - let add = self.mut_segment.update_timestamp(t, pos, e_id, lsn); + let add = self.mut_segment.update_timestamp(t, pos, e_id); self.page.increment_est_size(add); } @@ -194,18 +180,17 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri layer_id: usize, gid: GidRef<'_>, node_type: usize, - lsn: u64, ) { let node_type = (node_type != 0).then_some(node_type); - self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type), lsn); + self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type)); } - pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GidRef<'_>, lsn: u64) { - self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), None), lsn); + pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GidRef<'_>) { + self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), None)); } - pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID, lsn: u64) { - self.update_timestamp(t, node, e_id, lsn); + pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID) { + self.update_timestamp(t, node, e_id); } } @@ -231,34 +216,19 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> Drop } } -pub enum WriterPair<'a, MP: DerefMut, NS: NodeSegmentOps> { - Same { - writer: NodeWriter<'a, MP, NS>, - }, - Different { - src_writer: NodeWriter<'a, MP, NS>, - dst_writer: NodeWriter<'a, MP, NS>, - }, +/// Holds writers for src and dst node segments when adding an edge. +/// If both nodes are in the same segment, `dst` is `None` and `src` is used for both. +pub struct NodeWriters<'a, MP: DerefMut, NS: NodeSegmentOps> { + pub src: NodeWriter<'a, MP, NS>, + pub dst: Option>, } -impl<'a, MP: DerefMut, NS: NodeSegmentOps> WriterPair<'a, MP, NS> { +impl<'a, MP: DerefMut, NS: NodeSegmentOps> NodeWriters<'a, MP, NS> { pub fn get_mut_src(&mut self) -> &mut NodeWriter<'a, MP, NS> { - match self { - WriterPair::Same { writer, .. 
} => writer, - WriterPair::Different { - src_writer: writer_i, - .. - } => writer_i, - } + &mut self.src } pub fn get_mut_dst(&mut self) -> &mut NodeWriter<'a, MP, NS> { - match self { - WriterPair::Same { writer, .. } => writer, - WriterPair::Different { - dst_writer: writer_j, - .. - } => writer_j, - } + self.dst.as_mut().unwrap_or(&mut self.src) } } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 78a021c157..d5f33c87e4 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -7,7 +7,7 @@ use crate::{ layer_counter::GraphStats, locked::nodes::{LockedNodePage, WriteLockedNodePages}, }, - persist::strategy::Config, + persist::strategy::PersistenceStrategy, segments::node::segment::MemNodeSegment, }; use parking_lot::RwLockWriteGuard; @@ -43,7 +43,7 @@ pub struct ReadLockedNodeStorage, EXT> { locked_segments: Box<[NS::ArcLockedSegment]>, } -impl, EXT: Config> ReadLockedNodeStorage { +impl, EXT: PersistenceStrategy> ReadLockedNodeStorage { pub fn node_ref( &self, node: impl Into, @@ -93,7 +93,7 @@ impl, EXT: Config> ReadLockedNodeStorage NodeStorageInner { +impl NodeStorageInner { pub fn prop_meta(&self) -> &Arc { &self.node_meta } @@ -128,11 +128,11 @@ impl NodeStorageInner { } pub fn max_page_len(&self) -> u32 { - self.ext.max_node_page_len() + self.ext.config().max_node_page_len } } -impl, EXT: Config> NodeStorageInner { +impl, EXT: PersistenceStrategy> NodeStorageInner { pub fn new_with_meta( nodes_path: Option, node_meta: Arc, @@ -161,7 +161,7 @@ impl, EXT: Config> NodeStorageInner .properties_mut() .set_has_properties() } - segment.mark_dirty(); + segment.set_dirty(true); } empty } @@ -364,19 +364,21 @@ impl, EXT: Config> NodeStorageInner if let Some(segment) = self.pages.get(segment_id) { return segment; } + let count = self.pages.count(); + if count > segment_id { - // something has allocated the segment, wait for it to be added + // Something has allocated the segment, wait for 
it to be added. loop { if let Some(segment) = self.pages.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } } else { - // we need to create the segment + // We need to create the segment. self.pages.reserve(segment_id + 1 - count); loop { @@ -395,7 +397,7 @@ impl, EXT: Config> NodeStorageInner if let Some(segment) = self.pages.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index 36999acd38..9971e7b5e4 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -1,14 +1,18 @@ use super::{ - GraphStore, edge_page::writer::EdgeWriter, node_page::writer::WriterPair, resolve_pos, + GraphStore, edge_page::writer::EdgeWriter, node_page::writer::NodeWriters, resolve_pos, }; use crate::{ LocalPOS, api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - persist::strategy::{Config, PersistentStrategy}, + persist::strategy::PersistenceStrategy, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, + wal::LSN, }; use parking_lot::RwLockWriteGuard; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::{ + entities::properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + storage::dict_mapper::MaybeNew, +}; use raphtory_core::{ entities::{EID, ELID, VID}, storage::timeindex::AsTime, @@ -19,10 +23,10 @@ pub struct WriteSession< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: Config, + EXT: PersistenceStrategy, > { - node_writers: WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, - edge_writer: Option, ES>>, + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + edge_writer: EdgeWriter<'a, 
RwLockWriteGuard<'a, MemEdgeSegment>, ES>, graph: &'a GraphStore, } @@ -31,12 +35,12 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > WriteSession<'a, NS, ES, GS, EXT> { pub fn new( - node_writers: WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, - edge_writer: Option, ES>>, + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, graph: &'a GraphStore, ) -> Self { Self { @@ -56,7 +60,6 @@ impl< src: impl Into, dst: impl Into, edge: MaybeNew, - lsn: u64, props: impl IntoIterator, ) { let src = src.into(); @@ -69,19 +72,15 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(writer) = self.edge_writer.as_mut() { - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - - writer.add_edge(t, edge_pos, src, dst, props, layer, lsn); - } else { - let mut writer = self.graph.edge_writer(e_id.edge); - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.add_edge(t, edge_pos, src, dst, props, layer, lsn); - self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks - } + self.edge_writer + .add_edge(t, edge_pos, src, dst, props, layer); let edge_id = edge.inner(); @@ -94,18 +93,18 @@ impl< { self.node_writers .get_mut_src() - .add_outbound_edge(Some(t), src_pos, dst, edge_id, lsn); + .add_outbound_edge(Some(t), src_pos, dst, edge_id); self.node_writers .get_mut_dst() - .add_inbound_edge(Some(t), dst_pos, src, edge_id, lsn); + 
.add_inbound_edge(Some(t), dst_pos, src, edge_id); } self.node_writers .get_mut_src() - .update_timestamp(t, src_pos, e_id, lsn); + .update_timestamp(t, src_pos, e_id); self.node_writers .get_mut_dst() - .update_timestamp(t, dst_pos, e_id, lsn); + .update_timestamp(t, dst_pos, e_id); } pub fn delete_edge_from_layer( @@ -114,7 +113,6 @@ impl< src: impl Into, dst: impl Into, edge: MaybeNew, - lsn: u64, ) { let src = src.into(); let dst = dst.into(); @@ -126,19 +124,14 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(writer) = self.edge_writer.as_mut() { - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - - writer.delete_edge(t, edge_pos, src, dst, layer, lsn); - } else { - let mut writer = self.graph.edge_writer(e_id.edge); - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.delete_edge(t, edge_pos, src, dst, layer, lsn); - self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks - } + self.edge_writer.delete_edge(t, edge_pos, src, dst, layer); let edge_id = edge.inner(); @@ -150,81 +143,73 @@ impl< .get_out_edge(src_pos, dst, edge_id.layer()) .is_none() { - self.node_writers.get_mut_src().add_outbound_edge( - Some(t), - src_pos, - dst, - edge_id, - lsn, - ); - self.node_writers.get_mut_dst().add_inbound_edge( - Some(t), - dst_pos, - src, - edge_id, - lsn, - ); + self.node_writers + .get_mut_src() + .add_outbound_edge(Some(t), src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_inbound_edge(Some(t), dst_pos, src, edge_id); } self.node_writers .get_mut_src() - .update_deletion_time(t, 
src_pos, e_id, lsn); + .update_deletion_time(t, src_pos, e_id); self.node_writers .get_mut_dst() - .update_deletion_time(t, dst_pos, e_id, lsn); + .update_deletion_time(t, dst_pos, e_id); } } - pub fn add_static_edge( - &mut self, - src: impl Into, - dst: impl Into, - lsn: u64, - ) -> MaybeNew { + pub fn add_static_edge(&mut self, src: impl Into, dst: impl Into) -> MaybeNew { let src = src.into(); let dst = dst.into(); - let layer_id = 0; // static graph goes to layer 0 let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(e_id) = self - .node_writers - .get_mut_src() - .get_out_edge(src_pos, dst, layer_id) - { - // If edge_writer is not set, we need to create a new one - if self.edge_writer.is_none() { - self.edge_writer = Some(self.graph.edge_writer(e_id)); - } - let edge_writer = self.edge_writer.as_mut().unwrap(); - let (_, edge_pos) = self.graph.edges().resolve_pos(e_id); - - edge_writer.add_static_edge(Some(edge_pos), src, dst, lsn, true); - - MaybeNew::Existing(e_id) - } else { - let mut edge_writer = self.graph.get_free_writer(); - let edge_id = edge_writer.add_static_edge(None, src, dst, lsn, false); - let edge_id = - edge_id.as_eid(edge_writer.segment_id(), self.graph.edges().max_page_len()); - - self.edge_writer = Some(edge_writer); // Attach edge_writer to hold onto locks - + let existing_eid = self.node_writers .get_mut_src() - .add_static_outbound_edge(src_pos, dst, edge_id, lsn); - self.node_writers - .get_mut_dst() - .add_static_inbound_edge(dst_pos, src, edge_id, lsn); + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); - MaybeNew::New(edge_id) + // Edge already exists, so no need to add it again. 
+ if let Some(eid) = existing_eid { + return MaybeNew::Existing(eid); } + + let edge_pos = None; + let already_counted = false; + let edge_pos = self + .edge_writer + .add_static_edge(edge_pos, src, dst, already_counted); + let edge_id = edge_pos.as_eid( + self.edge_writer.segment_id(), + self.graph.edges().max_page_len(), + ); + + self.node_writers + .get_mut_src() + .add_static_outbound_edge(src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_static_inbound_edge(dst_pos, src, edge_id); + + MaybeNew::New(edge_id) } pub fn node_writers( &mut self, - ) -> &mut WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { + ) -> &mut NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { &mut self.node_writers } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.node_writers.src.mut_segment.set_lsn(lsn); + + if let Some(dst) = &mut self.node_writers.dst { + dst.mut_segment.set_lsn(lsn); + } + + self.edge_writer.writer.set_lsn(lsn); + } } diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index 44adc815b9..945395d8dc 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -19,7 +19,7 @@ use crate::{ }, error::StorageError, pages::GraphStore, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, }; use super::fixtures::{AddEdge, Fixture, NodeFixture}; @@ -28,7 +28,7 @@ pub fn make_graph_from_edges< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( edges: &[(VID, VID, Option)], // src, dst, optional layer_id graph_dir: &Path, @@ -58,9 +58,10 @@ pub fn make_graph_from_edges< let layer_id = layer_id.unwrap_or(0); let mut session = graph.write_session(*src, *dst, None); - let eid = session.add_static_edge(*src, *dst, lsn); + session.set_lsn(lsn); + let eid = session.add_static_edge(*src, *dst); let elid = eid.map(|eid| eid.with_layer(layer_id)); - 
session.add_edge_into_layer(timestamp, *src, *dst, elid, lsn, []); + session.add_edge_into_layer(timestamp, *src, *dst, elid, []); Ok::<_, StorageError>(()) }) @@ -75,9 +76,10 @@ pub fn make_graph_from_edges< let layer_id = layer_id.unwrap_or(0); let mut session = graph.write_session(*src, *dst, None); - let eid = session.add_static_edge(*src, *dst, lsn); + session.set_lsn(lsn); + let eid = session.add_static_edge(*src, *dst); let elid = eid.map(|e| e.with_layer(layer_id)); - session.add_edge_into_layer(timestamp, *src, *dst, elid, lsn, []); + session.add_edge_into_layer(timestamp, *src, *dst, elid, []); Ok::<_, StorageError>(()) }) @@ -90,7 +92,7 @@ pub fn check_edges_support< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( edges: Vec<(impl Into, impl Into, Option)>, // src, dst, optional layer_id par_load: bool, @@ -121,7 +123,7 @@ pub fn check_edges_support< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( stage: &str, expected_edges: &[(VID, VID, Option)], // (src, dst, layer_id) @@ -205,9 +207,10 @@ pub fn check_edges_support< check("pre-drop", &edges, &graph); if check_load { + let ext = graph.extension().clone(); drop(graph); - let maybe_ns = GraphStore::::load(graph_dir.path()); + let maybe_ns = GraphStore::::load(graph_dir.path(), ext); match maybe_ns { Ok(graph) => { @@ -221,7 +224,7 @@ pub fn check_edges_support< } pub fn check_graph_with_nodes_support< - EXT: PersistentStrategy, + EXT: PersistenceStrategy, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -349,14 +352,15 @@ pub fn check_graph_with_nodes_support< check_fn(temp_props, const_props, &graph); if check_load { + let ext = graph.extension().clone(); drop(graph); - let graph = GraphStore::::load(graph_dir.path()).unwrap(); + let graph = GraphStore::::load(graph_dir.path(), ext).unwrap(); check_fn(temp_props, const_props, &graph); } 
} pub fn check_graph_with_props_support< - EXT: PersistentStrategy, + EXT: PersistenceStrategy, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -503,9 +507,10 @@ pub fn check_graph_with_props_support< if check_load { // Load the graph from disk and check again + let ext = graph.extension().clone(); drop(graph); - let graph = GraphStore::::load(graph_dir.path()).unwrap(); + let graph = GraphStore::::load(graph_dir.path(), ext).unwrap(); black_box(check_fn(edges, &graph)); } } diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 0b6b045d85..45a439dea4 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,51 +1,124 @@ -use std::ops::DerefMut; - -use serde::{Deserialize, Serialize}; - -use crate::segments::{ - edge::segment::{EdgeSegmentView, MemEdgeSegment}, - graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, - node::segment::{MemNodeSegment, NodeSegmentView}, +use crate::{ + error::StorageError, + segments::{ + edge::segment::{EdgeSegmentView, MemEdgeSegment}, + graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, + node::segment::{MemNodeSegment, NodeSegmentView}, + }, + wal::{Wal, no_wal::NoWal}, }; +use serde::{Deserialize, Serialize}; +use std::{fmt::Debug, ops::DerefMut, path::Path, sync::Arc}; pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; -pub trait Config: - Default + std::fmt::Debug + Clone + Send + Sync + 'static + for<'a> Deserialize<'a> + Serialize -{ - fn max_node_page_len(&self) -> u32; - fn max_edge_page_len(&self) -> u32; +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PersistenceConfig { + pub max_node_page_len: u32, + pub max_edge_page_len: u32, + pub max_memory_bytes: usize, + pub bg_flush_enabled: bool, + pub node_types: Vec, +} - fn max_memory_bytes(&self) -> usize; - 
fn is_parallel(&self) -> bool; - fn node_types(&self) -> &[String]; - fn set_node_types(&mut self, types: impl IntoIterator>); +impl Default for PersistenceConfig { + fn default() -> Self { + Self { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, + bg_flush_enabled: true, + node_types: Vec::new(), + } + } } -pub trait PersistentStrategy: Config { +impl PersistenceConfig { + const CONFIG_FILE: &str = "persistence_config.json"; + + pub fn load_from_dir(dir: impl AsRef) -> Result { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::open(config_file)?; + let config = serde_json::from_reader(config_file)?; + Ok(config) + } + + pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::create(&config_file)?; + serde_json::to_writer_pretty(config_file, self)?; + Ok(()) + } + + pub fn new_with_memory(max_memory_bytes: usize) -> Self { + Self { + max_memory_bytes, + ..Default::default() + } + } + + pub fn new_with_page_lens( + max_memory_bytes: usize, + max_node_page_len: u32, + max_edge_page_len: u32, + ) -> Self { + Self { + max_memory_bytes, + max_node_page_len, + max_edge_page_len, + ..Default::default() + } + } + + pub fn with_bg_flush(mut self) -> Self { + self.bg_flush_enabled = true; + self + } + + pub fn node_types(&self) -> &[String] { + &self.node_types + } + + pub fn set_node_types(&mut self, types: impl IntoIterator>) { + self.node_types = types.into_iter().map(|s| s.as_ref().to_string()).collect(); + } +} + +pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type NS; type ES; type GS; + type WalType: Wal; + + fn new(config: PersistenceConfig, wal: Arc) -> Self; + + fn config(&self) -> &PersistenceConfig; + + // Need this to set node_types. 
+ // TODO: Remove this once we have a better way to set node_types. + fn config_mut(&mut self) -> &mut PersistenceConfig; + + fn wal(&self) -> &Self::WalType; fn persist_node_segment>( &self, - node_page: &Self::NS, + node_segment: &Self::NS, writer: MP, ) where Self: Sized; - fn persist_edge_page>( + fn persist_edge_segment>( &self, - edge_page: &Self::ES, + edge_segment: &Self::ES, writer: MP, ) where Self: Sized; - fn persist_graph_props>( + fn persist_graph_prop_segment>( &self, - graph_segment: &Self::GS, + graph_prop_segment: &Self::GS, writer: MP, ) where Self: Sized; @@ -54,58 +127,33 @@ pub trait PersistentStrategy: Config { fn disk_storage_enabled() -> bool; } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct NoOpStrategy { - max_node_page_len: u32, - max_edge_page_len: u32, + config: PersistenceConfig, + wal: Arc, } -impl NoOpStrategy { - pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { - Self { - max_node_page_len, - max_edge_page_len, - } - } -} - -impl Default for NoOpStrategy { - fn default() -> Self { - Self::new(DEFAULT_MAX_PAGE_LEN_NODES, DEFAULT_MAX_PAGE_LEN_EDGES) - } -} - -impl Config for NoOpStrategy { - fn max_node_page_len(&self) -> u32 { - self.max_node_page_len - } - - #[inline(always)] - fn max_edge_page_len(&self) -> u32 { - self.max_edge_page_len - } +impl PersistenceStrategy for NoOpStrategy { + type ES = EdgeSegmentView; + type NS = NodeSegmentView; + type GS = GraphPropSegmentView; + type WalType = NoWal; - fn max_memory_bytes(&self) -> usize { - usize::MAX + fn new(config: PersistenceConfig, wal: Arc) -> Self { + Self { config, wal } } - fn is_parallel(&self) -> bool { - false + fn config(&self) -> &PersistenceConfig { + &self.config } - fn node_types(&self) -> &[String] { - &[] + fn config_mut(&mut self) -> &mut PersistenceConfig { + &mut self.config } - fn set_node_types(&mut self, _types: impl IntoIterator>) { - // No operation + fn wal(&self) -> &Self::WalType { + 
&self.wal } -} - -impl PersistentStrategy for NoOpStrategy { - type ES = EdgeSegmentView; - type NS = NodeSegmentView; - type GS = GraphPropSegmentView; fn persist_node_segment>( &self, @@ -115,7 +163,7 @@ impl PersistentStrategy for NoOpStrategy { // No operation } - fn persist_edge_page>( + fn persist_edge_segment>( &self, _edge_page: &Self::ES, _writer: MP, @@ -123,7 +171,7 @@ impl PersistentStrategy for NoOpStrategy { // No operation } - fn persist_graph_props>( + fn persist_graph_prop_segment>( &self, _graph_segment: &Self::GS, _writer: MP, diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 66c072ef30..31b14ff149 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -3,13 +3,14 @@ use crate::{ api::edges::{EdgeSegmentOps, LockedESegment}, error::StorageError, pages::resolve_pos, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, properties::PropMutEntry, segments::{ HasRow, SegmentContainer, edge::entry::{MemEdgeEntry, MemEdgeRef}, }, utils::Iter4, + wal::LSN, }; use arrow_array::{ArrayRef, BooleanArray}; use parking_lot::lock_api::ArcRwLockReadGuard; @@ -53,6 +54,7 @@ impl HasRow for EdgeEntry { pub struct MemEdgeSegment { layers: Vec>, est_size: usize, + lsn: LSN, } impl>> From for MemEdgeSegment { @@ -63,7 +65,11 @@ impl>> From for MemEdgeSeg !layers.is_empty(), "MemEdgeSegment must have at least one layer" ); - Self { layers, est_size } + Self { + layers, + est_size, + lsn: 0, + } } } @@ -84,6 +90,7 @@ impl MemEdgeSegment { Self { layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], est_size: 0, + lsn: 0, } } @@ -130,7 +137,25 @@ impl MemEdgeSegment { } pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) + self.lsn + } + + pub fn set_lsn(&mut self, lsn: u64) { + self.lsn = lsn; + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with 
its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + layers, + est_size: 0, + lsn: self.lsn, + } } pub fn max_page_len(&self) -> u32 { @@ -207,20 +232,20 @@ impl MemEdgeSegment { dst: VID, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { // Ensure we have enough layers self.ensure_layer(layer_id); let est_size = self.layers[layer_id].est_size(); - self.layers[layer_id].set_lsn(lsn); let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id); let mut prop_entry: PropMutEntry<'_> = self.layers[layer_id] .properties_mut() .get_mut_entry(local_row); + let ts = TimeIndexEntry::new(t.t(), t.i()); prop_entry.append_t_props(ts, props); + let layer_est_size = self.layers[layer_id].est_size(); self.est_size += layer_est_size.saturating_sub(est_size); } @@ -232,14 +257,12 @@ impl MemEdgeSegment { src: VID, dst: VID, layer_id: usize, - lsn: u64, ) { let t = TimeIndexEntry::new(t.t(), t.i()); // Ensure we have enough layers self.ensure_layer(layer_id); let est_size = self.layers[layer_id].est_size(); - self.layers[layer_id].set_lsn(lsn); let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id); let props = self.layers[layer_id].properties_mut(); @@ -254,14 +277,12 @@ impl MemEdgeSegment { src: impl Into, dst: impl Into, layer_id: usize, - lsn: u64, ) { let src = src.into(); let dst = dst.into(); // Ensure we have enough layers self.ensure_layer(layer_id); - self.layers[layer_id].set_lsn(lsn); let est_size = self.layers[layer_id].est_size(); self.reserve_local_row(edge_pos, src, dst, layer_id); @@ -271,7 +292,7 @@ impl MemEdgeSegment { fn ensure_layer(&mut self, layer_id: usize) { if layer_id >= self.layers.len() { - // Get details from first layer to create consistent new layers + // Get details from first layer to create consistent new layers. 
if let Some(first_layer) = self.layers.first() { let segment_id = first_layer.segment_id(); let max_page_len = first_layer.max_page_len(); @@ -441,7 +462,7 @@ impl LockedESegment for ArcLockedSegmentView { } } -impl>> EdgeSegmentOps for EdgeSegmentView

{ +impl>> EdgeSegmentOps for EdgeSegmentView

{ type Extension = P; type Entry<'a> = MemEdgeEntry<'a, parking_lot::RwLockReadGuard<'a, MemEdgeSegment>>; @@ -476,7 +497,7 @@ impl>> EdgeSegmentOps for EdgeSegm } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len; Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) .into(), @@ -580,7 +601,11 @@ impl>> EdgeSegmentOps for EdgeSegm .map_or(0, |layer| layer.len()) } - fn mark_dirty(&self) {} + fn set_dirty(&self, _dirty: bool) {} + + fn immut_lsn(&self) -> LSN { + panic!("immut_lsn not supported for EdgeSegmentView"); + } } #[cfg(test)] @@ -607,7 +632,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("test1"))], - 1, ); segment.insert_edge_internal( @@ -617,7 +641,6 @@ mod test { VID(4), 0, vec![(0, Prop::from("test2"))], - 2, ); segment.insert_edge_internal( @@ -627,7 +650,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test3"))], - 3, ); // Verify edges exist @@ -757,7 +779,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("test1"))], - 1, ); segment1.insert_edge_internal( TimeIndexEntry::new(2, 1), @@ -766,7 +787,6 @@ mod test { VID(4), 0, vec![(0, Prop::from("test2"))], - 1, ); segment1.insert_edge_internal( TimeIndexEntry::new(3, 2), @@ -775,7 +795,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test3"))], - 1, ); // Equivalent bulk insertion @@ -825,7 +844,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("individual1"))], - 1, ); // Bulk insert some edges @@ -857,7 +875,6 @@ mod test { VID(8), 0, vec![(0, Prop::from("individual2"))], - 1, ); // Another bulk insert @@ -977,14 +994,13 @@ mod test { VID(2), 0, vec![(0, Prop::from("test"))], - 1, ); let est_size1 = segment.est_size(); assert!(est_size1 > 0); - segment.delete_edge_internal(TimeIndexEntry::new(2, 3), LocalPOS(0), VID(5), VID(3), 0, 0); + segment.delete_edge_internal(TimeIndexEntry::new(2, 3), LocalPOS(0), VID(5), VID(3), 0); let est_size2 = 
segment.est_size(); @@ -1001,7 +1017,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test2"))], - 1, ); let est_size3 = segment.est_size(); @@ -1012,7 +1027,7 @@ mod test { // Insert a static edge - segment.insert_static_edge_internal(LocalPOS(1), 4, 6, 0, 1); + segment.insert_static_edge_internal(LocalPOS(1), 4, 6, 0); let est_size4 = segment.est_size(); assert_eq!( diff --git a/db4-storage/src/segments/graph_prop/mod.rs b/db4-storage/src/segments/graph_prop/mod.rs index 7d20c0624d..d7147524c2 100644 --- a/db4-storage/src/segments/graph_prop/mod.rs +++ b/db4-storage/src/segments/graph_prop/mod.rs @@ -4,7 +4,7 @@ pub mod segment; use crate::{ api::graph_props::GraphPropSegmentOps, error::StorageError, - persist::strategy::Config, + persist::strategy::PersistenceStrategy, segments::graph_prop::{entry::MemGraphPropEntry, segment::MemGraphPropSegment}, }; use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -20,7 +20,7 @@ use std::{ /// `GraphPropSegmentView` manages graph temporal properties and graph metadata /// (constant properties). Reads / writes are always served from the in-memory segment. #[derive(Debug)] -pub struct GraphPropSegmentView { +pub struct GraphPropSegmentView { /// In-memory segment that contains the latest graph properties /// and graph metadata writes. head: Arc>, @@ -33,7 +33,7 @@ pub struct GraphPropSegmentView { _persistent: P, } -impl GraphPropSegmentOps for GraphPropSegmentView

{ +impl GraphPropSegmentOps for GraphPropSegmentView

{ type Extension = P; type Entry<'a> = MemGraphPropEntry<'a>; @@ -79,8 +79,8 @@ impl GraphPropSegmentOps for GraphPropSegmentView

{ self.est_size.load(Ordering::Relaxed) } - fn mark_dirty(&self) { - self.is_dirty.store(true, Ordering::Relaxed); + fn set_dirty(&self, dirty: bool) { + self.is_dirty.store(dirty, Ordering::Release); } fn notify_write( diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index a007804978..34ed599f83 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,6 +1,7 @@ use crate::{ error::StorageError, segments::{HasRow, SegmentContainer}, + wal::LSN, }; use raphtory_api::core::entities::properties::{meta::Meta, prop::Prop}; use raphtory_core::{ @@ -14,6 +15,7 @@ use std::sync::Arc; pub struct MemGraphPropSegment { /// Layers containing graph properties and metadata. layers: Vec>, + lsn: LSN, } /// A unit-like struct for use with `SegmentContainer`. @@ -22,7 +24,7 @@ pub struct MemGraphPropSegment { #[derive(Debug, Default)] pub struct UnitEntry(usize); -// `UnitEntry` does not store data, but `HasRow has to be implemented +// UnitEntry does not store data, but HasRow has to be implemented // for SegmentContainer to work. 
impl HasRow for UnitEntry { fn row(&self) -> usize { @@ -48,13 +50,10 @@ impl MemGraphPropSegment { Self { layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, } } - pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) - } - pub fn get_or_create_layer(&mut self, layer_id: usize) -> &mut SegmentContainer { if layer_id >= self.layers.len() { let max_page_len = self.layers[0].max_page_len(); @@ -86,7 +85,18 @@ impl MemGraphPropSegment { pub fn take(&mut self) -> Self { let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); - Self { layers } + Self { + layers, + lsn: self.lsn, + } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.lsn = lsn; } pub fn add_properties( diff --git a/db4-storage/src/segments/mod.rs b/db4-storage/src/segments/mod.rs index e0b39c7fc5..c8e660b4e4 100644 --- a/db4-storage/src/segments/mod.rs +++ b/db4-storage/src/segments/mod.rs @@ -157,7 +157,6 @@ pub struct SegmentContainer { max_page_len: u32, properties: Properties, meta: Arc, - lsn: u64, } pub trait HasRow: Default + Send + Sync + Sized { @@ -176,7 +175,6 @@ impl SegmentContainer { max_page_len, properties: Default::default(), meta, - lsn: 0, } } @@ -286,16 +284,6 @@ impl SegmentContainer { self.segment_id } - #[inline(always)] - pub fn lsn(&self) -> u64 { - self.lsn - } - - #[inline(always)] - pub fn set_lsn(&mut self, lsn: u64) { - self.lsn = lsn; - } - pub fn len(&self) -> u32 { self.data.data.len() as u32 } diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 2ac2e5b31d..c8106afdc7 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -3,11 +3,12 @@ use crate::{ api::nodes::{LockedNSSegment, NodeSegmentOps}, error::StorageError, loop_lock_write, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, segments::{ HasRow, 
SegmentContainer, node::entry::{MemNodeEntry, MemNodeRef}, }, + wal::LSN, }; use either::Either; use parking_lot::lock_api::ArcRwLockReadGuard; @@ -36,6 +37,7 @@ pub struct MemNodeSegment { segment_id: usize, max_page_len: u32, layers: Vec>, + lsn: LSN, } impl>> From for MemNodeSegment { @@ -51,6 +53,7 @@ impl>> From for MemNodeSegm segment_id, max_page_len, layers, + lsn: 0, } } } @@ -122,10 +125,12 @@ impl MemNodeSegment { let max_page_len = self.layers[0].max_page_len(); let segment_id = self.layers[0].segment_id(); let meta = self.layers[0].meta().clone(); + self.layers.resize_with(layer_id + 1, || { SegmentContainer::new(segment_id, max_page_len, meta.clone()) }); } + &mut self.layers[layer_id] } @@ -141,8 +146,29 @@ impl MemNodeSegment { self.get_adj(n, layer_id).map_or(0, |adj| adj.degree(dir)) } - pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + if lsn > self.lsn { + self.lsn = lsn; + } + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. 
+ pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + segment_id: self.segment_id, + max_page_len: self.max_page_len, + layers, + lsn: self.lsn, + } } pub fn to_vid(&self, pos: LocalPOS) -> VID { @@ -190,6 +216,7 @@ impl MemNodeSegment { segment_id, max_page_len, layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, } } @@ -199,14 +226,12 @@ impl MemNodeSegment { src_pos: LocalPOS, dst: impl Into, e_id: impl Into, - lsn: u64, ) -> (bool, usize) { let dst = dst.into(); let e_id = e_id.into(); let layer_id = e_id.layer(); let layer = self.get_or_create_layer(layer_id); let est_size = layer.est_size(); - layer.set_lsn(lsn); let add_out = layer.reserve_local_row(src_pos); let new_entry = add_out.is_new(); @@ -228,7 +253,6 @@ impl MemNodeSegment { dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) -> (bool, usize) { let src = src.into(); let e_id = e_id.into(); @@ -237,7 +261,6 @@ impl MemNodeSegment { let layer = self.get_or_create_layer(layer_id); let est_size = layer.est_size(); - layer.set_lsn(lsn); let add_in = layer.reserve_local_row(dst_pos); let new_entry = add_in.is_new(); @@ -263,17 +286,10 @@ impl MemNodeSegment { prop_mut_entry.addition_timestamp(ts, e_id); } - pub fn update_timestamp( - &mut self, - t: T, - node_pos: LocalPOS, - e_id: ELID, - lsn: u64, - ) -> usize { + pub fn update_timestamp(&mut self, t: T, node_pos: LocalPOS, e_id: ELID) -> usize { let layer_id = e_id.layer(); let (est_size, row) = { let segment_container = self.get_or_create_layer(layer_id); //&mut self.layers[e_id.layer()]; - segment_container.set_lsn(lsn); let est_size = segment_container.est_size(); let row = segment_container.reserve_local_row(node_pos).inner().row(); (est_size, row) @@ -384,7 +400,7 @@ impl LockedNSSegment for ArcLockedSegmentView { } } -impl>> NodeSegmentOps for NodeSegmentView

{ +impl>> NodeSegmentOps for NodeSegmentView

{ type Extension = P; type Entry<'a> = MemNodeEntry<'a, parking_lot::RwLockReadGuard<'a, MemNodeSegment>>; @@ -441,7 +457,7 @@ impl>> NodeSegmentOps for NodeSegm _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.max_node_page_len(); + let max_page_len = ext.config().max_node_page_len; Self { inner: parking_lot::RwLock::new(MemNodeSegment::new(page_id, max_page_len, meta)) .into(), @@ -483,7 +499,7 @@ impl>> NodeSegmentOps for NodeSegm Ok(()) } - fn mark_dirty(&self) {} + fn set_dirty(&self, _dirty: bool) {} fn check_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { false @@ -546,6 +562,10 @@ impl>> NodeSegmentOps for NodeSegm ) -> Result<(), StorageError> { Ok(()) } + + fn immut_lsn(&self) -> LSN { + panic!("immut_lsn not supported for NodeSegmentView"); + } } #[cfg(test)] @@ -554,7 +574,10 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, - persist::strategy::NoOpStrategy, + persist::strategy::{ + DEFAULT_MAX_MEMORY_BYTES, NoOpStrategy, PersistenceConfig, PersistenceStrategy, + }, + wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ meta::Meta, @@ -569,7 +592,8 @@ mod test { let node_meta = Arc::new(Meta::default()); let edge_meta = Arc::new(Meta::default()); let path = tempdir().unwrap(); - let ext = NoOpStrategy::new(10, 10); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 10, 10); + let ext = NoOpStrategy::new(config, Arc::new(NoWal)); let segment = NodeSegmentView::new( 0, node_meta.clone(), @@ -584,7 +608,7 @@ mod test { let est_size1 = segment.est_size(); assert_eq!(est_size1, 0); - writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(0), 0); + writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(0)); let est_size2 = segment.est_size(); assert!( @@ -592,7 +616,7 @@ mod test { "Estimated size should be greater than 0 after adding an edge" ); - 
writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(0), 0); + writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(0)); let est_size3 = segment.est_size(); assert!( @@ -602,7 +626,7 @@ mod test { // no change when adding the same edge again - writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(0), 0); + writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(0)); let est_size4 = segment.est_size(); assert_eq!( est_size4, est_size3, @@ -617,7 +641,7 @@ mod test { .unwrap() .inner(); - writer.update_c_props(LocalPOS(1), 0, [(prop_id, Prop::U64(73))], 0); + writer.update_c_props(LocalPOS(1), 0, [(prop_id, Prop::U64(73))]); let est_size5 = segment.est_size(); assert!( @@ -625,7 +649,7 @@ mod test { "Estimated size should increase after adding constant properties" ); - writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), 0), 0); + writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), 0)); let est_size6 = segment.est_size(); assert!( @@ -640,7 +664,7 @@ mod test { .unwrap() .inner(); - writer.add_props(42, LocalPOS(1), 0, [(prop_id, Prop::F64(4.13))], 0); + writer.add_props(42, LocalPOS(1), 0, [(prop_id, Prop::F64(4.13))]); let est_size7 = segment.est_size(); assert!( @@ -648,7 +672,7 @@ mod test { "Estimated size should increase after adding temporal properties" ); - writer.add_props(72, LocalPOS(1), 0, [(prop_id, Prop::F64(5.41))], 0); + writer.add_props(72, LocalPOS(1), 0, [(prop_id, Prop::F64(5.41))]); let est_size8 = segment.est_size(); assert!( est_size8 > est_size7, diff --git a/db4-storage/src/transaction/mod.rs b/db4-storage/src/transaction/mod.rs new file mode 100644 index 0000000000..439e5b00de --- /dev/null +++ b/db4-storage/src/transaction/mod.rs @@ -0,0 +1,40 @@ +use std::sync::atomic::{self, AtomicU64}; + +use crate::wal::TransactionID; + +#[derive(Debug)] +pub struct TransactionManager { + last_transaction_id: AtomicU64, +} + +impl TransactionManager { + const 
STARTING_TRANSACTION_ID: TransactionID = 1; + + pub fn new() -> Self { + Self { + last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), + } + } + + /// Restores the last used transaction ID to the specified value. + /// Intended for using during recovery. + pub fn restore_transaction_id(&self, last_transaction_id: TransactionID) { + self.last_transaction_id + .store(last_transaction_id, atomic::Ordering::SeqCst) + } + + pub fn begin_transaction(&self) -> TransactionID { + self.last_transaction_id + .fetch_add(1, atomic::Ordering::SeqCst) + } + + pub fn end_transaction(&self, _transaction_id: TransactionID) { + // No-op for now. + } +} + +impl Default for TransactionManager { + fn default() -> Self { + Self::new() + } +} diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index 71ba54ce4a..7b0b0e6745 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,6 +1,6 @@ use std::path::Path; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::TimeIndexEntry, @@ -8,84 +8,24 @@ use raphtory_core::{ use crate::{ error::StorageError, - wal::{GraphReplayer, GraphWal, LSN, TransactionID, no_wal::NoWal}, + wal::{GraphReplay, GraphWal, LSN, TransactionID, no_wal::NoWal}, }; impl GraphWal for NoWal { type ReplayEntry = (); - fn log_begin_transaction(&self, _transaction_id: TransactionID) -> Result { - Ok(0) - } - - fn log_end_transaction(&self, _transaction_id: TransactionID) -> Result { - Ok(0) - } - - fn log_add_static_edge( - &self, - _transaction_id: TransactionID, - _t: TimeIndexEntry, - _src: VID, - _dst: VID, - ) -> Result { - Ok(0) - } - fn log_add_edge( &self, _transaction_id: TransactionID, _t: TimeIndexEntry, - _src: VID, - _dst: VID, + _src_name: GID, + _src_id: VID, + _dst_name: GID, + _dst_id: VID, _eid: EID, + _layer_name: Option<&str>, 
_layer_id: usize, - _props: &[(usize, Prop)], - ) -> Result { - Ok(0) - } - - fn log_node_id( - &self, - _transaction_id: TransactionID, - _gid: GID, - _vid: VID, - ) -> Result { - Ok(0) - } - - fn log_edge_id( - &self, - _transaction_id: TransactionID, - _src: VID, - _dst: VID, - _eid: EID, - _layer_id: usize, - ) -> Result { - Ok(0) - } - - fn log_const_prop_ids>( - &self, - _transaction_id: TransactionID, - _props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result { - Ok(0) - } - - fn log_temporal_prop_ids>( - &self, - _transaction_id: TransactionID, - _props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result { - Ok(0) - } - - fn log_layer_id( - &self, - _transaction_id: TransactionID, - _name: &str, - _id: usize, + _props: Vec<(&str, usize, Prop)>, ) -> Result { Ok(0) } @@ -100,7 +40,7 @@ impl GraphWal for NoWal { std::iter::once(Ok((0, ()))) } - fn replay_to_graph( + fn replay_to_graph( _dir: impl AsRef, _graph: &mut G, ) -> Result<(), StorageError> { diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 7538781b16..992be05bf5 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -1,5 +1,5 @@ use crate::error::StorageError; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::TimeIndexEntry, @@ -20,18 +20,15 @@ pub struct WalRecord { /// Core Wal methods. pub trait Wal { - fn new(dir: Option) -> Result + fn new(dir: Option<&Path>) -> Result where Self: Sized; /// Appends data to the WAL and returns the assigned LSN. fn append(&self, data: &[u8]) -> Result; - /// Immediately flushes in-memory WAL entries to disk. - fn sync(&self) -> Result<(), StorageError>; - - /// Blocks until the WAL has fsynced the given LSN to disk. - fn wait_for_sync(&self, lsn: LSN); + /// Flushes in-memory WAL entries up to the given LSN to disk. 
+ fn flush(&self, lsn: LSN) -> Result<(), StorageError>; /// Rotates the underlying WAL file. /// `cutoff_lsn` acts as a hint for which records can be safely discarded during rotation. @@ -46,93 +43,18 @@ pub trait GraphWal { /// ReplayEntry represents the type of the wal entry returned during replay. type ReplayEntry; - fn log_begin_transaction(&self, transaction_id: TransactionID) -> Result; - - fn log_end_transaction(&self, transaction_id: TransactionID) -> Result; - - /// Log a static edge addition. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `t` - The timestamp of the edge addition - /// * `src` - The source vertex ID - /// * `dst` - The destination vertex ID - fn log_add_static_edge( - &self, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, - ) -> Result; - - /// Log an edge addition to a layer with temporal props. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `t` - The timestamp of the edge addition - /// * `src` - The source vertex ID - /// * `dst` - The destination vertex ID - /// * `eid` - The edge ID - /// * `layer_id` - The layer ID - /// * `props` - The temporal properties of the edge fn log_add_edge( &self, transaction_id: TransactionID, t: TimeIndexEntry, - src: VID, - dst: VID, - eid: EID, - layer_id: usize, - props: &[(usize, Prop)], - ) -> Result; - - fn log_node_id( - &self, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result; - - fn log_edge_id( - &self, - transaction_id: TransactionID, - src: VID, - dst: VID, + src_name: GID, + src_id: VID, + dst_name: GID, + dst_id: VID, eid: EID, + layer_name: Option<&str>, layer_id: usize, - ) -> Result; - - /// Log constant prop name -> prop id mappings. 
- /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `props` - A slice containing new or existing tuples of (prop name, id, value) - fn log_const_prop_ids>( - &self, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result; - - /// Log temporal prop name -> prop id mappings. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `props` - A slice containing new or existing tuples of (prop name, id, value). - fn log_temporal_prop_ids>( - &self, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result; - - fn log_layer_id( - &self, - transaction_id: TransactionID, - name: &str, - id: usize, + props: Vec<(&str, usize, Prop)>, ) -> Result; /// Logs a checkpoint record, indicating that all Wal operations upto and including @@ -145,74 +67,26 @@ pub trait GraphWal { ) -> impl Iterator>; /// Replays and applies all the wal entries in the given directory to the given graph. - fn replay_to_graph( + fn replay_to_graph( dir: impl AsRef, graph: &mut G, ) -> Result<(), StorageError>; } -/// Trait for defining callbacks for replaying from wal -pub trait GraphReplayer { - fn replay_begin_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError>; - - fn replay_end_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError>; - - fn replay_add_static_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, - ) -> Result<(), StorageError>; - +/// Trait for defining callbacks for replaying from wal. 
+pub trait GraphReplay { fn replay_add_edge( - &self, + &mut self, lsn: LSN, transaction_id: TransactionID, t: TimeIndexEntry, - src: VID, - dst: VID, + src_name: GID, + src_id: VID, + dst_name: GID, + dst_id: VID, eid: EID, + layer_name: Option, layer_id: usize, - props: &[(usize, Prop)], - ) -> Result<(), StorageError>; - - fn replay_node_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result<(), StorageError>; - - fn replay_const_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError>; - - fn replay_temporal_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError>; - - fn replay_layer_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - name: &str, - id: usize, + props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError>; } diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 72e666fefa..4a30237821 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -11,7 +11,7 @@ use crate::{ pub struct NoWal; impl Wal for NoWal { - fn new(_dir: Option) -> Result { + fn new(_dir: Option<&Path>) -> Result { Ok(Self) } @@ -19,12 +19,10 @@ impl Wal for NoWal { Ok(0) } - fn sync(&self) -> Result<(), StorageError> { + fn flush(&self, _lsn: LSN) -> Result<(), StorageError> { Ok(()) } - fn wait_for_sync(&self, _lsn: LSN) {} - fn rotate(&self, _cutoff_lsn: LSN) -> Result<(), StorageError> { Ok(()) } diff --git a/raphtory-api/src/core/entities/mod.rs b/raphtory-api/src/core/entities/mod.rs index 6235882469..8c574abe3e 100644 --- a/raphtory-api/src/core/entities/mod.rs +++ b/raphtory-api/src/core/entities/mod.rs @@ -65,6 +65,10 @@ impl Default for EID { } impl EID { + pub fn index(&self) -> usize { + self.0 + } + pub fn as_u64(self) -> u64 { self.0 as u64 } diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs 
b/raphtory-graphql/src/model/graph/meta_graph.rs index 577c941dab..3e34abbccf 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -6,7 +6,7 @@ use crate::{ use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; use raphtory::{ - db::api::storage::storage::{Extension, PersistentStrategy}, + db::api::storage::storage::{Extension, PersistenceStrategy}, prelude::{GraphViewOps, PropertiesOps}, serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}, }; diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 95548913e0..229f0ae230 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -22,7 +22,7 @@ use itertools::Itertools; use raphtory::{ db::{ api::{ - storage::storage::{Extension, PersistentStrategy}, + storage::storage::{Extension, PersistenceStrategy}, view::MaterializedGraph, }, graph::views::deletion_graph::PersistentGraph, diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 518cb3faf1..dd79c5cdf3 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -2,7 +2,7 @@ use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; use futures_util::io; use raphtory::{ db::api::{ - storage::storage::{Extension, PersistentStrategy}, + storage::storage::{Extension, PersistenceStrategy}, view::{internal::InternalStorageOps, MaterializedGraph}, }, errors::{GraphError, InvalidPathReason}, diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 98f4c09cb5..2069a74cbd 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -1,7 +1,7 @@ use base64::{prelude::BASE64_URL_SAFE, DecodeError, Engine}; use raphtory::{ db::api::{ - storage::storage::{Extension, PersistentStrategy}, + storage::storage::{Extension, PersistenceStrategy}, view::MaterializedGraph, }, 
errors::GraphError, diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 6c399b8598..ec7b49da8b 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -5,7 +5,7 @@ use crate::{ MutationError, }, }; -use db4_graph::{TransactionManager, WriteLockedGraph}; +use db4_graph::WriteLockedGraph; use raphtory_api::{ core::{ entities::{ @@ -20,7 +20,7 @@ use raphtory_api::{ inherit::Base, }; use raphtory_core::entities::{nodes::node_ref::NodeRef, ELID}; -use storage::{Extension, WalImpl}; +use storage::{wal::LSN, Extension}; pub trait InternalAdditionOps { type Error: From; @@ -93,12 +93,6 @@ pub trait InternalAdditionOps { meta: &Meta, props: impl Iterator, ) -> Result>, Self::Error>; - - /// TODO: Not sure the below methods belong here... - - fn transaction_manager(&self) -> &TransactionManager; - - fn wal(&self) -> &WalImpl; } pub trait EdgeWriteLock: Send + Sync { @@ -106,7 +100,6 @@ pub trait EdgeWriteLock: Send + Sync { &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew; /// add edge update @@ -116,7 +109,6 @@ pub trait EdgeWriteLock: Send + Sync { src: impl Into, dst: impl Into, eid: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew; @@ -125,12 +117,13 @@ pub trait EdgeWriteLock: Send + Sync { t: TimeIndexEntry, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew; fn store_src_node_info(&mut self, id: impl Into, node_id: Option); fn store_dst_node_info(&mut self, id: impl Into, node_id: Option); + + fn set_lsn(&mut self, lsn: LSN); } pub trait SessionAdditionOps: Send + Sync { @@ -260,14 +253,6 @@ impl InternalAdditionOps for GraphStorage { Ok(self.mutable()?.validate_gids(gids)?) 
} - fn transaction_manager(&self) -> &TransactionManager { - self.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &WalImpl { - self.mutable().unwrap().wal.as_ref() - } - fn resolve_node_and_type( &self, id: NodeRef, @@ -377,16 +362,6 @@ where self.base().validate_gids(gids) } - #[inline] - fn transaction_manager(&self) -> &TransactionManager { - self.base().transaction_manager() - } - - #[inline] - fn wal(&self) -> &WalImpl { - self.base().wal() - } - fn resolve_node_and_type( &self, id: NodeRef, diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index ed9aadf9e7..53bafc8a66 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -1,8 +1,9 @@ use crate::mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, + durability_ops::DurabilityOps, MutationError, }; -use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; +use db4_graph::{TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::properties::{ meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX}, @@ -20,13 +21,15 @@ use raphtory_core::{ }; use storage::{ pages::{node_page::writer::node_info_as_props, session::WriteSession}, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, WalImpl, ES, GS, NS, + transaction::TransactionManager, + wal::LSN, + Extension, WalType, ES, GS, NS, }; -pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> { +pub struct WriteS<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> { static_session: WriteSession<'a, NS, ES, GS, EXT>, } @@ -35,16 +38,15 @@ pub struct UnlockedSession<'a> { graph: &'a TemporalGraph, } -impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> EdgeWriteLock +impl<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> EdgeWriteLock for WriteS<'a, 
EXT> { fn internal_add_static_edge( &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { - self.static_session.add_static_edge(src, dst, lsn) + self.static_session.add_static_edge(src, dst) } fn internal_add_edge( @@ -53,11 +55,10 @@ impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> Edge src: impl Into, dst: impl Into, eid: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew { self.static_session - .add_edge_into_layer(t, src, dst, eid, lsn, props); + .add_edge_into_layer(t, src, dst, eid, props); eid } @@ -67,18 +68,16 @@ impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> Edge t: TimeIndexEntry, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew { let src = src.into(); let dst = dst.into(); let eid = self .static_session - .add_static_edge(src, dst, lsn) + .add_static_edge(src, dst) .map(|eid| eid.with_layer_deletion(layer)); - self.static_session - .delete_edge_from_layer(t, src, dst, eid, lsn); + self.static_session.delete_edge_from_layer(t, src, dst, eid); eid } @@ -90,7 +89,7 @@ impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> Edge self.static_session .node_writers() .get_mut_src() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); + .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())]); }; } @@ -101,9 +100,13 @@ impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> Edge self.static_session .node_writers() .get_mut_dst() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); + .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())]); }; } + + fn set_lsn(&mut self, lsn: LSN) { + self.static_session.set_lsn(lsn); + } } impl<'a> SessionAdditionOps for UnlockedSession<'a> { @@ -173,9 +176,7 @@ impl<'a> SessionAdditionOps for UnlockedSession<'a> { impl InternalAdditionOps for TemporalGraph { type Error = MutationError; - type WS<'a> = UnlockedSession<'a>; - type AtomicAddEdge<'a> = WriteS<'a, Extension>; fn write_lock(&self) -> Result, Self::Error> { @@ -229,7 +230,6 @@ impl InternalAdditionOps for 
TemporalGraph { local_pos, 0, node_info_as_props(id.as_gid_ref().left(), None), - 0, ); MaybeNew::Existing(0) } @@ -245,7 +245,6 @@ impl InternalAdditionOps for TemporalGraph { id.as_gid_ref().left(), Some(node_type_id.inner()).filter(|&id| id != 0), ), - 0, ); node_type_id } @@ -309,7 +308,7 @@ impl InternalAdditionOps for TemporalGraph { ) -> Result<(), Self::Error> { let (segment, node_pos) = self.storage().nodes().resolve_pos(v); let mut node_writer = self.storage().node_writer(segment); - node_writer.add_props(t, node_pos, 0, props, 0); + node_writer.add_props(t, node_pos, 0, props); Ok(()) } @@ -350,12 +349,14 @@ impl InternalAdditionOps for TemporalGraph { Ok(prop_ids) } } +} +impl DurabilityOps for TemporalGraph { fn transaction_manager(&self) -> &TransactionManager { &self.transaction_manager } - fn wal(&self) -> &WalImpl { - &self.wal + fn wal(&self) -> &WalType { + &self.extension().wal() } } diff --git a/raphtory-storage/src/mutation/deletion_ops.rs b/raphtory-storage/src/mutation/deletion_ops.rs index 06b934cc3c..0a7b0a4b12 100644 --- a/raphtory-storage/src/mutation/deletion_ops.rs +++ b/raphtory-storage/src/mutation/deletion_ops.rs @@ -36,8 +36,9 @@ impl InternalDeletionOps for db4_graph::TemporalGraph { layer: usize, ) -> Result, Self::Error> { let mut session = self.storage().write_session(src, dst, None); - let edge = session.add_static_edge(src, dst, 0); - session.delete_edge_from_layer(t, src, dst, edge.map(|eid| eid.with_layer(layer)), 0); + session.set_lsn(0); + let edge = session.add_static_edge(src, dst); + session.delete_edge_from_layer(t, src, dst, edge.map(|eid| eid.with_layer(layer))); Ok(edge) } @@ -52,7 +53,7 @@ impl InternalDeletionOps for db4_graph::TemporalGraph { let (src, dst) = writer.get_edge(0, edge_pos).unwrap_or_else(|| { panic!("Internal Error: Edge {eid:?} not found in storage"); }); - writer.delete_edge(t, edge_pos, src, dst, layer, 0); + writer.delete_edge(t, edge_pos, src, dst, layer); Ok(()) } } diff --git 
a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs new file mode 100644 index 0000000000..c31e578624 --- /dev/null +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -0,0 +1,37 @@ +use crate::graph::graph::GraphStorage; +use raphtory_api::inherit::Base; +use storage::{transaction::TransactionManager, WalType}; + +/// Accessor methods for transactions and write-ahead logging. +pub trait DurabilityOps { + fn transaction_manager(&self) -> &TransactionManager; + + fn wal(&self) -> &WalType; +} + +impl DurabilityOps for GraphStorage { + fn transaction_manager(&self) -> &TransactionManager { + self.mutable().unwrap().transaction_manager.as_ref() + } + + fn wal(&self) -> &WalType { + self.mutable().unwrap().wal() + } +} + +pub trait InheritDurabilityOps: Base {} + +impl DurabilityOps for G +where + G::Base: DurabilityOps, +{ + #[inline] + fn transaction_manager(&self) -> &TransactionManager { + self.base().transaction_manager() + } + + #[inline] + fn wal(&self) -> &WalType { + self.base().wal() + } +} diff --git a/raphtory-storage/src/mutation/mod.rs b/raphtory-storage/src/mutation/mod.rs index 44f18037b9..28cd67085d 100644 --- a/raphtory-storage/src/mutation/mod.rs +++ b/raphtory-storage/src/mutation/mod.rs @@ -3,7 +3,7 @@ use crate::{ graph::graph::Immutable, mutation::{ addition_ops::InheritAdditionOps, deletion_ops::InheritDeletionOps, - property_addition_ops::InheritPropertyAdditionOps, + durability_ops::InheritDurabilityOps, property_addition_ops::InheritPropertyAdditionOps, }, }; use parking_lot::RwLockWriteGuard; @@ -30,6 +30,7 @@ use thiserror::Error; pub mod addition_ops; pub mod addition_ops_ext; pub mod deletion_ops; +pub mod durability_ops; pub mod property_addition_ops; pub type NodeWriterT<'a> = NodeWriter<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>; @@ -70,5 +71,6 @@ pub trait InheritMutationOps: Base {} impl InheritAdditionOps for G {} impl InheritPropertyAdditionOps for G {} impl InheritDeletionOps 
for G {} +impl InheritDurabilityOps for G {} impl InheritMutationOps for Arc {} diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index 0447d09bf7..65e9dcc681 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -87,7 +87,7 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); writer.check_metadata(node_pos, 0, &props)?; - writer.update_c_props(node_pos, 0, props, 0); + writer.update_c_props(node_pos, 0, props); Ok(writer) } @@ -98,7 +98,7 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { ) -> Result, Self::Error> { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); - writer.update_c_props(node_pos, 0, props, 0); + writer.update_c_props(node_pos, 0, props); Ok(writer) } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index ed319d3d97..9dfb98ab91 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -14,10 +14,16 @@ use crate::{ prelude::{GraphViewOps, NodeViewOps}, }; use raphtory_api::core::entities::properties::prop::Prop; -use raphtory_storage::mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}; +use raphtory_core::entities::GID; +use raphtory_storage::mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, +}; use storage::wal::{GraphWal, Wal}; -pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { +pub trait AdditionOps: + StaticGraphViewOps + InternalAdditionOps> + DurabilityOps +{ // TODO: Probably add vector reference here like add /// Add a node to the graph /// @@ -143,7 +149,9 @@ pub trait 
AdditionOps: StaticGraphViewOps + InternalAdditionOps> + StaticGraphViewOps> AdditionOps for G { +impl> + StaticGraphViewOps + DurabilityOps> + AdditionOps for G +{ fn add_node< V: AsNodeRef, T: TryIntoInputTime, @@ -248,7 +256,6 @@ impl> + StaticGraphViewOps> Addit props: PII, layer: Option<&str>, ) -> Result, GraphError> { - // Log transaction start let transaction_id = self.transaction_manager().begin_transaction(); let session = self.write_session().map_err(|err| err.into())?; @@ -267,19 +274,6 @@ impl> + StaticGraphViewOps> Addit ) .map_err(into_graph_err)?; - // Log prop name -> prop id mappings - self.wal() - .log_temporal_prop_ids(transaction_id, &props_with_status) - .unwrap(); - - let props = props_with_status - .into_iter() - .map(|maybe_new| { - let (_, prop_id, prop) = maybe_new.inner(); - (prop_id, prop) - }) - .collect::>(); - let ti = time_from_input_session(&session, t)?; let src_id = self .resolve_node(src.as_node_ref()) @@ -289,77 +283,92 @@ impl> + StaticGraphViewOps> Addit .map_err(into_graph_err)?; let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; - // Log node -> node id mappings // FIXME: We are logging node -> node id mappings AFTER they are inserted into the // resolver. Make sure resolver mapping CANNOT get to disk before Wal. 
- if let Some(gid) = src.as_node_ref().as_gid_ref().left() { - self.wal() - .log_node_id(transaction_id, gid.into(), src_id.inner()) - .unwrap(); - } - - if let Some(gid) = dst.as_node_ref().as_gid_ref().left() { - self.wal() - .log_node_id(transaction_id, gid.into(), dst_id.inner()) - .unwrap(); - } + let src_gid = src + .as_node_ref() + .as_gid_ref() + .left() + .map(|gid_ref| GID::from(gid_ref)) + .unwrap(); + let dst_gid = dst + .as_node_ref() + .as_gid_ref() + .left() + .map(|gid_ref| GID::from(gid_ref)) + .unwrap(); let src_id = src_id.inner(); let dst_id = dst_id.inner(); - // Log layer -> layer id mappings - if let Some(layer) = layer { - self.wal() - .log_layer_id(transaction_id, layer, layer_id.inner()) - .unwrap(); - } - let layer_id = layer_id.inner(); - // Holds all locks for nodes and edge until add_edge_op goes out of scope + // Hold all locks for src node, dst node and edge until add_edge_op goes out of scope. let mut add_edge_op = self .atomic_add_edge(src_id, dst_id, None, layer_id) .map_err(into_graph_err)?; - // Log edge addition - let add_static_edge_lsn = self - .wal() - .log_add_static_edge(transaction_id, ti, src_id, dst_id) - .unwrap(); - let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id, add_static_edge_lsn); + // NOTE: We log edge id after it is inserted into the edge segment. + // This is fine as long as we hold onto the edge segment lock through add_edge_op + // for the entire operation. + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); - // Log edge -> edge id mappings - // NOTE: We log edge id mappings after they are inserted into edge segments. - // This is fine as long as we hold onto segment locks for the entire operation. - let add_edge_lsn = self + // All names, ids and values have been generated for this operation. + // Create a wal entry to mark it as durable. 
+ let props_for_wal = props_with_status + .iter() + .map(|maybe_new| { + let (prop_name, prop_id, prop) = maybe_new.as_ref().inner(); + (prop_name.as_ref(), *prop_id, prop.clone()) + }) + .collect::>(); + + let lsn = self .wal() .log_add_edge( transaction_id, ti, + src_gid, src_id, + dst_gid, dst_id, edge_id.inner(), + layer, layer_id, - &props, + props_for_wal, ) .unwrap(); + + let props = props_with_status + .into_iter() + .map(|maybe_new| { + let (_, prop_id, prop) = maybe_new.inner(); + (prop_id, prop) + }) + .collect::>(); + let edge_id = add_edge_op.internal_add_edge( ti, src_id, dst_id, edge_id.map(|eid| eid.with_layer(layer_id)), - add_edge_lsn, props, ); add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); - // Log transaction end + // Update the src, dst and edge segments with the lsn of the wal entry. + add_edge_op.set_lsn(lsn); + self.transaction_manager().end_transaction(transaction_id); - // Flush all wal entries to disk. - self.wal().sync().unwrap(); + // Drop to release all the segment locks. + // FIXME: Make sure segments cannot get to disk before wal entry is flushed. + // drop(add_edge_op); + + // Flush the wal entry to disk. 
+ self.wal().flush(lsn).unwrap(); Ok(EdgeView::new( self.clone(), diff --git a/raphtory/src/db/api/mutation/deletion_ops.rs b/raphtory/src/db/api/mutation/deletion_ops.rs index e25b1ca190..8157040213 100644 --- a/raphtory/src/db/api/mutation/deletion_ops.rs +++ b/raphtory/src/db/api/mutation/deletion_ops.rs @@ -51,7 +51,7 @@ pub trait DeletionOps: .atomic_add_edge(src_id, dst_id, None, layer_id) .map_err(into_graph_err)?; - let edge_id = add_edge_op.internal_delete_edge(ti, src_id, dst_id, 0, layer_id); + let edge_id = add_edge_op.internal_delete_edge(ti, src_id, dst_id, layer_id); add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 465660ec55..156c4b6d8b 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -6,7 +6,7 @@ use crate::{ }, errors::GraphError, }; -use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; +use db4_graph::{GraphDir, TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::{ properties::{ @@ -26,6 +26,7 @@ use raphtory_storage::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, addition_ops_ext::{UnlockedSession, WriteS}, deletion_ops::InternalDeletionOps, + durability_ops::DurabilityOps, property_addition_ops::InternalPropertyAdditionOps, EdgeWriterT, NodeWriterT, }, @@ -35,10 +36,15 @@ use std::{ path::Path, sync::Arc, }; +use storage::{ + transaction::TransactionManager, + wal::{Wal, LSN}, + WalType, +}; pub use storage::{ - persist::strategy::{Config, PersistentStrategy}, - Extension, WalImpl, + persist::strategy::{PersistenceConfig, PersistenceStrategy}, + Extension, }; #[cfg(feature = "search")] use { @@ -103,31 +109,31 @@ impl Storage { } pub(crate) fn new_at_path(path: impl AsRef) -> Result { - Ok(Self { - graph: 
GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path( - path, - Extension::default(), - )?)), - #[cfg(feature = "search")] - index: RwLock::new(GraphIndex::Empty), - }) - } + let config = PersistenceConfig::default(); + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal); + let temporal_graph = TemporalGraph::new_with_path(path, ext)?; - pub(crate) fn new_with_path_and_ext( - path: impl AsRef, - ext: Extension, - ) -> Result { Ok(Self { - graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path(path, ext)?)), + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) } pub(crate) fn load_from(path: impl AsRef) -> Result { - let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path)?)); + let config = PersistenceConfig::load_from_dir(path.as_ref()) + .unwrap_or_else(|_| PersistenceConfig::default()); + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal); + let temporal_graph = TemporalGraph::load_from_path(path, ext)?; + Ok(Self { - graph, + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) @@ -314,9 +320,8 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { - self.session.internal_add_static_edge(src, dst, lsn) + self.session.internal_add_static_edge(src, dst) } fn internal_add_edge( @@ -325,11 +330,9 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { src: impl Into, dst: impl Into, e_id: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew { - self.session - .internal_add_edge(t, src, dst, e_id, lsn, props) + 
self.session.internal_add_edge(t, src, dst, e_id, props) } fn internal_delete_edge( @@ -337,10 +340,9 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { t: TimeIndexEntry, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew { - self.session.internal_delete_edge(t, src, dst, lsn, layer) + self.session.internal_delete_edge(t, src, dst, layer) } fn store_src_node_info(&mut self, id: impl Into, node_id: Option) { @@ -350,6 +352,10 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { fn store_dst_node_info(&mut self, id: impl Into, node_id: Option) { self.session.store_dst_node_info(id, node_id); } + + fn set_lsn(&mut self, lsn: LSN) { + self.session.set_lsn(lsn); + } } impl<'a> SessionAdditionOps for StorageWriteSession<'a> { @@ -533,14 +539,6 @@ impl InternalAdditionOps for Storage { Ok(self.graph.validate_gids(gids)?) } - fn transaction_manager(&self) -> &TransactionManager { - self.graph.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &WalImpl { - self.graph.mutable().unwrap().wal.as_ref() - } - fn resolve_node_and_type( &self, id: NodeRef, @@ -550,6 +548,16 @@ impl InternalAdditionOps for Storage { } } +impl DurabilityOps for Storage { + fn transaction_manager(&self) -> &TransactionManager { + self.graph.mutable().unwrap().transaction_manager.as_ref() + } + + fn wal(&self) -> &WalType { + self.graph.mutable().unwrap().wal() + } +} + impl InternalPropertyAdditionOps for Storage { type Error = GraphError; diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index bae70fdfe3..1a9f5a4c0d 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -33,7 +33,7 @@ use raphtory_api::{ atomic_extra::atomic_usize_from_mut_slice, core::{ entities::{ - properties::meta::{Meta, PropMapper}, + properties::meta::{Meta, PropMapper, STATIC_GRAPH_LAYER_ID}, EID, }, storage::{arc_str::ArcStr, timeindex::TimeIndexEntry}, @@ -57,7 +57,7 @@ use std::{ path::Path, 
sync::{atomic::Ordering, Arc}, }; -use storage::{persist::strategy::PersistentStrategy, Extension}; +use storage::{persist::strategy::PersistenceStrategy, Extension}; #[cfg(feature = "search")] use crate::{ @@ -343,28 +343,26 @@ fn materialize_impl( .inner(); writer.store_node_id_and_node_type( node_pos, - 0, + STATIC_GRAPH_LAYER_ID, gid.as_ref(), new_type_id, - 0, ); } else { - writer.store_node_id(node_pos, 0, gid.as_ref(), 0); + writer.store_node_id(node_pos, STATIC_GRAPH_LAYER_ID, gid.as_ref()); } graph_storage .write_session()? .set_node(gid.as_ref(), new_id)?; for (t, row) in node.rows() { - writer.add_props(t, node_pos, 0, row, 0); + writer.add_props(t, node_pos, 0, row); } writer.update_c_props( node_pos, - 0, + STATIC_GRAPH_LAYER_ID, node.metadata_ids() .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), - 0, ); } } @@ -385,13 +383,13 @@ fn materialize_impl( if let Some(edge_pos) = shard.resolve_pos(eid) { let mut writer = shard.writer(); // make the edge for the first time - writer.add_static_edge(Some(edge_pos), src, dst, 0, false); + writer.add_static_edge(Some(edge_pos), src, dst, false); for edge in edge.explode_layers() { let layer = layer_map[edge.edge.layer().unwrap()]; for edge in edge.explode() { let t = edge.edge.time().unwrap(); - writer.add_edge(t, edge_pos, src, dst, [], layer, 0); + writer.add_edge(t, edge_pos, src, dst, [], layer); } //TODO: move this in edge.row() for (t, t_props) in edge @@ -411,7 +409,7 @@ fn materialize_impl( let props = t_props .map(|(_, prop_id, prop)| (prop_id, prop)) .collect::>(); - writer.add_edge(t, edge_pos, src, dst, props, layer, 0); + writer.add_edge(t, edge_pos, src, dst, props, layer); } writer.update_c_props( edge_pos, @@ -432,7 +430,7 @@ fn materialize_impl( graph.layer_ids(), ) { let layer = layer_map[layer]; - writer.delete_edge(t, edge_pos, src, dst, layer, 0); + writer.delete_edge(t, edge_pos, src, dst, layer); } } } @@ -449,12 +447,12 @@ fn materialize_impl( if let Some(node_pos) = 
maybe_src_pos { let mut writer = shard.writer(); - writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + writer.add_static_outbound_edge(node_pos, dst_id, eid); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + writer.add_static_inbound_edge(node_pos, src_id, eid); } for e in edge.explode_layers() { @@ -466,7 +464,6 @@ fn materialize_impl( node_pos, dst_id, eid.with_layer(layer), - 0, ); } if let Some(node_pos) = maybe_dst_pos { @@ -476,7 +473,6 @@ fn materialize_impl( node_pos, src_id, eid.with_layer(layer), - 0, ); } } @@ -487,14 +483,14 @@ fn materialize_impl( let t = e.time_and_index().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, node_pos, eid.with_layer(l)); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); let t = e.time_and_index().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, node_pos, eid.with_layer(l)); } } @@ -508,11 +504,11 @@ fn materialize_impl( let layer = layer_map[layer]; if let Some(node_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer)); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer)); } } } diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 6e4add574f..79c57a4c12 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -41,6 +41,7 @@ use raphtory_storage::{ mutation::{ addition_ops::{EdgeWriteLock, 
InternalAdditionOps}, deletion_ops::InternalDeletionOps, + durability_ops::DurabilityOps, property_addition_ops::InternalPropertyAdditionOps, }, }; @@ -176,7 +177,8 @@ impl< G: StaticGraphViewOps + InternalAdditionOps + InternalPropertyAdditionOps - + InternalDeletionOps, + + InternalDeletionOps + + DurabilityOps, > EdgeView { pub fn delete(&self, t: T, layer: Option<&str>) -> Result<(), GraphError> { @@ -439,14 +441,7 @@ impl EdgeView { .atomic_add_edge(src, dst, Some(e_id), layer_id) .map_err(into_graph_err)?; - writer.internal_add_edge( - t, - src, - dst, - MaybeNew::New(e_id.with_layer(layer_id)), - 0, - props, - ); + writer.internal_add_edge(t, src, dst, MaybeNew::New(e_id.with_layer(layer_id)), props); Ok(()) } diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index a8bd2c2473..2c86df5b92 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -48,7 +48,7 @@ use std::{ ops::Deref, sync::Arc, }; -use storage::{persist::strategy::PersistentStrategy, Extension}; +use storage::{persist::strategy::PersistenceStrategy, Extension}; #[repr(transparent)] #[derive(Debug, Clone, Default)] @@ -586,10 +586,15 @@ impl Graph { if !Extension::disk_storage_enabled() { return Err(GraphError::DiskGraphNotEnabled); } + path.init()?; + let graph_storage_path = path.graph_path()?; + let storage = Storage::new_at_path(graph_storage_path)?; + let graph = Self { - inner: Arc::new(Storage::new_at_path(path.graph_path()?)?), + inner: Arc::new(storage), }; + path.write_metadata(&graph)?; Ok(graph) } @@ -606,7 +611,7 @@ impl Graph { /// #[cfg(feature = "io")] pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { - //TODO: add support for loading indexes and vectors + // TODO: add support for loading indexes and vectors Ok(Self { inner: Arc::new(Storage::load_from(path.graph_path()?)?), }) diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index 
91a45774a7..2572013757 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -36,7 +36,7 @@ use std::{ }; use storage::{ api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, Extension, }; diff --git a/raphtory/src/db/mod.rs b/raphtory/src/db/mod.rs index 54e9c74f6c..63e711afda 100644 --- a/raphtory/src/db/mod.rs +++ b/raphtory/src/db/mod.rs @@ -1,4 +1,3 @@ pub mod api; pub mod graph; -pub mod replay; pub mod task; diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs deleted file mode 100644 index 2c356faa3a..0000000000 --- a/raphtory/src/db/replay/mod.rs +++ /dev/null @@ -1,115 +0,0 @@ -use db4_graph::TemporalGraph; -use raphtory_api::core::{ - entities::{properties::prop::Prop, EID, GID, VID}, - storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry}, -}; -use storage::{ - api::edges::EdgeSegmentOps, - error::StorageError, - wal::{GraphReplayer, TransactionID, LSN}, - Extension, -}; - -/// Wrapper struct for implementing GraphReplayer for a TemporalGraph. -/// This is needed to workaround Rust's orphan rule since both ReplayGraph and TemporalGraph -/// are foreign to this crate. 
-#[derive(Debug)] -pub struct ReplayGraph { - graph: TemporalGraph, -} - -impl ReplayGraph { - pub fn new(graph: TemporalGraph) -> Self { - Self { graph } - } -} - -impl GraphReplayer for ReplayGraph { - fn replay_begin_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_end_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_add_static_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_add_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, - eid: EID, - layer_id: usize, - props: &[(usize, Prop)], - ) -> Result<(), StorageError> { - let edge_segment = self.graph.storage().edges().get_edge_segment(eid); - - match edge_segment { - Some(edge_segment) => { - edge_segment.head().lsn(); - } - _ => {} - } - - Ok(()) - } - - fn replay_node_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_const_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_temporal_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_layer_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - name: &str, - id: usize, - ) -> Result<(), StorageError> { - Ok(()) - } -} diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index b3fdc23192..af0b342ea8 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -198,19 +198,17 @@ pub fn load_nodes_from_df< let mut writer = shard.writer(); let t = 
TimeIndexEntry(time, secondary_index); let layer_id = STATIC_GRAPH_LAYER_ID; - let lsn = 0; update_time(t); - writer - .store_node_id_and_node_type(mut_node, layer_id, gid, *node_type, lsn); + writer.store_node_id_and_node_type(mut_node, layer_id, gid, *node_type); let t_props = prop_cols.iter_row(row); let c_props = metadata_cols .iter_row(row) .chain(shared_metadata.iter().cloned()); - writer.add_props(t, mut_node, layer_id, t_props, lsn); - writer.update_c_props(mut_node, layer_id, c_props, lsn); + writer.add_props(t, mut_node, layer_id, t_props); + writer.update_c_props(mut_node, layer_id, c_props); }; } @@ -410,10 +408,10 @@ pub fn load_edges_from_df = vec![]; - let mut c_props: Vec<(usize, Prop)> = vec![]; - - for (row, (src, dst, time, secondary_index, eid, layer, exists)) in - zip.enumerate() - { - if let Some(eid_pos) = shard.resolve_pos(*eid) { - let t = TimeIndexEntry(time, secondary_index); - let mut writer = shard.writer(); - - t_props.clear(); - t_props.extend(prop_cols.iter_row(row)); - - c_props.clear(); - c_props.extend(metadata_cols.iter_row(row)); - c_props.extend_from_slice(&shared_metadata); - - writer.bulk_add_edge( - t, - eid_pos, - *src, - *dst, - exists, - *layer, - c_props.drain(..), - t_props.drain(..), - 0, - ); + write_locked_graph + .edges + .par_iter_mut() + .for_each(|locked_page| { + let zip = izip!( + src_col_resolved.iter(), + dst_col_resolved.iter(), + time_col.iter(), + secondary_index_col.iter(), + eid_col_resolved.iter(), + layer_col_resolved.iter(), + eids_exist + .iter() + .map(|exists| exists.load(Ordering::Relaxed)) + ); + let mut t_props: Vec<(usize, Prop)> = vec![]; + let mut c_props: Vec<(usize, Prop)> = vec![]; + + for (row, (src, dst, time, secondary_index, eid, layer, exists)) in + zip.enumerate() + { + if let Some(eid_pos) = locked_page.resolve_pos(*eid) { + let t = TimeIndexEntry(time, secondary_index); + let mut writer = locked_page.writer(); + + t_props.clear(); + t_props.extend(prop_cols.iter_row(row)); + + 
c_props.clear(); + c_props.extend(metadata_cols.iter_row(row)); + c_props.extend_from_slice(&shared_metadata); + + writer.bulk_add_edge( + t, + eid_pos, + *src, + *dst, + exists, + *layer, + c_props.drain(..), + t_props.drain(..), + ); + } } - } - }); + }); }); }); @@ -762,12 +761,12 @@ pub(crate) fn load_node_props_from_df< { if let Some(mut_node) = shard.resolve_pos(*vid) { let mut writer = shard.writer(); - writer.store_node_id_and_node_type(mut_node, 0, gid, *node_type, 0); + writer.store_node_id_and_node_type(mut_node, 0, gid, *node_type); c_props.clear(); c_props.extend(metadata_cols.iter_row(idx)); c_props.extend_from_slice(&shared_metadata); - writer.update_c_props(mut_node, 0, c_props.drain(..), 0); + writer.update_c_props(mut_node, 0, c_props.drain(..)); }; } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 375ac77838..9bdf9a258e 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -25,28 +25,29 @@ use std::{ use walkdir::WalkDir; use zip::{write::FileOptions, ZipArchive, ZipWriter}; -/// Stores graph data -pub const GRAPH_PATH: &str = "graph"; -pub const DEFAULT_GRAPH_PATH: &str = "graph0"; +/// Metadata file that stores path to the data folder. +pub const ROOT_META_PATH: &str = ".raph"; +/// Outer most directory containing all data. pub const DATA_PATH: &str = "data"; pub const DEFAULT_DATA_PATH: &str = "data0"; -/// Stores data folder path -pub const ROOT_META_PATH: &str = ".raph"; - -/// Stores graph folder path and graph metadata +/// Metadata file that stores path to the graph folder and graph metadata. pub const GRAPH_META_PATH: &str = ".meta"; -/// Temporary metadata for atomic replacement -pub const DIRTY_PATH: &str = ".dirty"; +/// Directory that stores graph data. +pub const GRAPH_PATH: &str = "graph"; +pub const DEFAULT_GRAPH_PATH: &str = "graph0"; -/// Directory that stores search indexes +/// Directory that stores search indexes. 
pub const INDEX_PATH: &str = "index"; -/// Directory that stores vector embeddings of the graph +/// Directory that stores vector embeddings of the graph. pub const VECTORS_PATH: &str = "vectors"; +/// Temporary metadata file for atomic replacement. +pub const DIRTY_PATH: &str = ".dirty"; + pub(crate) fn valid_relative_graph_path( relative_path: &str, prefix: &str, diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 102f9bd120..9eda186868 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -2,7 +2,7 @@ use crate::prelude::IndexMutationOps; use crate::{ db::api::{ - mutation::AdditionOps, storage::storage::PersistentStrategy, view::StaticGraphViewOps, + mutation::AdditionOps, storage::storage::PersistenceStrategy, view::StaticGraphViewOps, }, errors::GraphError, serialise::{ @@ -22,6 +22,7 @@ use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; pub trait StableEncode: StaticGraphViewOps + AdditionOps { fn encode_to_zip(&self, writer: ZipWriter) -> Result<(), GraphError>; + /// Encode the graph into bytes. fn encode_to_bytes(&self) -> Result, GraphError>;