Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
0c54386
Move TransactionManager to db4-storage
fabubaker Dec 1, 2025
92f2793
Create DurabilityOps
fabubaker Dec 1, 2025
cff7a1e
Remove wal from transaction manager
fabubaker Dec 2, 2025
6f24855
Simplify wal log and replay methods
fabubaker Dec 2, 2025
c192d64
Add sketch of correct logging to add_edge
fabubaker Dec 2, 2025
fd49e14
Add lsn to MemNodeSegment/MemEdgeSegment
fabubaker Dec 2, 2025
aa9f84b
Add set_lsn method for AtomicAddEdge
fabubaker Dec 2, 2025
c2c77fa
Simplify WriterPair to NodeWriters
fabubaker Dec 2, 2025
f263616
Remove lsn args
fabubaker Dec 3, 2025
5c0069c
Implement basic add_edge replay
fabubaker Dec 4, 2025
fa11c7a
Simplify GraphWal
fabubaker Dec 5, 2025
aa73139
Remove wrapper for graph replay
fabubaker Dec 10, 2025
df6679c
Merge branch 'db_v4' of github.com:Pometry/Raphtory into db_v4_/wal
fabubaker Dec 10, 2025
6944a6f
Fix leftover merge issues
fabubaker Dec 10, 2025
65be87a
Change mark_dirty to set_dirty
fabubaker Dec 10, 2025
9a80df7
Add replay tests
fabubaker Dec 11, 2025
c3f3352
Always set edge_writer in init in WriteSession
fabubaker Dec 11, 2025
bc54103
Return early from add_static_edge if edge exists
fabubaker Dec 12, 2025
f7ac76a
Move GraphReplay to WriteLockedGraph
fabubaker Dec 12, 2025
8e778ca
Implement add_edge replay for WriteLockedGraph
fabubaker Dec 16, 2025
110781b
Minor cleanup
fabubaker Dec 18, 2025
5b7532a
Check lsn before replaying wal entries
fabubaker Jan 6, 2026
987a17a
Implement take for node and edge segments
fabubaker Jan 6, 2026
c4d5d7c
Set lsn during replay
fabubaker Jan 6, 2026
01257e5
Rename wal sync to flush
fabubaker Jan 7, 2026
9221008
Modify flush to take an LSN
fabubaker Jan 7, 2026
0fddef3
Remove background wal flush
fabubaker Jan 8, 2026
612dc09
Rename to PersistenceStrategy
fabubaker Jan 8, 2026
c4e8525
Merge branch 'db_v4' of github.com:Pometry/Raphtory into db_v4_/wal
fabubaker Jan 8, 2026
417ce5f
Rename graph_config to persistence_config
fabubaker Jan 9, 2026
3528f71
Use config through PersistenceStrategy
fabubaker Jan 9, 2026
aea3450
Merge branch 'db_v4' of github.com:Pometry/Raphtory into db_v4_/wal
fabubaker Jan 9, 2026
9979c50
Fix parallel_flush
fabubaker Jan 9, 2026
aa77163
Remove defaults for Extension
fabubaker Jan 9, 2026
aa55404
Move read/write from dir methods to PersistenceConfig
fabubaker Jan 9, 2026
934b3a8
Add config_mut TODO
fabubaker Jan 12, 2026
eb459fd
Apply some more page -> segment rename
fabubaker Jan 12, 2026
427f2a8
Expose WalType through PersistenceStrategy
fabubaker Jan 12, 2026
7a13c79
Pass wal as argument to constructor
fabubaker Jan 12, 2026
00d4a3c
Expose wal from extension
fabubaker Jan 12, 2026
f78279c
Add more docs to graph paths
fabubaker Jan 12, 2026
7c375dc
Minor cleanup
fabubaker Jan 14, 2026
334fef4
Modify graph load to accept extension
fabubaker Jan 14, 2026
30c2744
Use PersistenceConfig::new instead of strategy constructors
fabubaker Jan 14, 2026
8ad9128
Run fmt
fabubaker Jan 14, 2026
529a2cd
Use &Path instead of PathBuf for wal
fabubaker Jan 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 40 additions & 73 deletions db4-graph/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use std::{
io,
path::{Path, PathBuf},
sync::{
atomic::{self, AtomicU64, AtomicUsize},
Arc,
},
sync::{atomic::AtomicUsize, Arc},
};

use raphtory_api::core::{
Expand All @@ -25,22 +22,24 @@ use storage::{
nodes::WriteLockedNodePages,
},
},
persist::strategy::{Config, PersistentStrategy},
persist::strategy::{PersistenceConfig, PersistenceStrategy},
resolver::GIDResolverOps,
wal::{GraphWal, TransactionID, Wal},
Extension, GIDResolver, Layer, ReadLockedLayer, WalImpl, ES, GS, NS,
transaction::TransactionManager,
wal::Wal,
Extension, GIDResolver, Layer, ReadLockedLayer, WalType, ES, GS, NS,
};
use tempfile::TempDir;

mod replay;

#[derive(Debug)]
pub struct TemporalGraph<EXT: Config = Extension> {
pub struct TemporalGraph<EXT: PersistenceStrategy = Extension> {
// mapping between logical and physical ids
pub logical_to_physical: Arc<GIDResolver>,
pub node_count: AtomicUsize,
storage: Arc<Layer<EXT>>,
graph_dir: Option<GraphDir>,
pub transaction_manager: Arc<TransactionManager>,
pub wal: Arc<WalImpl>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -84,47 +83,15 @@ impl<'a> From<&'a Path> for GraphDir {
}
}

/// Hands out transaction ids and records transaction begin/end markers in the WAL.
#[derive(Debug)]
pub struct TransactionManager {
    /// Id counter. `begin_transaction` uses `fetch_add`, which returns the value
    /// held *before* the increment, so despite its name this field holds the id
    /// that the next transaction will receive.
    last_transaction_id: AtomicU64,
    /// Write-ahead log that receives the begin/end transaction records.
    wal: Arc<WalImpl>,
}

impl TransactionManager {
    /// Initial value of the id counter for a freshly created manager.
    const STARTING_TRANSACTION_ID: TransactionID = 1;

    /// Creates a manager that logs to `wal` and whose counter starts at
    /// `STARTING_TRANSACTION_ID`.
    pub fn new(wal: Arc<WalImpl>) -> Self {
        Self {
            last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID),
            wal,
        }
    }

    /// Overwrites the id counter with `last_transaction_id`.
    ///
    /// Takes `&self` rather than `self`: the counter is atomic, so shared access
    /// is sufficient, whereas a by-value receiver would drop the manager right
    /// after the store (discarding the update) and could not be called through
    /// an `Arc<TransactionManager>`.
    pub fn load(&self, last_transaction_id: TransactionID) {
        self.last_transaction_id
            .store(last_transaction_id, atomic::Ordering::SeqCst);
    }

    /// Reserves a new transaction id, logs the begin marker to the WAL and
    /// returns the id.
    ///
    /// # Panics
    /// Panics if `log_begin_transaction` does not succeed.
    pub fn begin_transaction(&self) -> TransactionID {
        let transaction_id = self
            .last_transaction_id
            .fetch_add(1, atomic::Ordering::SeqCst);
        self.wal
            .log_begin_transaction(transaction_id)
            .expect("failed to log begin-transaction record to the WAL");
        transaction_id
    }

    /// Logs the end marker for `transaction_id` to the WAL.
    ///
    /// # Panics
    /// Panics if `log_end_transaction` does not succeed.
    pub fn end_transaction(&self, transaction_id: TransactionID) {
        self.wal
            .log_end_transaction(transaction_id)
            .expect("failed to log end-transaction record to the WAL");
    }
}

impl Default for TemporalGraph<Extension> {
fn default() -> Self {
Self::new(Extension::default()).unwrap()
let config = PersistenceConfig::default();
let wal = Arc::new(WalType::new(None).unwrap());
Self::new(Extension::new(config, wal)).unwrap()
}
}

impl<EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> TemporalGraph<EXT> {
impl<EXT: PersistenceStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> TemporalGraph<EXT> {
pub fn new(ext: EXT) -> Result<Self, StorageError> {
let node_meta = Meta::new_for_nodes();
let edge_meta = Meta::new_for_edges();
Expand All @@ -147,27 +114,6 @@ impl<EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> Temporal
)
}

/// Opens a graph that was previously persisted under `path`.
///
/// The storage layer is loaded from `path` itself, the GID resolver from the
/// `gid_resolver` subdirectory, and the WAL is opened on the `wal`
/// subdirectory. The node count is taken from the loaded node storage and a
/// new `TransactionManager` is created on top of the WAL.
///
/// # Errors
/// Propagates any error from `Layer::load`, `GIDResolver::new_with_path` or
/// `WalImpl::new`.
pub fn load_from_path(path: impl AsRef<Path>) -> Result<Self, StorageError> {
    let root = path.as_ref();

    // Storage first: the resolver needs the id type recorded in the node store.
    let layer = Layer::load(root)?;
    let id_type = layer.nodes().id_type();

    let resolver_dir = root.join("gid_resolver");
    let resolver = GIDResolver::new_with_path(&resolver_dir, id_type)?;
    let node_count = AtomicUsize::new(layer.nodes().num_nodes());

    // The WAL handle is shared between the graph and its transaction manager.
    let wal = Arc::new(WalImpl::new(Some(root.join("wal")))?);
    let transaction_manager = Arc::new(TransactionManager::new(Arc::clone(&wal)));

    Ok(Self {
        graph_dir: Some(root.into()),
        logical_to_physical: resolver.into(),
        node_count,
        storage: Arc::new(layer),
        transaction_manager,
        wal,
    })
}

pub fn new_with_meta(
graph_dir: Option<GraphDir>,
node_meta: Meta,
Expand Down Expand Up @@ -207,16 +153,30 @@ impl<EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> Temporal
ext,
);

let wal_dir = graph_dir.as_ref().map(|dir| dir.wal_dir());
let wal = Arc::new(WalImpl::new(wal_dir)?);

Ok(Self {
graph_dir,
logical_to_physical,
node_count: AtomicUsize::new(0),
storage: Arc::new(storage),
transaction_manager: Arc::new(TransactionManager::new(wal.clone())),
wal,
transaction_manager: Arc::new(TransactionManager::new()),
})
}

/// Opens a graph that was previously persisted under `path`, passing `ext`
/// through to the storage layer.
///
/// The storage layer is loaded from `path` itself and the GID resolver from
/// the `gid_resolver` subdirectory. The node count is taken from the loaded
/// node storage and a new `TransactionManager` is created.
///
/// # Errors
/// Propagates any error from `Layer::load` or `GIDResolver::new_with_path`.
pub fn load_from_path(path: impl AsRef<Path>, ext: EXT) -> Result<Self, StorageError> {
    let root = path.as_ref();

    // Storage first: the resolver needs the id type recorded in the node store.
    let layer = Layer::load(root, ext)?;
    let id_type = layer.nodes().id_type();

    let resolver_dir = root.join("gid_resolver");
    let resolver = GIDResolver::new_with_path(&resolver_dir, id_type)?;
    let node_count = AtomicUsize::new(layer.nodes().num_nodes());

    Ok(Self {
        graph_dir: Some(root.into()),
        logical_to_physical: resolver.into(),
        node_count,
        storage: Arc::new(layer),
        transaction_manager: Arc::new(TransactionManager::new()),
    })
}

Expand All @@ -229,6 +189,10 @@ impl<EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> Temporal
self.storage().extension()
}

/// Returns the WAL exposed by the storage layer's extension.
pub fn wal(&self) -> &EXT::WalType {
    let extension = self.storage().extension();
    extension.wal()
}

pub fn read_event_counter(&self) -> usize {
self.storage().read_event_id()
}
Expand All @@ -251,10 +215,12 @@ impl<EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> Temporal
.get_str(string)
.or_else(|| self.logical_to_physical.get_u64(string.id())),
}?;

// VIDs in the resolver may not be initialised yet, need to double-check the node actually exists!
let nodes = self.storage().nodes();
let (page_id, pos) = nodes.resolve_pos(vid);
let node_page = nodes.segments().get(page_id)?;

if pos.0 < node_page.num_nodes() {
Some(vid)
} else {
Expand Down Expand Up @@ -389,17 +355,18 @@ impl<EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>> Temporal
}
}

/// Holds write locks across all segments in the graph for fast bulk ingestion.
pub struct WriteLockedGraph<'a, EXT>
where
EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>,
EXT: PersistenceStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>,
{
pub nodes: WriteLockedNodePages<'a, storage::NS<EXT>>,
pub edges: WriteLockedEdgePages<'a, storage::ES<EXT>>,
pub graph_props: WriteLockedGraphPropPages<'a, storage::GS<EXT>>,
pub graph: &'a TemporalGraph<EXT>,
}

impl<'a, EXT: PersistentStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>>
impl<'a, EXT: PersistenceStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>>
WriteLockedGraph<'a, EXT>
{
pub fn new(graph: &'a TemporalGraph<EXT>) -> Self {
Expand Down
188 changes: 188 additions & 0 deletions db4-graph/src/replay.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
//! Implements WAL replay for a `WriteLockedGraph`.
//! Allows for fast replay by making use of one-time lock acquisition for
//! all the segments in the graph.

use crate::WriteLockedGraph;
use raphtory_api::core::{
entities::{
properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop},
EID, GID, VID,
},
storage::timeindex::TimeIndexEntry,
};
use raphtory_core::entities::GidRef;
use storage::{
api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps},
error::StorageError,
pages::resolve_pos,
persist::strategy::PersistenceStrategy,
resolver::GIDResolverOps,
wal::{GraphReplay, TransactionID, LSN},
ES, GS, NS,
};

impl<EXT> GraphReplay for WriteLockedGraph<'_, EXT>
where
    EXT: PersistenceStrategy<NS = NS<EXT>, ES = ES<EXT>, GS = GS<EXT>>,
{
    /// Re-applies one logged `add_edge` operation to the write-locked graph.
    ///
    /// Metadata (edge temporal-property ids, resolver entries and layer ids) is
    /// always restored. Each of the three affected segments (the source node's,
    /// the destination node's and the edge's) is written to only when its
    /// `immut_lsn()` is lower than `lsn`; every segment that is written has its
    /// LSN set to `lsn` afterwards.
    ///
    /// `transaction_id` is accepted but not read by this implementation.
    ///
    /// # Errors
    /// Returns the resolver's error if either node id cannot be set.
    ///
    /// # Panics
    /// Panics if `get_mut` yields nothing for a required node or edge segment
    /// (the result is unwrapped).
    fn replay_add_edge(
        &mut self,
        lsn: LSN,
        transaction_id: TransactionID,
        t: TimeIndexEntry,
        src_name: GID,
        src_id: VID,
        dst_name: GID,
        dst_id: VID,
        eid: EID,
        layer_name: Option<String>,
        layer_id: usize,
        props: Vec<(String, usize, Prop)>,
    ) -> Result<(), StorageError> {
        let graph = self.graph();
        let node_page_len = graph.storage().nodes().max_page_len();
        let edge_page_len = graph.storage().edges().max_page_len();

        // 1. Register every property id with the edge meta and keep the
        //    (id, value) pairs for the edge writer. Props are not validated
        //    again: they were validated before being written to the WAL.
        let edge_meta = graph.edge_meta();
        let edge_props: Vec<(usize, Prop)> = props
            .into_iter()
            .map(|(name, id, value)| {
                edge_meta
                    .temporal_prop_mapper()
                    .set_id_and_dtype(name, id, value.dtype());
                (id, value)
            })
            .collect();

        // 2. Register both endpoints in the resolver, source first.
        for (name, vid) in [(&src_name, src_id), (&dst_name, dst_id)] {
            graph.logical_to_physical.set(GidRef::from(name), vid)?;
        }

        // 3. Register the layer id in the layer meta of both edges and nodes.
        let node_meta = graph.node_meta();
        let layer_key = layer_name.as_deref().unwrap_or("_default");
        edge_meta.layer_meta().set_id(layer_key, layer_id);
        node_meta.layer_meta().set_id(layer_key, layer_id);

        // 4. Source node: record the outbound edge.
        let (src_segment_id, src_pos) = resolve_pos(src_id, node_page_len);
        // Make sure enough node segments exist to hold `src_id`.
        self.resize_chunks_to_num_nodes(src_id.index() + 1);

        let src_segment = self
            .graph()
            .storage()
            .nodes()
            .get_or_create_segment(src_segment_id);
        let src_immut_lsn = src_segment.immut_lsn();

        // Skip the write when the immutable part already covers this LSN.
        if src_immut_lsn < lsn {
            let mut writer = self.nodes.get_mut(src_segment_id).unwrap().writer();
            writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name));

            // Both lookups are performed before any edge is inserted.
            let missing_in_static = writer
                .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID)
                .is_none();
            let missing_in_layer = writer.get_out_edge(src_pos, dst_id, layer_id).is_none();
            let layered_eid = eid.with_layer(layer_id);

            if missing_in_static {
                writer.add_static_outbound_edge(src_pos, dst_id, eid);
            }

            // New in this layer: insert the edge; otherwise only record the timestamp.
            if missing_in_layer {
                writer.add_outbound_edge(Some(t), src_pos, dst_id, layered_eid);
            } else {
                writer.update_timestamp(t, src_pos, layered_eid);
            }

            writer.mut_segment.set_lsn(lsn);
            // `writer` goes out of scope here, before the destination writer is taken.
        }

        // 5. Destination node: record the inbound edge.
        let (dst_segment_id, dst_pos) = resolve_pos(dst_id, node_page_len);
        self.resize_chunks_to_num_nodes(dst_id.index() + 1);

        let dst_segment = self
            .graph()
            .storage()
            .nodes()
            .get_or_create_segment(dst_segment_id);
        let dst_immut_lsn = dst_segment.immut_lsn();

        // Skip the write when the immutable part already covers this LSN.
        if dst_immut_lsn < lsn {
            let mut writer = self.nodes.get_mut(dst_segment_id).unwrap().writer();
            writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name));

            // Both lookups are performed before any edge is inserted.
            let missing_in_static = writer
                .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID)
                .is_none();
            let missing_in_layer = writer.get_inb_edge(dst_pos, src_id, layer_id).is_none();
            let layered_eid = eid.with_layer(layer_id);

            if missing_in_static {
                writer.add_static_inbound_edge(dst_pos, src_id, eid);
            }

            if missing_in_layer {
                writer.add_inbound_edge(Some(t), dst_pos, src_id, layered_eid);
            } else {
                writer.update_timestamp(t, dst_pos, layered_eid);
            }

            writer.mut_segment.set_lsn(lsn);
        }

        // 6. Edge segment: store the edge with its timestamp and temporal props.
        let (edge_segment_id, edge_pos) = resolve_pos(eid, edge_page_len);
        self.resize_chunks_to_num_edges(eid.index() + 1);

        let edge_segment = self
            .graph()
            .storage()
            .edges()
            .get_or_create_segment(edge_segment_id);
        let edge_immut_lsn = edge_segment.immut_lsn();

        // Skip the write when the immutable part already covers this LSN.
        if edge_immut_lsn < lsn {
            let mut writer = self.edges.get_mut(edge_segment_id).unwrap().writer();

            // Insert into the static graph only if the edge is not there yet.
            if writer.get_edge(STATIC_GRAPH_LAYER_ID, edge_pos).is_none() {
                // The final argument is the `already_counted` flag.
                writer.add_static_edge(Some(edge_pos), src_id, dst_id, false);
            }

            // Insert into the requested layer with timestamp and props.
            writer.add_edge(t, edge_pos, src_id, dst_id, edge_props, layer_id);

            writer.writer.set_lsn(lsn);
        }

        Ok(())
    }
}
Loading
Loading