iraikov · iraikov · Mar 6, 2025 · Mar 5, 2025 · Mar 5, 2025 · Mar 6, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
 
 include(${PROJECT_SOURCE_DIR}/cmake/neuroh5_utils.cmake)
 
-set(NEUROH5_VERSION 0.1.15)
+set(NEUROH5_VERSION 0.1.17)
 
 cmake_policy(SET CMP0074 NEW) # enables use of HDF5_ROOT variable
 

diff --git a/README.md b/README.md
@@ -82,24 +82,71 @@ connected). In the next section, we present the Destination Block
 Sparse format, and present details of the implementation and initial
 performance metrics.
 
-## Destination Block Sparse connectivity format
-
-In the Destination Block Sparse format the destination indices are
-stored in blocks (of destinations). The following invariants hold:
-
-1. The destination indices in a block are contiguous. 
-2. The number of destinations per block may vary from block to block.
-
-The Destination Block Sparse format consists of the following datasets:
-
-- Source Index : This array holds the indices of all source vertices in the projection. It's length is equal to the number of edges in the projection.
-- Destination Index : This array holds the first destination index in each block. Its length is equal to the number of blocks.
-- Destination Block Pointer : This array holds offsets into the Destination Pointer array. Its length is equal to the number of blocks plus one. The number of destinations in block i equals:
-  Destination Block Pointer[i + 1] – Destination Block Pointer[i]
-The destination index of destination j in block i is Destination Index[i] + j.
-- Destination Pointer : This array holds offsets into the Source Index and edge attribute datasets. Its length is equal to the sum of the destination counts in all blocks plus one. For each destination block, Destination Pointer stores one offset per destination in the block. The number of source entries for destination j of block i equals:
-
-  Destination Pointer[Destination Block Pointer[i] + j + 1] - estination Pointer[Destination Block Pointer[i] + j]
+## Destination Block Sparse (DBS) Format
+
+The Destination Block Sparse (DBS) format is a memory-efficient graph
+representation designed for parallel processing of large-scale neural
+network connectivity. This format optimizes for the common case where
+each destination (target neuron) connects to a relatively small subset
+of sources (input neurons).
+
+### Core Data Structures
+
+The DBS format consists of four primary arrays:
+
+1. **Source Index Array (`src_idx`)**: 
+   - Contains the indices of all source vertices in the projection
+   - Length equals the total number of edges (connections) in the projection
+   - Stores the actual connectivity information
+
+2. **Destination Block Pointer Array (`dst_blk_ptr`)**: 
+   - Contains offsets into the Destination Pointer array
+   - Length equals the number of blocks plus one (includes a sentinel value)
+   - The difference between consecutive elements indicates the number of destinations in each block
+
+3. **Destination Index Array (`dst_idx`)**: 
+   - Contains the first destination index in each block
+   - Length equals the number of blocks
+   - Destinations within a block have contiguous indices
+
+4. **Destination Pointer Array (`dst_ptr`)**: 
+   - Contains offsets into the Source Index array and the corresponding edge attribute datasets
+   - Length equals the total number of destinations plus one (includes a sentinel value)
+   - Indicates where each destination's source connections begin and end
+
+### Key Properties and Relationships
+
+- **Block Structure**: Destinations are organized into blocks where each block contains contiguous destination indices
+- **Variable Block Size**: The number of destinations per block can vary
+- **Contiguous Destinations**: All destinations within a block have contiguous indices
+- **Efficient Edge Lookup**: To find all sources connected to a specific destination:
+   1. Locate the block containing the destination
+   2. Calculate the destination's offset within the block
+   3. Use the offset to find the appropriate pointers in `dst_ptr`
+   4. Access the source indices from `src_idx`
+
+### Formal Relationships
+
+For a given block index `i`:
+- Number of destinations in block `i` = `dst_blk_ptr[i+1] - dst_blk_ptr[i]`
+- Destination index of the j-th destination in block `i` = `dst_idx[i] + j`
+- For the j-th destination in block `i`:
+  - Offset into `dst_ptr` = `dst_blk_ptr[i] + j`
+  - First source index is stored at `src_idx[dst_ptr[dst_blk_ptr[i] + j]]`
+  - Last source index is stored at `src_idx[dst_ptr[dst_blk_ptr[i] + j + 1] - 1]` (only meaningful when the destination has at least one source)
+  - Number of sources = `dst_ptr[dst_blk_ptr[i] + j + 1] - dst_ptr[dst_blk_ptr[i] + j]`
+
+### Benefits for Parallel Processing
+
+This format is particularly well-suited for parallel processing because:
+1. It clusters related destinations into blocks, improving cache locality
+2. It allows for balanced distribution of computational load across processors
+3. It minimizes communication overhead when distributing graph data
+4. It provides efficient access patterns for both forward and backward traversals
+
+The format achieves memory efficiency by using index arrays and offset
+pointers rather than storing a full adjacency matrix, making it ideal
+for sparse connectivity patterns typical in neural networks.
 
 ## Edge Attributes
 

diff --git a/include/mpi/node_rank_map.hh b/include/mpi/node_rank_map.hh
@@ -0,0 +1,44 @@
+// -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+//==============================================================================
+///  @file node_rank_map.hh
+///
+///  Function for creating a mapping of graph nodes to MPI ranks.
+///
+///  Copyright (C) 2025 Project NeuroH5.
+//==============================================================================
+
+#ifndef NODE_RANK_MAP_HH
+#define NODE_RANK_MAP_HH
+
+#include <mpi.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <map>
+#include <set>
+#include <vector>
+
+#include "throw_assert.hh"
+
+using namespace std;
+
+
+namespace neuroh5
+{
+
+  namespace mpi
+  {
+
+    /// @brief Creates a mapping of graph nodes to MPI ranks.
+    ///
+    /// NOTE(review): only the declaration is visible in this header. The
+    /// parameter notes below are inferred from the call site in
+    /// append_graph (which passes all_comm, io_rank_set, local_node_index,
+    /// total_num_nodes and node_rank_map) and must be confirmed against
+    /// the implementation.
+    ///
+    /// NOTE(review): NODE_IDX_T and rank_t are not declared by anything
+    /// this header includes directly; callers presumably include the
+    /// project types header first -- confirm, and include it here if so.
+    ///
+    /// @param comm              MPI communicator the mapping is computed over.
+    /// @param rank_set          Set of candidate ranks; append_graph passes
+    ///                          its I/O rank set. Presumably nodes are only
+    ///                          assigned to ranks in this set -- TODO confirm.
+    /// @param local_node_index  Node indices known to the calling rank.
+    /// @param total_num_nodes   Presumably an output: append_graph tests it
+    ///                          for zero right after the call -- TODO confirm.
+    /// @param node_rank_map     Resulting node index to rank mapping.
+    void compute_node_rank_map
+    (
+     MPI_Comm comm,
+     std::set<size_t> &rank_set,
+     std::vector< NODE_IDX_T > &local_node_index,
+     size_t &total_num_nodes,
+     std::map<NODE_IDX_T, rank_t> &node_rank_map
+     );
+  }
+}
+
+#endif
+
diff --git a/setup.py b/setup.py
@@ -141,7 +141,7 @@ def build_extensions(self):
     name="NeuroH5",
     package_dir={"": "python"},
     packages=["neuroh5"],
-    version="0.1.15",
+    version="0.1.17",
     maintainer="Ivan Raikov",
     maintainer_email="ivan.g.raikov@gmail.com",
     description="A parallel HDF5-based library for storage and processing of large-scale graphs and neural cell model attributes.",

diff --git a/src/data/append_edge_map.cc b/src/data/append_edge_map.cc
@@ -5,7 +5,7 @@
 ///
 ///  Populates a mapping between node indices and edge values.
 ///
-///  Copyright (C) 2016-2018 Project NeuroH5.
+///  Copyright (C) 2016-2025 Project NeuroH5.
 //==============================================================================
 
 #include <vector>
@@ -41,11 +41,17 @@ namespace neuroh5
      )
     {
       int ierr = 0; size_t dst_ptr_size;
+
+      // Ensure we have data to process
+      if (dst_blk_ptr.empty() || dst_idx.empty() || dst_ptr.empty() || src_idx.empty())
+        {
+          return ierr;
+        };
 
       if (dst_blk_ptr.size() > 0)
         {
           dst_ptr_size = dst_ptr.size();
-          for (size_t b = 0; b < dst_blk_ptr.size()-1; ++b)
+          for (size_t b = 0; b < dst_idx.size(); ++b)
             {
               size_t low_dst_ptr = dst_blk_ptr[b],
                 high_dst_ptr = dst_blk_ptr[b+1];
@@ -57,6 +63,7 @@ namespace neuroh5
                     {
                       NODE_IDX_T dst = dst_base + ii + dst_start;
                       size_t low = dst_ptr[i], high = dst_ptr[i+1];
+
                       if (high > low)
                         {
                           switch (edge_map_type)

diff --git a/src/graph/append_graph.cc b/src/graph/append_graph.cc
@@ -5,7 +5,7 @@
 ///  Top-level functions for appending edge information to graphs in
 ///  DBS (Destination Block Sparse) format.
 ///
-///  Copyright (C) 2016-2024 Project NeuroH5.
+///  Copyright (C) 2016-2025 Project NeuroH5.
 //==============================================================================
 
 
@@ -20,6 +20,7 @@
 #include "sort_permutation.hh"
 #include "serialize_edge.hh"
 #include "range_sample.hh"
+#include "node_rank_map.hh"
 #include "debug.hh"
 #include "mpi_debug.hh"
 #include "throw_assert.hh"
@@ -107,77 +108,27 @@ namespace neuroh5
       src_start = pop_ranges[src_pop_idx].start;
       src_end   = src_start + pop_ranges[src_pop_idx].count;
 
-      vector< NODE_IDX_T > node_index;
-
-      { // Determine the destination node indices present in the input
-        // edge map across all ranks
-        MPI_Request request;
-        size_t num_nodes = input_edge_map.size();
-        vector<size_t> sendbuf_num_nodes(size, num_nodes);
-        vector<size_t> recvbuf_num_nodes(size);
-        vector<int> recvcounts(size, 0);
-        vector<int> displs(size+1, 0);
-        throw_assert(MPI_Iallgather(&sendbuf_num_nodes[0], 1, MPI_SIZE_T,
-                                    &recvbuf_num_nodes[0], 1, MPI_SIZE_T,
-                                    all_comm,
-                                    &request) == MPI_SUCCESS,
-                     "append_graph: error in MPI_Iallgather");
-        throw_assert(MPI_Wait(&request, MPI_STATUS_IGNORE) == MPI_SUCCESS,
-                     "append_graph: error in MPI_Wait");
-
-        for (size_t p=0; p<size; p++)
-          {
-            total_num_nodes = total_num_nodes + recvbuf_num_nodes[p];
-            displs[p+1] = displs[p] + recvbuf_num_nodes[p];
-            recvcounts[p] = recvbuf_num_nodes[p];
-          }
-
-        vector< NODE_IDX_T > local_node_index;
-        for (auto iter: input_edge_map)
-          {
-            NODE_IDX_T dst          = iter.first;
-            local_node_index.push_back(dst);
-          }
-
-        node_index.resize(total_num_nodes,0);
-        throw_assert(MPI_Iallgatherv(&local_node_index[0], num_nodes, MPI_NODE_IDX_T,
-                                     &node_index[0], &recvcounts[0], &displs[0], MPI_NODE_IDX_T,
-                                     all_comm,
-                                     &request) == MPI_SUCCESS,
-                     "append_graph: error in MPI_Iallgatherv");
-        throw_assert(MPI_Wait(&request, MPI_STATUS_IGNORE) == MPI_SUCCESS,
-                     "append_graph: error in MPI_Wait");
-
-        vector<size_t> p = sort_permutation(node_index, compare_nodes);
-        apply_permutation_in_place(node_index, p);
-      }
-
-      throw_assert_nomsg(node_index.size() == total_num_nodes);
-
-      if (total_num_nodes == 0)
+      vector< NODE_IDX_T > local_node_index;
+      for (auto iter: input_edge_map)
         {
-          throw_assert_nomsg(MPI_Barrier(all_comm) == MPI_SUCCESS);
-          return 0;
+          NODE_IDX_T dst = iter.first;
+          local_node_index.push_back(dst);
         }
 
       set<size_t> io_rank_set;
       data::range_sample(size, io_size, io_rank_set);
       bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end());
 
-      // A vector that maps nodes to compute ranks
+      // Map nodes to compute ranks
       map< NODE_IDX_T, rank_t > node_rank_map;
-      {
-        rank_t r=0; 
-        for (size_t i = 0; i < node_index.size(); i++)
-          {
-            while (io_rank_set.count(r) == 0)
-              {
-                r++;
-                if ((unsigned int)size <= r) r=0;
-              }
-            node_rank_map.insert(make_pair(node_index[i], r++));
-          }
-      }
+      mpi::compute_node_rank_map(all_comm, io_rank_set, local_node_index,
+                                 total_num_nodes, node_rank_map);
+      if (total_num_nodes == 0)
+        {
+          throw_assert_nomsg(MPI_Barrier(all_comm) == MPI_SUCCESS);
+          return 0;
+        }
+
       rank_edge_map_t rank_edge_map;
       mpi::MPI_DEBUG(all_comm, "append_graph: ", src_pop_name, " -> ", dst_pop_name, ": ",
                      " total_num_nodes = ", total_num_nodes);

diff --git a/src/graph/validate_edge_list.cc b/src/graph/validate_edge_list.cc
@@ -40,11 +40,10 @@ namespace neuroh5
       pair<pop_t,pop_t> pp;
 
       // loop over all edges, look up the node populations, and validate the pairs
-
-      if (dst_blk_ptr.size() > 0)
+      if (dst_idx.size() > 0)
         {
           size_t dst_ptr_size = dst_ptr.size();
-          for (size_t b = 0; b < dst_blk_ptr.size()-1; ++b)
+          for (size_t b = 0; b < dst_idx.size(); ++b)
             {
               size_t low_dst_ptr = dst_blk_ptr[b],
                 high_dst_ptr = dst_blk_ptr[b+1];