Skip to content

Commit 47c4cde

Browse files
authored
feat: Add insertion optimization and per-index memory attribution (#7)
Insertion Performance:
- Unified data record persistence in insertHere() (was duplicated in basicInsert/split)
- Added cache lookup fast path in _insert() when eviction is disabled
- Fixed propagateMBRUpdate() clearing cache pointer via setKeyOwned()

Per-Index Memory:
- Added FieldMemoryStats and per-field tracking to MappingManager
- Added IndexMemoryStats and printMemoryBreakdown() to IndexDetails
- Pass field_name through SegmentAllocator for file-to-field registration

Test Fixes:
- Fixed double-delete in test_xtree.cpp (cache owns mock objects)
- Fixed use-after-free in test_performance.cpp (removed clearCache before delete)

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
1 parent 67c0e71 commit 47c4cde

14 files changed

+620
-139
lines changed

core/src/main/cpp/src/indexdetails.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ void IndexDetails<Record>::initializeDurableStore(const std::string& data_dir, b
6666
try {
6767
// Create the durable runtime which owns all persistence components
6868
// read_only mode: skip WAL replay for fast serverless reader startup
69-
runtime_ = persist::DurableRuntime::open(paths, policy, false, read_only);
69+
// Pass field_name for per-index memory tracking
70+
runtime_ = persist::DurableRuntime::open(paths, policy, false, read_only, field_name_);
7071

7172
// Create the durable store context (must outlive DurableStore)
7273
durable_context_ = std::make_unique<persist::DurableContext>(persist::DurableContext{

core/src/main/cpp/src/indexdetails.hpp

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <mutex>
3737
#include <atomic>
3838
#include <cassert>
39+
#include <iomanip>
3940

4041
namespace xtree {
4142

@@ -598,6 +599,148 @@ namespace xtree {
598599
return field_name_;
599600
}
600601

602+
// ========== Per-Index Memory Statistics ==========
603+
604+
// Comprehensive memory stats for this index
605+
// Snapshot of the memory attributed to one index, identified by its field name.
// Every counter starts at zero; the field name starts empty.
struct IndexMemoryStats {
    // Field name of the index this snapshot describes.
    std::string field_name;

    // --- Memory-mapped regions (from MappingManager) ---
    size_t mmap_bytes{0};
    size_t mmap_extents{0};
    size_t mmap_pins{0};

    // --- Cache (from ShardedLRUCache) ---
    size_t cache_bytes{0};
    size_t cache_entries{0};

    // --- Segment allocator ---
    size_t segment_live_bytes{0};
    size_t segment_dead_bytes{0};
    double segment_fragmentation_pct{0.0};

    // Combined footprint for this index: mmap bytes plus cache bytes.
    // The segment byte counters are not part of this sum.
    size_t total_bytes() const {
        const size_t combined = cache_bytes + mmap_bytes;
        return combined;
    }
};
627+
628+
// Get memory stats for this specific index
629+
// Builds an IndexMemoryStats snapshot for this index.
//
// Filled in, in this order:
//   - mmap bytes / extents / pins, from the MappingManager per-field entry
//     whose key equals field_name_ (when such an entry exists);
//   - cache bytes, from the cache's per-field entry for field_name_
//     (when such an entry exists);
//   - segment live/dead bytes and fragmentation (as a percentage), only
//     when runtime_ is set.
// Anything not found keeps its zero default.
// NOTE(review): cache_entries is never assigned here, so it is always 0 in
// the returned snapshot.
IndexMemoryStats getMemoryStats() const {
    IndexMemoryStats result;
    result.field_name = field_name_;

    // Memory-mapped usage registered under this field name.
    auto per_field_mmap = persist::MappingManager::global().getPerFieldStats();
    auto mmap_entry = per_field_mmap.find(field_name_);
    if (mmap_entry != per_field_mmap.end()) {
        const auto& field_mmap = mmap_entry->second;
        result.mmap_bytes = field_mmap.mmap_bytes;
        result.mmap_extents = field_mmap.extent_count;
        result.mmap_pins = field_mmap.pin_count;
    }

    // Cache usage registered under this field name.
    auto per_field_cache = getCache().getPerFieldMemory();
    auto cache_entry = per_field_cache.find(field_name_);
    if (cache_entry != per_field_cache.end()) {
        result.cache_bytes = cache_entry->second;
    }

    // Segment allocator totals are only available with a durable runtime.
    if (runtime_) {
        auto segment_totals = runtime_->allocator().get_total_stats();
        result.segment_live_bytes = segment_totals.live_bytes;
        result.segment_dead_bytes = segment_totals.dead_bytes;
        // fragmentation() is scaled by 100 to express it as a percentage.
        result.segment_fragmentation_pct = segment_totals.fragmentation() * 100.0;
    }

    return result;
}
659+
660+
// Get memory stats for all indexes
661+
static std::vector<IndexMemoryStats> getAllIndexStats() {
662+
std::vector<IndexMemoryStats> all_stats;
663+
all_stats.reserve(indexes.size());
664+
665+
for (auto* idx : indexes) {
666+
if (idx) {
667+
all_stats.push_back(idx->getMemoryStats());
668+
}
669+
}
670+
671+
return all_stats;
672+
}
673+
674+
// Print per-index memory breakdown (for debugging/monitoring)
675+
static void printMemoryBreakdown() {
676+
std::cout << "\n=== Per-Index Memory Breakdown ===" << std::endl;
677+
678+
// Get raw per-field mmap stats directly from MappingManager
679+
auto raw_mmap_stats = persist::MappingManager::global().getPerFieldStats();
680+
681+
std::cout << "| Field | MMap (MB) | Cache (MB) | Segments (MB) | Total (MB) |" << std::endl;
682+
std::cout << "|-------|-----------|------------|---------------|------------|" << std::endl;
683+
684+
auto all_stats = getAllIndexStats();
685+
size_t total_mmap = 0, total_cache = 0, total_seg = 0;
686+
size_t fields_with_zero = 0;
687+
size_t fields_with_memory = 0;
688+
689+
for (const auto& stats : all_stats) {
690+
// Look up mmap stats directly from raw_mmap_stats for this field
691+
size_t mmap_bytes = 0;
692+
auto it = raw_mmap_stats.find(stats.field_name);
693+
if (it != raw_mmap_stats.end()) {
694+
mmap_bytes = it->second.mmap_bytes;
695+
}
696+
697+
size_t mmap_mb = mmap_bytes / (1024 * 1024);
698+
size_t cache_mb = stats.cache_bytes / (1024 * 1024);
699+
size_t seg_mb = stats.segment_live_bytes / (1024 * 1024);
700+
size_t total_mb = (mmap_bytes + stats.cache_bytes) / (1024 * 1024);
701+
702+
// Only show fields with non-zero memory (in MB) to reduce noise
703+
if (mmap_mb > 0 || cache_mb > 0 || seg_mb > 0) {
704+
std::cout << "| " << stats.field_name
705+
<< " | " << mmap_mb
706+
<< " | " << cache_mb
707+
<< " | " << seg_mb
708+
<< " | " << total_mb
709+
<< " |" << std::endl;
710+
fields_with_memory++;
711+
} else {
712+
fields_with_zero++;
713+
}
714+
715+
total_mmap += mmap_bytes;
716+
total_cache += stats.cache_bytes;
717+
total_seg += stats.segment_live_bytes;
718+
}
719+
720+
// Check for unregistered memory
721+
auto unreg_it = raw_mmap_stats.find("_unregistered_");
722+
if (unreg_it != raw_mmap_stats.end() && unreg_it->second.mmap_bytes > 0) {
723+
std::cout << "| _unregistered_ | "
724+
<< (unreg_it->second.mmap_bytes / (1024 * 1024))
725+
<< " | 0 | 0 | "
726+
<< (unreg_it->second.mmap_bytes / (1024 * 1024))
727+
<< " |" << std::endl;
728+
total_mmap += unreg_it->second.mmap_bytes;
729+
}
730+
731+
std::cout << "|-------|-----------|------------|---------------|------------|" << std::endl;
732+
std::cout << "| Total | "
733+
<< (total_mmap / (1024 * 1024))
734+
<< " | " << (total_cache / (1024 * 1024))
735+
<< " | " << (total_seg / (1024 * 1024))
736+
<< " | " << ((total_mmap + total_cache) / (1024 * 1024))
737+
<< " |" << std::endl;
738+
739+
if (fields_with_zero > 0) {
740+
std::cout << "(" << fields_with_zero << " fields with evicted/zero memory not shown)" << std::endl;
741+
}
742+
}
743+
601744
// IRecord* getCachedNode( UniqueId recordAddress ) { return NULL; }
602745

603746
// COW management methods

0 commit comments

Comments
 (0)