Skip to content

Commit 14c5492

Browse files
committed
fix: Resolve test parallelism issues causing hangs and improve stability
- Add RESOURCE_LOCK to all tests with potential resource conflicts - MySQL tests: Prevent connection conflicts and race conditions - Server tests: Prevent port binding and state conflicts - Storage tests: Prevent file I/O conflicts - Cache tests: Prevent cache state conflicts - Client tests: Prevent server connection conflicts - Limit test parallelism from nproc to j=4 to prevent resource contention - Add diagnostic test targets for debugging: - make test-sequential: Run tests one-by-one - make test-verbose: Run with detailed output - make test-debug: Run with debug information - make test-parallel-2: Run with minimal parallelism - Fix critical bugs in cache invalidation queue and query cache - Enhance MySQL binlog reader error handling and validation - Improve HTTP server connection management - Add comprehensive test cases for improved coverage This resolves the issue where 'make test' would randomly freeze due to parallel execution resource conflicts. Tests now run reliably with controlled parallelism.
1 parent b6744e3 commit 14c5492

24 files changed

+1058
-99
lines changed

Makefile

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# MygramDB Makefile
22
# Convenience wrapper for CMake build system
33

4-
.PHONY: help build test clean rebuild install uninstall format format-check lint configure run docker-build docker-up docker-down docker-logs docker-test
4+
.PHONY: help build test test-parallel-full test-parallel-2 test-verbose test-sequential test-debug clean rebuild install uninstall format format-check lint configure run docker-build docker-up docker-down docker-logs docker-test
55

66
# Build directory
77
BUILD_DIR := build
@@ -20,7 +20,12 @@ help:
2020
@echo ""
2121
@echo "Available targets:"
2222
@echo " make build - Build the project (default)"
23-
@echo " make test - Run all tests"
23+
@echo " make test - Run all tests (limited parallelism j=4, recommended)"
24+
@echo " make test-parallel-2 - Run tests with j=2 (safer)"
25+
@echo " make test-parallel-full - Run tests with full parallelism (may hang)"
26+
@echo " make test-verbose - Run tests with verbose output (for debugging hangs)"
27+
@echo " make test-sequential - Run tests sequentially (for identifying hanging tests)"
28+
@echo " make test-debug - Run tests with debug output"
2429
@echo " make clean - Clean build directory"
2530
@echo " make rebuild - Clean and rebuild"
2631
@echo " make install - Install binaries and files"
@@ -42,6 +47,7 @@ help:
4247
@echo "Examples:"
4348
@echo " make # Build the project"
4449
@echo " make test # Run tests"
50+
@echo " make test-sequential # Run tests one at a time to identify hangs"
4551
@echo " make install # Install to $(PREFIX) (default: /usr/local)"
4652
@echo " make PREFIX=/opt/mygramdb install # Install to custom location"
4753
@echo " make CMAKE_OPTIONS=\"-DENABLE_ASAN=ON\" configure # Enable AddressSanitizer"
@@ -60,12 +66,45 @@ build: configure
6066
$(MAKE) -C $(BUILD_DIR) -j$$(nproc)
6167
@echo "Build complete!"
6268

63-
# Run tests
69+
# Run tests (limited parallelism to avoid resource conflicts)
6470
test: build
65-
@echo "Running tests..."
71+
@echo "Running tests with limited parallelism (j=4)..."
72+
cd $(BUILD_DIR) && ctest --output-on-failure --parallel 4
73+
@echo "Tests complete!"
74+
75+
# Run tests with maximum parallelism (may hang due to resource conflicts)
76+
test-parallel-full: build
77+
@echo "Running tests with full parallelism (j=$$(nproc))..."
78+
@echo "WARNING: This may hang due to resource conflicts"
6679
cd $(BUILD_DIR) && ctest --output-on-failure --parallel $$(nproc)
6780
@echo "Tests complete!"
6881

82+
# Run tests with minimal parallelism (safer)
83+
test-parallel-2: build
84+
@echo "Running tests with minimal parallelism (j=2)..."
85+
cd $(BUILD_DIR) && ctest --output-on-failure --parallel 2
86+
@echo "Tests complete!"
87+
88+
# Run tests with verbose output to identify hanging tests
89+
test-verbose: build
90+
@echo "Running tests with verbose output..."
91+
@echo "Press Ctrl+C if a test hangs to identify which one"
92+
cd $(BUILD_DIR) && ctest --verbose --parallel 4
93+
@echo "Tests complete!"
94+
95+
# Run tests sequentially with progress to identify hanging tests
96+
test-sequential: build
97+
@echo "Running tests sequentially (no parallel execution)..."
98+
@echo "This will show which test is running when it hangs"
99+
cd $(BUILD_DIR) && ctest --verbose --output-on-failure
100+
@echo "Tests complete!"
101+
102+
# Run tests with debug output
103+
test-debug: build
104+
@echo "Running tests with debug output..."
105+
cd $(BUILD_DIR) && ctest --debug --verbose --output-on-failure --parallel 4
106+
@echo "Tests complete!"
107+
69108
# Clean build directory
70109
clean:
71110
@echo "Cleaning build directory..."

src/cache/invalidation_queue.cpp

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -108,33 +108,41 @@ void InvalidationQueue::WorkerLoop() {
108108
std::unique_lock<std::mutex> lock(queue_mutex_);
109109

110110
// Wait for trigger: batch size reached or max delay elapsed
111-
auto oldest_timestamp = std::chrono::steady_clock::time_point::max();
112111
if (!pending_ngrams_.empty()) {
113112
// Find oldest entry
113+
auto oldest_timestamp = std::chrono::steady_clock::time_point::max();
114114
for (const auto& [key, timestamp] : pending_ngrams_) {
115115
if (timestamp < oldest_timestamp) {
116116
oldest_timestamp = timestamp;
117117
}
118118
}
119-
}
120119

121-
const auto now = std::chrono::steady_clock::now();
122-
const auto time_since_oldest = now - oldest_timestamp;
120+
const auto now = std::chrono::steady_clock::now();
121+
const auto time_since_oldest = now - oldest_timestamp;
123122

124-
if (pending_ngrams_.size() >= batch_size_ || time_since_oldest >= max_delay_) {
125-
// Check running_ before processing to handle spurious wakeup and shutdown
126-
if (!running_.load()) {
127-
break;
128-
}
123+
if (pending_ngrams_.size() >= batch_size_ || time_since_oldest >= max_delay_) {
124+
// Check running_ before processing to handle spurious wakeup and shutdown
125+
if (!running_.load()) {
126+
break;
127+
}
129128

130-
// Process batch
131-
lock.unlock();
132-
ProcessBatch();
129+
// Process batch
130+
lock.unlock();
131+
ProcessBatch();
132+
} else {
133+
// Wait for signal or timeout
134+
const auto remaining_delay = max_delay_ - time_since_oldest;
135+
queue_cv_.wait_for(lock, remaining_delay,
136+
[this] { return !running_.load() || pending_ngrams_.size() >= batch_size_; });
137+
138+
// After wakeup, check running_ before continuing
139+
if (!running_.load()) {
140+
break;
141+
}
142+
}
133143
} else {
134-
// Wait for signal or timeout
135-
const auto remaining_delay = max_delay_ - time_since_oldest;
136-
queue_cv_.wait_for(lock, remaining_delay,
137-
[this] { return !running_.load() || pending_ngrams_.size() >= batch_size_; });
144+
// Queue is empty: wait indefinitely for new items
145+
queue_cv_.wait(lock, [this] { return !running_.load() || !pending_ngrams_.empty(); });
138146

139147
// After wakeup, check running_ before continuing
140148
if (!running_.load()) {
@@ -191,12 +199,14 @@ void InvalidationQueue::ProcessBatch() {
191199

192200
// Erase entries from cache
193201
for (const auto& key : keys_to_erase) {
194-
if (cache_ != nullptr) {
195-
cache_->Erase(key);
196-
}
202+
// Unregister metadata first, then erase from cache
203+
// This ensures metadata is cleaned up even if Erase() throws
197204
if (invalidation_mgr_ != nullptr) {
198205
invalidation_mgr_->UnregisterCacheEntry(key);
199206
}
207+
if (cache_ != nullptr) {
208+
cache_->Erase(key);
209+
}
200210
}
201211

202212
// Update batch statistics

src/cache/query_cache.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ std::optional<std::vector<DocId>> QueryCache::Lookup(const CacheKey& key) {
1717
// Start timing
1818
auto start_time = std::chrono::high_resolution_clock::now();
1919

20-
stats_.total_queries++;
21-
2220
// Shared lock for read
2321
std::shared_lock lock(mutex_);
2422

23+
stats_.total_queries++;
24+
2525
auto iter = cache_map_.find(key);
2626
if (iter == cache_map_.end()) {
2727
stats_.cache_misses++;

src/cli/mygram-cli.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,10 @@ class MygramClient {
439439
while (std::getline(iss, table, ',')) {
440440
// Trim whitespace
441441
table.erase(0, table.find_first_not_of(" \t\r\n"));
442-
table.erase(table.find_last_not_of(" \t\r\n") + 1);
442+
auto pos = table.find_last_not_of(" \t\r\n");
443+
if (pos != std::string::npos) {
444+
table.erase(pos + 1);
445+
}
443446
if (!table.empty()) {
444447
available_tables.push_back(table);
445448
}
@@ -539,7 +542,10 @@ class MygramClient {
539542

540543
// Trim whitespace
541544
line.erase(0, line.find_first_not_of(" \t\r\n"));
542-
line.erase(line.find_last_not_of(" \t\r\n") + 1);
545+
auto pos = line.find_last_not_of(" \t\r\n");
546+
if (pos != std::string::npos) {
547+
line.erase(pos + 1);
548+
}
543549

544550
// Add to history if non-empty
545551
if (!line.empty()) {
@@ -559,7 +565,10 @@ class MygramClient {
559565

560566
// Trim whitespace
561567
line.erase(0, line.find_first_not_of(" \t\r\n"));
562-
line.erase(line.find_last_not_of(" \t\r\n") + 1);
568+
auto pos = line.find_last_not_of(" \t\r\n");
569+
if (pos != std::string::npos) {
570+
line.erase(pos + 1);
571+
}
563572
#endif
564573

565574
if (line.empty()) {
@@ -822,17 +831,27 @@ int main(int argc, char* argv[]) {
822831
}
823832
} else if (arg == "-p") {
824833
if (i + 1 < argc) {
825-
config.port =
826-
static_cast<uint16_t>(std::stoi(argv[++i])); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
834+
try {
835+
config.port =
836+
static_cast<uint16_t>(std::stoi(argv[++i])); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
837+
} catch (const std::exception& e) {
838+
std::cerr << "Error: Invalid port number" << '\n';
839+
return 1;
840+
}
827841
} else {
828842
std::cerr << "Error: -p requires an argument" << '\n';
829843
return 1;
830844
}
831845
} else if (arg == "--retry") {
832846
if (i + 1 < argc) {
833-
config.retry_count = std::stoi(argv[++i]); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
834-
if (config.retry_count < 0) {
835-
std::cerr << "Error: --retry value must be non-negative" << '\n';
847+
try {
848+
config.retry_count = std::stoi(argv[++i]); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
849+
if (config.retry_count < 0) {
850+
std::cerr << "Error: --retry value must be non-negative" << '\n';
851+
return 1;
852+
}
853+
} catch (const std::exception& e) {
854+
std::cerr << "Error: Invalid retry count" << '\n';
836855
return 1;
837856
}
838857
} else {

src/config/config.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <stdexcept>
1717

1818
#include "config_schema_embedded.h" // Auto-generated embedded schema
19+
#include "utils/memory_utils.h"
20+
#include "utils/string_utils.h"
1921

2022
#ifdef USE_MYSQL
2123
#include "mysql/connection.h"
@@ -596,6 +598,34 @@ Config ParseConfigFromJson(const json& root) {
596598
config.cache.invalidation.max_delay_ms = invalidation["max_delay_ms"].get<int>();
597599
}
598600
}
601+
602+
// Validate cache memory against physical memory
603+
if (config.cache.enabled && config.cache.max_memory_bytes > 0) {
604+
auto system_info = utils::GetSystemMemoryInfo();
605+
if (system_info) {
606+
constexpr double kMaxCacheRatio = 0.5; // Maximum 50% of physical memory
607+
uint64_t max_allowed_cache = static_cast<uint64_t>(
608+
static_cast<double>(system_info->total_physical_bytes) * kMaxCacheRatio);
609+
610+
if (config.cache.max_memory_bytes > max_allowed_cache) {
611+
std::stringstream err_msg;
612+
err_msg << "Cache configuration error: max_memory_mb exceeds safe limit\n";
613+
err_msg << " Configured cache size: " << utils::FormatBytes(config.cache.max_memory_bytes) << "\n";
614+
err_msg << " Physical memory: " << utils::FormatBytes(system_info->total_physical_bytes) << "\n";
615+
err_msg << " Maximum allowed (50% of physical memory): " << utils::FormatBytes(max_allowed_cache) << "\n";
616+
err_msg << " Recommendation:\n";
617+
err_msg << " - Set cache.max_memory_mb to at most "
618+
<< (max_allowed_cache / 1024 / 1024) << " MB\n";
619+
err_msg << " - Consider system memory requirements for index and operations\n";
620+
err_msg << " Example:\n";
621+
err_msg << " cache:\n";
622+
err_msg << " max_memory_mb: " << (max_allowed_cache / 1024 / 1024);
623+
throw std::runtime_error(err_msg.str());
624+
}
625+
} else {
626+
spdlog::warn("Unable to validate cache memory against physical memory (could not get system memory info)");
627+
}
628+
}
599629
}
600630

601631
return config;

0 commit comments

Comments
 (0)