/**
 * @file document_store_stress_test.cpp
 * @brief Stress tests for DocumentStore (marked as SLOW for CI)
 *
 * These tests are designed to detect concurrency bugs like use-after-free
 * through high memory pressure and concurrent operations. They are excluded
 * from regular CI runs due to their longer execution time.
 */
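// To run these tests on their own, pass a filter to the test binary, e.g.:
//   --gtest_filter=DocumentStoreStressTest.*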

#include <gtest/gtest.h>

#include <atomic>
#include <chrono>
#include <cstdint>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "storage/document_store.h"

using namespace mygramdb::storage;

/**
 * @brief Stress test for RemoveDocument to detect use-after-free bugs
 *
 * This test targets the RemoveDocument function with high concurrency and memory
 * pressure. It was added to prevent regression of the use-after-free bug where
 * RemoveDocument held a reference to the primary key string after erasing the
 * map entry (the reference became dangling).
 *
 * The bug manifested as:
 * - const std::string& primary_key = pk_it->second; // Reference to string
 * - doc_id_to_pk_.erase(doc_id); // Invalidates reference
 * - StructuredLog()...Field("primary_key", primary_key) // Use after free!
 *
 * The fix was to copy the string before erasing:
 * - std::string primary_key = pk_it->second; // Copy the string
 */
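// A minimal standalone sketch of the same hazard (illustration only, not
// DocumentStore code): erasing a node-based map entry destroys the stored
// string, so any reference obtained from the iterator dangles afterwards.
//
//   std::unordered_map<int, std::string> pk_map{{1, "pk"}};
//   auto it = pk_map.find(1);
//   const std::string& ref = it->second;  // reference into the map node
//   std::string copy = it->second;        // independent copy
//   pk_map.erase(it);                     // destroys the node and its string
//   // `ref` is now dangling; `copy` remains valid.
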
TEST(DocumentStoreStressTest, RemoveDocumentUseAfterFreeRegression) {
  constexpr int kIterations = 10;
  constexpr int kDocsPerIteration = 500;
  constexpr int kNumThreads = 8;

  for (int iter = 0; iter < kIterations; ++iter) {
    DocumentStore store;

    // Add documents with long primary keys to increase memory churn
    std::vector<DocId> doc_ids;
    doc_ids.reserve(kDocsPerIteration);

    for (int i = 0; i < kDocsPerIteration; ++i) {
      // Use longer primary keys to increase memory allocation/deallocation
      std::string pk = "primary_key_with_longer_content_for_memory_pressure_" + std::to_string(iter) + "_" +
                       std::to_string(i) + "_padding";

      std::unordered_map<std::string, FilterValue> filters;
      filters["iteration"] = static_cast<int64_t>(iter);
      filters["index"] = static_cast<int64_t>(i);

      auto result = store.AddDocument(pk, filters);
      ASSERT_TRUE(result.has_value()) << "Failed to add document " << i;
      doc_ids.push_back(*result);
    }

    ASSERT_EQ(store.Size(), kDocsPerIteration);

    // Concurrent deletion from multiple threads
    std::vector<std::thread> threads;
    std::atomic<int> delete_count{0};
    const int docs_per_thread = kDocsPerIteration / kNumThreads;

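    // Partition doc_ids into contiguous slices, one per thread; the last
    // thread also takes any remainder so every document is covered.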
    for (int t = 0; t < kNumThreads; ++t) {
      threads.emplace_back([&store, &doc_ids, &delete_count, docs_per_thread, t]() {
        int start = t * docs_per_thread;
        int end = (t == kNumThreads - 1) ? static_cast<int>(doc_ids.size()) : start + docs_per_thread;

        for (int i = start; i < end; ++i) {
          // RemoveDocument should not crash even with concurrent access
          // The bug was that primary_key reference became invalid after erase
          bool removed = store.RemoveDocument(doc_ids[i]);
          if (removed) {
            delete_count++;
          }
        }
      });
    }

    for (auto& thread : threads) {
      thread.join();
    }

    // All documents should be deleted
    EXPECT_EQ(delete_count.load(), kDocsPerIteration) << "Iteration " << iter << " failed";
    EXPECT_EQ(store.Size(), 0) << "Store not empty after iteration " << iter;

    // Verify all documents are gone
    for (const auto& doc_id : doc_ids) {
      auto doc = store.GetDocument(doc_id);
      EXPECT_FALSE(doc.has_value()) << "Document " << doc_id << " still exists";
    }
  }
}

/**
 * @brief Test concurrent add and remove operations with memory stress
 *
 * This test creates memory pressure by doing rapid add/remove cycles
 * across multiple threads, which increases the likelihood of detecting
 * use-after-free bugs due to memory reuse.
 */
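// Note: races of this kind surface most reliably when the test binary is built
// with AddressSanitizer or ThreadSanitizer; without a sanitizer a use-after-free
// may pass silently.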
TEST(DocumentStoreStressTest, ConcurrentAddRemoveMemoryStress) {
  DocumentStore store;

  constexpr int kNumThreads = 6;
  constexpr int kOperationsPerThread = 200;

  std::atomic<bool> stop{false};
  std::atomic<int> add_success{0};
  std::atomic<int> remove_success{0};
  std::vector<std::thread> threads;

  // Producer threads - add documents
  for (int t = 0; t < kNumThreads / 2; ++t) {
    threads.emplace_back([&store, &stop, &add_success, t]() {
      int counter = 0;
      while (!stop && counter < kOperationsPerThread) {
        std::string pk = "stress_add_thread_" + std::to_string(t) + "_doc_" + std::to_string(counter) +
                         "_with_extra_padding_for_memory_allocation";

        std::unordered_map<std::string, FilterValue> filters;
        filters["thread"] = static_cast<int64_t>(t);
        filters["counter"] = static_cast<int64_t>(counter);
        filters["description"] = std::string("Document created by thread ") + std::to_string(t);

        auto result = store.AddDocument(pk, filters);
        if (result.has_value()) {
          add_success++;
        }
        counter++;
      }
    });
  }

  // Consumer threads - remove documents (will remove whatever exists)
  for (int t = 0; t < kNumThreads / 2; ++t) {
    threads.emplace_back([&store, &stop, &remove_success]() {
      while (!stop) {
        // Get all doc IDs and try to remove some
        auto all_ids = store.GetAllDocIds();
        for (const auto& doc_id : all_ids) {
          if (stop) {
            break;
          }
          if (store.RemoveDocument(doc_id)) {
            remove_success++;
          }
        }
        // Small yield to allow other operations
        std::this_thread::yield();
      }
    });
  }

  // Let it run for a bit
  std::this_thread::sleep_for(std::chrono::milliseconds(300));
  stop = true;

  for (auto& thread : threads) {
    thread.join();
  }

  // Verify operations completed without crashes
  EXPECT_GT(add_success.load(), 0) << "No documents were added";
  // Note: remove_success may be small or even zero under unfavorable scheduling;
  // the main verification is that no crash or sanitizer error occurred.

  // Final state: every successful add is either still stored or was removed.
  size_t final_size = store.Size();
  EXPECT_EQ(final_size, static_cast<size_t>(add_success.load() - remove_success.load()));
}