Skip to content

Commit 2f2ed86

Browse files
committed
MB-68916: Fix ep_testsuite_dcp
Some tests in ep_testsuite_dcp spawn a std::thread for running a DCP client loop that opens a DCP producer, issues a StreamRequest and listens for data. The problematic part here is in the StreamRequest. The Producer's stream-map is of type folly::AtomicHashArray<>, which maps vbid->streams. That folly type uses a few thread locals internally that end up storing references to memory that is allocated in the bucket arena in the EPE::stream_req path. The std::thread is then destroyed outside any bucket context, causing a memory-tracking mismatch when the thread's TSD is released. E.g.: % env CB_ARENA_MALLOC_VERIFY_DEALLOC_CLIENT=1 lldb -- ./ep_testsuite_dcp "-E" "ep" "-v" "-e" "compression_mode=active;dbname=./ep_testsuite_dcp.value_eviction.comp_active.db" -C 29 .. Running [29/91]: test producer keep stream open...===ERROR===: JeArenaMalloc deallocation mismatch Memory freed by client:100 domain:None which is assigned arena:0, but memory was previously allocated from arena:2 (client-specific arena). Allocation address:0x103dcda40 size:24 .. Process 90807 stopped * thread #43, name = 'dcp_thread', stop reason = EXC_BREAKPOINT (code=1, subcode=0x185e46ae4) frame #0: 0x0000000185e46ae4 libsystem_c.dylib` __abort + 168 libsystem_c.dylib`: -> 0x185e46ae4 <+168>: brk #0x1 libsystem_c.dylib`abort_report_np: 0x185e46ae8 <+0>: pacibsp 0x185e46aec <+4>: sub sp, sp, #0x30 0x185e46af0 <+8>: stp x20, x19, [sp, #0x10] Target 0: (ep_testsuite_dcp) stopped. 
(lldb) bt * thread #43, name = 'dcp_thread', stop reason = EXC_BREAKPOINT (code=1, subcode=0x185e46ae4) * frame #0: 0x0000000185e46ae4 libsystem_c.dylib` __abort + 168 frame #1: 0x0000000185e46a3c libsystem_c.dylib` abort + 192 frame #2: 0x00000001001d8808 ep_testsuite_dcp` cb::verifyMemDeallocatedByCorrectClient(client=0x0000000171696c78, ptr=0x0000000103dcda40, size=24) + 536 at je_arena_malloc.cc:247 frame #3: 0x00000001001d8898 ep_testsuite_dcp` cb::_JEArenaMalloc<cb::JEArenaSimpleTracker>::sized_free(ptr=0x0000000103dcda40, size=24) + 84 at je_arena_malloc.cc:442 frame #4: 0x00000001000d6dc0 ep_testsuite_dcp` cb::_ArenaMalloc<cb::_JEArenaMalloc<cb::JEArenaSimpleTracker>>::sized_free(ptr=0x0000000103dcda40, size=24) + 32 at cb_arena_malloc.h:273 frame #5: 0x00000001000d6d8c ep_testsuite_dcp` cb_sized_free(ptr=0x0000000103dcda40, size=24) + 44 at cb_malloc_arena.cc:75 frame #6: 0x00000001000d7398 ep_testsuite_dcp` operator delete(ptr=0x0000000103dcda40, size=24) + 32 at global_new_replacement.cc:146 frame #7: 0x000000010081f534 ep_testsuite_dcp` void folly::threadlocal_detail::ElementWrapper::set<folly::ThreadCachedInt<unsigned long long, unsigned long long>::IntCache*>(this=0x0000000171696d33, pt=0x0000000103dcda40, (null)=THIS_THREAD)::'lambda'(void*, folly::TLPDestructionMode)::operator()(void*, folly::TLPDestructionMode) const + 68 at ThreadLocalDetail.h:138 frame #8: 0x000000010081f4e4 ep_testsuite_dcp` void folly::threadlocal_detail::ElementWrapper::set<folly::ThreadCachedInt<unsigned long long, unsigned long long>::IntCache*>(folly::ThreadCachedInt<unsigned long long, unsigned long long>::IntCache*)::'lambda'(void*, folly::TLPDestructionMode)::__invoke(pt=0x0000000103dcda40, (null)=THIS_THREAD) + 36 at ThreadLocalDetail.h:137 frame #9: 0x0000000100168e90 ep_testsuite_dcp` folly::threadlocal_detail::ElementWrapper::dispose(this=0x000000011f7053c8, mode=THIS_THREAD) + 324 at ThreadLocalDetail.h:114 frame #10: 0x0000000100cd0758 ep_testsuite_dcp` 
folly::threadlocal_detail::StaticMetaBase::onThreadExit(ptr=0x0000000103dd1800) + 404 at ThreadLocalDetail.cpp:153 frame #11: 0x0000000185f37870 libsystem_pthread.dylib` _pthread_tsd_cleanup + 488 frame #12: 0x0000000185f3a684 libsystem_pthread.dylib` _pthread_exit + 84 frame #13: 0x0000000185f39fa0 libsystem_pthread.dylib` _pthread_start + 148 Fix by executing DcpOpen+StreamRequest in the main test thread. All memcached/bucket resources are released before the main thread shuts down. Notes: - This is a test-only issue; no production bug is addressed here - Patch verified locally with https://review.couchbase.org/c/platform/+/234973, which is being submitted after all related failures are fixed Change-Id: Ida20abef00daddb8da4d65305316eba7baccaec7 Reviewed-on: https://review.couchbase.org/c/kv_engine/+/235361 Tested-by: Build Bot <[email protected]> Well-Formed: Restriction Checker Reviewed-by: Jim Walker <[email protected]>
1 parent 865921e commit 2f2ed86

File tree

1 file changed

+56
-58
lines changed

1 file changed

+56
-58
lines changed

engines/ep/tests/ep_testsuite_dcp.cc

Lines changed: 56 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ class TestDcpConsumer {
236236
stream_ctxs.push_back(ctx);
237237
}
238238

239+
void openConnectionAndStreams();
240+
239241
void run(bool openConn = true);
240242

241243
// Stop the thread if it is running. This is safe to be called from
@@ -399,22 +401,22 @@ void TestDcpConsumer::deleteOrExpireCase(TestDcpConsumer::VBStats& stats,
399401
return;
400402
}
401403

402-
void TestDcpConsumer::run(bool openConn) {
404+
void TestDcpConsumer::openConnectionAndStreams() {
403405
checkle(size_t{1}, stream_ctxs.size(), "No dcp_stream arguments provided!");
404-
405-
/* Open the connection with the DCP producer */
406-
if (openConn) {
407-
openConnection();
408-
}
409-
406+
openConnection();
410407
if (collectionFilter) {
411408
// Enable noop ops needed for collections
412409
checkeq(cb::engine_errc::success,
413410
dcp->control(*cookie, opaque, "enable_noop", "true"),
414411
"Failed to enable noop");
415412
}
416-
/* Open streams in the above open connection */
417413
openStreams();
414+
}
415+
416+
void TestDcpConsumer::run(bool openConn) {
417+
if (openConn) {
418+
openConnectionAndStreams();
419+
}
418420

419421
size_t num_stream_ends_received = 0;
420422
uint32_t bytes_read = 0;
@@ -2133,7 +2135,7 @@ static test_result testDcpProducerExpiredItemBackfill(
21332135
}
21342136

21352137
tdc.addStreamCtx(ctx);
2136-
2138+
tdc.openStreams();
21372139
tdc.run(false);
21382140

21392141
testHarness->destroy_cookie(cookie);
@@ -2397,6 +2399,7 @@ static enum test_result test_dcp_producer_stream_req_coldness(EngineIface* h) {
23972399
ctx.flags |= DCP_ADD_STREAM_FLAG_DISKONLY;
23982400

23992401
tdc.addStreamCtx(ctx);
2402+
tdc.openStreams();
24002403
tdc.run(false);
24012404

24022405
checkeq(tdc.getNruCounters()[1],
@@ -2540,18 +2543,16 @@ static enum test_result test_dcp_producer_keep_stream_open(EngineIface* h) {
25402543
auto* cookie = testHarness->create_cookie(h);
25412544
const std::string conn_name = "test-consumer";
25422545
auto consumer = std::make_unique<TestDcpConsumer>(conn_name, cookie, h);
2543-
auto dcp_thread = create_thread(
2544-
[&]() {
2545-
DcpStreamCtx ctx;
2546-
ctx.vbucket = Vbid(0);
2547-
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
2548-
ctx.seqno = {0, std::numeric_limits<uint64_t>::max()};
2549-
ctx.snapshot = {0, 0};
2550-
ctx.skip_verification = true;
2551-
consumer->addStreamCtx(ctx);
2552-
consumer->run();
2553-
},
2554-
"dcp_thread");
2546+
DcpStreamCtx ctx;
2547+
ctx.vbucket = Vbid(0);
2548+
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
2549+
ctx.seqno = {0, std::numeric_limits<uint64_t>::max()};
2550+
ctx.snapshot = {0, 0};
2551+
ctx.skip_verification = true;
2552+
consumer->addStreamCtx(ctx);
2553+
consumer->openConnectionAndStreams();
2554+
auto dcp_thread =
2555+
create_thread([&]() { consumer->run(false); }, "dcp_thread");
25552556

25562557
/* Wait for producer to be created */
25572558
wait_for_stat_to_be(h, "ep_dcp_producer_count", 1, "dcp");
@@ -2668,18 +2669,16 @@ static enum test_result test_dcp_producer_keep_stream_open_replica(
26682669
auto* cookie1 = testHarness->create_cookie(h);
26692670
const std::string conn_name1 = "test-consumer";
26702671
auto consumer = std::make_unique<TestDcpConsumer>(conn_name1, cookie1, h);
2671-
auto dcp_thread = create_thread(
2672-
[&]() {
2673-
DcpStreamCtx ctx;
2674-
ctx.vbucket = Vbid(0);
2675-
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
2676-
ctx.seqno = {0, std::numeric_limits<uint64_t>::max()};
2677-
ctx.snapshot = {0, 0};
2678-
ctx.skip_verification = true;
2679-
consumer->addStreamCtx(ctx);
2680-
consumer->run();
2681-
},
2682-
"dcp_thread");
2672+
DcpStreamCtx ctx;
2673+
ctx.vbucket = Vbid(0);
2674+
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
2675+
ctx.seqno = {0, std::numeric_limits<uint64_t>::max()};
2676+
ctx.snapshot = {0, 0};
2677+
ctx.skip_verification = true;
2678+
consumer->addStreamCtx(ctx);
2679+
consumer->openConnectionAndStreams();
2680+
auto dcp_thread =
2681+
create_thread([&]() { consumer->run(false); }, "dcp_thread");
26832682

26842683
/* Wait for producer to be created */
26852684
wait_for_stat_to_be(h, "ep_dcp_producer_count", 1, "dcp");
@@ -2745,18 +2744,16 @@ static enum test_result test_dcp_producer_stream_cursor_movement(
27452744
auto* cookie = testHarness->create_cookie(h);
27462745
const std::string conn_name = "test-consumer";
27472746
auto consumer = std::make_unique<TestDcpConsumer>(conn_name, cookie, h);
2748-
auto dcp_thread = create_thread(
2749-
[&]() {
2750-
DcpStreamCtx ctx;
2751-
ctx.vbucket = Vbid(0);
2752-
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
2753-
ctx.seqno = {20, std::numeric_limits<uint64_t>::max()};
2754-
ctx.snapshot = {20, 20};
2755-
ctx.skip_verification = true;
2756-
consumer->addStreamCtx(ctx);
2757-
consumer->run();
2758-
},
2759-
"dcp_thread");
2747+
DcpStreamCtx ctx;
2748+
ctx.vbucket = Vbid(0);
2749+
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
2750+
ctx.seqno = {20, std::numeric_limits<uint64_t>::max()};
2751+
ctx.snapshot = {20, 20};
2752+
ctx.skip_verification = true;
2753+
consumer->addStreamCtx(ctx);
2754+
consumer->openConnectionAndStreams();
2755+
auto dcp_thread =
2756+
create_thread([&]() { consumer->run(false); }, "dcp_thread");
27602757

27612758
/* Wait for producer to be created */
27622759
wait_for_stat_to_be(h, "ep_dcp_producer_count", 1, "dcp");
@@ -5754,6 +5751,7 @@ static test_result test_dcp_replica_stream_expiries(
57545751
"Failed to enable_expiry_opcode");
57555752
}
57565753
tdc.addStreamCtx(ctx);
5754+
tdc.openStreams();
57575755
tdc.run(false);
57585756

57595757
testHarness->destroy_cookie(cookie1);
@@ -5838,7 +5836,7 @@ static test_result test_stream_deleteWithMeta_expiration(
58385836
}
58395837

58405838
tdc.addStreamCtx(ctx);
5841-
5839+
tdc.openStreams();
58425840
tdc.run(false);
58435841

58445842
testHarness->destroy_cookie(cookie);
@@ -7025,20 +7023,19 @@ static enum test_result test_dcp_multiple_streams(EngineIface* h) {
70257023
static enum test_result test_dcp_on_vbucket_state_change(EngineIface* h) {
70267024
// Set up a DcpTestConsumer that would remain in in-memory mode
70277025
auto* cookie = testHarness->create_cookie(h);
7026+
70287027
const std::string conn_name = "test-consumer";
70297028
auto consumer = std::make_unique<TestDcpConsumer>(conn_name, cookie, h);
7030-
auto dcp_thread = create_thread(
7031-
[&]() {
7032-
DcpStreamCtx ctx;
7033-
ctx.vbucket = Vbid(0);
7034-
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
7035-
ctx.seqno = {0, std::numeric_limits<uint64_t>::max()};
7036-
ctx.snapshot = {0, 0};
7037-
ctx.skip_verification = true;
7038-
consumer->addStreamCtx(ctx);
7039-
consumer->run();
7040-
},
7041-
"dcp_thread");
7029+
DcpStreamCtx ctx;
7030+
ctx.vbucket = Vbid(0);
7031+
ctx.vb_uuid = get_ull_stat(h, "vb_0:0:id", "failovers");
7032+
ctx.seqno = {0, std::numeric_limits<uint64_t>::max()};
7033+
ctx.snapshot = {0, 0};
7034+
ctx.skip_verification = true;
7035+
consumer->addStreamCtx(ctx);
7036+
consumer->openConnectionAndStreams();
7037+
auto dcp_thread =
7038+
create_thread([&]() { consumer->run(false); }, "dcp_thread");
70427039

70437040
// Wait for producer to be created
70447041
wait_for_stat_to_be(h, "ep_dcp_producer_count", 1, "dcp");
@@ -7859,6 +7856,7 @@ static enum test_result testDcpOsoBackfill(EngineIface* h) {
78597856
"Failed control enable_out_of_order_snapshots");
78607857

78617858
tdc.addStreamCtx(ctx);
7859+
tdc.openStreams();
78627860
tdc.run(false);
78637861

78647862
testHarness->destroy_cookie(cookie);

0 commit comments

Comments
 (0)