Skip to content

Commit 71b9b60

Browse files
committed
osdc/ObjectCacher: avoid io blocking for bufferheads exceed limit
Fixes: https://tracker.ceph.com/issues/62918 Signed-off-by: shua.lv <[email protected]>
1 parent 11d962d commit 71b9b60

File tree

6 files changed

+188
-7
lines changed

6 files changed

+188
-7
lines changed

qa/suites/rados/objectstore/backends/objectcacher-stress.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ tasks:
1414
clients:
1515
all:
1616
- osdc/stress_objectcacher.sh
17+
- osdc/object_cacher_misc.sh
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/sh -ex
2+
3+
ceph_test_objectcacher_misc --flush-test
4+
5+
echo OK

src/osdc/ObjectCacher.cc

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,13 +1276,14 @@ void ObjectCacher::bh_write_commit(int64_t poolid, sobject_t oid,
12761276
finish_contexts(cct, ls, r);
12771277
}
12781278

1279-
void ObjectCacher::flush(ZTracer::Trace *trace, loff_t amount)
1279+
void ObjectCacher::flush(ZTracer::Trace *trace, loff_t amount, int max_bhs)
12801280
{
12811281
ceph_assert(trace != nullptr);
12821282
ceph_assert(ceph_mutex_is_locked(lock));
12831283
ceph::real_time cutoff = ceph::real_clock::now();
12841284

1285-
ldout(cct, 10) << "flush " << amount << dendl;
1285+
ldout(cct, 10) << "flush " << amount
1286+
<< " bytes, max bufferheads " << max_bhs << dendl;
12861287

12871288
/*
12881289
* NOTE: we aren't actually pulling things off the LRU here, just
@@ -1291,22 +1292,24 @@ void ObjectCacher::flush(ZTracer::Trace *trace, loff_t amount)
12911292
* lru_dirty.lru_get_next_expire() again.
12921293
*/
12931294
int64_t left = amount;
1294-
while (amount == 0 || left > 0) {
1295+
int left_bhs = max_bhs;
1296+
while ((amount == 0 && max_bhs == 0) || left > 0 || left_bhs > 0) {
12951297
BufferHead *bh = static_cast<BufferHead*>(
12961298
bh_lru_dirty.lru_get_next_expire());
12971299
if (!bh) break;
12981300
if (bh->last_write > cutoff) break;
12991301

13001302
if (scattered_write) {
1301-
bh_write_adjacencies(bh, cutoff, amount > 0 ? &left : NULL, NULL);
1303+
bh_write_adjacencies(bh, cutoff, amount > 0 ? &left : NULL,
1304+
max_bhs > 0 ? &left_bhs : NULL);
13021305
} else {
13031306
left -= bh->length();
1307+
left_bhs--;
13041308
bh_write(bh, *trace);
13051309
}
13061310
}
13071311
}
13081312

1309-
13101313
void ObjectCacher::trim()
13111314
{
13121315
ceph_assert(ceph_mutex_is_locked(lock));
@@ -1944,6 +1947,7 @@ int ObjectCacher::_wait_for_write(OSDWrite *wr, uint64_t len, ObjectSet *oset,
19441947
void ObjectCacher::flusher_entry()
19451948
{
19461949
ldout(cct, 10) << "flusher start" << dendl;
1950+
int target_dirty_bh = target_dirty >> BUFFER_MEMORY_WEIGHT;
19471951
std::unique_lock l{lock};
19481952
while (!flusher_stop) {
19491953
loff_t all = get_stat_tx() + get_stat_rx() + get_stat_clean() +
@@ -1957,20 +1961,27 @@ void ObjectCacher::flusher_entry()
19571961
<< target_dirty << " target, "
19581962
<< max_dirty << " max)"
19591963
<< dendl;
1960-
loff_t actual = get_stat_dirty() + get_stat_dirty_waiting();
19611964

19621965
ZTracer::Trace trace;
19631966
if (cct->_conf->osdc_blkin_trace_all) {
19641967
trace.init("flusher", &trace_endpoint);
19651968
trace.event("start");
19661969
}
19671970

1971+
loff_t actual = get_stat_dirty() + get_stat_dirty_waiting();
1972+
int actual_bhs = dirty_or_tx_bh.size() + get_stat_nr_dirty_waiters();
19681973
if (actual > 0 && (uint64_t) actual > target_dirty) {
19691974
// flush some dirty pages
19701975
ldout(cct, 10) << "flusher " << get_stat_dirty() << " dirty + "
19711976
<< get_stat_dirty_waiting() << " dirty_waiting > target "
19721977
<< target_dirty << ", flushing some dirty bhs" << dendl;
19731978
flush(&trace, actual - target_dirty);
1979+
} else if (actual_bhs > target_dirty_bh) {
1980+
ldout(cct, 10) << "flusher " << dirty_or_tx_bh.size() << " dirty/tx bh + "
1981+
<< get_stat_nr_dirty_waiters() << " dirty_waiters > "
1982+
<< "target dirty bh " << target_dirty_bh
1983+
<< ", flushing some dirty bhs" << dendl;
1984+
flush(&trace, 0, actual_bhs - target_dirty_bh);
19741985
} else {
19751986
// check tail of lru for old dirty items
19761987
ceph::real_time cutoff = ceph::real_clock::now();

src/osdc/ObjectCacher.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ class ObjectCacher {
541541
int64_t *amount, int *max_count);
542542

543543
void trim();
544-
void flush(ZTracer::Trace *trace, loff_t amount=0);
544+
void flush(ZTracer::Trace *trace, loff_t amount=0, int max_bhs=0);
545545

546546
/**
547547
* flush a range of buffers

src/test/osdc/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,16 @@ target_link_libraries(ceph_test_objectcacher_stress
1111
)
1212
install(TARGETS ceph_test_objectcacher_stress
1313
DESTINATION ${CMAKE_INSTALL_BINDIR})
14+
15+
add_executable(ceph_test_objectcacher_misc
16+
object_cacher_misc.cc
17+
MemWriteback.cc
18+
)
19+
target_link_libraries(ceph_test_objectcacher_misc
20+
osdc
21+
global
22+
${EXTRALIBS}
23+
${CMAKE_DL_LIBS}
24+
)
25+
install(TARGETS ceph_test_objectcacher_misc
26+
DESTINATION ${CMAKE_INSTALL_BINDIR})
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
4+
#include <cstdlib>
5+
#include <ctime>
6+
#include <sstream>
7+
#include <string>
8+
#include <vector>
9+
#include <boost/scoped_ptr.hpp>
10+
11+
#include "common/ceph_argparse.h"
12+
#include "common/ceph_mutex.h"
13+
#include "common/common_init.h"
14+
#include "common/config.h"
15+
#include "common/snap_types.h"
16+
#include "global/global_init.h"
17+
#include "include/buffer.h"
18+
#include "include/Context.h"
19+
#include "include/stringify.h"
20+
#include "osdc/ObjectCacher.h"
21+
22+
#include "FakeWriteback.h"
23+
#include "MemWriteback.h"
24+
25+
#include <atomic>
26+
27+
using namespace std;
28+
29+
int flush_test()
30+
{
31+
bool fail = false;
32+
bool done = false;
33+
uint64_t delay_ns = 0;
34+
ceph::mutex lock = ceph::make_mutex("object_cacher_misc");
35+
MemWriteback writeback(g_ceph_context, &lock, delay_ns);
36+
37+
int max_dirty_age = 1;
38+
uint64_t max_cache = 1 << 20; // max cache size, 1MB
39+
uint64_t max_dirty = 1 << 19; // max dirty, 512KB
40+
uint64_t target_dirty = 1 << 18; // target dirty, 256KB
41+
42+
int bl_size = 1 << 12;
43+
ceph::_page_shift = 16; // 64KB
44+
int max_dirty_bhs = max_dirty / (1 << ceph::_page_shift); // 8
45+
46+
std::cout << "Test configuration:\n"
47+
<< setw(20) << "max_cache: " << max_cache << "\n"
48+
<< setw(20) << "max_dirty_age: " << max_dirty_age << "\n"
49+
<< setw(20) << "max_dirty: " << max_dirty << "\n"
50+
<< setw(20) << "ceph::_page_shift: " << ceph::_page_shift << "\n"
51+
<< setw(20) << "max_dirty_bh: " << max_dirty_bhs << "\n"
52+
<< setw(20) << "write extent size: " << bl_size << "\n\n";
53+
54+
ObjectCacher obc(g_ceph_context, "test", writeback, lock, NULL, NULL,
55+
max_cache, // max cache size, 1MB
56+
1, // max objects, just one
57+
max_dirty, // max dirty, 512KB
58+
target_dirty, // target dirty, 256KB
59+
max_dirty_age,
60+
true);
61+
obc.start();
62+
63+
SnapContext snapc;
64+
ceph_tid_t journal_tid = 0;
65+
std::string oid("flush_test_obj");
66+
ObjectCacher::ObjectSet object_set(NULL, 0, 0);
67+
ceph::bufferlist zeroes_bl;
68+
zeroes_bl.append_zero(bl_size);
69+
70+
std::map<int, C_SaferCond> create_finishers;
71+
72+
utime_t last_start;
73+
for (int i = 0; i < max_dirty_bhs; ++i) {
74+
if (i == (max_dirty_bhs - 1)) {
75+
last_start = ceph_clock_now();
76+
}
77+
ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, zeroes_bl,
78+
ceph::real_clock::zero(), 0,
79+
++journal_tid);
80+
ObjectExtent extent(oid, 0, zeroes_bl.length()*i, zeroes_bl.length(), 0);
81+
extent.oloc.pool = 0;
82+
extent.buffer_extents.push_back(make_pair(0, bl_size));
83+
wr->extents.push_back(extent);
84+
lock.lock();
85+
obc.writex(wr, &object_set, &create_finishers[i]);
86+
lock.unlock();
87+
}
88+
utime_t last_end = ceph_clock_now();
89+
90+
std::cout << "Write " << max_dirty_bhs << " extents"
91+
<< ", total size " << zeroes_bl.length() * max_dirty_bhs
92+
<< ", attain max dirty bufferheads " << max_dirty_bhs
93+
<< ", but below max dirty " << max_dirty << std::endl;
94+
95+
if (last_end - last_start > utime_t(max_dirty_age, 0)) {
96+
std::cout << "Error: the last writex took more than " << max_dirty_age
97+
<< "s(max_dirty_age), fail to trigger flush" << std::endl;
98+
fail = true;;
99+
} else {
100+
std::cout << "Info: the last writex took " << last_end - last_start
101+
<< ", success to trigger flush" << std::endl;
102+
}
103+
104+
for (int i = 0; i < max_dirty_bhs; ++i) {
105+
create_finishers[i].wait();
106+
}
107+
108+
lock.lock();
109+
C_SaferCond flushcond;
110+
obc.flush_all(&flushcond);
111+
done = obc.flush_all(&flushcond);
112+
if (!done) {
113+
lock.unlock();
114+
flushcond.wait();
115+
lock.lock();
116+
}
117+
118+
obc.release_set(&object_set);
119+
lock.unlock();
120+
obc.stop();
121+
122+
if (fail) {
123+
std::cout << "Test ObjectCacher flush completed failed" << std::endl;
124+
return EXIT_FAILURE;
125+
}
126+
127+
std::cout << "Test ObjectCacher flush completed successfully" << std::endl;
128+
return EXIT_SUCCESS;
129+
}
130+
131+
int main(int argc, const char **argv)
132+
{
133+
auto args = argv_to_vec(argc, argv);
134+
auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
135+
CODE_ENVIRONMENT_UTILITY,
136+
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
137+
bool flush = false;
138+
std::vector<const char*>::iterator i;
139+
for (i = args.begin(); i != args.end();) {
140+
if (ceph_argparse_flag(args, i, "--flush-test", NULL)) {
141+
flush = true;
142+
} else {
143+
cerr << "unknown option " << *i << std::endl;
144+
return EXIT_FAILURE;
145+
}
146+
}
147+
148+
if (flush) {
149+
return flush_test();
150+
}
151+
}

0 commit comments

Comments
 (0)