Skip to content

Commit 6f8275c

Browse files
committed
Revert "os/bluestore: Deferred writes unit test, replicates corruption"
This reverts commit b2086ef.

Signed-off-by: Matan Breizman <[email protected]>
1 parent e1c7507 commit 6f8275c

File tree

1 file changed

+37
-253
lines changed

1 file changed

+37
-253
lines changed
Lines changed: 37 additions & 253 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,13 @@
11
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
22
// vim: ts=8 sw=2 smarttab
33

4-
#include <fcntl.h>
54
#include <stdio.h>
65
#include <stdlib.h>
76
#include <string.h>
87
#include <iostream>
98
#include <memory>
10-
#include <string>
119
#include <time.h>
1210

13-
#include "common/pretty_binary.h"
14-
#include "global/global_context.h"
15-
#include "kv/KeyValueDB.h"
1611
#include "os/ObjectStore.h"
1712
#include "os/bluestore/BlueStore.h"
1813
#include "include/Context.h"
@@ -23,11 +18,8 @@
2318
#include "common/errno.h"
2419
#include "common/options.h" // for the size literals
2520
#include <semaphore.h>
26-
#include "os/bluestore/Allocator.h"
27-
#include "os/bluestore/AvlAllocator.h"
2821

29-
using namespace std;
30-
typedef boost::mt11213b gen_type;
22+
3123

3224
class C_do_action : public Context {
3325
public:
@@ -40,33 +32,24 @@ class C_do_action : public Context {
4032
}
4133
};
4234

43-
gen_type rng(0);
44-
boost::uniform_int<> chargen('a', 'z');
45-
46-
std::string gen_string(size_t size) {
47-
std::string s;
48-
for (size_t i = 0; i < size; i++) {
49-
s.push_back(chargen(rng));
50-
}
51-
return s;
52-
}
53-
5435
void create_deferred_and_terminate() {
5536
std::unique_ptr<ObjectStore> store;
37+
38+
g_ceph_context->_conf._clear_safe_to_start_threads();
39+
g_ceph_context->_conf.set_val_or_die("bluestore_prefer_deferred_size", "4096");
40+
g_ceph_context->_conf.set_val_or_die("bluestore_allocator", "bitmap");
41+
g_ceph_context->_conf.set_val_or_die("bluestore_block_size", "10240000000");
42+
g_ceph_context->_conf.apply_changes(nullptr);
43+
5644
int64_t poolid;
5745
coll_t cid;
5846
ghobject_t hoid;
5947
ObjectStore::CollectionHandle ch;
60-
std::string const bluestore_dir = "bluestore.test_temp_dir";
61-
{
62-
string cmd = string("rm -rf ") + bluestore_dir;
63-
int r = ::system(cmd.c_str());
64-
ceph_assert(r == 0);
65-
}
66-
ceph_assert(::mkdir(bluestore_dir.c_str(), 0777) == 0);
48+
std::string const db_store_dir = "bluestore.test_temp_dir_" + std::to_string(time(NULL));
49+
ceph_assert(::mkdir(db_store_dir.c_str(), 0777) == 0);
6750
store = ObjectStore::create(g_ceph_context,
6851
"bluestore",
69-
bluestore_dir.c_str(),
52+
db_store_dir.c_str(),
7053
"store_test_temp_journal");
7154
ceph_assert(store->mkfs() == 0);
7255
ceph_assert(store->mount() == 0);
@@ -94,271 +77,72 @@ void create_deferred_and_terminate() {
9477
}
9578

9679
size_t object_count = 10;
97-
size_t keys_per_transaction = 100;
98-
size_t omap_push_repeats = 2200;
9980

10081
// initial fill
10182
bufferlist bl_64K;
10283
bl_64K.append(std::string(64 * 1024, '-'));
103-
//write objects
84+
85+
std::atomic<size_t> prefill_counter{0};
86+
sem_t prefill_mutex;
87+
sem_init(&prefill_mutex, 0, 0);
88+
10489
for (size_t o = 0; o < object_count; o++) {
10590
ObjectStore::Transaction t;
10691
std::string oid = "object-" + std::to_string(o);
10792
ghobject_t hoid(hobject_t(oid, "", CEPH_NOSNAP, 1, poolid, ""));
93+
10894
t.write(cid, hoid, 0, bl_64K.length(), bl_64K);
95+
t.register_on_commit(new C_do_action([&] {
96+
if (++prefill_counter == object_count) {
97+
sem_post(&prefill_mutex);
98+
}
99+
}));
100+
109101
r = store->queue_transaction(ch, std::move(t));
110102
ceph_assert(r == 0);
111103
}
112-
//spam omap
113-
for (size_t q = 0; q < omap_push_repeats; q++) {
114-
for (size_t o = 0; o < object_count; o++) {
115-
ObjectStore::Transaction t;
116-
std::string oid = "object-" + std::to_string(o);
117-
ghobject_t hoid(hobject_t(oid, "", CEPH_NOSNAP, 1, poolid, ""));
118-
119-
std::map<std::string, bufferlist> new_keys;
120-
for (size_t m = 0; m < keys_per_transaction; m++) {
121-
bufferlist bl;
122-
bl.append(gen_string(100));
123-
new_keys.emplace(to_string(q)+gen_string(50), bl);
124-
}
125-
t.omap_setkeys(cid, hoid, new_keys);
126-
r = store->queue_transaction(ch, std::move(t));
127-
ceph_assert(r == 0);
128-
};
129-
}
104+
sem_wait(&prefill_mutex);
130105

131106
// small deferred writes over object
132107
// and complete overwrite of previous one
133108
bufferlist bl_8_bytes;
134109
bl_8_bytes.append("abcdefgh");
135110
std::atomic<size_t> deferred_counter{0};
136-
for (size_t o = 0; o < object_count/* - 1*/; o++) {
111+
for (size_t o = 0; o < object_count - 1; o++) {
137112
ObjectStore::Transaction t;
138113

139114
// sprinkle deferred writes
140-
std::string oid_d = "object-" + std::to_string(o/* + 1*/);
115+
std::string oid_d = "object-" + std::to_string(o + 1);
141116
ghobject_t hoid_d(hobject_t(oid_d, "", CEPH_NOSNAP, 1, poolid, ""));
117+
142118
for(int i = 0; i < 16; i++) {
143119
t.write(cid, hoid_d, 4096 * i, bl_8_bytes.length(), bl_8_bytes);
144120
}
145-
// overwrite object content
121+
122+
// overwrite previous object
146123
std::string oid_m = "object-" + std::to_string(o);
147124
ghobject_t hoid_m(hobject_t(oid_m, "", CEPH_NOSNAP, 1, poolid, ""));
148-
t.write(cid, hoid_m, 4096 * o, bl_64K.length(), bl_64K);
125+
t.write(cid, hoid_m, 0, bl_64K.length(), bl_64K);
149126

150127
t.register_on_commit(new C_do_action([&] {
151-
if (++deferred_counter == object_count) {
128+
if (++deferred_counter == object_count - 1) {
152129
exit(0);
153130
}
154131
}));
155132
r = store->queue_transaction(ch, std::move(t));
156133
ceph_assert(r == 0);
157134
}
158-
sleep(100);
135+
sleep(10);
159136
ceph_assert(0 && "should not reach here");
160137
}
161138

162-
void mount_check_L()
163-
{
164-
std::unique_ptr<ObjectStore> store;
165-
store = ObjectStore::create(g_ceph_context,
166-
"bluestore", "bluestore.test_temp_dir", "store_test_temp_journal");
167-
// this should replay all deferred writes
168-
std::cout << "mounting..." << std::endl;
169-
ceph_assert(store->mount() == 0);
170-
std::cout << "checking for stale deferred (L)..." << std::endl;
171-
172-
// now there should be no L entries
173-
BlueStore* bs = dynamic_cast<BlueStore*>(store.get());
174-
ceph_assert(bs);
175-
KeyValueDB* db = bs->get_kv();
176-
KeyValueDB::Iterator it = db->get_iterator("L");
177-
it->seek_to_first();
178-
if (it->valid()) {
179-
while (it->valid()) {
180-
std::cout << pretty_binary_string(it->key()) << std::endl;
181-
it->next();
182-
}
183-
ceph_assert(false && "there are L entries");
184-
}
185-
it.reset();
186-
ceph_assert(store->umount() == 0);
187-
std::cout << "all done and good" << std::endl;
188-
}
189-
190-
191-
192-
193-
/*
194-
* The test verifies that it's not possible for the deferred_replay procedure
195-
* to overwrite BlueFS data.
196-
* Corruption occurs when:
197-
* - BlueFS allocated some space
198-
* - deferred wrote over this space
199-
* Instead, a stronger condition is checked:
200-
* - BlueFS allocated any space
201-
* - deferred wrote over
202-
*/
203-
void mount_check_alloc()
204-
{
205-
std::unique_ptr<ObjectStore> store;
206-
207-
ObjectStore::CollectionHandle ch;
208-
store = ObjectStore::create(g_ceph_context,
209-
"bluestore",
210-
"bluestore.test_temp_dir",
211-
"store_test_temp_journal");
212-
// this should replay all deferred writes
213-
BlueStore* bs = dynamic_cast<BlueStore*>(store.get());
214-
ceph_assert(bs);
215-
216-
bool called_allocate = false;
217-
vector<pair<uint64_t, uint64_t> > captured_allocations;
218-
bs->set_tracepoint_debug_deferred_replay_start(
219-
[&](){
220-
std::cout << "action before deferred replay" << std::endl;
221-
Allocator* alloc = bs->debug_get_alloc();
222-
alloc->foreach(
223-
[&](uint64_t offset, uint64_t length) {
224-
captured_allocations.emplace_back(offset, length);
225-
});
226-
std::cout << "sleeping to give compaction a chance" << std::endl;
227-
sleep(10);
228-
std::cout << "sleep end" << std::endl;
229-
});
230-
bs->set_tracepoint_debug_deferred_replay_end(
231-
[&](){
232-
std::cout << "action after deferred replay" << std::endl;
233-
Allocator* alloc = bs->debug_get_alloc();
234-
auto ca_it = captured_allocations.begin();
235-
alloc->foreach(
236-
[&](uint64_t offset, uint64_t length) {
237-
if (ca_it == captured_allocations.end()) {
238-
called_allocate = true;
239-
return;
240-
}
241-
if (ca_it->first != offset || ca_it->second != length) {
242-
called_allocate = true;
243-
}
244-
ca_it++;
245-
});
246-
std::cout << "called_allocate=" << called_allocate << std::endl;
247-
bs->set_tracepoint_debug_deferred_replay_track(nullptr);
248-
bs->set_tracepoint_debug_deferred_replay_start(nullptr);
249-
bs->set_tracepoint_debug_deferred_replay_end(nullptr);
250-
});
251-
252-
interval_set<uint64_t> not_onode_allocations;
253-
bs->set_tracepoint_debug_init_alloc_done(
254-
[&](){
255-
Allocator* alloc = bs->debug_get_alloc();
256-
alloc->foreach(
257-
[&](uint64_t start, uint64_t len) {
258-
not_onode_allocations.insert(start, len);
259-
});
260-
bs->set_tracepoint_debug_init_alloc_done(nullptr);
261-
});
262-
interval_set<uint64_t> extents_sum;
263-
bs->set_tracepoint_debug_deferred_replay_track(
264-
[&](const bluestore_deferred_transaction_t& dtxn) {
265-
for (auto& op : dtxn.ops) {
266-
for (auto& e : op.extents) {
267-
extents_sum.insert(e.offset, e.length);
268-
}
269-
}
270-
});
271-
std::cout << "mounting..." << std::endl;
272-
ceph_assert(store->mount() == 0);
273-
std::cout << "mount done" << std::endl;
274-
std::cout << std::hex << "disk not used by onodes:" << not_onode_allocations << std::dec << std::endl;
275-
std::cout << std::hex << "disk deferred wrote to:" << extents_sum << std::dec << std::endl;
276-
std::cout << "allocated_some=" << called_allocate << std::endl;
277-
interval_set<uint64_t> wrote_to_not_onodes;
278-
wrote_to_not_onodes.intersection_of(extents_sum, not_onode_allocations);
279-
std::cout << std::hex << "disk not used by onodes written by deferred="
280-
<< wrote_to_not_onodes << std::dec << std::endl;
281-
bool only_wrote_to_onodes = wrote_to_not_onodes.empty();
282-
bs->set_tracepoint_debug_deferred_replay_start(nullptr);
283-
ceph_assert(store->umount() == 0);
284-
285-
ceph_assert(!called_allocate || only_wrote_to_onodes);
286-
}
287-
288-
289-
290-
291-
int argc;
292-
char **argv;
293-
294-
boost::intrusive_ptr<CephContext> setup_env() {
139+
int main(int argc, char **argv) {
295140
auto args = argv_to_vec(argc, argv);
296-
auto cct = global_init(
297-
NULL, args, CEPH_ENTITY_TYPE_CLIENT,
298-
CODE_ENVIRONMENT_UTILITY,
299-
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
141+
auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
142+
CODE_ENVIRONMENT_UTILITY,
143+
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
300144
common_init_finish(g_ceph_context);
301145

302-
g_ceph_context->_conf._clear_safe_to_start_threads();
303-
g_ceph_context->_conf.set_val_or_die("bluestore_prefer_deferred_size", "4096");
304-
g_ceph_context->_conf.set_val_or_die("bluefs_shared_alloc_size", "4096");
305-
g_ceph_context->_conf.set_val_or_die("bluestore_block_size", "10240000000");
306-
g_ceph_context->_conf.apply_changes(nullptr);
307-
return cct;
308-
}
309-
310-
int main(int _argc, char **_argv) {
311-
argc = _argc;
312-
argv = _argv;
313-
314-
pid_t first_test = fork();
315-
if (first_test == 0) {
316-
std::cout << "1. Testing deletion of deferred (L) entries." << std::endl;
317-
pid_t child = fork();
318-
if (child == 0) {
319-
auto cct = setup_env();
320-
g_ceph_context->_conf->bluestore_allocator = "bitmap";
321-
g_ceph_context->_conf->bluestore_rocksdb_options +=
322-
",level0_file_num_compaction_trigger=4";
323-
create_deferred_and_terminate();
324-
ceph_assert(false && "should exit() earlier");
325-
} else {
326-
std::cout << "Waiting for fill omap and create deferred..." << std::endl;
327-
int stat;
328-
waitpid(child, &stat, 0);
329-
ceph_assert(WIFEXITED(stat) && WEXITSTATUS(stat) == 0);
330-
std::cout << "done and subprocess terminated." << std::endl;
331-
auto cct = setup_env();
332-
g_ceph_context->_conf->bluestore_allocator = "bitmap";
333-
g_ceph_context->_conf->bluestore_rocksdb_options +=
334-
",level0_file_num_compaction_trigger=2";
335-
mount_check_L();
336-
}
337-
} else {
338-
int first_stat;
339-
waitpid(first_test, &first_stat, 0);
340-
ceph_assert(WIFEXITED(first_stat) && WEXITSTATUS(first_stat) == 0);
341-
std::cout << "2. Testing overwrite of space allocated by BlueFS" << std::endl;
342-
pid_t child = fork();
343-
if (child == 0) {
344-
auto cct = setup_env();
345-
g_ceph_context->_conf->bluestore_allocator = "avl";
346-
g_ceph_context->_conf->bluestore_rocksdb_options +=
347-
",level0_file_num_compaction_trigger=4";
348-
create_deferred_and_terminate();
349-
ceph_assert(false && "should exit() earlier");
350-
} else {
351-
std::cout << "Waiting for fill omap and create deferred..." << std::endl;
352-
int stat;
353-
waitpid(child, &stat, 0);
354-
ceph_assert(WIFEXITED(stat) && WEXITSTATUS(stat) == 0);
355-
std::cout << "done and subprocess terminated." << std::endl;
356-
auto cct = setup_env();
357-
g_ceph_context->_conf->bluestore_allocator = "avl";
358-
g_ceph_context->_conf->bluestore_rocksdb_options +=
359-
",level0_file_num_compaction_trigger=2";
360-
mount_check_alloc();
361-
}
362-
}
146+
create_deferred_and_terminate();
363147
return 0;
364148
}

0 commit comments

Comments
 (0)