11// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
22// vim: ts=8 sw=2 smarttab
33
4- #include < fcntl.h>
54#include < stdio.h>
65#include < stdlib.h>
76#include < string.h>
87#include < iostream>
98#include < memory>
10- #include < string>
119#include < time.h>
1210
13- #include " common/pretty_binary.h"
14- #include " global/global_context.h"
15- #include " kv/KeyValueDB.h"
1611#include " os/ObjectStore.h"
1712#include " os/bluestore/BlueStore.h"
1813#include " include/Context.h"
2318#include " common/errno.h"
2419#include " common/options.h" // for the size literals
2520#include < semaphore.h>
26- #include " os/bluestore/Allocator.h"
27- #include " os/bluestore/AvlAllocator.h"
2821
29- using namespace std ;
30- typedef boost::mt11213b gen_type;
22+
3123
3224class C_do_action : public Context {
3325public:
@@ -40,33 +32,24 @@ class C_do_action : public Context {
4032 }
4133};
4234
43- gen_type rng (0 );
44- boost::uniform_int<> chargen (' a' , ' z' );
45-
46- std::string gen_string (size_t size) {
47- std::string s;
48- for (size_t i = 0 ; i < size; i++) {
49- s.push_back (chargen (rng));
50- }
51- return s;
52- }
53-
5435void create_deferred_and_terminate () {
5536 std::unique_ptr<ObjectStore> store;
37+
38+ g_ceph_context->_conf ._clear_safe_to_start_threads ();
39+ g_ceph_context->_conf .set_val_or_die (" bluestore_prefer_deferred_size" , " 4096" );
40+ g_ceph_context->_conf .set_val_or_die (" bluestore_allocator" , " bitmap" );
41+ g_ceph_context->_conf .set_val_or_die (" bluestore_block_size" , " 10240000000" );
42+ g_ceph_context->_conf .apply_changes (nullptr );
43+
5644 int64_t poolid;
5745 coll_t cid;
5846 ghobject_t hoid;
5947 ObjectStore::CollectionHandle ch;
60- std::string const bluestore_dir = " bluestore.test_temp_dir" ;
61- {
62- string cmd = string (" rm -rf " ) + bluestore_dir;
63- int r = ::system (cmd.c_str ());
64- ceph_assert (r == 0 );
65- }
66- ceph_assert (::mkdir (bluestore_dir.c_str (), 0777 ) == 0 );
48+ std::string const db_store_dir = " bluestore.test_temp_dir_" + std::to_string (time (NULL ));
49+ ceph_assert (::mkdir (db_store_dir.c_str (), 0777 ) == 0 );
6750 store = ObjectStore::create (g_ceph_context,
6851 " bluestore" ,
69- bluestore_dir .c_str (),
52+ db_store_dir .c_str (),
7053 " store_test_temp_journal" );
7154 ceph_assert (store->mkfs () == 0 );
7255 ceph_assert (store->mount () == 0 );
@@ -94,271 +77,72 @@ void create_deferred_and_terminate() {
9477 }
9578
9679 size_t object_count = 10 ;
97- size_t keys_per_transaction = 100 ;
98- size_t omap_push_repeats = 2200 ;
9980
10081 // initial fill
10182 bufferlist bl_64K;
10283 bl_64K.append (std::string (64 * 1024 , ' -' ));
103- // write objects
84+
85+ std::atomic<size_t > prefill_counter{0 };
86+ sem_t prefill_mutex;
87+ sem_init (&prefill_mutex, 0 , 0 );
88+
10489 for (size_t o = 0 ; o < object_count; o++) {
10590 ObjectStore::Transaction t;
10691 std::string oid = " object-" + std::to_string (o);
10792 ghobject_t hoid (hobject_t (oid, " " , CEPH_NOSNAP, 1 , poolid, " " ));
93+
10894 t.write (cid, hoid, 0 , bl_64K.length (), bl_64K);
95+ t.register_on_commit (new C_do_action ([&] {
96+ if (++prefill_counter == object_count) {
97+ sem_post (&prefill_mutex);
98+ }
99+ }));
100+
109101 r = store->queue_transaction (ch, std::move (t));
110102 ceph_assert (r == 0 );
111103 }
112- // spam omap
113- for (size_t q = 0 ; q < omap_push_repeats; q++) {
114- for (size_t o = 0 ; o < object_count; o++) {
115- ObjectStore::Transaction t;
116- std::string oid = " object-" + std::to_string (o);
117- ghobject_t hoid (hobject_t (oid, " " , CEPH_NOSNAP, 1 , poolid, " " ));
118-
119- std::map<std::string, bufferlist> new_keys;
120- for (size_t m = 0 ; m < keys_per_transaction; m++) {
121- bufferlist bl;
122- bl.append (gen_string (100 ));
123- new_keys.emplace (to_string (q)+gen_string (50 ), bl);
124- }
125- t.omap_setkeys (cid, hoid, new_keys);
126- r = store->queue_transaction (ch, std::move (t));
127- ceph_assert (r == 0 );
128- };
129- }
104+ sem_wait (&prefill_mutex);
130105
131106 // small deferred writes over object
132107 // and complete overwrite of previous one
133108 bufferlist bl_8_bytes;
134109 bl_8_bytes.append (" abcdefgh" );
135110 std::atomic<size_t > deferred_counter{0 };
136- for (size_t o = 0 ; o < object_count/* - 1*/ ; o++) {
111+ for (size_t o = 0 ; o < object_count - 1 ; o++) {
137112 ObjectStore::Transaction t;
138113
139114 // sprinkle deferred writes
140- std::string oid_d = " object-" + std::to_string (o/* + 1*/ );
115+ std::string oid_d = " object-" + std::to_string (o + 1 );
141116 ghobject_t hoid_d (hobject_t (oid_d, " " , CEPH_NOSNAP, 1 , poolid, " " ));
117+
142118 for (int i = 0 ; i < 16 ; i++) {
143119 t.write (cid, hoid_d, 4096 * i, bl_8_bytes.length (), bl_8_bytes);
144120 }
145- // overwrite object content
121+
122+ // overwrite previous object
146123 std::string oid_m = " object-" + std::to_string (o);
147124 ghobject_t hoid_m (hobject_t (oid_m, " " , CEPH_NOSNAP, 1 , poolid, " " ));
148- t.write (cid, hoid_m, 4096 * o , bl_64K.length (), bl_64K);
125+ t.write (cid, hoid_m, 0 , bl_64K.length (), bl_64K);
149126
150127 t.register_on_commit (new C_do_action ([&] {
151- if (++deferred_counter == object_count) {
128+ if (++deferred_counter == object_count - 1 ) {
152129 exit (0 );
153130 }
154131 }));
155132 r = store->queue_transaction (ch, std::move (t));
156133 ceph_assert (r == 0 );
157134 }
158- sleep (100 );
135+ sleep (10 );
159136 ceph_assert (0 && " should not reach here" );
160137}
161138
162- void mount_check_L ()
163- {
164- std::unique_ptr<ObjectStore> store;
165- store = ObjectStore::create (g_ceph_context,
166- " bluestore" , " bluestore.test_temp_dir" , " store_test_temp_journal" );
167- // this should replay all deferred writes
168- std::cout << " mounting..." << std::endl;
169- ceph_assert (store->mount () == 0 );
170- std::cout << " checking for stale deferred (L)..." << std::endl;
171-
172- // now there should be no L entries
173- BlueStore* bs = dynamic_cast <BlueStore*>(store.get ());
174- ceph_assert (bs);
175- KeyValueDB* db = bs->get_kv ();
176- KeyValueDB::Iterator it = db->get_iterator (" L" );
177- it->seek_to_first ();
178- if (it->valid ()) {
179- while (it->valid ()) {
180- std::cout << pretty_binary_string (it->key ()) << std::endl;
181- it->next ();
182- }
183- ceph_assert (false && " there are L entries" );
184- }
185- it.reset ();
186- ceph_assert (store->umount () == 0 );
187- std::cout << " all done and good" << std::endl;
188- }
189-
190-
191-
192-
193- /*
194- * The test verifies that its not possible for deferred_replay procedure
195- * to overwrite BlueFS data.
196- * Corruption occurs when:
197- * - BlueFS allocated some space
198- * - deferred wrote over this space
199- * Instead, stronger condition is checked:
200- * - BlueFS allocated any space
201- * - deferred wrote over
202- */
203- void mount_check_alloc ()
204- {
205- std::unique_ptr<ObjectStore> store;
206-
207- ObjectStore::CollectionHandle ch;
208- store = ObjectStore::create (g_ceph_context,
209- " bluestore" ,
210- " bluestore.test_temp_dir" ,
211- " store_test_temp_journal" );
212- // this should replay all deferred writes
213- BlueStore* bs = dynamic_cast <BlueStore*>(store.get ());
214- ceph_assert (bs);
215-
216- bool called_allocate = false ;
217- vector<pair<uint64_t , uint64_t > > captured_allocations;
218- bs->set_tracepoint_debug_deferred_replay_start (
219- [&](){
220- std::cout << " action before deferred replay" << std::endl;
221- Allocator* alloc = bs->debug_get_alloc ();
222- alloc->foreach (
223- [&](uint64_t offset, uint64_t length) {
224- captured_allocations.emplace_back (offset, length);
225- });
226- std::cout << " sleeping to give compaction a chance" << std::endl;
227- sleep (10 );
228- std::cout << " sleep end" << std::endl;
229- });
230- bs->set_tracepoint_debug_deferred_replay_end (
231- [&](){
232- std::cout << " action after deferred replay" << std::endl;
233- Allocator* alloc = bs->debug_get_alloc ();
234- auto ca_it = captured_allocations.begin ();
235- alloc->foreach (
236- [&](uint64_t offset, uint64_t length) {
237- if (ca_it == captured_allocations.end ()) {
238- called_allocate = true ;
239- return ;
240- }
241- if (ca_it->first != offset || ca_it->second != length) {
242- called_allocate = true ;
243- }
244- ca_it++;
245- });
246- std::cout << " called_allocate=" << called_allocate << std::endl;
247- bs->set_tracepoint_debug_deferred_replay_track (nullptr );
248- bs->set_tracepoint_debug_deferred_replay_start (nullptr );
249- bs->set_tracepoint_debug_deferred_replay_end (nullptr );
250- });
251-
252- interval_set<uint64_t > not_onode_allocations;
253- bs->set_tracepoint_debug_init_alloc_done (
254- [&](){
255- Allocator* alloc = bs->debug_get_alloc ();
256- alloc->foreach (
257- [&](uint64_t start, uint64_t len) {
258- not_onode_allocations.insert (start, len);
259- });
260- bs->set_tracepoint_debug_init_alloc_done (nullptr );
261- });
262- interval_set<uint64_t > extents_sum;
263- bs->set_tracepoint_debug_deferred_replay_track (
264- [&](const bluestore_deferred_transaction_t & dtxn) {
265- for (auto & op : dtxn.ops ) {
266- for (auto & e : op.extents ) {
267- extents_sum.insert (e.offset , e.length );
268- }
269- }
270- });
271- std::cout << " mounting..." << std::endl;
272- ceph_assert (store->mount () == 0 );
273- std::cout << " mount done" << std::endl;
274- std::cout << std::hex << " disk not used by onodes:" << not_onode_allocations << std::dec << std::endl;
275- std::cout << std::hex << " disk deferred wrote to:" << extents_sum << std::dec << std::endl;
276- std::cout << " allocated_some=" << called_allocate << std::endl;
277- interval_set<uint64_t > wrote_to_not_onodes;
278- wrote_to_not_onodes.intersection_of (extents_sum, not_onode_allocations);
279- std::cout << std::hex << " disk not used by onodes written by deferred="
280- << wrote_to_not_onodes << std::dec << std::endl;
281- bool only_wrote_to_onodes = wrote_to_not_onodes.empty ();
282- bs->set_tracepoint_debug_deferred_replay_start (nullptr );
283- ceph_assert (store->umount () == 0 );
284-
285- ceph_assert (!called_allocate || only_wrote_to_onodes);
286- }
287-
288-
289-
290-
291- int argc;
292- char **argv;
293-
294- boost::intrusive_ptr<CephContext> setup_env () {
139+ int main (int argc, char **argv) {
295140 auto args = argv_to_vec (argc, argv);
296- auto cct = global_init (
297- NULL , args, CEPH_ENTITY_TYPE_CLIENT,
298- CODE_ENVIRONMENT_UTILITY,
299- CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
141+ auto cct = global_init (NULL , args, CEPH_ENTITY_TYPE_CLIENT,
142+ CODE_ENVIRONMENT_UTILITY,
143+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
300144 common_init_finish (g_ceph_context);
301145
302- g_ceph_context->_conf ._clear_safe_to_start_threads ();
303- g_ceph_context->_conf .set_val_or_die (" bluestore_prefer_deferred_size" , " 4096" );
304- g_ceph_context->_conf .set_val_or_die (" bluefs_shared_alloc_size" , " 4096" );
305- g_ceph_context->_conf .set_val_or_die (" bluestore_block_size" , " 10240000000" );
306- g_ceph_context->_conf .apply_changes (nullptr );
307- return cct;
308- }
309-
310- int main (int _argc, char **_argv) {
311- argc = _argc;
312- argv = _argv;
313-
314- pid_t first_test = fork ();
315- if (first_test == 0 ) {
316- std::cout << " 1. Testing deletion of deferred (L) entries." << std::endl;
317- pid_t child = fork ();
318- if (child == 0 ) {
319- auto cct = setup_env ();
320- g_ceph_context->_conf ->bluestore_allocator = " bitmap" ;
321- g_ceph_context->_conf ->bluestore_rocksdb_options +=
322- " ,level0_file_num_compaction_trigger=4" ;
323- create_deferred_and_terminate ();
324- ceph_assert (false && " should exit() earlier" );
325- } else {
326- std::cout << " Waiting for fill omap and create deferred..." << std::endl;
327- int stat;
328- waitpid (child, &stat, 0 );
329- ceph_assert (WIFEXITED (stat) && WEXITSTATUS (stat) == 0 );
330- std::cout << " done and subprocess terminated." << std::endl;
331- auto cct = setup_env ();
332- g_ceph_context->_conf ->bluestore_allocator = " bitmap" ;
333- g_ceph_context->_conf ->bluestore_rocksdb_options +=
334- " ,level0_file_num_compaction_trigger=2" ;
335- mount_check_L ();
336- }
337- } else {
338- int first_stat;
339- waitpid (first_test, &first_stat, 0 );
340- ceph_assert (WIFEXITED (first_stat) && WEXITSTATUS (first_stat) == 0 );
341- std::cout << " 2. Testing overwrite of space allocated by BlueFS" << std::endl;
342- pid_t child = fork ();
343- if (child == 0 ) {
344- auto cct = setup_env ();
345- g_ceph_context->_conf ->bluestore_allocator = " avl" ;
346- g_ceph_context->_conf ->bluestore_rocksdb_options +=
347- " ,level0_file_num_compaction_trigger=4" ;
348- create_deferred_and_terminate ();
349- ceph_assert (false && " should exit() earlier" );
350- } else {
351- std::cout << " Waiting for fill omap and create deferred..." << std::endl;
352- int stat;
353- waitpid (child, &stat, 0 );
354- ceph_assert (WIFEXITED (stat) && WEXITSTATUS (stat) == 0 );
355- std::cout << " done and subprocess terminated." << std::endl;
356- auto cct = setup_env ();
357- g_ceph_context->_conf ->bluestore_allocator = " avl" ;
358- g_ceph_context->_conf ->bluestore_rocksdb_options +=
359- " ,level0_file_num_compaction_trigger=2" ;
360- mount_check_alloc ();
361- }
362- }
146+ create_deferred_and_terminate ();
363147 return 0 ;
364148}
0 commit comments