@@ -640,7 +640,8 @@ uint64_t BlueFS::_get_minimal_reserved(unsigned id) const
 uint64_t BlueFS::get_full_reserved(unsigned id)
 {
   if (!is_shared_alloc(id)) {
-    return locked_alloc[id].length + _get_minimal_reserved(id);
+    return locked_alloc[id].head_length + locked_alloc[id].tail_length +
+           _get_minimal_reserved(id);
   }
   return 0;
 }
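Note on the hunk above: the per-device locked region changes from a single bluefs_extent_t to a bluefs_locked_extents_t carrying separate head and tail regions, and get_full_reserved() now sums both lengths on top of the minimal reserve. The struct's real definition is not part of this excerpt; the sketch below only mirrors the four fields and the summation visible in this diff, so anything beyond that is an assumption.

// Illustrative stand-in for bluefs_locked_extents_t, inferred from the fields
// used in this diff; not the actual Ceph definition.
#include <cstdint>

struct locked_extents_sketch {
  uint64_t head_offset = 0;  // locked span right after the reserved area
  uint64_t head_length = 0;  // bytes up to the first alloc-unit boundary
  uint64_t tail_offset = 0;  // locked span after the last full alloc unit
  uint64_t tail_length = 0;  // bytes up to the physical end of the device

  // mirrors the new get_full_reserved() arithmetic
  uint64_t full_reserved(uint64_t minimal_reserved) const {
    return head_length + tail_length + minimal_reserved;
  }
};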
@@ -709,6 +710,18 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
 
   _init_alloc();
 
+  // temporarily lock candidate regions to forbid their use during mkfs
+  for (uint8_t i = 0; i < MAX_BDEV; i++) {
+    if (!alloc[i]) continue;
+    bluefs_locked_extents_t res_la = locked_alloc[i].get_merged();
+    if (res_la.head_length) {
+      alloc[i]->init_rm_free(res_la.head_offset, res_la.head_length);
+    }
+    if (res_la.tail_length) {
+      alloc[i]->init_rm_free(res_la.tail_offset, res_la.tail_length);
+    }
+  }
+
   // init log
   FileRef log_file = ceph::make_ref<File>();
   log_file->fnode.ino = 1;
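The mkfs() hunk above (and the mount() hunk further down) relies on the allocator's init_add_free()/init_rm_free() pair: the whole post-reserved span is first published as free, then the candidate head/tail locks are withdrawn so nothing can be handed out from them. The toy model below illustrates that pattern with a plain interval map standing in for a real Ceph Allocator; the class, its behaviour, and the sizes are invented for illustration.

// Toy model of the init_add_free()/init_rm_free() pattern used in this PR:
// a std::map interval set stands in for a real Ceph Allocator.
#include <cstdint>
#include <map>

class toy_alloc {
  std::map<uint64_t, uint64_t> free_;  // offset -> length, non-overlapping
public:
  void init_add_free(uint64_t off, uint64_t len) { free_[off] = len; }

  // Withdraw [off, off+len) from the free set; assumes the range matches a
  // prefix/suffix/whole of one published interval (enough for this sketch).
  void init_rm_free(uint64_t off, uint64_t len) {
    auto it = free_.upper_bound(off);
    if (it == free_.begin()) return;
    --it;
    uint64_t f_off = it->first, f_len = it->second;
    if (off >= f_off + f_len) return;   // range was not free to begin with
    free_.erase(it);
    if (off > f_off)
      free_[f_off] = off - f_off;       // keep the part before the hole
    uint64_t end = off + len, f_end = f_off + f_len;
    if (end < f_end)
      free_[end] = f_end - end;         // keep the part after the hole
  }
};

// mkfs-style usage with invented sizes: publish the post-reserved span,
// then pull the candidate head/tail locks back out.
int main() {
  toy_alloc a;
  a.init_add_free(8192, 10'000'000 - 8192);
  a.init_rm_free(8192, 57344);      // head candidate
  a.init_rm_free(9961472, 38528);   // tail candidate
}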
@@ -793,7 +806,7 @@ void BlueFS::_init_alloc()
       continue;
     }
     ceph_assert(bdev[id]->get_size());
-    locked_alloc[id] = bluefs_extent_t();
+    locked_alloc[id].reset();
 
     if (is_shared_alloc(id)) {
       dout(1) << __func__ << " shared, id " << id << std::hex
@@ -810,37 +823,37 @@ void BlueFS::_init_alloc()
         name += to_string(uintptr_t(this));
 
       auto reserved = _get_minimal_reserved(id);
-      uint64_t locked_offs = 0;
-      {
-        // Try to lock tailing space at device if allocator controlled space
-        // isn't aligned with recommended alloc unit.
-        // Final decision whether locked tail to be maintained is made after
-        // BlueFS replay depending on existing allocations.
-        uint64_t size0 = _get_block_device_size(id);
-        uint64_t size = size0 - reserved;
-        size = p2align(size, alloc_size[id]) + reserved;
-        if (size < size0) {
-          locked_offs = size;
-          locked_alloc[id] = bluefs_extent_t(id, locked_offs, uint32_t(size0 - size));
-        }
+      uint64_t full_size = _get_block_device_size(id);
+      uint64_t free_end = p2align(full_size, alloc_size[id]);
+
+      // Try to lock the following extents:
+      // [reserved, alloc_size] and [p2align(dev_size, alloc_size), dev_size]
+      // to make all the allocations aligned to alloc_size if possible.
+      // Final decision whether the locked head/tail are to be maintained is
+      // made after BlueFS replay, depending on existing allocations.
+      auto& locked = locked_alloc[id];
+      locked.head_offset = reserved;
+      locked.head_length = p2nphase(reserved, alloc_size[id]);
+      if (free_end < full_size) {
+        locked.tail_offset = free_end;
+        locked.tail_length = full_size - free_end;
       }
       string alloc_type = cct->_conf->bluefs_allocator;
       dout(1) << __func__ << " new, id " << id << std::hex
              << ", allocator name " << name
              << ", allocator type " << alloc_type
-             << ", capacity 0x" << bdev[id]->get_size()
+             << ", capacity 0x" << full_size
              << ", reserved 0x" << reserved
-             << ", locked 0x" << locked_alloc[id].offset
-             << " ~" << locked_alloc[id].length
+             << ", maybe locked " << locked
              << ", block size 0x" << bdev[id]->get_block_size()
              << ", alloc unit 0x" << alloc_size[id]
              << std::dec << dendl;
-      alloc[id] = Allocator::create(cct, alloc_type,
-                                    bdev[id]->get_size(),
+      alloc[id] = Allocator::create(cct,
+                                    alloc_type,
+                                    full_size,
                                     bdev[id]->get_block_size(),
                                     name);
-      uint64_t free_len = locked_offs ? locked_offs : _get_block_device_size(id) - reserved;
-      alloc[id]->init_add_free(reserved, free_len);
+      alloc[id]->init_add_free(reserved, full_size - reserved);
     }
   }
 }
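A worked example of the head/tail arithmetic introduced in _init_alloc() above may help. p2align()/p2nphase() are re-implemented locally with the usual power-of-two semantics (round down to a boundary, and distance up to the next boundary, respectively); the device size, reserve, and alloc unit below are made-up numbers, not values from this PR.

// Standalone illustration of the locked head/tail computation in _init_alloc().
#include <cstdint>
#include <iostream>

static uint64_t p2align(uint64_t x, uint64_t align)  { return x & ~(align - 1); }           // round down
static uint64_t p2nphase(uint64_t x, uint64_t align) { return (align - x % align) % align; } // up to next boundary

int main() {
  uint64_t full_size  = 10'000'000; // device size, intentionally not a multiple of the alloc unit
  uint64_t alloc_unit = 65536;      // plays the role of alloc_size[id]
  uint64_t reserved   = 8192;       // plays the role of _get_minimal_reserved(id)

  // head: from the end of the reserved area up to the first alloc-unit boundary
  uint64_t head_offset = reserved;
  uint64_t head_length = p2nphase(reserved, alloc_unit);   // 57344

  // tail: from the last full alloc-unit boundary up to the device end
  uint64_t free_end    = p2align(full_size, alloc_unit);   // 9961472
  uint64_t tail_offset = free_end;
  uint64_t tail_length = full_size - free_end;             // 38528

  std::cout << "head 0x" << std::hex << head_offset << "~" << head_length
            << " tail 0x" << tail_offset << "~" << tail_length << std::dec << "\n";
  // With these two regions locked, every remaining free byte lies in
  // [alloc_unit, free_end), so allocations can stay alloc-unit aligned.
}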
@@ -1066,32 +1079,34 @@ int BlueFS::mount()
 
   // init freelist
   for (auto& p : nodes.file_map) {
-    dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl;
+    dout(20) << __func__ << " noting alloc for " << p.second->fnode << dendl;
     for (auto& q : p.second->fnode.extents) {
       bool is_shared = is_shared_alloc(q.bdev);
       ceph_assert(!is_shared || (is_shared && shared_alloc));
       if (is_shared && shared_alloc->need_init && shared_alloc->a) {
         shared_alloc->bluefs_used += q.length;
         alloc[q.bdev]->init_rm_free(q.offset, q.length);
       } else if (!is_shared) {
-        if (locked_alloc[q.bdev].length) {
-          auto locked_offs = locked_alloc[q.bdev].offset;
-          if (q.offset + q.length > locked_offs) {
-            // we already have allocated extents in locked range,
-            // do not enforce this lock then.
-            bluefs_extent_t dummy;
-            std::swap(locked_alloc[q.bdev], dummy);
-            alloc[q.bdev]->init_add_free(dummy.offset, dummy.length);
-            dout(1) << __func__ << std::hex
-                    << " unlocked at " << q.bdev
-                    << " 0x" << dummy.offset << " ~" << dummy.length
-                    << std::dec << dendl;
-          }
-        }
+        locked_alloc[q.bdev].reset_intersected(q);
         alloc[q.bdev]->init_rm_free(q.offset, q.length);
       }
     }
   }
+  // finalize and apply locked allocation regions
+  for (uint8_t i = 0; i < MAX_BDEV; i++) {
+    bluefs_locked_extents_t res_la = locked_alloc[i].finalize();
+    dout(1) << __func__ << std::hex
+            << " final locked allocations " << (int)i
+            << " " << locked_alloc[i] << " => " << res_la
+            << dendl;
+    if (res_la.head_length) {
+      alloc[i]->init_rm_free(res_la.head_offset, res_la.head_length);
+    }
+    if (res_la.tail_length) {
+      alloc[i]->init_rm_free(res_la.tail_offset, res_la.tail_length);
+    }
+  }
+
   if (shared_alloc) {
     shared_alloc->need_init = false;
     dout(1) << __func__ << " shared_bdev_used = "
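The mount() hunk above leans on two bluefs_locked_extents_t helpers whose bodies are outside this excerpt: reset_intersected(), called for every extent found during replay, and finalize(), whose result is withdrawn from the allocator. The sketch below is only a guess at their contract based on how this hunk uses them; the real implementation may keep more state, and the extent type here is a simplified stand-in.

// Hypothetical sketch of the helper semantics used in mount(), inferred from
// this diff alone; not the actual Ceph implementation.
#include <cstdint>

struct extent_sketch { uint64_t offset = 0; uint32_t length = 0; };  // simplified stand-in

struct locked_extents_sketch2 {
  uint64_t head_offset = 0, head_length = 0;
  uint64_t tail_offset = 0, tail_length = 0;

  // Drop a candidate region if an already-allocated extent overlaps it:
  // the lock would otherwise claim space that existing files legitimately own.
  void reset_intersected(const extent_sketch& e) {
    auto overlaps = [&](uint64_t off, uint64_t len) {
      return len && e.offset < off + len && off < e.offset + e.length;
    };
    if (overlaps(head_offset, head_length))
      head_length = 0;
    if (overlaps(tail_offset, tail_length))
      tail_length = 0;
  }

  // Return whatever survived replay; mount() then removes these regions from
  // the allocator via init_rm_free() so they are never handed out. The real
  // finalize() may do additional merging or validation.
  locked_extents_sketch2 finalize() const {
    return *this;
  }
};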