Skip to content

Commit ecdd6ca

Browse files
nicola-cab, finnschiermer, and jedelbo
authored
RCORE-2050 New Array Header support for compression (#7521)
* code review

---------

Co-authored-by: Finn Schiermer Andersen <[email protected]>
Co-authored-by: Jørgen Edelbo <[email protected]>
1 parent d1b267d commit ecdd6ca

File tree

10 files changed

+683
-168
lines changed

10 files changed

+683
-168
lines changed

src/realm/alloc.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,8 @@ Allocator& Allocator::get_default() noexcept
113113
// * adding a cross-over mapping. (if the array crosses a mapping boundary)
114114
// * using an already established cross-over mapping. (ditto)
115115
// this can proceed concurrently with other calls to translate()
116-
char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, ref_type ref) const noexcept
116+
char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, ref_type ref,
117+
bool known_in_slab) const noexcept
117118
{
118119
size_t idx = get_section_index(ref);
119120
RefTranslation& txl = ref_translation_ptr[idx];
@@ -122,7 +123,9 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re
122123
#if REALM_ENABLE_ENCRYPTION
123124
realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping, nullptr);
124125
#endif
125-
auto size = NodeHeader::get_byte_size_from_header(addr);
126+
// if we know the translation is inside the slab area, we don't need to check
127+
// for anything beyond the header, and we don't need to check if decryption is needed
128+
auto size = known_in_slab ? 8 : NodeHeader::get_byte_size_from_header(addr);
126129
bool crosses_mapping = offset + size > (1 << section_shift);
127130
// Move the limit on use of the existing primary mapping.
128131
// Take into account that another thread may attempt to change / have changed it concurrently,

src/realm/alloc.hpp

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,9 @@ class Allocator {
113113
/// Calls do_translate().
114114
char* translate(ref_type ref) const noexcept;
115115

116+
/// Simpler version if we know the ref points inside the slab area
117+
char* translate_in_slab(ref_type ref) const noexcept;
118+
116119
/// Returns true if, and only if the object at the specified 'ref'
117120
/// is in the immutable part of the memory managed by this
118121
/// allocator. The method by which some objects become part of the
@@ -249,8 +252,8 @@ class Allocator {
249252
/// then entirely the responsibility of the caller that the memory
250253
/// is not modified by way of the returned memory pointer.
251254
virtual char* do_translate(ref_type ref) const noexcept = 0;
252-
char* translate_critical(RefTranslation*, ref_type ref) const noexcept;
253-
char* translate_less_critical(RefTranslation*, ref_type ref) const noexcept;
255+
char* translate_critical(RefTranslation*, ref_type ref, bool known_in_slab = false) const noexcept;
256+
char* translate_less_critical(RefTranslation*, ref_type ref, bool known_in_slab = false) const noexcept;
254257
virtual void get_or_add_xover_mapping(RefTranslation&, size_t, size_t, size_t) = 0;
255258
Allocator() noexcept;
256259
size_t get_section_index(size_t pos) const noexcept;
@@ -556,7 +559,8 @@ inline Allocator::Allocator() noexcept
556559
}
557560

558561
// performance critical part of the translation process. Less critical code is in translate_less_critical.
559-
inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, ref_type ref) const noexcept
562+
inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, ref_type ref,
563+
bool known_in_slab) const noexcept
560564
{
561565
size_t idx = get_section_index(ref);
562566
RefTranslation& txl = ref_translation_ptr[idx];
@@ -574,7 +578,7 @@ inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr,
574578
}
575579
else {
576580
// the lowest possible xover offset may grow concurrently, but that will be handled inside the call
577-
return translate_less_critical(ref_translation_ptr, ref);
581+
return translate_less_critical(ref_translation_ptr, ref, known_in_slab);
578582
}
579583
}
580584
realm::util::terminate("Invalid ref translation entry", __FILE__, __LINE__, txl.cookie, 0x1234567890, ref, idx);
@@ -592,6 +596,17 @@ inline char* Allocator::translate(ref_type ref) const noexcept
592596
}
593597
}
594598

599+
inline char* Allocator::translate_in_slab(ref_type ref) const noexcept
600+
{
601+
auto ref_translation_ptr = m_ref_translation_ptr.load(std::memory_order_acquire);
602+
if (REALM_LIKELY(ref_translation_ptr)) {
603+
return translate_critical(ref_translation_ptr, ref, true);
604+
}
605+
else {
606+
return do_translate(ref);
607+
}
608+
}
609+
595610

596611
} // namespace realm
597612

src/realm/alloc_slab.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,7 @@ MemRef SlabAlloc::do_alloc(size_t size)
235235
#endif
236236

237237
char* addr = reinterpret_cast<char*>(entry);
238-
REALM_ASSERT_EX(addr == translate(ref), addr, ref, get_file_path_for_assertions());
238+
REALM_ASSERT_EX(addr == translate_in_slab(ref), addr, ref, get_file_path_for_assertions());
239239

240240
#if REALM_ENABLE_ALLOC_SET_ZERO
241241
std::fill(addr, addr + size, 0);
@@ -300,6 +300,7 @@ SlabAlloc::FreeBlock* SlabAlloc::pop_freelist_entry(FreeList list)
300300

301301
void SlabAlloc::FreeBlock::unlink()
302302
{
303+
REALM_ASSERT_DEBUG(next != nullptr && prev != nullptr);
303304
auto _next = next;
304305
auto _prev = prev;
305306
_next->prev = prev;

src/realm/array.cpp

Lines changed: 6 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -989,42 +989,18 @@ MemRef Array::clone(MemRef mem, Allocator& alloc, Allocator& target_alloc)
989989
MemRef Array::create(Type type, bool context_flag, WidthType width_type, size_t size, int_fast64_t value,
990990
Allocator& alloc)
991991
{
992-
REALM_ASSERT_7(value, ==, 0, ||, width_type, ==, wtype_Bits);
993-
REALM_ASSERT_7(size, ==, 0, ||, width_type, !=, wtype_Ignore);
994-
995-
bool is_inner_bptree_node = false, has_refs = false;
996-
switch (type) {
997-
case type_Normal:
998-
break;
999-
case type_InnerBptreeNode:
1000-
is_inner_bptree_node = true;
1001-
has_refs = true;
1002-
break;
1003-
case type_HasRefs:
1004-
has_refs = true;
1005-
break;
1006-
}
1007-
992+
REALM_ASSERT_DEBUG(value == 0 || width_type == wtype_Bits);
993+
REALM_ASSERT_DEBUG(size == 0 || width_type != wtype_Ignore);
1008994
int width = 0;
1009-
size_t byte_size_0 = header_size;
1010-
if (value != 0) {
1011-
width = int(bit_width(value));
1012-
byte_size_0 = calc_aligned_byte_size(size, width); // Throws
1013-
}
1014-
// Adding zero to Array::initial_capacity to avoid taking the
1015-
// address of that member
1016-
size_t byte_size = std::max(byte_size_0, initial_capacity + 0);
1017-
MemRef mem = alloc.alloc(byte_size); // Throws
1018-
char* header = mem.get_addr();
1019-
1020-
init_header(header, is_inner_bptree_node, has_refs, context_flag, width_type, width, size, byte_size);
1021-
995+
if (value != 0)
996+
width = static_cast<int>(bit_width(value));
997+
auto mem = Node::create_node(size, alloc, context_flag, type, width_type, width);
1022998
if (value != 0) {
999+
const auto header = mem.get_addr();
10231000
char* data = get_data_from_header(header);
10241001
size_t begin = 0, end = size;
10251002
REALM_TEMPEX(fill_direct, width, (data, begin, end, value));
10261003
}
1027-
10281004
return mem;
10291005
}
10301006

src/realm/array.hpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -485,7 +485,7 @@ class Array : public Node, public ArrayParent {
485485
/// It is an error to specify a non-zero value unless the width
486486
/// type is wtype_Bits. It is also an error to specify a non-zero
487487
/// size if the width type is wtype_Ignore.
488-
static MemRef create(Type, bool context_flag, WidthType, size_t size, int_fast64_t value, Allocator&);
488+
static MemRef create(Type, bool, WidthType, size_t, int_fast64_t, Allocator&);
489489

490490
// Overriding method in ArrayParent
491491
void update_child_ref(size_t, ref_type) override;
@@ -938,8 +938,7 @@ inline void Array::adjust(size_t begin, size_t end, int_fast64_t diff)
938938
inline size_t Array::get_byte_size() const noexcept
939939
{
940940
const char* header = get_header_from_data(m_data);
941-
WidthType wtype = Node::get_wtype_from_header(header);
942-
size_t num_bytes = NodeHeader::calc_byte_size(wtype, m_size, m_width);
941+
size_t num_bytes = NodeHeader::get_byte_size_from_header(header);
943942

944943
REALM_ASSERT_7(m_alloc.is_read_only(m_ref), ==, true, ||, num_bytes, <=, get_capacity_from_header(header));
945944

src/realm/array_basic_tpl.hpp

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -40,16 +40,12 @@ inline MemRef BasicArray<T>::create_array(size_t init_size, Allocator& allocator
4040
// Adding zero to Array::initial_capacity to avoid taking the
4141
// address of that member
4242
size_t byte_size = std::max(byte_size_0, Node::initial_capacity + 0); // Throws
43-
44-
MemRef mem = allocator.alloc(byte_size); // Throws
45-
46-
bool is_inner_bptree_node = false;
47-
bool has_refs = false;
48-
bool context_flag = false;
49-
int width = sizeof(T);
50-
init_header(mem.get_addr(), is_inner_bptree_node, has_refs, context_flag, wtype_Multiply, width, init_size,
51-
byte_size);
52-
43+
MemRef mem = allocator.alloc(byte_size); // Throws
44+
uint8_t flags = 0;
45+
const int width = sizeof(T) * 8; // element width is in bits now
46+
const auto header = mem.get_addr();
47+
init_header(header, Encoding::WTypMult, flags, width, init_size);
48+
set_capacity_in_header(byte_size, header);
5349
return mem;
5450
}
5551

src/realm/node.cpp

Lines changed: 36 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,24 @@ MemRef Node::create_node(size_t size, Allocator& alloc, bool context_flag, Type
3232
size_t byte_size = std::max(byte_size_0, size_t(initial_capacity));
3333

3434
MemRef mem = alloc.alloc(byte_size); // Throws
35-
char* header = mem.get_addr();
36-
37-
init_header(header, type == type_InnerBptreeNode, type != type_Normal, context_flag, width_type, width, size,
38-
byte_size);
39-
35+
const auto header = mem.get_addr();
36+
REALM_ASSERT_DEBUG(width_type != WidthType::wtype_Extend);
37+
Encoding encoding{static_cast<int>(width_type)};
38+
39+
uint8_t flags = 0;
40+
if (type == type_InnerBptreeNode)
41+
flags |= static_cast<uint8_t>(Flags::InnerBPTree) | static_cast<uint8_t>(Flags::HasRefs);
42+
if (type != type_Normal)
43+
flags |= static_cast<uint8_t>(Flags::HasRefs);
44+
if (context_flag)
45+
flags |= static_cast<uint8_t>(Flags::Context);
46+
// width must be passed to init_header in bits, but for wtype_Multiply and wtype_Ignore
47+
// it is provided by the caller of this function in bytes, so convert to bits
48+
if (width_type != wtype_Bits)
49+
width = width * 8;
50+
51+
init_header(header, encoding, flags, width, size);
52+
set_capacity_in_header(byte_size, header);
4053
return mem;
4154
}
4255

@@ -69,17 +82,20 @@ size_t Node::calc_item_count(size_t bytes, size_t width) const noexcept
6982
void Node::alloc(size_t init_size, size_t new_width)
7083
{
7184
REALM_ASSERT(is_attached());
72-
85+
char* header = get_header_from_data(m_data);
86+
REALM_ASSERT(!wtype_is_extended(header));
7387
size_t needed_bytes = calc_byte_len(init_size, new_width);
7488
// this method is not public and callers must (and currently do) ensure that
7589
// needed_bytes are never larger than max_array_payload.
7690
REALM_ASSERT_RELEASE(init_size <= max_array_size);
7791

78-
if (is_read_only())
92+
if (is_read_only()) {
7993
do_copy_on_write(needed_bytes);
94+
// header has changed after copy on write if the array was compressed
95+
header = get_header_from_data(m_data);
96+
}
8097

8198
REALM_ASSERT(!m_alloc.is_read_only(m_ref));
82-
char* header = get_header_from_data(m_data);
8399
size_t orig_capacity_bytes = get_capacity_from_header(header);
84100
size_t orig_width = get_width_from_header(header);
85101

@@ -114,18 +130,28 @@ void Node::alloc(size_t init_size, size_t new_width)
114130
// this array instance in a corrupt state
115131
update_parent(); // Throws
116132
}
117-
118-
// Update header
133+
// update width (important when we convert from normal uncompressed array into compressed format)
119134
if (new_width != orig_width) {
120135
set_width_in_header(int(new_width), header);
121136
}
122137
set_size_in_header(init_size, header);
123138
m_size = init_size;
124139
}
125140

141+
void Node::destroy() noexcept
142+
{
143+
if (!is_attached())
144+
return;
145+
char* header = get_header_from_data(m_data);
146+
m_alloc.free_(m_ref, header);
147+
m_data = nullptr;
148+
}
149+
126150
void Node::do_copy_on_write(size_t minimum_size)
127151
{
128152
const char* header = get_header_from_data(m_data);
153+
// only type A arrays should be allowed during copy on write
154+
REALM_ASSERT(!wtype_is_extended(header));
129155

130156
// Calculate size in bytes
131157
size_t array_size = calc_byte_size(get_wtype_from_header(header), m_size, get_width_from_header(header));
@@ -140,7 +166,6 @@ void Node::do_copy_on_write(size_t minimum_size)
140166
const char* old_end = header + array_size;
141167
char* new_begin = mref.get_addr();
142168
realm::safe_copy_n(old_begin, old_end - old_begin, new_begin);
143-
144169
ref_type old_ref = m_ref;
145170

146171
// Update internal data
@@ -150,7 +175,6 @@ void Node::do_copy_on_write(size_t minimum_size)
150175
// Update capacity in header. Uses m_data to find header, so
151176
// m_data must be initialized correctly first.
152177
set_capacity_in_header(new_size, new_begin);
153-
154178
update_parent();
155179

156180
#if REALM_ENABLE_MEMDEBUG

src/realm/node.hpp

Lines changed: 4 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ class Node : public NodeHeader {
114114
{
115115
}
116116

117-
virtual ~Node() {}
117+
virtual ~Node() = default;
118118

119119
/**************************** Initializers *******************************/
120120

@@ -123,10 +123,10 @@ class Node : public NodeHeader {
123123
char* init_from_mem(MemRef mem) noexcept
124124
{
125125
char* header = mem.get_addr();
126+
REALM_ASSERT_DEBUG(!wtype_is_extended(header));
126127
m_ref = mem.get_ref();
127128
m_data = get_data_from_header(header);
128129
m_size = get_size_from_header(header);
129-
130130
return header;
131131
}
132132

@@ -212,14 +212,7 @@ class Node : public NodeHeader {
212212
/// children of that array. See non-static destroy_deep() for an
213213
/// alternative. If this accessor is already in the detached state, this
214214
/// function has no effect (idempotency).
215-
void destroy() noexcept
216-
{
217-
if (!is_attached())
218-
return;
219-
char* header = get_header_from_data(m_data);
220-
m_alloc.free_(m_ref, header);
221-
m_data = nullptr;
222-
}
215+
void destroy() noexcept;
223216

224217
/// Shorthand for `destroy(MemRef(ref, alloc), alloc)`.
225218
static void destroy(ref_type ref, Allocator& alloc) noexcept
@@ -234,7 +227,6 @@ class Node : public NodeHeader {
234227
alloc.free_(mem);
235228
}
236229

237-
238230
/// Setting a new parent affects ownership of the attached array node, if
239231
/// any. If a non-null parent is specified, and there was no parent
240232
/// originally, then the caller passes ownership to the parent, and vice
@@ -245,6 +237,7 @@ class Node : public NodeHeader {
245237
m_parent = parent;
246238
m_ndx_in_parent = ndx_in_parent;
247239
}
240+
248241
void set_ndx_in_parent(size_t ndx) noexcept
249242
{
250243
m_ndx_in_parent = ndx;
@@ -333,8 +326,6 @@ class Node : public NodeHeader {
333326
// Includes array header. Not necessarily 8-byte aligned.
334327
virtual size_t calc_byte_len(size_t num_items, size_t width) const;
335328
virtual size_t calc_item_count(size_t bytes, size_t width) const noexcept;
336-
static void init_header(char* header, bool is_inner_bptree_node, bool has_refs, bool context_flag,
337-
WidthType width_type, int width, size_t size, size_t capacity) noexcept;
338329

339330
private:
340331
friend class NodeTree;
@@ -362,22 +353,6 @@ class ArrayPayload {
362353
virtual void set_spec(Spec*, size_t) const {}
363354
};
364355

365-
366-
inline void Node::init_header(char* header, bool is_inner_bptree_node, bool has_refs, bool context_flag,
367-
WidthType width_type, int width, size_t size, size_t capacity) noexcept
368-
{
369-
// Note: Since the header layout contains unallocated bit and/or
370-
// bytes, it is important that we put the entire header into a
371-
// well defined state initially.
372-
std::fill(header, header + header_size, 0);
373-
set_is_inner_bptree_node_in_header(is_inner_bptree_node, header);
374-
set_hasrefs_in_header(has_refs, header);
375-
set_context_flag_in_header(context_flag, header);
376-
set_wtype_in_header(width_type, header);
377-
set_width_in_header(width, header);
378-
set_size_in_header(size, header);
379-
set_capacity_in_header(capacity, header);
380-
}
381356
} // namespace realm
382357

383358
#endif /* REALM_NODE_HPP */

0 commit comments

Comments
 (0)