Skip to content

Commit ce6f196

Browse files
authored
Remove enum string feature (#7858)
1 parent 499e630 commit ce6f196

37 files changed

486 additions and 1358 deletions (lines changed)

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
-----------
1818

1919
### Internals
20-
* None.
20+
* Ability to enumerate a string column has been removed.
2121

2222
----------------------------------------------
2323

src/realm/array_string.cpp

Lines changed: 3 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -54,24 +54,9 @@ void ArrayString::init_from_mem(MemRef mem) noexcept
5454
else {
5555
auto arr = new (&m_storage) Array(m_alloc);
5656
arr->init_from_mem(mem);
57-
// The context flag is used to indicate interned strings vs old enum strings
58-
// (in conjunction with has_refs() == false)
59-
if (arr->get_context_flag_from_header(arr->get_header())) {
60-
// init for new interned strings (replacing old enum strings)
61-
m_type = Type::interned_strings;
62-
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
63-
}
64-
else {
65-
// init for old enum strings
66-
m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
67-
ArrayParent* p;
68-
REALM_ASSERT(m_spec != nullptr);
69-
REALM_ASSERT(m_col_ndx != realm::npos);
70-
ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
71-
m_string_enum_values->init_from_ref(r);
72-
m_string_enum_values->set_parent(p, m_col_ndx);
73-
m_type = Type::enum_strings;
74-
}
57+
// init for new interned strings
58+
m_type = Type::interned_strings;
59+
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
7560
}
7661
}
7762
else {
@@ -122,7 +107,6 @@ size_t ArrayString::size() const
122107
return static_cast<ArraySmallBlobs*>(m_arr)->size();
123108
case Type::big_strings:
124109
return static_cast<ArrayBigBlobs*>(m_arr)->size();
125-
case Type::enum_strings:
126110
case Type::interned_strings:
127111
return static_cast<Array*>(m_arr)->size();
128112
}
@@ -141,7 +125,6 @@ void ArrayString::add(StringData value)
141125
case Type::big_strings:
142126
static_cast<ArrayBigBlobs*>(m_arr)->add_string(value);
143127
break;
144-
case Type::enum_strings:
145128
case Type::interned_strings: {
146129
auto a = static_cast<Array*>(m_arr);
147130
size_t ndx = a->size();
@@ -169,16 +152,6 @@ void ArrayString::set(size_t ndx, StringData value)
169152
static_cast<Array*>(m_arr)->set(ndx, id);
170153
break;
171154
}
172-
case Type::enum_strings: {
173-
size_t sz = m_string_enum_values->size();
174-
size_t res = m_string_enum_values->find_first(value, 0, sz);
175-
if (res == realm::not_found) {
176-
m_string_enum_values->add(value);
177-
res = sz;
178-
}
179-
static_cast<Array*>(m_arr)->set(ndx, res);
180-
break;
181-
}
182155
}
183156
}
184157

@@ -194,11 +167,6 @@ void ArrayString::insert(size_t ndx, StringData value)
194167
case Type::big_strings:
195168
static_cast<ArrayBigBlobs*>(m_arr)->insert_string(ndx, value);
196169
break;
197-
case Type::enum_strings: {
198-
static_cast<Array*>(m_arr)->insert(ndx, 0);
199-
set(ndx, value);
200-
break;
201-
}
202170
case Type::interned_strings: {
203171
static_cast<Array*>(m_arr)->insert(ndx, 0);
204172
set(ndx, value);
@@ -216,31 +184,6 @@ StringData ArrayString::get(size_t ndx) const
216184
return static_cast<ArraySmallBlobs*>(m_arr)->get_string(ndx);
217185
case Type::big_strings:
218186
return static_cast<ArrayBigBlobs*>(m_arr)->get_string(ndx);
219-
case Type::enum_strings: {
220-
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
221-
return m_string_enum_values->get(index);
222-
}
223-
case Type::interned_strings: {
224-
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
225-
return m_string_interner->get(id);
226-
}
227-
}
228-
return {};
229-
}
230-
231-
StringData ArrayString::get_legacy(size_t ndx) const
232-
{
233-
switch (m_type) {
234-
case Type::small_strings:
235-
return static_cast<ArrayStringShort*>(m_arr)->get(ndx);
236-
case Type::medium_strings:
237-
return static_cast<ArraySmallBlobs*>(m_arr)->get_string_legacy(ndx);
238-
case Type::big_strings:
239-
return static_cast<ArrayBigBlobs*>(m_arr)->get_string(ndx);
240-
case Type::enum_strings: {
241-
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
242-
return m_string_enum_values->get(index);
243-
}
244187
case Type::interned_strings: {
245188
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
246189
return m_string_interner->get(id);
@@ -263,10 +206,6 @@ bool ArrayString::is_null(size_t ndx) const
263206
return static_cast<ArraySmallBlobs*>(m_arr)->is_null(ndx);
264207
case Type::big_strings:
265208
return static_cast<ArrayBigBlobs*>(m_arr)->is_null(ndx);
266-
case Type::enum_strings: {
267-
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
268-
return m_string_enum_values->is_null(id);
269-
}
270209
case Type::interned_strings: {
271210
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
272211
return id == 0;
@@ -288,7 +227,6 @@ void ArrayString::erase(size_t ndx)
288227
static_cast<ArrayBigBlobs*>(m_arr)->erase(ndx);
289228
break;
290229
case Type::interned_strings:
291-
case Type::enum_strings:
292230
static_cast<Array*>(m_arr)->erase(ndx);
293231
break;
294232
}
@@ -311,10 +249,6 @@ void ArrayString::move(ArrayString& dst, size_t ndx)
311249
case Type::big_strings:
312250
static_cast<ArrayBigBlobs*>(m_arr)->truncate(ndx);
313251
break;
314-
case Type::enum_strings:
315-
// this operation will never be called for enumerated columns
316-
REALM_UNREACHABLE();
317-
break;
318252
case Type::interned_strings:
319253
m_arr->truncate(ndx);
320254
break;
@@ -333,7 +267,6 @@ void ArrayString::clear()
333267
case Type::big_strings:
334268
static_cast<ArrayBigBlobs*>(m_arr)->clear();
335269
break;
336-
case Type::enum_strings:
337270
case Type::interned_strings:
338271
static_cast<Array*>(m_arr)->clear();
339272
break;
@@ -355,14 +288,6 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const
355288
return static_cast<ArrayBigBlobs*>(m_arr)->find_first(as_binary, true, begin, end);
356289
break;
357290
}
358-
case Type::enum_strings: {
359-
size_t sz = m_string_enum_values->size();
360-
size_t res = m_string_enum_values->find_first(value, 0, sz);
361-
if (res != realm::not_found) {
362-
return static_cast<Array*>(m_arr)->find_first(res, begin, end);
363-
}
364-
break;
365-
}
366291
case Type::interned_strings: {
367292
// we need a way to avoid this lookup for each leaf array. The lookup must appear
368293
// higher up the call stack and passed down.
@@ -420,8 +345,6 @@ size_t ArrayString::lower_bound(StringData value)
420345
return lower_bound_string(static_cast<ArraySmallBlobs*>(m_arr), value);
421346
case Type::big_strings:
422347
return lower_bound_string(static_cast<ArrayBigBlobs*>(m_arr), value);
423-
case Type::enum_strings:
424-
break;
425348
case Type::interned_strings:
426349
REALM_UNREACHABLE();
427350
break;
@@ -434,9 +357,6 @@ ArrayString::Type ArrayString::upgrade_leaf(size_t value_size)
434357
if (m_type == Type::big_strings)
435358
return Type::big_strings;
436359

437-
if (m_type == Type::enum_strings)
438-
return Type::enum_strings;
439-
440360
if (m_type == Type::interned_strings)
441361
return Type::interned_strings;
442362

@@ -529,7 +449,6 @@ void ArrayString::verify() const
529449
case Type::big_strings:
530450
static_cast<ArrayBigBlobs*>(m_arr)->verify();
531451
break;
532-
case Type::enum_strings:
533452
case Type::interned_strings:
534453
static_cast<Array*>(m_arr)->verify();
535454
break;
@@ -567,7 +486,6 @@ ref_type ArrayString::typed_write(ref_type ref, _impl::ArrayWriterBase& out, All
567486
leaf.destroy_deep(true);
568487
}
569488
else {
570-
// whether it's the old enum strings or the new interned strings,
571489
// just write out the array using integer leaf compression
572490
ret_val = leaf.write(out, false, out.only_modified, out.compress);
573491
}

src/realm/array_string.hpp

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -74,15 +74,6 @@ class ArrayString : public ArrayPayload {
7474
{
7575
m_string_interner = string_interner;
7676
}
77-
bool need_spec() const override
78-
{
79-
return true;
80-
}
81-
void set_spec(Spec* spec, size_t col_ndx) const override
82-
{
83-
m_spec = spec;
84-
m_col_ndx = col_ndx;
85-
}
8677

8778
void update_parent()
8879
{
@@ -108,7 +99,6 @@ class ArrayString : public ArrayPayload {
10899
}
109100
void insert(size_t ndx, StringData value);
110101
StringData get(size_t ndx) const;
111-
StringData get_legacy(size_t ndx) const;
112102
Mixed get_any(size_t ndx) const override;
113103
bool is_null(size_t ndx) const;
114104
void erase(size_t ndx);
@@ -137,16 +127,14 @@ class ArrayString : public ArrayPayload {
137127
static constexpr size_t storage_size =
138128
std::max({sizeof(ArrayStringShort), sizeof(ArraySmallBlobs), sizeof(ArrayBigBlobs), sizeof(Array)});
139129

140-
enum class Type { small_strings, medium_strings, big_strings, enum_strings, interned_strings };
130+
enum class Type { small_strings, medium_strings, big_strings, interned_strings };
141131

142132
Type m_type = Type::small_strings;
143133

144134
Allocator& m_alloc;
145135
alignas(storage_alignment) std::byte m_storage[storage_size];
146136
Array* m_arr;
147137
bool m_nullable = true;
148-
mutable Spec* m_spec = nullptr;
149-
mutable size_t m_col_ndx = realm::npos;
150138
std::unique_ptr<ArrayString> m_string_enum_values;
151139
mutable StringInterner* m_string_interner = nullptr;
152140

src/realm/cluster.cpp

Lines changed: 3 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -154,12 +154,7 @@ void Cluster::create()
154154
do_create<ArrayDoubleNull>(col_key);
155155
break;
156156
case col_type_String: {
157-
if (m_tree_top.is_string_enum_type(col_ndx)) {
158-
do_create<ArrayInteger>(col_key);
159-
}
160-
else {
161-
do_create<ArrayString>(col_key);
162-
}
157+
do_create<ArrayString>(col_key);
163158
break;
164159
}
165160
case col_type_Binary:
@@ -267,17 +262,6 @@ inline void Cluster::set_string_interner(ArrayMixed& arr, ColKey col_key) const
267262
m_tree_top.set_string_interner(arr, col_key);
268263
}
269264

270-
template <class T>
271-
inline void Cluster::set_spec(T&, ColKey::Idx) const
272-
{
273-
}
274-
275-
template <>
276-
inline void Cluster::set_spec(ArrayString& arr, ColKey::Idx col_ndx) const
277-
{
278-
m_tree_top.set_spec(arr, col_ndx);
279-
}
280-
281265
template <class T>
282266
inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool nullable)
283267
{
@@ -286,7 +270,6 @@ inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool
286270
T arr(m_alloc);
287271
auto col_ndx = col.get_index();
288272
arr.set_parent(this, col_ndx.val + s_first_col_index);
289-
set_spec<T>(arr, col_ndx);
290273
set_string_interner<T>(arr, col);
291274
arr.init_from_parent();
292275
if (init_val.is_null()) {
@@ -507,13 +490,9 @@ void Cluster::move(size_t ndx, ClusterNode* new_node, int64_t offset)
507490
case col_type_Double:
508491
do_move<ArrayDouble>(ndx, col_key, new_leaf);
509492
break;
510-
case col_type_String: {
511-
if (m_tree_top.is_string_enum_type(col_key.get_index()))
512-
do_move<ArrayInteger>(ndx, col_key, new_leaf);
513-
else
514-
do_move<ArrayString>(ndx, col_key, new_leaf);
493+
case col_type_String:
494+
do_move<ArrayString>(ndx, col_key, new_leaf);
515495
break;
516-
}
517496
case col_type_Binary:
518497
do_move<ArrayBinary>(ndx, col_key, new_leaf);
519498
break;
@@ -781,7 +760,6 @@ inline void Cluster::do_erase(size_t ndx, ColKey col_key)
781760
auto col_ndx = col_key.get_index();
782761
T values(m_alloc);
783762
values.set_parent(this, col_ndx.val + s_first_col_index);
784-
set_spec<T>(values, col_ndx);
785763
set_string_interner<T>(values, col_key);
786764
values.init_from_parent();
787765
if constexpr (std::is_same_v<T, ArrayTypedLink>) {
@@ -1048,26 +1026,6 @@ void Cluster::nullify_incoming_links(RowKey key, CascadeState& state)
10481026
m_tree_top.get_owning_table()->for_each_backlink_column(nullify_fwd_links);
10491027
}
10501028

1051-
void Cluster::upgrade_string_to_enum(ColKey col_key, ArrayString& keys)
1052-
{
1053-
auto col_ndx = col_key.get_index();
1054-
Array indexes(m_alloc);
1055-
indexes.create(Array::type_Normal, false);
1056-
ArrayString values(m_alloc);
1057-
ref_type ref = Array::get_as_ref(col_ndx.val + s_first_col_index);
1058-
set_string_interner(values, col_key);
1059-
values.init_from_ref(ref);
1060-
size_t sz = values.size();
1061-
for (size_t i = 0; i < sz; i++) {
1062-
auto v = values.get(i);
1063-
size_t pos = keys.lower_bound(v);
1064-
REALM_ASSERT_3(pos, !=, keys.size());
1065-
indexes.add(pos);
1066-
}
1067-
Array::set(col_ndx.val + s_first_col_index, indexes.get_ref());
1068-
Array::destroy_deep(ref, m_alloc);
1069-
}
1070-
10711029
void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
10721030
{
10731031
auto col_ndx = col_key.get_index();
@@ -1080,9 +1038,6 @@ void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
10801038
if (leaf->need_string_interner()) {
10811039
m_tree_top.set_string_interner(*leaf, col_key);
10821040
}
1083-
if (leaf->need_spec()) {
1084-
m_tree_top.set_spec(*leaf, col_ndx);
1085-
}
10861041
leaf->init_from_ref(ref);
10871042
leaf->set_parent(const_cast<Cluster*>(this), col_ndx.val + 1);
10881043
}
@@ -1098,7 +1053,6 @@ template <typename ArrayType>
10981053
void Cluster::verify(ref_type ref, size_t index, util::Optional<size_t>& sz) const
10991054
{
11001055
ArrayType arr(get_alloc());
1101-
set_spec(arr, ColKey::Idx{unsigned(index) - 1});
11021056
auto table = get_owning_table();
11031057
REALM_ASSERT(index <= table->m_leaf_ndx2colkey.size());
11041058
auto col_key = table->m_leaf_ndx2colkey[index - 1];
@@ -1440,7 +1394,6 @@ void Cluster::dump_objects(int64_t key_offset, std::string lead) const
14401394
}
14411395
case col_type_String: {
14421396
ArrayString arr(m_alloc);
1443-
set_spec(arr, col.get_index());
14441397
set_string_interner(arr, col);
14451398
ref_type ref = Array::get_as_ref(j);
14461399
arr.init_from_ref(ref);

src/realm/cluster.hpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,6 @@ class Cluster : public ClusterNode {
321321
size_t get_ndx(RowKey key, size_t ndx) const noexcept override;
322322
size_t erase(RowKey k, CascadeState& state) override;
323323
void nullify_incoming_links(RowKey key, CascadeState& state) override;
324-
void upgrade_string_to_enum(ColKey col, ArrayString& keys);
325324

326325
void init_leaf(ColKey col, ArrayPayload* leaf) const;
327326
void add_leaf(ColKey col, ref_type ref);

0 commit comments

Comments
 (0)