Skip to content

Commit 50e48e0

Browse files
committed
Don't emit transaction log instructions for mutations on newly-created objects
The non-sync transaction logs are only used to drive notifications, and notifications don't care about mutations to objects made in the same commit in which those objects were created, so we don't need to emit those instructions at all. This significantly cuts the size of the transaction log for commits which are primarily inserting objects. The check for "newly-created" is very basic: it tracks the most recently created object for each table and skips mutation instructions for that object. This handles recursively creating an object and all of its embedded objects without the overhead of tracking every single object created within a transaction, and insertion workflows will typically not return to an object after creating another object in the same table. This requires adding a small amount of additional tracking for embedded objects, as Replication previously didn't know when new embedded objects were created.
1 parent 553d059 commit 50e48e0

File tree

7 files changed

+468
-204
lines changed

7 files changed

+468
-204
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
### Enhancements
44
* <New feature description> (PR [#????](https://github.com/realm/realm-core/pull/????))
55
* Include the originating client reset error in AutoClientResetFailure errors. ([#7761](https://github.com/realm/realm-core/pull/7761))
6+
* Reduce the size of the local transaction log produced by creating objects, improving the performance of insertion-heavy transactions ([PR #7734](https://github.com/realm/realm-core/pull/7734)).
67

78
### Fixed
89
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)

src/realm/replication.cpp

Lines changed: 97 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ void Replication::do_initiate_transact(Group&, version_type, bool)
5454
char* data = m_stream.get_data();
5555
size_t size = m_stream.get_size();
5656
m_encoder.set_buffer(data, data + size);
57+
m_most_recently_created_object.clear();
5758
}
5859

5960
Replication::version_type Replication::prepare_commit(version_type orig_version)
@@ -100,7 +101,6 @@ void Replication::erase_class(TableKey tk, StringData table_name, size_t)
100101
m_encoder.erase_class(tk); // Throws
101102
}
102103

103-
104104
void Replication::insert_column(const Table* t, ColKey col_key, DataType type, StringData col_name,
105105
Table* target_table)
106106
{
@@ -140,13 +140,29 @@ void Replication::erase_column(const Table* t, ColKey col_key)
140140
m_encoder.erase_column(col_key); // Throws
141141
}
142142

143+
void Replication::track_new_object(ObjKey key)
144+
{
145+
m_selected_obj = key;
146+
m_selected_collection = CollectionId();
147+
m_newly_created_object = true;
148+
149+
auto table_index = m_selected_table->get_index_in_group();
150+
if (table_index >= m_most_recently_created_object.size()) {
151+
if (table_index >= m_most_recently_created_object.capacity())
152+
m_most_recently_created_object.reserve(table_index * 2);
153+
m_most_recently_created_object.resize(table_index + 1);
154+
}
155+
m_most_recently_created_object[table_index] = m_selected_obj;
156+
}
157+
143158
void Replication::create_object(const Table* t, GlobalKey id)
144159
{
145160
if (auto logger = would_log(LogLevel::debug)) {
146161
logger->log(LogCategory::object, LogLevel::debug, "Create object '%1'", t->get_class_name());
147162
}
148163
select_table(t); // Throws
149164
m_encoder.create_object(id.get_local_key(0)); // Throws
165+
track_new_object(id.get_local_key(0)); // Throws
150166
}
151167

152168
void Replication::create_object_with_primary_key(const Table* t, ObjKey key, Mixed pk)
@@ -157,6 +173,14 @@ void Replication::create_object_with_primary_key(const Table* t, ObjKey key, Mix
157173
}
158174
select_table(t); // Throws
159175
m_encoder.create_object(key); // Throws
176+
track_new_object(key);
177+
}
178+
179+
void Replication::create_linked_object(const Table* t, ObjKey key)
180+
{
181+
select_table(t); // Throws
182+
track_new_object(key); // Throws
183+
// Does not need to encode anything as embedded tables can't be observed
160184
}
161185

162186
void Replication::remove_object(const Table* t, ObjKey key)
@@ -177,11 +201,27 @@ void Replication::remove_object(const Table* t, ObjKey key)
177201
m_encoder.remove_object(key); // Throws
178202
}
179203

180-
void Replication::select_obj(ObjKey key)
204+
void Replication::do_select_table(const Table* table)
181205
{
182-
if (key == m_selected_obj) {
183-
return;
206+
m_encoder.select_table(table->get_key()); // Throws
207+
m_selected_table = table;
208+
m_selected_collection = CollectionId();
209+
m_selected_obj = ObjKey();
210+
}
211+
212+
void Replication::do_select_obj(ObjKey key)
213+
{
214+
m_selected_obj = key;
215+
m_selected_collection = CollectionId();
216+
217+
auto table_index = m_selected_table->get_index_in_group();
218+
if (table_index < m_most_recently_created_object.size()) {
219+
m_newly_created_object = m_most_recently_created_object[table_index] == key;
184220
}
221+
else {
222+
m_newly_created_object = false;
223+
}
224+
185225
if (auto logger = would_log(LogLevel::debug)) {
186226
auto class_name = m_selected_table->get_class_name();
187227
if (m_selected_table->get_primary_key_column()) {
@@ -198,16 +238,28 @@ void Replication::select_obj(ObjKey key)
198238
logger->log(LogCategory::object, LogLevel::debug, "Mutating anonymous object '%1'[%2]", class_name, key);
199239
}
200240
}
201-
m_selected_obj = key;
202-
m_selected_list = CollectionId();
241+
}
242+
243+
void Replication::do_select_collection(const CollectionBase& coll)
244+
{
245+
select_table(coll.get_table().unchecked_ptr());
246+
ColKey col_key = coll.get_col_key();
247+
ObjKey key = coll.get_owner_key();
248+
auto path = coll.get_stable_path();
249+
250+
if (select_obj(key)) {
251+
m_encoder.select_collection(col_key, key, path); // Throws
252+
}
253+
m_selected_collection = CollectionId(coll.get_table()->get_key(), key, std::move(path));
203254
}
204255

205256
void Replication::do_set(const Table* t, ColKey col_key, ObjKey key, _impl::Instruction variant)
206257
{
207258
if (variant != _impl::Instruction::instr_SetDefault) {
208259
select_table(t); // Throws
209-
select_obj(key);
210-
m_encoder.modify_object(col_key, key); // Throws
260+
if (select_obj(key)) {
261+
m_encoder.modify_object(col_key, key); // Throws
262+
}
211263
}
212264
}
213265

@@ -243,8 +295,9 @@ void Replication::set(const Table* t, ColKey col_key, ObjKey key, Mixed value, _
243295
void Replication::nullify_link(const Table* t, ColKey col_key, ObjKey key)
244296
{
245297
select_table(t); // Throws
246-
select_obj(key);
247-
m_encoder.modify_object(col_key, key); // Throws
298+
if (select_obj(key)) {
299+
m_encoder.modify_object(col_key, key); // Throws
300+
}
248301
if (auto logger = would_log(LogLevel::trace)) {
249302
logger->log(LogCategory::object, LogLevel::trace, " Nullify '%1'", t->get_column_name(col_key));
250303
}
@@ -258,7 +311,6 @@ void Replication::add_int(const Table* t, ColKey col_key, ObjKey key, int_fast64
258311
}
259312
}
260313

261-
262314
Path Replication::get_prop_name(Path&& path) const
263315
{
264316
auto col_key = path[0].get_col_key();
@@ -308,33 +360,36 @@ void Replication::log_collection_operation(const char* operation, const Collecti
308360

309361
void Replication::list_insert(const CollectionBase& list, size_t list_ndx, Mixed value, size_t)
310362
{
311-
select_collection(list); // Throws
312-
m_encoder.collection_insert(list.translate_index(list_ndx)); // Throws
363+
if (select_collection(list)) { // Throws
364+
m_encoder.collection_insert(list.translate_index(list_ndx)); // Throws
365+
}
313366
log_collection_operation("Insert", list, value, int64_t(list_ndx));
314367
}
315368

316369
void Replication::list_set(const CollectionBase& list, size_t list_ndx, Mixed value)
317370
{
318-
select_collection(list); // Throws
319-
m_encoder.collection_set(list.translate_index(list_ndx)); // Throws
371+
if (select_collection(list)) { // Throws
372+
m_encoder.collection_set(list.translate_index(list_ndx)); // Throws
373+
}
320374
log_collection_operation("Set", list, value, int64_t(list_ndx));
321375
}
322376

323377
void Replication::list_erase(const CollectionBase& list, size_t link_ndx)
324378
{
325-
select_collection(list); // Throws
326-
m_encoder.collection_erase(list.translate_index(link_ndx)); // Throws
379+
if (select_collection(list)) { // Throws
380+
m_encoder.collection_erase(list.translate_index(link_ndx)); // Throws
381+
}
327382
if (auto logger = would_log(LogLevel::trace)) {
328-
329383
logger->log(LogCategory::object, LogLevel::trace, " Erase '%1' at position %2",
330384
get_prop_name(list.get_short_path()), link_ndx);
331385
}
332386
}
333387

334388
void Replication::list_move(const CollectionBase& list, size_t from_link_ndx, size_t to_link_ndx)
335389
{
336-
select_collection(list); // Throws
337-
m_encoder.collection_move(list.translate_index(from_link_ndx), list.translate_index(to_link_ndx)); // Throws
390+
if (select_collection(list)) { // Throws
391+
m_encoder.collection_move(list.translate_index(from_link_ndx), list.translate_index(to_link_ndx)); // Throws
392+
}
338393
if (auto logger = would_log(LogLevel::trace)) {
339394
logger->log(LogCategory::object, LogLevel::trace, " Move %1 to %2 in '%3'", from_link_ndx, to_link_ndx,
340395
get_prop_name(list.get_short_path()));
@@ -343,64 +398,34 @@ void Replication::list_move(const CollectionBase& list, size_t from_link_ndx, si
343398

344399
void Replication::set_insert(const CollectionBase& set, size_t set_ndx, Mixed value)
345400
{
346-
select_collection(set); // Throws
347-
m_encoder.collection_insert(set_ndx); // Throws
348-
log_collection_operation("Insert", set, value, Mixed());
401+
Replication::list_insert(set, set_ndx, value, 0); // Throws
349402
}
350403

351-
void Replication::set_erase(const CollectionBase& set, size_t set_ndx, Mixed value)
404+
void Replication::set_erase(const CollectionBase& set, size_t set_ndx, Mixed)
352405
{
353-
select_collection(set); // Throws
354-
m_encoder.collection_erase(set_ndx); // Throws
355-
if (auto logger = would_log(LogLevel::trace)) {
356-
logger->log(LogCategory::object, LogLevel::trace, " Erase %1 from '%2'", value,
357-
get_prop_name(set.get_short_path()));
358-
}
406+
Replication::list_erase(set, set_ndx); // Throws
359407
}
360408

361409
void Replication::set_clear(const CollectionBase& set)
362410
{
363-
select_collection(set); // Throws
364-
m_encoder.collection_clear(set.size()); // Throws
365-
if (auto logger = would_log(LogLevel::trace)) {
366-
logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", get_prop_name(set.get_short_path()));
367-
}
368-
}
369-
370-
void Replication::do_select_table(const Table* table)
371-
{
372-
m_encoder.select_table(table->get_key()); // Throws
373-
m_selected_table = table;
374-
m_selected_list = CollectionId();
375-
m_selected_obj = ObjKey();
376-
}
377-
378-
void Replication::do_select_collection(const CollectionBase& list)
379-
{
380-
select_table(list.get_table().unchecked_ptr());
381-
ColKey col_key = list.get_col_key();
382-
ObjKey key = list.get_owner_key();
383-
auto path = list.get_stable_path();
384-
385-
select_obj(key);
386-
387-
m_encoder.select_collection(col_key, key, path); // Throws
388-
m_selected_list = CollectionId(list.get_table()->get_key(), key, std::move(path));
411+
Replication::list_clear(set); // Throws
389412
}
390413

391414
void Replication::list_clear(const CollectionBase& list)
392415
{
393-
select_collection(list); // Throws
394-
m_encoder.collection_clear(list.size()); // Throws
416+
if (select_collection(list)) { // Throws
417+
m_encoder.collection_clear(list.size()); // Throws
418+
}
395419
if (auto logger = would_log(LogLevel::trace)) {
396420
logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", get_prop_name(list.get_short_path()));
397421
}
398422
}
399423

400424
void Replication::link_list_nullify(const Lst<ObjKey>& list, size_t link_ndx)
401425
{
402-
select_collection(list);
403-
m_encoder.collection_erase(link_ndx);
426+
if (select_collection(list)) { // Throws
427+
m_encoder.collection_erase(link_ndx);
428+
}
404429
if (auto logger = would_log(LogLevel::trace)) {
405430
logger->log(LogCategory::object, LogLevel::trace, " Nullify '%1' position %2",
406431
m_selected_table->get_column_name(list.get_col_key()), link_ndx);
@@ -409,22 +434,25 @@ void Replication::link_list_nullify(const Lst<ObjKey>& list, size_t link_ndx)
409434

410435
void Replication::dictionary_insert(const CollectionBase& dict, size_t ndx, Mixed key, Mixed value)
411436
{
412-
select_collection(dict);
413-
m_encoder.collection_insert(ndx);
437+
if (select_collection(dict)) { // Throws
438+
m_encoder.collection_insert(ndx);
439+
}
414440
log_collection_operation("Insert", dict, value, key);
415441
}
416442

417443
void Replication::dictionary_set(const CollectionBase& dict, size_t ndx, Mixed key, Mixed value)
418444
{
419-
select_collection(dict);
420-
m_encoder.collection_set(ndx);
445+
if (select_collection(dict)) { // Throws
446+
m_encoder.collection_set(ndx);
447+
}
421448
log_collection_operation("Set", dict, value, key);
422449
}
423450

424451
void Replication::dictionary_erase(const CollectionBase& dict, size_t ndx, Mixed key)
425452
{
426-
select_collection(dict);
427-
m_encoder.collection_erase(ndx);
453+
if (select_collection(dict)) { // Throws
454+
m_encoder.collection_erase(ndx);
455+
}
428456
if (auto logger = would_log(LogLevel::trace)) {
429457
logger->log(LogCategory::object, LogLevel::trace, " Erase %1 from '%2'", key,
430458
get_prop_name(dict.get_short_path()));
@@ -433,8 +461,9 @@ void Replication::dictionary_erase(const CollectionBase& dict, size_t ndx, Mixed
433461

434462
void Replication::dictionary_clear(const CollectionBase& dict)
435463
{
436-
select_collection(dict);
437-
m_encoder.collection_clear(dict.size());
464+
if (select_collection(dict)) { // Throws
465+
m_encoder.collection_clear(dict.size());
466+
}
438467
if (auto logger = would_log(LogLevel::trace)) {
439468
logger->log(LogCategory::object, LogLevel::trace, " Clear '%1'", get_prop_name(dict.get_short_path()));
440469
}

0 commit comments

Comments
 (0)