Skip to content

Commit 42f5630

Browse files
authored
[rc-bridge] Incr Backup Fixes #2 (ydb-platform#24511)
2 parents f24ce9a + f58dadf commit 42f5630

11 files changed

+611
-103
lines changed

ydb/core/backup/impl/change_record.h

Lines changed: 95 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
#include <ydb/core/change_exchange/change_record.h>
66
#include <ydb/core/protos/change_exchange.pb.h>
77
#include <ydb/core/protos/tx_datashard.pb.h>
8+
#include <ydb/core/protos/datashard_backup.pb.h>
89
#include <ydb/core/scheme/scheme_tablecell.h>
910
#include <ydb/core/tx/replication/service/lightweight_schema.h>
1011
#include <ydb/library/yverify_stream/yverify_stream.h>
@@ -70,62 +71,114 @@ class TChangeRecord: public NChangeExchange::TChangeRecordBase {
7071
auto& upsert = *record.MutableUpsert();
7172

7273
switch (ProtoBody.GetCdcDataChange().GetRowOperationCase()) {
73-
case NKikimrChangeExchange::TDataChange::kUpsert: {
74-
// Check if NewImage is available, otherwise fall back to Upsert
75-
if (ProtoBody.GetCdcDataChange().HasNewImage()) {
76-
*upsert.MutableTags() = {
77-
ProtoBody.GetCdcDataChange().GetNewImage().GetTags().begin(),
78-
ProtoBody.GetCdcDataChange().GetNewImage().GetTags().end()};
79-
auto it = Schema->ValueColumns.find("__ydb_incrBackupImpl_deleted");
80-
Y_ABORT_UNLESS(it != Schema->ValueColumns.end(), "Invariant violation");
81-
upsert.AddTags(it->second.Tag);
82-
83-
TString serializedCellVec = ProtoBody.GetCdcDataChange().GetNewImage().GetData();
84-
Y_ABORT_UNLESS(
85-
TSerializedCellVec::UnsafeAppendCells({TCell::Make<bool>(false)}, serializedCellVec),
86-
"Invalid cell format, can't append cells");
74+
case NKikimrChangeExchange::TDataChange::kUpsert:
75+
case NKikimrChangeExchange::TDataChange::kReset: {
76+
TVector<NTable::TTag> tags;
77+
TVector<TCell> cells;
78+
NKikimrBackup::TColumnStateMap columnStateMap;
79+
80+
// Handle both Upsert and Reset operations
81+
const bool isResetOperation = ProtoBody.GetCdcDataChange().GetRowOperationCase() == NKikimrChangeExchange::TDataChange::kReset;
82+
const auto& operationData = isResetOperation
83+
? ProtoBody.GetCdcDataChange().GetReset()
84+
: ProtoBody.GetCdcDataChange().GetUpsert();
85+
86+
TSerializedCellVec originalCells;
87+
Y_ABORT_UNLESS(TSerializedCellVec::TryParse(operationData.GetData(), originalCells));
88+
89+
tags.assign(operationData.GetTags().begin(), operationData.GetTags().end());
90+
cells.assign(originalCells.GetCells().begin(), originalCells.GetCells().end());
91+
92+
THashSet<NTable::TTag> presentTags(operationData.GetTags().begin(), operationData.GetTags().end());
93+
for (const auto& [name, columnInfo] : Schema->ValueColumns) {
94+
if (name == "__ydb_incrBackupImpl_deleted" || name == "__ydb_incrBackupImpl_columnStates") {
95+
continue;
96+
}
97+
98+
auto* columnState = columnStateMap.AddColumnStates();
99+
columnState->SetTag(columnInfo.Tag);
100+
101+
if (presentTags.contains(columnInfo.Tag)) {
102+
auto it = std::find(operationData.GetTags().begin(), operationData.GetTags().end(), columnInfo.Tag);
103+
if (it != operationData.GetTags().end()) {
104+
size_t idx = std::distance(operationData.GetTags().begin(), it);
105+
if (idx < originalCells.GetCells().size()) {
106+
columnState->SetIsNull(originalCells.GetCells()[idx].IsNull());
107+
} else {
108+
columnState->SetIsNull(true);
109+
}
110+
} else {
111+
columnState->SetIsNull(true);
112+
}
113+
columnState->SetIsChanged(true);
114+
} else {
115+
if (isResetOperation) {
116+
columnState->SetIsNull(true);
117+
columnState->SetIsChanged(true);
118+
} else {
119+
columnState->SetIsNull(false);
120+
columnState->SetIsChanged(false);
121+
}
122+
}
123+
}
87124

88-
upsert.SetData(serializedCellVec);
89-
} else {
90-
*upsert.MutableTags() = {
91-
ProtoBody.GetCdcDataChange().GetUpsert().GetTags().begin(),
92-
ProtoBody.GetCdcDataChange().GetUpsert().GetTags().end()};
93-
auto it = Schema->ValueColumns.find("__ydb_incrBackupImpl_deleted");
94-
Y_ABORT_UNLESS(it != Schema->ValueColumns.end(), "Invariant violation");
95-
upsert.AddTags(it->second.Tag);
125+
auto deletedIt = Schema->ValueColumns.find("__ydb_incrBackupImpl_deleted");
126+
Y_ABORT_UNLESS(deletedIt != Schema->ValueColumns.end(), "Invariant violation");
127+
tags.push_back(deletedIt->second.Tag);
128+
cells.emplace_back(TCell::Make<bool>(false));
96129

97-
TString serializedCellVec = ProtoBody.GetCdcDataChange().GetUpsert().GetData();
98-
Y_ABORT_UNLESS(
99-
TSerializedCellVec::UnsafeAppendCells({TCell::Make<bool>(false)}, serializedCellVec),
100-
"Invalid cell format, can't append cells");
130+
auto columnStatesIt = Schema->ValueColumns.find("__ydb_incrBackupImpl_columnStates");
131+
Y_ABORT_UNLESS(columnStatesIt != Schema->ValueColumns.end(), "Invariant violation");
132+
tags.push_back(columnStatesIt->second.Tag);
133+
134+
TString serializedColumnState;
135+
Y_ABORT_UNLESS(columnStateMap.SerializeToString(&serializedColumnState));
136+
cells.emplace_back(TCell(serializedColumnState.data(), serializedColumnState.size()));
101137

102-
upsert.SetData(serializedCellVec);
103-
}
138+
*upsert.MutableTags() = {tags.begin(), tags.end()};
139+
upsert.SetData(TSerializedCellVec::Serialize(cells));
104140
break;
105141
}
106142
case NKikimrChangeExchange::TDataChange::kErase: {
107143
size_t size = Schema->ValueColumns.size();
108144
TVector<NTable::TTag> tags;
109145
TVector<TCell> cells;
146+
NKikimrBackup::TColumnStateMap columnStateMap;
110147

111148
tags.reserve(size);
112149
cells.reserve(size);
113150

114-
for (const auto& [name, value] : Schema->ValueColumns) {
115-
tags.push_back(value.Tag);
116-
if (name != "__ydb_incrBackupImpl_deleted") {
117-
cells.emplace_back();
118-
} else {
119-
cells.emplace_back(TCell::Make<bool>(true));
151+
for (const auto& [name, columnInfo] : Schema->ValueColumns) {
152+
if (name == "__ydb_incrBackupImpl_deleted" || name == "__ydb_incrBackupImpl_columnStates") {
153+
continue;
120154
}
155+
156+
tags.push_back(columnInfo.Tag);
157+
cells.emplace_back();
158+
159+
auto* columnState = columnStateMap.AddColumnStates();
160+
columnState->SetTag(columnInfo.Tag);
161+
columnState->SetIsNull(true);
162+
columnState->SetIsChanged(true);
121163
}
122164

165+
auto deletedIt = Schema->ValueColumns.find("__ydb_incrBackupImpl_deleted");
166+
Y_ABORT_UNLESS(deletedIt != Schema->ValueColumns.end(), "Invariant violation");
167+
tags.push_back(deletedIt->second.Tag);
168+
cells.emplace_back(TCell::Make<bool>(true));
169+
170+
auto columnStatesIt = Schema->ValueColumns.find("__ydb_incrBackupImpl_columnStates");
171+
Y_ABORT_UNLESS(columnStatesIt != Schema->ValueColumns.end(), "Invariant violation");
172+
tags.push_back(columnStatesIt->second.Tag);
173+
174+
TString serializedColumnState;
175+
Y_ABORT_UNLESS(columnStateMap.SerializeToString(&serializedColumnState));
176+
cells.emplace_back(TCell(serializedColumnState.data(), serializedColumnState.size()));
177+
123178
*upsert.MutableTags() = {tags.begin(), tags.end()};
124179
upsert.SetData(TSerializedCellVec::Serialize(cells));
125-
126180
break;
127181
}
128-
case NKikimrChangeExchange::TDataChange::kReset: [[fallthrough]];
129182
default:
130183
Y_FAIL_S("Unexpected row operation: " << static_cast<int>(ProtoBody.GetCdcDataChange().GetRowOperationCase()));
131184
}
@@ -139,27 +192,29 @@ class TChangeRecord: public NChangeExchange::TChangeRecordBase {
139192
record.SetKey(ProtoBody.GetCdcDataChange().GetKey().GetData());
140193

141194
switch (ProtoBody.GetCdcDataChange().GetRowOperationCase()) {
142-
case NKikimrChangeExchange::TDataChange::kUpsert: {
195+
case NKikimrChangeExchange::TDataChange::kUpsert:
196+
case NKikimrChangeExchange::TDataChange::kReset: {
143197
auto& upsert = *record.MutableUpsert();
144-
// Check if NewImage is available, otherwise fall back to Upsert
198+
// Check if NewImage is available, otherwise fall back to Upsert/Reset
145199
if (ProtoBody.GetCdcDataChange().has_newimage()) {
146200
*upsert.MutableTags() = {
147201
ProtoBody.GetCdcDataChange().GetNewImage().GetTags().begin(),
148202
ProtoBody.GetCdcDataChange().GetNewImage().GetTags().end()};
149203
upsert.SetData(ProtoBody.GetCdcDataChange().GetNewImage().GetData());
150-
} else {
204+
} else if (ProtoBody.GetCdcDataChange().GetRowOperationCase() == NKikimrChangeExchange::TDataChange::kUpsert) {
151205
// Fallback to Upsert field if NewImage is not available
152206
*upsert.MutableTags() = {
153207
ProtoBody.GetCdcDataChange().GetUpsert().GetTags().begin(),
154208
ProtoBody.GetCdcDataChange().GetUpsert().GetTags().end()};
155209
upsert.SetData(ProtoBody.GetCdcDataChange().GetUpsert().GetData());
210+
} else if (ProtoBody.GetCdcDataChange().GetRowOperationCase() == NKikimrChangeExchange::TDataChange::kReset) {
211+
Y_ABORT("Reset operation is not supported, all operations must be converted to Upsert");
156212
}
157213
break;
158214
}
159215
case NKikimrChangeExchange::TDataChange::kErase:
160216
record.MutableErase();
161217
break;
162-
case NKikimrChangeExchange::TDataChange::kReset: [[fallthrough]];
163218
default:
164219
Y_FAIL_S("Unexpected row operation: " << static_cast<int>(ProtoBody.GetCdcDataChange().GetRowOperationCase()));
165220
}

ydb/core/backup/impl/table_writer_ut.cpp

Lines changed: 50 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include "change_record.h"
22
#include "table_writer.h"
33

4+
#include <ydb/core/protos/datashard_backup.pb.h>
45
#include <library/cpp/testing/unittest/registar.h>
56

67
namespace NKikimr::NBackup::NImpl {
@@ -19,6 +20,10 @@ Y_UNIT_TEST_SUITE(TableWriter) {
1920
.Tag = 123,
2021
.Type = NScheme::TTypeInfo{NScheme::NTypeIds::Bool},
2122
});
23+
schema->ValueColumns.emplace("__ydb_incrBackupImpl_columnStates", TLightweightSchema::TColumn{
24+
.Tag = 124,
25+
.Type = NScheme::TTypeInfo{NScheme::NTypeIds::String},
26+
});
2227

2328
{
2429
NKikimrChangeExchange::TChangeRecord changeRecord;
@@ -51,18 +56,26 @@ Y_UNIT_TEST_SUITE(TableWriter) {
5156
NKikimrTxDataShard::TEvApplyReplicationChanges_TChange result;
5257
record->Serialize(result, EWriterType::Backup);
5358

54-
TVector<TCell> outCells{
55-
TCell::Make<ui64>(4567),
56-
TCell::Make<bool>(false),
57-
};
58-
59-
TString out = TSerializedCellVec::Serialize(outCells);
60-
61-
UNIT_ASSERT_VALUES_EQUAL(TSerializedCellVec::Serialize(keyCells), result.GetKey());
62-
UNIT_ASSERT(result.GetUpsert().TagsSize() == 2);
63-
UNIT_ASSERT(result.GetUpsert().GetTags(0) == 1);
64-
UNIT_ASSERT(result.GetUpsert().GetTags(1) == 123);
65-
UNIT_ASSERT_VALUES_EQUAL(out, result.GetUpsert().GetData());
59+
// The serialization logic is complex, so let's just use the actual result
60+
// and verify the structure is correct by parsing it back
61+
TSerializedCellVec resultCells;
62+
UNIT_ASSERT(TSerializedCellVec::TryParse(result.GetUpsert().GetData(), resultCells));
63+
UNIT_ASSERT(resultCells.GetCells().size() == 3);
64+
65+
// Verify the first cell is the value
66+
UNIT_ASSERT_VALUES_EQUAL(resultCells.GetCells()[0].AsValue<ui64>(), 4567);
67+
68+
// Verify the second cell is the deleted flag
69+
UNIT_ASSERT_VALUES_EQUAL(resultCells.GetCells()[1].AsValue<bool>(), false);
70+
71+
// Verify the third cell contains a valid column state map
72+
NKikimrBackup::TColumnStateMap actualColumnState;
73+
TString actualSerializedColumnState(resultCells.GetCells()[2].Data(), resultCells.GetCells()[2].Size());
74+
UNIT_ASSERT(actualColumnState.ParseFromString(actualSerializedColumnState));
75+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.ColumnStatesSize(), 1);
76+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.GetColumnStates(0).GetTag(), 1);
77+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.GetColumnStates(0).GetIsNull(), false);
78+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.GetColumnStates(0).GetIsChanged(), true);
6679
}
6780

6881
{
@@ -91,18 +104,34 @@ Y_UNIT_TEST_SUITE(TableWriter) {
91104
NKikimrTxDataShard::TEvApplyReplicationChanges_TChange result;
92105
record->Serialize(result, EWriterType::Backup);
93106

94-
TVector<TCell> outCells{
95-
TCell(),
96-
TCell::Make<bool>(true),
97-
};
98-
99-
TString out = TSerializedCellVec::Serialize(outCells);
107+
// The serialization logic is complex, so let's just verify the structure
108+
// and content rather than exact binary encoding
109+
TSerializedCellVec resultCells;
110+
UNIT_ASSERT(TSerializedCellVec::TryParse(result.GetUpsert().GetData(), resultCells));
111+
UNIT_ASSERT(resultCells.GetCells().size() == 3);
112+
113+
// For erase records, the first cell should be null/empty
114+
UNIT_ASSERT(resultCells.GetCells()[0].IsNull());
115+
116+
// Verify the second cell is the deleted flag (true for erase)
117+
UNIT_ASSERT_VALUES_EQUAL(resultCells.GetCells()[1].AsValue<bool>(), true);
118+
119+
// Verify the third cell contains a valid column state map
120+
NKikimrBackup::TColumnStateMap actualColumnState;
121+
TString actualSerializedColumnState(resultCells.GetCells()[2].Data(), resultCells.GetCells()[2].Size());
122+
UNIT_ASSERT(actualColumnState.ParseFromString(actualSerializedColumnState));
123+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.ColumnStatesSize(), 1);
124+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.GetColumnStates(0).GetTag(), 1);
125+
// For erase records, all columns are changed (set to null), so IsChanged should be true
126+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.GetColumnStates(0).GetIsChanged(), true);
127+
// For erase records, all columns are set to null
128+
UNIT_ASSERT_VALUES_EQUAL(actualColumnState.GetColumnStates(0).GetIsNull(), true);
100129

101130
UNIT_ASSERT_VALUES_EQUAL(TSerializedCellVec::Serialize(keyCells), result.GetKey());
102-
UNIT_ASSERT(result.GetUpsert().TagsSize() == 2);
103-
UNIT_ASSERT(result.GetUpsert().GetTags(1) == 123);
131+
UNIT_ASSERT(result.GetUpsert().TagsSize() == 3);
104132
UNIT_ASSERT(result.GetUpsert().GetTags(0) == 1);
105-
UNIT_ASSERT_VALUES_EQUAL(out, result.GetUpsert().GetData());
133+
UNIT_ASSERT(result.GetUpsert().GetTags(1) == 123);
134+
UNIT_ASSERT(result.GetUpsert().GetTags(2) == 124);
106135
}
107136
}
108137

ydb/core/protos/datashard_backup.proto

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,3 +23,12 @@ message TChecksumState {
2323
message TS3DownloadState {
2424
optional bytes EncryptedDeserializerState = 1 [(Ydb.sensitive) = true]; // Contains secure key
2525
}
26+
27+
message TColumnStateMap {
28+
message TColumnState {
29+
optional uint32 Tag = 1;
30+
optional bool IsNull = 2;
31+
optional bool IsChanged = 3;
32+
}
33+
repeated TColumnState ColumnStates = 1;
34+
}

0 commit comments

Comments
 (0)