Skip to content

Commit c3bf44a

Browse files
committed
[hotfix] Fix issue where union read cannot be restored
1 parent eb75d55 commit c3bf44a

File tree

3 files changed

+121
-19
lines changed

3 files changed

+121
-19
lines changed

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/state/FlussSourceEnumeratorStateSerializer.java

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,17 @@
1717

1818
package org.apache.fluss.flink.source.state;
1919

20+
import org.apache.flink.core.io.SimpleVersionedSerializer;
21+
import org.apache.flink.core.memory.DataInputDeserializer;
22+
import org.apache.flink.core.memory.DataOutputSerializer;
23+
import org.apache.fluss.annotation.VisibleForTesting;
2024
import org.apache.fluss.flink.source.split.SourceSplitBase;
2125
import org.apache.fluss.flink.source.split.SourceSplitSerializer;
2226
import org.apache.fluss.lake.source.LakeSource;
2327
import org.apache.fluss.lake.source.LakeSplit;
2428
import org.apache.fluss.metadata.TableBucket;
2529

26-
import org.apache.flink.core.io.SimpleVersionedSerializer;
27-
import org.apache.flink.core.memory.DataInputDeserializer;
28-
import org.apache.flink.core.memory.DataOutputSerializer;
29-
3030
import javax.annotation.Nullable;
31-
3231
import java.io.IOException;
3332
import java.util.ArrayList;
3433
import java.util.HashMap;
@@ -37,17 +36,21 @@
3736
import java.util.Map;
3837
import java.util.Set;
3938

39+
import static org.apache.fluss.utils.Preconditions.checkNotNull;
40+
4041
/** A serializer for {@link SourceEnumeratorState}. */
4142
public class FlussSourceEnumeratorStateSerializer
4243
implements SimpleVersionedSerializer<SourceEnumeratorState> {
4344

4445
@Nullable private final LakeSource<LakeSplit> lakeSource;
4546

4647
private static final int VERSION_0 = 0;
48+
private static final int VERSION_1 = 1;
49+
4750
private static final ThreadLocal<DataOutputSerializer> SERIALIZER_CACHE =
4851
ThreadLocal.withInitial(() -> new DataOutputSerializer(64));
4952

50-
private static final int CURRENT_VERSION = VERSION_0;
53+
private static final int CURRENT_VERSION = VERSION_1;
5154

5255
public FlussSourceEnumeratorStateSerializer(LakeSource<LakeSplit> lakeSource) {
5356
this.lakeSource = lakeSource;
@@ -61,11 +64,28 @@ public int getVersion() {
6164
@Override
6265
public byte[] serialize(SourceEnumeratorState state) throws IOException {
6366
final DataOutputSerializer out = SERIALIZER_CACHE.get();
67+
68+
// serialize assign bucket and partitions
69+
serializeAssignBucketAndPartitions(
70+
out, state.getAssignedBuckets(), state.getAssignedPartitions());
71+
72+
// serialize remain hybrid lake splits
73+
serializeRemainingHybridLakeFlussSplits(out, state);
74+
75+
final byte[] result = out.getCopyOfBuffer();
76+
out.clear();
77+
return result;
78+
}
79+
80+
private void serializeAssignBucketAndPartitions(
81+
DataOutputSerializer out,
82+
Set<TableBucket> assignedBuckets,
83+
Map<Long, String> assignedPartitions)
84+
throws IOException {
6485
// write assigned buckets
65-
out.writeInt(state.getAssignedBuckets().size());
66-
for (TableBucket tableBucket : state.getAssignedBuckets()) {
86+
out.writeInt(assignedBuckets.size());
87+
for (TableBucket tableBucket : assignedBuckets) {
6788
out.writeLong(tableBucket.getTableId());
68-
6989
// write partition
7090
// if partition is not null
7191
if (tableBucket.getPartitionId() != null) {
@@ -78,24 +98,29 @@ public byte[] serialize(SourceEnumeratorState state) throws IOException {
7898
out.writeInt(tableBucket.getBucket());
7999
}
80100
// write assigned partitions
81-
out.writeInt(state.getAssignedPartitions().size());
82-
for (Map.Entry<Long, String> entry : state.getAssignedPartitions().entrySet()) {
101+
out.writeInt(assignedPartitions.size());
102+
for (Map.Entry<Long, String> entry : assignedPartitions.entrySet()) {
83103
out.writeLong(entry.getKey());
84104
out.writeUTF(entry.getValue());
85105
}
106+
}
86107

108+
@VisibleForTesting
109+
protected byte[] serializeV0(SourceEnumeratorState state) throws IOException {
110+
final DataOutputSerializer out = SERIALIZER_CACHE.get();
111+
serializeAssignBucketAndPartitions(
112+
out, state.getAssignedBuckets(), state.getAssignedPartitions());
87113
if (lakeSource != null) {
88114
serializeRemainingHybridLakeFlussSplits(out, state);
89115
}
90-
91116
final byte[] result = out.getCopyOfBuffer();
92117
out.clear();
93118
return result;
94119
}
95120

96121
@Override
97122
public SourceEnumeratorState deserialize(int version, byte[] serialized) throws IOException {
98-
if (version != VERSION_0) {
123+
if (version != VERSION_0 && version != CURRENT_VERSION) {
99124
throw new IOException("Unknown version or corrupt state: " + version);
100125
}
101126
final DataInputDeserializer in = new DataInputDeserializer(serialized);
@@ -124,8 +149,18 @@ public SourceEnumeratorState deserialize(int version, byte[] serialized) throws
124149
}
125150

126151
List<SourceSplitBase> remainingHybridLakeFlussSplits = null;
127-
if (lakeSource != null) {
128-
// todo: add a ut for serialize remaining hybrid lake fluss splits
152+
153+
if (version == VERSION_0) {
154+
// For VERSION_0, deserialize remaining hybrid lake Fluss splits only when lakeSource is
155+
// not null.
156+
if (lakeSource != null) {
157+
remainingHybridLakeFlussSplits = deserializeRemainingHybridLakeFlussSplits(in);
158+
}
159+
} else {
160+
// For VERSION_1 and later, always attempt to deserialize remaining hybrid lake/Fluss
161+
// splits. The serialized state encodes their presence via a boolean flag, so this
162+
// logic no longer depends on the lakeSource flag. This unconditional deserialization
163+
// is the intended behavior change compared to VERSION_0.
129164
remainingHybridLakeFlussSplits = deserializeRemainingHybridLakeFlussSplits(in);
130165
}
131166

@@ -160,7 +195,11 @@ private List<SourceSplitBase> deserializeRemainingHybridLakeFlussSplits(
160195
if (in.readBoolean()) {
161196
int numSplits = in.readInt();
162197
List<SourceSplitBase> splits = new ArrayList<>(numSplits);
163-
SourceSplitSerializer sourceSplitSerializer = new SourceSplitSerializer(lakeSource);
198+
SourceSplitSerializer sourceSplitSerializer =
199+
new SourceSplitSerializer(
200+
checkNotNull(
201+
lakeSource,
202+
"lake source must not be null when there are hybrid lake splits."));
164203
int version = in.readInt();
165204
for (int i = 0; i < numSplits; i++) {
166205
int splitSizeInBytes = in.readInt();

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/state/SourceEnumeratorState.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import org.apache.fluss.metadata.TableBucket;
2222

2323
import javax.annotation.Nullable;
24-
2524
import java.util.List;
2625
import java.util.Map;
2726
import java.util.Objects;
@@ -73,12 +72,14 @@ public boolean equals(Object o) {
7372
}
7473
SourceEnumeratorState that = (SourceEnumeratorState) o;
7574
return Objects.equals(assignedBuckets, that.assignedBuckets)
76-
&& Objects.equals(assignedPartitions, that.assignedPartitions);
75+
&& Objects.equals(assignedPartitions, that.assignedPartitions)
76+
&& Objects.equals(
77+
remainingHybridLakeFlussSplits, that.remainingHybridLakeFlussSplits);
7778
}
7879

7980
@Override
8081
public int hashCode() {
81-
return Objects.hash(assignedBuckets, assignedPartitions);
82+
return Objects.hash(assignedBuckets, assignedPartitions, remainingHybridLakeFlussSplits);
8283
}
8384

8485
@Override

fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/state/SourceEnumeratorStateSerializerTest.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,66 @@ void testPendingSplitsCheckpointSerde() throws Exception {
9292
/* check deserialized is equal to the original */
9393
assertThat(deserializedSourceEnumeratorState).isEqualTo(sourceEnumeratorState);
9494
}
95+
96+
@Test
97+
void testV0Compatibility() throws Exception {
98+
// serialize with v0,
99+
int version = 0;
100+
// test with lake source = null
101+
FlussSourceEnumeratorStateSerializer serializer =
102+
new FlussSourceEnumeratorStateSerializer(null);
103+
104+
Set<TableBucket> assignedBuckets =
105+
new HashSet<>(Arrays.asList(new TableBucket(1, 0), new TableBucket(1, 4L, 1)));
106+
Map<Long, String> assignedPartitions = new HashMap<>();
107+
assignedPartitions.put(1L, "partition1");
108+
assignedPartitions.put(2L, "partition2");
109+
SourceEnumeratorState sourceEnumeratorState =
110+
new SourceEnumeratorState(assignedBuckets, assignedPartitions, null);
111+
byte[] serialized = serializer.serializeV0(sourceEnumeratorState);
112+
113+
// then deserialize
114+
SourceEnumeratorState deserializedSourceEnumeratorState =
115+
serializer.deserialize(version, serialized);
116+
assertThat(deserializedSourceEnumeratorState).isEqualTo(sourceEnumeratorState);
117+
118+
// test with lake source is not null
119+
serializer = new FlussSourceEnumeratorStateSerializer(new TestingLakeSource());
120+
List<SourceSplitBase> remainingHybridLakeFlussSplits = new ArrayList<>();
121+
// Add a LogSplit
122+
TableBucket logSplitBucket = new TableBucket(1, 0);
123+
LogSplit logSplit = new LogSplit(logSplitBucket, null, 100L);
124+
remainingHybridLakeFlussSplits.add(logSplit);
125+
sourceEnumeratorState =
126+
new SourceEnumeratorState(
127+
assignedBuckets, assignedPartitions, remainingHybridLakeFlussSplits);
128+
129+
serialized = serializer.serializeV0(sourceEnumeratorState);
130+
131+
// then deserialize
132+
deserializedSourceEnumeratorState = serializer.deserialize(version, serialized);
133+
assertThat(deserializedSourceEnumeratorState).isEqualTo(sourceEnumeratorState);
134+
}
135+
136+
@Test
137+
void testInconsistentLakeSourceSerde() throws Exception {
138+
// test serialize with null lake source
139+
FlussSourceEnumeratorStateSerializer serializer =
140+
new FlussSourceEnumeratorStateSerializer(null);
141+
142+
Set<TableBucket> assignedBuckets =
143+
new HashSet<>(Arrays.asList(new TableBucket(1, 0), new TableBucket(1, 4L, 1)));
144+
Map<Long, String> assignedPartitions = new HashMap<>();
145+
assignedPartitions.put(1L, "partition1");
146+
assignedPartitions.put(2L, "partition2");
147+
SourceEnumeratorState sourceEnumeratorState =
148+
new SourceEnumeratorState(assignedBuckets, assignedPartitions, null);
149+
byte[] serialized = serializer.serialize(sourceEnumeratorState);
150+
151+
// test deserialize with nonnull lake source
152+
serializer = new FlussSourceEnumeratorStateSerializer(new TestingLakeSource());
153+
SourceEnumeratorState deserializedSourceEnumeratorState =
154+
serializer.deserialize(serializer.getVersion(), serialized);
155+
assertThat(deserializedSourceEnumeratorState).isEqualTo(sourceEnumeratorState);
156+
}
95157
}

0 commit comments

Comments (0)