Skip to content

Commit bbc445a

Browse files
zcoowuchong
authored and committed
[server] Support Coordinator High Availability
1 parent f45f41b commit bbc445a

29 files changed

+1692
-73
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.fluss.exception;
19+
20+
/** Exception thrown when a request is sent to a standby coordinator server (since 0.9). */
21+
public class NotCoordinatorLeaderException extends ApiException {
22+
23+
private static final long serialVersionUID = 1L;
24+
25+
public NotCoordinatorLeaderException(String message) {
26+
super(message);
27+
}
28+
29+
public NotCoordinatorLeaderException(String message, Throwable cause) {
30+
super(message, cause);
31+
}
32+
}

fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public class MetricNames {
3636
// metrics for coordinator server
3737
// --------------------------------------------------------------------------------------------
3838
public static final String ACTIVE_COORDINATOR_COUNT = "activeCoordinatorCount";
39+
public static final String ALIVE_COORDINATOR_COUNT = "aliveCoordinatorCount";
3940
public static final String ACTIVE_TABLET_SERVER_COUNT = "activeTabletServerCount";
4041
public static final String OFFLINE_BUCKET_COUNT = "offlineBucketCount";
4142
public static final String TABLE_COUNT = "tableCount";

fluss-rpc/src/main/java/org/apache/fluss/rpc/protocol/Errors.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.apache.fluss.exception.NetworkException;
5757
import org.apache.fluss.exception.NoRebalanceInProgressException;
5858
import org.apache.fluss.exception.NonPrimaryKeyTableException;
59+
import org.apache.fluss.exception.NotCoordinatorLeaderException;
5960
import org.apache.fluss.exception.NotEnoughReplicasAfterAppendException;
6061
import org.apache.fluss.exception.NotEnoughReplicasException;
6162
import org.apache.fluss.exception.NotLeaderOrFollowerException;
@@ -247,7 +248,11 @@ public enum Errors {
247248
63,
248249
"The client has attempted to perform an operation with an invalid producer ID.",
249250
InvalidProducerIdException::new),
250-
CONFIG_EXCEPTION(64, "A configuration error occurred.", ConfigException::new);
251+
CONFIG_EXCEPTION(64, "A configuration error occurred.", ConfigException::new),
252+
NOT_COORDINATOR_LEADER_EXCEPTION(
253+
65,
254+
"The coordinator is not a leader and cannot process request.",
255+
NotCoordinatorLeaderException::new);
251256

252257
private static final Logger LOG = LoggerFactory.getLogger(Errors.class);
253258

fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ public class CoordinatorContext {
6767
// a success deletion.
6868
private final Map<TableBucketReplica, Integer> failDeleteNumbers = new HashMap<>();
6969

70+
private final Set<String> liveCoordinatorServers = new HashSet<>();
7071
private final Map<Integer, ServerInfo> liveTabletServers = new HashMap<>();
7172
private final Set<Integer> shuttingDownTabletServers = new HashSet<>();
7273

@@ -115,6 +116,23 @@ public int getCoordinatorEpoch() {
115116
return coordinatorEpoch;
116117
}
117118

119+
public Set<String> getLiveCoordinatorServers() {
120+
return liveCoordinatorServers;
121+
}
122+
123+
public void setLiveCoordinators(Set<String> servers) {
124+
liveCoordinatorServers.clear();
125+
liveCoordinatorServers.addAll(servers);
126+
}
127+
128+
public void addLiveCoordinator(String serverId) {
129+
this.liveCoordinatorServers.add(serverId);
130+
}
131+
132+
public void removeLiveCoordinator(String serverId) {
133+
this.liveCoordinatorServers.remove(serverId);
134+
}
135+
118136
public Map<Integer, ServerInfo> getLiveTabletServers() {
119137
return liveTabletServers;
120138
}

fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorEventProcessor.java

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,15 @@
7171
import org.apache.fluss.server.coordinator.event.CoordinatorEventManager;
7272
import org.apache.fluss.server.coordinator.event.CreatePartitionEvent;
7373
import org.apache.fluss.server.coordinator.event.CreateTableEvent;
74+
import org.apache.fluss.server.coordinator.event.DeadCoordinatorEvent;
7475
import org.apache.fluss.server.coordinator.event.DeadTabletServerEvent;
7576
import org.apache.fluss.server.coordinator.event.DeleteReplicaResponseReceivedEvent;
7677
import org.apache.fluss.server.coordinator.event.DropPartitionEvent;
7778
import org.apache.fluss.server.coordinator.event.DropTableEvent;
7879
import org.apache.fluss.server.coordinator.event.EventProcessor;
7980
import org.apache.fluss.server.coordinator.event.FencedCoordinatorEvent;
8081
import org.apache.fluss.server.coordinator.event.ListRebalanceProgressEvent;
82+
import org.apache.fluss.server.coordinator.event.NewCoordinatorEvent;
8183
import org.apache.fluss.server.coordinator.event.NewTabletServerEvent;
8284
import org.apache.fluss.server.coordinator.event.NotifyKvSnapshotOffsetEvent;
8385
import org.apache.fluss.server.coordinator.event.NotifyLakeTableOffsetEvent;
@@ -86,6 +88,7 @@
8688
import org.apache.fluss.server.coordinator.event.RemoveServerTagEvent;
8789
import org.apache.fluss.server.coordinator.event.SchemaChangeEvent;
8890
import org.apache.fluss.server.coordinator.event.TableRegistrationChangeEvent;
91+
import org.apache.fluss.server.coordinator.event.watcher.CoordinatorChangeWatcher;
8992
import org.apache.fluss.server.coordinator.event.watcher.TableChangeWatcher;
9093
import org.apache.fluss.server.coordinator.event.watcher.TabletServerChangeWatcher;
9194
import org.apache.fluss.server.coordinator.lease.KvSnapshotLeaseManager;
@@ -172,6 +175,7 @@ public class CoordinatorEventProcessor implements EventProcessor {
172175
private final LakeTableTieringManager lakeTableTieringManager;
173176
private final TableChangeWatcher tableChangeWatcher;
174177
private final CoordinatorChannelManager coordinatorChannelManager;
178+
private final CoordinatorChangeWatcher coordinatorChangeWatcher;
175179
private final TabletServerChangeWatcher tabletServerChangeWatcher;
176180
private final CoordinatorMetadataCache serverMetadataCache;
177181
private final CoordinatorRequestBatch coordinatorRequestBatch;
@@ -224,6 +228,8 @@ public CoordinatorEventProcessor(
224228
tableBucketStateMachine,
225229
new RemoteStorageCleaner(conf, ioExecutor),
226230
ioExecutor);
231+
this.coordinatorChangeWatcher =
232+
new CoordinatorChangeWatcher(zooKeeperClient, coordinatorEventManager);
227233
this.tableChangeWatcher = new TableChangeWatcher(zooKeeperClient, coordinatorEventManager);
228234
this.tabletServerChangeWatcher =
229235
new TabletServerChangeWatcher(zooKeeperClient, coordinatorEventManager);
@@ -263,6 +269,7 @@ public CoordinatorContext getCoordinatorContext() {
263269
public void startup() {
264270
coordinatorContext.setCoordinatorServerInfo(getCoordinatorServerInfo());
265271
// start watchers first so that we won't miss any node changes in zk
272+
coordinatorChangeWatcher.start();
266273
tabletServerChangeWatcher.start();
267274
tableChangeWatcher.start();
268275
LOG.info("Initializing coordinator context.");
@@ -306,12 +313,9 @@ public void shutdown() {
306313
private ServerInfo getCoordinatorServerInfo() {
307314
try {
308315
return zooKeeperClient
309-
.getCoordinatorAddress()
316+
.getCoordinatorLeaderAddress()
310317
.map(
311318
coordinatorAddress ->
312-
// TODO we set id to 0 as that CoordinatorServer don't support
313-
// HA, if we support HA, we need to set id to the config
314-
// CoordinatorServer id to avoid node drift.
315319
new ServerInfo(
316320
0,
317321
null, // For coordinatorServer, no rack info
@@ -334,6 +338,11 @@ public int getCoordinatorEpoch() {
334338

335339
private void initCoordinatorContext() throws Exception {
336340
long start = System.currentTimeMillis();
341+
// get all coordinator servers
342+
List<String> currentCoordinatorServers = zooKeeperClient.getCoordinatorServerList();
343+
coordinatorContext.setLiveCoordinators(new HashSet<>(currentCoordinatorServers));
344+
LOG.info("Load coordinator servers success when initializing coordinator context.");
345+
337346
// get all tablet servers
338347
int[] currentServers = zooKeeperClient.getSortedTabletServerList();
339348
List<ServerInfo> tabletServerInfos = new ArrayList<>();
@@ -548,6 +557,7 @@ private void onShutdown() {
548557
tableManager.shutdown();
549558

550559
// then stop watchers
560+
coordinatorChangeWatcher.stop();
551561
tableChangeWatcher.stop();
552562
tabletServerChangeWatcher.stop();
553563
}
@@ -572,6 +582,10 @@ public void process(CoordinatorEvent event) {
572582
(NotifyLeaderAndIsrResponseReceivedEvent) event);
573583
} else if (event instanceof DeleteReplicaResponseReceivedEvent) {
574584
processDeleteReplicaResponseReceived((DeleteReplicaResponseReceivedEvent) event);
585+
} else if (event instanceof NewCoordinatorEvent) {
586+
processNewCoordinator((NewCoordinatorEvent) event);
587+
} else if (event instanceof DeadCoordinatorEvent) {
588+
processDeadCoordinator((DeadCoordinatorEvent) event);
575589
} else if (event instanceof NewTabletServerEvent) {
576590
processNewTabletServer((NewTabletServerEvent) event);
577591
} else if (event instanceof DeadTabletServerEvent) {
@@ -984,6 +998,28 @@ private void onReplicaBecomeOffline(Set<TableBucketReplica> offlineReplicas) {
984998
replicaStateMachine.handleStateChanges(offlineReplicas, OfflineReplica);
985999
}
9861000

1001+
private void processNewCoordinator(NewCoordinatorEvent newCoordinatorEvent) {
1002+
String coordinatorServerId = newCoordinatorEvent.getServerId();
1003+
if (coordinatorContext.getLiveCoordinatorServers().contains(coordinatorServerId)) {
1004+
return;
1005+
}
1006+
1007+
// process new coordinator server
1008+
LOG.info("New coordinator server callback for coordinator server {}", coordinatorServerId);
1009+
1010+
coordinatorContext.addLiveCoordinator(coordinatorServerId);
1011+
}
1012+
1013+
private void processDeadCoordinator(DeadCoordinatorEvent deadCoordinatorEvent) {
1014+
String coordinatorServerId = deadCoordinatorEvent.getServerId();
1015+
if (!coordinatorContext.getLiveCoordinatorServers().contains(coordinatorServerId)) {
1016+
return;
1017+
}
1018+
// process dead coordinator server
1019+
LOG.info("Coordinator server failure callback for {}.", coordinatorServerId);
1020+
coordinatorContext.removeLiveCoordinator(coordinatorServerId);
1021+
}
1022+
9871023
private void processNewTabletServer(NewTabletServerEvent newTabletServerEvent) {
9881024
// NOTE: we won't need to detect bounced tablet servers like Kafka as we won't
9891025
// miss the event of tablet server un-register and register again since we can

0 commit comments

Comments
 (0)