Skip to content

Commit 5c140fb

Browse files
IGNITE-26580 Implement an affinity backup filter assigning partition copies evenly between data centers (#12459)
1 parent 36cbb27 commit 5c140fb

File tree

4 files changed

+592
-0
lines changed

4 files changed

+592
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.ignite.cache.affinity.rendezvous;
19+
20+
import java.util.List;
21+
import org.apache.ignite.cluster.ClusterNode;
22+
import org.apache.ignite.internal.util.typedef.internal.S;
23+
import org.apache.ignite.lang.IgniteBiPredicate;
24+
25+
/**
26+
* Multi-data center affinity backup filter that ensures each partition's data is distributed across multiple data centers,
27+
* providing high availability and fault tolerance. This implementation guarantees at least one copy of the data in each
28+
* data center and attempts to maintain the configured backup factor without discarding copies.
29+
* <p>
30+
* The filter works by grouping nodes based on their data center identification attribute (see {@link ClusterNode#dataCenterId()})
31+
* and ensuring that for every partition, at least one node from each data center is included in the primary-backup set.
32+
* <p>
33+
* The filter will discard backup copies only if the number of available nodes in a given data center is less
34+
* than the number of copies assigned to that data center.
35+
* For example, if a partition has 4 copies (1 primary and 3 backups) and the cluster has 2 data centers,
36+
* then 2 copies are assigned to each data center. The only scenario when just a single copy is assigned to a node in a data center is when
37+
* the number of nodes in that data center is one.
38+
* <p>
39+
* This class is constructed with a number of data centers the cluster spans and a number of backups of the cache this filter is applied to.
40+
* Implementation expects that all copies can be spread evenly across all data centers. In other words, (backups + 1) is divisible by
41+
* number of data centers without remainder. Uneven distributions of copies are not supported.
42+
* <p>
43+
* Warning: Ensure that all nodes have a consistent and valid data center identifier attribute. Missing or inconsistent values
44+
* may lead to unexpected placement of data.
45+
*
46+
* <h2 class="header">Spring Example</h2>
47+
* Create a partitioned cache template where each data center has at least one copy of the data, and the backup count is maintained.
48+
* <pre name="code" class="xml">
49+
* &lt;property name="cacheConfiguration"&gt;
50+
* &lt;list&gt;
51+
* &lt;bean id="cache-template-bean" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration"&gt;
52+
* &lt;property name="name" value="JobcaseDefaultCacheConfig*"/&gt;
53+
* &lt;property name="cacheMode" value="PARTITIONED" /&gt;
54+
* &lt;property name="backups" value="3" /&gt;
55+
* &lt;property name="affinity"&gt;
56+
* &lt;bean class="org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction"&gt;
57+
* &lt;property name="affinityBackupFilter"&gt;
58+
* &lt;bean class="org.apache.ignite.cache.affinity.rendezvous.MdcAffinityBackupFilter"&gt;
59+
* &lt;constructor-arg value="2"/&gt; &lt;!-- dcsNumber --&gt;
60+
* &lt;constructor-arg value="3"/&gt; &lt;!-- backups, the same as in the cache template --&gt;
61+
* &lt;/bean&gt;
62+
* &lt;/property&gt;
63+
* &lt;/bean&gt;
64+
* &lt;/property&gt;
65+
* &lt;/bean&gt;
66+
* &lt;/list&gt;
67+
* &lt;/property&gt;
68+
* </pre>
69+
* <p>
70+
* With more backups, additional replicas can be distributed across different data centers to further improve redundancy.
71+
*/
72+
public class MdcAffinityBackupFilter implements IgniteBiPredicate<ClusterNode, List<ClusterNode>> {
73+
/** */
74+
private static final long serialVersionUID = 1L;
75+
76+
/** */
77+
private final int partCopiesPerDc;
78+
79+
/**
80+
* @param dcsNum Number of data centers.
81+
* @param backups Number of backups.
82+
*/
83+
public MdcAffinityBackupFilter(int dcsNum, int backups) {
84+
if (dcsNum < 2) {
85+
throw new IllegalArgumentException("MdcAffinityBackupFilter cannot be used in an environment with only one datacenter. " +
86+
"Number of datacenters must be at least 2.");
87+
}
88+
89+
int numCopies = backups + 1;
90+
91+
partCopiesPerDc = numCopies / dcsNum;
92+
int remainder = numCopies % dcsNum;
93+
94+
if (remainder != 0) {
95+
String suggestion = "recommended ";
96+
if (numCopies - remainder <= 0)
97+
suggestion += "value is " + (backups + (dcsNum - remainder));
98+
else
99+
suggestion += "values are " + (backups - remainder) + " and " + (backups + (dcsNum - remainder));
100+
101+
throw new IllegalArgumentException("Number of copies is not completely divisible by number of datacenters, " +
102+
"copies cannot be distributed evenly across DCs. " +
103+
"Please adjust the number of backups, " + suggestion);
104+
}
105+
}
106+
107+
/** {@inheritDoc} */
108+
@Override public boolean apply(ClusterNode candidate, List<ClusterNode> previouslySelected) {
109+
String candidateDcId = candidate.dataCenterId();
110+
int candDcCopiesAssigned = 0;
111+
112+
for (int i = 0; i < previouslySelected.size(); i++) {
113+
String prevDcId = previouslySelected.get(i).dataCenterId();
114+
115+
if (prevDcId == null)
116+
return false;
117+
118+
candDcCopiesAssigned += prevDcId.equals(candidateDcId) ? 1 : 0;
119+
}
120+
121+
return candDcCopiesAssigned < partCopiesPerDc;
122+
}
123+
124+
/** {@inheritDoc} */
125+
@Override public String toString() {
126+
return S.toString(MdcAffinityBackupFilter.class, this);
127+
}
128+
}

modules/core/src/main/java/org/apache/ignite/internal/processors/cache/ClusterCachesInfo.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ private void checkCache(CacheJoinNodeDiscoveryData.CacheInfo locInfo, CacheData
434434
"Affinity partitions count", locAttr.affinityPartitionsCount(),
435435
rmtAttr.affinityPartitionsCount(), true);
436436

437+
// TODO IGNITE-26967 - implement validation of affinity backup filter.
438+
437439
CU.validateKeyConfigiration(rmtAttr.groupName(), rmtAttr.cacheName(), rmt, rmtAttr.configuration().getKeyConfiguration(),
438440
locAttr.configuration().getKeyConfiguration(), log, true);
439441

0 commit comments

Comments
 (0)