@@ -18,6 +18,7 @@ package v1beta2
18
18
19
19
import (
20
20
"fmt"
21
+ "k8s.io/apimachinery/pkg/util/intstr"
21
22
"math"
22
23
"math/rand"
23
24
"regexp"
@@ -1257,6 +1258,16 @@ type AutomaticReplacementOptions struct {
1257
1258
// TaintReplacementOption controls which taint label the operator will react to.
1258
1259
// +kubebuilder:validation:MaxItems=32
1259
1260
TaintReplacementOptions []TaintReplacementOption `json:"taintReplacementOptions,omitempty"`
1261
+
1262
+ // MaxFaultDomainsWithTaintedProcessGroups defines how many fault domains in the cluster can have process groups
1263
+ // with the NodeTaintReplacing condition and still allow the operator to automatically replace those process groups.
1264
+ // If more fault domains contain process groups with the NodeTaintReplacing condition, the operator will not
1265
+ // automatically replace those process groups. This is a safeguard in addition to MaxConcurrentReplacements to make
1266
+ // sure the operator is not replacing too many process groups if a large number of nodes are tainted. An absolute number
1267
+ // of fault domains or a percentage can be provided.
1268
+ // Defaults to 10% of the fault domains or at least 1.
1269
+ // +kubebuilder:validation:XIntOrString
1270
+ MaxFaultDomainsWithTaintedProcessGroups * intstr.IntOrString `json:"maxFaultDomainsWithTaintedProcessGroups,omitempty"`
1260
1271
}
1261
1272
1262
1273
// ProcessSettings defines process-level settings.
@@ -2528,7 +2539,7 @@ func (cluster *FoundationDBCluster) GetIgnoreTerminatingPodsSeconds() int {
2528
2539
return pointer .IntDeref (cluster .Spec .AutomationOptions .IgnoreTerminatingPodsSeconds , int ((10 * time .Minute ).Seconds ()))
2529
2540
}
2530
2541
2531
- // GetProcessGroupsToRemove will returns the list of Process Group IDs that must be added to the ProcessGroupsToRemove
2542
+ // GetProcessGroupsToRemove will return the list of Process Group IDs that must be added to the ProcessGroupsToRemove
2532
2543
// it will filter out all Process Group IDs that are already marked for removal to make sure those are cleaned up. If a
2533
2544
// provided process group ID doesn't exist, it will be ignored.
2534
2545
func (cluster * FoundationDBCluster ) GetProcessGroupsToRemove (processGroupIDs []ProcessGroupID ) []ProcessGroupID {
@@ -2926,8 +2937,25 @@ func (cluster *FoundationDBCluster) ProcessSharesDC(process FoundationDBStatusPr
2926
2937
if cluster == nil || cluster .Spec .DataCenter == "" {
2927
2938
return true
2928
2939
}
2940
+
2929
2941
if cluster .Spec .DataCenter == process .Locality [FDBLocalityDCIDKey ] {
2930
2942
return true
2931
2943
}
2944
+
2932
2945
return false
2933
2946
}
2947
+
2948
+ // GetMaxFaultDomainsWithTaintedProcessGroups returns the maximum fault domains that can hold pods on tainted nodes to still
2949
+ // allow the operator to automatically replace those pods on the tainted nodes automatically.
2950
+ func (cluster * FoundationDBCluster ) GetMaxFaultDomainsWithTaintedProcessGroups (faultDomainCnt int ) (int , error ) {
2951
+ maxAllowed , err := intstr .GetScaledValueFromIntOrPercent (intstr .ValueOrDefault (cluster .Spec .AutomationOptions .Replacements .MaxFaultDomainsWithTaintedProcessGroups , intstr.IntOrString {Type : intstr .String , StrVal : "10%" }), faultDomainCnt , false )
2952
+ if err != nil {
2953
+ return - 1 , err
2954
+ }
2955
+
2956
+ if maxAllowed < 1 {
2957
+ return 1 , nil
2958
+ }
2959
+
2960
+ return maxAllowed , nil
2961
+ }
0 commit comments