Skip to content

Commit 6f78c8c

Browse files
authored
Merge pull request #1506 from Vlatombe/provisioning-limits-exception-handling
Properly unregister slots that were reserved during provisioning if the provisioning ended up failing.
2 parents 5dcbdb5 + d33b731 commit 6f78c8c

File tree

2 files changed

+77
-2
lines changed

2 files changed

+77
-2
lines changed

src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesCloud.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ public KubernetesClient connect() throws KubernetesAuthException, IOException {
570570
@Override
571571
public Collection<NodeProvisioner.PlannedNode> provision(
572572
@NonNull final Cloud.CloudState state, final int excessWorkload) {
573+
var limitRegistrationResults = new LimitRegistrationResults(this);
573574
try {
574575
Metrics.metricRegistry().meter(metricNameForLabel(state.getLabel())).mark(excessWorkload);
575576
Label label = state.getLabel();
@@ -588,8 +589,7 @@ public Collection<NodeProvisioner.PlannedNode> provision(
588589
// check overall concurrency limit using the default label(s) on all templates
589590
int numExecutors = 1;
590591
PodTemplate unwrappedTemplate = getUnwrappedTemplate(podTemplate);
591-
while (toBeProvisioned > 0
592-
&& KubernetesProvisioningLimits.get().register(this, podTemplate, numExecutors)) {
592+
while (toBeProvisioned > 0 && limitRegistrationResults.register(podTemplate, numExecutors)) {
593593
plannedNodes.add(PlannedNodeBuilderFactory.createInstance()
594594
.cloud(this)
595595
.template(unwrappedTemplate)
@@ -626,8 +626,11 @@ public Collection<NodeProvisioner.PlannedNode> provision(
626626
"Failed to count the # of live instances on Kubernetes",
627627
cause != null ? cause : e);
628628
}
629+
limitRegistrationResults.unregister();
629630
} catch (Exception e) {
631+
Metrics.metricRegistry().counter(MetricNames.PROVISION_FAILED).inc();
630632
LOGGER.log(Level.WARNING, "Failed to count the # of live instances on Kubernetes", e);
633+
limitRegistrationResults.unregister();
631634
}
632635
return Collections.emptyList();
633636
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package org.csanchez.jenkins.plugins.kubernetes;
2+
3+
import edu.umd.cs.findbugs.annotations.NonNull;
4+
import java.util.ArrayList;
5+
import java.util.List;
6+
import java.util.logging.Level;
7+
import java.util.logging.Logger;
8+
9+
/**
10+
* Registers result for slots registration and provides an easy way to unregister.
11+
*/
12+
class LimitRegistrationResults {
13+
private static final Logger LOGGER = Logger.getLogger(LimitRegistrationResults.class.getName());
14+
15+
private final KubernetesCloud cloud;
16+
17+
private final List<Result> results = new ArrayList<>();
18+
19+
public LimitRegistrationResults(@NonNull KubernetesCloud cloud) {
20+
this.cloud = cloud;
21+
}
22+
23+
/**
24+
* Register a slot for the given pod template and number of executors.
25+
* @return true is the registration was successful
26+
*/
27+
public boolean register(@NonNull PodTemplate podTemplate, int numExecutors) {
28+
var result = new Result(
29+
KubernetesProvisioningLimits.get().register(cloud, podTemplate, numExecutors),
30+
podTemplate,
31+
numExecutors);
32+
results.add(result);
33+
return result.success;
34+
}
35+
36+
/**
37+
* Unregister all slots that were registered through this object previously.
38+
*/
39+
public void unregister() {
40+
results.forEach(result -> result.unregister(cloud));
41+
}
42+
43+
private static class Result {
44+
final boolean success;
45+
final int numExecutors;
46+
47+
@NonNull
48+
PodTemplate podTemplate;
49+
50+
Result(boolean success, @NonNull PodTemplate podTemplate, int numExecutors) {
51+
this.success = success;
52+
this.podTemplate = podTemplate;
53+
this.numExecutors = numExecutors;
54+
}
55+
56+
void unregister(KubernetesCloud cloud) {
57+
if (success) {
58+
LOGGER.log(
59+
Level.FINEST,
60+
() -> "Registration was successful, unregistering slot for podTemplate " + podTemplate.getName()
61+
+ " from cloud " + cloud.name + " with " + numExecutors + " executors");
62+
KubernetesProvisioningLimits.get().unregister(cloud, podTemplate, numExecutors);
63+
} else {
64+
LOGGER.log(
65+
Level.FINEST,
66+
() -> "Registration previously failed, no need to unregister slot for podTemplate "
67+
+ podTemplate.getName() + " from cloud " + cloud.name + " with " + numExecutors
68+
+ " executors");
69+
}
70+
}
71+
}
72+
}

0 commit comments

Comments
 (0)