Skip to content
This repository was archived by the owner on Oct 16, 2024. It is now read-only.

Commit 483a207

Browse files
authored
Revert "Revert previously undeployed changes (#348)"
This reverts commit 55d0831.
1 parent 55d0831 commit 483a207

File tree

13 files changed

+42
-17
lines changed

13 files changed

+42
-17
lines changed

acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ autoscaling:
2121
instance_loss_threshold: 3
2222

2323
alert_on_max_capacity: false
24+
pool_owner: compute_infra

acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ autoscale_signal:
2929
minute_range: 10
3030

3131
alert_on_max_capacity: false
32+
pool_owner: compute_infra

clusterman/autoscaler/autoscaler.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -178,12 +178,9 @@ def run(self, dry_run: bool = False, timestamp: Optional[arrow.Arrow] = None) ->
178178
else:
179179
capacity_offset = get_capacity_offset(self.cluster, self.pool, self.scheduler, timestamp)
180180
new_target_capacity = self._compute_target_capacity(resource_request) + capacity_offset
181-
self.target_capacity_gauge.set(new_target_capacity, {"dry_run": dry_run})
182-
self.max_capacity_gauge.set(
183-
self.pool_manager.max_capacity,
184-
{"dry_run": dry_run, "alert_on_max_capacity": self.pool_manager.alert_on_max_capacity},
185-
)
186-
self.setpoint_gauge.set(self.autoscaling_config.setpoint, {"dry_run": dry_run})
181+
self.target_capacity_gauge.set(new_target_capacity, self.add_metric_labels(dry_run))
182+
self.max_capacity_gauge.set(self.pool_manager.max_capacity, self.add_metric_labels(dry_run))
183+
self.setpoint_gauge.set(self.autoscaling_config.setpoint, self.add_metric_labels(dry_run))
187184
self._emit_requested_resource_metrics(resource_request, dry_run=dry_run)
188185

189186
try:
@@ -202,7 +199,14 @@ def run(self, dry_run: bool = False, timestamp: Optional[arrow.Arrow] = None) ->
202199
def _emit_requested_resource_metrics(self, resource_request: SignalResourceRequest, dry_run: bool) -> None:
203200
for resource_type, resource_gauge in self.resource_request_gauges.items():
204201
if getattr(resource_request, resource_type) is not None:
205-
resource_gauge.set(getattr(resource_request, resource_type), {"dry_run": dry_run})
202+
resource_gauge.set(getattr(resource_request, resource_type), self.add_metric_labels(dry_run))
203+
204+
def add_metric_labels(self, dry_run):
205+
return {
206+
"dry_run": dry_run,
207+
"alert_on_max_capacity": self.pool_manager.alert_on_max_capacity,
208+
"team": self.pool_manager.pool_owner,
209+
}
206210

207211
def _get_signal_for_app(self, app: str) -> Signal:
208212
"""Load the signal object to use for autoscaling for a particular app

clusterman/autoscaler/pool_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def __init__(
8686
"autoscaling.killable_nodes_prioritizing_v2", default=False
8787
)
8888
self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True)
89+
self.pool_owner = self.pool_config.read_string("pool_owner", default="compute_infra")
8990
monitoring_info = {"cluster": cluster, "pool": pool}
9091
self.killable_nodes_counter = get_monitoring_client().create_counter(SFX_KILLABLE_NODES_COUNT, monitoring_info)
9192

clusterman/kubernetes/kubernetes_cluster_connector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
import colorlog
2525
import kubernetes
2626
import staticconf
27-
from kubernetes.client import V1beta1Eviction
2827
from kubernetes.client import V1DeleteOptions
28+
from kubernetes.client import V1Eviction
2929
from kubernetes.client import V1ObjectMeta
3030
from kubernetes.client.models.v1_node import V1Node as KubernetesNode
3131
from kubernetes.client.models.v1_pod import V1Pod as KubernetesPod
@@ -356,7 +356,7 @@ def _evict_pod(self, pod: KubernetesPod):
356356
self._core_api.create_namespaced_pod_eviction(
357357
name=pod.metadata.name,
358358
namespace=pod.metadata.namespace,
359-
body=V1beta1Eviction(
359+
body=V1Eviction(
360360
metadata=V1ObjectMeta(
361361
name=pod.metadata.name,
362362
namespace=pod.metadata.namespace,

clusterman/kubernetes/util.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from kubernetes.client.models.v1_node_selector_requirement import V1NodeSelectorRequirement
3131
from kubernetes.client.models.v1_node_selector_term import V1NodeSelectorTerm
3232
from kubernetes.client.models.v1_pod import V1Pod as KubernetesPod
33+
from kubernetes.config.config_exception import ConfigException
3334

3435
from clusterman.util import ClustermanResources
3536

@@ -72,7 +73,7 @@ def __init__(self, kubeconfig_path: str, client_class: Type) -> None:
7273
kubernetes.config.load_incluster_config()
7374
else:
7475
kubernetes.config.load_kube_config(kubeconfig_path, context=os.getenv("KUBECONTEXT"))
75-
except TypeError:
76+
except (TypeError, ConfigException):
7677
error_msg = "Could not load KUBECONFIG; is this running on Kubernetes master?"
7778
if "yelpcorp" in socket.getfqdn():
7879
error_msg += "\nHint: try using the clusterman-k8s-<clustername> wrapper script!"

clusterman/simulator/simulated_pool_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def __init__(
5959
MAX_MIN_NODE_SCALEIN_UPTIME_SECONDS,
6060
)
6161
self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True)
62+
self.pool_owner = self.pool_config.read_string("pool_owner", default="compute_infra")
6263
self.killable_nodes_prioritizing_v2 = self.pool_config.read_bool(
6364
"autoscaling.killable_nodes_prioritizing_v2", default=False
6465
)

examples/schemas/pool.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@
6464
"additionalProperties": false
6565
},
6666
"sensu_config": {"$ref": "definitions.json#sensu_config"},
67-
"alert_on_max_capacity": {"type": "boolean"}
67+
"alert_on_max_capacity": {"type": "boolean"},
68+
"pool_owner": {"type": "string"}
6869
},
6970
"additionalProperties": false
7071
}

itests/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ def setup_configurations(context):
121121
],
122122
},
123123
"alert_on_max_capacity": True,
124+
"pool_owner": "compute_infra",
124125
}
125126
kube_pool_config = {
126127
"resource_groups": [
@@ -144,6 +145,7 @@ def setup_configurations(context):
144145
"period_minutes": 7,
145146
},
146147
"alert_on_max_capacity": True,
148+
"pool_owner": "compute_infra",
147149
}
148150
with staticconf.testing.MockConfiguration(
149151
boto_config, namespace=CREDENTIALS_NAMESPACE

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ idna==2.8
1616
jmespath==0.9.4
1717
jsonpickle==1.4.2
1818
kiwisolver==1.1.0
19-
kubernetes==10.0.1
19+
kubernetes==24.2.0
2020
matplotlib==3.4.2
2121
mypy-extensions==0.4.3
2222
numpy==1.21.6

0 commit comments

Comments (0)