Skip to content

Commit e033344

Browse files
Auto renew certificates and add ignore list (#44)
* refactor and automatically renew credentials older than 90 days * added --dry-run flag * added ignorefile * update deploy script to allow git pull to fail
1 parent 6f3b48b commit e033344

File tree

2 files changed

+144
-46
lines changed

2 files changed

+144
-46
lines changed

bk-api/bk_api.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -894,13 +894,11 @@ def deploy_wes(node_id, this_debug, force=False):
894894
"/config/node-private-git-repo-key/node-private-git-repo-key not found, skipping"
895895
)
896896

897-
final_command = "./update-stack.sh"
897+
final_command = "/opt/waggle-edge-stack/kubernetes/update-stack.sh"
898898
if FAKE_DEPLOYMENT:
899899
final_command = 'echo "This fake deployment was successful"'
900900

901-
deploy_script = (
902-
"""\
903-
#!/bin/sh
901+
deploy_script = f"""#!/bin/sh
904902
### This script was generated by beekeeper ###
905903
set -e
906904
set -x
@@ -913,17 +911,12 @@ def deploy_wes(node_id, this_debug, force=False):
913911
git clone https://github.com/waggle-sensor/waggle-edge-stack.git
914912
fi
915913
916-
cd /opt/waggle-edge-stack
917-
git pull origin main
918-
#git checkout tags/v.1.0
919-
920-
cd /opt/waggle-edge-stack/kubernetes
914+
# allow pull to fail so we can still renew certificates on modified repo
915+
git -C /opt/waggle-edge-stack pull origin main || true
921916
917+
{final_command}
922918
"""
923-
+ final_command
924-
)
925919

926-
# return {"result": "C"}
927920
try:
928921
node_ssh_with_logging(node_id, "cat > /tmp/deploy.sh", input_str=deploy_script)
929922
node_ssh_with_logging(node_id, "sh /tmp/deploy.sh")

bk-deploy-manager/deploy_manager.py

Lines changed: 139 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python3
22

3+
import argparse
34
import datetime
45
import logging
56
import os
@@ -8,20 +9,40 @@
89

910
import dateutil.parser
1011
import requests
12+
from dataclasses import dataclass
13+
from typing import Optional
14+
from pathlib import Path
15+
16+
@dataclass
class Node:
    """A node record as reported by the beekeeper API."""

    # Node identifier, taken from the "id" field of the API item.
    id: str
    # Beehive the node is assigned to; None when missing or empty.
    beehive: Optional[str]
    # Parsed "registration_event" timestamp; None when the node never registered.
    registered_at: Optional[datetime.datetime]
    # Parsed "wes_deploy_event" timestamp; None when WES was never deployed.
    deployed_wes_at: Optional[datetime.datetime]


@dataclass
class Candidate:
    """A node selected for WES deployment, plus how it should be deployed."""

    # The node to deploy to. This holds a Node instance (callers read
    # ``candidate.node.id``), so it is annotated as such rather than ``dict``.
    node: Node
    # True when the deployment should also renew the node's credentials.
    renew_credentials: bool
1129

12-
logging.basicConfig(level=logging.INFO)
1330

1431
BEEKEEPER_URL = os.getenv("BEEKEEPER_URL", "http://localhost:5000")
1532

1633

def parse_datetime(timestamp: Optional[str]) -> Optional[datetime.datetime]:
    """Parse an ISO-8601 timestamp string into a ``datetime.datetime``.

    Example inputs: ``2021-11-19T02:07:22``, ``2008-09-03T20:56:35.450686``.

    Args:
        timestamp: ISO-8601 formatted string, or ``None`` / ``""`` when the
            event never happened.

    Returns:
        The parsed datetime, or ``None`` when ``timestamp`` is ``None`` or empty.
    """
    # Missing and empty values both mean "no such event"; check identity for
    # None instead of relying on ``==``.
    if timestamp is None or timestamp == "":
        return None
    return dateutil.parser.isoparse(timestamp)
2243

2344

24-
def get_candidates():
45+
def get_nodes() -> list[Node]:
2546
if BEEKEEPER_URL == "":
2647
logging.error(f"BEEKEEPER_URL not defined")
2748
sys.exit(1)
@@ -38,61 +59,114 @@ def get_candidates():
3859
if resp.status_code != 200:
3960
raise Exception(f"status_code: {resp.status_code} body: {resp.text}")
4061

41-
nodes = resp.json()
62+
items = resp.json()["data"]
63+
64+
nodes = []
65+
66+
for item in items:
67+
node_id = item["id"]
68+
beehive = item.get("beehive")
69+
if beehive == "":
70+
beehive = None
71+
registered_at = parse_datetime(item.get("registration_event"))
72+
deployed_wes_at = parse_datetime(item.get("wes_deploy_event"))
73+
nodes.append(
74+
Node(
75+
id=node_id,
76+
beehive=beehive,
77+
registered_at=registered_at,
78+
deployed_wes_at=deployed_wes_at,
79+
)
80+
)
81+
82+
return nodes
83+
84+
def get_candidates(ignorelist: list[str]) -> "list[Candidate]":
    """Select the nodes that need a WES deployment.

    Nodes are fetched with ``get_nodes()`` and checked in this order:

    * nodes listed in ``ignorelist``, unregistered nodes and nodes without a
      beehive are skipped;
    * nodes that never had WES deployed are scheduled without credential
      renewal;
    * nodes registered at or after their last deployment (re-registered) are
      scheduled with credential renewal;
    * nodes whose last deployment is at least 90 days old are scheduled with
      credential renewal.

    Args:
        ignorelist: node ids that must never be scheduled.

    Returns:
        One ``Candidate`` per node that should be deployed to.
    """
    nodes = get_nodes()

    # Build the lookup once so the per-node ignore check is O(1).
    ignored = set(ignorelist)

    candidates = []

    for node in nodes:
        if node.id in ignored:
            logging.info("node %s in ignorelist - skipping", node.id)
            continue

        if node.registered_at is None:
            logging.info("node %s is not registered", node.id)
            continue

        if node.beehive is None:
            logging.info("node %s does not belong to a beehive", node.id)
            continue

        if node.deployed_wes_at is None:
            logging.info(
                "scheduling node %s for wes deployment (reason: no previous deployment)",
                node.id,
            )
            candidates.append(Candidate(node=node, renew_credentials=False))
            continue

        # reregistered nodes also need wes redeployed
        if node.registered_at >= node.deployed_wes_at:
            logging.info(
                "scheduling node %s for wes deployment (reason: node reregistered)",
                node.id,
            )
            candidates.append(Candidate(node=node, renew_credentials=True))
            continue

        # automatically redeploy with renewed credentials periodically
        #
        # Take "now" in the same timezone-awareness as the stored timestamp:
        # with tzinfo=None this is the naive local time, otherwise an aware
        # time in that zone. Subtracting a naive from an aware datetime would
        # raise TypeError.
        now = datetime.datetime.now(tz=node.deployed_wes_at.tzinfo)
        deployed_wes_age = now - node.deployed_wes_at

        if deployed_wes_age >= datetime.timedelta(days=90):
            logging.info(
                "scheduling node %s for wes deployment (reason: renew certificates - %d days old)",
                node.id,
                deployed_wes_age.days,
            )
            candidates.append(Candidate(node=node, renew_credentials=True))
            continue

        logging.info("node %s needs no deployment", node.id)

    # Q(sean) Is there ever a time where we wouldn't want to renew credentials? As long as
    # there's some rate limit on how fast we're generating them, it seems like just renewing
    # is a much simpler management strategy.

    return candidates
74139

75140

def try_wes_deployment(candidates: "list[Candidate]", dry_run: bool):
    """Attempt a WES deployment on every candidate and log a summary.

    A failure on one candidate is logged with its traceback and does not stop
    the remaining candidates from being processed.

    Args:
        candidates: the candidates to deploy to.
        dry_run: forwarded to ``deploy_wes_to_candidate`` for each candidate.

    Returns:
        None. On ``KeyboardInterrupt`` the function returns immediately,
        without logging the summary.
    """
    success_count = 0

    for candidate in candidates:
        try:
            deploy_wes_to_candidate(candidate, dry_run)
        except KeyboardInterrupt:
            return
        except Exception:
            # Best effort: record the failure and move on to the next node.
            logging.exception("deploy_wes_to_candidate failed")
        else:
            success_count += 1

    # Lazy %-style arguments, consistent with the other log calls in this file.
    logging.info("%d out of %d successful.", success_count, len(candidates))
    logging.info("done")
91155

92156

93-
def deploy_wes_to_candidate(candidate):
94-
node_id = candidate["id"]
95-
url = f"{BEEKEEPER_URL}/node/{node_id}"
157+
def deploy_wes_to_candidate(candidate: Candidate, dry_run: bool):
158+
node = candidate.node
159+
160+
if candidate.renew_credentials:
161+
logging.info("deploying to candidate %s with renewed credentials", node.id)
162+
url = f"{BEEKEEPER_URL}/node/{node.id}?force=true"
163+
else:
164+
logging.info("deploying to candidate %s", node.id)
165+
url = f"{BEEKEEPER_URL}/node/{node.id}"
166+
167+
if dry_run:
168+
return
169+
96170
resp = requests.post(url, json={"deploy_wes": True})
97171
resp.raise_for_status()
98172
result = resp.json()
@@ -103,20 +177,51 @@ def deploy_wes_to_candidate(candidate):
103177

104178

def main():
    """Run the deploy manager loop.

    Parses the command line (``--debug``, ``--ignorefile``, ``--dry-run``),
    configures logging, loads the optional ignore list and then, forever:
    collects deployment candidates, deploys to them, and sleeps five minutes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--debug",
        action="store_true",
        help="enable verbose logging",
    )
    parser.add_argument(
        "--ignorefile",
        help="path of ignorefile",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="get and log candidates but do not deploy wes",
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(message)s",
        datefmt="%Y/%m/%d %H:%M:%S",
    )

    if args.ignorefile:
        logging.info("loading ignorefile %s", args.ignorefile)
        ignorefile = Path(args.ignorefile)
        # Entries are whitespace separated; each is upper-cased and left-padded
        # with "0" to 16 characters before being compared against node ids.
        ignorelist = [s.upper().rjust(16, "0") for s in ignorefile.read_text().split()]
    else:
        logging.info("no ignorefile provided")
        ignorelist = []

    while True:
        candidates = []

        try:
            logging.info("getting candidates")
            candidates = get_candidates(ignorelist)
        except Exception as e:
            # Keep the loop alive; the next iteration retries after the sleep.
            logging.error("error: get_candidates returned: %s", e)

        if not candidates:
            logging.info("no candidates for wes deployment found")
        else:
            logging.info("deploying to candidates")
            try_wes_deployment(candidates, args.dry_run)

        logging.info("waiting 5 minutes...")
        time.sleep(5 * 60)

0 commit comments

Comments
 (0)