Commit 44772d1

Merge pull request #94 from SAP/gc
Gc
2 parents d220e9a + 7f2efd2 commit 44772d1

7 files changed, +237 -0 lines changed

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
{{ if eq .Values.cluster.name "master" }}
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: infrabox-gc
  namespace: {{ template "system_namespace" . }}
  labels:
    app: infrabox-gc
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: infrabox-gc
    spec:
{{ include "imagePullSecret" . | indent 8 }}
      terminationGracePeriodSeconds: 0
      serviceAccountName: infrabox
      containers:
{{ include "containers_database" . | indent 12 }}
      -
        name: gc
        image: {{ include "image_repository" . }}/gc:{{ include "image_tag" . }}
        imagePullPolicy: Always
        env:
{{ include "env_database" . | indent 16 }}
{{ include "env_general" . | indent 16 }}
{{ include "env_version" . | indent 16 }}
{{ include "env_gcs" . | indent 16 }}
{{ include "env_s3" . | indent 16 }}
{{ include "env_azure" . | indent 16 }}
{{ include "env_swift" . | indent 16 }}
      volumes:
{{ include "volumes_database" . | indent 16 }}
{{ end }}

ib.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@
     {'name': 'collector-api'},
     {'name': 'collector-fluentd'},
     {'name': 'job'},
+    {'name': 'gc'},
     {'name': 'controller'},
     {'name': 'scheduler-kubernetes'},
     {'name': 'api'},

infrabox/generator/deployments.json

Lines changed: 18 additions & 0 deletions
@@ -221,6 +221,24 @@
         "cache": {
             "image": true
         }
+    }, {
+        "type": "docker",
+        "build_context": "../..",
+        "name": "gc",
+        "docker_file": "src/gc/Dockerfile",
+        "build_only": true,
+        "resources": { "limits": { "cpu": 1, "memory": 1024 } },
+        "deployments": [{
+            "type": "docker-registry",
+            "host": "quay.io/infrabox",
+            "repository": "gc",
+            "username": "infrabox+infrabox_ci",
+            "password": { "$secret": "QUAY_PASSWORD" }
+        }],
+        "cache": {
+            "image": true
+        }
     }, {
         "type": "docker",
         "build_context": "../..",

src/db/migrations/00014.sql

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
DROP TRIGGER job_queue_notify_update ON job;
CREATE TRIGGER job_queue_notify_update AFTER UPDATE ON job FOR EACH ROW WHEN (OLD.state IS DISTINCT FROM NEW.state) EXECUTE PROCEDURE job_queue_notify();

src/gc/Dockerfile

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
FROM debian:9.4-slim

RUN apt-get update -y && apt-get install -y python python-psycopg2 python-pip python-flask python-six && \
    pip install cryptography eventlet boto3 google-cloud-storage future bcrypt pycrypto \
        azure-mgmt-resource azure-storage keystoneauth1==3.7.0 python-swiftclient && \
    apt-get remove -y python-pip && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

COPY src/gc gc
COPY src/pyinfraboxutils /pyinfraboxutils

ENV PYTHONPATH=/

CMD python gc/gc.py

src/gc/gc.py

Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
import time

from pyinfraboxutils import get_logger, get_env
from pyinfraboxutils import dbpool
from pyinfraboxutils.storage import storage

logger = get_logger("gc")

class GC(object):
    def run(self):
        # TODO: Delete storage objects: uploads, outputs
        # TODO: Delete images from registry

        while True:
            db = dbpool.get()
            try:
                logger.info('Starting next GC run')
                self._gc(db)
                logger.info('Finished GC run')
                logger.info('')
            except Exception as e:
                logger.exception(e)
            finally:
                dbpool.put(db)

            time.sleep(3600)

    def _gc(self, db):
        self._gc_job_console_output(db)
        self._gc_job_output(db)
        self._gc_test_runs(db)
        self._gc_orphaned_projects(db)
        self._gc_storage_job_cache(db)

    def _gc_job_console_output(self, db):
        # Delete the console output of jobs
        # which are older than 30 days
        r = db.execute_one_dict('''
            SELECT count(*) as count
            FROM job
            WHERE created_at < NOW() - INTERVAL '30 days'
            AND console != 'deleted'
        ''')

        logger.info('Deleting console output of %s jobs', r['count'])

        db.execute('''
            UPDATE job
            SET console = 'deleted'
            WHERE created_at < NOW() - INTERVAL '30 days'
            AND console != 'deleted'
        ''')

        db.commit()

    def _gc_test_runs(self, db):
        # Delete the test_runs
        # which are older than 14 days
        r = db.execute_one_dict('''
            SELECT count(*) as count
            FROM test_run
            WHERE timestamp < NOW() - INTERVAL '14 days'
        ''')

        logger.info('Deleting %s test_runs', r['count'])

        db.execute('''
            DELETE
            FROM test_run
            WHERE timestamp < NOW() - INTERVAL '14 days'
        ''')

        db.commit()


    def _gc_job_output(self, db):
        # Delete orphaned entries in the console table
        # which are older than one day
        r = db.execute_one_dict('''
            SELECT count(*) as count
            FROM console
            WHERE date < NOW() - INTERVAL '1 day'
        ''')

        logger.info('Deleting %s orphaned console entries', r['count'])

        db.execute('''
            DELETE
            FROM console
            WHERE date < NOW() - INTERVAL '1 day'
        ''')

        db.commit()

    def _gc_orphaned_projects(self, db):
        # All the orphaned rows after a
        # project has been deleted
        tables = [
            'auth_token', 'build', 'collaborator', 'commit',
            'job', 'job_badge', 'job_markup', 'measurement',
            'pull_request', 'repository', 'secret', 'source_upload', 'test',
            'test_run'
        ]
        for t in tables:
            self._gc_table_content_of_deleted_project(db, t)

    def _gc_table_content_of_deleted_project(self, db, table):
        r = db.execute_one_dict('''
            SELECT count(*) as count
            FROM %s
            WHERE NOT EXISTS (
                SELECT project.id
                FROM project
                WHERE %s.project_id = project.id
            )
        ''' % (table, table))

        logger.info('Deleting %s orphaned rows from %s', r['count'], table)

        db.execute('''
            DELETE
            FROM %s
            WHERE NOT EXISTS (
                SELECT project.id
                FROM project
                WHERE %s.project_id = project.id
            )
        ''' % (table, table))

        db.commit()

    def _gc_storage_source_upload(self):
        pass

    def _gc_storage_job_cache(self, db):
        # Delete the cache of jobs which were executed within
        # the last 14 days but not within the last 7 days
        r = db.execute_many_dict('''
            SELECT DISTINCT project_id, name
            FROM job
            WHERE
                created_at > NOW() - INTERVAL '14 days'
            EXCEPT
            SELECT DISTINCT project_id, name from job where created_at > NOW() - INTERVAL '7 days'
        ''')

        logger.info('Deleting caches of %s jobs', len(r))

        for j in r:
            logger.info('Deleting cache %s/%s', j['project_id'], j['name'])
            key = 'project_%s_job_%s.tar.snappy' % (j['project_id'], j['name'])
            storage.delete_cache(key)

def main():
    get_env('INFRABOX_DATABASE_DB')
    get_env('INFRABOX_DATABASE_USER')
    get_env('INFRABOX_DATABASE_PASSWORD')
    get_env('INFRABOX_DATABASE_HOST')
    get_env('INFRABOX_DATABASE_PORT')

    gc = GC()
    gc.run()

if __name__ == "__main__":
    main()
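
For local debugging it can be handy to run a single collection pass instead of the hourly loop in GC.run(). The sketch below is not part of the commit; it assumes it sits next to the GC class in src/gc/gc.py (so GC and dbpool are already in scope) and that the INFRABOX_DATABASE_* variables are set.

def run_once():
    # Hypothetical helper (not in this commit): perform one GC pass and return,
    # instead of looping forever with time.sleep(3600).
    gc = GC()
    db = dbpool.get()      # borrow a pooled database connection
    try:
        gc._gc(db)         # run every GC task once
    finally:
        dbpool.put(db)     # always return the connection to the pool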

src/services/gcp/pkg/stub/handler.go

Lines changed: 1 addition & 0 deletions
@@ -93,6 +93,7 @@ func syncGKECluster(cr *v1alpha1.GKECluster, log *logrus.Entry) (*v1alpha1.GKECl
     "create", cr.Status.ClusterName,
     "--async",
     "--enable-autorepair",
+    "--scopes=gke-default,storage-rw",
     "--zone", cr.Spec.Zone,
 }

0 commit comments
