Skip to content

Commit dcf6823

Browse files
authored
Merge pull request #1677 from Carlosbogo/create-dedicated-cronjobs-for-monitoring
Create dedicated cronjobs for monitoring
2 parents 92bd370 + 4dc69b6 commit dcf6823

11 files changed

+822
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
---
# CronJob "crab-popularity" in namespace "hdfs".
# Launches cron4crab_popularity.sh from the crab-popularity image once a month.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: hdfs
  name: crab-popularity
  labels:
    app: crab-popularity
spec:
  # Cron fields: minute 0, hour 22, day-of-month 5, every month.
  schedule: '00 22 05 * *'
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: crab-popularity
        spec:
          restartPolicy: Never
          volumes:
            # Host directory /var/eos, exposed to the container at /eos.
            - name: eos
              hostPath:
                path: /var/eos
            # Secret mounted at /etc/secrets; the keytab passed via --keytab lives there.
            - name: cron-spark-jobs-secrets
              secret:
                secretName: cron-spark-jobs-secrets
                # 420 decimal == 0644 octal.
                defaultMode: 420
          containers:
            - name: crab-popularity
              image: registry.cern.ch/cmsmonitoring/crab-popularity:test
              imagePullPolicy: Always
              command:
                - '/bin/bash'
                - '/opt/spark-apps/crab-popularity/cron4crab_popularity.sh'
              # Flag/value pairs handed to the script above.
              args:
                - '--keytab'
                - '/etc/secrets/keytab'
                - '--output'
                - '/eos/user/c/cmsmonit/www/crabPop_cron/data'
                # --p1/--p2 carry the same numbers as the ports declared below.
                - '--p1'
                - '32606'
                - '--p2'
                - '32607'
                # $(MY_NODE_NAME) is substituted by Kubernetes from the env var below.
                - '--host'
                - '$(MY_NODE_NAME)'
                - '--wdir'
                - '/opt/spark-apps/crab-popularity'
              ports:
                - protocol: TCP
                  containerPort: 32606
                  hostPort: 32606
                - protocol: TCP
                  containerPort: 32607
                  hostPort: 32607
              env:
                - name: K8S_ENV
                  value: 'prod'
                - name: HOSTNAME
                  value: 'crab-popularity'
                # Name of the node the pod is scheduled on (downward API).
                - name: MY_NODE_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: spec.nodeName
                - name: PUSHGATEWAY_URL
                  value: 'pushgateway.default.svc.cluster.local:9091'
              volumeMounts:
                - name: eos
                  mountPath: /eos
                  # Mounts made under the host path later become visible in the container.
                  mountPropagation: HostToContainer
                - name: cron-spark-jobs-secrets
                  mountPath: /etc/secrets
                  readOnly: true
              resources:
                requests:
                  cpu: 100m
                  memory: 100Mi
                limits:
                  cpu: '2'
                  memory: 6Gi
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
---
# CronJob "crab-unique-users" in namespace "hdfs".
# Launches cron4crab_unique_users.sh from the crab-unique-users image once a month.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: hdfs
  name: crab-unique-users
spec:
  # Run on 27th of each month at 22:00
  schedule: '00 22 27 * *'
  suspend: false
  concurrencyPolicy: Allow
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  jobTemplate:
    metadata:
      labels:
        app: crab-unique-users
    spec:
      template:
        metadata:
          labels:
            app: crab-unique-users
        spec:
          restartPolicy: Never
          terminationGracePeriodSeconds: 30
          containers:
            - name: crab-unique-users
              image: registry.cern.ch/cmsmonitoring/crab-unique-users:test
              imagePullPolicy: Always
              command:
                - '/bin/bash'
                - '/opt/spark-apps/crab-unique-users/cron4crab_unique_users.sh'
              # Flag/value pairs handed to the script above.
              args:
                - '--keytab'
                - '/etc/secrets/keytab'
                - '--output'
                - '/eos/user/c/cmsmonit/www/crab_uu_cron'
                # --p1/--p2 carry the same numbers as the ports declared below.
                - '--p1'
                - '32604'
                - '--p2'
                - '32605'
                # $(MY_NODE_NAME) is substituted by Kubernetes from the env var below.
                - '--host'
                - '$(MY_NODE_NAME)'
                - '--wdir'
                - '/opt/spark-apps/crab-unique-users'
              ports:
                - protocol: TCP
                  containerPort: 32604
                  hostPort: 32604
                - protocol: TCP
                  containerPort: 32605
                  hostPort: 32605
              env:
                - name: K8S_ENV
                  value: 'prod'
                - name: HOSTNAME
                  value: 'crab-unique-users'
                # Name of the node the pod is scheduled on (downward API).
                - name: MY_NODE_NAME
                  valueFrom:
                    fieldRef:
                      apiVersion: v1
                      fieldPath: spec.nodeName
                - name: PUSHGATEWAY_URL
                  value: 'pushgateway.default.svc.cluster.local:9091'
              volumeMounts:
                - name: eos
                  mountPath: /eos
                  # Mounts made under the host path later become visible in the container.
                  mountPropagation: HostToContainer
                - name: cron-spark-jobs-secrets
                  mountPath: /etc/secrets
                  readOnly: true
              resources:
                requests:
                  cpu: 100m
                  memory: 100Mi
                limits:
                  cpu: '2'
                  memory: 6Gi
              terminationMessagePath: /dev/termination-log
              terminationMessagePolicy: File
          volumes:
            # Host directory /var/eos, exposed to the container at /eos.
            - name: eos
              hostPath:
                path: /var/eos
                type: ''
            # Secret mounted at /etc/secrets; the keytab passed via --keytab lives there.
            - name: cron-spark-jobs-secrets
              secret:
                secretName: cron-spark-jobs-secrets
                # 420 decimal == 0644 octal.
                defaultMode: 420
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
---
# CronJob "hpc-usage" in namespace "hdfs".
# Launches cron4hpc_usage.sh from the hpc-usage image once a day.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: hpc-usage
  namespace: hdfs
  labels:
    app: hpc-usage
spec:
  # Cron fields: minute 10, hour 2, every day.
  schedule: "10 02 * * *"
  # A new run is skipped while the previous one is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: hpc-usage
        spec:
          restartPolicy: OnFailure
          terminationGracePeriodSeconds: 30
          volumes:
            # Secret mounted at /etc/secrets; the keytab passed via --keytab lives there.
            - name: hpc-usage-secrets
              secret:
                secretName: hpc-usage-secrets
                # 420 decimal == 0644 octal.
                defaultMode: 420
            # Host directory /var/eos, exposed to the container at /eos.
            - name: eos
              hostPath:
                path: /var/eos
          containers:
            - name: hpc-usage
              image: registry.cern.ch/cmsmonitoring/hpc-usage:test
              imagePullPolicy: Always
              command:
                - "/bin/bash"
                - "/opt/spark-apps/hpc-usage/cron4hpc_usage.sh"
              # Flag/value pairs handed to the script above.
              args:
                - "--keytab"
                - "/etc/secrets/keytab"
                - "--output"
                - "/eos/user/c/cmsmonit/www/hpc_usage_cron"
                - "--url"
                - "https://cmsdatapop.web.cern.ch/cmsdatapop/hpc_usage"
                - "--iterative"
                # --p1/--p2 carry the same numbers as the ports declared below.
                - "--p1"
                - "32614"
                - "--p2"
                - "32615"
                # $(MY_NODE_NAME) is substituted by Kubernetes from the env var below.
                - "--host"
                - "$(MY_NODE_NAME)"
                - "--wdir"
                - "/data"
              ports:
                - containerPort: 32614
                  hostPort: 32614
                  protocol: TCP
                - containerPort: 32615
                  hostPort: 32615
                  protocol: TCP
              env:
                - name: K8S_ENV
                  value: prod
                # Name of the node the pod is scheduled on (downward API).
                - name: MY_NODE_NAME
                  valueFrom:
                    fieldRef:
                      apiVersion: v1
                      fieldPath: spec.nodeName
                - name: PUSHGATEWAY_URL
                  value: pushgateway.default.svc.cluster.local:9091
              volumeMounts:
                - name: eos
                  mountPath: /eos
                  # Added to match the other cron jobs that mount /var/eos: without
                  # it, mounts created under the host path after the container
                  # starts are not propagated into the container.
                  mountPropagation: HostToContainer
                - name: hpc-usage-secrets
                  mountPath: /etc/secrets
                  readOnly: true
              resources:
                limits:
                  cpu: "2"
                  memory: 6Gi
                requests:
                  cpu: 100m
                  memory: 100Mi
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
---
# CronJob "cmsmon-rucio-ds" in namespace "hdfs".
# Runs cron4rucio_datasets_stats.sh from the rucio_daily_stats image once a day.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: cmsmon-rucio-ds
  namespace: hdfs
  labels:
    app: cmsmon-rucio-ds
spec:
  # Cron fields: minute 45, hour 6, every day.
  schedule: "45 6 * * *"
  # A new run is skipped while the previous one is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  suspend: false
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: cmsmon-rucio-ds
        spec:
          restartPolicy: OnFailure
          terminationGracePeriodSeconds: 30
          volumes:
            # Secret mounted at /etc/secrets; provides the keytab and
            # amq_broker.json referenced by the script below.
            - name: rucio-daily-stats-secrets
              secret:
                secretName: rucio-daily-stats-secrets
                # 420 decimal == 0644 octal.
                defaultMode: 420
            # Host directory /var/eos, exposed to the container at /eos.
            - name: eos
              hostPath:
                path: /var/eos
          containers:
            - name: cmsmon-rucio-ds
              image: registry.cern.ch/cmsmonitoring/rucio_daily_stats:test
              imagePullPolicy: Always
              # Inline script: sources /etc/environment, then starts the stats
              # job. $MY_NODE_NAME and $WDIR come from the env section and are
              # expanded by the shell; they are quoted so the values are always
              # passed as single arguments.
              command:
                - /bin/bash
                - -c
                - |
                  #!/bin/bash
                  . /etc/environment
                  ./cron4rucio_datasets_stats.sh \
                    --keytab /etc/secrets/keytab \
                    --amq /etc/secrets/amq_broker.json \
                    --cmsmonitoring /data/CMSMonitoring.zip \
                    --stomp /data/stomp-v700.zip \
                    --eos /eos/user/c/cmsmonit/www/rucio_daily_ds_stats \
                    --p1 31201 --p2 31202 --host "$MY_NODE_NAME" --wdir "$WDIR"
              env:
                # Name of the node the pod is scheduled on (downward API).
                - name: MY_NODE_NAME
                  valueFrom:
                    fieldRef:
                      apiVersion: v1
                      fieldPath: spec.nodeName
                - name: WDIR
                  value: /tmp
                - name: PUSHGATEWAY_URL
                  value: pushgateway.default.svc.cluster.local:9091
              volumeMounts:
                - name: rucio-daily-stats-secrets
                  mountPath: /etc/secrets
                  readOnly: true
                - name: eos
                  mountPath: /eos
                  # Added to match the other cron jobs that mount /var/eos: without
                  # it, mounts created under the host path after the container
                  # starts are not propagated into the container.
                  mountPropagation: HostToContainer
              # NOTE(review): unlike the other cron jobs in this commit, this
              # container declares no ports for --p1/--p2 and no resource
              # requests/limits; confirm whether that is intentional.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
---
# CronJob "eos-dataset" in namespace "hdfs".
# Launches cron4eos_dataset.sh from the eos-dataset image once a month.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: hdfs
  name: eos-dataset
  labels:
    app: eos-dataset
spec:
  # Cron fields: minute 0, hour 22, day-of-month 24, every month.
  schedule: '00 22 24 * *'
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: eos-dataset
        spec:
          restartPolicy: Never
          volumes:
            # Host directory /var/eos, exposed to the container at /eos.
            - name: eos
              hostPath:
                path: /var/eos
            # Secret mounted at /etc/secrets; the keytab passed via --keytab lives there.
            - name: cron-spark-jobs-secrets
              secret:
                secretName: cron-spark-jobs-secrets
                # 420 decimal == 0644 octal.
                defaultMode: 420
          containers:
            - name: eos-dataset
              image: registry.cern.ch/cmsmonitoring/eos-dataset:test
              imagePullPolicy: Always
              command:
                - '/bin/bash'
                - '/opt/spark-apps/eos-dataset/cron4eos_dataset.sh'
              # Flag/value pairs handed to the script above.
              args:
                - '--keytab'
                - '/etc/secrets/keytab'
                - '--output'
                - '/eos/user/c/cmsmonit/www/EOS/cron_data'
                # --p1/--p2 carry the same numbers as the ports declared below.
                - '--p1'
                - '32610'
                - '--p2'
                - '32611'
                # $(MY_NODE_NAME) is substituted by Kubernetes from the env var below.
                - '--host'
                - '$(MY_NODE_NAME)'
                - '--wdir'
                - '/opt/spark-apps/eos-dataset'
              ports:
                - protocol: TCP
                  containerPort: 32610
                  hostPort: 32610
                - protocol: TCP
                  containerPort: 32611
                  hostPort: 32611
              env:
                - name: K8S_ENV
                  value: 'prod'
                - name: HOSTNAME
                  value: 'eos-dataset'
                # Name of the node the pod is scheduled on (downward API).
                - name: MY_NODE_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: spec.nodeName
                - name: PUSHGATEWAY_URL
                  value: 'pushgateway.default.svc.cluster.local:9091'
                - name: PARQUET_LOCATION
                  value: '/tmp/cmsmonit/test_parquet_eos_dataset'
              volumeMounts:
                - name: eos
                  mountPath: /eos
                  # Mounts made under the host path later become visible in the container.
                  mountPropagation: HostToContainer
                - name: cron-spark-jobs-secrets
                  mountPath: /etc/secrets
                  readOnly: true
              resources:
                requests:
                  cpu: 100m
                  memory: 100Mi
                limits:
                  cpu: '2'
                  memory: 6Gi

0 commit comments

Comments
 (0)