Skip to content

Commit 5d8aab5

Browse files
authored
[CI] Add Terraform resources for daily CronJob that processes LLVM commits (#495)
These resources are for a CronJob that executes the container at `ghcr.io/llvm/operations-metrics:latest` on a daily basis (07:00 UTC), which will scrape daily metrics regarding LLVM's commit volume and upload them for visualization in Grafana. Changes were made to the already existing terraform files since many of the same resources are being reused anyway. This way we can keep all relevant changes in the same place instead of having two separate terraform directories that access and modify shared resources. Since the container needs access to the BigQuery Google Cloud API, IAM and K8S service accounts were used to grant that access via Workload Identity Federation for GKE. More details at https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity
1 parent d132c6e commit 5d8aab5

File tree

3 files changed

+130
-0
lines changed

3 files changed

+130
-0
lines changed

premerge/gke_cluster/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ resource "google_container_node_pool" "llvm_premerge_linux_service" {
3030

3131
node_config {
3232
machine_type = "e2-highcpu-4"
33+
34+
workload_metadata_config {
35+
mode = "GKE_METADATA"
36+
}
3337
# Terraform wants to recreate the node pool every time when running
3438
# terraform apply unless we explicitly set this.
3539
# TODO(boomanaiden154): Look into why terraform is doing this so we do

premerge/main.tf

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,84 @@ resource "kubernetes_manifest" "metrics_deployment" {
221221

222222
depends_on = [kubernetes_namespace.metrics, kubernetes_secret.metrics_secrets]
223223
}
224+
225+
# Resources for collecting LLVM operational metrics data
226+
227+
# Service accounts and bindings to grant access to the
228+
# BigQuery API for our cronjob
229+
resource "google_service_account" "operational_metrics_gsa" {
230+
account_id = "operational-metrics-gsa"
231+
display_name = "Operational Metrics GSA"
232+
}
233+
234+
resource "google_project_iam_binding" "bigquery_jobuser_binding" {
235+
project = google_service_account.operational_metrics_gsa.project
236+
role = "roles/bigquery.jobUser"
237+
238+
members = [
239+
"serviceAccount:${google_service_account.operational_metrics_gsa.email}",
240+
]
241+
242+
depends_on = [google_service_account.operational_metrics_gsa]
243+
}
244+
245+
resource "kubernetes_namespace" "operational_metrics" {
246+
metadata {
247+
name = "operational-metrics"
248+
}
249+
provider = kubernetes.llvm-premerge-us-central
250+
}
251+
252+
resource "kubernetes_service_account" "operational_metrics_ksa" {
253+
metadata {
254+
name = "operational-metrics-ksa"
255+
namespace = "operational-metrics"
256+
annotations = {
257+
"iam.gke.io/gcp-service-account" = google_service_account.operational_metrics_gsa.email
258+
}
259+
}
260+
261+
depends_on = [kubernetes_namespace.operational_metrics]
262+
}
263+
264+
resource "google_service_account_iam_binding" "workload_identity_binding" {
265+
service_account_id = google_service_account.operational_metrics_gsa.name
266+
role = "roles/iam.workloadIdentityUser"
267+
268+
members = [
269+
"serviceAccount:${google_service_account.operational_metrics_gsa.project}.svc.id.goog[operational-metrics/operational-metrics-ksa]",
270+
]
271+
272+
depends_on = [
273+
google_service_account.operational_metrics_gsa,
274+
kubernetes_service_account.operational_metrics_ksa,
275+
]
276+
}
277+
278+
resource "kubernetes_secret" "operational_metrics_secrets" {
279+
metadata {
280+
name = "operational-metrics-secrets"
281+
namespace = "operational-metrics"
282+
}
283+
284+
data = {
285+
"github-token" = data.google_secret_manager_secret_version.metrics_github_pat.secret_data
286+
"grafana-api-key" = data.google_secret_manager_secret_version.metrics_grafana_api_key.secret_data
287+
"grafana-metrics-userid" = data.google_secret_manager_secret_version.metrics_grafana_metrics_userid.secret_data
288+
}
289+
290+
type = "Opaque"
291+
provider = kubernetes.llvm-premerge-us-central
292+
depends_on = [kubernetes_namespace.operational_metrics]
293+
}
294+
295+
resource "kubernetes_manifest" "operational_metrics_cronjob" {
296+
manifest = yamldecode(file("operational_metrics_cronjob.yaml"))
297+
provider = kubernetes.llvm-premerge-us-central
298+
299+
depends_on = [
300+
kubernetes_namespace.operational_metrics,
301+
kubernetes_secret.operational_metrics_secrets,
302+
kubernetes_service_account.operational_metrics_ksa,
303+
]
304+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# operational_metrics_cronjob.yaml
2+
apiVersion: batch/v1
3+
kind: CronJob
4+
metadata:
5+
name: operational-metrics-cronjob
6+
namespace: operational-metrics
7+
spec:
8+
# 07:00 UTC daily (midnight PDT / 23:00 PST the previous day)
9+
schedule: "0 7 * * *"
10+
timeZone: "Etc/UTC"
11+
concurrencyPolicy: Forbid
12+
jobTemplate:
13+
spec:
14+
template:
15+
spec:
16+
serviceAccountName: operational-metrics-ksa
17+
nodeSelector:
18+
iam.gke.io/gke-metadata-server-enabled: "true"
19+
containers:
20+
- name: process-llvm-commits
21+
image: ghcr.io/llvm/operations-metrics:latest
22+
env:
23+
- name: GITHUB_TOKEN
24+
valueFrom:
25+
secretKeyRef:
26+
name: operational-metrics-secrets
27+
key: github-token
28+
- name: GRAFANA_API_KEY
29+
valueFrom:
30+
secretKeyRef:
31+
name: operational-metrics-secrets
32+
key: grafana-api-key
33+
- name: GRAFANA_METRICS_USERID
34+
valueFrom:
35+
secretKeyRef:
36+
name: operational-metrics-secrets
37+
key: grafana-metrics-userid
38+
resources:
39+
requests:
40+
cpu: "250m"
41+
memory: "256Mi"
42+
limits:
43+
cpu: "1"
44+
memory: "512Mi"
45+
restartPolicy: OnFailure

0 commit comments

Comments
 (0)