Commit 34a86fd

Merge remote-tracking branch 'upstream/main'
2 parents: c0f393e + 04037f6

31 files changed: +435 −136 lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 9 additions & 1 deletion

```diff
@@ -18,8 +18,16 @@
 - [ ] Service has placement constraints or is global
 - [ ] Service is restartable
 - [ ] Service restart is zero-downtime
+- [ ] Service has >1 replicas in PROD
+- [ ] Service has docker healthcheck enabled
 - [ ] Service is monitored (via prometheus and grafana)
 - [ ] Service is not bound to one specific node (e.g. via files or volumes)
 - [ ] Relevant OPS E2E Test are added
+
+If exposed via traefik
 - [ ] Service's Public URL is included in maintenance mode
-- [ ] Service's Public URL is included in testing mode -->
+- [ ] Service's Public URL is included in testing mode
+- [ ] Service has Traefik (Service Loadbalancer) Healthcheck enabled
+- [ ] Credentials page is updated
+- [ ] URL added to e2e test services (e2e test checking that URL can be accessed)
+-->
```

.gitignore

Lines changed: 1 addition & 1 deletion

```diff
@@ -142,7 +142,7 @@ yq
 **/.env-devel
 **/.stack.*.yml
 **/.stack.*.yaml
-docker-compose.yml
+
 stack.yml
 stack_with_prefix.yml
 docker-compose.simcore.yml
```
Lines changed: 28 additions & 0 deletions

## How to delete volumes with `reclaimPolicy: retain`

1. Delete the PVC:

   ```
   kubectl delete pvc <pvc-name>
   ```

2. Verify the PV is `Released`:

   ```
   kubectl get pv <pv-name>
   ```

3. Manually remove the EBS Volume in AWS:

   1. Go to the AWS GUI and list EBS Volumes
   1. Filter by tag `ebs.csi.aws.com/cluster=true`
   1. Identify the volume associated with your PV (check the `kubernetes.io/created-for/pv/name` tag of the EBS Volume)
   1. Verify that the EBS Volume is `Available`
   1. Delete the EBS Volume

4. Delete the PV:

   ```
   kubectl delete pv <pv-name>
   ```

5. Remove finalizers (if necessary). If the PV remains in a `Terminating` state, remove its finalizers:

   ```
   kubectl patch pv <pv-name> -p '{"metadata":{"finalizers":null}}'
   ```
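The steps above can be sketched as a small dry-run script. This is a hedged sketch: the resource names are placeholders, and the EBS Volume itself (step 3) must still be deleted via the AWS console or CLI.

```shell
#!/usr/bin/env sh
# Dry-run sketch of the PVC/PV cleanup steps above. All names are
# placeholders; the EBS volume deletion (step 3) stays a manual AWS step.
PVC_NAME="example-pvc"   # placeholder
PV_NAME="example-pv"     # placeholder

# finalizer-stripping patch from step 5
PATCH='{"metadata":{"finalizers":null}}'

run() { echo "+ $*"; }   # prints each command instead of executing it

run kubectl delete pvc "$PVC_NAME"
run kubectl get pv "$PV_NAME"                 # expect STATUS: Released
run kubectl delete pv "$PV_NAME"
run kubectl patch pv "$PV_NAME" -p "$PATCH"   # only if stuck Terminating
```

Replace the `run` wrapper with direct execution once the printed commands look right.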

charts/aws-ebs-csi-driver/values.yaml.gotmpl

Lines changed: 1 addition & 1 deletion

```diff
@@ -5,7 +5,7 @@ image:
   tag: "v1.38.1"
 
 storageClasses:
-  - name: "ebs-sc"
+  - name: "{{ .Values.ebsStorageClassName }}"
    parameters:
      type: "gp3"
    allowVolumeExpansion: true
```
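Several charts below consume this same `ebsStorageClassName` value. The substitution can be illustrated with a quick shell check; this is a hypothetical rendering (helmfile performs the real Go-template expansion, not `sed`), and `ebs-sc` is assumed to match the previously hardcoded name.

```shell
# Hypothetical illustration of the value substitution; the real rendering
# is done by helmfile's Go templating, not sed.
template='- name: "{{ .Values.ebsStorageClassName }}"'
ebsStorageClassName="ebs-sc"   # assumed value, matching the old hardcoded name
rendered=$(printf '%s' "$template" | sed "s/{{ .Values.ebsStorageClassName }}/${ebsStorageClassName}/")
echo "$rendered"   # - name: "ebs-sc"
```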

charts/longhorn/README.md

Lines changed: 7 additions & 1 deletion

```diff
@@ -2,7 +2,7 @@
 
 ### Can LH be used for critical services (e.g., Databases)?
 
-No (as of now). , we should not use it for volumes of critical services.
+No. We should not use it for volumes of critical services.
 
 As of now, we should avoid using LH for critical services. Instead, we should rely on easier-to-maintain solutions (e.g., application-level replication [Postgres Operators], S3, etc.). Once we get hands-on experience, extensive monitoring and ability to scale LH, we can consider using it for critical services.
 
@@ -25,6 +25,12 @@ Source:
 * https://longhorn.io/kb/tip-only-use-storage-on-a-set-of-nodes/
 * https://longhorn.io/docs/1.8.1/nodes-and-volumes/nodes/default-disk-and-node-config/#customizing-default-disks-for-new-nodes
 
+### How to configure disks for LH
+
+As of now, we follow the same approach we use for the `/docker` folder (via ansible playbook), but with the `/longhorn` folder name.
+
+Issue asking LH to clearly document requirements: https://github.com/longhorn/longhorn/issues/11125
+
 ### Can workloads be run on nodes where LH is not installed?
 
 Workloads can run on nodes without LH as long as LH is not restricted to specific nodes via the `nodeSelector` or `systemManagedComponentsNodeSelector` settings. If LH is configured to run on specific nodes, workloads can only run on those nodes.
```
Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
 persistence:
   enabled: true
   size: "1Gi" # minimal size for gp3 is 1Gi
-  storageClass: "ebs-sc"
+  storageClass: "{{ .Values.ebsStorageClassName }}"
```
Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
 persistence:
   enabled: true
   size: "300Mi" # cannot be lower https://github.com/longhorn/longhorn/issues/8488
-  storageClass: "{{.Values.longhornStorageClassName}}"
+  storageClass: "{{ .Values.longhornStorageClassName }}"
```

charts/traefik/values.secure.yaml.gotmpl

Lines changed: 21 additions & 0 deletions

```diff
@@ -60,6 +60,27 @@ extraObjects:
       prefixes:
         - /longhorn
 
+  # a (href) links do not work properly without trailing slash
+  - apiVersion: traefik.io/v1alpha1
+    kind: Middleware
+    metadata:
+      name: logs-append-slash
+      namespace: {{ .Release.Namespace }}
+    spec:
+      redirectRegex:
+        regex: "^(https?://[^/]+/logs)$"
+        replacement: "${1}/"
+
+  - apiVersion: traefik.io/v1alpha1
+    kind: Middleware
+    metadata:
+      name: logs-strip-prefix
+      namespace: {{.Release.Namespace}}
+    spec:
+      stripPrefix:
+        prefixes:
+          - /logs
+
   - apiVersion: traefik.io/v1alpha1
     kind: Middleware
     metadata:
```
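The effect of the `logs-append-slash` redirect regex can be checked outside Traefik, e.g. with `sed -E` (similar extended-regex semantics to Traefik's Go `regexp`; the hostname below is a made-up example):

```shell
# Demonstrates the redirectRegex above: a bare /logs URL gains a trailing
# slash, while deeper paths are left untouched (example host is made up).
append_slash() {
  printf '%s' "$1" | sed -E 's#^(https?://[^/]+/logs)$#\1/#'
}

append_slash "https://monitoring.example.com/logs"        # -> .../logs/
append_slash "https://monitoring.example.com/logs/query"  # unchanged
```

This is why relative `a (href)` links work after the redirect: the browser resolves them against `/logs/` instead of `/`.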
Lines changed: 59 additions & 0 deletions

```yaml
# https://github.com/VictoriaMetrics/helm-charts/blob/victoria-logs-single-0.11.2/charts/victoria-logs-single/values.yaml

vector:
  # by default it will generate sink per statefulset's pod
  # each pod has a separate PV, so the data is replicated
  enabled: true

server:
  # HA through multiple replicas
  # https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9076
  replicaCount: 2

  retentionPeriod: 30d

  ingress:
    enabled: true
    annotations:
      namespace: "{{ .Release.Namespace }}"
      cert-manager.io/cluster-issuer: "cert-issuer"
      traefik.ingress.kubernetes.io/router.entrypoints: websecure
      traefik.ingress.kubernetes.io/router.middlewares: traefik-logs-append-slash@kubernetescrd,traefik-logs-strip-prefix@kubernetescrd,traefik-traefik-basic-auth@kubernetescrd # namespace + middleware name
    tls:
      - hosts:
          - {{ requiredEnv "K8S_MONITORING_FQDN" }}
        secretName: monitoring-tls
    hosts:
      - name: {{ requiredEnv "K8S_MONITORING_FQDN" }}
        path:
          - /logs
        pathType: Prefix

  persistentVolume:
    enabled: true
    storageClassName: "{{ .Values.ebsStorageClassName }}"
    size: 10Gi

  nodeSelector:
    ops: "true"

  # Schedule pods on different nodes if possible (HA)
  # https://stackoverflow.com/a/64958458/12124525
  topologySpreadConstraints:
    - maxSkew: 1
      topologyKey: "kubernetes.io/hostname"
      whenUnsatisfiable: DoNotSchedule
      # hardcoded due to https://github.com/VictoriaMetrics/helm-charts/issues/2219
      labelSelector:
        matchLabels:
          app: server
          app.kubernetes.io/instance: victoria-logs
          app.kubernetes.io/name: victoria-logs-single

  resources:
    limits:
      cpu: 500m
      memory: 512Mi
    requests:
      cpu: 500m
      memory: 512Mi
```

scripts/common-services.Makefile

Lines changed: 3 additions & 0 deletions

```makefile
STACK_NAME = $(notdir $(shell pwd))
TEMP_COMPOSE=.stack.${STACK_NAME}.yaml
REPO_BASE_DIR := $(shell git rev-parse --show-toplevel)
```
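What these Make variables expand to can be seen with plain-shell equivalents (`$(notdir $(shell pwd))` behaves like `basename "$(pwd)"`); the `monitoring` directory below is an arbitrary example, and `REPO_BASE_DIR` is omitted since it needs a real git checkout:

```shell
# Shell equivalents of the Make variables above (directory name is an
# arbitrary example; REPO_BASE_DIR would come from `git rev-parse --show-toplevel`).
cd "$(mktemp -d)" && mkdir -p monitoring && cd monitoring

STACK_NAME=$(basename "$(pwd)")            # ~ $(notdir $(shell pwd))
TEMP_COMPOSE=".stack.${STACK_NAME}.yaml"

echo "$STACK_NAME"     # monitoring
echo "$TEMP_COMPOSE"   # .stack.monitoring.yaml
```

So every stack directory that includes this Makefile gets a per-stack compose file name derived from its own directory name.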
