Skip to content

Commit 2300ede

Browse files
authored
Merge pull request #22 from ADORSYS-GIS/feat/14-automate-cert-manager-ingress
Automate cert manager & ingress, Adoption Strategy & Project Standardization
2 parents 166c406 + 38fd4cc commit 2300ede

28 files changed

+1020
-323
lines changed

.gitignore

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,25 @@ sw.*
9292
# yarn.lock
9393
yarn.lock
9494

95-
.terraform
95+
# Terraform
96+
*.tfstate
97+
*.tfstate.backup
98+
*.tfvars
99+
.terraform/
96100
.terraform.lock.hcl
97101

98102

103+
99104
*/charts
100105

101-
*.out
102-
*.json
106+
107+
# Crash logs
108+
crash.log
109+
crash.*.log
110+
111+
# Exclude .terraform directory
112+
.terraform/
113+
114+
# Ignore CLI configuration files
115+
.terraformrc
116+
terraform.rc

Makefile

Lines changed: 0 additions & 64 deletions
This file was deleted.

README.md

Lines changed: 28 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -1,120 +1,33 @@
1-
# Observability Stack with GKE, LGTM, and ArgoCD
1+
# Kubernetes Observability & Operations Platform
22

3-
Complete infrastructure and application stack for observability on Google Kubernetes Engine (GKE).
3+
This repository provisions a comprehensive, production-grade observability and operations platform on **Google Kubernetes Engine (GKE)**. It integrates distinct, modular components to handle **deployment**, **monitoring**, **logging**, **tracing**, and **certificate management**.
44

5-
## Components
5+
## Core Components
66

7-
- **GKE**: Google Kubernetes Engine cluster
8-
- **LGTM Stack**:
9-
- Loki (logs)
10-
- Grafana (visualization)
11-
- Tempo (traces)
12-
- Mimir (metrics)
13-
- **ArgoCD**: GitOps continuous deployment
14-
- **Cert-Manager**: Automated certificate management
15-
- **Ingress Controller**: Nginx ingress controller
7+
* **Observability (LGTM Stack)**:
8+
* **Loki**: Distributed logging.
9+
* **Grafana**: Visualization and dashboards.
10+
* **Tempo**: Distributed tracing.
11+
* **Mimir**: Scalable long-term metrics storage (Prometheus-compatible).
12+
* **GitOps (ArgoCD)**:
13+
* **ArgoCD**: Continuous delivery and declarative GitOps workflows.
14+
* **Infrastructure Essentials**:
15+
* **Cert-Manager**: Automated TLS certificate issuance (Let's Encrypt).
16+
* **Ingress Controller**: NGINX Ingress for external traffic management.
1617

17-
## REPO STRUCTURE
18+
## Project Structure
19+
20+
This project is built with **Terraform** and **Helm**, designed for modularity. You can deploy the entire stack or individual components as needed.
21+
22+
* **[`lgtm-stack/`](lgtm-stack/README.md)**: The core internal monitoring platform.
23+
* **[`argocd/`](argocd/README.md)**: The GitOps delivery engine.
24+
* **[`cert-manager/`](cert-manager/README.md)**: Certificate management infrastructure.
25+
* **[`ingress-controller/`](ingress-controller/README.md)**: Ingress routing infrastructure.
26+
27+
## Documentation
28+
29+
* **[Kubernetes Observability Guide](docs/kubernetes-observability.md)**: Deployment and architecture of the LGTM stack.
30+
* **[Cert-Manager Deployment](docs/cert-manager-terraform-deployment.md)**: Terraform guide for Cert-Manager.
31+
* **[Ingress Controller Deployment](docs/ingress-controller-terraform-deployment.md)**: Terraform guide for NGINX Ingress.
32+
* **[ArgoCD Documentation](argocd/README.md)**: Setup and configuration for GitOps.
1833

19-
```
20-
observability/
21-
├── README.md
22-
│ └── USE: Project overview, quick start, and entry point for new users
23-
24-
├── argocd/
25-
│ ├── README.md
26-
│ │ └── USE: ArgoCD component overview and quick reference
27-
│ └── terraform/
28-
│ ├── locals.tf
29-
│ │ └── USE: Local variables and computed values within ArgoCD module
30-
│ ├── main.tf
31-
│ │ └── USE: Deploy ArgoCD using Helm to GKE cluster
32-
│ ├── outputs.tf
33-
│ │ └── USE: Export ArgoCD endpoint URLs and credentials
34-
│ ├── variables.tf
35-
│ │ └── USE: Define input parameters for ArgoCD deployment
36-
│ └── values/
37-
│ ├── argocd-values.yaml
38-
│ │ └── USE: Base Helm chart values for ArgoCD
39-
│ ├── argocd-dev-values.yaml
40-
│ │ └── USE: Development environment overrides (reduced resources)
41-
│ └── argocd-prod-values.yaml
42-
│ └── USE: Production environment overrides (HA, replicas)
43-
44-
├── cert-manager/
45-
│ ├── README.md
46-
│ │ └── USE: Cert-Manager component overview and reference
47-
│ └── terraform/
48-
│ ├── locals.tf
49-
│ │ └── USE: Local variables and computed values
50-
│ ├── main.tf
51-
│ │ └── USE: Deploy Cert-Manager using Helm to manage TLS certificates
52-
│ ├── outputs.tf
53-
│ │ └── USE: Export Cert-Manager service account and configuration details
54-
│ ├── variables.tf
55-
│ │ └── USE: Define customizable parameters for Cert-Manager
56-
57-
├── docs/
58-
│ ├── ARCHITECTURE.md
59-
│ │ └── USE: Explain system design, component interactions, and data flow
60-
│ ├── GETTING_STARTED.md
61-
│ │ └── USE: Step-by-step quick start guide for new users
62-
│ ├── README.md
63-
│ │ └── USE: Documentation index and navigation hub
64-
│ ├── TUTORIAL_ARGOCD.md
65-
│ │ └── USE: Manual ArgoCD installation guide (alternative to Terraform)
66-
│ ├── TUTORIAL_CERT_MANAGER.md
67-
│ │ └── USE: Manual Cert-Manager installation guide
68-
│ ├── TUTORIAL_GKE_SETUP.md
69-
│ │ └── USE: Manual GKE cluster creation using gcloud CLI
70-
│ ├── TUTORIAL_INGRESS.md
71-
│ │ └── USE: Manual Ingress Controller installation guide
72-
│ ├── TUTORIAL_LGTM.md
73-
│ │ └── USE: Manual LGTM stack deployment guide
74-
│ └── images/
75-
│ ├── architecture-diagram.png
76-
│ │ └── USE: Visual system architecture diagram
77-
│ ├── argocd-workflow.png
78-
│ │ └── USE: Visual GitOps deployment workflow diagram
79-
│ └── lgtm-flow.png
80-
│ └── USE: Visual LGTM component data flow diagram
81-
82-
├── ingress-controller/
83-
│ ├── README.md
84-
│ │ └── USE: Ingress Controller component overview
85-
│ └── terraform/
86-
│ ├── locals.tf
87-
│ │ └── USE: Local variables for ingress module
88-
│ ├── main.tf
89-
│ │ └── USE: Deploy Nginx Ingress Controller for HTTP/HTTPS routing
90-
│ ├── outputs.tf
91-
│ │ └── USE: Export load balancer endpoint and service information
92-
│ ├── variables.tf
93-
│ │ └── USE: Define customizable parameters for Ingress
94-
│ └── values.yaml
95-
│ └── USE: Helm chart configuration for Nginx Ingress Controller
96-
97-
└── lgtm-stack/
98-
├── README.md
99-
│ └── USE: LGTM stack component overview and architecture
100-
└── terraform/
101-
├── locals.tf
102-
│ └── USE: Local variables for LGTM module
103-
├── main.tf
104-
│ └── USE: Deploy all LGTM components (Prometheus, Loki, Mimir, Tempo, Grafana)
105-
├── outputs.tf
106-
│ └── USE: Export endpoints and credentials for all LGTM components
107-
├── variables.tf
108-
│ └── USE: Define customizable parameters for LGTM deployment
109-
└── values/
110-
├── grafana-values.yaml
111-
│ └── USE: Helm configuration for Grafana dashboards and datasources
112-
├── loki-values.yaml
113-
│ └── USE: Helm configuration for Loki log storage and retention
114-
├── mimir-values.yaml
115-
│ └── USE: Helm configuration for Mimir long-term metrics storage
116-
├── prometheus-values.yaml
117-
│ └── USE: Helm configuration for Prometheus metrics scraping
118-
└── tempo-values.yaml
119-
└── USE: Helm configuration for Tempo distributed tracing
120-
```

cert-manager/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Cert-Manager Deployment
2+
3+
This directory contains infrastructure-as-code and configuration for deploying **Cert-Manager** to automate the management and issuance of TLS certificates for the Kubernetes cluster.
4+
5+
Cert-Manager provides:
6+
* **Automated Issuance**: Obtaining certificates from Let's Encrypt and other issuers.
7+
* **Renewal**: Automatically renewing certificates before expiry.
8+
* **Integration**: Working seamlessly with Ingress resources to secure external access for any application.
9+
10+
## Deployment Options
11+
12+
### 1. Automated Deployment
13+
This method uses the Terraform configuration located in the `terraform/` directory. It is the recommended approach for automation.
14+
15+
For detailed instructions, see the [Terraform deployment guide](../docs/cert-manager-terraform-deployment.md).
16+
17+
### 2. Manual Deployment (Helm & kubectl)
18+
If you prefer to deploy manually using CLI tools, you can follow the [manual deployment guide](../docs/cert-manager-manual-deployment.md).
19+
20+
## Troubleshooting
21+
22+
### Deployment Flags
23+
Ensure variables are set correctly in `terraform.tfvars`:
24+
```hcl
25+
install_cert_manager = true
26+
```
27+
28+
### Common Issues
29+
30+
**Webhook Pod Not Ready**
31+
```bash
32+
# Check pod status (look for CrashLoopBackOff)
33+
kubectl get pods -n cert-manager
34+
35+
# Fix: Ensure installCRDs=true is set in Helm release
36+
```
37+
38+
**Certificate Stuck Not Ready (READY shows "False")**
39+
```bash
40+
# Check certificate events for challenge failures
41+
kubectl describe certificate <name> -n <namespace>
42+
```
43+
44+
**Issuer Not Ready**
45+
```bash
46+
# Check issuer status and ACME server URL
47+
kubectl describe clusterissuer letsencrypt-prod
48+
```

cert-manager/terraform/locals.tf

Whitespace-only changes.

cert-manager/terraform/main.tf

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
terraform {
2+
required_version = ">= 1.0"
3+
required_providers {
4+
kubernetes = {
5+
source = "hashicorp/kubernetes"
6+
version = "~> 2.0"
7+
}
8+
helm = {
9+
source = "hashicorp/helm"
10+
version = "~> 2.12"
11+
}
12+
}
13+
}
14+
15+
resource "helm_release" "cert_manager" {
16+
count = var.install_cert_manager ? 1 : 0
17+
18+
name = var.release_name
19+
repository = "https://charts.jetstack.io"
20+
chart = "cert-manager"
21+
namespace = var.namespace
22+
create_namespace = true
23+
version = var.cert_manager_version
24+
25+
set {
26+
name = "installCRDs"
27+
value = "true"
28+
}
29+
30+
wait = true
31+
timeout = 600
32+
}
33+
34+
# Issuer for Let's Encrypt
35+
resource "kubernetes_manifest" "letsencrypt_issuer" {
36+
count = var.install_cert_manager ? 1 : 0
37+
38+
manifest = {
39+
apiVersion = "cert-manager.io/v1"
40+
kind = var.cert_issuer_kind
41+
metadata = merge(
42+
{
43+
name = var.cert_issuer_name
44+
},
45+
# Only add namespace if Kind is Issuer.
46+
# If issuer_namespace is set, use it. Otherwise fallback to var.namespace.
47+
var.cert_issuer_kind == "Issuer" ? {
48+
namespace = coalesce(var.issuer_namespace, var.namespace)
49+
} : {}
50+
)
51+
spec = {
52+
acme = {
53+
server = var.issuer_server
54+
email = var.letsencrypt_email
55+
privateKeySecretRef = {
56+
name = "${var.cert_issuer_name}-key"
57+
}
58+
solvers = [
59+
{
60+
http01 = {
61+
ingress = {
62+
class = var.ingress_class_name
63+
}
64+
}
65+
}
66+
]
67+
}
68+
}
69+
}
70+
71+
depends_on = [helm_release.cert_manager]
72+
}

cert-manager/terraform/outputs.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
output "namespace" {
2+
description = "Namespace where cert-manager was installed"
3+
value = var.namespace
4+
}
5+
6+
output "issuer_name" {
7+
description = "Name of the created Issuer"
8+
value = var.install_cert_manager ? var.cert_issuer_name : ""
9+
}

0 commit comments

Comments
 (0)