Facets-cloud · omendra-tomar · Nov 17, 2025 · Nov 19, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/common/alert_group/standard/1.0/facets.yaml b/common/alert_group/standard/1.0/facets.yaml
@@ -0,0 +1,251 @@
+# Module manifest for the alert_group intent (standard flavor, version 1.0).
+# description is a folded scalar: its two lines are read as one sentence.
+# clouds presumably lists the targets this module may be used on.
+intent: alert_group
+flavor: standard
+version: '1.0'
+description: >-
+  Creates Prometheus alert rules for monitoring and alerting with comprehensive
+  validation and enhanced UI features
+clouds: [aws, azure, gcp, kubernetes]
+# Resources this module consumes. Each `default` names a resource type/name
+# pair, presumably the one wired in when no other resource is selected.
+inputs:
+  kubernetes_details:
+    displayName: Kubernetes Cluster
+    type: '@facets/kubernetes-details'
+    optional: false
+    providers: [kubernetes, kubernetes-alpha, helm]
+    default:
+      resource_type: kubernetes_cluster
+      resource_name: default
+  prometheus:
+    displayName: Prometheus Configuration
+    description: Prometheus instance details for alert rule configuration and deployment
+    type: '@facets/prometheus'
+    optional: false
+    # main.tf reads attributes.helm_release_id from this input.
+    default:
+      resource_type: configuration
+      resource_name: prometheus
+spec:
+  type: object
+  title: Alert Group
+  description: Specification of the Alert Group resource intent
+  properties:
+    rules:
+      type: object
+      title: Alert Rules
+      description: Define alert rules for Prometheus monitoring
+      x-ui-toggle: false
+      patternProperties:
+        '^[a-zA-Z0-9_-]+$':  # the key is the rule name; main.tf uses it as the alert name
+          type: object
+          title: Alert Rule Configuration
+          properties:
+            expr:
+              title: Prometheus Expression
+              type: string
+              description: PromQL expression for the alert condition
+              pattern: ^[\s\S]{1,2000}$  # [\s\S], not ".", so multi-line PromQL is accepted
+              x-ui-error-message: Prometheus expression is required and must be between
+                1-2000 characters
+            for:
+              title: Alert Duration
+              type: string
+              description: Duration for which the condition must be true (e.g., 5m,
+                10s, 1h, 2d)
+              default: 5m
+              pattern: ^[0-9]+[smhd]$  # one integer followed by a single unit letter
+              x-ui-error-message: Duration must be in format like 5m, 30s, 1h, 2d
+            message:  # main.tf writes this value to the rule's `message` annotation
+              title: Alert Message
+              type: string
+              description: Detailed message when alert fires
+              minLength: 10
+              maxLength: 1000
+              x-ui-error-message: Alert message must be between 10-1000 characters
+                and provide clear context
+            summary:  # main.tf writes this value to the rule's `summary` annotation
+              title: Alert Summary
+              type: string
+              description: Brief summary of the alert (appears in notifications)
+              minLength: 5
+              maxLength: 200
+              x-ui-error-message: Alert summary must be between 5-200 characters
+            resource_type:  # main.tf emits this as both the resource_type and resourceType labels
+              title: Resource Type
+              type: string
+              description: Type of Kubernetes resource being monitored
+              minLength: 1
+              maxLength: 100
+              x-ui-typeable: true
+              x-ui-api-source:
+                method: GET
+                endpoint: /cc-ui/v1/dropdown/stack/{{stackName}}/resources-info
+                params:
+                  includeContent: false
+                labelKey: resourceType
+                valueKey: resourceType
+                filterConditions:
+                - type: negation  # presumably hides entries whose resourceType is UNKNOWN
+                  field: resourceType
+                  value: UNKNOWN
+              x-ui-error-message: Please enter a valid Kubernetes resource type
+            resource_name:
+              title: Resource Name
+              type: string
+              description: Name of the resource being monitored (must follow Kubernetes
+                naming conventions)
+              # Accepts a {{...}} template or a lowercase alphanumeric name with inner hyphens.
+              pattern: ^(\{\{.*\}\}|[a-z0-9]([-a-z0-9]*[a-z0-9])?)$
+              minLength: 1
+              maxLength: 63
+              x-ui-api-source:
+                endpoint: /cc-ui/v1/dropdown/stack/{{stackName}}/resources-info
+                method: GET
+                params:
+                  includeContent: false
+                labelKey: resourceName
+                valueKey: resourceName
+                filterConditions:
+                - field: resourceType
+                  value: spec.rules.{{this}}.resource_type
+                  type: dynamic
+              x-ui-error-message: Resource name must be valid Kubernetes name (lowercase,
+                alphanumeric, hyphens, 1-63 chars)
+            alert_type:  # main.tf emits this as the alert_type label
+              title: Alert Type
+              type: string
+              description: Type of alert supported by Facets monitoring system
+              minLength: 1
+              maxLength: 100
+              x-ui-typeable: true
+              enum:  # NOTE(review): typeable input vs. fixed enum - confirm custom values validate
+              - performance
+              - availability
+              - security
+              - capacity
+              - network
+              - database
+              - application
+              - infrastructure
+              - custom
+              x-ui-error-message: Select from predefined alert types or enter a custom
+                alert type
+            severity:  # main.tf emits this as the severity label
+              title: Severity Level
+              type: string
+              description: Alert severity level for prioritization and routing
+              default: normal  # schema default only; main.tf has no fallback of its own
+              enum:
+              - high
+              - normal
+              - urgent
+              - warning
+              - critical
+              x-ui-typeable: true
+              x-ui-error-message: Select from predefined severity levels or enter
+                a custom severity
+            disabled:  # main.tf leaves out rules where this is true
+              title: Disabled
+              type: boolean
+              default: false
+              description: Whether this alert rule is disabled (won't trigger when
+                conditions are met)
+            runbook_url:
+              title: Runbook URL
+              type: string
+              description: URL to documentation or runbook for handling this alert
+              pattern: ^https?://.*  # only the http:// or https:// prefix is checked
+              x-ui-error-message: Runbook URL must be a valid HTTP/HTTPS URL
+            escalation_policy:  # NOTE(review): not referenced by main.tf in this change
+              title: Escalation Policy
+              type: string
+              description: Name of the escalation policy for this alert
+              default: standard
+              enum:
+              - immediate
+              - standard
+              - low_priority
+              - business_hours
+              - weekend_only
+              - custom
+              x-ui-error-message: Please select a valid escalation policy
+            thresholds:  # NOTE(review): not referenced by main.tf in this change
+              title: Alert Thresholds
+              type: object
+              description: Configurable thresholds for the alert condition
+              x-ui-toggle: true
+              properties:
+                warning:
+                  title: Warning Threshold
+                  type: number
+                  description: Threshold value for warning level alerts
+                  minimum: 0
+                critical:
+                  title: Critical Threshold
+                  type: number
+                  description: Threshold value for critical level alerts
+                  minimum: 0
+                unit:
+                  title: Unit
+                  type: string
+                  description: Unit of measurement for thresholds
+                  default: percent
+                  enum:
+                  - percent
+                  - bytes
+                  - count
+                  - seconds
+                  - milliseconds
+                  - requests_per_second
+                  - errors_per_minute
+            labels:  # merged beneath the module-managed labels in main.tf, which win on key clashes
+              title: Additional Labels
+              type: object
+              description: Additional labels for the alert (key-value pairs for routing
+                and grouping)
+              x-ui-yaml-editor: true
+              x-ui-toggle: true
+              x-ui-error-message: Labels should be key-value pairs with valid Kubernetes
+                label format
+            annotations:  # main.tf's message and summary annotations override same-named keys here
+              title: Additional Annotations
+              type: object
+              description: Additional annotations for the alert (key-value pairs for
+                metadata)
+              x-ui-yaml-editor: true
+              x-ui-toggle: true
+              x-ui-error-message: Annotations should be key-value pairs providing
+                additional context
+          required:  # fields every rule must set
+          - expr
+          - for
+          - message
+          - summary
+          - resource_type
+          - resource_name
+          - alert_type
+  required:  # an alert group must define a rules map
+  - rules
+outputs:
+  default:  # the only output declared by this module
+    title: Alert Group Configuration
+    type: '@facets/alert_group'
+sample:
+  kind: alert_group
+  flavor: standard
+  version: '1.0'
+  disabled: true  # resource-level flag; separate from the per-rule `disabled` field
+  spec:
+    rules:
+      high_cpu_usage:  # rule name; must match ^[a-zA-Z0-9_-]+$
+        expr: 'cpu_usage_percent > 80'
+        for: 5m
+        message: 'CPU usage is above 80% for more than 5 minutes'
+        summary: High CPU usage detected
+        resource_type: pod
+        resource_name: my-application
+        alert_type: performance
+        severity: normal
diff --git a/common/alert_group/standard/1.0/main.tf b/common/alert_group/standard/1.0/main.tf
@@ -0,0 +1,99 @@
+# Builds the PrometheusRule manifest for this alert group from var.instance.spec.
+locals {
+  spec = lookup(var.instance, "spec", {})
+
+  # Alert rules keyed by rule name, as declared under spec.rules
+  rules = lookup(local.spec, "rules", {})
+
+  # Value for the "release" label; "prometheus" when the input has no helm_release_id
+  prometheus_release = lookup(var.inputs.prometheus.attributes, "helm_release_id", "prometheus")
+
+  # Transform rules into PrometheusRule format, filtering out disabled rules
+  alert_rules = [
+    for rule_name, rule_object in local.rules :
+    {
+      alert = rule_name
+      expr  = rule_object.expr
+      for   = rule_object.for
+      # Module-managed labels override same-named user labels. Null values (an
+      # unset optional field such as severity) are dropped instead of rendered.
+      labels = {
+        for label_key, label_value in merge(
+          lookup(rule_object, "labels", {}),
+          {
+            resource_type = rule_object.resource_type
+            resource_name = rule_object.resource_name
+            resourceType  = rule_object.resource_type
+            resourceName  = rule_object.resource_name
+            alert_type    = lookup(rule_object, "alert_type", null)
+            severity      = lookup(rule_object, "severity", null)
+          }
+        ) : label_key => label_value if label_value != null
+      }
+      # spec runbook_url becomes the runbook_url annotation unless the rule's own
+      # annotations already set that key; message and summary always win.
+      annotations = {
+        for annotation_key, annotation_value in merge(
+          { runbook_url = lookup(rule_object, "runbook_url", null) },
+          lookup(rule_object, "annotations", {}),
+          { message = rule_object.message, summary = rule_object.summary }
+        ) : annotation_key => annotation_value if annotation_value != null
+      }
+    } if !lookup(rule_object, "disabled", false)
+  ]
+
+  # Names of the enabled rules, exposed through the module outputs
+  rule_names = [for key, rule in local.rules : key if !lookup(rule, "disabled", false)]
+
+  # Metadata for PrometheusRule
+  prometheus_rule_metadata = {
+    name      = "${var.instance_name}-alert-group"
+    namespace = var.environment.namespace
+    labels = merge(
+      {
+        alert_group_name               = var.instance_name
+        role                           = "alert-rules"
+        release                        = local.prometheus_release
+        "app.kubernetes.io/name"       = var.instance_name
+        "app.kubernetes.io/instance"   = var.instance_name
+        "app.kubernetes.io/component"  = "alert-rules"
+        "app.kubernetes.io/managed-by" = "facets"
+      },
+      var.environment.cloud_tags
+    )
+    annotations = {
+      owner                      = "facets"
+      "facets.cloud/instance"    = var.instance_name
+      "facets.cloud/environment" = var.environment.name
+    }
+  }
+
+  # PrometheusRule manifest: one rule group holding every enabled rule
+  prometheus_rule_manifest = {
+    apiVersion = "monitoring.coreos.com/v1"
+    kind       = "PrometheusRule"
+    metadata   = local.prometheus_rule_metadata
+    spec = {
+      groups = [{ name = "${var.instance_name}-alert-rules", rules = local.alert_rules }]
+    }
+  }
+}
+
+# Installs the PrometheusRule through the generic any-k8s-resource chart,
+# which receives the whole manifest as its `resource` value.
+resource "helm_release" "alert_group" {
+  name      = "${var.instance_name}-alert-group"
+  namespace = var.environment.namespace
+  chart     = "https://github.com/Facets-cloud/facets-utility-modules/raw/master/any-k8s-resource/dynamic-k8s-resource-0.1.0.tgz"
+  version   = "0.1.0"
+
+  # Release behaviour: create the namespace if missing, do not wait for the
+  # resources, and keep at most 10 revisions.
+  create_namespace = true
+  wait             = false
+  timeout          = 300
+  cleanup_on_fail  = true
+  max_history      = 10
+
+  values = [yamlencode({ resource = local.prometheus_rule_manifest })]
+}
diff --git a/common/alert_group/standard/1.0/outputs.tf b/common/alert_group/standard/1.0/outputs.tf
@@ -0,0 +1,9 @@
+locals {
+  output_interfaces = {} # this module exposes no interfaces
+  output_attributes = {
+    prometheus_rule_name = "${var.instance_name}-alert-group"
+    namespace            = var.environment.namespace
+    alert_names          = local.rule_names # enabled rules only
+    alert_count          = length(local.rule_names)
+  }
+}
diff --git a/common/alert_group/standard/1.0/variables.tf b/common/alert_group/standard/1.0/variables.tf
@@ -0,0 +1,33 @@
+variable "instance" {
+  type = object({
+    spec = any # free-form; main.tf reads spec.rules from it
+  })
+  description = "Instance configuration for alert group"
+}
+
+variable "instance_name" {
+  type        = string # prefix of the Helm release, PrometheusRule and rule-group names
+  description = "Name of the alert group instance"
+}
+
+variable "environment" {
+  type = object({
+    name       = string      # written to the facets.cloud/environment annotation
+    namespace  = string      # namespace of the Helm release and the PrometheusRule
+    cloud_tags = map(string) # merged into the PrometheusRule labels
+  })
+  description = "Environment configuration"
+}
+
+variable "inputs" {
+  type = object({
+    # Not read by main.tf or outputs.tf in this change.
+    kubernetes_details = object({
+      resource_type = string
+      resource_name = string
+    })
+    # attributes.helm_release_id, when present, becomes the "release" label.
+    prometheus = object({ attributes = any })
+  })
+  description = "Input resources for the module"
+}