diff --git a/README.md b/README.md index 2cba8330..2edab370 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Optionally, the module supports advanced security group management for the worke * [terraform-ibm-base-ocp-vpc](#terraform-ibm-base-ocp-vpc) * [Submodules](./modules) * [fscloud](./modules/fscloud) + * [kube-audit](./modules/kube-audit) * [Examples](./examples) * [2 MZR clusters in same VPC example](./examples/multiple_mzr_clusters) * [Advanced example (mzr, auto-scale, kms, taints)](./examples/advanced) diff --git a/examples/advanced/README.md b/examples/advanced/README.md index 0b770bfb..fb5cd8af 100644 --- a/examples/advanced/README.md +++ b/examples/advanced/README.md @@ -10,3 +10,6 @@ The following resources are provisioned by this example: - A multi-zone (3 zone) KMS encrypted OCP VPC cluster, with worker pools in each zone. - Auto scaling enabled for the default worker pool. - Taints against the workers in zone-2 and zone-3. +- Enable Kubernetes API server audit logs. +- A Cloud logs instance +- Logs agent to send logs to the cloud logs. diff --git a/examples/advanced/main.tf b/examples/advanced/main.tf index b8d5e027..99949873 100644 --- a/examples/advanced/main.tf +++ b/examples/advanced/main.tf @@ -184,3 +184,85 @@ data "ibm_container_cluster_config" "cluster_config" { resource_group_id = module.ocp_base.resource_group_id config_dir = "${path.module}/../../kubeconfig" } + +######################################################################################################################## +# Kube Audit +######################################################################################################################## + +module "kube_audit" { + depends_on = [module.ocp_base] # Wait for the cluster to completely deploy. 
+ source = "../../modules/kube-audit" + cluster_id = module.ocp_base.cluster_id + cluster_resource_group_id = module.resource_group.resource_group_id + audit_log_policy = "WriteRequestBodies" + region = var.region + ibmcloud_api_key = var.ibmcloud_api_key +} + + +######################################################################################################################## +# Observability (Instance + Agents) +######################################################################################################################## + +locals { + logs_agent_namespace = "ibm-observe" + logs_agent_name = "logs-agent" +} + +module "observability_instances" { + source = "terraform-ibm-modules/observability-instances/ibm" + version = "3.4.3" + resource_group_id = module.resource_group.resource_group_id + region = var.region + cloud_logs_plan = "standard" + cloud_monitoring_plan = "graduated-tier" + enable_platform_metrics = false + cloud_logs_instance_name = "${var.prefix}-cloud-logs" + cloud_monitoring_provision = false +} + +module "trusted_profile" { + source = "terraform-ibm-modules/trusted-profile/ibm" + version = "2.0.1" + trusted_profile_name = "${var.prefix}-profile" + trusted_profile_description = "Logs agent Trusted Profile" + # As a `Sender`, you can send logs to your IBM Cloud Logs service instance - but not query or tail logs. This role is meant to be used by agents and routers sending logs. + trusted_profile_policies = [{ + roles = ["Sender"] + resources = [{ + service = "logs" + }] + }] + # Set up fine-grained authorization for `logs-agent` running in ROKS cluster in `ibm-observe` namespace. 
+ trusted_profile_links = [{ + cr_type = "ROKS_SA" + links = [{ + crn = module.ocp_base.cluster_crn + namespace = local.logs_agent_namespace + name = local.logs_agent_name + }] + } + ] +} + +module "observability_agents" { + depends_on = [module.kube_audit] + source = "terraform-ibm-modules/observability-agents/ibm" + version = "2.6.0" + cluster_id = module.ocp_base.cluster_id + cluster_resource_group_id = module.resource_group.resource_group_id + # Cloud Logs agent + logs_agent_trusted_profile = module.trusted_profile.trusted_profile.id + logs_agent_namespace = local.logs_agent_namespace + logs_agent_name = local.logs_agent_name + cloud_logs_ingress_endpoint = module.observability_instances.cloud_logs_ingress_private_endpoint + cloud_logs_ingress_port = 3443 + # example of how to add additional metadata to the logs agents + logs_agent_additional_metadata = [{ + key = "cluster_id" + value = module.ocp_base.cluster_id + }] + # example of how to add only kube-audit log source path + logs_agent_selected_log_source_paths = ["/var/log/audit/*.log"] + cloud_monitoring_enabled = false +} diff --git a/examples/advanced/provider.tf b/examples/advanced/provider.tf index abecf9f5..5ea1365e 100644 --- a/examples/advanced/provider.tf +++ b/examples/advanced/provider.tf @@ -12,3 +12,17 @@ provider "kubernetes" { token = data.ibm_container_cluster_config.cluster_config.token cluster_ca_certificate = data.ibm_container_cluster_config.cluster_config.ca_certificate } + +provider "helm" { + kubernetes { + host = data.ibm_container_cluster_config.cluster_config.host + token = data.ibm_container_cluster_config.cluster_config.token + cluster_ca_certificate = data.ibm_container_cluster_config.cluster_config.ca_certificate + } + # IBM Cloud credentials are required to authenticate to the helm repo + registry { + url = "oci://icr.io/ibm/observe/logs-agent-helm" + username = "iamapikey" + password = var.ibmcloud_api_key + } +} diff --git a/examples/advanced/version.tf 
b/examples/advanced/version.tf index 837c6d7d..7bda5336 100644 --- a/examples/advanced/version.tf +++ b/examples/advanced/version.tf @@ -12,5 +12,9 @@ terraform { source = "hashicorp/kubernetes" version = ">= 2.16.1" } + helm = { + source = "hashicorp/helm" + version = ">= 2.15.0" + } } } diff --git a/modules/kube-audit/README.md b/modules/kube-audit/README.md new file mode 100644 index 00000000..a3f0eb32 --- /dev/null +++ b/modules/kube-audit/README.md @@ -0,0 +1,100 @@ +# Kubernetes API server audit logs + +To monitor user-initiated, Kubernetes administrative activity made within your cluster, you can collect and forward audit events that are passed through your Kubernetes API server to IBM Cloud Logs or an external server. + +This sub-module helps you to create a Kubernetes audit system by using the provided image and deployment in your existing cluster. [Learn more](https://cloud.ibm.com/docs/openshift?topic=openshift-health-audit) + +**Important**: The sub-module uses the `icr.io/ibm/ibmcloud-kube-audit-to-ibm-cloud-logs` image to forward logs to IBM Cloud Logs. This image is for demonstration purposes only. For a production solution, configure and maintain your own log forwarding image. 
+ +### Usage + +```hcl +# ############################################################################ +# Init cluster config for helm +# ############################################################################ + +data "ibm_container_cluster_config" "cluster_config" { + # update this value with the Id of the cluster where these agents will be provisioned + cluster_name_id = "cluster_id" +} + +# ############################################################################ +# Config providers +# ############################################################################ + +provider "ibm" { + # update this value with your IBM Cloud API key value + ibmcloud_api_key = "XXXXXXXXXXXXXXXXX" #pragma: allowlist secret +} + +provider "helm" { + kubernetes { + host = data.ibm_container_cluster_config.cluster_config.host + token = data.ibm_container_cluster_config.cluster_config.token + cluster_ca_certificate = data.ibm_container_cluster_config.cluster_config.ca_certificate + } +} + +provider "kubernetes" { + host = data.ibm_container_cluster_config.cluster_config.host + token = data.ibm_container_cluster_config.cluster_config.token + cluster_ca_certificate = data.ibm_container_cluster_config.cluster_config.ca_certificate +} + +module "kube_audit" { + source = "terraform-ibm-modules/terraform-ibm-base-ocp-vpc/ibm//modules/kube-audit" + version = "X.X.X" # Replace "X.X.X" with a release version to lock into a specific release + cluster_id = "cluster_id" + cluster_resource_group_id = "resource group id" + region = "us-south" +} +``` + + +### Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.9.0 | +| [helm](#requirement\_helm) | >= 2.15.0, <3.0.0 | +| [ibm](#requirement\_ibm) | >= 1.70.0, <2.0.0 | +| [null](#requirement\_null) | >= 3.2.1, < 4.0.0 | +| [time](#requirement\_time) | >= 0.9.1, < 1.0.0 | + +### Modules + +No modules. 
+ +### Resources + +| Name | Type | +|------|------| +| [helm_release.kube_audit](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [null_resource.set_audit_log_policy](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [null_resource.set_audit_webhook](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [time_sleep.wait_for_kube_audit](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource | +| [ibm_container_cluster_config.cluster_config](https://registry.terraform.io/providers/ibm-cloud/ibm/latest/docs/data-sources/container_cluster_config) | data source | +| [ibm_container_vpc_cluster.cluster](https://registry.terraform.io/providers/ibm-cloud/ibm/latest/docs/data-sources/container_vpc_cluster) | data source | + +### Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [audit\_deployment\_name](#input\_audit\_deployment\_name) | The name of the log collection deployment and service. | `string` | `"ibmcloud-kube-audit"` | no | +| [audit\_log\_policy](#input\_audit\_log\_policy) | Specify the amount of information that is logged to the API server audit logs by choosing the audit log policy profile to use. Supported values are `default` and `WriteRequestBodies`. | `string` | `"default"` | no | +| [audit\_namespace](#input\_audit\_namespace) | The name of the namespace where log collection service and a deployment will be created. | `string` | `"ibm-kube-audit"` | no | +| [audit\_webhook\_listener\_image](#input\_audit\_webhook\_listener\_image) | The audit webhook listener image reference in the format of `[registry-url]/[namespace]/[image]`. The sub-module uses the `icr.io/ibm/ibmcloud-kube-audit-to-ibm-cloud-logs` image to forward logs to IBM Cloud Logs. This image is for demonstration purposes only. 
For a production solution, configure and maintain your own log forwarding image. | `string` | `"icr.io/ibm/ibmcloud-kube-audit-to-ibm-cloud-logs"` | no | +| [audit\_webhook\_listener\_image\_version](#input\_audit\_webhook\_listener\_image\_version) | The tag or digest for the audit webhook listener image to deploy. If changing the value, ensure it is compatible with `audit_webhook_listener_image`. | `string` | `"deaabcb8225e800385413ba420cf3f819d3b0671@sha256:acf123f4dba63534cbc104c6886abedff9d25a22a34ab7b549ede988ed6e7144"` | no | +| [cluster\_config\_endpoint\_type](#input\_cluster\_config\_endpoint\_type) | Specify which type of endpoint to use for cluster config access: 'default', 'private', 'vpe', 'link'. 'default' value will use the default endpoint of the cluster. | `string` | `"default"` | no | +| [cluster\_id](#input\_cluster\_id) | The ID of the cluster to deploy the log collection service in. | `string` | n/a | yes | +| [cluster\_resource\_group\_id](#input\_cluster\_resource\_group\_id) | The resource group ID of the cluster. | `string` | n/a | yes | +| [ibmcloud\_api\_key](#input\_ibmcloud\_api\_key) | The IBM Cloud api key to generate an IAM token. | `string` | n/a | yes | +| [region](#input\_region) | The IBM Cloud region where the cluster is provisioned. | `string` | n/a | yes | +| [use\_private\_endpoint](#input\_use\_private\_endpoint) | Set this to true to force all api calls to use the IBM Cloud private endpoints. | `bool` | `false` | no | +| [wait\_till](#input\_wait\_till) | To avoid long wait times when you run your Terraform code, you can specify the stage when you want Terraform to mark the cluster resource creation as completed. Depending on what stage you choose, the cluster creation might not be fully completed and continues to run in the background. However, your Terraform code can continue to run without waiting for the cluster to be fully created. 
Supported args are `MasterNodeReady`, `OneWorkerNodeReady`, `IngressReady` and `Normal` | `string` | `"IngressReady"` | no | +| [wait\_till\_timeout](#input\_wait\_till\_timeout) | Timeout for wait\_till in minutes. | `number` | `90` | no | + +### Outputs + +No outputs. + diff --git a/modules/kube-audit/helm-charts/kube-audit/.helmignore b/modules/kube-audit/helm-charts/kube-audit/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/modules/kube-audit/helm-charts/kube-audit/Chart.yaml b/modules/kube-audit/helm-charts/kube-audit/Chart.yaml new file mode 100644 index 00000000..e4a81e85 --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: kube-audit +description: A Helm chart for kube-audit + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. 
+# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.0.1 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.0.0" diff --git a/modules/kube-audit/helm-charts/kube-audit/templates/deployment.yaml b/modules/kube-audit/helm-charts/kube-audit/templates/deployment.yaml new file mode 100644 index 00000000..9d80a2c6 --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/templates/deployment.yaml @@ -0,0 +1,31 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: "{{ .Values.metadata.name }}" + namespace: "{{ .Values.metadata.namespace }}" + labels: + app: "{{ .Values.metadata.name }}" +spec: + replicas: 1 + selector: + matchLabels: + app: "{{ .Values.metadata.name }}" + template: + metadata: + labels: + app: "{{ .Values.metadata.name }}" + spec: + containers: + - name: "{{ .Values.metadata.name }}" + image: "{{ .Values.image.name }}:{{ .Values.image.tag }}" + imagePullPolicy: Always + ports: + - containerPort: 3000 + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault diff --git a/modules/kube-audit/helm-charts/kube-audit/templates/namespace.yaml b/modules/kube-audit/helm-charts/kube-audit/templates/namespace.yaml new file mode 100644 index 00000000..826d55e5 --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/templates/namespace.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: "{{ .Values.metadata.namespace }}" + labels: + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/enforce-version: latest + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/audit-version: latest 
+ pod-security.kubernetes.io/warn: restricted + pod-security.kubernetes.io/warn-version: latest + security.openshift.io/scc.podSecurityLabelSync: "false" diff --git a/modules/kube-audit/helm-charts/kube-audit/templates/network-policy.yaml b/modules/kube-audit/helm-charts/kube-audit/templates/network-policy.yaml new file mode 100644 index 00000000..1b586508 --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/templates/network-policy.yaml @@ -0,0 +1,28 @@ +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: "{{ .Values.metadata.name }}" + namespace: "{{ .Values.metadata.namespace }}" +spec: + podSelector: + matchLabels: + app: "{{ .Values.metadata.name }}" + policyTypes: + - Ingress + ingress: + - ports: + - protocol: TCP + port: 3000 + from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + app: konnectivity-agent + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + app: vpn diff --git a/modules/kube-audit/helm-charts/kube-audit/templates/service.yaml b/modules/kube-audit/helm-charts/kube-audit/templates/service.yaml new file mode 100644 index 00000000..52c0566e --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: "{{ .Values.metadata.name }}-service" + namespace: "{{ .Values.metadata.namespace }}" + labels: + app: "{{ .Values.metadata.name }}" +spec: + selector: + app: "{{ .Values.metadata.name }}" + ports: + - protocol: TCP + port: 80 + targetPort: 3000 + type: ClusterIP diff --git a/modules/kube-audit/helm-charts/kube-audit/values.yaml b/modules/kube-audit/helm-charts/kube-audit/values.yaml new file mode 100755 index 00000000..9a17a0ea --- /dev/null +++ b/modules/kube-audit/helm-charts/kube-audit/values.yaml @@ -0,0 +1,8 @@ +# NOTE: Mock values added here for helm linter to pass. 
Actual values are set in main.tf
+metadata:
+  name: "ibmcloud-kube-audit"
+  namespace: "ibm-kube-audit"
+
+image:
+  name: "icr.io/ibm/ibmcloud-kube-audit-to-ibm-cloud-logs"
+  tag: ""
diff --git a/modules/kube-audit/kubeconfig/.gitignore b/modules/kube-audit/kubeconfig/.gitignore
new file mode 100644
index 00000000..632a28fb
--- /dev/null
+++ b/modules/kube-audit/kubeconfig/.gitignore
@@ -0,0 +1,6 @@
+# Ignore everything
+*
+
+# But not these files...
+!.gitignore
+!README.md
diff --git a/modules/kube-audit/kubeconfig/README.md b/modules/kube-audit/kubeconfig/README.md
new file mode 100644
index 00000000..e85afee8
--- /dev/null
+++ b/modules/kube-audit/kubeconfig/README.md
@@ -0,0 +1,2 @@
+This directory must exist in source control so the `ibm_container_cluster_config` data lookup can use it to place the
+config.yml used to connect to a kubernetes cluster.
diff --git a/modules/kube-audit/main.tf b/modules/kube-audit/main.tf
new file mode 100644
index 00000000..ba79caf9
--- /dev/null
+++ b/modules/kube-audit/main.tf
@@ -0,0 +1,115 @@
+data "ibm_container_cluster_config" "cluster_config" {
+  cluster_name_id   = var.cluster_id
+  config_dir        = "${path.module}/kubeconfig"
+  admin             = true # workaround for https://github.com/terraform-ibm-modules/terraform-ibm-base-ocp-vpc/issues/374
+  resource_group_id = var.cluster_resource_group_id
+  endpoint_type     = var.cluster_config_endpoint_type != "default" ? var.cluster_config_endpoint_type : null # null value represents default
+}
+
+data "ibm_container_vpc_cluster" "cluster" {
+  name              = var.cluster_id
+  resource_group_id = var.cluster_resource_group_id
+  wait_till         = var.wait_till
+  wait_till_timeout = var.wait_till_timeout
+}
+
+locals {
+  # Compare major and minor as integers: a decimal comparison reads "4.20" as 4.2 and would wrongly reject it.
+  ocp_version = regex("^([0-9]+)\\.([0-9]+)", data.ibm_container_vpc_cluster.cluster.kube_version)
+  # tflint-ignore: terraform_unused_declarations
+  validate_ocp_version = (tonumber(local.ocp_version[0]) > 4 || (tonumber(local.ocp_version[0]) == 4 && tonumber(local.ocp_version[1]) >= 15)) ? true : tobool("Kubernetes API server audit logs forwarding is only supported in ocp versions 4.15 and later.")
+}
+
+resource "null_resource" "set_audit_log_policy" {
+  triggers = {
+    audit_log_policy = var.audit_log_policy
+  }
+  provisioner "local-exec" {
+    command     = "${path.module}/scripts/set_audit_log_policy.sh ${var.audit_log_policy}"
+    interpreter = ["/bin/bash", "-c"]
+    environment = {
+      KUBECONFIG = data.ibm_container_cluster_config.cluster_config.config_file_path
+    }
+  }
+}
+
+#########################################################################################################################
+# Creates a log collection service and container
+########################################################################################################################
+
+locals {
+  kube_audit_chart_location = "${path.module}/helm-charts/kube-audit"
+}
+
+resource "helm_release" "kube_audit" {
+  depends_on    = [null_resource.set_audit_log_policy, data.ibm_container_vpc_cluster.cluster]
+  name          = var.audit_deployment_name
+  chart         = local.kube_audit_chart_location
+  timeout       = 1200
+  wait          = true
+  recreate_pods = true
+  force_update  = true
+
+  set {
+    name  = "metadata.name"
+    type  = "string"
+    value = var.audit_deployment_name
+  }
+
+  set {
+    name  = "metadata.namespace"
+    type  = "string"
+    value = var.audit_namespace
+  }
+  set {
+    name  = "image.name"
+    type  = "string"
+    value = var.audit_webhook_listener_image
+  }
+
+  set {
+    name  = "image.tag"
+    type  = "string"
+    value = var.audit_webhook_listener_image_version
+  }
+
+  provisioner "local-exec" {
+    command     = "${path.module}/scripts/confirm-rollout-status.sh ${var.audit_deployment_name} ${var.audit_namespace}"
+    interpreter = ["/bin/bash", "-c"]
+    environment = {
+      KUBECONFIG = data.ibm_container_cluster_config.cluster_config.config_file_path
+    }
+  }
+}
+
+# wait for the kube-audit resources.
+resource "time_sleep" "wait_for_kube_audit" { + depends_on = [helm_release.kube_audit] + create_duration = "60s" +} + +locals { + audit_server = "https://127.0.0.1:2040/api/v1/namespaces/${var.audit_namespace}/services/${var.audit_deployment_name}-service/proxy/post" +} + +# see [issue](https://github.com/IBM-Cloud/terraform-provider-ibm/issues/6107) +# data "ibm_iam_auth_token" "webhook_api_key_tokendata" { +# depends_on = [data.ibm_container_cluster_config.cluster_config] +# } + +resource "null_resource" "set_audit_webhook" { + depends_on = [time_sleep.wait_for_kube_audit] + triggers = { + audit_log_policy = var.audit_log_policy + } + provisioner "local-exec" { + command = "${path.module}/scripts/set_webhook.sh ${var.region} ${var.use_private_endpoint} ${var.cluster_config_endpoint_type} ${var.cluster_id} ${var.cluster_resource_group_id} ${var.audit_log_policy != "default" ? "verbose" : "default"}" + interpreter = ["/bin/bash", "-c"] + environment = { + IAM_API_KEY = var.ibmcloud_api_key + AUDIT_SERVER = local.audit_server + CLIENT_CERT = data.ibm_container_cluster_config.cluster_config.admin_certificate + CLIENT_KEY = data.ibm_container_cluster_config.cluster_config.admin_key + } + } +} diff --git a/modules/kube-audit/outputs.tf b/modules/kube-audit/outputs.tf new file mode 100644 index 00000000..e9e12ece --- /dev/null +++ b/modules/kube-audit/outputs.tf @@ -0,0 +1,3 @@ +######################################################################################################################## +# Outputs +######################################################################################################################## diff --git a/modules/kube-audit/scripts/confirm-rollout-status.sh b/modules/kube-audit/scripts/confirm-rollout-status.sh new file mode 100755 index 00000000..9cd407dc --- /dev/null +++ b/modules/kube-audit/scripts/confirm-rollout-status.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +deployment=$1 +namespace=$2 + +kubectl rollout status deploy 
"${deployment}" -n "${namespace}" --timeout 30m
diff --git a/modules/kube-audit/scripts/set_audit_log_policy.sh b/modules/kube-audit/scripts/set_audit_log_policy.sh
new file mode 100755
index 00000000..b4dc2030
--- /dev/null
+++ b/modules/kube-audit/scripts/set_audit_log_policy.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -euo pipefail
+
+AUDIT_POLICY="$1"
+
+# JSON merge patch that sets spec.audit.profile on the `apiserver cluster` resource.
+AUDIT_PATCH="{\"spec\":{\"audit\":{\"profile\":\"$AUDIT_POLICY\"}}}"
+MAX_ATTEMPTS=10
+RETRY_WAIT=5
+MAX_RETRY_WAIT=60 # upper bound, in seconds, for the doubling retry delay
+
+function check_oc_cli() {
+    if ! command -v oc &>/dev/null; then
+        echo "Error: OpenShift CLI (oc) is not installed. Exiting."
+        exit 1
+    fi
+}
+
+function apply_oc_patch() {
+
+    local attempt=0
+    while [ $attempt -lt $MAX_ATTEMPTS ]; do
+        echo "Attempt $((attempt + 1)) of $MAX_ATTEMPTS: Applying API server audit profile patch..."
+
+        # Call oc directly instead of eval-ing a command string built from the script argument.
+        if oc patch apiserver cluster --type='merge' -p "$AUDIT_PATCH"; then
+            echo "Patch applied successfully."
+            return 0
+        else
+            echo "Failed to apply patch. Retrying in ${RETRY_WAIT}s..."
+            sleep $RETRY_WAIT
+            # Not ((attempt++)): that returns status 1 while attempt is 0, which aborts the script under `set -e`.
+            attempt=$((attempt + 1))
+            RETRY_WAIT=$((RETRY_WAIT * 2 > MAX_RETRY_WAIT ? MAX_RETRY_WAIT : RETRY_WAIT * 2))
+        fi
+    done
+
+    echo "Maximum retry attempts reached. Could not apply patch."
+    exit 1
+}
+
+echo "========================================="
+
+check_oc_cli
+apply_oc_patch
+sleep 30
+echo "========================================="
diff --git a/modules/kube-audit/scripts/set_webhook.sh b/modules/kube-audit/scripts/set_webhook.sh
new file mode 100755
index 00000000..c65cccb4
--- /dev/null
+++ b/modules/kube-audit/scripts/set_webhook.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+set -euo pipefail
+
+REGION="$1"
+PRIVATE_ENV="$2"
+CLUSTER_ENDPOINT="$3"
+CLUSTER_ID="$4"
+RESOURCE_GROUP_ID="$5"
+POLICY="$6"
+
+get_cloud_endpoint() {
+    iam_cloud_endpoint="${IBMCLOUD_IAM_API_ENDPOINT:-"iam.cloud.ibm.com"}"
+    IBMCLOUD_IAM_API_ENDPOINT=${iam_cloud_endpoint#https://}
+
+    cs_api_endpoint="${IBMCLOUD_CS_API_ENDPOINT:-"containers.cloud.ibm.com"}"
+    cs_api_endpoint=${cs_api_endpoint#https://}
+    IBMCLOUD_CS_API_ENDPOINT=${cs_api_endpoint%/global}
+}
+
+get_cloud_endpoint
+
+# This is a workaround function added to retrieve a new token, this can be removed once this issue(https://github.com/IBM-Cloud/terraform-provider-ibm/issues/6107) is fixed.
+fetch_token() {
+    if [ "$IBMCLOUD_IAM_API_ENDPOINT" = "iam.cloud.ibm.com" ]; then
+        if [ "$PRIVATE_ENV" = true ]; then
+            IAM_URL="https://private.$IBMCLOUD_IAM_API_ENDPOINT/identity/token"
+        else
+            IAM_URL="https://$IBMCLOUD_IAM_API_ENDPOINT/identity/token"
+        fi
+    else
+        IAM_URL="https://$IBMCLOUD_IAM_API_ENDPOINT/identity/token"
+    fi
+
+    token=$(curl -s -H "Content-Type: application/x-www-form-urlencoded" -d "grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey=$IAM_API_KEY" -X POST "$IAM_URL") #pragma: allowlist secret
+    # jq -e exits non-zero when .access_token is null or missing, so a failed token request stops the script here.
+    IAM_TOKEN=$(echo "$token" | jq -er .access_token) || { echo "ERROR:: could not obtain an IAM token from $IAM_URL" >&2; exit 1; }
+}
+
+fetch_token
+
+# This is a workaround function added to retrieve the CA cert, this can be removed once this issue(https://github.com/IBM-Cloud/terraform-provider-ibm/issues/6068) is fixed.
+get_ca_cert() {
+    if [ "$IBMCLOUD_CS_API_ENDPOINT" = "containers.cloud.ibm.com" ]; then
+        if [ "$PRIVATE_ENV" = true ]; then
+            if [ "$CLUSTER_ENDPOINT" == "private" ] || [ "$CLUSTER_ENDPOINT" == "default" ]; then
+                WEBHOOK_URL="https://private.$REGION.$IBMCLOUD_CS_API_ENDPOINT/v2/getCACert?cluster=$CLUSTER_ID"
+                result=$(curl -s -H "accept: application/json" -H "Authorization: $IAM_TOKEN" -H "X-Auth-Resource-Group: $RESOURCE_GROUP_ID" -X GET "$WEBHOOK_URL")
+            elif [ "$CLUSTER_ENDPOINT" == "vpe" ]; then
+                WEBHOOK_URL="https://api.$REGION.$IBMCLOUD_CS_API_ENDPOINT/v2/getCACert?cluster=$CLUSTER_ID"
+                result=$(curl -s -H "accept: application/json" -H "Authorization: $IAM_TOKEN" -H "X-Auth-Resource-Group: $RESOURCE_GROUP_ID" -X GET "$WEBHOOK_URL")
+            else
+                # No private API host is mapped for other types (e.g. "link"); fail clearly before the unset $result trips `set -u`.
+                echo "ERROR:: cluster endpoint type '$CLUSTER_ENDPOINT' is not supported when use_private_endpoint is true" >&2
+                exit 1
+            fi
+        else
+            WEBHOOK_URL="https://$IBMCLOUD_CS_API_ENDPOINT/global/v2/getCACert?cluster=$CLUSTER_ID"
+            result=$(curl -s -H "accept: application/json" -H "X-Region: $REGION" -H "Authorization: $IAM_TOKEN" -H "X-Auth-Resource-Group: $RESOURCE_GROUP_ID" -X GET "$WEBHOOK_URL")
+        fi
+    else
+        WEBHOOK_URL="https://$IBMCLOUD_CS_API_ENDPOINT/global/v2/getCACert?cluster=$CLUSTER_ID"
+        result=$(curl -s -H "accept: application/json" -H "X-Region: $REGION" -H "Authorization: $IAM_TOKEN" -H "X-Auth-Resource-Group: $RESOURCE_GROUP_ID" -X GET "$WEBHOOK_URL")
+    fi
+
+    CERTIFICATE_AUTHORITY=$(echo "$result" | jq -r .caCert | base64 -d)
+}
+
+get_ca_cert
+
+curl_request() {
+    local endpoint=$1
+    local data=$2
+
+    if [ "$IBMCLOUD_CS_API_ENDPOINT" = "containers.cloud.ibm.com" ]; then
+        if [ "$PRIVATE_ENV" = true ]; then
+            if [ "$CLUSTER_ENDPOINT" == "private" ] || [ "$CLUSTER_ENDPOINT" == "default" ]; then
+                WEBHOOK_URL="https://private.$REGION.$IBMCLOUD_CS_API_ENDPOINT/$endpoint"
+                result=$(curl -i -H "accept: application/json" -H "Authorization: $IAM_TOKEN" -d "$data" -X PUT "$WEBHOOK_URL" 2>/dev/null)
+                status_code=$(echo "$result" | head -n 1 | cut -d$' ' -f2)
+            elif [ "$CLUSTER_ENDPOINT" == "vpe" ]; then
+                WEBHOOK_URL="https://api.$REGION.$IBMCLOUD_CS_API_ENDPOINT/$endpoint"
+                result=$(curl -i -H "accept: application/json" -H "Authorization: $IAM_TOKEN" -d "$data" -X PUT "$WEBHOOK_URL" 2>/dev/null)
+                status_code=$(echo "$result" | head -n 1 | cut -d$' ' -f2)
+            else
+                # Same guard as get_ca_cert: without it $status_code stays unset and `set -u` aborts with an unclear error.
+                echo "ERROR:: cluster endpoint type '$CLUSTER_ENDPOINT' is not supported when use_private_endpoint is true" >&2
+                exit 1
+            fi
+        else
+            WEBHOOK_URL="https://$IBMCLOUD_CS_API_ENDPOINT/global/$endpoint"
+            result=$(curl -i -H "accept: application/json" -H "X-Region: $REGION" -H "Authorization: $IAM_TOKEN" -d "$data" -X PUT "$WEBHOOK_URL" 2>/dev/null)
+            status_code=$(echo "$result" | head -n 1 | cut -d$' ' -f2)
+        fi
+    else
+        WEBHOOK_URL="https://$IBMCLOUD_CS_API_ENDPOINT/global/$endpoint"
+        result=$(curl -i -H "accept: application/json" -H "X-Region: $REGION" -H "Authorization: $IAM_TOKEN" -d "$data" -X PUT "$WEBHOOK_URL" 2>/dev/null)
+        status_code=$(echo "$result" | head -n 1 | cut -d$' ' -f2)
+    fi
+
+    if [ "${status_code}" == "204" ]; then
+        echo "$status_code"
+    else
+        echo "ERROR:: $endpoint FAILED"
+        echo "$result"
+    fi
+}
+
+CERTIFICATE_AUTHORITY=${CERTIFICATE_AUTHORITY//$'\n'/\\n}
+CLIENT_CERT=${CLIENT_CERT//$'\n'/\\n}
+CLIENT_KEY=${CLIENT_KEY//$'\n'/\\n}
+
+JSON_BODY="{\"auditServer\": \"$AUDIT_SERVER\",\"caCertificate\": \"$CERTIFICATE_AUTHORITY\",\"clientCertificate\": \"$CLIENT_CERT\",\"clientKey\": \"$CLIENT_KEY\",\"policy\": \"$POLICY\"}"
+
+webhook_attempts=1
+while true; do
+    response=$(curl_request "v1/clusters/$CLUSTER_ID/apiserverconfigs/auditwebhook" "$JSON_BODY")
+    echo "Webhook status: $response"
+    if [[ "$response" == "204" ]]; then
+        echo "webhook set successfully"
+        break
+    else
+        webhook_attempts=$((webhook_attempts + 1))
+        if [ $webhook_attempts -ge 10 ]; then
+            echo "Webhook status: $response"
+            exit 1
+        fi
+        echo "Sleeping for 30 secs.."
+ sleep 30 + fi + response="" +done +sleep 60 + +refresh_attempts=1 +while true; do + response2=$(curl_request "v1/logging/$CLUSTER_ID/refresh" "") + echo "Refresh status: $response2" + if [[ "$response2" == "204" ]]; then + echo "Cluster refreshed successfully" + break + else + refresh_attempts=$((refresh_attempts + 1)) + if [ $refresh_attempts -ge 10 ]; then + echo "Refresh status: $response2" + exit 1 + fi + echo "Sleeping for 30 secs.." + sleep 30 + fi + response2="" +done diff --git a/modules/kube-audit/variables.tf b/modules/kube-audit/variables.tf new file mode 100644 index 00000000..3173fdd5 --- /dev/null +++ b/modules/kube-audit/variables.tf @@ -0,0 +1,103 @@ +############################################################################## +# Cluster variables +############################################################################## + +variable "ibmcloud_api_key" { + type = string + description = "The IBM Cloud api key to generate an IAM token." + sensitive = true +} + +variable "cluster_id" { + type = string + description = "The ID of the cluster to deploy the log collection service in." +} + +variable "region" { + type = string + description = "The IBM Cloud region where the cluster is provisioned." +} + +variable "cluster_resource_group_id" { + type = string + description = "The resource group ID of the cluster." +} + +variable "wait_till" { + description = "To avoid long wait times when you run your Terraform code, you can specify the stage when you want Terraform to mark the cluster resource creation as completed. Depending on what stage you choose, the cluster creation might not be fully completed and continues to run in the background. However, your Terraform code can continue to run without waiting for the cluster to be fully created. 
Supported args are `MasterNodeReady`, `OneWorkerNodeReady`, `IngressReady` and `Normal`" + type = string + default = "IngressReady" + + validation { + error_message = "`wait_till` value must be one of `MasterNodeReady`, `OneWorkerNodeReady`, `IngressReady` or `Normal`." + condition = contains([ + "MasterNodeReady", + "OneWorkerNodeReady", + "IngressReady", + "Normal" + ], var.wait_till) + } +} + +variable "wait_till_timeout" { + description = "Timeout for wait_till in minutes." + type = number + default = 90 +} + +variable "use_private_endpoint" { + type = bool + description = "Set this to true to force all api calls to use the IBM Cloud private endpoints." + default = false +} + +variable "cluster_config_endpoint_type" { + description = "Specify which type of endpoint to use for cluster config access: 'default', 'private', 'vpe', 'link'. 'default' value will use the default endpoint of the cluster." + type = string + default = "default" + nullable = false # use default if null is passed in + validation { + error_message = "Invalid Endpoint Type! Valid values are 'default', 'private', 'vpe', or 'link'" + condition = contains(["default", "private", "vpe", "link"], var.cluster_config_endpoint_type) + } +} + +variable "audit_log_policy" { + type = string + description = "Specify the amount of information that is logged to the API server audit logs by choosing the audit log policy profile to use. Supported values are `default` and `WriteRequestBodies`." + default = "default" + + validation { + error_message = "Invalid Audit log policy Type! Valid values are 'default' or 'WriteRequestBodies'" + condition = contains(["default", "WriteRequestBodies"], var.audit_log_policy) + } +} + +variable "audit_namespace" { + type = string + description = "The name of the namespace where log collection service and a deployment will be created."
+ default = "ibm-kube-audit" +} + +variable "audit_deployment_name" { + type = string + description = "The name of the log collection deployment and service." + default = "ibmcloud-kube-audit" +} + +variable "audit_webhook_listener_image" { + type = string + description = "The audit webhook listener image reference in the format of `[registry-url]/[namespace]/[image]`. The sub-module uses the `icr.io/ibm/ibmcloud-kube-audit-to-ibm-cloud-logs` image to forward logs to IBM Cloud Logs. This image is for demonstration purposes only. For a production solution, configure and maintain your own log forwarding image." + default = "icr.io/ibm/ibmcloud-kube-audit-to-ibm-cloud-logs" +} + +variable "audit_webhook_listener_image_version" { + type = string + description = "The tag or digest for the audit webhook listener image to deploy. If changing the value, ensure it is compatible with `audit_webhook_listener_image`." + nullable = false + default = "deaabcb8225e800385413ba420cf3f819d3b0671@sha256:acf123f4dba63534cbc104c6886abedff9d25a22a34ab7b549ede988ed6e7144" # See, https://github.ibm.com/GoldenEye/issues/issues/13371 + validation { + condition = can(regex("^[a-f0-9]{40}@sha256:[a-f0-9]{64}$", var.audit_webhook_listener_image_version)) + error_message = "The value of the audit webhook listener image version must match the tag and sha256 image digest format" + } +} diff --git a/modules/kube-audit/version.tf b/modules/kube-audit/version.tf new file mode 100644 index 00000000..ad9a2594 --- /dev/null +++ b/modules/kube-audit/version.tf @@ -0,0 +1,24 @@ +terraform { + required_version = ">=1.9.0" + + # Ensure that there is always 1 example locked into the lowest provider version of the range defined in the main + # module's version.tf (basic and add_rules_to_sg), and 1 example that will always use the latest provider version (advanced, fscloud and multiple mzr).
+ required_providers { + ibm = { + source = "ibm-cloud/ibm" + version = ">= 1.70.0, <2.0.0" + } + null = { + source = "hashicorp/null" + version = ">= 3.2.1, < 4.0.0" + } + time = { + source = "hashicorp/time" + version = ">= 0.9.1, < 1.0.0" + } + helm = { + source = "hashicorp/helm" + version = ">= 2.15.0, <3.0.0" + } + } +}