diff --git a/bin/single-new-eks-automode-opensource-observability.ts b/bin/single-new-eks-automode-opensource-observability.ts
new file mode 100644
index 00000000..4e156268
--- /dev/null
+++ b/bin/single-new-eks-automode-opensource-observability.ts
@@ -0,0 +1,6 @@
+import { configureApp } from '../lib/common/construct-utils';
+import SingleNewEksAutoModeOpenSourceObservabilityPattern from '../lib/single-new-eks-opensource-observability-pattern/automode';
+
+const app = configureApp();
+
+new SingleNewEksAutoModeOpenSourceObservabilityPattern(app, 'single-new-eks-automode-opensource');
diff --git a/bin/single-new-eks-awsnative-automode-observability.ts b/bin/single-new-eks-awsnative-automode-observability.ts
new file mode 100644
index 00000000..9bc10caa
--- /dev/null
+++ b/bin/single-new-eks-awsnative-automode-observability.ts
@@ -0,0 +1,6 @@
+import SingleNewEksAutoModeAWSNativeObservabilityPattern from '../lib/single-new-eks-awsnative-automode-observability-pattern';
+import { configureApp } from '../lib/common/construct-utils';
+
+const app = configureApp();
+
+new SingleNewEksAutoModeAWSNativeObservabilityPattern(app, 'single-new-eks-awsnative-automode');
diff --git a/docs/patterns/images/automode-containermap.png b/docs/patterns/images/automode-containermap.png
new file mode 100644
index 00000000..bb28d395
Binary files /dev/null and b/docs/patterns/images/automode-containermap.png differ
diff --git a/docs/patterns/images/automode-perfmonitoring.png b/docs/patterns/images/automode-perfmonitoring.png
new file mode 100644
index 00000000..1bca11b1
Binary files /dev/null and b/docs/patterns/images/automode-perfmonitoring.png differ
diff --git a/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-automode-observability.md b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-automode-observability.md
new file mode 100644
index 00000000..e9da83af
--- /dev/null
+++ b/docs/patterns/single-new-eks-observability-accelerators/single-new-eks-awsnative-automode-observability.md
@@ -0,0 +1,190 @@
# Single Cluster AWS Native Observability - Auto Mode

## Architecture

The following figure illustrates the architecture of the Single EKS Auto Mode Cluster Native Observability pattern, which we will deploy using AWS native tools such as CloudWatch Logs and Container Insights.

![Architecture](../images/cloud-native-arch.png)

This example makes use of CloudWatch Container Insights as a visualization and metric-aggregation layer. Amazon CloudWatch Container Insights helps customers collect, aggregate, and summarize metrics and logs from containerized applications and microservices. Metric data is collected as performance log events using the embedded metric format. These performance log events use a structured JSON schema that enables high-cardinality data to be ingested and stored at scale. From this data, CloudWatch creates aggregated metrics at the cluster, node, pod, task, and service level as CloudWatch metrics. The metrics that Container Insights collects are available in CloudWatch automatic dashboards.

By combining Container Insights and CloudWatch Logs, we are able to provide a foundation for Amazon EKS (Elastic Kubernetes Service) observability. Monitoring EKS metrics falls into two categories: the control plane and the Amazon EKS nodes (with Kubernetes objects). The Amazon EKS control plane consists of control plane nodes that run the Kubernetes software, such as etcd and the Kubernetes API server. To read more on the components of an Amazon EKS cluster, please read the [service documentation](https://docs.aws.amazon.com/eks/latest/userguide/clusters.html).
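To make the embedded metric format concrete, below is a minimal, hypothetical performance log event of the kind Container Insights emits (names and values are illustrative, not captured from a real cluster):

```json
{
  "_aws": {
    "Timestamp": 1700000000000,
    "CloudWatchMetrics": [
      {
        "Namespace": "ContainerInsights",
        "Dimensions": [["ClusterName", "Namespace", "PodName"]],
        "Metrics": [{ "Name": "pod_cpu_utilization", "Unit": "Percent" }]
      }
    ]
  },
  "ClusterName": "single-new-eks-awsnative-automode",
  "Namespace": "kube-system",
  "PodName": "example-pod",
  "pod_cpu_utilization": 3.7
}
```

The `_aws` metadata block tells CloudWatch which JSON fields to extract as metrics and which to index as dimensions; everything else remains queryable as high-cardinality log data.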
## Objective

- Deploys one production-grade Amazon EKS Auto Mode cluster.
- Enables control plane logging.
- Deploys the AWS Distro for OpenTelemetry Operator and Collector.
- Collects logs with [AWS for Fluent Bit](https://github.com/aws/aws-for-fluent-bit) and ships them to CloudWatch Logs.
- Enables CloudWatch Container Insights.
- Installs Prometheus Node Exporter for infrastructure metrics.

## Prerequisites

Ensure that you have installed the following tools on your machine.

1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)
2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
3. [cdk](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install)
4. [npm](https://docs.npmjs.com/cli/v8/commands/npm-install)

## Deploying

1. Clone your forked repository

```sh
git clone https://github.com/aws-observability/cdk-aws-observability-accelerator.git
```

2. Install the AWS CDK Toolkit globally on your machine using

```bash
npm install -g aws-cdk
```

3. Install project dependencies by running `npm install` in the main folder of this cloned repository

4. Once all prerequisites are set, you are ready to deploy. Run the following command from the root of this repository to deploy the pattern stack:

```bash
make build
make pattern single-new-eks-awsnative-automode-observability deploy
```

## Verify the resources

Run the update-kubeconfig command. You should be able to get the exact command, including the role ARN, from the CDK output message.

```bash
aws eks update-kubeconfig --name single-new-eks-awsnative-automode-observability-accelerator --region <region> --role-arn arn:aws:iam::xxxxxxxxx:role/single-new-eks-awsnative--singleneweksawsnativeauto-JN3QM2KMBNCO
```

Let's verify the resources created by the steps above.

```bash
kubectl get nodes -o wide
```

Output:

```console
NAME                  STATUS   ROLES    AGE     VERSION               INTERNAL-IP   EXTERNAL-IP     OS-IMAGE                                                              KERNEL-VERSION   CONTAINER-RUNTIME
i-04151a4f7765b29fb   Ready    <none>   2m10s   v1.31.9-eks-ca3410b   10.0.10.253   13.222.181.87   Bottlerocket (EKS Auto, Standard) 2025.7.11 (aws-k8s-1.31-standard)   6.1.141          containerd://1.7.27+bottlerocket
```

Next, let's verify the namespaces in the cluster:

```bash
kubectl get ns # Output shows all namespaces
```

Output:

```console
NAME                       STATUS   AGE
amazon-cloudwatch          Active   5h36m
cert-manager               Active   5h36m
default                    Active   5h46m
kube-node-lease            Active   5h46m
kube-public                Active   5h46m
kube-system                Active   5h46m
prometheus-node-exporter   Active   5h36m
```

## Visualization

Navigate to CloudWatch and go to "Container Insights".

View the Container Map:

![Container_Map](../images/automode-containermap.png)

View the Performance Monitoring Dashboard:

![Perf_Dashboard](../images/automode-perfmonitoring.png)

## Viewing Logs

Refer to "Using CloudWatch Logs Insights to Query Logs" in [Logging](../../logs.md).
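For a quick start, the following Logs Insights query sketch lists the most recent application log lines from the `kube-system` namespace. Run it against the cluster's application log group (typically `/aws/containerinsights/<cluster-name>/application`); the `kubernetes.*` field names are an assumption based on the default Fluent Bit record structure and may differ in your setup:

```
fields @timestamp, kubernetes.pod_name, log
| filter kubernetes.namespace_name = "kube-system"
| sort @timestamp desc
| limit 20
```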
## Enabling Application Signals for your services

Amazon CloudWatch Application Signals is an integrated, native APM experience in AWS. CloudWatch Application Signals supports **Java**, **Python**, **.NET**, and **Node.js** applications running on your Amazon EKS Auto Mode cluster.

If you haven't enabled Application Signals in this account yet, follow steps 1 - 4 in our [AWS documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Application-Monitoring-Sections.html).

Next, update your application to `Configure application metrics and trace sampling`. For this, you must add an annotation to a manifest YAML in your cluster. Adding this annotation auto-instruments the application to send metrics, traces, and logs to Application Signals. You have two options for the annotation:

1. **Annotate Workload** auto-instruments a single workload in the cluster.
   - Paste the annotations below into the pod template section of the workload manifest, keeping only the lines that apply to your application's language and runtime.

     ```yaml
     apiVersion: apps/v1
     kind: Deployment
     spec:
       template:
         metadata:
           # add this annotation under the pod template metadata of the service's deployment YAML you want to monitor
           annotations:
             instrumentation.opentelemetry.io/inject-java: "true"
             instrumentation.opentelemetry.io/inject-python: "true"
             instrumentation.opentelemetry.io/inject-dotnet: "true"
             instrumentation.opentelemetry.io/otel-dotnet-auto-runtime: "linux-x64" # for generic Linux glibc-based images; this is the default value and can be omitted
             # instrumentation.opentelemetry.io/otel-dotnet-auto-runtime: "linux-musl-x64" # use this value instead for Alpine Linux (musl) based images
             instrumentation.opentelemetry.io/inject-nodejs: "true"
             ...
     ```

   - In your terminal, enter `kubectl apply -f your_deployment_yaml` to apply the change.

2. **Annotate Namespace** auto-instruments all workloads deployed in the selected namespace.
   - Paste the annotations below into the metadata section of the namespace manifest.

     ```yaml
     apiVersion: v1
     kind: Namespace
     metadata:
       name: <namespace-name>
       # add this annotation under the metadata of the namespace manifest you want to monitor
       annotations:
         instrumentation.opentelemetry.io/inject-java: "true"
         instrumentation.opentelemetry.io/inject-python: "true"
         instrumentation.opentelemetry.io/inject-dotnet: "true"
         instrumentation.opentelemetry.io/inject-nodejs: "true"
       ...
     ```

   - In your terminal, enter `kubectl apply -f your_namespace_yaml` to apply the change.
   - In your terminal, enter a command to restart all pods in the namespace. An example command to restart deployment workloads is `kubectl rollout restart deployment -n namespace_name`. A way to verify the injection follows this list.
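To confirm that auto-instrumentation took effect after the restart, inspect one of the restarted pods. The OpenTelemetry Operator injects an init container whose name typically contains `opentelemetry-auto-instrumentation`; the exact name varies by operator version and language, so treat this as a hedged example:

```bash
kubectl describe pod <pod-name> -n namespace_name | grep -i opentelemetry-auto-instrumentation
```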
## Visualization of CloudWatch Application Signals data

After enabling your application to pass metrics and traces by following [the steps provided above](#enabling-application-signals-for-your-services), open your Amazon CloudWatch console in the same region as your EKS cluster, then from the left-hand side choose `Application Signals -> Services` and you will see the metrics shown on the sample dashboard below:

![APP_Signals_Services](../images/App-signals/app-signal-services.png)

![APP_Signals_Dependencies](../images/App-signals/app-signal-ops-deps.png)

## Teardown

You can tear down the whole CDK stack with the following command:

```bash
make pattern single-new-eks-awsnative-automode-observability destroy
```
diff --git a/lib/single-new-eks-awsnative-automode-observability-pattern/index.ts b/lib/single-new-eks-awsnative-automode-observability-pattern/index.ts
new file mode 100644
index 00000000..f7ad37dd
--- /dev/null
+++ b/lib/single-new-eks-awsnative-automode-observability-pattern/index.ts
@@ -0,0 +1,33 @@
+import { Construct } from 'constructs';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+
+export default class SingleNewEksAutoModeAWSNativeObservabilityPattern {
+    constructor(scope: Construct, id: string) {
+
+        const stackId = `${id}-observability-accelerator`;
+        const account = process.env.COA_ACCOUNT_ID || process.env.CDK_DEFAULT_ACCOUNT!;
+        const region = process.env.COA_AWS_REGION || process.env.CDK_DEFAULT_REGION!;
+
+        const addOns: Array<blueprints.ClusterAddOn> = [
+            new blueprints.addons.XrayAddOn()
+        ];
+
+        const cluster = new blueprints.AutomodeClusterProvider({
+            nodePools: ['system', 'general-purpose']
+        });
+
+        ObservabilityBuilder.builder()
+            .account(account)
+            .region(region)
+            .version('auto')
+            .clusterProvider(cluster)
+            .enableNativePatternAddOns()
+            .enableControlPlaneLogging()
+            .addOns(...addOns)
+            .build(scope, stackId);
+    }
+}
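The builder above relies on Auto Mode defaults through `.version('auto')` and a bare provider. If you prefer to pin the Kubernetes version and node pools explicitly, `AutomodeClusterProvider` also accepts props, as the open-source variant later in this diff demonstrates; a minimal sketch:

```typescript
import * as blueprints from '@aws-quickstart/eks-blueprints';
import * as eks from 'aws-cdk-lib/aws-eks';

// Pin the control plane version and the built-in Auto Mode node pools
// instead of relying on the 'auto' defaults.
const clusterProvider = new blueprints.AutomodeClusterProvider({
    version: eks.KubernetesVersion.V1_31,
    nodePools: ['system', 'general-purpose']
});
```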
diff --git a/lib/single-new-eks-opensource-observability-pattern/automode.ts b/lib/single-new-eks-opensource-observability-pattern/automode.ts
new file mode 100644
index 00000000..0df023e4
--- /dev/null
+++ b/lib/single-new-eks-opensource-observability-pattern/automode.ts
@@ -0,0 +1,198 @@
+import { Construct } from 'constructs';
+import { utils } from '@aws-quickstart/eks-blueprints';
+import * as blueprints from '@aws-quickstart/eks-blueprints';
+import { GrafanaOperatorSecretAddon } from './grafanaoperatorsecretaddon';
+import * as amp from 'aws-cdk-lib/aws-aps';
+import * as eks from 'aws-cdk-lib/aws-eks';
+import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints';
+import * as fs from 'fs';
+
+export default class SingleNewEksAutoModeOpenSourceObservabilityPattern {
+    constructor(scope: Construct, id: string) {
+
+        const stackId = `${id}-observability-accelerator`;
+
+        const account = process.env.COA_ACCOUNT_ID || process.env.CDK_DEFAULT_ACCOUNT!;
+        const region = process.env.COA_AWS_REGION || process.env.CDK_DEFAULT_REGION!;
+        const ampWorkspaceName = process.env.COA_AMP_WORKSPACE_NAME || 'observability-amp-Workspace';
+        const ampWorkspace = blueprints.getNamedResource(ampWorkspaceName) as unknown as amp.CfnWorkspace;
+        const ampEndpoint = ampWorkspace.attrPrometheusEndpoint;
+        const ampWorkspaceArn = ampWorkspace.attrArn;
+        const amgEndpointUrl = process.env.COA_AMG_ENDPOINT_URL;
+
+        // Flux repository and Grafana dashboard URLs from `cdk.json`, if present
+        const fluxRepository: blueprints.FluxGitRepo = utils.valueFromContext(scope, "fluxRepository", undefined);
+        fluxRepository.values!.AMG_AWS_REGION = region;
+        fluxRepository.values!.AMP_ENDPOINT_URL = ampEndpoint;
+        fluxRepository.values!.AMG_ENDPOINT_URL = amgEndpointUrl;
+
+        const ampAddOnProps: blueprints.AmpAddOnProps = {
+            ampPrometheusEndpoint: ampEndpoint,
+            ampRules: {
+                ampWorkspaceArn: ampWorkspaceArn,
+                ruleFilePaths: [
+                    __dirname + '/../common/resources/amp-config/alerting-rules.yml',
+                    __dirname + '/../common/resources/amp-config/recording-rules.yml'
+                ]
+            }
+        };
+
+        // Toggle the scrape jobs in the OTel collector manifest based on the
+        // pattern flags in cdk.json.
+        const jsonString = fs.readFileSync(__dirname + '/../../cdk.json', 'utf-8');
+        const cdkJson = JSON.parse(jsonString);
+        let doc = utils.readYamlDocument(__dirname + '/../common/resources/otel-collector-config.yml');
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableJavaMonJob }}",
+            "{{ stop enableJavaMonJob }}",
+            cdkJson.context["java.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableNginxMonJob }}",
+            "{{ stop enableNginxMonJob }}",
+            cdkJson.context["nginx.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableIstioMonJob }}",
+            "{{ stop enableIstioMonJob }}",
+            cdkJson.context["istio.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAPIserverJob }}",
+            "{{ stop enableAPIserverJob }}",
+            cdkJson.context["apiserver.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotMetricsCollectionJob }}",
+            "{{ stop enableAdotMetricsCollectionJob }}",
+            cdkJson.context["adotcollectormetrics.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotMetricsCollectionTelemetry }}",
+            "{{ stop enableAdotMetricsCollectionTelemetry }}",
+            cdkJson.context["adotcollectormetrics.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotContainerLogsReceiver }}",
+            "{{ stop enableAdotContainerLogsReceiver }}",
+            cdkJson.context["adotcontainerlogs.pattern.enabled"]
+        );
+        doc = utils.changeTextBetweenTokens(
+            doc,
+            "{{ start enableAdotContainerLogsExporter }}",
+            "{{ stop enableAdotContainerLogsExporter }}",
+            cdkJson.context["adotcontainerlogs.pattern.enabled"]
+        );
+        fs.writeFileSync(__dirname + '/../common/resources/otel-collector-config-new.yml', doc);
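+
+        // Note: each pattern block below assigns ampAddOnProps.openTelemetryCollector
+        // wholesale, so when several patterns are enabled at once the last matching
+        // block wins for the manifestParameterMap values. The generated
+        // otel-collector-config-new.yml itself still carries every enabled scrape job,
+        // because the token substitution above already baked them into the manifest.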
+        if (utils.valueFromContext(scope, "adotcollectormetrics.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml'
+            };
+        }
+
+        if (utils.valueFromContext(scope, "java.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+                manifestParameterMap: {
+                    javaScrapeSampleLimit: 1000,
+                    javaPrometheusMetricsEndpoint: "/metrics"
+                }
+            };
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/java/alerting-rules.yml',
+                __dirname + '/../common/resources/amp-config/java/recording-rules.yml'
+            );
+        }
+
+        if (utils.valueFromContext(scope, "adotcontainerlogs.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+                manifestParameterMap: {
+                    logGroupName: `/aws/eks/${stackId}`,
+                    logStreamName: `/aws/eks/${stackId}`,
+                    logRetentionDays: 30,
+                    awsRegion: region
+                }
+            };
+        }
+
+        if (utils.valueFromContext(scope, "apiserver.pattern.enabled", false)) {
+            ampAddOnProps.enableAPIServerJob = true;
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/apiserver/recording-rules.yml'
+            );
+        }
+
+        if (utils.valueFromContext(scope, "nginx.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml',
+                manifestParameterMap: {
+                    nginxScrapeSampleLimit: 1000,
+                    nginxPrometheusMetricsEndpoint: "/metrics"
+                }
+            };
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/nginx/alerting-rules.yml'
+            );
+        }
+
+        if (utils.valueFromContext(scope, "istio.pattern.enabled", false)) {
+            ampAddOnProps.openTelemetryCollector = {
+                manifestPath: __dirname + '/../common/resources/otel-collector-config-new.yml'
+            };
+            ampAddOnProps.ampRules?.ruleFilePaths.push(
+                __dirname + '/../common/resources/amp-config/istio/alerting-rules.yml',
+                __dirname + '/../common/resources/amp-config/istio/recording-rules.yml'
+            );
+        }
+
+        Reflect.defineMetadata("ordered", true, blueprints.addons.GrafanaOperatorAddon);
+        const addOns: Array<blueprints.ClusterAddOn> = [
+            new blueprints.addons.XrayAdotAddOn(),
+            new blueprints.addons.FluxCDAddOn({ "repositories": [fluxRepository] }),
+            new GrafanaOperatorSecretAddon(),
+        ];
+
+        if (utils.valueFromContext(scope, "istio.pattern.enabled", false)) {
+            addOns.push(new blueprints.addons.IstioBaseAddOn({
+                version: "1.18.2"
+            }));
+            addOns.push(new blueprints.addons.IstioControlPlaneAddOn({
+                version: "1.18.2"
+            }));
+            addOns.push(new blueprints.addons.IstioIngressGatewayAddon({
+                version: "1.18.2"
+            }));
+            addOns.push(new blueprints.addons.IstioCniAddon({
+                version: "1.18.2"
+            }));
+        }
+
+        const automodeProps: blueprints.AutomodeClusterProviderProps = {
+            version: eks.KubernetesVersion.V1_31,
+            nodePools: ['system', 'general-purpose']
+        };
+
+        ObservabilityBuilder.builder()
+            .account(account)
+            .region(region)
+            .version('auto')
+            .withAmpProps(ampAddOnProps)
+            .enableOpenSourcePatternAddOns()
+            .enableControlPlaneLogging()
+            .resourceProvider(ampWorkspaceName, new blueprints.CreateAmpProvider(ampWorkspaceName, ampWorkspaceName))
+            .clusterProvider(new blueprints.AutomodeClusterProvider(automodeProps))
+            .addOns(...addOns)
+            .build(scope, stackId);
+    }
+}
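With the bin entrypoint added at the top of this diff, the open-source Auto Mode pattern should deploy the same way as the AWS-native one documented above. Assuming the repository's usual `make pattern` convention, which keys off the bin file name, that would be:

```bash
make build
make pattern single-new-eks-automode-opensource-observability deploy
```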
diff --git a/package.json b/package.json
index bfd7f962..6c0ad429 100644
--- a/package.json
+++ b/package.json
@@ -10,12 +10,12 @@
     "lint": "npx eslint . --ext .js,.jsx,.ts,.tsx"
   },
   "devDependencies": {
-    "@aws-quickstart/eks-blueprints": "1.16.3",
+    "@aws-quickstart/eks-blueprints": "1.17.2",
     "@types/jest": "^29.5.11",
     "@types/node": "^20.10.4",
     "@typescript-eslint/eslint-plugin": "^6.15.0",
     "@typescript-eslint/parser": "^6.15.0",
-    "aws-cdk": "2.173.4",
+    "aws-cdk": "2.1020.2",
     "copyfiles": "^2.4.1",
     "eslint": "^8.56.0",
     "jest": "^29.7.0",
@@ -25,18 +25,18 @@
   },
   "dependencies": {
     "@kubecost/kubecost-eks-blueprints-addon": "^0.1.8",
-    "@aws-quickstart/eks-blueprints": "1.16.3",
-    "aws-cdk": "2.173.4",
-    "aws-cdk-lib": "2.173.4",
+    "@aws-quickstart/eks-blueprints": "1.17.2",
+    "aws-cdk": "2.1020.2",
+    "aws-cdk-lib": "2.204.0",
     "aws-sdk": "^2.1455.0",
     "constructs": "^10.3.0",
     "eks-blueprints-cdk-kubeflow-ext": "0.1.9",
     "source-map-support": "^0.5.21"
   },
   "overrides": {
-    "@aws-quickstart/eks-blueprints": "1.16.3",
-    "aws-cdk": "2.173.4",
-    "aws-cdk-lib": "2.173.4",
+    "@aws-quickstart/eks-blueprints": "1.17.2",
+    "aws-cdk": "2.1020.2",
+    "aws-cdk-lib": "2.204.0",
     "xml2js": "0.5.0"
   }
-}
\ No newline at end of file
+}
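One consequence of these bumps worth noting: the `aws-cdk` CLI now versions independently of `aws-cdk-lib`, which is why 2.1020.2 sits alongside 2.204.0. A quick sanity check after installing:

```bash
npm install
npx cdk --version     # expect 2.1020.2
npm ls aws-cdk-lib    # expect 2.204.0
```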