diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 628a63a59..b0acad9d9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,6 +23,7 @@ env:
   GOPROXY: "direct"
 
 jobs:
+
   proxy-sanity-check:
     name: Proxy Sanity Check
     runs-on: ubuntu-22.04
@@ -471,6 +472,7 @@ jobs:
       - name: Push benchmark result
         if: ${{ success() && github.ref_name == 'main'}}
         run: git push 'https://github-actions:${{ secrets.GITHUB_TOKEN }}@github.com/nginx/agent.git' benchmark-results:benchmark-results
+
   load-tests:
     name: Load Tests
     if: ${{ !github.event.pull_request.head.repo.fork && !startsWith(github.head_ref, 'dependabot-') }}
@@ -504,7 +506,7 @@ jobs:
           GO_VERSION: ${{ env.GO_VERSION }}
         with:
           file: test/docker/load/Dockerfile
-          tags: nginx-agent-load-test:1.0.0
+          tags: agent_ubuntu_24.04_load_test:1.0.0
           context: '.'
           push: false
           load: true
@@ -518,15 +520,13 @@ jobs:
 
       - name: Run Load Tests
         run: |
-          results=$(docker run --rm nginx-agent-load-test:1.0.0)
-          echo "$results" > benchmarks.json
-          echo "$results"
+          make run-load-test-image
 
       - name: Upload Load Test Results
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
           name: load-test-results
-          path: benchmarks.json
+          path: build/test/load/benchmarks.json
           if-no-files-found: error
 
       - name: Store benchmark result
@@ -534,7 +534,7 @@ jobs:
         with:
           name: Compare Benchmark Results
           tool: 'customSmallerIsBetter'
-          output-file-path: benchmarks.json
+          output-file-path: build/test/load/benchmarks.json
           benchmark-data-dir-path: ""
           # Set auto-push to false since GitHub API token is not given
           auto-push: false
@@ -545,3 +545,105 @@ jobs:
       - name: Push load test result
         if: ${{ success() && github.ref_name == 'main' }}
         run: git push 'https://github-actions:${{ secrets.GITHUB_TOKEN }}@github.com/nginx/agent.git' benchmark-results:benchmark-results
+
+  generate-cpu-profile:
+    name: Generate CPU Profile
+    runs-on: ubuntu-22.04
+    needs: build-unsigned-snapshot
+    if: ${{ github.event_name == 'push' &&
+      startsWith(github.ref_name, 'release-') &&
+      !github.event.pull_request.head.repo.fork }}
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        with:
+          fetch-tags: 'true'
+
+      - name: Setup Go
+        uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
+        with:
+          go-version-file: 'go.mod'
+          cache: false
+
+      - name: Set env
+        run: |
+          echo "GO_VERSION=$(cat go.mod | grep toolchain | sed 's/toolchain //; s/go//')" >> $GITHUB_ENV
+
+      - name: Download Packages
+        uses: actions/download-artifact@eaceaf801fd36c7dee90939fad912460b18a1ffe # v4.1.2
+        with:
+          name: nginx-agent-unsigned-snapshots
+          path: build
+
+      - name: Build Load Test Image
+        uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0
+        env:
+          GO_VERSION: ${{ env.GO_VERSION }}
+        with:
+          file: test/docker/load/Dockerfile
+          tags: agent_ubuntu_24.04_load_test:1.0.0
+          context: '.'
+          push: false
+          load: true
+          no-cache: true
+          secrets: |
+            "nginx-crt=${{ secrets.NGINX_CRT }}"
+            "nginx-key=${{ secrets.NGINX_KEY }}"
+          build-args: |
+            OSARCH=amd64
+            GO_VERSION=${{ env.GO_VERSION }}
+
+      - name: Run CPU Profiling Tests
+        run: |
+          cp default.pgo old_profile.pgo 2> /dev/null || echo "No previous pgo profile found"
+          make generate-pgo-profile
+          ls -l default.pgo
+          diff default.pgo old_profile.pgo || echo "Profiles differ"
+
+      - name: Upload CPU Profile
+        run: |
+          git config --global user.name 'github-actions'
+          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git remote set-url origin 'https://github-actions:${{ secrets.GITHUB_TOKEN }}@github.com/nginx/agent.git'
+          git checkout -b ${{ github.ref_name }}-update-profile
+          git add default.pgo
+          git commit -m "Update pgo profile -- CI Autogenerated" || echo "No changes to commit"
+          git log -1
+          git push -u origin "${{ github.ref_name }}-update-profile"
+
+      - name: Create Pull Request
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            const { repo, owner } = context.repo;
+            // Check if a PR already exists
+            const { data: pullRequests } = await github.rest.pulls.list({
+              // The REST API documents `head` as "owner:branch"; `title` is not a list filter.
+              owner,
+              repo,
+              head: owner + ':${{ github.ref_name }}-update-profile',
+              base: '${{ github.ref_name }}',
+              state: 'open'
+            });
+            if (pullRequests.length > 0) {
+              console.log('A pull request already exists:', pullRequests[0].html_url);
+              return;
+            }
+
+            console.log('Creating a new pull request to update the pgo profile...');
+            console.log('If the PR creation fails, please ensure that the workflow has write permissions and try again.');
+            console.log('Base: ${{ github.ref_name }}, Head: ${{ github.ref_name }}-update-profile');
+
+            const result = await github.rest.pulls.create({
+              title: 'Update pgo profile',
+              owner,
+              repo,
+              head: '${{ github.ref_name }}-update-profile',
+              base: '${{ github.ref_name }}',
+              body: [
+                'This PR is auto-generated by the release workflow.'
+              ].join('\n')
+            });
diff --git a/Makefile b/Makefile
index 276c94e2d..e601c0ef3 100644
--- a/Makefile
+++ b/Makefile
@@ -288,26 +288,68 @@ local-rpm-package: ## Create local rpm package
 	@CGO_ENABLED=0 GOARCH=$(OSARCH) GOOS=linux $(GOBUILD) -o $(BUILD_DIR)/$(BINARY_NAME) -pgo=default.pgo -ldflags=$(LDFLAGS) $(PROJECT_DIR)/$(PROJECT_FILE)
 	ARCH=$(OSARCH) VERSION=$(shell echo $(VERSION) | tr -d 'v') $(GORUN) $(NFPM) pkg --config ./scripts/packages/.local-nfpm.yaml --packager rpm --target $(RPM_PACKAGE);
 
-generate-pgo-profile: build-mock-management-plane-grpc
-	mv default.pgo profile.pprof
+generate-pgo-profile: build-mock-management-plane-grpc run-load-test-with-cpu-profiling ## Generate PGO profile
+	@echo "Generating PGO profile"
 	TEST_ENV="Container" CONTAINER_OS_TYPE=$(CONTAINER_OS_TYPE) BUILD_TARGET="install-agent-local" \
 		PACKAGES_REPO=$(OSS_PACKAGES_REPO) PACKAGE_NAME=$(PACKAGE_NAME) BASE_IMAGE=$(BASE_IMAGE) \
 		OS_VERSION=$(OS_VERSION) OS_RELEASE=$(OS_RELEASE) DOCKERFILE_PATH=$(DOCKERFILE_PATH) \
 		IMAGE_PATH=$(IMAGE_PATH) TAG=${IMAGE_TAG} CONTAINER_NGINX_IMAGE_REGISTRY=${CONTAINER_NGINX_IMAGE_REGISTRY} \
-		$(GOTEST) -v ./test/integration -cpuprofile integration_cpu.pprof
-	@CGO_ENABLED=0 $(GOTEST) -count 10 -timeout 5m -bench=. -benchmem -run=^# ./internal/watcher/instance -cpuprofile perf_watcher_cpu.pprof
-	@$(GOTOOL) pprof -proto perf_watcher_cpu.pprof integration_cpu.pprof > default.pgo
-	rm perf_watcher_cpu.pprof integration_cpu.pprof integration.test profile.pprof
+		scripts/performance/profiling.sh
+
+	@$(GOTOOL) pprof -proto -output=default.pgo \
+		build/test/profiles/merged.pprof \
+		build/test/load-cpu-profiling/load/metrics_load_cpu.pprof \
+		|| { echo "Failed to merge profiles"; exit 1; }
 
 # run under sudo locally
 load-test-image: ## Build performance load testing image
-	@echo "🚚 Running load tests"
-	$(CONTAINER_BUILDENV) $(CONTAINER_CLITOOL) build -t $(IMAGE_TAG)_load_test . \
-		--no-cache -f ./test/docker/load/Dockerfile \
+	@echo "🚚 Building load test image"
+	$(CONTAINER_BUILDENV) $(CONTAINER_CLITOOL) build \
+		--no-cache \
+		-t $(IMAGE_TAG)_load_test:1.0.0 . \
+		-f ./test/docker/load/Dockerfile \
 		--secret id=nginx-crt,src=$(CERTS_DIR)/nginx-repo.crt \
 		--secret id=nginx-key,src=$(CERTS_DIR)/nginx-repo.key \
 		--build-arg OSARCH=$(OSARCH) \
 		--build-arg GO_VERSION=$(GO_VERSION)
 
 run-load-test-image: ## Run performance load testing image
-	$(CONTAINER_BUILDENV) $(CONTAINER_CLITOOL) run --rm -v $(PWD)/$(BUILD_DIR)/:/agent/$(BUILD_DIR)/ $(IMAGE_TAG)_load_test
+	@mkdir -p $(TEST_BUILD_DIR)/load
+	@echo "🚚 Running load tests"
+	$(CONTAINER_CLITOOL) rm -f agent-load-test || true
+
+# Run the load test container
+	@$(CONTAINER_BUILDENV) $(CONTAINER_CLITOOL) run \
+		--name agent-load-test \
+		$(IMAGE_TAG)_load_test:1.0.0
+
+# Copy the files generated by the load tests
+	@$(CONTAINER_CLITOOL) cp \
+		agent-load-test:/agent/performance/load \
+		$(TEST_BUILD_DIR)
+	@echo "Results saved to $(TEST_BUILD_DIR)/load"
+	@find $(TEST_BUILD_DIR)/load -type f -exec ls -lh {} \;
+
+# Stop and remove the container
+	@$(CONTAINER_CLITOOL) stop agent-load-test && $(CONTAINER_CLITOOL) rm -f agent-load-test
+
+run-load-test-with-cpu-profiling: ## Run performance load testing with cpu profiling
+	@echo "🚚 Running load tests with cpu profiling"
+	@mkdir -p $(TEST_BUILD_DIR)/load-cpu-profiling
+	@$(CONTAINER_CLITOOL) rm -f agent-load-test-with-cpu-profiling || true
+
+# Run the load test container
+	@$(CONTAINER_BUILDENV) $(CONTAINER_CLITOOL) run \
+		--name agent-load-test-with-cpu-profiling \
+		--env PROFILE=true \
+		$(IMAGE_TAG)_load_test:1.0.0
+
+# Copy the files generated by the load tests
+	@$(CONTAINER_CLITOOL) cp \
+		agent-load-test-with-cpu-profiling:/agent/performance/load \
+		$(TEST_BUILD_DIR)/load-cpu-profiling
+	@echo "Results saved to $(TEST_BUILD_DIR)/load-cpu-profiling"
+
+# Stop and remove the container
+	@$(CONTAINER_CLITOOL) stop agent-load-test-with-cpu-profiling && \
+		$(CONTAINER_CLITOOL) rm -f agent-load-test-with-cpu-profiling
diff --git a/Makefile.containers b/Makefile.containers
index 15c7019f0..d902b5b70 100644
--- a/Makefile.containers
+++ b/Makefile.containers
@@ -52,6 +52,6 @@ CONTAINER_REGISTRY = registry.access.redhat.com
 BASE_IMAGE = $(CONTAINER_REGISTRY)/ubi$(OS_VERSION)/ubi:latest
 endif
 
-CONTAINER_NGINX_IMAGE_REGISTRY ?= registry.nginx.com
+CONTAINER_NGINX_IMAGE_REGISTRY ?= docker-registry.nginx.com
 
 CONTAINER_VARS = CONTAINER_CLITOOL CONTAINER_COMPOSE CONTAINER_BUILDENV CONTAINER_OS_TYPE CONTAINER_REGISTRY CONTAINER_NGINX_IMAGE_REGISTRY
diff --git a/scripts/performance/profiling.sh b/scripts/performance/profiling.sh
new file mode 100755
index 000000000..d7db73170
--- /dev/null
+++ b/scripts/performance/profiling.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# This script runs Go tests with CPU profiling enabled for all test packages found under the directories:
+# - internal/watcher
+# It saves the CPU profiles in the $PROFILES_DIR directory with the format <package>_watcher_cpu.pprof
+# e.g. instance_watcher_cpu.pprof
+
+# The variables below can be set to customize the environment for the integration tests:
+# Example using variables defined in our Makefile:
+# TEST_ENV=ci CONTAINER_OS_TYPE=linux BUILD_TARGET=agent PACKAGES_REPO=nginxinc \
+# PACKAGE_NAME=nginx-agent BASE_IMAGE=ubuntu OS_VERSION=20.04 OS_RELEASE=focal \
+# DOCKERFILE_PATH=Dockerfile IMAGE_PATH=nginxinc/nginx-agent TAG=latest CONTAINER_NGINX_IMAGE_REGISTRY=docker.io \
+# ./scripts/performance/profiling.sh
+
+set -e
+set -o pipefail
+
+PROFILES_DIR="build/test/profiles"
+mkdir -p "${PROFILES_DIR}"
+
+# Run watcher tests with CPU profiling for each package
+echo "Starting watcher tests with cpu profiling..."
+packages=$(find internal/watcher -type f -name '*_test.go' -exec dirname {} \; | sort -u)
+echo "Found packages:"
+echo "$packages"
+for pkg in $packages; do
+    profile="${PROFILES_DIR}/$(basename "${pkg}")_watcher_cpu.pprof"
+    echo "Running tests in package: ${pkg}"
+    go test \
+      -count 10 -timeout 3m \
+      -cpuprofile "${profile}" \
+      "./${pkg}" || { echo "Tests failed in package: ${pkg}, but continuing..."; continue; }
+    echo "Profile saved to: ${profile}"
+done
+
+## Merge all CPU profiles
+# Merge only the per-package profiles: a merged.pprof left behind by an earlier
+# run lives in the same directory and must not be merged into itself.
+shopt -s nullglob
+files=("${PROFILES_DIR}"/*_watcher_cpu.pprof)
+shopt -u nullglob
+if [ "${#files[@]}" -eq 0 ]; then
+    echo "No CPU profiles were generated"
+    exit 1
+fi
+echo "Merging CPU profiles: ${files[*]}"
+go tool pprof -proto -output="${PROFILES_DIR}/merged.pprof" "${files[@]}"
+echo "Merged CPU profile saved to: ${PROFILES_DIR}/merged.pprof"
diff --git a/test/docker/load/Dockerfile b/test/docker/load/Dockerfile
index a1eeb4ee8..537f4f204 100644
--- a/test/docker/load/Dockerfile
+++ b/test/docker/load/Dockerfile
@@ -4,9 +4,6 @@ LABEL maintainer="NGINX Docker Maintainers "
 # https://askubuntu.com/questions/909277/avoiding-user-interaction-with-tzdata-when-installing-certbot-in-a-docker-contai
 ARG DEBIAN_FRONTEND=noninteractive
 
-WORKDIR /agent
-COPY . /agent
-
 RUN --mount=type=secret,id=nginx-crt,dst=nginx-repo.crt \
     --mount=type=secret,id=nginx-key,dst=nginx-repo.key \
     set -x \
@@ -59,9 +56,10 @@ ARG GO_VERSION
 ARG OSARCH
 
 # Install Go
-RUN wget https://go.dev/dl/go${GO_VERSION}.linux-${OSARCH}.tar.gz \
-    && tar -xvf go${GO_VERSION}.linux-${OSARCH}.tar.gz \
-    && mv go /usr/local
+RUN wget -q https://go.dev/dl/go${GO_VERSION}.linux-${OSARCH}.tar.gz \
+    && tar -xf go${GO_VERSION}.linux-${OSARCH}.tar.gz \
+    && mv go /usr/local > /dev/null \
+    && rm go${GO_VERSION}.linux-${OSARCH}.tar.gz
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV GOROOT=/usr/local/go
@@ -69,21 +67,22 @@ ENV CGO_ENABLED=0
 ENV PATH="/usr/local/go/bin:${PATH}"
 ENV PATH=$PATH:/usr/local/go/bin
 
+WORKDIR /agent
+COPY . /agent/
+
 RUN mv /agent/test/config/agent/nginx-agent-otel-load.conf /agent/test/load/nginx-agent.conf
 RUN mkdir /var/run/nginx-agent/ /var/log/nginx-agent/ /etc/nginx-agent/
 
-WORKDIR /agent/
 CMD make install-tools
 
 FROM configure-go as run-tests
 
 WORKDIR /agent/
 
-RUN make build
-RUN chmod -R +x ./build
-
-WORKDIR /agent/test/load
-
-RUN go test -v -timeout 1m ./...
+## Pre-download Go modules
+RUN go mod tidy
+RUN make build && ls -la /agent/build && /agent/build/nginx-agent -v
 
-CMD ["sh", "-c", "cat benchmarks.json"]
+COPY test/docker/load/entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+CMD ["/agent/entrypoint.sh"]
diff --git a/test/docker/load/entrypoint.sh b/test/docker/load/entrypoint.sh
new file mode 100755
index 000000000..998a0e3d5
--- /dev/null
+++ b/test/docker/load/entrypoint.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+PROFILE=${PROFILE:-"false"}
+BENCHMARKS_DIR="/agent/performance/load"
+
+# Both runners record any failing step in `status` and return it explicitly.
+# errexit cannot be relied on here: bash ignores it inside a function that is
+# called on the left-hand side of `||`, and without an explicit return the
+# function would report popd's status and hide a failed test run.
+
+load_test() {
+    echo "Running load tests..."
+    pushd test/load || return 1
+    local status=0
+    go test -v -timeout 1m ./... || status=$?
+    cp benchmarks.json "${BENCHMARKS_DIR}" || status=$?
+    cp -r results "${BENCHMARKS_DIR}" || status=$?
+    popd
+    return "${status}"
+}
+
+load_test_with_profile() {
+    echo "Running load tests with CPU Profiling enabled..."
+    pushd test/load || return 1
+    local status=0
+    go test -v -timeout 1m ./... \
+        -cpuprofile metrics_load_cpu.pprof || status=$?
+    cp benchmarks.json "${BENCHMARKS_DIR}" || status=$?
+    cp -r results "${BENCHMARKS_DIR}" || status=$?
+    cp *.pprof "${BENCHMARKS_DIR}" || status=$?
+    popd
+    return "${status}"
+}
+
+## Main script execution starts here
+mkdir -p "${BENCHMARKS_DIR}"
+echo "Running in $(pwd)"
+if [[ "$PROFILE" == "true" ]]; then
+    echo "CPU Profiling is enabled."
+    load_test_with_profile || { echo "Load tests with cpu profiling failed"; exit 1; }
+else
+    load_test || { echo "Load tests failed"; exit 1; }
+fi
+echo "Done."
diff --git a/test/load/otel_collector_plugin_load_test.go b/test/load/otel_collector_plugin_load_test.go
index fa4db0b2b..d503cbe7a 100644
--- a/test/load/otel_collector_plugin_load_test.go
+++ b/test/load/otel_collector_plugin_load_test.go
@@ -33,54 +33,70 @@ func TestMetric10kDPS(t *testing.T) {
 	name := fmt.Sprintf("OTLP-%s-%s", runtime.GOOS, binary)
 	sender := testbed.NewOTLPMetricDataSender(testbed.DefaultHost, 4317)
 	receiver := testbed.NewOTLPDataReceiver(5643)
-	receiver = receiver.WithCompression("none")
 
-	t.Run(name, func(t *testing.T) {
-		require.NoError(t, err)
+	tests := []struct {
+		name        string
+		compression string
+	}{
+		{
+			name:        name + "-none",
+			compression: "none",
+		},
+	}
 
-		options := testbed.LoadOptions{
-			DataItemsPerSecond: 10_000,
-			ItemsPerBatch:      100,
-			Parallel:           1,
-		}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			require.NoError(t, err)
+
+			options := testbed.LoadOptions{
+				DataItemsPerSecond: 10_000,
+				ItemsPerBatch:      100,
+				Parallel:           1,
+			}
 
-		agentProc := NewNginxAgentProcessCollector(WithEnvVar("GOMAXPROCS", "10"))
+			t.Logf("Running test case: %s (compression=%s)", test.name, test.compression)
 
-		dataProvider := testbed.NewPerfTestDataProvider(options)
-		tc := testbed.NewTestCase(
-			t,
-			dataProvider,
-			sender,
-			receiver,
-			agentProc,
-			&testbed.PerfTestValidator{},
-			performanceResultsSummary,
-			// this resource spec is overwritten in the agent process collector
-			testbed.WithResourceLimits(testbed.ResourceSpec{}),
-		)
+			agentProc := NewNginxAgentProcessCollector(WithEnvVar("GOMAXPROCS", "10"))
 
-		t.Cleanup(tc.Stop)
+			receiver = receiver.WithCompression(test.compression)
 
-		tc.StartBackend()
-		tc.StartAgent()
+			dataProvider := testbed.NewPerfTestDataProvider(options)
+			tc := testbed.NewTestCase(
+				t,
+				dataProvider,
+				sender,
+				receiver,
+				agentProc,
+				&testbed.PerfTestValidator{},
+				performanceResultsSummary,
+				// this resource spec is overwritten in the agent process collector
+				testbed.WithResourceLimits(testbed.ResourceSpec{}),
+			)
 
-		tc.StartLoad(options)
+			t.Cleanup(tc.Stop)
 
-		tc.WaitFor(func() bool { return tc.LoadGenerator.IsReady() }, "load generator ready")
+			tc.StartBackend()
+			tc.StartAgent()
 
-		tc.WaitFor(func() bool { return tc.LoadGenerator.DataItemsSent() > 0 }, "load generator started")
+			tc.StartLoad(options)
 
-		tc.Sleep(tc.Duration)
+			tc.WaitFor(func() bool { return tc.LoadGenerator.IsReady() }, "load generator ready")
 
-		tc.StopLoad()
+			tc.WaitFor(func() bool { return tc.LoadGenerator.DataItemsSent() > 0 }, "load generator started")
 
-		tc.WaitFor(func() bool { return tc.LoadGenerator.DataItemsSent() == tc.MockBackend.DataItemsReceived() },
-			"all data items received")
+			tc.Sleep(tc.Duration)
 
-		tc.ValidateData()
-	})
+			tc.StopLoad()
 
-	defer testbed.SaveResults(performanceResultsSummary)
+			tc.WaitFor(func() bool { return tc.LoadGenerator.DataItemsSent() == tc.MockBackend.DataItemsReceived() },
+				"all data items received")
+
+			tc.ValidateData()
+		})
+
+		// Save intermediate results after each test case
+		testbed.SaveResults(performanceResultsSummary)
+	}
 }
 
 func parseBinary(s string) string {