Commit 1cefc3c
OPCT-326: collector: gather kube-burner data for basic perf profiles (#67)
Collect kube-burner profiles with a simple execution to measure basic cluster performance in a controlled environment (known providers/CI), to be used as a baseline when evaluating partner clusters. Total time added to the workflow: ~9 minutes. Additional data collected: ~512K. These profiles have been used to build the performance report in internal documentation: [OPCT-276](https://issues.redhat.com/browse/OPCT-276)
1 parent 0866ceb commit 1cefc3c
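The default profiles and the per-profile flags come from the `collector.sh` changes in this commit. A minimal sketch, using a hypothetical `kb_cmdline` helper, of how the kube-burner command line is assembled per profile:

```shell
# Hypothetical helper sketching how collector.sh picks per-profile flags;
# profile names and flags are copied from this commit's collector.sh.
kb_cmdline() {
  profile="$1"
  extra=""
  case "${profile}" in
    cluster-density-v2)
      # the only profile with custom flags in this commit
      extra=" --iterations=1 --churn-duration=2m0s --churn-cycles=2"
      ;;
  esac
  echo "kube-burner-ocp ${profile} --local-indexing${extra}"
}

kb_cmdline node-density
kb_cmdline cluster-density-v2
```

The real script also installs the `kube-burner-ocp` binary on demand and tees each run's output into a per-profile log under the results directory.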

File tree

5 files changed: +218, -13 lines changed

artifacts-collector/Containerfile

Lines changed: 2 additions & 1 deletion

```diff
@@ -18,7 +18,8 @@ LABEL io.k8s.display-name="OPCT Clients" \
 WORKDIR /plugin
 
 RUN echo "fastestmirror=true" >> /etc/dnf/dnf.conf \
-    && microdnf install -y curl grep tar xz gzip diff \
+    && microdnf update -y \
+    && microdnf install -y wget curl grep tar xz gzip diff \
     && microdnf clean all
 
 COPY --from=tools /usr/bin/oc /usr/bin/oc
```

artifacts-collector/README.md

Lines changed: 65 additions & 1 deletion

````diff
@@ -1,8 +1,72 @@
 # artifacts-collector plugin
 
+The collector plugin/step is the final step of the default conformance workflow
+executed by OPCT.
 
-Build and push (from root directory of project):
+The step collects artifacts such as:
+
+- e2e metadata executed by openshift-tests for previous steps - sent to the collector pod (acting as an artifact server) and saved with the prefix `artifacts_e2e-*`
+- must-gather: used as a baseline to evaluate cluster information and etcd performance during the conformance tests. Must-gather is the first data collected, running before the performance tests, so the report is not impacted by non-e2e workloads.
+- [camgi](https://github.com/elmiko/camgi.rs) report: generated when camgi is available for the running architecture
+- etcd FIO tool: evaluates one-off disk performance on control plane nodes and a sample of compute/worker nodes
+- kube-burner: runs standard profiles to execute performance tests, collecting metrics and data into a local index
+
+## Prerequisites
+
+- Download the latest version of opct
+
+### Build your custom image
+
+Build and push (from the root directory):
 
 ```sh
 make build-plugin-collector PLATFORMS=linux/amd64 COMMAND=push
 ```
+
+## Usage
+
+### Run individual collectors - kube-burner
+
+It's possible to run an individual collector by customizing the plugin manifest.
+
+The kube-burner manifest file `manifests/kube-burner-only.yaml` enforces flags
+to prevent collecting standard data, running only kube-burner in the target cluster.
+
+To run the standalone plugin, you can use the Sonobuoy API wrapped by OPCT:
+
+> Update the `image` in the podSpec manifest file `manifests/kube-burner-only.yaml`
+
+- Run:
+
+```bash
+./opct sonobuoy run -p ./artifacts-collector/manifests/kube-burner-only.yaml \
+  --dns-namespace=openshift-dns \
+  --dns-pod-labels=dns.operator.openshift.io/daemonset-dns=default
+```
+
+- Follow the execution or read the logs:
+
+```sh
+./opct sonobuoy status
+
+# or read the logs
+oc logs -l plugin-name=99-openshift-artifacts-collector -n sonobuoy
+```
+
+- When completed, retrieve the results:
+
+```sh
+./opct sonobuoy retrieve
+```
+
+- Then explore the performance data:
+
+```sh
+$ tar xfz 202502062032_sonobuoy_4afa09f6-24e1-4909-b9d2-7c158d604b02.tar.gz -C results/
+
+$ ls -sh results/plugins/99-openshift-artifacts-collector/results/global/
+total 424K
+ 52K artifacts_kube-burner_cluster-density-v2.log   52K artifacts_kube-burner_node-density-cni.log
+ 52K artifacts_kube-burner_node-density.log        268K artifacts_kube-burner.tar.gz
+```
````
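Per `kube_burner_save` in collector.sh, `artifacts_kube-burner.tar.gz` holds one directory per profile plus an `id_<profile>.txt` file with the run id. A sketch, using hypothetical throwaway paths rather than the real artifact, of unpacking such a tarball and listing the run ids:

```shell
# Build a throwaway tree mimicking the layout kube_burner_save packs,
# then unpack it and read the per-profile run ids.
workdir=$(mktemp -d)
mkdir -p "${workdir}/node-density" "${workdir}/cluster-density-v2"
echo "uuid-a" > "${workdir}/id_node-density.txt"
echo "uuid-b" > "${workdir}/id_cluster-density-v2.txt"
tar cfz "${workdir}.tar.gz" -C "${workdir}" .

# Unpack, as one would with the real artifacts_kube-burner.tar.gz
out=$(mktemp -d)
tar xfz "${workdir}.tar.gz" -C "${out}"
cat "${out}"/id_*.txt | sort
```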

artifacts-collector/collector.sh

Lines changed: 88 additions & 11 deletions

```diff
@@ -165,6 +165,70 @@ collect_metrics() {
   os_log_info "${msg_prefix} finished!"
 }
 
+# kube_burner runs workloads for performance and scale testing. The tests are
+# executed in the validation environment, and the results are saved as raw data
+# into the artifact path.
+# https://kube-burner.github.io/kube-burner-ocp/latest/
+
+# kube_burner_install installs kube-burner-ocp (kube-burner wrapper) if it is not installed yet.
+function kube_burner_install() {
+  if [[ -f /usr/local/bin/kube-burner-ocp ]]; then
+    return
+  fi
+  send_test_progress "status=running=kube-burner=install";
+  ARCH=$(uname -m)
+  echo "Installing kube-burner-ocp version ${KUBE_BURNER_VERSION} for ${ARCH}"
+  wget -q -O kube-burner-ocp.tar.gz "https://github.com/kube-burner/kube-burner-ocp/releases/download/v${KUBE_BURNER_VERSION}/kube-burner-ocp-V${KUBE_BURNER_VERSION}-linux-${ARCH}.tar.gz"
+  tar xfz kube-burner-ocp.tar.gz && mv -v kube-burner-ocp /usr/local/bin/kube-burner-ocp
+}
+
+# kube_burner_run is the individual profile executor, saving the result in a consolidated kube-burner directory.
+function kube_burner_run() {
+  local index_dir
+  echo "> Running kube-burner ${KB_CMD}"
+  kube_burner_install
+  send_test_progress "status=running=kube-burner=${KB_CMD}";
+
+  kube-burner-ocp ${KB_CMD} --local-indexing ${KUBE_BURNER_EXTRA_ARGS-} |& tee -a "${RESULTS_DIR}"/artifacts_kube-burner_"${KB_CMD}".log
+
+  # kube-burner-ocp saves the index to the directory collected-metrics-<uuid>;
+  # $2 of the awk split is the <uuid> suffix.
+  index_dir=$(ls collected-metrics* -d)
+  job_id="$(echo "${index_dir}" | awk -F'collected-metrics-' '{print $2}')"
+  mv -v "${index_dir}" "${KUBE_BURNER_RESULTS_DIR}/${KB_CMD}"
+  echo "${job_id}" > "${KUBE_BURNER_RESULTS_DIR}/id_${KB_CMD}.txt"
+}
+
+# kube_burner_save saves the artifacts collected for each profile from a consolidated
+# directory into the sonobuoy result/artifact directory.
+function kube_burner_save() {
+  echo "> Saving kube-burner results"
+  pushd "${KUBE_BURNER_RESULTS_DIR}" || return
+  tar cvfz "${RESULTS_DIR}"/artifacts_kube-burner.tar.gz ./*
+  popd || return
+}
+
+# collect_kube_burner is the entrypoint to execute kube-burner profiles.
+function collect_kube_burner() {
+  mkdir -p "${KUBE_BURNER_RESULTS_DIR}"
+  for KB_CMD in ${KUBE_BURNER_COMMANDS-}; do
+    send_test_progress "status=running=kube-burner=${KB_CMD}";
+    echo "> Running kube-burner command: ${KB_CMD}"
+    unset KUBE_BURNER_EXTRA_ARGS
+    case ${KB_CMD} in
+      "cluster-density-v2")
+        KUBE_BURNER_EXTRA_ARGS="--iterations=1 --churn-duration=2m0s --churn-cycles=2"
+        kube_burner_run;
+        ;;
+      *)
+        kube_burner_run;
+        ;;
+    esac
+    send_test_progress "status=done=kube-burner=${KB_CMD}";
+  done
+  kube_burner_save
+  send_test_progress "status=done=kube-burner";
+}
+
 # Run Plugin for Collector. The Collector plugin is the last one executed on the
 # cluster. It will collect custom files used on the Validation environment; at the
 # end it will generate a tarball file to submit the raw results to Sonobuoy.
@@ -173,22 +237,35 @@ run_plugin_collector() {
 
   pushd "${RESULTS_DIR}" || true
 
-  # Collecting must-gather
-  send_test_progress "status=running=collecting must-gather";
-  collect_must_gather || true
+  # Collect must-gather
+  if [[ "${SKIP_MUST_GATHER:-false}" == "false" ]]; then
+    send_test_progress "status=running=collecting must-gather";
+    collect_must_gather || true
+  fi
 
-  # Experimental: Collect performance data
+  # Collect performance data
   # running after must-gather to prevent impacting etcd logs when testing etcdfio.
-  send_test_progress "status=running=collecting performance data";
-  collect_performance || true
+  if [[ "${SKIP_PERFORMANCE:-false}" == "false" ]]; then
+    send_test_progress "status=running=collecting performance data";
+    collect_performance || true
+  fi
 
-  # Experimental: Collect metrics
-  send_test_progress "status=running=collecting metrics";
-  collect_metrics || true
+  # Collect metrics
+  if [[ "${SKIP_METRICS:-false}" == "false" ]]; then
+    send_test_progress "status=running=collecting metrics";
+    collect_metrics || true
+  fi
 
-  # Creating Result file used to publish to sonobuoy. (last step)
-  send_test_progress "status=running=saving artifacts";
+  # Experimental: Collect kube-burner (performance profiles)
+  # Experimental collectors are skipped by default and need to be explicitly enabled in the plugin manifest:
+  # https://github.com/redhat-openshift-ecosystem/opct/blob/main/data/templates/plugins/openshift-artifacts-collector.yaml
+  if [[ "${SKIP_KUBE_BURNER:-true}" == "false" ]]; then
+    send_test_progress "status=running=kube-burner";
+    collect_kube_burner || true
+  fi
 
+  # Create result file used to publish to the sonobuoy aggregator. (must be the last step)
+  send_test_progress "status=running=saving artifacts";
   os_log_info "[executor][PluginID#${PLUGIN_ID}] Packing all results..."
   ls -sh ./artifacts_*
   tar cfz raw-results.tar.gz ./artifacts_*
```
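The `SKIP_*` gating added above relies on the standard `${VAR:-default}` expansion: stable collectors default to enabled (`false`), while the experimental kube-burner collector defaults to skipped (`true`). A minimal sketch, with a hypothetical `run_if_enabled` helper not present in the commit:

```shell
# Hypothetical helper demonstrating the ${VAR:-default} gating used in
# run_plugin_collector: a collector runs only when its SKIP_* value
# (or its default, if unset) resolves to "false".
run_if_enabled() {
  skip_value="$1"; default="$2"; name="$3"
  if [ "${skip_value:-${default}}" = "false" ]; then
    echo "running ${name}"
  else
    echo "skipping ${name}"
  fi
}

run_if_enabled "${SKIP_MUST_GATHER-}" false must-gather  # stable: runs unless SKIP_MUST_GATHER=true
run_if_enabled "${SKIP_KUBE_BURNER-}" true kube-burner   # experimental: skipped unless SKIP_KUBE_BURNER=false
```

This keeps the conformance workflow unchanged by default while letting the manifest opt profiles in or out per run.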

artifacts-collector/global_env.sh

Lines changed: 7 additions & 0 deletions

```diff
@@ -27,3 +27,10 @@ declare -gx MIRROR_IMAGE_REPOSITORY
 
 # Utilities
 declare -grx UTIL_OC_BIN="/usr/bin/oc"
+
+# Kube Burner
+KUBE_BURNER_DEFAULT_COMMANDS="node-density node-density-cni cluster-density-v2"
+declare -gx KUBE_BURNER_COMMANDS="${KUBE_BURNER_COMMANDS:-${KUBE_BURNER_DEFAULT_COMMANDS}}"
+declare -gx KUBE_BURNER_RESULTS_DIR="/tmp/kube-burner"
+KUBE_BURNER_DEFAULT_VERSION="1.6.2"
+declare -gx KUBE_BURNER_VERSION="${KUBE_BURNER_VERSION:-${KUBE_BURNER_DEFAULT_VERSION}}"
```
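global_env.sh uses the `${VAR:-default}` expansion so both the profile list and the kube-burner version can be overridden from the environment. A small sketch with hypothetical `DEMO_*` names standing in for the real variables:

```shell
# Hypothetical DEMO_* variables; the same ${VAR:-default} expansion backs
# KUBE_BURNER_VERSION and KUBE_BURNER_COMMANDS above.
DEMO_DEFAULT_VERSION="1.6.2"
unset DEMO_VERSION
resolved="${DEMO_VERSION:-${DEMO_DEFAULT_VERSION}}"    # falls back to the default
DEMO_VERSION="1.7.0"
overridden="${DEMO_VERSION:-${DEMO_DEFAULT_VERSION}}"  # explicit value wins
echo "${resolved} ${overridden}"
```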
artifacts-collector/manifests/kube-burner-only.yaml (new file)

Lines changed: 56 additions & 0 deletions

```diff
@@ -0,0 +1,56 @@
+---
+podSpec:
+  restartPolicy: Never
+  serviceAccountName: sonobuoy-serviceaccount
+  priorityClassName: system-node-critical
+  volumes:
+    - name: shared
+      emptyDir: {}
+sonobuoy-config:
+  driver: Job
+  plugin-name: 99-openshift-artifacts-collector
+  result-format: raw
+  description: |
+    OPCT artifacts plugin collects the artifacts from the OpenShift Container
+    Platform post-validation.
+  source-url:
+    "https://github.com/redhat-openshift-ecosystem/provider-certification-tool/\
+    blob/main/manifests/openshift-artifacts-collector.yaml"
+  skipCleanup: true
+spec:
+  name: plugin
+  image: "quay.io/opct/plugin-artifacts-collector:v0.0.0-devel-f17e7fa"
+  imagePullPolicy: Always
+  command: ["/bin/bash", "/plugin/entrypoint-collector.sh"]
+  volumeMounts:
+    - mountPath: /tmp/shared
+      name: shared
+  env:
+    - name: PLUGIN_ID
+      value: "99"
+    - name: PLUGIN_NAME
+      value: "openshift-artifacts-collector"
+    - name: PLUGIN_BLOCKED_BY
+      value: ""
+    - name: IMAGE_OVERRIDE_MUST_GATHER
+      value: "quay.io/opct/must-gather-monitoring:devel"
+    - name: ENV_NODE_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: spec.nodeName
+    - name: ENV_POD_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.name
+    - name: ENV_POD_NAMESPACE
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.namespace
+    - name: RUN_MODE
+      value: "default"
+    - name: SKIP_MUST_GATHER
+      value: "true"
+    - name: SKIP_PERFORMANCE
+      value: "true"
+    - name: SKIP_METRICS
+      value: "true"
```
