diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 75fd2a1..7861fe7 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -44,17 +44,17 @@ jobs: fi # Modify the Dockerfile to append suffix to COLLECTOR_VERSION - sed -i "s/^ENV COLLECTOR_VERSION=\(.*\)/ENV COLLECTOR_VERSION=\1${SUFFIX}/" Dockerfile + sed -i "s/^ENV COLLECTOR_VERSION=\(.*\)/ENV COLLECTOR_VERSION=\1${SUFFIX}/" collector/Dockerfile # Show what was changed for debugging echo "Modified COLLECTOR_VERSION:" - grep "^ENV COLLECTOR_VERSION=" Dockerfile + grep "^ENV COLLECTOR_VERSION=" collector/Dockerfile - name: Ensure tag version matches collector version id: ensure-tag-version run: | if [[ "${GITHUB_REF}" == refs/tags/* ]]; then - sed -i "s/^ENV COLLECTOR_VERSION=.*/ENV COLLECTOR_VERSION=${GITHUB_REF#refs\/tags\/v}/" Dockerfile + sed -i "s/^ENV COLLECTOR_VERSION=.*/ENV COLLECTOR_VERSION=${GITHUB_REF#refs\/tags\/v}/" collector/Dockerfile git diff --exit-code fi @@ -94,14 +94,14 @@ jobs: project: ${{ secrets.DEPOT_PROJECT_ID }} token: ${{ secrets.DEPOT_API_TOKEN }} context: . - file: Dockerfile + file: collector/Dockerfile platforms: linux/amd64,linux/arm64 push: ${{ steps.repo.outputs.push }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} build-args: | COLLECTOR_VERSION=${{ github.ref_name }} - + build-beyla: runs-on: ubuntu-latest permissions: @@ -150,8 +150,8 @@ jobs: project: ${{ secrets.DEPOT_PROJECT_ID }} token: ${{ secrets.DEPOT_API_TOKEN }} context: . - file: Dockerfile.beyla + file: beyla/Dockerfile platforms: linux/amd64,linux/arm64 push: ${{ steps.repo.outputs.push }} tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/test.yml b/.github/workflows/docker-compose-match.yml similarity index 65% rename from .github/workflows/test.yml rename to .github/workflows/docker-compose-match.yml index e4415a2..7b00c22 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/docker-compose-match.yml @@ -8,27 +8,6 @@ on: workflow_dispatch: jobs: - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Set up Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.2' - bundler-cache: true # runs 'bundle install' and caches installed gems automatically - - - name: Install dependencies - run: bundle install - - - name: Run tests - run: bundle exec rake test - - - name: Validate Docker compose build - run: docker compose build --no-cache - # The diff is expected to be: # 8a9,10 # > security_opt: diff --git a/Gemfile b/Gemfile deleted file mode 100644 index f75fe8f..0000000 --- a/Gemfile +++ /dev/null @@ -1,10 +0,0 @@ -source 'https://rubygems.org' - -gem 'csv' - -group :development, :test do - gem 'minitest' - gem 'webmock' - gem 'ruby-lsp' - gem 'rake' -end diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index eaa5868..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,44 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) - bigdecimal (3.1.9) - crack (1.0.0) - bigdecimal - rexml - csv (3.3.5) - hashdiff (1.1.2) - language_server-protocol (3.17.0.4) - logger (1.7.0) - minitest (5.25.5) - prism (1.4.0) - public_suffix (6.0.2) - rake (13.2.1) - rbs (3.9.3) - logger - rexml (3.4.1) - ruby-lsp (0.23.17) - language_server-protocol (~> 3.17.0) - prism (>= 1.2, < 2.0) - rbs (>= 3, < 4) - sorbet-runtime 
(>= 0.5.10782) - sorbet-runtime (0.5.12087) - webmock (3.25.1) - addressable (>= 2.8.0) - crack (>= 0.3.2) - hashdiff (>= 0.4.0, < 2.0.0) - -PLATFORMS - arm64-darwin-23 - ruby - -DEPENDENCIES - csv - minitest - rake - ruby-lsp - webmock - -BUNDLED WITH - 2.6.2 diff --git a/Rakefile b/Rakefile deleted file mode 100644 index fc02f06..0000000 --- a/Rakefile +++ /dev/null @@ -1,9 +0,0 @@ -require 'rake/testtask' - -Rake::TestTask.new do |t| - t.libs << 'test' - t.test_files = FileList['test/**/*_test.rb', 'dockerprobe/test/**/*_test.rb', 'mdprobe/test/**/*_test.rb'] - t.verbose = true -end - -task default: :test diff --git a/beyla.yaml b/beyla.yaml deleted file mode 100644 index b010069..0000000 --- a/beyla.yaml +++ /dev/null @@ -1,55 +0,0 @@ -discovery: - # Make discovery less resource intensive - poll_interval: 30s # Check for new processes every 30s - min_process_age: 30s # Only instrument processes older than 30s - - instrument: - - open_ports: "1-65535" - - exclude_instrument: - - open_ports: "33000" - - open_ports: "34320" - - exe_path: ".*/vector$" - - exe_path: ".*/fluentd$" - - exe_path: ".*/fluentbit$" - - exe_path: ".*/cluster-agent$" - - exclude_otel_instrumented_services: false - -otel_metrics_export: - -prometheus_export: - port: 0 - -network: - cache_max_flows: 5000 - enable: false - -otel_traces_export: - # Required to create trace spans for database operations - instrumentations: - - "*" - -ebpf: - context_propagation: all - track_request_headers: true - - # Without this, Beyla only detects MySQL/PostgreSQL binary protocols - # Enable to capture SQL from ORMs, connection poolers, or other databases - heuristic_sql_detect: true - - # Required for tracking prepared statements which use numeric IDs instead of SQL text - mysql_prepared_statements_cache_size: 100 - - # Without adequate buffer size, long SQL queries get truncated - # 4096 bytes captures most queries without excessive memory usage - buffer_sizes: - mysql: 4096 - postgres: 4096 - -attributes: - select: - traces: - include: - # Get full database query - - "db.query.text" diff --git a/Dockerfile.beyla b/beyla/Dockerfile similarity index 73% rename from Dockerfile.beyla rename to beyla/Dockerfile index 88adb9e..137095f 100644 --- a/Dockerfile.beyla +++ b/beyla/Dockerfile @@ -1,5 +1,5 @@ # Add Node Agent to the image -FROM ghcr.io/coroot/coroot-node-agent:1.25.0 AS node-agent +FROM ghcr.io/coroot/coroot-node-agent:1.27.0 AS node-agent # Add Cluster Agent to the image FROM ghcr.io/coroot/coroot-cluster-agent:1.2.4 AS cluster-agent @@ -23,7 +23,7 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* # Create necessary directories -RUN mkdir -p /etc/supervisor/conf.d /var/log/supervisor /enrichment +RUN mkdir -p /etc/supervisor/conf.d /var/log/supervisor /enrichment /bootstrap # Copy Beyla files from official image and set permissions COPY --from=beyla-source --chmod=755 /beyla /usr/local/bin/beyla @@ -37,16 +37,8 @@ COPY --from=node-agent --chmod=755 /usr/bin/coroot-node-agent /usr/local/bin/nod # Copy Cluster Agent COPY --from=cluster-agent --chmod=755 /usr/bin/coroot-cluster-agent /usr/local/bin/cluster-agent -# Copy Ruby dockerprobe implementation to working directory -COPY dockerprobe /dockerprobe - -# Copy configuration files -COPY beyla/supervisord.conf /etc/supervisor/conf.d/supervisord.conf -COPY --chmod=755 beyla/entrypoint.sh /entrypoint.sh -COPY --chmod=755 beyla/cluster-collector.sh /cluster-collector.sh - -# Copy Beyla configuration -COPY beyla.yaml /etc/beyla/beyla.yaml +COPY beyla/bootstrap_supervisord.conf 
/bootstrap/supervisord.conf +COPY --chmod=755 beyla/run_supervisord.sh /run_supervisord.sh # Default command -CMD ["/entrypoint.sh"] +CMD ["/run_supervisord.sh"] diff --git a/beyla/bootstrap_supervisord.conf b/beyla/bootstrap_supervisord.conf new file mode 100644 index 0000000..4147d5f --- /dev/null +++ b/beyla/bootstrap_supervisord.conf @@ -0,0 +1,13 @@ +[supervisord] +nodaemon=true +logfile=/var/log/supervisor/supervisord.log +loglevel=info + +[unix_http_server] +file=/var/lib/better-stack/beyla-supervisor.sock + +[supervisorctl] +serverurl=unix:///var/lib/better-stack/beyla-supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface diff --git a/beyla/cluster-collector.sh b/beyla/cluster-collector.sh deleted file mode 100644 index 2fe52d3..0000000 --- a/beyla/cluster-collector.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# -# Cluster Agent Manager for Beyla Container -# -# This script manages the cluster-agent by: -# 1. Checking if the cluster collector should run via HTTP endpoint from collector container -# 2. Starting the agent only if the check passes -# 3. Monitoring every 60 seconds and stopping the agent if the check fails -# 4. Restarting the cycle to allow the agent to start again when conditions change -# - -ENDPOINT_URL="http://localhost:33000/v1/cluster-agent-enabled" - -# Trap SIGTERM for clean shutdown -trap 'kill $AGENT_PID 2>/dev/null; exit' SIGTERM - -# Function to check if cluster agent should run -should_run_cluster_agent() { - # Use curl to check the endpoint, timeout after 5 seconds - response=$(curl -s --max-time 5 "$ENDPOINT_URL" 2>/dev/null) - if [ "$response" = "yes" ]; then - return 0 - fi - return 1 -} - -while true; do - if should_run_cluster_agent; then - echo "Starting cluster agent (enabled via API endpoint)" - /usr/local/bin/cluster-agent \ - --coroot-url http://localhost:33000 \ - --metrics-scrape-interval=15s \ - --config-update-interval=15s & - AGENT_PID=$! - while sleep 60; do - if ! should_run_cluster_agent; then - echo "Stopping cluster agent (disabled via API endpoint)" - kill $AGENT_PID - wait $AGENT_PID - break - fi - done - else - echo "Cluster agent disabled, checking again in 60 seconds..." - fi - sleep 60 -done \ No newline at end of file diff --git a/beyla/entrypoint.sh b/beyla/entrypoint.sh deleted file mode 100644 index 3f881c1..0000000 --- a/beyla/entrypoint.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -# Enable dockerprobe if ENABLE_DOCKERPROBE is set to true or 1 -if [ "${ENABLE_DOCKERPROBE}" = "true" ] || [ "${ENABLE_DOCKERPROBE}" = "1" ]; then - echo "Enabling dockerprobe (ENABLE_DOCKERPROBE=${ENABLE_DOCKERPROBE})" - # Replace autostart=false with autostart=true for dockerprobe - sed -i '/\[program:dockerprobe\]/,/^\[/ s/autostart=false/autostart=true/' /etc/supervisor/conf.d/supervisord.conf -fi - -# Start supervisord -exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf diff --git a/beyla/run_supervisord.sh b/beyla/run_supervisord.sh new file mode 100644 index 0000000..03419a6 --- /dev/null +++ b/beyla/run_supervisord.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e + +# Ensure the supervisord.conf exists in the expected location +SUPERVISORD_CONF="/var/lib/better-stack/beyla/supervisord.conf" +BOOTSTRAP_CONF="/bootstrap/supervisord.conf" + +if [ ! -f "$SUPERVISORD_CONF" ]; then + echo "Supervisord config not found at $SUPERVISORD_CONF, copying from bootstrap..." 
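+    # Seed the managed location from the bootstrap copy baked into the image;
+    # later config pushes can overwrite the file under /var/lib/better-stack
+    # while /bootstrap stays pristine as a fallback.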
+ mkdir -p "$(dirname "$SUPERVISORD_CONF")" + cp "$BOOTSTRAP_CONF" "$SUPERVISORD_CONF" + echo "Copied bootstrap supervisord config to $SUPERVISORD_CONF" +fi + +# Start supervisord +exec /usr/bin/supervisord -c "$SUPERVISORD_CONF" diff --git a/beyla/supervisord.conf b/beyla/supervisord.conf deleted file mode 100644 index af78ac6..0000000 --- a/beyla/supervisord.conf +++ /dev/null @@ -1,50 +0,0 @@ -[supervisord] -nodaemon=true -logfile=/var/log/supervisor/supervisord.log -loglevel=info - -[supervisorctl] -serverurl=unix:///var/run/supervisor.sock - -[program:beyla] -command=/usr/local/bin/beyla -autostart=true -autorestart=true -stdout_logfile=/var/log/supervisor/beyla.out.log -stderr_logfile=/var/log/supervisor/beyla.err.log -environment=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - -# Runs the Coroot Node Agent to collect metrics from the cluster, shipping them to Vector -# via the proxy in the collector container. -[program:node-agent] -command=/usr/local/bin/node-agent --collector-endpoint http://localhost:33000 --scrape-interval=15s -autostart=true -autorestart=true -stdout_logfile=/var/log/supervisor/node-agent.out.log -stderr_logfile=/var/log/supervisor/node-agent.err.log -environment=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - -# Runs the Ruby `dockerprobe` script to produce a CSV file associating process IDs to container IDs and names. -# This CSV file is shared from the Beyla container to the Collector container via the docker-metadata volume mounted at /enrichment. -# Sources for the Ruby implementation are located in the `dockerprobe` directory. -# -# autostart disabled by default here; enabled in entrypoint.sh if ENABLE_DOCKERPROBE is set to true or 1 -# (for disabling in e.g. Kubernetes, or when not desired). ENABLE_DOCKERPROBE is set to true by default in docker-compose.yml. 
-[program:dockerprobe] -autostart=false -command=/usr/bin/ruby /dockerprobe/dockerprobe.rb -autorestart=true -stdout_logfile=/var/log/supervisor/dockerprobe.out.log -stderr_logfile=/var/log/supervisor/dockerprobe.err.log -environment=DOCKER_HOST="unix:///var/run/docker.sock",DOCKERPROBE_OUTPUT_PATH="/enrichment/docker-mappings.incoming.csv",DOCKERPROBE_INTERVAL="15" - -# Runs the Coroot Cluster Agent to monitor databases and other cluster resources -[program:cluster-agent] -command=/cluster-collector.sh -autostart=true -autorestart=true -stdout_logfile=/var/log/supervisor/cluster-agent.out.log -stderr_logfile=/var/log/supervisor/cluster-agent.err.log -stopasgroup=true -killasgroup=true -environment=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" diff --git a/certbot-deploy-hook.sh b/certbot-deploy-hook.sh deleted file mode 100644 index 03e0513..0000000 --- a/certbot-deploy-hook.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Certbot deploy hook - executed after successful certificate issuance or renewal -# This script is called by certbot with the following environment variables: -# - RENEWED_LINEAGE: Path to the live directory of the renewed cert (e.g., /etc/letsencrypt/live/example.com) -# - RENEWED_DOMAINS: Space-separated list of renewed domains - -echo "[certbot-deploy] Deploy hook triggered for domains: ${RENEWED_DOMAINS}" - -# Extract domain from the lineage path -TLS_DOMAIN="${RENEWED_DOMAINS%% *}" # Get first domain if multiple -CERT_LIVE_DIR="/etc/letsencrypt/live/${TLS_DOMAIN}" -FULLCHAIN_PATH="${CERT_LIVE_DIR}/fullchain.pem" -PRIVKEY_PATH="${CERT_LIVE_DIR}/privkey.pem" -LINK_CERT="/etc/ssl/${TLS_DOMAIN}.pem" -LINK_KEY="/etc/ssl/${TLS_DOMAIN}.key" - -# Create or update symlinks to predictable locations -if [[ -f "$FULLCHAIN_PATH" && -f "$PRIVKEY_PATH" ]]; then - ln -sf "$FULLCHAIN_PATH" "$LINK_CERT" - ln -sf "$PRIVKEY_PATH" "$LINK_KEY" - # Make certificates readable by Vector - chmod 0644 "$LINK_CERT" || true - chmod 0644 "$LINK_KEY" || true - echo "[certbot-deploy] Updated symlinks at $LINK_CERT and $LINK_KEY" - - # Signal Vector to reload configuration - if supervisorctl -c /etc/supervisor/conf.d/supervisord.conf signal HUP vector; then - echo "[certbot-deploy] Successfully signaled Vector to reload configuration" - else - echo "[certbot-deploy] WARNING: Failed to signal Vector for reload - Vector may need manual restart" - # Don't exit with error - certificate was still successfully obtained/renewed - fi -else - echo "[certbot-deploy] ERROR: Certificate files not found at expected locations" - exit 1 -fi - -echo "[certbot-deploy] Deploy hook completed successfully" \ No newline at end of file diff --git a/certbot-runner.sh b/certbot-runner.sh deleted file mode 100644 index e0fba3a..0000000 --- a/certbot-runner.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Run under supervisord to manage issuance and renewals conditionally. -# Domain is now read from /etc/ssl_certificate_host.txt file - -DOMAIN_FILE="/etc/ssl_certificate_host.txt" - -# Read domain from file -if [[ -f "$DOMAIN_FILE" ]]; then - TLS_DOMAIN=$(cat "$DOMAIN_FILE" | tr -d '[:space:]') -else - TLS_DOMAIN="" -fi - -if [[ -z "$TLS_DOMAIN" ]]; then - echo "[certbot] No domain configured in $DOMAIN_FILE; sleeping indefinitely." 
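Both deleted certbot scripts reloaded Vector the same way after updating the symlinks; the supervisord signal, for reference:

```bash
# Send SIGHUP to the supervisord-managed "vector" program so it re-reads
# its configuration (and the refreshed certificate) without a restart.
supervisorctl -c /etc/supervisor/conf.d/supervisord.conf signal HUP vector
```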
- exec sleep infinity -fi - -CERT_LIVE_DIR="/etc/letsencrypt/live/${TLS_DOMAIN}" -FULLCHAIN_PATH="${CERT_LIVE_DIR}/fullchain.pem" -PRIVKEY_PATH="${CERT_LIVE_DIR}/privkey.pem" -LINK_CERT="/etc/ssl/${TLS_DOMAIN}.pem" -LINK_KEY="/etc/ssl/${TLS_DOMAIN}.key" - -ensure_links_and_reload() { - # Ensure predictable symlinks and reload vector on success - if [[ -f "$FULLCHAIN_PATH" && -f "$PRIVKEY_PATH" ]]; then - ln -sf "$FULLCHAIN_PATH" "$LINK_CERT" - ln -sf "$PRIVKEY_PATH" "$LINK_KEY" - # Make both cert and key readable by Vector (running as root in container) - chmod 0644 "$LINK_CERT" || true - chmod 0644 "$LINK_KEY" || true - echo "[certbot] Updated symlinks at $LINK_CERT and $LINK_KEY with permissions 0644" - # Ask Vector to reload config without restart - if supervisorctl -c /etc/supervisor/conf.d/supervisord.conf signal HUP vector; then - echo "[certbot] Sent HUP to Vector for reload." - else - echo "[certbot] WARNING: Failed to signal Vector for reload - Vector may need manual restart" - fi - fi -} - -has_valid_cert() { - # Valid if certificate exists and is not expired now - if [[ ! -f "$FULLCHAIN_PATH" ]]; then - return 1 - fi - if openssl x509 -in "$FULLCHAIN_PATH" -noout -checkend 0 >/dev/null 2>&1; then - return 0 - else - return 1 - fi -} - -issue_once() { - echo "[certbot] Attempting initial/repair issuance for ${TLS_DOMAIN}..." - if certbot certonly \ - --non-interactive \ - --standalone \ - --agree-tos \ - --register-unsafely-without-email \ - --preferred-challenges http \ - -d "$TLS_DOMAIN" \ - --deploy-hook /certbot-deploy-hook.sh; then - echo "[certbot] Certificate issuance successful" - return 0 - else - echo "[certbot] Certificate issuance failed with exit code $?" - return 1 - fi -} - -renew_once() { - echo "[certbot] Attempting renewal for any due certificates..." - if certbot renew \ - --non-interactive \ - --deploy-hook /certbot-deploy-hook.sh; then - echo "[certbot] Renewal check completed successfully" - else - echo "[certbot] WARNING: Renewal check failed with exit code $? - will retry next cycle" - fi -} - -echo "[certbot] Domain configured as $TLS_DOMAIN from $DOMAIN_FILE; managing certificates." - -# Always attempt issuance immediately on startup/restart if cert doesn't exist -# This handles the case where domain just changed and we need to get a cert quickly -if ! has_valid_cert; then - echo "[certbot] No valid certificate found. Attempting immediate issuance..." - if issue_once; then - echo "[certbot] Certificate obtained successfully." - ensure_links_and_reload - else - echo "[certbot] Initial issuance attempt failed. Will retry every 10 minutes." - fi -fi - -if has_valid_cert; then - echo "[certbot] Valid certificate found. Starting 6-hour renewal check cycle." - ensure_links_and_reload - while true; do - sleep 6h - echo "[certbot] Running scheduled renewal check..." - renew_once - ensure_links_and_reload - done -else - echo "[certbot] No valid certificate found. Will attempt issuance every 10 minutes until successful." - until issue_once; do - echo "[certbot] Waiting 10 minutes before next issuance attempt..." - sleep 10m - done - echo "[certbot] Initial certificate obtained. Switching to 6-hour renewal check cycle." - ensure_links_and_reload - while true; do - sleep 6h - echo "[certbot] Running scheduled renewal check..." 
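The `has_valid_cert` helper above leans on `openssl x509 -checkend`, which exits 0 only if the certificate is still valid N seconds from now; a sketch with a 30-day margin (the certificate path is illustrative):

```bash
# Exit status 0 = certificate still valid 30 days (2592000 s) from now.
openssl x509 -in /etc/letsencrypt/live/example.com/fullchain.pem \
    -noout -checkend 2592000
```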
- renew_once - ensure_links_and_reload - done -fi - diff --git a/Dockerfile b/collector/Dockerfile similarity index 67% rename from Dockerfile rename to collector/Dockerfile index e32e170..928630d 100644 --- a/Dockerfile +++ b/collector/Dockerfile @@ -20,9 +20,6 @@ RUN apt-get update && apt-get install -y \ COPY --from=vector --chmod=755 /usr/bin/vector /usr/local/bin/vector COPY --from=vector /etc/vector /etc/vector -# Copy mdprobe -COPY mdprobe /mdprobe - # Create necessary directories RUN mkdir -p /versions/0-default \ && mkdir -p /etc/supervisor/conf.d \ @@ -36,7 +33,7 @@ RUN mkdir -p /versions/0-default \ # Set environment variables ENV BASE_URL=https://telemetry.betterstack.com ENV CLUSTER_COLLECTOR=false -ENV COLLECTOR_VERSION=1.0.40 +ENV COLLECTOR_VERSION=1.1.0 ENV VECTOR_VERSION=0.47.0 ENV BEYLA_VERSION=2.7.5 ENV CLUSTER_AGENT_VERSION=1.2.4 @@ -54,28 +51,14 @@ ENV CLUSTER_AGENT_VERSION=1.2.4 # This is considered a best practice when running multi-process containers with Tini as the init system. ENV TINI_SUBREAPER=true -# Copy supervisor configuration -COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf +# Copy bootstrap supervisor configuration and necessary initialization scripts +COPY collector/bootstrap_supervisord.conf /bootstrap/supervisord.conf +COPY --chmod=755 collector/bootstrap.sh /bootstrap.sh +COPY --chmod=755 collector/run_supervisord.sh /run_supervisord.sh -# Copy Ruby scripts -COPY --chmod=755 updater.rb /updater.rb -COPY --chmod=755 proxy.rb /proxy.rb -COPY --chmod=755 vector.sh /vector.sh -COPY --chmod=755 healthcheck.sh /healthcheck.sh -COPY --chmod=755 certbot-runner.sh /certbot-runner.sh -COPY --chmod=755 certbot-deploy-hook.sh /certbot-deploy-hook.sh -COPY versions/0-default/vector.yaml /versions/0-default/vector.yaml -COPY versions/0-default/databases.json /versions/0-default/databases.json -COPY kubernetes-discovery/0-default/discovered_pods.yaml /kubernetes-discovery/0-default/discovered_pods.yaml -COPY engine /engine -COPY --chmod=755 ebpf.sh /ebpf.sh -# Copy default enrichment files to both locations -# /enrichment-defaults is the source for copying at runtime -# /enrichment is for Kubernetes compatibility, since it's volume mounts work differently from compose/swarm -COPY dockerprobe/docker-mappings.default.csv /enrichment-defaults/docker-mappings.csv -COPY dockerprobe/databases.default.csv /enrichment-defaults/databases.csv -COPY dockerprobe/docker-mappings.default.csv /enrichment/docker-mappings.csv -COPY dockerprobe/databases.default.csv /enrichment/databases.csv +COPY collector/versions/0-default/vector.yaml /versions/0-default/vector.yaml +COPY collector/versions/0-default/databases.json /versions/0-default/databases.json +COPY collector/kubernetes-discovery/0-default/discovered_pods.yaml /kubernetes-discovery/0-default/discovered_pods.yaml # Create initial vector-config with symlinks to defaults RUN mkdir -p /vector-config/0-default \ @@ -90,4 +73,4 @@ RUN mkdir -p /vector-config/0-default \ ENTRYPOINT ["/usr/bin/tini", "-s", "--"] # Start supervisor -CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] +CMD ["/run_supervisord.sh"] diff --git a/collector/bootstrap.sh b/collector/bootstrap.sh new file mode 100755 index 0000000..dc12249 --- /dev/null +++ b/collector/bootstrap.sh @@ -0,0 +1,222 @@ +#!/bin/bash + +set -euo pipefail + +# Color output for better readability +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + 
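+# The WARN/ERROR variants below mirror log_info; log_error writes to stderr,
+# which supervisord routes to bootstrap.err.log (see collector/bootstrap_supervisord.conf).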
+log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" >&2 +} + +# Function to check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Validate required commands +log_info "Checking required commands..." +if ! command_exists curl; then + log_error "curl is not installed" + exit 1 +fi + +if ! command_exists jq; then + log_error "jq is not installed" + exit 1 +fi + +# Validate environment variables +log_info "Validating environment variables..." +if [ -z "$BASE_URL" ]; then + log_error "BASE_URL environment variable is not set" + exit 1 +fi + +if [ -z "$COLLECTOR_SECRET" ]; then + log_error "COLLECTOR_SECRET environment variable is not set" + exit 1 +fi + +log_info "BASE_URL: $BASE_URL" + +# Check if already bootstrapped +MANIFEST_DIR="/var/lib/better-stack" +BOOTSTRAPPED_FILE="$MANIFEST_DIR/bootstrapped.txt" + +if [ -f "$BOOTSTRAPPED_FILE" ]; then + log_info "Bootstrap already completed (found $BOOTSTRAPPED_FILE)" + log_info "Bootstrapped on: $(cat "$BOOTSTRAPPED_FILE")" + log_info "Exiting without changes." + exit 0 +fi + +# Function to make API request with error handling +make_api_request() { + local url="$1" + local output_file="$2" + local max_retries=3 + local retry_count=0 + local http_code + + while [ $retry_count -lt $max_retries ]; do + echo "$url" + if [ -n "$output_file" ]; then + http_code=$(curl -s -w "%{http_code}" -o "$output_file" "$url") + else + http_code=$(curl -s -w "%{http_code}" "$url") + fi + + if [ "$http_code" = "200" ]; then + return 0 + elif [ "$http_code" = "401" ] || [ "$http_code" = "403" ]; then + log_error "Authentication failed (HTTP $http_code). Check COLLECTOR_SECRET." + exit 2 + elif [ "$http_code" = "404" ]; then + log_error "Endpoint not found (HTTP $http_code). URL: $url" + exit 3 + else + retry_count=$((retry_count + 1)) + if [ $retry_count -lt $max_retries ]; then + log_warn "Request failed (HTTP $http_code). Retrying ($retry_count/$max_retries)..." + sleep 2 + else + log_error "Request failed after $max_retries attempts (HTTP $http_code)" + return 1 + fi + fi + done + + return 1 +} + +# Step 1: Get latest manifest version +log_info "Fetching latest manifest version..." +LATEST_MANIFEST_URL="$BASE_URL/api/collector/latest-manifest?collector_secret=$(printf %s "$COLLECTOR_SECRET" | jq -sRr @uri)" + +TEMP_VERSION_FILE=$(mktemp) +trap "rm -f $TEMP_VERSION_FILE" EXIT + +if ! make_api_request "$LATEST_MANIFEST_URL" "$TEMP_VERSION_FILE"; then + log_error "Failed to fetch latest manifest version" + exit 4 +fi + +MANIFEST_VERSION=$(jq -r '.version' "$TEMP_VERSION_FILE" 2>/dev/null) +if [ -z "$MANIFEST_VERSION" ] || [ "$MANIFEST_VERSION" = "null" ]; then + log_error "Invalid response from latest-manifest endpoint" + cat "$TEMP_VERSION_FILE" + exit 5 +fi + +log_info "Latest manifest version: $MANIFEST_VERSION" + +# Step 2: Download full manifest +log_info "Downloading manifest version $MANIFEST_VERSION..." +MANIFEST_URL="$BASE_URL/api/collector/manifest?collector_secret=$(printf %s "$COLLECTOR_SECRET" | jq -sRr @uri)&manifest_version=$MANIFEST_VERSION" + +MANIFEST_FILE="$MANIFEST_DIR/manifest.json" + +# Create directory if it doesn't exist +mkdir -p "$MANIFEST_DIR" + +TEMP_MANIFEST=$(mktemp) +if ! 
make_api_request "$MANIFEST_URL" "$TEMP_MANIFEST"; then + log_error "Failed to download manifest" + rm -f "$TEMP_MANIFEST" + exit 6 +fi + +# Validate manifest structure +MANIFEST_VERSION_CHECK=$(jq -r '.manifest_version' "$TEMP_MANIFEST" 2>/dev/null) +FILES_COUNT=$(jq -r '.files | length' "$TEMP_MANIFEST" 2>/dev/null) + +if [ -z "$MANIFEST_VERSION_CHECK" ] || [ "$MANIFEST_VERSION_CHECK" = "null" ]; then + log_error "Invalid manifest structure: missing manifest_version" + rm -f "$TEMP_MANIFEST" + exit 7 +fi + +if [ -z "$FILES_COUNT" ] || [ "$FILES_COUNT" = "null" ]; then + log_error "Invalid manifest structure: missing or invalid files array" + rm -f "$TEMP_MANIFEST" + exit 8 +fi + +# Move to final location +mv "$TEMP_MANIFEST" "$MANIFEST_FILE" +log_info "Manifest saved to $MANIFEST_FILE (version: $MANIFEST_VERSION_CHECK, files: $FILES_COUNT)" + +# Step 3: Process each file in manifest +log_info "Processing $FILES_COUNT files from manifest..." + +for i in $(seq 0 $((FILES_COUNT - 1))); do + FILE_PATH=$(jq -r ".files[$i].path" "$MANIFEST_FILE") + CONTAINER=$(jq -r ".files[$i].container" "$MANIFEST_FILE") + ACTIONS=$(jq -r ".files[$i].actions // [] | join(\",\")" "$MANIFEST_FILE") + + if [ "$FILE_PATH" = "null" ] || [ "$CONTAINER" = "null" ]; then + log_warn "Skipping file $i: missing path or container" + continue + fi + + log_info "[$((i + 1))/$FILES_COUNT] Downloading: $CONTAINER/$FILE_PATH" + + # Construct destination path + DEST_DIR="$MANIFEST_DIR/$CONTAINER/$(dirname "$FILE_PATH")" + DEST_FILE="$MANIFEST_DIR/$CONTAINER/$FILE_PATH" + + # Create directory structure + mkdir -p "$DEST_DIR" + + # Download file + FILE_URL="$BASE_URL/api/collector/manifest-file?collector_secret=$(printf %s "$COLLECTOR_SECRET" | jq -sRr @uri)&manifest_version=$MANIFEST_VERSION&path=$(printf %s "$FILE_PATH" | jq -sRr @uri)&container=$(printf %s "$CONTAINER" | jq -sRr @uri)" + + TEMP_FILE=$(mktemp) + if ! make_api_request "$FILE_URL" "$TEMP_FILE"; then + log_error "Failed to download file: $CONTAINER/$FILE_PATH" + rm -f "$TEMP_FILE" + exit 9 + fi + + # Move to final location + mv "$TEMP_FILE" "$DEST_FILE" + + # Apply actions + if echo "$ACTIONS" | grep -q "make_executable"; then + chmod +x "$DEST_FILE" + log_info " Made executable: $DEST_FILE" + fi + + log_info " Saved to: $DEST_FILE" +done + +log_info "Bootstrap completed successfully!" 
+log_info "Manifest version: $MANIFEST_VERSION" +log_info "Files downloaded: $FILES_COUNT" +log_info "Location: $MANIFEST_DIR" + +# Mark bootstrap as completed +date > "$BOOTSTRAPPED_FILE" +log_info "Bootstrap marker written to: $BOOTSTRAPPED_FILE" + +# same thing for Beyla container +supervisorctl -s unix:///var/lib/better-stack/beyla-supervisor.sock reread +supervisorctl -s unix:///var/lib/better-stack/beyla-supervisor.sock update + +# reload supervisord config and start processes as indicated by new config (overwriting bootstrap config) +supervisorctl reread +supervisorctl update + +exit 0 diff --git a/collector/bootstrap_supervisord.conf b/collector/bootstrap_supervisord.conf new file mode 100644 index 0000000..93a163f --- /dev/null +++ b/collector/bootstrap_supervisord.conf @@ -0,0 +1,21 @@ +[supervisord] +nodaemon=true +user=root +logfile=/var/log/supervisor/supervisord.log + +[unix_http_server] +file=/var/run/supervisor.sock + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface + +[program:bootstrap] +command=/bootstrap.sh +autostart=true +autorestart=unexpected +stderr_logfile=/var/log/supervisor/bootstrap.err.log +stdout_logfile=/var/log/supervisor/bootstrap.out.log +exitcodes=0 diff --git a/kubernetes-discovery/0-default/discovered_pods.yaml b/collector/kubernetes-discovery/0-default/discovered_pods.yaml similarity index 100% rename from kubernetes-discovery/0-default/discovered_pods.yaml rename to collector/kubernetes-discovery/0-default/discovered_pods.yaml diff --git a/collector/run_supervisord.sh b/collector/run_supervisord.sh new file mode 100644 index 0000000..edcd08d --- /dev/null +++ b/collector/run_supervisord.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e + +# Ensure the supervisord.conf exists in the expected location +SUPERVISORD_CONF="/var/lib/better-stack/collector/supervisord.conf" +BOOTSTRAP_CONF="/bootstrap/supervisord.conf" + +if [ ! -f "$SUPERVISORD_CONF" ]; then + echo "Supervisord config not found at $SUPERVISORD_CONF, copying from bootstrap..." + mkdir -p "$(dirname "$SUPERVISORD_CONF")" + cp "$BOOTSTRAP_CONF" "$SUPERVISORD_CONF" + echo "Copied bootstrap supervisord config to $SUPERVISORD_CONF" +fi + +# Start supervisord +exec /usr/bin/supervisord -c "$SUPERVISORD_CONF" diff --git a/versions/0-default/databases.json b/collector/versions/0-default/databases.json similarity index 100% rename from versions/0-default/databases.json rename to collector/versions/0-default/databases.json diff --git a/test/versions/2025-05-11T11:13:00.000/vector.yaml b/collector/versions/0-default/vector.yaml similarity index 100% rename from test/versions/2025-05-11T11:13:00.000/vector.yaml rename to collector/versions/0-default/vector.yaml diff --git a/docker-compose.seccomp.yml b/docker-compose.seccomp.yml index a8d674d..642d264 100644 --- a/docker-compose.seccomp.yml +++ b/docker-compose.seccomp.yml @@ -2,14 +2,14 @@ services: collector: build: context: . 
- dockerfile: Dockerfile + dockerfile: collector/Dockerfile image: betterstack/collector:latest container_name: better-stack-collector restart: always security_opt: - seccomp=collector-seccomp.json healthcheck: - test: ["CMD", "/healthcheck.sh"] + test: ["CMD-SHELL", "if [ -x /var/lib/better-stack/collector/healthcheck.sh ]; then /var/lib/better-stack/collector/healthcheck.sh; else exit 0; fi"] interval: 30s timeout: 10s start_period: 60s @@ -30,6 +30,7 @@ services: # dockerprobe running in the beyla container writes a map of PIDs->container IDs and names to this volume # Vector uses this file as an enrichment table to tag logs, metrics, and traces with container metadata. - docker-metadata:/enrichment:rw + - /var/lib/better-stack:/var/lib/better-stack ports: # Bind to localhost only for security - Beyla will connect via host network - "127.0.0.1:34320:34320" # Beyla metrics endpoint @@ -38,7 +39,7 @@ services: beyla: build: context: . - dockerfile: Dockerfile.beyla + dockerfile: beyla/Dockerfile image: betterstack/collector-beyla:latest container_name: better-stack-beyla restart: always @@ -71,12 +72,6 @@ services: - GOMEMLIMIT=1400MiB # Pass hostname of host machine to Beyla; needs `export HOSTNAME` before running `docker compose up` - HOSTNAME - # Override OTLP endpoint to point to collector container - - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:34320 - - OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf - - BEYLA_CONFIG_PATH=/etc/beyla/beyla.yaml - # Disable Kubernetes metadata - - BEYLA_KUBE_METADATA_ENABLE=false # Enable dockerprobe - ENABLE_DOCKERPROBE volumes: @@ -92,6 +87,7 @@ services: # Persist ACME certs and stable symlinks across restarts - letsencrypt:/etc/letsencrypt - ssl-certs:/etc/ssl + - /var/lib/better-stack:/var/lib/better-stack depends_on: - collector diff --git a/docker-compose.yml b/docker-compose.yml index 702fba0..c6da3bb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,12 +2,12 @@ services: collector: build: context: . - dockerfile: Dockerfile + dockerfile: collector/Dockerfile image: betterstack/collector:latest container_name: better-stack-collector restart: always healthcheck: - test: ["CMD", "/healthcheck.sh"] + test: ["CMD-SHELL", "if [ -x /var/lib/better-stack/collector/healthcheck.sh ]; then /var/lib/better-stack/collector/healthcheck.sh; else exit 0; fi"] interval: 30s timeout: 10s start_period: 60s @@ -28,6 +28,7 @@ services: # dockerprobe running in the beyla container writes a map of PIDs->container IDs and names to this volume # Vector uses this file as an enrichment table to tag logs, metrics, and traces with container metadata. - docker-metadata:/enrichment:rw + - /var/lib/better-stack:/var/lib/better-stack ports: # Bind to localhost only for security - Beyla will connect via host network - "127.0.0.1:34320:34320" # Beyla metrics endpoint @@ -36,7 +37,7 @@ services: beyla: build: context: . 
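The reworked healthcheck above degrades gracefully before bootstrap has delivered the real script; the same logic as a standalone sketch:

```bash
#!/bin/sh
# Pass the health probe while the downloaded script is absent; once bootstrap
# has installed it, defer to its exit status.
if [ -x /var/lib/better-stack/collector/healthcheck.sh ]; then
    exec /var/lib/better-stack/collector/healthcheck.sh
fi
exit 0
```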
- dockerfile: Dockerfile.beyla + dockerfile: beyla/Dockerfile image: betterstack/collector-beyla:latest container_name: better-stack-beyla restart: always @@ -69,12 +70,6 @@ services: - GOMEMLIMIT=1400MiB # Pass hostname of host machine to Beyla; needs `export HOSTNAME` before running `docker compose up` - HOSTNAME - # Override OTLP endpoint to point to collector container - - OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:34320 - - OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf - - BEYLA_CONFIG_PATH=/etc/beyla/beyla.yaml - # Disable Kubernetes metadata - - BEYLA_KUBE_METADATA_ENABLE=false # Enable dockerprobe - ENABLE_DOCKERPROBE volumes: @@ -90,6 +85,7 @@ services: # Persist ACME certs and stable symlinks across restarts - letsencrypt:/etc/letsencrypt - ssl-certs:/etc/ssl + - /var/lib/better-stack:/var/lib/better-stack depends_on: - collector diff --git a/dockerprobe/databases.default.csv b/dockerprobe/databases.default.csv deleted file mode 100644 index 28c456c..0000000 --- a/dockerprobe/databases.default.csv +++ /dev/null @@ -1 +0,0 @@ -identifier,container,service,host \ No newline at end of file diff --git a/dockerprobe/docker-mappings.default.csv b/dockerprobe/docker-mappings.default.csv deleted file mode 100644 index d0a6ff0..0000000 --- a/dockerprobe/docker-mappings.default.csv +++ /dev/null @@ -1 +0,0 @@ -pid,container_name,container_id,image_name \ No newline at end of file diff --git a/dockerprobe/docker_client.rb b/dockerprobe/docker_client.rb deleted file mode 100644 index c9c5c1d..0000000 --- a/dockerprobe/docker_client.rb +++ /dev/null @@ -1,88 +0,0 @@ -# frozen_string_literal: true - -require 'net/http' -require 'json' -require 'uri' - -# Net::HTTP subclass for Unix socket support -class UnixSocketHttp < Net::HTTP - attr_accessor :socket_path - - def initialize(address, port = nil) - super(address, port) - end - - def connect - raise "Socket path not set" unless @socket_path - @socket = Net::BufferedIO.new(UNIXSocket.new(@socket_path)) - on_connect - end -end - -# Docker API client without external dependencies, following the style of engine/ -class DockerClient - DEFAULT_SOCKET = '/var/run/docker.sock' - API_VERSION = 'v1.41' # Docker API version - - def initialize(socket_path = nil) - @socket_path = socket_path || ENV['DOCKER_HOST'] || "unix://#{DEFAULT_SOCKET}" - @socket_path = @socket_path.sub(/^unix:\/\//, '') if @socket_path.start_with?('unix://') - @http = nil - end - - # List containers - # Options: - # all: boolean - Show all containers (default false shows only running) - def list_containers(options = {}) - params = [] - params << "all=#{options[:all]}" unless options[:all].nil? - - query_string = params.empty? ? '' : "?#{params.join('&')}" - get("/containers/json#{query_string}") - end - - # Get detailed container information - def inspect_container(container_id) - get("/containers/#{container_id}/json") - end - - private - - def http - @http ||= begin - h = UnixSocketHttp.new('localhost', nil) - h.socket_path = @socket_path - h - end - end - - def get(path) - uri = URI("http://localhost/#{API_VERSION}#{path}") - - request = Net::HTTP::Get.new(uri.path + (uri.query ? "?#{uri.query}" : '')) - request['Accept'] = 'application/json' - request['Content-Type'] = 'application/json' - - response = http.request(request) - - # Handle response - case response.code.to_i - when 200, 201, 204 - return nil if response.body.nil? || response.body.empty? 
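The Ruby client deleted here spoke to the Docker Engine API (pinned to v1.41) over the local Unix socket; the equivalent probe with curl, for reference:

```bash
# List running containers via the Docker Engine API over the Unix socket,
# printing each container's names.
curl -s --unix-socket /var/run/docker.sock \
    http://localhost/v1.41/containers/json | jq -r '.[].Names[]'
```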
- JSON.parse(response.body) - when 404 - raise "Docker API endpoint not found: #{path}" - else - error_msg = "Docker API error (#{response.code}): " - begin - error_body = JSON.parse(response.body) - error_msg += error_body['message'] || response.body - rescue JSON::ParserError - error_msg += response.body || 'Unknown error' - end - raise error_msg - end - rescue Errno::ENOENT, Errno::EACCES => e - raise "Docker socket not accessible at #{@socket_path}: #{e.message}" - end -end diff --git a/dockerprobe/dockerprobe.rb b/dockerprobe/dockerprobe.rb deleted file mode 100644 index bf29b8b..0000000 --- a/dockerprobe/dockerprobe.rb +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -# Produces a CSV file associating process IDs to container IDs and names. -# This CSV file is formatted as: -# -# pid,container_name,container_id,image_name -# 1115,better-stack-collector,59e2ea91d8af,betterstack/collector:latest -# 1020,your-container-replica-name-1,0dbc098bc64d,your-repository/your-image:latest -# -# This file is shared from the Beyla container to the Collector container via the docker-metadata volume mounted at /enrichment. -# Vector uses this file to enrich logs, metrics, and traces with container metadata. - -require 'csv' -require 'json' -require 'fileutils' -require 'logger' - -# Load Docker API client -require_relative 'docker_client' - -class Dockerprobe - DEFAULT_OUTPUT_PATH = '/enrichment/docker-mappings.incoming.csv' - DEFAULT_INTERVAL = 15 # seconds; in line with the default tick rate of Beyla collection - SHORT_CONTAINER_ID_LEN = 12 # length of the short container ID (e.g. 0dbc098bc64d) - CSV_HEADERS = %w[pid container_name container_id image_name] - DEFAULT_PROC_PATH = '/proc' - - attr_reader :proc_path - - def initialize(proc_path: nil) - @output_path = ENV['DOCKERPROBE_OUTPUT_PATH'] || DEFAULT_OUTPUT_PATH - @interval = (ENV['DOCKERPROBE_INTERVAL'] || DEFAULT_INTERVAL).to_i - @docker_client = DockerClient.new - @logger = Logger.new(STDOUT) - @logger.level = ENV['DEBUG'] ? Logger::DEBUG : Logger::INFO - @logger.formatter = proc { |severity, datetime, _, msg| "#{datetime.strftime('%Y-%m-%d %H:%M:%S')} [#{severity}] #{msg}\n" } - @running = true - @proc_path = proc_path || DEFAULT_PROC_PATH - end - - def run - @logger.info "Starting dockerprobe..." - @logger.info "Output path: #{@output_path}" - @logger.info "Update interval: #{@interval} seconds" - - # Ensure output directory exists - dir = File.dirname(@output_path) - FileUtils.mkdir_p(dir) - - # Set up signal handlers for graceful shutdown - %w[INT TERM].each do |signal| - Signal.trap(signal) do - @logger.info "Received #{signal} signal, shutting down..." 
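Given the CSV format documented in the header above, looking up which container owns a PID is a one-liner (path per the script's default output):

```bash
# Print container name, ID and image for PID 1115 from the enrichment table.
awk -F, -v pid=1115 '$1 == pid { print $2, $3, $4 }' \
    /enrichment/docker-mappings.incoming.csv
```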
- @running = false - end - end - - # Initial update - update_mappings - - # Main loop - while @running - sleep @interval - break unless @running - update_mappings - end - - @logger.info "Graceful shutdown complete" - rescue => e - @logger.error "Fatal error in dockerprobe: #{e.message}" - @logger.error e.backtrace.join("\n") if ENV['DEBUG'] - exit 1 - end - - private - - def update_mappings - containers = list_running_containers - pid_mappings = {} - - containers.each do |container| - process_container(container, pid_mappings) - rescue => e - @logger.error "Failed to process container #{container['Id'][0...SHORT_CONTAINER_ID_LEN]}: #{e.message}" - @logger.debug e.backtrace.join("\n") if ENV['DEBUG'] - end - - write_csv_file(pid_mappings) - - # Flush output after each update cycle - STDOUT.flush - rescue => e - @logger.error "updateMappings error: #{e.message}" - @logger.debug e.backtrace.join("\n") if ENV['DEBUG'] - STDOUT.flush # Also flush on error - end - - def list_running_containers - @docker_client.list_containers(all: false) - end - - def process_container(container, pid_mappings) - # Get detailed container info - inspect = @docker_client.inspect_container(container['Id']) - - pid = inspect['State']['Pid'] - return if pid.nil? || pid <= 0 - - # Container info to store - container_info = { - name: container['Names'].first.to_s.sub(/^\//, ''), # Remove the leading slash - id: container['Id'][0...SHORT_CONTAINER_ID_LEN], - image: container['Image'] - } - - # Get all descendant PIDs - pids = get_process_descendants(pid) - - # Map each PID to this container - pids.each do |p| - pid_mappings[p.to_s] = container_info - end - - @logger.info "Mapped #{pids.length} PIDs to container #{container_info[:name]}" - end - - def get_process_descendants(root_pid) - descendants = [root_pid] - to_check = [root_pid] - - until to_check.empty? - current_pid = to_check.shift # FIFO queue - - children = find_child_processes(current_pid) - children.each do |child_pid| - unless descendants.include?(child_pid) - descendants << child_pid - to_check << child_pid - end - end - end - - descendants - end - - def find_child_processes(parent_pid) - children = [] - - # Scan proc directory for processes - return [] unless File.directory?(@proc_path) - - Dir.entries(@proc_path).each do |entry| - next unless entry =~ /^\d+$/ - - pid = entry.to_i - ppid = get_parent_pid(pid) - - children << pid if ppid == parent_pid - rescue => e - # Ignore processes that disappear or can't be read - @logger.debug "Error reading process #{pid}: #{e.message}" if ENV['DEBUG'] - end - - children - rescue => e - @logger.debug "Error scanning #{@proc_path}: #{e.message}" if ENV['DEBUG'] - [] - end - - def get_parent_pid(pid) - stat_file = File.join(@proc_path, pid.to_s, 'stat') - return nil unless File.exist?(stat_file) - - stat_data = File.read(stat_file) - - # The stat file format is: pid (comm) state ppid ... 
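The same last-parenthesis trick in shell, since `comm` may itself contain spaces and parentheses (a sketch against the current shell's own stat entry):

```bash
# Read /proc/<pid>/stat and take the fields after the last ')':
# $1=state, $2=ppid.
stat_line=$(cat "/proc/$$/stat")
rest=${stat_line##*)}   # greedy strip through the final ')'
set -- $rest
echo "state=$1 ppid=$2"
```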
- # We need to handle the case where comm contains parentheses - last_paren = stat_data.rindex(')') - return nil unless last_paren - - # Fields after the last parenthesis - fields = stat_data[(last_paren + 1)..].split - return nil if fields.length < 2 - - # Parent PID is the second field after the command name - fields[1].to_i - rescue => e - @logger.debug "Error reading parent PID for #{pid}: #{e.message}" if ENV['DEBUG'] - nil - end - - def write_csv_file(pid_mappings) - tmp_path = "#{@output_path}.tmp" - - # Sort PIDs numerically for stable ordering - sorted_pids = pid_mappings.keys.sort_by(&:to_i) - - CSV.open(tmp_path, 'w') do |csv| - # Write header - csv << CSV_HEADERS - - # Write mappings - sorted_pids.each do |pid| - info = pid_mappings[pid] - csv << [pid, info[:name], info[:id], info[:image]] - end - end - - # Atomic rename - File.rename(tmp_path, @output_path) - - @logger.info "Updated PID mappings file with #{pid_mappings.length} entries" - rescue => e - @logger.error "Failed to write PID mappings: #{e.message}" - @logger.debug e.backtrace.join("\n") if ENV['DEBUG'] - - # Clean up the temp file if it exists - File.unlink(tmp_path) if File.exist?(tmp_path) - end -end - -# Run if executed directly -if __FILE__ == $0 - Dockerprobe.new.run -end diff --git a/dockerprobe/test/dockerprobe_test.rb b/dockerprobe/test/dockerprobe_test.rb deleted file mode 100644 index 5f92041..0000000 --- a/dockerprobe/test/dockerprobe_test.rb +++ /dev/null @@ -1,506 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'tempfile' -require 'csv' -require 'json' -require_relative '../dockerprobe' -# require_relative '../docker_client' - -class TestDockerClient < Minitest::Test - def setup - @client = DockerClient.new - end - - def test_initialization_with_default_socket - client = DockerClient.new - assert client - end - - def test_initialization_with_custom_socket - client = DockerClient.new('/custom/docker.sock') - assert client - end - - def test_initialization_with_env_docker_host - ENV['DOCKER_HOST'] = 'unix:///tmp/docker.sock' - client = DockerClient.new - assert client - ensure - ENV.delete('DOCKER_HOST') - end - - # Note: Integration tests for actual Docker API calls would require - # a running Docker daemon and mock containers -end - -class TestDockerprobe < Minitest::Test - def setup - @temp_dir = Dir.mktmpdir - @output_file = File.join(@temp_dir, 'docker-mappings.csv') - ENV['DOCKERPROBE_OUTPUT_PATH'] = @output_file - ENV['DOCKERPROBE_INTERVAL'] = '1' - end - - def teardown - FileUtils.rm_rf(@temp_dir) - ENV.delete('DOCKERPROBE_OUTPUT_PATH') - ENV.delete('DOCKERPROBE_INTERVAL') - end - - def test_initialization - probe = Dockerprobe.new - assert probe - end - - def test_default_configuration - ENV.delete('DOCKERPROBE_OUTPUT_PATH') - ENV.delete('DOCKERPROBE_INTERVAL') - - probe = Dockerprobe.new - assert_equal '/enrichment/docker-mappings.incoming.csv', probe.instance_variable_get(:@output_path) - assert_equal 15, probe.instance_variable_get(:@interval) - end - - def test_custom_configuration - ENV['DOCKERPROBE_OUTPUT_PATH'] = '/custom/path.csv' - ENV['DOCKERPROBE_INTERVAL'] = '30' - - probe = Dockerprobe.new - assert_equal '/custom/path.csv', probe.instance_variable_get(:@output_path) - assert_equal 30, probe.instance_variable_get(:@interval) - end - - def test_get_parent_pid - # Create a mock /proc structure - proc_dir = Dir.mktmpdir - - # Create mock stat file for PID 1234 with parent PID 1000 - pid_dir = File.join(proc_dir, '1234') - 
FileUtils.mkdir_p(pid_dir) - - # stat format: pid (comm) state ppid ... - # 1234 (test_process) S 1000 ... - stat_content = "1234 (test_process) S 1000 1000 1000 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0" - File.write(File.join(pid_dir, 'stat'), stat_content) - - probe = Dockerprobe.new(proc_path: proc_dir) - - # Test with mock PID - ppid = probe.send(:get_parent_pid, 1234) - assert_equal 1000, ppid - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_get_parent_pid_with_parentheses_in_name - # Create a mock /proc structure - proc_dir = Dir.mktmpdir - - # Create mock stat file with parentheses in the process name - pid_dir = File.join(proc_dir, '5678') - FileUtils.mkdir_p(pid_dir) - - # Process name with parentheses: (test (with) parens) - stat_content = "5678 ((test (with) parens)) S 2000 2000 2000 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0" - File.write(File.join(pid_dir, 'stat'), stat_content) - - probe = Dockerprobe.new(proc_path: proc_dir) - - ppid = probe.send(:get_parent_pid, 5678) - assert_equal 2000, ppid - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_get_parent_pid_invalid - proc_dir = Dir.mktmpdir - probe = Dockerprobe.new(proc_path: proc_dir) - - # Test with non-existent PID - ppid = probe.send(:get_parent_pid, 999999999) - assert_nil ppid - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_find_child_processes - # Create a mock /proc structure with parent-child relationships - proc_dir = Dir.mktmpdir - - # Create parent process (PID 100) - parent_dir = File.join(proc_dir, '100') - FileUtils.mkdir_p(parent_dir) - File.write(File.join(parent_dir, 'stat'), "100 (parent) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Create child processes (PIDs 200, 300) with parent PID 100 - child1_dir = File.join(proc_dir, '200') - FileUtils.mkdir_p(child1_dir) - File.write(File.join(child1_dir, 'stat'), "200 (child1) S 100 100 100 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - child2_dir = File.join(proc_dir, '300') - FileUtils.mkdir_p(child2_dir) - File.write(File.join(child2_dir, 'stat'), "300 (child2) S 100 100 100 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Create an unrelated process (PID 400) with different parent - other_dir = File.join(proc_dir, '400') - FileUtils.mkdir_p(other_dir) - File.write(File.join(other_dir, 'stat'), "400 (other) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - probe = Dockerprobe.new(proc_path: proc_dir) - - # Find children of PID 100 - children = probe.send(:find_child_processes, 100) - - assert_equal 2, children.length, "Should find 2 child processes" - assert_includes children, 200, "Should find child PID 200" - assert_includes children, 300, "Should find child PID 300" - refute_includes children, 400, "Should not include unrelated PID 400" - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_get_process_descendants - # Create a mock /proc structure with a process tree - proc_dir = Dir.mktmpdir - - # Process tree: - # 100 (parent) - # ├── 200 (child1) - # │ └── 201 (grandchild1) - # └── 300 (child2) - - # Parent process - FileUtils.mkdir_p(File.join(proc_dir, '100')) - File.write(File.join(proc_dir, '100', 'stat'), "100 (parent) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Child 1 - FileUtils.mkdir_p(File.join(proc_dir, '200')) - File.write(File.join(proc_dir, '200', 'stat'), "200 (child1) S 100 100 100 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Grandchild - FileUtils.mkdir_p(File.join(proc_dir, '201')) - File.write(File.join(proc_dir, '201', 'stat'), "201 (grandchild1) S 200 200 200 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Child 2 - 
FileUtils.mkdir_p(File.join(proc_dir, '300')) - File.write(File.join(proc_dir, '300', 'stat'), "300 (child2) S 100 100 100 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Unrelated process - FileUtils.mkdir_p(File.join(proc_dir, '400')) - File.write(File.join(proc_dir, '400', 'stat'), "400 (other) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - probe = Dockerprobe.new(proc_path: proc_dir) - - # Get all descendants of PID 100 - descendants = probe.send(:get_process_descendants, 100) - - assert_equal 4, descendants.length, "Should find parent + 2 children + 1 grandchild" - assert_includes descendants, 100, "Should include parent PID 100" - assert_includes descendants, 200, "Should include child PID 200" - assert_includes descendants, 201, "Should include grandchild PID 201" - assert_includes descendants, 300, "Should include child PID 300" - refute_includes descendants, 400, "Should not include unrelated PID 400" - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_write_csv_file - probe = Dockerprobe.new - - # Test data - pid_mappings = { - '1234' => { name: 'test-container', id: 'abc123def456', image: 'test:latest' }, - '5678' => { name: 'another-container', id: '789ghi012jkl', image: 'another:v1.0' } - } - - probe.send(:write_csv_file, pid_mappings) - - # Verify file was created - assert File.exist?(@output_file) - - # Verify CSV content - csv_data = CSV.read(@output_file) - assert_equal %w[pid container_name container_id image_name], csv_data[0] - - # Should be sorted by PID numerically - assert_equal '1234', csv_data[1][0] - assert_equal 'test-container', csv_data[1][1] - assert_equal 'abc123def456', csv_data[1][2] - assert_equal 'test:latest', csv_data[1][3] - - assert_equal '5678', csv_data[2][0] - assert_equal 'another-container', csv_data[2][1] - assert_equal '789ghi012jkl', csv_data[2][2] - assert_equal 'another:v1.0', csv_data[2][3] - end - - def test_write_csv_file_atomic - probe = Dockerprobe.new - - # Write initial data - initial_mappings = { - '1111' => { name: 'initial', id: 'initial123456', image: 'initial:latest' } - } - probe.send(:write_csv_file, initial_mappings) - - # Read initial content - initial_content = File.read(@output_file) - - # Simulate partial write failure by making temp file unwritable - # (This tests that temp file is cleaned up on error) - tmp_path = "#{@output_file}.tmp" - File.write(tmp_path, 'partial data') - File.chmod(0444, tmp_path) # Read-only - - # Try to write new data (should fail but not corrupt existing file) - new_mappings = { - '2222' => { name: 'new', id: 'new123456789', image: 'new:latest' } - } - - # This should fail silently (logged but not raised) - probe.send(:write_csv_file, new_mappings) - - # Original file should still have initial content - assert_equal initial_content, File.read(@output_file) if File.exist?(@output_file) - - # Clean up - File.chmod(0644, tmp_path) rescue nil - File.unlink(tmp_path) rescue nil - end - - def test_process_hierarchy_breadth_first - # Create a mock /proc structure with a broader process tree - proc_dir = Dir.mktmpdir - - # Process tree (tests breadth-first traversal): - # 1000 - # ├── 2000 - # │ ├── 2100 - # │ └── 2200 - # └── 3000 - # └── 3100 - # └── 3110 - - # Root - FileUtils.mkdir_p(File.join(proc_dir, '1000')) - File.write(File.join(proc_dir, '1000', 'stat'), "1000 (root) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # First level children - FileUtils.mkdir_p(File.join(proc_dir, '2000')) - File.write(File.join(proc_dir, '2000', 'stat'), "2000 (branch1) S 1000 1000 1000 0 -1 0 0 0 0 0 0 0 0 0 0 
0 0 0") - - FileUtils.mkdir_p(File.join(proc_dir, '3000')) - File.write(File.join(proc_dir, '3000', 'stat'), "3000 (branch2) S 1000 1000 1000 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Second level - FileUtils.mkdir_p(File.join(proc_dir, '2100')) - File.write(File.join(proc_dir, '2100', 'stat'), "2100 (leaf1) S 2000 2000 2000 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - FileUtils.mkdir_p(File.join(proc_dir, '2200')) - File.write(File.join(proc_dir, '2200', 'stat'), "2200 (leaf2) S 2000 2000 2000 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - FileUtils.mkdir_p(File.join(proc_dir, '3100')) - File.write(File.join(proc_dir, '3100', 'stat'), "3100 (branch2child) S 3000 3000 3000 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Third level - FileUtils.mkdir_p(File.join(proc_dir, '3110')) - File.write(File.join(proc_dir, '3110', 'stat'), "3110 (deepleaf) S 3100 3100 3100 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - probe = Dockerprobe.new(proc_path: proc_dir) - - # Get all descendants of root PID 1000 - descendants = probe.send(:get_process_descendants, 1000) - - # Should find all 7 processes - assert_equal 7, descendants.length, "Should find all 7 processes in the tree" - [1000, 2000, 3000, 2100, 2200, 3100, 3110].each do |pid| - assert_includes descendants, pid, "Should include PID #{pid}" - end - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end -end - -# Mock Docker client for testing without Docker daemon -class MockDockerClient < DockerClient - def initialize(mock_data = {}) - super() - @mock_containers = mock_data[:containers] || [] - @mock_inspections = mock_data[:inspections] || {} - end - - def list_containers(options = {}) - @mock_containers - end - - def inspect_container(container_id) - @mock_inspections[container_id] || { - 'State' => { 'Pid' => 0 } - } - end -end - -class TestDockerprobeIntegration < Minitest::Test - def setup - @temp_dir = Dir.mktmpdir - @output_file = File.join(@temp_dir, 'docker-mappings.csv') - ENV['DOCKERPROBE_OUTPUT_PATH'] = @output_file - ENV['DOCKERPROBE_INTERVAL'] = '1' - end - - def teardown - FileUtils.rm_rf(@temp_dir) - ENV.delete('DOCKERPROBE_OUTPUT_PATH') - ENV.delete('DOCKERPROBE_INTERVAL') - end - - def test_process_container_with_mock_data - # Create mock /proc structure - proc_dir = Dir.mktmpdir - - # Create mock process with PID 5000 - FileUtils.mkdir_p(File.join(proc_dir, '5000')) - File.write(File.join(proc_dir, '5000', 'stat'), "5000 (container_proc) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Create mock Docker data - mock_containers = [ - { - 'Id' => 'abc123def456789012345', - 'Names' => ['/test-container'], - 'Image' => 'test:latest' - } - ] - - mock_inspections = { - 'abc123def456789012345' => { - 'State' => { 'Pid' => 5000 } - } - } - - mock_client = MockDockerClient.new( - containers: mock_containers, - inspections: mock_inspections - ) - - probe = Dockerprobe.new(proc_path: proc_dir) - probe.instance_variable_set(:@docker_client, mock_client) - - # Run update_mappings - probe.send(:update_mappings) - - # Verify CSV was created with correct data - assert File.exist?(@output_file) - - csv_data = CSV.read(@output_file) - assert_equal %w[pid container_name container_id image_name], csv_data[0] - - # Find row with mock PID - our_row = csv_data.find { |row| row[0] == '5000' } - assert our_row, "Should have mapped mock process PID 5000" - assert_equal 'test-container', our_row[1] - assert_equal 'abc123def456', our_row[2] # Short ID (12 chars) - assert_equal 'test:latest', our_row[3] - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_multiple_containers - # 
Create mock /proc structure - proc_dir = Dir.mktmpdir - - # Create mock processes for each container - FileUtils.mkdir_p(File.join(proc_dir, '1001')) - File.write(File.join(proc_dir, '1001', 'stat'), "1001 (container1) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - FileUtils.mkdir_p(File.join(proc_dir, '1002')) - File.write(File.join(proc_dir, '1002', 'stat'), "1002 (container2) S 1 1 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0") - - # Create mock Docker data with multiple containers - mock_containers = [ - { - 'Id' => 'container1id23456789012', - 'Names' => ['/container-1'], - 'Image' => 'image1:latest' - }, - { - 'Id' => 'container2id23456789012', - 'Names' => ['/container-2'], - 'Image' => 'image2:v1.0' - } - ] - - # Use different PIDs for each container - mock_inspections = { - 'container1id23456789012' => { - 'State' => { 'Pid' => 1001 } - }, - 'container2id23456789012' => { - 'State' => { 'Pid' => 1002 } - } - } - - mock_client = MockDockerClient.new( - containers: mock_containers, - inspections: mock_inspections - ) - - probe = Dockerprobe.new(proc_path: proc_dir) - probe.instance_variable_set(:@docker_client, mock_client) - - # Run update_mappings - probe.send(:update_mappings) - - # Verify CSV was created with both containers - assert File.exist?(@output_file) - - csv_data = CSV.read(@output_file) - assert_equal 3, csv_data.length # Header + 2 containers - - # Verify both containers are present - pids = csv_data[1..].map { |row| row[0] } - assert_includes pids, '1001' - assert_includes pids, '1002' - ensure - FileUtils.rm_rf(proc_dir) if proc_dir - end - - def test_container_without_pid - # Container that's not running (Pid = 0) - mock_containers = [ - { - 'Id' => 'stoppedcontainer123456', - 'Names' => ['/stopped-container'], - 'Image' => 'stopped:latest' - } - ] - - mock_inspections = { - 'stoppedcontainer123456' => { - 'State' => { 'Pid' => 0 } - } - } - - mock_client = MockDockerClient.new( - containers: mock_containers, - inspections: mock_inspections - ) - - probe = Dockerprobe.new - probe.instance_variable_set(:@docker_client, mock_client) - - # Run update_mappings - probe.send(:update_mappings) - - # CSV should only have header (no PIDs mapped) - csv_data = CSV.read(@output_file) - assert_equal 1, csv_data.length # Only header - assert_equal %w[pid container_name container_id image_name], csv_data[0] - end -end - -if __FILE__ == $0 - # Run tests - exit Minitest.run(ARGV) -end diff --git a/ebpf.sh b/ebpf.sh deleted file mode 100755 index c5e8a8b..0000000 --- a/ebpf.sh +++ /dev/null @@ -1,190 +0,0 @@ -#!/bin/bash - -# eBPF Compatibility Check Script for Better Stack Collector -# This script checks if your system supports eBPF features required by Beyla -# Specifically: BTF + CO-RE support and eBPF ring buffer (BPF_MAP_TYPE_RINGBUF) - -set -e - -# Check for JSON output flag -JSON_OUTPUT=false -if [ "$1" == "--json" ]; then - JSON_OUTPUT=true -fi - -# Color codes for output (only used for non-JSON output) -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color -BOLD='\033[1m' - -# Get kernel version -KERNEL_VERSION=$(uname -r) -KERNEL_MAJOR=$(echo "$KERNEL_VERSION" | cut -d. -f1) -KERNEL_MINOR=$(echo "$KERNEL_VERSION" | cut -d. 
-f2) - -# Get system information -ARCHITECTURE=$(uname -m) -DISTRIBUTION="Unknown" -if [ -f /etc/os-release ]; then - DISTRIBUTION=$(grep "^PRETTY_NAME" /etc/os-release | cut -d= -f2 | tr -d '"') -elif [[ "$OSTYPE" == "darwin"* ]]; then - DISTRIBUTION="macOS $(sw_vers -productVersion 2>/dev/null || echo "")" -fi - -# Function to check if kernel version meets minimum requirement -check_kernel_version() { - local min_major=$1 - local min_minor=$2 - - if [ "$KERNEL_MAJOR" -gt "$min_major" ] || \ - ([ "$KERNEL_MAJOR" -eq "$min_major" ] && [ "$KERNEL_MINOR" -ge "$min_minor" ]); then - return 0 - else - return 1 - fi -} - -# Initialize check results -HAS_EBPF=true -DETAILS="" - -# JSON result variables -RING_BUFFER_SUPPORTED=false -BPF_FILESYSTEM_MOUNTED=false -BTF_SUPPORT_AVAILABLE=false -BPF_SYSCALL_ENABLED=null -BPF_JIT_ENABLED=null - -# Check kernel version -if ! check_kernel_version 5 14; then - DETAILS="${DETAILS} ${YELLOW}⚠${NC} Kernel version: $KERNEL_VERSION (5.14+ required, older may work with backports)\n" -else - DETAILS="${DETAILS} ${GREEN}✓${NC} Kernel version: $KERNEL_VERSION\n" -fi - -# Check for ring buffer support (5.8+ minimum) -if check_kernel_version 5 8; then - RING_BUFFER_SUPPORTED=true - DETAILS="${DETAILS} ${GREEN}✓${NC} eBPF ring buffer: supported\n" -else - HAS_EBPF=false - DETAILS="${DETAILS} ${RED}✗${NC} eBPF ring buffer: not supported (requires kernel 5.8+)\n" -fi - -# Check for BPF filesystem -if [ -d "/sys/fs/bpf" ]; then - BPF_FILESYSTEM_MOUNTED=true - DETAILS="${DETAILS} ${GREEN}✓${NC} BPF filesystem: mounted\n" -else - HAS_EBPF=false - DETAILS="${DETAILS} ${RED}✗${NC} BPF filesystem: not available\n" -fi - -# Check for BTF support -if [ -f "/sys/kernel/btf/vmlinux" ] || [ -f "/boot/vmlinux-$KERNEL_VERSION" ]; then - BTF_SUPPORT_AVAILABLE=true - DETAILS="${DETAILS} ${GREEN}✓${NC} BTF + CO-RE support: available\n" -else - HAS_EBPF=false - DETAILS="${DETAILS} ${RED}✗${NC} BTF + CO-RE support: not available\n" -fi - -# Check for CONFIG_BPF_SYSCALL -BPF_SYSCALL_FOUND=false -if [ -f "/proc/config.gz" ]; then - if zcat /proc/config.gz 2>/dev/null | grep -q "CONFIG_BPF_SYSCALL=y"; then - BPF_SYSCALL_FOUND=true - fi -elif [ -f "/boot/config-$KERNEL_VERSION" ]; then - if grep -q "CONFIG_BPF_SYSCALL=y" "/boot/config-$KERNEL_VERSION" 2>/dev/null; then - BPF_SYSCALL_FOUND=true - fi -fi - -if [ "$BPF_SYSCALL_FOUND" = true ]; then - BPF_SYSCALL_ENABLED=true - DETAILS="${DETAILS} ${GREEN}✓${NC} BPF syscall: enabled\n" -elif [ -f "/proc/config.gz" ] || [ -f "/boot/config-$KERNEL_VERSION" ]; then - BPF_SYSCALL_ENABLED=false - HAS_EBPF=false - DETAILS="${DETAILS} ${RED}✗${NC} BPF syscall: not enabled\n" -else - DETAILS="${DETAILS} ${YELLOW}⚠${NC} BPF syscall: unable to verify\n" -fi - -# Check for BPF JIT compiler (warning only) -if [ -f "/proc/sys/net/core/bpf_jit_enable" ]; then - JIT_ENABLED=$(cat /proc/sys/net/core/bpf_jit_enable) - if [ "$JIT_ENABLED" != "0" ]; then - BPF_JIT_ENABLED=true - DETAILS="${DETAILS} ${GREEN}✓${NC} BPF JIT compiler: enabled\n" - else - BPF_JIT_ENABLED=false - DETAILS="${DETAILS} ${YELLOW}⚠${NC} BPF JIT compiler: disabled (performance impact)\n" - fi -fi - -json_bool() { - local value="$1" - if [[ "$value" = true ]]; then - echo "true" - elif [[ "$value" = false ]]; then - echo "false" - else - echo "null" - fi -} - -# Output results -if [ "$JSON_OUTPUT" = true ]; then - cat </dev/null || echo "")" - else - echo " Distribution: Unknown" - fi - fi -fi - -exit 0 diff --git a/engine/base_enrichment_table.rb b/engine/base_enrichment_table.rb deleted 
file mode 100644 index 24893b7..0000000 --- a/engine/base_enrichment_table.rb +++ /dev/null @@ -1,60 +0,0 @@ -require 'digest' -require 'fileutils' - -class BaseEnrichmentTable - attr_reader :target_path, :incoming_path - - def initialize(target_path, incoming_path) - @target_path = target_path - @incoming_path = incoming_path - end - - def different? - directory = File.dirname(incoming_path) - return false unless File.exist?(directory) && File.directory?(directory) - return false unless File.exist?(incoming_path) - - current_hash = calculate_file_hash(target_path) - new_hash = calculate_file_hash(incoming_path) - - current_hash != new_hash - end - - def validate - puts "Validating #{table_name} at #{incoming_path}" - - if !File.exist?(incoming_path) - puts "#{table_name} not found at #{incoming_path}" - return "#{table_name} not found at #{incoming_path}" - end - - if File.size(incoming_path) == 0 - puts "#{table_name} is empty at #{incoming_path}" - return "#{table_name} is empty at #{incoming_path}" - end - - validate_headers - end - - def promote - FileUtils.mv(incoming_path, target_path) - end - - protected - - # Subclasses must implement these methods - def table_name - raise NotImplementedError, "Subclasses must implement table_name" - end - - def validate_headers - raise NotImplementedError, "Subclasses must implement validate_headers" - end - - private - - def calculate_file_hash(file_path) - return nil unless File.exist?(file_path) - Digest::MD5.file(file_path).hexdigest - end -end \ No newline at end of file diff --git a/engine/better_stack_client.rb b/engine/better_stack_client.rb deleted file mode 100644 index d3bd3f1..0000000 --- a/engine/better_stack_client.rb +++ /dev/null @@ -1,346 +0,0 @@ -require_relative 'utils' -require_relative 'kubernetes_discovery' -require_relative 'vector_config' -require_relative 'ebpf_compatibility_checker' -require_relative 'containers_enrichment_table' -require_relative 'databases_enrichment_table' -require_relative 'ssl_certificate_manager' -require 'net/http' -require 'fileutils' -require 'time' -require 'forwardable' - -class BetterStackClient - extend Forwardable - include Utils - - NOT_CLEARABLE_ERRORS = ['Validation failed', 'Invalid configuration version', 'Invalid filename'].freeze - - def_delegator :@vector_config, :reload_vector - - def initialize(working_dir) - @base_url = (ENV['BASE_URL'] || 'https://telemetry.betterstack.com').chomp('/') - @collector_secret = ENV['COLLECTOR_SECRET'] - @working_dir = working_dir.chomp('/') - - if @collector_secret.nil? || @collector_secret.empty? 
- puts "Error: COLLECTOR_SECRET environment variable is required" - exit 1 - end - - @kubernetes_discovery = KubernetesDiscovery.new(working_dir) - @vector_config = VectorConfig.new(working_dir) - @ebpf_compatibility_checker = EbpfCompatibilityChecker.new(working_dir) - @ssl_certificate_manager = SSLCertificateManager.new(nil) # Use default /etc path for production - - containers_path = File.join(working_dir, 'enrichment', 'docker-mappings.csv') - containers_incoming_path = File.join(working_dir, 'enrichment', 'docker-mappings.incoming.csv') - @containers_enrichment_table = ContainersEnrichmentTable.new(containers_path, containers_incoming_path) - - databases_path = File.join(working_dir, 'enrichment', 'databases.csv') - databases_incoming_path = File.join(working_dir, 'enrichment', 'databases.incoming.csv') - @databases_enrichment_table = DatabasesEnrichmentTable.new(databases_path, databases_incoming_path) - end - - def make_post_request(path, params) - uri = URI("#{@base_url}/api#{path}") - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = uri.scheme == 'https' - request = Net::HTTP::Post.new(uri.path) - request.set_form_data(params) - http.request(request) - end - - def cluster_collector? - if ENV['CLUSTER_COLLECTOR'] == 'true' - puts "CLUSTER_COLLECTOR configured in the ENV, forcing cluster collector mode" - return true - end - - response = make_post_request('/collector/cluster-collector', { - collector_secret: @collector_secret, - host: hostname, - }) - - case response.code - when '204', '200' - return true - when '401', '403' - puts 'Cluster collector check failed: unauthorized. Please check your COLLECTOR_SECRET.' - exit 1 - when '409' - # server returns 409 if a different collector is supposed to be cluster collector - return false - else - puts "Unexpected response from cluster-collector endpoint: #{response.code}" - return false - end - end - - def ping - ping_params = { - collector_secret: @collector_secret, - cluster_collector: ENV['CLUSTER_COLLECTOR'] == 'true', - host: hostname, - collector_version: ENV['COLLECTOR_VERSION'], - vector_version: ENV['VECTOR_VERSION'], - beyla_version: ENV['BEYLA_VERSION'], - cluster_agent_version: ENV['CLUSTER_AGENT_VERSION'], - } - ping_params[:configuration_version] = latest_version if latest_version # Only send version if one exists - ping_params[:error] = read_error if read_error - - # Include system_information only on first ping - if !@ebpf_compatibility_checker.reported? && @ebpf_compatibility_checker.system_information - ping_params[:system_information] = @ebpf_compatibility_checker.system_information.to_json - end - - response = make_post_request('/collector/ping', ping_params) - - if response.code == '204' || response.code == '200' - @ebpf_compatibility_checker.mark_as_reported - end - - upstream_changed = process_ping(response.code, response.body) - - # Run kubernetes discovery if latest valid vector config uses kubernetes_discovery_* - vector_config_uses_kubernetes_discovery = @kubernetes_discovery.should_discover? 
- kubernetes_discovery_changed = @kubernetes_discovery.run if vector_config_uses_kubernetes_discovery - - # Create new vector-config version if either changed - if upstream_changed || kubernetes_discovery_changed - puts "Upstream configuration changed - updating vector-config" if upstream_changed - puts "Kubernetes discovery changed - updating vector-config" if kubernetes_discovery_changed - - new_config_dir = @vector_config.prepare_dir - validate_output = @vector_config.validate_dir(new_config_dir) - unless validate_output.nil? - write_error("Validation failed for vector config with kubernetes_discovery\n\n#{validate_output}") - return false - end - - result = @vector_config.promote_dir(new_config_dir) - clear_error - - return result - end - - false - end - - def enrichment_table_changed? = @containers_enrichment_table.different? - - def validate_enrichment_table - output = @containers_enrichment_table.validate - unless output.nil? - write_error("Validation failed for enrichment table\n\n#{output}") - return output - end - - nil - end - - def promote_enrichment_table = @containers_enrichment_table.promote - - def databases_table_changed? = @databases_enrichment_table.different? - - def validate_databases_table - output = @databases_enrichment_table.validate - unless output.nil? - write_error("Validation failed for databases enrichment table\n\n#{output}") - return output - end - - nil - end - - def promote_databases_table = @databases_enrichment_table.promote - - def process_ping(code, body) - case code - when '204' - puts "No updates available" - # Clear transient errors not related to the configuration on successful, no-updates ping - clear_error if error_clearable? - return - when '200' - data = JSON.parse(body) - if data['status'] == 'new_version_available' - new_version = data['configuration_version'] - puts "New version available: #{new_version}" - - return get_configuration(new_version) - else - # Status is not 'new_version_available', could be an error message or other status - puts "No new version. Status: #{data['status']}" - return - end - when '401', '403' - puts 'Ping failed: unauthorized. Please check your COLLECTOR_SECRET.' - exit 1 - else - puts "Unexpected response from ping endpoint: #{code}" - begin - # Try to parse body for more details if it's JSON - error_details = JSON.parse(body) - write_error("Ping failed: #{code}. Details: #{error_details}") - rescue JSON::ParserError - write_error("Ping failed: #{code}. Body: #{body}") - end - return - end - rescue SocketError => e # More specific network errors - write_error("Network error: #{e.message}") - return - rescue JSON::ParserError => e - write_error("Error parsing JSON response: #{e.message}") - return - rescue StandardError => e - puts "An unexpected error occurred: #{e.message}" - puts e.backtrace.join("\n") - write_error("Error: #{e.message}") - return - end - - def get_configuration(new_version) - if new_version.include?('..') - write_error("Invalid configuration version: '#{new_version}'") - return - end - - params = { - collector_secret: @collector_secret, - configuration_version: new_version - } - - response = make_post_request('/collector/configuration', params) - process_configuration(new_version, response.code, response.body) - end - - def process_configuration(new_version, code, body) - if code == '200' - data = JSON.parse(body) - - puts "Downloading configuration files for version #{new_version}..." 
- all_files_downloaded = true - databases_csv_exists = false - ssl_certificate_host_exists = false - ssl_certificate_host_content = nil - - data['files'].each do |file_info| - # Assuming file_info is a hash {'path': '...', 'name': '...'} or just a URL string - file_url = @base_url + (file_info.is_a?(Hash) ? file_info['path'] : file_info) - filename = file_info.is_a?(Hash) ? file_info['name'] : URI.decode_www_form(URI(file_url).query).to_h['file'] - - # Ensure filename is safe and not an absolute path or contains '..' - if filename.nil? || filename.empty? || filename.include?('..') || filename.start_with?('/') - write_error("Invalid filename '#{filename}' received for version #{new_version}") - all_files_downloaded = false - break - end - - # Track special files - databases_csv_exists = true if filename == "databases.csv" - ssl_certificate_host_exists = true if filename == "ssl_certificate_host.txt" - - path = "#{@working_dir}/versions/#{new_version}/#{filename}".gsub(%r{/+}, '/') - puts "Downloading #{filename} to #{path}" - - begin - download_file(file_url, path) - rescue Utils::DownloadError => e - write_error("Failed to download #{filename} for version #{new_version}: #{e.message}") - all_files_downloaded = false - break # Stop trying to download other files - end - - # Read ssl_certificate_host content before logging it - if filename == "ssl_certificate_host.txt" - ssl_certificate_host_content = File.read(path).strip rescue '' - puts "Got SSL certificate host: #{ssl_certificate_host_content}" - end - end - - unless all_files_downloaded - puts "Aborting update due to download failure." - return - end - - puts "All files downloaded. Processing configuration..." - - version_dir = File.join(@working_dir, "versions", new_version) - - # Process SSL certificate host if included - skip_vector_validation = false - if ssl_certificate_host_exists - domain_changed = @ssl_certificate_manager.process_ssl_certificate_host(ssl_certificate_host_content || '') - if domain_changed - puts "SSL certificate domain changed, will skip Vector validation for this update cycle if certificate not ready" - skip_vector_validation = @ssl_certificate_manager.should_skip_validation? - end - end - - # Validate databases.csv if it exists in this version - if databases_csv_exists - databases_csv_path = File.join(version_dir, 'databases.csv') - - # Get the incoming path from the databases_enrichment_table instance - incoming_path = @databases_enrichment_table.incoming_path - - # Ensure the enrichment directory exists and copy to incoming path for validation - FileUtils.mkdir_p(File.dirname(incoming_path)) - FileUtils.cp(databases_csv_path, incoming_path) - - databases_validate_output = @databases_enrichment_table.validate - unless databases_validate_output.nil?
- write_error("Validation failed for databases enrichment table\n\n#{databases_validate_output}") - # Clean up the incoming file on validation failure - FileUtils.rm_f(incoming_path) - return - end - end - - # Validate and promote vector config only if not skipping - if skip_vector_validation - puts "Skipping Vector validation and promotion due to pending SSL certificate" - else - puts "Proceeding with Vector validation" - validate_output = @vector_config.validate_upstream_files(version_dir) - if validate_output - write_error("Validation failed for vector config in #{new_version}\n\n#{validate_output}") - return - end - - # Only promote vector config if validation passed - @vector_config.promote_upstream_files(version_dir) - end - - # Promote databases.csv if it was included and validated - if databases_csv_exists - @databases_enrichment_table.promote - puts "Promoted databases.csv to #{@databases_enrichment_table.target_path}" - end - - # Reset SSL manager flag for next ping cycle - @ssl_certificate_manager.reset_change_flag if ssl_certificate_host_exists - - # Clean up version directory if we skipped vector validation - # This ensures we'll get the config again on next ping - if skip_vector_validation - puts "Removing version directory to retry vector config on next ping cycle" - FileUtils.rm_rf(version_dir) - end - - !skip_vector_validation - else - write_error("Failed to fetch configuration for version #{new_version}. Response code: #{code}") - end - end - - def error_clearable? - last_error = read_error - return false if last_error.nil? # no need to clear if no error - last_error = URI.decode_www_form_component(last_error) if last_error - !NOT_CLEARABLE_ERRORS.any? { |error| last_error.include?(error) } - end -end diff --git a/engine/containers_enrichment_table.rb b/engine/containers_enrichment_table.rb deleted file mode 100644 index 2529335..0000000 --- a/engine/containers_enrichment_table.rb +++ /dev/null @@ -1,21 +0,0 @@ -require_relative 'base_enrichment_table' - -# Checks whether the containers enrichment table has changed -# Returns a hash of the file if it exists, otherwise nil -# Used by enrichment_table_watcher.rb to reload vector if the enrichment table has changed -class ContainersEnrichmentTable < BaseEnrichmentTable - protected - - def table_name - "Containers enrichment table" - end - - def validate_headers - if File.readlines(incoming_path).first.strip != "pid,container_name,container_id,image_name" - puts "Containers enrichment table is not valid at #{incoming_path}" - return "Containers enrichment table is not valid at #{incoming_path}" - end - - nil - end -end \ No newline at end of file diff --git a/engine/databases_enrichment_table.rb b/engine/databases_enrichment_table.rb deleted file mode 100644 index 9dc7028..0000000 --- a/engine/databases_enrichment_table.rb +++ /dev/null @@ -1,28 +0,0 @@ -require_relative 'base_enrichment_table' -require 'csv' - -class DatabasesEnrichmentTable < BaseEnrichmentTable - protected - - def table_name - "Databases enrichment table" - end - - def validate_headers - begin - csv_content = CSV.read(incoming_path, headers: true) - expected_headers = ["identifier", "container", "service", "host"] - - if csv_content.headers != expected_headers - actual_headers = csv_content.headers ? csv_content.headers.join(",") : "none" - puts "Databases enrichment table has invalid headers. Expected: #{expected_headers.join(",")}, Got: #{actual_headers}" - return "Databases enrichment table has invalid headers. 
Expected: #{expected_headers.join(",")}, Got: #{actual_headers}" - end - rescue CSV::MalformedCSVError => e - puts "Databases enrichment table is malformed: #{e.message}" - return "Databases enrichment table is malformed: #{e.message}" - end - - nil - end -end \ No newline at end of file diff --git a/engine/ebpf_compatibility_checker.rb b/engine/ebpf_compatibility_checker.rb deleted file mode 100644 index d039ce7..0000000 --- a/engine/ebpf_compatibility_checker.rb +++ /dev/null @@ -1,57 +0,0 @@ -require 'json' -require 'open3' - -class EbpfCompatibilityChecker - attr_reader :system_information, :checked - - def initialize(working_dir) - @working_dir = working_dir - @system_information = nil - @checked = false - check_compatibility - end - - def mark_as_reported - @reported = true - end - - def reported? - @reported || false - end - - private - - def check_compatibility - ebpf_script = File.join(@working_dir, 'ebpf.sh') - - unless File.exist?(ebpf_script) - puts "eBPF compatibility check script not found at #{ebpf_script}" - @system_information = { error: "ebpf.sh script not found" } - return - end - - begin - stdout, stderr, status = Open3.capture3("#{ebpf_script} --json") - - if status.success? - @system_information = JSON.parse(stdout, symbolize_names: false) - @checked = true - else - puts "eBPF compatibility check failed with exit code #{status.exitstatus}" - puts "STDERR: #{stderr}" unless stderr.empty? - @system_information = { - error: "eBPF check failed", - exit_code: status.exitstatus, - stderr: stderr, - stdout: stdout, - } - end - rescue JSON::ParserError => e - puts "Failed to parse eBPF compatibility JSON: #{e.message}" - @system_information = { error: "JSON parse error: #{e.message}" } - rescue => e - puts "Error running eBPF compatibility check: #{e.message}" - @system_information = { error: "Exception: #{e.message}" } - end - end -end diff --git a/engine/kubernetes_discovery.rb b/engine/kubernetes_discovery.rb deleted file mode 100644 index 764634d..0000000 --- a/engine/kubernetes_discovery.rb +++ /dev/null @@ -1,540 +0,0 @@ -require 'net/http' -require 'json' -require 'openssl' -require 'fileutils' -require 'yaml' -require 'time' -require 'digest' -require 'set' -require_relative 'utils' - -# Generates a directory of configs for discovered Kubernetes pods. E.g. /kubernetes-discovery/2025-07-25T12:00:00/ -# Directory contains one config per pod. E.g. /kubernetes-discovery/2025-07-25T12:00:00/monitoring-my-pod-1234567890.yaml -# -# All generated configs are guaranteed to be valid. The entire directory of configs is validated, and removed if validation fails. -# A newly generated directory of configs is not kept if it's the same as the latest version. 
- -class KubernetesDiscovery - include Utils - SERVICE_ACCOUNT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount' - DUMMY_VECTOR_CONFIG = { - 'transforms' => { - 'kubernetes_discovery_test' => { - 'type' => 'remap', - 'inputs' => ['kubernetes_discovery_*'], - 'source' => '.test = "ok"' - } - }, - 'sinks' => { - 'kubernetes_discovery_test_sink' => { - 'type' => 'blackhole', - 'inputs' => ['kubernetes_discovery_test'] - } - } - } - - def initialize(working_dir) - @working_dir = working_dir - @base_dir = File.join(working_dir, "kubernetes-discovery") - @last_run_time = nil - @node_name = ENV['HOSTNAME'] - end - - def self.vector_config_uses_kubernetes_discovery?(vector_config_dir) - return false unless File.exist?(vector_config_dir) - - # Check all yaml files in latest-valid-upstream for kubernetes_discovery_ - Dir.glob(File.join(vector_config_dir, "*.yaml")).each do |config_file| - if File.read(config_file).include?('kubernetes_discovery_') - return true - end - end - - false - end - - def should_discover? - vector_config_dir = File.join(@working_dir, "vector-config", "latest-valid-upstream") - self.class.vector_config_uses_kubernetes_discovery?(vector_config_dir) - end - - def run - unless should_discover? - # Kubernetes discovery not used in vector config - return false - end - - current_time = Time.now - if @last_run_time && (current_time - @last_run_time) < 30 - # Rate limited - last run was too recent - return false - end - @last_run_time = current_time - - unless in_kubernetes? - # Not in Kubernetes environment - return false - end - - @base_url = "https://#{ENV['KUBERNETES_SERVICE_HOST']}:#{ENV['KUBERNETES_SERVICE_PORT']}" - @token = read_service_account_token - @namespace = read_namespace - @ca_cert = read_ca_cert - - begin - discover_and_update - rescue => e - puts "Kubernetes discovery failed: #{e.class.name}: #{e.message}" - false - end - end - - def cleanup_old_versions(keep_count = 5) - versions = Dir.glob(File.join(@base_dir, "*")).select { |f| File.directory?(f) } - versions = versions.reject { |v| v.end_with?('/0-default') } - versions.sort! - - if versions.length > keep_count - to_delete = versions[0...(versions.length - keep_count)] - to_delete.each do |dir| - # Cleaning up old kubernetes-discovery version - FileUtils.rm_rf(dir) - end - end - end - - private - - def in_kubernetes? 
- return false unless ENV['KUBERNETES_SERVICE_HOST'] - return false unless File.exist?(SERVICE_ACCOUNT_PATH) - true - end - - def read_service_account_token - token_path = "#{SERVICE_ACCOUNT_PATH}/token" - return nil unless File.exist?(token_path) - File.read(token_path).strip - end - - def read_namespace - namespace_path = "#{SERVICE_ACCOUNT_PATH}/namespace" - return 'default' unless File.exist?(namespace_path) - File.read(namespace_path).strip - end - - def read_ca_cert - ca_path = "#{SERVICE_ACCOUNT_PATH}/ca.crt" - return nil unless File.exist?(ca_path) - OpenSSL::X509::Certificate.new(File.read(ca_path)) - end - - def kubernetes_request(path) - uri = URI("#{@base_url}#{path}") - - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - http.verify_mode = OpenSSL::SSL::VERIFY_PEER - - if @ca_cert - store = OpenSSL::X509::Store.new - store.add_cert(@ca_cert) - http.cert_store = store - end - - request = Net::HTTP::Get.new(uri) - request['Authorization'] = "Bearer #{@token}" - request['Accept'] = 'application/json' - - response = http.request(request) - - unless response.code == '200' - raise "Kubernetes API request failed: #{response.code} #{response.body}" - end - - JSON.parse(response.body) - end - - def discover_and_update - latest_dir = latest_kubernetes_discovery - - # Create new version directory - timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S') - new_dir = File.join(@base_dir, timestamp) - FileUtils.mkdir_p(new_dir) - - # Get all namespaces we have access to - namespaces = get_namespaces - - # Use hash to store configs by namespace_pod key to avoid duplicates - discovered_configs = {} - - namespaces.each do |namespace| - # Discover services with prometheus annotations - services = get_annotated_services(namespace) - - services.each do |service| - endpoints = get_service_endpoints(service, namespace) - - endpoints.each do |endpoint| - # Use namespace_pod as key for deduplication - config_key = endpoint[:name] # This is already "namespace_pod" - - # Skip if we already have a config for this pod - next if discovered_configs.has_key?(config_key) - - config = generate_config(endpoint) - if config - discovered_configs[config_key] = config - end - end - end - - # Discover standalone pods (those not backing services) - pods = get_annotated_pods(namespace) - - pods.each do |pod| - endpoint = get_pod_endpoint(pod, namespace) - if endpoint - # Use namespace_pod as key for deduplication - config_key = endpoint[:name] # This is already "namespace_pod" - - # Skip if we already have a config for this pod (discovered via service) - next if discovered_configs.has_key?(config_key) - - config = generate_config(endpoint) - if config - discovered_configs[config_key] = config - end - end - end - end - - # Write all configs to disk at once - configs_generated = 0 - discovered_configs.each do |key, config| - filepath = File.join(new_dir, config[:filename]) - File.write(filepath, config[:content].to_yaml) - configs_generated += 1 - end - - # Always generate discovered_pods.yaml with the count of discovered pods - discovered_pods_config = { - 'sources' => { - 'kubernetes_discovery_static_metrics' => { - 'type' => 'static_metrics', - 'namespace' => '', # Empty namespace to avoid "static_" prefix - 'metrics' => [ - { - 'name' => 'collector_kubernetes_discovered_pods', - 'kind' => 'absolute', - 'value' => { - 'gauge' => { - 'value' => configs_generated - } - }, - 'tags' => {} - } - ] - } - } - } - File.write(File.join(new_dir, 'discovered_pods.yaml'), discovered_pods_config.to_yaml) - - # 
Validate the generated configs - unless validate_configs(new_dir) - puts "Kubernetes discovery: validation failed" - FileUtils.rm_rf(new_dir) - return false - end - - # Check if configs changed from latest version - if latest_dir && configs_identical?(latest_dir, new_dir) - FileUtils.rm_rf(new_dir) - return false - end - - puts "Kubernetes discovery: Generated configs for #{configs_generated} pods" - - # Clean up old versions - cleanup_old_versions - - true - end - - def get_namespaces - begin - result = kubernetes_request('/api/v1/namespaces') - result['items'].map { |ns| ns['metadata']['name'] } - rescue => e - puts "Kubernetes discovery: Failed to list namespaces (#{e.message}), using current namespace" - [@namespace] - end - end - - def get_annotated_services(namespace) - services = kubernetes_request("/api/v1/namespaces/#{namespace}/services") - - services['items'].select do |service| - annotations = service.dig('metadata', 'annotations') || {} - annotations['prometheus.io/scrape'] == 'true' - end - end - - def get_annotated_pods(namespace) - pods = kubernetes_request("/api/v1/namespaces/#{namespace}/pods") - - pods['items'].select do |pod| - annotations = pod.dig('metadata', 'annotations') || {} - node_name = pod.dig('spec', 'nodeName') - - # Select all running pods with prometheus scrape annotation on the current node - annotations['prometheus.io/scrape'] == 'true' && - pod['status']['phase'] == 'Running' && - (@node_name.nil? || node_name == @node_name) # If NODE_NAME not set, discover all pods - end - end - - def get_service_endpoints(service, namespace) - service_name = service['metadata']['name'] - annotations = service['metadata']['annotations'] - - port = annotations['prometheus.io/port'] || '9090' - path = annotations['prometheus.io/path'] || '/metrics' - - # Get endpoints for this service - endpoints = kubernetes_request("/api/v1/namespaces/#{namespace}/endpoints/#{service_name}") - - results = [] - - (endpoints['subsets'] || []).each do |subset| - addresses = subset['addresses'] || [] - addresses.each do |address| - pod_name = address.dig('targetRef', 'name') - - # Skip if we have NODE_NAME set and need to check pod's node - pod_metadata = {} - if pod_name - begin - pod = kubernetes_request("/api/v1/namespaces/#{namespace}/pods/#{pod_name}") - - if @node_name - node_name = pod.dig('spec', 'nodeName') - next unless node_name == @node_name - end - - # Extract workload information from ownerReferences - workload_info = get_workload_info(pod, namespace) - - # Extract container names - containers = pod.dig('spec', 'containers') || [] - container_names = containers.map { |c| c['name'] } - - # Collect pod metadata - pod_metadata = { - pod_uid: pod.dig('metadata', 'uid'), - node_name: pod.dig('spec', 'nodeName'), - start_time: pod.dig('status', 'startTime'), - container_names: container_names, - deployment_name: workload_info[:deployment], - statefulset_name: workload_info[:statefulset], - daemonset_name: workload_info[:daemonset], - replicaset_name: workload_info[:replicaset] - } - rescue => e - puts "Kubernetes discovery: Failed to get pod info for #{pod_name}: #{e.message}" - next if @node_name # Skip if we need node filtering but couldn't get pod info - end - end - - results << { - name: "#{namespace}_#{pod_name || service_name}", - endpoint: "http://#{address['ip']}:#{port}#{path}", - namespace: namespace, - pod: pod_name, - service: service_name - }.merge(pod_metadata) - end - end - - results - end - - def get_pod_endpoint(pod, namespace) - annotations = 
pod['metadata']['annotations'] - pod_name = pod['metadata']['name'] - pod_ip = pod.dig('status', 'podIP') - - return nil unless pod_ip - - port = annotations['prometheus.io/port'] || '9090' - path = annotations['prometheus.io/path'] || '/metrics' - - # Extract workload information from ownerReferences - workload_info = get_workload_info(pod, namespace) - - # Extract container names - containers = pod.dig('spec', 'containers') || [] - container_names = containers.map { |c| c['name'] } - - { - name: "#{namespace}_#{pod_name}", - endpoint: "http://#{pod_ip}:#{port}#{path}", - namespace: namespace, - pod: pod_name, - service: nil, - # k8s metadata for labels - pod_uid: pod.dig('metadata', 'uid'), - node_name: pod.dig('spec', 'nodeName'), - start_time: pod.dig('status', 'startTime'), - container_names: container_names, - deployment_name: workload_info[:deployment], - statefulset_name: workload_info[:statefulset], - daemonset_name: workload_info[:daemonset], - replicaset_name: workload_info[:replicaset] - } - end - - def generate_config(endpoint_info) - return nil unless endpoint_info - - source_name = "prometheus_scrape_#{endpoint_info[:name]}" - transform_name = "kubernetes_discovery_#{endpoint_info[:name]}" - - config = { - 'sources' => { - source_name => { - 'type' => 'prometheus_scrape', - 'endpoints' => [endpoint_info[:endpoint]], - 'scrape_interval_secs' => 30, - 'instance_tag' => 'instance' # This will add instance="host:port" tag - } - }, - 'transforms' => { - transform_name => { - 'type' => 'remap', - 'inputs' => [source_name], - 'source' => '' # Will be built below - } - } - } - - # Build remap source to add all k8s labels - remap_lines = [] - - # Add labels - remap_lines << ".tags.\"resource.k8s.namespace.name\" = \"#{endpoint_info[:namespace]}\"" - remap_lines << ".tags.\"resource.k8s.pod.name\" = \"#{endpoint_info[:pod]}\"" - - # Add new k8s labels if present - remap_lines << ".tags.\"resource.k8s.node.name\" = \"#{endpoint_info[:node_name]}\"" if endpoint_info[:node_name] - remap_lines << ".tags.\"resource.k8s.pod.uid\" = \"#{endpoint_info[:pod_uid]}\"" if endpoint_info[:pod_uid] - remap_lines << ".tags.\"resource.k8s.pod.start_time\" = \"#{endpoint_info[:start_time]}\"" if endpoint_info[:start_time] - - # Add workload-specific labels - remap_lines << ".tags.\"resource.k8s.deployment.name\" = \"#{endpoint_info[:deployment_name]}\"" if endpoint_info[:deployment_name] - remap_lines << ".tags.\"resource.k8s.statefulset.name\" = \"#{endpoint_info[:statefulset_name]}\"" if endpoint_info[:statefulset_name] - remap_lines << ".tags.\"resource.k8s.daemonset.name\" = \"#{endpoint_info[:daemonset_name]}\"" if endpoint_info[:daemonset_name] - remap_lines << ".tags.\"resource.k8s.replicaset.name\" = \"#{endpoint_info[:replicaset_name]}\"" if endpoint_info[:replicaset_name] - - # Add container names if present - if endpoint_info[:container_names] && !endpoint_info[:container_names].empty? 
- remap_lines << ".tags.\"resource.k8s.container.name\" = \"#{endpoint_info[:container_names].join(',')}\"" - end - - if endpoint_info[:service] - remap_lines << ".tags.\"resource.k8s.service.name\" = \"#{endpoint_info[:service]}\"" - end - - config['transforms'][transform_name]['source'] = remap_lines.join("\n") - - config_md5 = Digest::MD5.hexdigest(config.to_yaml) - filename = "#{endpoint_info[:name]}-#{config_md5}.yaml" - - { - filename: filename, - content: config - } - end - - def validate_configs(config_dir) - timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S') - tmp_dir = "/tmp/validate-kubernetes-discovery-#{timestamp}" - FileUtils.rm_rf(tmp_dir) if File.exist?(tmp_dir) - FileUtils.mkdir_p(tmp_dir) - - begin - # Copy generated configs - FileUtils.cp_r(config_dir, "#{tmp_dir}/kubernetes-discovery") - - # Write dummy vector config that consumes our sources - File.write("#{tmp_dir}/vector.yaml", DUMMY_VECTOR_CONFIG.to_yaml) - - # Run validation - output = `REGION=unknown AZ=unknown vector validate #{tmp_dir}/vector.yaml #{tmp_dir}/kubernetes-discovery/\*.yaml 2>&1` - success = $?.success? - - unless success - puts "Error: Kubernetes discovery validation failed" - puts output - end - - return success - ensure - FileUtils.rm_rf(tmp_dir) - end - end - - def configs_identical?(dir1, dir2) - files1 = Dir.glob("#{dir1}/*.yaml").map { |f| File.basename(f) }.sort - files2 = Dir.glob("#{dir2}/*.yaml").map { |f| File.basename(f) }.sort - - return false unless files1 == files2 - - files1.all? do |filename| - content1 = File.read("#{dir1}/#{filename}") - content2 = File.read("#{dir2}/#{filename}") - content1 == content2 - end - end - - def get_workload_info(pod, namespace) - owner_refs = pod.dig('metadata', 'ownerReferences') || [] - workload_info = { - deployment: nil, - statefulset: nil, - daemonset: nil, - replicaset: nil - } - - return workload_info if owner_refs.empty? - - owner = owner_refs.first - owner_kind = owner['kind'] - owner_name = owner['name'] - - case owner_kind - when 'ReplicaSet' - workload_info[:replicaset] = owner_name - # Try to find parent Deployment - begin - replicaset = kubernetes_request("/apis/apps/v1/namespaces/#{namespace}/replicasets/#{owner_name}") - rs_owner_refs = replicaset.dig('metadata', 'ownerReferences') || [] - - if rs_owner_refs.length > 0 && rs_owner_refs.first['kind'] == 'Deployment' - workload_info[:deployment] = rs_owner_refs.first['name'] - end - rescue => e - puts "Kubernetes discovery: Failed to get ReplicaSet info for #{owner_name}: #{e.message}" - end - when 'Deployment' - workload_info[:deployment] = owner_name - when 'StatefulSet' - workload_info[:statefulset] = owner_name - when 'DaemonSet' - workload_info[:daemonset] = owner_name - end - - workload_info - end -end \ No newline at end of file diff --git a/engine/ssl_certificate_manager.rb b/engine/ssl_certificate_manager.rb deleted file mode 100644 index 953cfea..0000000 --- a/engine/ssl_certificate_manager.rb +++ /dev/null @@ -1,102 +0,0 @@ -require 'fileutils' - -class SSLCertificateManager - DOMAIN_FILE = '/etc/ssl_certificate_host.txt' - - def initialize(working_dir = nil) - @working_dir = working_dir - @domain_file = working_dir ? 
File.join(working_dir, 'ssl_certificate_host.txt') : DOMAIN_FILE - @previous_domain = nil - @domain_just_changed = false - end - - attr_reader :domain_file, :domain_just_changed - - # Process an incoming SSL certificate host configuration - # Returns true if domain changed, false otherwise - def process_ssl_certificate_host(domain_string) - domain_string = domain_string.to_s.strip - current_domain = read_current_domain - - # Check if domain has changed - if current_domain != domain_string - write_domain(domain_string) - @previous_domain = current_domain - @domain_just_changed = true - - # If domain changed and is non-empty, restart certbot - if !domain_string.empty? - restart_certbot - end - - return true - end - - # Domain hasn't changed - @domain_just_changed = false - false - end - - # Check if we should skip vector validation due to pending certificate - def should_skip_validation? - return false unless @domain_just_changed - - current_domain = read_current_domain - return false if current_domain.empty? - - # Skip validation if certificate doesn't exist yet - !certificate_exists?(current_domain) - end - - # Reset the "just changed" flag after a ping cycle - def reset_change_flag - @domain_just_changed = false - end - - # Check if a certificate exists for the given domain - def certificate_exists?(domain = nil) - domain ||= read_current_domain - return false if domain.empty? - - cert_path = "/etc/ssl/#{domain}.pem" - key_path = "/etc/ssl/#{domain}.key" - - File.exist?(cert_path) && File.exist?(key_path) - end - - # Read the current domain from file - def read_current_domain - return '' unless File.exist?(@domain_file) - File.read(@domain_file).strip - rescue => e - puts "Error reading SSL certificate host file: #{e.message}" - '' - end - - private - - # Write domain to the well-known location - def write_domain(domain_string) - FileUtils.mkdir_p(File.dirname(@domain_file)) - File.write(@domain_file, domain_string) - puts "Updated SSL certificate host: #{domain_string.empty? ? '(empty)' : domain_string}" - rescue => e - puts "Error writing SSL certificate host file: #{e.message}" - raise - end - - # Restart certbot via supervisorctl - def restart_certbot - puts "Restarting certbot to handle domain change..." - result = system('supervisorctl -c /etc/supervisor/conf.d/supervisord.conf restart certbot') - if result - puts "Certbot restarted successfully" - else - puts "Warning: Failed to restart certbot" - end - result - rescue => e - puts "Error restarting certbot: #{e.message}" - false - end -end diff --git a/engine/utils.rb b/engine/utils.rb deleted file mode 100644 index 234a1fb..0000000 --- a/engine/utils.rb +++ /dev/null @@ -1,125 +0,0 @@ -require 'fileutils' -require 'uri' -require 'json' -require 'socket' - -module Utils - class DownloadError < StandardError; end - - MAX_DOWNLOAD_RETRIES = 2 - ENRICHMENT_TABLE_PATH = "/enrichment/docker-mappings.csv" - ENRICHMENT_TABLE_INCOMING_PATH = "/enrichment/docker-mappings.incoming.csv" - DATABASES_TABLE_PATH = "/enrichment/databases.csv" - DATABASES_TABLE_INCOMING_PATH = "/enrichment/databases.incoming.csv" - - # Shared and main.rb specific functions - def latest_version - # Assuming version directories are named like 'YYYY-MM-DDTHH:MM:SS' - # and this function should return the name of the latest one. - version_dirs = Dir.glob("#{@working_dir}/versions/*").select { |f| File.directory?(f) } - return nil if version_dirs.empty? 
- version_dirs.sort.last.split('/').last - end - - def read_error - error_file = "#{@working_dir}/errors.txt" - return nil unless File.exist?(error_file) - URI.encode_www_form_component(File.read(error_file).strip) - end - - def write_error(message) - puts "Error: #{message}" - File.write("#{@working_dir}/errors.txt", message) - end - - def clear_error - error_file = "#{@working_dir}/errors.txt" - FileUtils.rm_f(error_file) if File.exist?(error_file) - end - - # Ensure vector config doesn't contain command: and is valid - def validate_vector_config(version) - config_path = "#{@working_dir}/versions/#{version}/vector.yaml" - - if File.read(config_path).include?('command:') # type: exec - return 'vector.yaml must not contain command: directives' - end - - output = `REGION=unknown AZ=unknown vector validate #{config_path}` - return output unless $?.success? - - nil - end - - def latest_database_json - latest_ver = latest_version - return '{}' unless latest_ver - - path = "#{@working_dir}/versions/#{latest_ver}/databases.json" - - if File.exist?(path) - File.read(path) - else - '{}' - end - end - - def download_file(url, path, retries = 0) - uri = URI(url) - - # Add hostname query parameter - params = URI.decode_www_form(uri.query || '') - params << ['host', hostname] - uri.query = URI.encode_www_form(params) - - Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| - request = Net::HTTP::Get.new(uri) - response = http.request(request) - - if response.code == '200' - FileUtils.mkdir_p(File.dirname(path)) - File.write(path, response.body) - true - else - puts "Failed to download #{File.basename(path)} from #{url}. Response code: #{response.code}" - if retries < MAX_DOWNLOAD_RETRIES - puts "Retrying #{retries + 1} of #{MAX_DOWNLOAD_RETRIES}..." - download_file(url, path, retries + 1) - else - raise DownloadError, "Failed to download #{File.basename(path)} from #{url} after #{MAX_DOWNLOAD_RETRIES} retries. Response code: #{response.code}" - end - end - end - rescue SocketError => e - if retries < MAX_DOWNLOAD_RETRIES - puts "Retrying #{retries + 1} of #{MAX_DOWNLOAD_RETRIES}..." - download_file(url, path, retries + 1) - else - puts "Network error downloading #{File.basename(path)} from #{url}: #{e.message} after #{MAX_DOWNLOAD_RETRIES} retries." - raise DownloadError, "Network error downloading #{File.basename(path)} from #{url}: #{e.message} after #{MAX_DOWNLOAD_RETRIES} retries." 
- end - end - - def hostname - return ENV['HOSTNAME'] if ENV['HOSTNAME'] - - # Try to get hostname from kubernetes mounted hostPath, if available - if File.exist?('/host/proc/sys/kernel/hostname') - return File.read('/host/proc/sys/kernel/hostname').strip - end - - # Second, try using Socket class - begin - return Socket.gethostname - rescue - # If all else fails, return 'unknown' - return 'unknown' - end - end - - # Always points to latest valid kubernetes discovery configs - def latest_kubernetes_discovery - versions = Dir.glob(File.join(@working_dir, "kubernetes-discovery", "*").to_s).select { |f| File.directory?(f) } - versions.sort.last - end -end diff --git a/engine/vector_config.rb b/engine/vector_config.rb deleted file mode 100644 index 94258be..0000000 --- a/engine/vector_config.rb +++ /dev/null @@ -1,271 +0,0 @@ -require 'fileutils' -require 'time' -require_relative 'utils' -require_relative 'kubernetes_discovery' - -class VectorConfig - include Utils - - VECTOR_CONFIG_FILES = ['vector.yaml', 'manual.vector.yaml', 'process_discovery.vector.yaml'].freeze - - MINIMAL_KUBERNETES_DISCOVERY_CONFIG = <<~YAML - --- - sources: - kubernetes_discovery_static_metrics: - type: static_metrics - namespace: '' # Empty namespace to avoid "static_" prefix - metrics: - - name: collector_kubernetes_discovered_pods - kind: absolute - value: - gauge: - value: 0 - tags: {} - YAML - - def initialize(working_dir) - @working_dir = working_dir - @vector_config_dir = File.join(@working_dir, "vector-config") - end - - # Validate upstream vector.yaml file using minimal kubernetes discovery config - # Validate upstream config files (vector.yaml, manual.vector.yaml, and/or process_discovery.vector.yaml) - def validate_upstream_files(version_dir) - # Check if at least one config file exists - config_paths = VECTOR_CONFIG_FILES.map { |filename| File.join(version_dir, filename) } - if config_paths.none? { |path| File.exist?(path) } - return "None of: #{VECTOR_CONFIG_FILES.join(', ')} found in #{version_dir}" - end - - # Check for command: directives in all files (security check) - VECTOR_CONFIG_FILES.each do |filename| - file_path = File.join(version_dir, filename) - if File.exist?(file_path) && File.read(file_path).include?('command:') - return "#{filename} must not contain command: directives" - end - end - - timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S') - tmp_dir = "/tmp/validate-vector-config-file-#{timestamp}" - - FileUtils.rm_rf(tmp_dir) if File.exist?(tmp_dir) - FileUtils.mkdir_p(tmp_dir) - - begin - # Copy existing config files - VECTOR_CONFIG_FILES.each do |filename| - source_path = File.join(version_dir, filename) - if File.exist?(source_path) - FileUtils.cp(source_path, "#{tmp_dir}/#{filename}") - puts "Copied #{filename} (#{File.size(source_path)} bytes)" - end - end - FileUtils.mkdir_p("#{tmp_dir}/kubernetes-discovery") - File.write("#{tmp_dir}/kubernetes-discovery/minimal.yaml", MINIMAL_KUBERNETES_DISCOVERY_CONFIG) - - # Build validation command with available files - validate_files = [] - VECTOR_CONFIG_FILES.each do |filename| - file_path = "#{tmp_dir}/#{filename}" - validate_files << file_path if File.exist?(file_path) - end - validate_files << "#{tmp_dir}/kubernetes-discovery/*.yaml" - - validate_cmd = "REGION=unknown AZ=unknown vector validate #{validate_files.join(' ')} 2>&1" - - puts "Running validation command: #{validate_cmd}" - puts "Files to validate: #{validate_files.inspect}" - output = `#{validate_cmd}` - return output unless $?.success? 
- - nil - ensure - FileUtils.rm_rf(tmp_dir) - end - end - - # Promote validated upstream files to latest-valid-upstream directory - def promote_upstream_files(version_dir) - latest_valid_upstream_dir = File.join(@vector_config_dir, "latest-valid-upstream") - temp_upstream_dir = File.join(@vector_config_dir, "latest-valid-upstream.tmp.#{Time.now.utc.to_f}") - - # Copy files to temporary directory first - FileUtils.mkdir_p(temp_upstream_dir) - - # Copy config files if they exist - promoted_files = [] - VECTOR_CONFIG_FILES.each do |filename| - source_path = File.join(version_dir, filename) - if File.exist?(source_path) - FileUtils.cp(source_path, File.join(temp_upstream_dir, filename)) - promoted_files << filename - end - end - - # Replace the old directory with the new one (not atomic but good enough) - FileUtils.rm_rf(latest_valid_upstream_dir) if File.exist?(latest_valid_upstream_dir) - FileUtils.mv(temp_upstream_dir, latest_valid_upstream_dir) - - # Report what was promoted - puts "Promoted #{promoted_files.join(', ')} to latest-valid-upstream" - end - - # Prepare a new vector-config directory - def prepare_dir - timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%6NZ') - new_version_dir = File.join(@vector_config_dir, "new_#{timestamp}") - - begin - FileUtils.mkdir_p(new_version_dir) - - # Copy files from latest-valid-upstream directory - latest_valid_upstream_dir = File.join(@vector_config_dir, "latest-valid-upstream") - - unless File.exist?(latest_valid_upstream_dir) - puts "Error: No latest-valid-upstream directory found" - FileUtils.rm_rf(new_version_dir) - return nil - end - - # Copy all files from latest-valid-upstream - FileUtils.cp_r(Dir.glob(File.join(latest_valid_upstream_dir, "*")), new_version_dir) - - # Check if vector config uses kubernetes_discovery_* - uses_kubernetes_discovery = KubernetesDiscovery.vector_config_uses_kubernetes_discovery?(latest_valid_upstream_dir) - - if uses_kubernetes_discovery - # Use latest actual kubernetes discovery - kubernetes_discovery_dir = latest_kubernetes_discovery - if kubernetes_discovery_dir && File.exist?(kubernetes_discovery_dir) - FileUtils.ln_s(kubernetes_discovery_dir, File.join(new_version_dir, "kubernetes-discovery")) - end - else - # Use 0-default when kubernetes discovery is not used - default_kubernetes_discovery = File.join(@working_dir, "kubernetes-discovery", "0-default") - if File.exist?(default_kubernetes_discovery) - FileUtils.ln_s(default_kubernetes_discovery, File.join(new_version_dir, "kubernetes-discovery")) - end - end - - puts "Prepared vector-config directory: #{new_version_dir}" - new_version_dir - rescue => e - puts "Error preparing vector-config directory: #{e.message}" - FileUtils.rm_rf(new_version_dir) if File.exist?(new_version_dir) - nil - end - end - - # Validate a vector-config directory - def validate_dir(config_dir) - puts "Validating vector config directory: #{config_dir}" - - # Build list of files to validate - validate_files = [] - VECTOR_CONFIG_FILES.each do |filename| - file_path = "#{config_dir}/#{filename}" - validate_files << file_path if File.exist?(file_path) - end - validate_files << "#{config_dir}/kubernetes-discovery/*.yaml" - - validate_cmd = "REGION=unknown AZ=unknown vector validate #{validate_files.join(' ')} 2>&1" - puts "Running validation: #{validate_cmd}" - - output = `#{validate_cmd}` - return output unless $?.success? - - nil - end - - # Promote a validated config directory to current - def promote_dir(config_dir) - puts "Promoting #{config_dir} to /vector-config/current..." 
- - current_link = File.join(@vector_config_dir, "current") - backup_link = File.join(@vector_config_dir, "previous") - temp_link = File.join(@vector_config_dir, "current.tmp.#{Time.now.utc.to_f}") - - begin - # Create new symlink pointing to config_dir - File.symlink(config_dir, temp_link) - - # Backup current link if it exists (might fail if current doesn't exist, that's ok) - if File.exist?(current_link) - begin - File.rename(current_link, backup_link) - rescue => e - puts "Warning: Could not backup current link: #{e.message}" - end - end - - # Atomically replace current with new link - File.rename(temp_link, current_link) - - puts "Atomically promoted #{config_dir} to current" - - # Clean up old directories after successful promotion - cleanup_old_directories - - true - rescue => e - # Cleanup temp link if it exists - FileUtils.rm_f(temp_link) - - # Try to restore backup if promotion failed and current is missing - if !File.exist?(current_link) && File.exist?(backup_link) - begin - File.rename(backup_link, current_link) - puts "Restored previous config due to promotion error" - rescue => restore_error - puts "Failed to restore backup: #{restore_error.message}" - end - end - - puts "Error promoting config: #{e.message}" - raise - end - end - - def reload_vector - puts "Reloading vector..." - system("supervisorctl signal HUP vector") - - puts "Reload signal sent to vector" - end - - # Clean up old vector-config directories, keeping only the most recent ones - def cleanup_old_directories(keep_count = 5) - # Get all new_* directories - new_dirs = Dir.glob(File.join(@vector_config_dir, "new_*")).select { |f| File.directory?(f) } - - # Sort by timestamp in directory name (newest last) - new_dirs.sort! - - # Resolve symlinks to find directories that are currently in use - current_link = File.join(@vector_config_dir, "current") - previous_link = File.join(@vector_config_dir, "previous") - - in_use = [] - [current_link, previous_link].each do |link| - if File.symlink?(link) - target = File.readlink(link) - # Convert relative path to absolute if needed - target = File.absolute_path(target, @vector_config_dir) unless target.start_with?('/') - in_use << target - end - end - - # Filter out directories that are currently in use - deletable = new_dirs.reject { |dir| in_use.include?(dir) } - - # Keep only the most recent directories - if deletable.length > keep_count - to_delete = deletable[0...(deletable.length - keep_count)] - to_delete.each do |dir| - puts "Cleaning up old vector-config directory: #{File.basename(dir)}" - FileUtils.rm_rf(dir) - end - puts "Cleaned up #{to_delete.length} old vector-config directories" - end - end -end \ No newline at end of file diff --git a/healthcheck.sh b/healthcheck.sh deleted file mode 100644 index c247ed4..0000000 --- a/healthcheck.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Health check script for Vector -# Returns 0 if healthy, non-zero if unhealthy -# Detects when Vector is running with minimal/console-only configuration - -set -euo pipefail - -# Check if Vector is healthy (returns JSON: {"ok":true} or {"ok":false}) -VECTOR_HEALTH=$(curl -s http://localhost:8686/health 2>/dev/null | jq -r '.ok' 2>/dev/null || echo "error") - -if [ "$VECTOR_HEALTH" != "true" ]; then - echo "Vector health check failed - not responding or unhealthy ($VECTOR_HEALTH)" - exit 1 -fi - -# Check what sinks Vector has configured -# If only console sink exists, something is wrong -SINKS=$(curl -s http://localhost:8686/graphql \ - -H "Content-Type: application/json" \ - -d 
'{"query":"{ sinks { edges { node { componentId componentType } } } }"}' 2>/dev/null | \ - jq -r '.data.sinks.edges[].node.componentId' 2>/dev/null || echo "") - -if [ -z "$SINKS" ]; then - echo "ERROR: Vector has no sinks configured" - exit 1 -fi - -# Count how many sinks we have -SINK_COUNT=$(echo "$SINKS" | wc -l) - -# Check if only console sink exists (emergency/fallback mode) -if [ "$SINK_COUNT" -eq 1 ] && echo "$SINKS" | grep -q "^console$"; then - echo "ERROR: Vector running with console-only sink (lost configuration)" - exit 1 -fi - -# Check if we're missing expected Better Stack sinks -if ! echo "$SINKS" | grep -q "better_stack_http"; then - echo "WARNING: Vector missing Better Stack HTTP sinks" - echo "Current sinks: $(echo $SINKS | tr '\n' ' ')" - # Don't fail yet - updater might be working on it -fi - -echo "Vector health check passed - $SINK_COUNT sinks configured" -exit 0 diff --git a/mdprobe/mdprobe.rb b/mdprobe/mdprobe.rb deleted file mode 100644 index ef2a960..0000000 --- a/mdprobe/mdprobe.rb +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'json' -require 'logger' - -# Load all metadata providers -Dir[File.join(__dir__, 'providers', '*.rb')].each { |file| require file } - -# Main class for cloud metadata detection -class Mdprobe - METADATA_SERVICE_TIMEOUT = 5 # seconds - - def initialize - @logger = Logger.new(STDERR) - @logger.level = ENV['DEBUG'] ? Logger::DEBUG : Logger::WARN - @logger.formatter = proc { |severity, datetime, _, msg| "#{datetime.strftime('%Y-%m-%d %H:%M:%S')} [#{severity}] #{msg}\n" } - end - - def run - metadata = get_instance_metadata - - if metadata.nil? - puts '{}' - else - # Only output Region and AvailabilityZone - output = { - Region: metadata[:region] || 'unknown', - AvailabilityZone: metadata[:availability_zone] || 'unknown' - } - puts output.to_json - end - rescue => e - @logger.error "Failed to get instance metadata: #{e.message}" - @logger.debug e.backtrace.join("\n") if ENV['DEBUG'] - puts '{}' - end - - private - - def get_instance_metadata - provider = detect_cloud_provider - @logger.info "Detected cloud provider: #{provider || 'unknown'}" - - return nil unless provider - - # Instantiate the appropriate provider class - provider_class = case provider - when :aws - Providers::AWS - when :gcp - Providers::GCP - when :azure - Providers::Azure - when :digital_ocean - Providers::DigitalOcean - when :hetzner - Providers::Hetzner - when :alibaba - Providers::Alibaba - when :scaleway - Providers::Scaleway - when :ibm - Providers::IBM - when :oracle - Providers::Oracle - else - return nil - end - - return if provider_class.nil? 
- provider_instance = provider_class.new(@logger) - metadata = provider_instance.fetch_metadata - - # Apply Azure-specific zone modification if needed - if provider == :azure && metadata && metadata[:availability_zone] =~ /^\d+$/ - metadata[:availability_zone] = "#{metadata[:region]}-#{metadata[:availability_zone]}" - end - - metadata - end - - def detect_cloud_provider - # Check AWS Xen instances - if File.exist?('/sys/hypervisor/uuid') - uuid = File.read('/sys/hypervisor/uuid').strip.downcase rescue '' - return :aws if uuid.start_with?('ec2') - end - - # Check board vendor - if File.exist?('/sys/class/dmi/id/board_vendor') - vendor = File.read('/sys/class/dmi/id/board_vendor').strip rescue '' - case vendor - when 'Amazon EC2' - return :aws - when 'Google' - return :gcp - when 'Microsoft Corporation' - return :azure - when 'DigitalOcean' - return :digital_ocean - end - end - - # Check sys vendor - if File.exist?('/sys/class/dmi/id/sys_vendor') - vendor = File.read('/sys/class/dmi/id/sys_vendor').strip rescue '' - case vendor - when 'Hetzner' - return :hetzner - when 'Alibaba Cloud' - return :alibaba - when 'Scaleway' - return :scaleway - end - end - - # Check chassis vendor for IBM - if File.exist?('/sys/class/dmi/id/chassis_vendor') - vendor = File.read('/sys/class/dmi/id/chassis_vendor').strip rescue '' - return :ibm if vendor.start_with?('IBM:Cloud Compute Server') - end - - # Check chassis asset tag for Oracle - if File.exist?('/sys/class/dmi/id/chassis_asset_tag') - tag = File.read('/sys/class/dmi/id/chassis_asset_tag').strip rescue '' - return :oracle if tag == 'OracleCloud.com' - end - - nil - end -end - -# Run if executed directly -if __FILE__ == $0 - Mdprobe.new.run -end diff --git a/mdprobe/providers/alibaba.rb b/mdprobe/providers/alibaba.rb deleted file mode 100644 index a163bba..0000000 --- a/mdprobe/providers/alibaba.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class Alibaba < Base - METADATA_URL = 'http://100.100.100.200/latest/meta-data' - - def fetch_metadata - # Check if service is available - instance_id = get_metadata_field('instance-id') - return nil unless instance_id - - build_metadata( - region: get_metadata_field('region-id'), - availability_zone: get_metadata_field('zone-id') - ) - rescue => e - @logger.debug "Failed to fetch Alibaba metadata: #{e.message}" - nil - end - - private - - def get_metadata_field(path) - http_get("#{METADATA_URL}/#{path}") - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/aws.rb b/mdprobe/providers/aws.rb deleted file mode 100644 index 700222f..0000000 --- a/mdprobe/providers/aws.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class AWS < Base - METADATA_URL = 'http://169.254.169.254/latest' - TOKEN_TTL = 21600 # 6 hours - - def fetch_metadata - # AWS IMDSv2 requires a token - token = get_token - return nil unless token - - # Fetch instance identity for region and AZ - instance_identity = get_instance_identity(token) - return nil unless instance_identity - - build_metadata( - region: instance_identity['region'], - availability_zone: instance_identity['availabilityZone'] - ) - rescue => e - @logger.debug "Failed to fetch AWS metadata: #{e.message}" - nil - end - - private - - def get_token - http_put( - "#{METADATA_URL}/api/token", - 'X-aws-ec2-metadata-token-ttl-seconds' => TOKEN_TTL.to_s - ) - end - - def get_instance_identity(token) - response = http_get( - 
"#{METADATA_URL}/dynamic/instance-identity/document", - 'X-aws-ec2-metadata-token' => token - ) - return nil unless response - - JSON.parse(response) - rescue JSON::ParserError => e - @logger.debug "Failed to parse instance identity: #{e.message}" - nil - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/azure.rb b/mdprobe/providers/azure.rb deleted file mode 100644 index 1c0e629..0000000 --- a/mdprobe/providers/azure.rb +++ /dev/null @@ -1,40 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class Azure < Base - METADATA_URL = 'http://169.254.169.254/metadata/instance' - API_VERSION = '2021-02-01' - METADATA_HEADER = { 'Metadata' => 'true' } - - def fetch_metadata - # Azure requires Metadata header and api-version parameter - instance_data = get_instance_metadata - return nil unless instance_data - - compute = instance_data['compute'] || {} - - build_metadata( - region: compute['location'], - availability_zone: compute['zone'] || compute['platformFaultDomain'] - ) - rescue => e - @logger.debug "Failed to fetch Azure metadata: #{e.message}" - nil - end - - private - - def get_instance_metadata - url = "#{METADATA_URL}?api-version=#{API_VERSION}" - response = http_get(url, METADATA_HEADER) - return nil unless response - - JSON.parse(response) - rescue JSON::ParserError => e - @logger.debug "Failed to parse Azure metadata: #{e.message}" - nil - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/base.rb b/mdprobe/providers/base.rb deleted file mode 100644 index f942076..0000000 --- a/mdprobe/providers/base.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -require 'net/http' -require 'uri' -require 'json' -require 'timeout' - -module Providers - # Base class for all cloud metadata providers - class Base - METADATA_SERVICE_TIMEOUT = 5 # seconds - - def initialize(logger) - @logger = logger - end - - # Must be implemented by subclasses - def fetch_metadata - raise NotImplementedError, "#{self.class} must implement fetch_metadata" - end - - protected - - # Common HTTP request with timeout - def http_get(url, headers = {}) - uri = URI(url) - - Timeout.timeout(METADATA_SERVICE_TIMEOUT) do - http = Net::HTTP.new(uri.host, uri.port) - http.open_timeout = METADATA_SERVICE_TIMEOUT - http.read_timeout = METADATA_SERVICE_TIMEOUT - - request = Net::HTTP::Get.new(uri) - headers.each { |k, v| request[k] = v } - - response = http.request(request) - - if response.code.to_i == 200 - response.body - else - @logger.debug "HTTP request failed: #{response.code} #{response.message}" - nil - end - end - rescue Timeout::Error, StandardError => e - @logger.debug "HTTP request error: #{e.message}" - nil - end - - # Common HTTP PUT request with timeout (for AWS tokens) - def http_put(url, headers = {}) - uri = URI(url) - - Timeout.timeout(METADATA_SERVICE_TIMEOUT) do - http = Net::HTTP.new(uri.host, uri.port) - http.open_timeout = METADATA_SERVICE_TIMEOUT - http.read_timeout = METADATA_SERVICE_TIMEOUT - - request = Net::HTTP::Put.new(uri) - headers.each { |k, v| request[k] = v } - - response = http.request(request) - - if response.code.to_i == 200 - response.body - else - @logger.debug "HTTP PUT request failed: #{response.code} #{response.message}" - nil - end - end - rescue Timeout::Error, StandardError => e - @logger.debug "HTTP PUT request error: #{e.message}" - nil - end - - # Build minimal metadata hash with only region and availability zone - def build_metadata(region: nil, availability_zone: nil) - { - region: 
region, - availability_zone: availability_zone - } - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/digital_ocean.rb b/mdprobe/providers/digital_ocean.rb deleted file mode 100644 index a41011e..0000000 --- a/mdprobe/providers/digital_ocean.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class DigitalOcean < Base - METADATA_URL = 'http://169.254.169.254/metadata/v1' - - def fetch_metadata - # Check if the metadata service is available by fetching ID - instance_id = get_metadata_field('id') - return nil unless instance_id - - region = get_metadata_field('region') - - build_metadata( - region: region, - availability_zone: region # Same as the region per coroot - ) - rescue => e - @logger.debug "Failed to fetch DigitalOcean metadata: #{e.message}" - nil - end - - private - - def get_metadata_field(path) - http_get("#{METADATA_URL}/#{path}") - end - end -end diff --git a/mdprobe/providers/gcp.rb b/mdprobe/providers/gcp.rb deleted file mode 100644 index 3cff8c0..0000000 --- a/mdprobe/providers/gcp.rb +++ /dev/null @@ -1,37 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class GCP < Base - METADATA_URL = 'http://metadata.google.internal/computeMetadata/v1' - METADATA_HEADER = { 'Metadata-Flavor' => 'Google' } - - def fetch_metadata - # GCP requires Metadata-Flavor header for all requests - zone = get_metadata_field('instance/zone') - return nil unless zone - - # Zone format: projects/PROJECT_NUMBER/zones/ZONE_NAME - zone_parts = zone.split('/') - zone_name = zone_parts.last if zone_parts.length > 0 - - # Extract region from zone (e.g., us-central1-a -> us-central1) - region = zone_name.rpartition('-').first if zone_name - - build_metadata( - region: region, - availability_zone: zone_name - ) - rescue => e - @logger.debug "Failed to fetch GCP metadata: #{e.message}" - nil - end - - private - - def get_metadata_field(path) - http_get("#{METADATA_URL}/#{path}", METADATA_HEADER) - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/hetzner.rb b/mdprobe/providers/hetzner.rb deleted file mode 100644 index 384cada..0000000 --- a/mdprobe/providers/hetzner.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class Hetzner < Base - METADATA_URL = 'http://169.254.169.254/hetzner/v1/metadata' - - def fetch_metadata - metadata = get_metadata - return nil unless metadata - - build_metadata( - region: metadata['region'], - availability_zone: metadata['availability-zone'] - ) - rescue => e - @logger.debug "Failed to fetch Hetzner metadata: #{e.message}" - nil - end - - private - - def get_metadata - response = http_get(METADATA_URL) - return nil unless response - - # Hetzner returns YAML format - parse_yaml_metadata(response) - rescue => e - @logger.debug "Failed to parse Hetzner metadata: #{e.message}" - nil - end - - def parse_yaml_metadata(yaml_content) - # Simple YAML parser for flat key-value pairs - metadata = {} - yaml_content.each_line do |line| - next if line.strip.empty? 
|| line.start_with?('#') - - if line.include?(':') - key, value = line.split(':', 2) - metadata[key.strip] = value.strip if key && value - end - end - metadata - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/ibm.rb b/mdprobe/providers/ibm.rb deleted file mode 100644 index d7bec32..0000000 --- a/mdprobe/providers/ibm.rb +++ /dev/null @@ -1,53 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class IBM < Base - TOKEN_URL = 'http://169.254.169.254/instance_identity/v1/token' - METADATA_URL = 'http://169.254.169.254/metadata/v1/instance' - - def fetch_metadata - # Get token - token = get_token - return nil unless token - - # Get instance metadata - response = http_get( - "#{METADATA_URL}?version=2022-03-01", - 'Authorization' => "Bearer #{token}" - ) - return nil unless response - - data = JSON.parse(response) - zone = data.dig('zone', 'name') - - # Extract region from zone (e.g., us-south-1 -> us-south) - region = zone ? zone.sub(/-\d+$/, '') : nil - - build_metadata( - region: region, - availability_zone: zone - ) - rescue => e - @logger.debug "Failed to fetch IBM metadata: #{e.message}" - nil - end - - private - - def get_token - response = http_put( - TOKEN_URL, - 'Metadata-Flavor' => 'ibm' - ) - return nil unless response - - data = JSON.parse(response) - data['access_token'] - rescue => e - @logger.debug "Failed to get IBM token: #{e.message}" - nil - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/oracle.rb b/mdprobe/providers/oracle.rb deleted file mode 100644 index 3ff7f13..0000000 --- a/mdprobe/providers/oracle.rb +++ /dev/null @@ -1,35 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class Oracle < Base - METADATA_URL = 'http://169.254.169.254/opc/v2' - METADATA_HEADER = { 'Authorization' => 'Bearer Oracle' } - - def fetch_metadata - instance_data = get_instance_metadata - return nil unless instance_data - - build_metadata( - region: instance_data['region'] || instance_data['canonicalRegionName'], - availability_zone: instance_data['availabilityDomain'] - ) - rescue => e - @logger.debug "Failed to fetch Oracle metadata: #{e.message}" - nil - end - - private - - def get_instance_metadata - response = http_get("#{METADATA_URL}/instance", METADATA_HEADER) - return nil unless response - - JSON.parse(response) - rescue JSON::ParserError => e - @logger.debug "Failed to parse Oracle instance metadata: #{e.message}" - nil - end - end -end \ No newline at end of file diff --git a/mdprobe/providers/scaleway.rb b/mdprobe/providers/scaleway.rb deleted file mode 100644 index 8ec9788..0000000 --- a/mdprobe/providers/scaleway.rb +++ /dev/null @@ -1,34 +0,0 @@ -# frozen_string_literal: true - -require_relative 'base' - -module Providers - class Scaleway < Base - METADATA_URL = 'http://169.254.42.42' - - def fetch_metadata - response = http_get("#{METADATA_URL}/conf") - return nil unless response - - # Parse key=value format - metadata = {} - response.each_line do |line| - next if line.strip.empty? - key, value = line.strip.split('=', 2) - metadata[key] = value if key && value - end - - zone = metadata['ZONE'] - # Extract region from zone (e.g., fr-par-1 -> fr-par) - region = zone ? 
zone.sub(/-\d+$/, '') : nil - - build_metadata( - region: region, - availability_zone: zone - ) - rescue => e - @logger.debug "Failed to fetch Scaleway metadata: #{e.message}" - nil - end - end -end \ No newline at end of file diff --git a/mdprobe/test/mdprobe_test.rb b/mdprobe/test/mdprobe_test.rb deleted file mode 100644 index abcf7ac..0000000 --- a/mdprobe/test/mdprobe_test.rb +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'tempfile' -require 'fileutils' -require 'stringio' -require_relative '../mdprobe' - -class TestMdprobe < Minitest::Test - def setup - @temp_dir = Dir.mktmpdir - @mdprobe = Mdprobe.new - # Silence logger for tests - @mdprobe.instance_variable_set(:@logger, Logger.new(nil)) - # Store original methods for restoration - @original_file_exist = File.method(:exist?) - @original_file_read = File.method(:read) - end - - def teardown - FileUtils.rm_rf(@temp_dir) - # Restore original File methods - File.define_singleton_method(:exist?, @original_file_exist) - File.define_singleton_method(:read, @original_file_read) - end - - def test_output_format_with_provider - # Test that output only contains Region and AvailabilityZone - mock_file('/sys/class/dmi/id/board_vendor', 'Amazon EC2') do - # Mock AWS provider to return test data - mock_provider = Minitest::Mock.new - mock_provider.expect(:fetch_metadata, { - region: 'us-east-1', - availability_zone: 'us-east-1a' - }) - - Providers::AWS.stub(:new, mock_provider) do - output = capture_stdout { @mdprobe.run } - data = JSON.parse(output) - - assert_equal 'us-east-1', data['Region'] - assert_equal 'us-east-1a', data['AvailabilityZone'] - # Ensure no other fields are present - assert_equal 2, data.keys.size - end - - mock_provider.verify - end - end - - def test_output_format_no_provider - # No cloud provider detected - stub all metadata services to fail - # AWS - stub_request(:put, "http://169.254.169.254/latest/api/token").to_return(status: 404) - # Azure - stub_request(:get, "http://169.254.169.254/metadata/instance?api-version=2021-02-01").to_return(status: 404) - # GCP - stub_request(:get, "http://metadata.google.internal/computeMetadata/v1/instance/zone").to_return(status: 404) - # DigitalOcean - stub_request(:get, "http://169.254.169.254/metadata/v1/id").to_return(status: 404) - # Hetzner - stub_request(:get, "http://169.254.169.254/hetzner/v1/metadata").to_return(status: 404) - # Alibaba - stub_request(:get, "http://100.100.100.200/latest/meta-data/instance-id").to_return(status: 404) - # Scaleway - stub_request(:get, "http://169.254.42.42/conf").to_return(status: 404) - # IBM - stub_request(:put, "http://169.254.169.254/instance_identity/v1/token?version=2022-03-01").to_return(status: 404) - # Oracle - stub_request(:get, "http://169.254.169.254/opc/v2/instance").to_return(status: 404) - - output = capture_stdout { @mdprobe.run } - assert_equal "{}\n", output - end - - def test_azure_zone_modification - # Test Azure-specific zone modification for numeric zones - mock_file('/sys/class/dmi/id/board_vendor', 'Microsoft Corporation') do - # Mock Azure provider to return numeric zone - mock_provider = Minitest::Mock.new - mock_provider.expect(:fetch_metadata, { - region: 'eastus', - availability_zone: '2' - }) - - Providers::Azure.stub(:new, mock_provider) do - output = capture_stdout { @mdprobe.run } - data = JSON.parse(output) - - assert_equal 'eastus', data['Region'] - assert_equal 'eastus-2', 
data['AvailabilityZone'] - end - - mock_provider.verify - end - end - - def test_unknown_values_when_nil - # Test that nil values become 'unknown' - mock_file('/sys/class/dmi/id/board_vendor', 'Google') do - mock_provider = Minitest::Mock.new - mock_provider.expect(:fetch_metadata, { - region: nil, - availability_zone: nil - }) - - Providers::GCP.stub(:new, mock_provider) do - output = capture_stdout { @mdprobe.run } - data = JSON.parse(output) - - assert_equal 'unknown', data['Region'] - assert_equal 'unknown', data['AvailabilityZone'] - end - - mock_provider.verify - end - end - - private - - def mock_file(path, content, &block) - # Stub File.exist? to return true for our mocked path - original_exist = File.method(:exist?) - File.define_singleton_method(:exist?) do |p| - p == path || original_exist.call(p) - end - - # Stub File.read to return our content for the mocked path - original_read = File.method(:read) - File.define_singleton_method(:read) do |p| - if p == path - content - else - begin - original_read.call(p) - rescue - '' - end - end - end - - result = block.call - - # Restore original methods - File.define_singleton_method(:exist?, original_exist) - File.define_singleton_method(:read, original_read) - - result - end - - def capture_stdout - original = $stdout - $stdout = StringIO.new - yield - $stdout.string - ensure - $stdout = original - end -end \ No newline at end of file diff --git a/mdprobe/test/providers/alibaba_test.rb b/mdprobe/test/providers/alibaba_test.rb deleted file mode 100644 index b23dd98..0000000 --- a/mdprobe/test/providers/alibaba_test.rb +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/alibaba' - -class TestAlibabaProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::Alibaba.new(@logger) - @base_url = 'http://100.100.100.200/latest/meta-data' - end - - def test_fetch_metadata_success - # Mock only the fields our minimal provider uses - stub_request(:get, "#{@base_url}/instance-id").to_return(status: 200, body: 'i-bp1hygp5b04o1k1l0abc') - - stub_request(:get, "#{@base_url}/region-id").to_return(status: 200, body: 'cn-hangzhou') - - stub_request(:get, "#{@base_url}/zone-id").to_return(status: 200, body: 'cn-hangzhou-b') - - metadata = @provider.fetch_metadata - - assert_equal 'cn-hangzhou', metadata[:region] - assert_equal 'cn-hangzhou-b', metadata[:availability_zone] - # Only two fields in minimal implementation - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_different_region - stub_request(:get, "#{@base_url}/instance-id").to_return(status: 200, body: 'i-sg12345abcdefg') - - stub_request(:get, "#{@base_url}/region-id").to_return(status: 200, body: 'ap-southeast-1') - - stub_request(:get, "#{@base_url}/zone-id").to_return(status: 200, body: 'ap-southeast-1a') - - metadata = @provider.fetch_metadata - - assert_equal 'ap-southeast-1', metadata[:region] - assert_equal 'ap-southeast-1a', metadata[:availability_zone] - end - - def test_fetch_metadata_network_error - # Instance ID request fails - service not available - stub_request(:get, "#{@base_url}/instance-id").to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - stub_request(:get, "#{@base_url}/instance-id").to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end -end diff --git 
a/mdprobe/test/providers/aws_test.rb b/mdprobe/test/providers/aws_test.rb deleted file mode 100644 index ef5a899..0000000 --- a/mdprobe/test/providers/aws_test.rb +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/aws' - -class TestAWSProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::AWS.new(@logger) - @base_url = 'http://169.254.169.254/latest' - end - - def test_fetch_metadata_success - # Mock token request (required for IMDSv2) - stub_request(:put, "#{@base_url}/api/token") - .with(headers: { 'X-aws-ec2-metadata-token-ttl-seconds' => '21600' }) - .to_return(status: 200, body: 'test-token') - - # Mock instance identity document - only need region and AZ - instance_identity = { - 'region' => 'us-west-2', - 'availabilityZone' => 'us-west-2a' - } - - stub_request(:get, "#{@base_url}/dynamic/instance-identity/document") - .with(headers: { 'X-aws-ec2-metadata-token' => 'test-token' }) - .to_return(status: 200, body: instance_identity.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'us-west-2', metadata[:region] - assert_equal 'us-west-2a', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_different_region - stub_request(:put, "#{@base_url}/api/token").to_return(status: 200, body: 'test-token') - - instance_identity = { - 'region' => 'eu-central-1', - 'availabilityZone' => 'eu-central-1b' - } - - stub_request(:get, "#{@base_url}/dynamic/instance-identity/document") - .with(headers: { 'X-aws-ec2-metadata-token' => 'test-token' }) - .to_return(status: 200, body: instance_identity.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'eu-central-1', metadata[:region] - assert_equal 'eu-central-1b', metadata[:availability_zone] - end - - def test_fetch_metadata_token_failure - # Token request fails - stub_request(:put, "#{@base_url}/api/token").to_return(status: 401) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - stub_request(:put, "#{@base_url}/api/token").to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_invalid_identity_document - stub_request(:put, "#{@base_url}/api/token").to_return(status: 200, body: 'test-token') - - stub_request(:get, "#{@base_url}/dynamic/instance-identity/document") - .with(headers: { 'X-aws-ec2-metadata-token' => 'test-token' }) - .to_return(status: 200, body: 'invalid json') - - metadata = @provider.fetch_metadata - assert_nil metadata - end -end diff --git a/mdprobe/test/providers/azure_test.rb b/mdprobe/test/providers/azure_test.rb deleted file mode 100644 index 3aa4d43..0000000 --- a/mdprobe/test/providers/azure_test.rb +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/azure' - -class TestAzureProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::Azure.new(@logger) - @base_url = 'http://169.254.169.254/metadata/instance' - end - - def test_fetch_metadata_success - instance_data = { - 'compute' => { - 'subscriptionId' => 'abc123-def456-ghi789', - 'vmId' => 'vm-12345', - 'vmSize' => 'Standard_D2s_v3', - 'location' => 'eastus', - 'zone' => '1' - }, - 'network' => { - 'interface' => [ - 
{ - 'ipv4' => { - 'ipAddress' => [ - { - 'privateIpAddress' => '10.0.0.4', - 'publicIpAddress' => '52.168.123.45' - } - ] - } - } - ] - } - } - - stub_request(:get, "#{@base_url}?api-version=2021-02-01") - .with(headers: { 'Metadata' => 'true' }) - .to_return(status: 200, body: instance_data.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'eastus', metadata[:region] - assert_equal '1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_with_fault_domain - instance_data = { - 'compute' => { - 'subscriptionId' => 'xyz789', - 'vmId' => 'vm-67890', - 'vmSize' => 'Standard_B2s', - 'location' => 'westeurope', - 'platformFaultDomain' => '2' # No zone, use fault domain - }, - 'network' => { - 'interface' => [ - { - 'ipv4' => { - 'ipAddress' => [ - { - 'privateIpAddress' => '192.168.1.10' - # No public IP - } - ] - } - } - ] - } - } - - stub_request(:get, "#{@base_url}?api-version=2021-02-01") - .with(headers: { 'Metadata' => 'true' }) - .to_return(status: 200, body: instance_data.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'westeurope', metadata[:region] - assert_equal '2', metadata[:availability_zone] # Falls back to platformFaultDomain - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_missing_network_info - instance_data = { - 'compute' => { - 'subscriptionId' => 'sub123', - 'vmId' => 'vm-abc', - 'vmSize' => 'Standard_A1', - 'location' => 'northeurope' - } - # No network section - } - - stub_request(:get, "#{@base_url}?api-version=2021-02-01") - .with(headers: { 'Metadata' => 'true' }) - .to_return(status: 200, body: instance_data.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'northeurope', metadata[:region] - assert_nil metadata[:availability_zone] # No zone or fault domain - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_network_error - stub_request(:get, "#{@base_url}?api-version=2021-02-01").to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - stub_request(:get, "#{@base_url}?api-version=2021-02-01").to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_invalid_json - stub_request(:get, "#{@base_url}?api-version=2021-02-01") - .with(headers: { 'Metadata' => 'true' }) - .to_return(status: 200, body: 'not valid json') - - metadata = @provider.fetch_metadata - assert_nil metadata - end -end diff --git a/mdprobe/test/providers/digital_ocean_test.rb b/mdprobe/test/providers/digital_ocean_test.rb deleted file mode 100644 index 0fe7190..0000000 --- a/mdprobe/test/providers/digital_ocean_test.rb +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/digital_ocean' - -class TestDigitalOceanProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::DigitalOcean.new(@logger) - @base_url = 'http://169.254.169.254/metadata/v1' - end - - def test_fetch_metadata_success - # Mock individual field responses - stub_request(:get, "#{@base_url}/id").to_return(status: 200, body: '289794365') - - stub_request(:get, "#{@base_url}/region").to_return(status: 200, body: 'nyc3') - - metadata = @provider.fetch_metadata - - assert_equal 'nyc3', metadata[:region] - assert_equal 'nyc3', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - 
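# [Editor's note -- not part of the deleted file above] DigitalOcean does
# not expose zones, so the provider reports the region slug for both
# fields. A minimal sketch of the round-trip these tests exercise,
# assuming WebMock is active; '42' and 'ams3' are hypothetical values:
#
#   stub_request(:get, "#{@base_url}/id").to_return(status: 200, body: '42')
#   stub_request(:get, "#{@base_url}/region").to_return(status: 200, body: 'ams3')
#   @provider.fetch_metadata  # => { region: 'ams3', availability_zone: 'ams3' }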
def test_fetch_metadata_different_region - # Mock metadata with different region - stub_request(:get, "#{@base_url}/id").to_return(status: 200, body: '123456789') - - stub_request(:get, "#{@base_url}/region").to_return(status: 200, body: 'sfo3') - - metadata = @provider.fetch_metadata - - assert_equal 'sfo3', metadata[:region] - assert_equal 'sfo3', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_no_region - # Mock metadata with no region available - stub_request(:get, "#{@base_url}/id").to_return(status: 200, body: '987654321') - - stub_request(:get, "#{@base_url}/region").to_return(status: 404) - - metadata = @provider.fetch_metadata - - assert_nil metadata[:region] - assert_nil metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_network_error - # ID request fails, indicating service not available - stub_request(:get, "#{@base_url}/id").to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - # ID request times out - stub_request(:get, "#{@base_url}/id").to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end -end diff --git a/mdprobe/test/providers/gcp_test.rb b/mdprobe/test/providers/gcp_test.rb deleted file mode 100644 index 9f9828d..0000000 --- a/mdprobe/test/providers/gcp_test.rb +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/gcp' - -class TestGCPProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::GCP.new(@logger) - @base_url = 'http://metadata.google.internal/computeMetadata/v1/instance' - end - - def test_fetch_metadata_success - # GCP provider only fetches zone - stub_request(:get, "#{@base_url}/zone") - .with(headers: { 'Metadata-Flavor' => 'Google' }) - .to_return(status: 200, body: 'projects/123456789/zones/us-central1-a') - - metadata = @provider.fetch_metadata - - assert_equal 'us-central1', metadata[:region] - assert_equal 'us-central1-a', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_different_zone - # Test with different zone format - stub_request(:get, "#{@base_url}/zone") - .with(headers: { 'Metadata-Flavor' => 'Google' }) - .to_return(status: 200, body: 'projects/456789/zones/europe-west1-b') - - metadata = @provider.fetch_metadata - - assert_equal 'europe-west1', metadata[:region] - assert_equal 'europe-west1-b', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_network_error - # Zone request fails, indicating service not available - stub_request(:get, "#{@base_url}/zone").to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - # Zone request times out - stub_request(:get, "#{@base_url}/zone").to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_parse_zone_formats - # Test various zone formats - test_cases = [ - ['projects/123/zones/us-central1-a', 'us-central1', 'us-central1-a'], - ['projects/456/zones/asia-northeast1-c', 'asia-northeast1', 'asia-northeast1-c'], - ['projects/789/zones/europe-west4-b', 'europe-west4', 'europe-west4-b'], - ['us-west1-a', 'us-west1', 'us-west1-a'], # Simple format - ['us-east1', 'us', 'us-east1'] # Zone without letter suffix - 
rpartition removes last segment - ] - - test_cases.each do |zone_response, expected_region, expected_zone| - stub_request(:get, "#{@base_url}/zone") - .with(headers: { 'Metadata-Flavor' => 'Google' }) - .to_return(status: 200, body: zone_response) - - metadata = @provider.fetch_metadata - assert_equal expected_region, metadata[:region], "Failed for zone: #{zone_response}" - assert_equal expected_zone, metadata[:availability_zone], "Failed for zone: #{zone_response}" - end - end -end diff --git a/mdprobe/test/providers/hetzner_test.rb b/mdprobe/test/providers/hetzner_test.rb deleted file mode 100644 index e953182..0000000 --- a/mdprobe/test/providers/hetzner_test.rb +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/hetzner' - -class TestHetznerProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::Hetzner.new(@logger) - @base_url = 'http://169.254.169.254/hetzner/v1/metadata' - end - - def test_fetch_metadata_success - # Mock YAML metadata response - yaml_response = <<~YAML - region: eu-central - availability-zone: fsn1-dc14 - YAML - - stub_request(:get, @base_url).to_return(status: 200, body: yaml_response) - - metadata = @provider.fetch_metadata - - assert_equal 'eu-central', metadata[:region] - assert_equal 'fsn1-dc14', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_partial_response - # Mock metadata with only region - yaml_response = <<~YAML - region: eu-central - YAML - - stub_request(:get, @base_url).to_return(status: 200, body: yaml_response) - - metadata = @provider.fetch_metadata - - assert_equal 'eu-central', metadata[:region] - assert_nil metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_different_zone - # Test different availability zone - yaml_response = <<~YAML - region: eu-central - availability-zone: nbg1-dc3 - YAML - - stub_request(:get, @base_url).to_return(status: 200, body: yaml_response) - - metadata = @provider.fetch_metadata - - assert_equal 'eu-central', metadata[:region] - assert_equal 'nbg1-dc3', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_minimal_response - # Test with empty YAML response - yaml_response = "" - - stub_request(:get, @base_url).to_return(status: 200, body: yaml_response) - - metadata = @provider.fetch_metadata - - assert_nil metadata[:region] - assert_nil metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_network_error - stub_request(:get, @base_url).to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - stub_request(:get, @base_url).to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_invalid_yaml - stub_request(:get, @base_url).to_return(status: 200, body: "invalid: yaml: content: with: colons") - - metadata = @provider.fetch_metadata - # Should still parse successfully as the parser is simple - refute_nil metadata - end - - def test_fetch_metadata_yaml_comments - # Test with YAML containing comments - yaml_response = <<~YAML - # This is a comment - region: us-east - # Another comment - availability-zone: ash1-dc1 - YAML - - stub_request(:get, @base_url).to_return(status: 200, body: yaml_response) - - metadata = 
@provider.fetch_metadata - - assert_equal 'us-east', metadata[:region] - assert_equal 'ash1-dc1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end -end diff --git a/mdprobe/test/providers/ibm_test.rb b/mdprobe/test/providers/ibm_test.rb deleted file mode 100644 index 5cc625f..0000000 --- a/mdprobe/test/providers/ibm_test.rb +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/ibm' - -class TestIBMProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::IBM.new(@logger) - @token_url = 'http://169.254.169.254/instance_identity/v1/token' - @metadata_url = 'http://169.254.169.254/metadata/v1/instance' - end - - def test_fetch_metadata_success - # Mock token request - stub_request(:put, @token_url) - .with(headers: { 'Metadata-Flavor' => 'ibm' }) - .to_return( - status: 200, - body: { 'access_token' => 'test-token-12345' }.to_json - ) - - # Mock metadata request - metadata_response = { - 'initialization' => { - 'user_data' => 'user-data-here' - }, - 'name' => 'my-vsi-instance', - 'id' => 'i-0123456789abcdef0', - 'profile' => { - 'name' => 'cx2-2x4' - }, - 'zone' => { - 'name' => 'us-south-1' - }, - 'vpc' => { - 'id' => 'vpc-12345', - 'name' => 'my-vpc' - }, - 'primary_network_interface' => { - 'id' => 'eth0-12345', - 'primary_ipv4_address' => '10.240.0.4', - 'floating_ips' => [ - { - 'address' => '169.63.123.45' - } - ] - } - } - - stub_request(:get, "#{@metadata_url}?version=2022-03-01") - .with(headers: { 'Authorization' => 'Bearer test-token-12345' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'us-south', metadata[:region] - assert_equal 'us-south-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_no_floating_ips - # Mock token request - stub_request(:put, @token_url) - .with(headers: { 'Metadata-Flavor' => 'ibm' }) - .to_return( - status: 200, - body: { 'access_token' => 'test-token-67890' }.to_json - ) - - # Mock metadata without floating IPs - metadata_response = { - 'name' => 'private-vsi', - 'id' => 'i-private-instance', - 'profile' => { - 'name' => 'bx2-4x16' - }, - 'zone' => { - 'name' => 'eu-de-2' - }, - 'vpc' => { - 'id' => 'vpc-67890' - }, - 'primary_network_interface' => { - 'primary_ipv4_address' => '172.16.10.20' - # No floating_ips array - } - } - - stub_request(:get, "#{@metadata_url}?version=2022-03-01") - .with(headers: { 'Authorization' => 'Bearer test-token-67890' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'eu-de', metadata[:region] - assert_equal 'eu-de-2', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_empty_floating_ips - # Mock token request - stub_request(:put, @token_url) - .with(headers: { 'Metadata-Flavor' => 'ibm' }) - .to_return( - status: 200, - body: { 'access_token' => 'test-token-empty' }.to_json - ) - - # Mock metadata with empty floating IPs array - metadata_response = { - 'id' => 'i-empty-floating', - 'profile' => { 'name' => 'mx2-2x16' }, - 'zone' => { 'name' => 'jp-tok-1' }, - 'vpc' => { 'id' => 'vpc-japan' }, - 'primary_network_interface' => { - 'primary_ipv4_address' => '192.168.1.10', - 'floating_ips' => [] # Empty array - } - } - - stub_request(:get, "#{@metadata_url}?version=2022-03-01") 
- .with(headers: { 'Authorization' => 'Bearer test-token-empty' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'jp-tok', metadata[:region] - assert_equal 'jp-tok-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_parse_region_from_zone - test_cases = [ - ['us-south-1', 'us-south'], - ['us-south-2', 'us-south'], - ['us-east-1', 'us-east'], - ['eu-de-1', 'eu-de'], - ['eu-de-2', 'eu-de'], - ['jp-tok-1', 'jp-tok'], - ['au-syd-1', 'au-syd'], - ['ca-tor-1', 'ca-tor'], - ['br-sao-1', 'br-sao'], - ['unknown', 'unknown'] # No dash-digit pattern - ] - - test_cases.each do |zone, expected_region| - stub_request(:put, @token_url) - .to_return(status: 200, body: { 'access_token' => "token-#{zone}" }.to_json) - - metadata_response = { - 'id' => "test-#{zone}", - 'zone' => { 'name' => zone }, - 'vpc' => { 'id' => 'test-vpc' }, - 'primary_network_interface' => { - 'primary_ipv4_address' => '10.0.0.1' - } - } - - stub_request(:get, "#{@metadata_url}?version=2022-03-01") - .with(headers: { 'Authorization' => "Bearer token-#{zone}" }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - assert_equal expected_region, metadata[:region], "Failed for zone: #{zone}" - end - end - - def test_fetch_metadata_token_failure - # Token request fails - stub_request(:put, @token_url).to_return(status: 401) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_token_timeout - stub_request(:put, @token_url).to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_invalid_token_json - stub_request(:put, @token_url).to_return(status: 200, body: 'not valid json') - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_data_request_failure - # Token succeeds but metadata request fails - stub_request(:put, @token_url).to_return(status: 200, body: { 'access_token' => 'valid-token' }.to_json) - - stub_request(:get, "#{@metadata_url}?version=2022-03-01").to_return(status: 403) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_minimal_response - stub_request(:put, @token_url).to_return(status: 200, body: { 'access_token' => 'minimal-token' }.to_json) - - # Minimal valid response - metadata_response = { - 'id' => 'i-minimal' - # No other fields - } - - stub_request(:get, "#{@metadata_url}?version=2022-03-01") - .with(headers: { 'Authorization' => 'Bearer minimal-token' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_nil metadata[:region] - assert_nil metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_multiple_floating_ips - # Test with multiple floating IPs (should use first one) - stub_request(:put, @token_url).to_return(status: 200, body: { 'access_token' => 'multi-ip-token' }.to_json) - - metadata_response = { - 'id' => 'i-multi-ip', - 'zone' => { 'name' => 'us-south-3' }, - 'vpc' => { 'id' => 'vpc-multi' }, - 'primary_network_interface' => { - 'primary_ipv4_address' => '10.10.10.10', - 'floating_ips' => [ - { 'address' => '52.116.123.45' }, - { 'address' => '52.116.123.46' }, - { 'address' => '52.116.123.47' } - ] - } - } - - stub_request(:get, "#{@metadata_url}?version=2022-03-01") - .with(headers: { 'Authorization' => 'Bearer multi-ip-token' }) - .to_return(status: 200, body: 
metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'us-south', metadata[:region] - assert_equal 'us-south-3', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end -end diff --git a/mdprobe/test/providers/oracle_test.rb b/mdprobe/test/providers/oracle_test.rb deleted file mode 100644 index 1a8b6be..0000000 --- a/mdprobe/test/providers/oracle_test.rb +++ /dev/null @@ -1,239 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/oracle' - -class TestOracleProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::Oracle.new(@logger) - @base_url = 'http://169.254.169.254/opc/v2/instance' - end - - def test_fetch_metadata_success - # Mock full metadata response - metadata_response = { - 'id' => 'ocid1.instance.oc1.iad.anuwcljt4q7f2vqcxyzabc123def456ghi789', - 'displayName' => 'my-oracle-instance', - 'shape' => 'VM.Standard.E2.1.Micro', - 'region' => 'us-ashburn-1', - 'canonicalRegionName' => 'us-ashburn-1', - 'ociAdName' => 'AD-1', - 'faultDomain' => 'FAULT-DOMAIN-2', - 'compartmentId' => 'ocid1.compartment.oc1..aaaaaaaabc123def456', - 'availabilityDomain' => 'Uocm:US-ASHBURN-AD-1', - 'metadata' => { - 'ssh_authorized_keys' => 'ssh-rsa AAAAB3...' - }, - 'vnics' => [ - { - 'vnicId' => 'ocid1.vnic.oc1.iad.abuwa', - 'privateIp' => '10.0.0.100', - 'publicIp' => '129.146.123.45', - 'macAddr' => '02:00:17:00:12:34', - 'subnetCidrBlock' => '10.0.0.0/24', - 'virtualRouterIp' => '10.0.0.1' - } - ] - } - - stub_request(:get, @base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'us-ashburn-1', metadata[:region] - assert_equal 'Uocm:US-ASHBURN-AD-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_no_public_ip - # Mock metadata without public IP - metadata_response = { - 'id' => 'ocid1.instance.oc1.eu-frankfurt-1.private123', - 'displayName' => 'private-instance', - 'shape' => 'VM.Standard2.1', - 'region' => 'eu-frankfurt-1', - 'canonicalRegionName' => 'eu-frankfurt-1', - 'ociAdName' => 'AD-2', - 'compartmentId' => 'ocid1.compartment.oc1..private456', - 'availabilityDomain' => 'Uocm:EU-FRANKFURT-1-AD-2', - 'vnics' => [ - { - 'vnicId' => 'ocid1.vnic.oc1.eu-frankfurt-1.xyz', - 'privateIp' => '172.16.0.50' - # No publicIp field - } - ] - } - - stub_request(:get, @base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'eu-frankfurt-1', metadata[:region] - assert_equal 'Uocm:EU-FRANKFURT-1-AD-2', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_multiple_vnics - # Test with multiple VNICs (should use first one) - metadata_response = { - 'id' => 'ocid1.instance.oc1.ap-tokyo-1.multi', - 'shape' => 'VM.Standard.A1.Flex', - 'region' => 'ap-tokyo-1', - 'compartmentId' => 'ocid1.compartment.oc1..multivnic', - 'availabilityDomain' => 'Uocm:AP-TOKYO-1-AD-1', - 'vnics' => [ - { - 'vnicId' => 'ocid1.vnic.oc1.ap-tokyo-1.primary', - 'privateIp' => '192.168.1.10', - 'publicIp' => '140.238.123.45' - }, - { - 'vnicId' => 'ocid1.vnic.oc1.ap-tokyo-1.secondary', - 'privateIp' => '192.168.2.10', - 'publicIp' => '140.238.123.46' - } - ] - } - - stub_request(:get, 
@base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'ap-tokyo-1', metadata[:region] - assert_equal 'Uocm:AP-TOKYO-1-AD-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_no_vnics - # Mock metadata without VNICs - metadata_response = { - 'id' => 'ocid1.instance.oc1.ca-toronto-1.novnic', - 'shape' => 'BM.Standard2.52', - 'region' => 'ca-toronto-1', - 'compartmentId' => 'ocid1.compartment.oc1..novnic', - 'availabilityDomain' => 'Uocm:CA-TORONTO-1-AD-1' - # No vnics array - } - - stub_request(:get, @base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'ca-toronto-1', metadata[:region] - assert_equal 'Uocm:CA-TORONTO-1-AD-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_empty_vnics - # Mock metadata with empty VNICs array - metadata_response = { - 'id' => 'ocid1.instance.oc1.ap-mumbai-1.empty', - 'shape' => 'VM.Standard3.Flex', - 'region' => 'ap-mumbai-1', - 'compartmentId' => 'ocid1.compartment.oc1..empty', - 'availabilityDomain' => 'Uocm:AP-MUMBAI-1-AD-1', - 'vnics' => [] # Empty array - } - - stub_request(:get, @base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - assert_equal 'ap-mumbai-1', metadata[:region] - assert_equal 'Uocm:AP-MUMBAI-1-AD-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_network_error - stub_request(:get, @base_url).to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - stub_request(:get, @base_url).to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_invalid_json - stub_request(:get, @base_url).to_return(status: 200, body: 'not valid json') - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_minimal_response - # Test with minimal valid response - metadata_response = { - 'id' => 'ocid1.instance.oc1.us-phoenix-1.minimal' - } - - stub_request(:get, @base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = @provider.fetch_metadata - - # Minimal response has no region or availability zone - assert_nil metadata[:region] - assert_nil metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_various_regions - # Test various Oracle Cloud regions - regions = [ - 'us-ashburn-1', - 'us-phoenix-1', - 'eu-frankfurt-1', - 'eu-amsterdam-1', - 'uk-london-1', - 'ca-toronto-1', - 'ca-montreal-1', - 'ap-tokyo-1', - 'ap-osaka-1', - 'ap-seoul-1', - 'ap-mumbai-1', - 'ap-sydney-1', - 'ap-melbourne-1', - 'sa-saopaulo-1', - 'me-jeddah-1', - 'me-dubai-1' - ] - - regions.each do |region| - metadata_response = { - 'id' => "ocid1.instance.oc1.#{region}.test", - 'region' => region, - 'compartmentId' => 'ocid1.compartment.oc1..test', - 'availabilityDomain' => "Uocm:#{region.upcase}-AD-1" - } - - stub_request(:get, @base_url) - .with(headers: { 'Authorization' => 'Bearer Oracle' }) - .to_return(status: 200, body: metadata_response.to_json) - - metadata = 
@provider.fetch_metadata - assert_equal region, metadata[:region], "Failed for region: #{region}" - end - end -end diff --git a/mdprobe/test/providers/scaleway_test.rb b/mdprobe/test/providers/scaleway_test.rb deleted file mode 100644 index fa934f2..0000000 --- a/mdprobe/test/providers/scaleway_test.rb +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require 'minitest/autorun' -require 'webmock/minitest' -require 'json' -require 'logger' -require_relative '../../providers/scaleway' - -class TestScalewayProvider < Minitest::Test - def setup - @logger = Logger.new(nil) - @provider = Providers::Scaleway.new(@logger) - @base_url = 'http://169.254.42.42' - end - - def test_fetch_metadata_success - # Mock key=value format response - metadata_response = <<~EOF - ID=aeb8cebb-5af6-49b8-b036-e8e7e816001a - NAME=my-instance - COMMERCIAL_TYPE=DEV1-S - HOSTNAME=my-instance - ORGANIZATION=b4bd99e0-b6ed-4e52-b95f-e627a77d8e57 - PROJECT=b4bd99e0-b6ed-4e52-b95f-e627a77d8e57 - ZONE=fr-par-1 - PRIVATE_IP=10.1.2.3 - PUBLIC_IP=51.15.123.45 - EOF - - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: metadata_response) - - metadata = @provider.fetch_metadata - - assert_equal 'fr-par', metadata[:region] - assert_equal 'fr-par-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_different_zone - # Test with different zone - metadata_response = <<~EOF - ID=xyz-123-456 - NAME=private-instance - COMMERCIAL_TYPE=GP1-XS - ORGANIZATION=org-12345 - ZONE=nl-ams-1 - PRIVATE_IP=192.168.50.100 - EOF - - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: metadata_response) - - metadata = @provider.fetch_metadata - - assert_equal 'nl-ams', metadata[:region] - assert_equal 'nl-ams-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_zone_without_suffix - # Test zone without number suffix - metadata_response = "ZONE=fr-par\nID=abc-def-789\n" - - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: metadata_response) - - metadata = @provider.fetch_metadata - - assert_equal 'fr-par', metadata[:region] - assert_equal 'fr-par', metadata[:availability_zone] - end - - def test_parse_region_from_zone - test_cases = [ - ['fr-par-1', 'fr-par'], - ['fr-par-2', 'fr-par'], - ['nl-ams-1', 'nl-ams'], - ['pl-waw-1', 'pl-waw'], - ['fr-par', 'fr-par'], # Zone without number - ['unknown', 'unknown'] - ] - - test_cases.each do |zone, expected_region| - metadata_response = "ZONE=#{zone}\nID=test-#{zone}\n" - - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: metadata_response) - - metadata = @provider.fetch_metadata - assert_equal expected_region, metadata[:region], "Failed for zone: #{zone}" - end - end - - def test_fetch_metadata_network_error - stub_request(:get, "#{@base_url}/conf").to_return(status: 404) - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_timeout - stub_request(:get, "#{@base_url}/conf").to_timeout - - metadata = @provider.fetch_metadata - assert_nil metadata - end - - def test_fetch_metadata_invalid_format - # Test with invalid key=value format - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: 'not a valid format') - - metadata = @provider.fetch_metadata - # Should return hash with nil values since no ZONE found - assert_nil metadata[:region] - assert_nil metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def 
test_fetch_metadata_minimal_response - # Test with minimal valid response - metadata_response = "ZONE=fr-par-1\n" - - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: metadata_response) - - metadata = @provider.fetch_metadata - - assert_equal 'fr-par', metadata[:region] - assert_equal 'fr-par-1', metadata[:availability_zone] - assert_equal 2, metadata.keys.size - end - - def test_fetch_metadata_empty_lines - # Test with empty lines in response - metadata_response = <<~EOF - ZONE=pl-waw-1 - - ID=proj-instance - - COMMERCIAL_TYPE=PRO2-M - EOF - - stub_request(:get, "#{@base_url}/conf").to_return(status: 200, body: metadata_response) - - metadata = @provider.fetch_metadata - - assert_equal 'pl-waw', metadata[:region] - assert_equal 'pl-waw-1', metadata[:availability_zone] - end -end diff --git a/proxy.rb b/proxy.rb deleted file mode 100644 index 1e34ec7..0000000 --- a/proxy.rb +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env ruby - -# Proxy server for cluster agent and node agent metrics -# -# The server is a critical component for the collector's functionality, enabling remote -# configuration and metrics access. -# -# This server runs on port 33000 and provides: -# 1. /v1/config - Serves the latest database configuration (databases.json) to the Cluster Agent -# 2. /v1/cluster-agent-enabled - Returns "yes" or "no" based on BetterStackClient.cluster_collector? -# 3. /v1/metrics - Proxies requests to Vector's metrics endpoint (localhost:39090) -# -# The cluster agent running in the beyla container connects to this proxy via host network -# to fetch configuration and check if it should be running. -# -# The node agent also runs in the beyla container and connects to the /v1/metrics endpoint -# via host network. - - -require 'webrick' -require 'json' -require 'net/http' -require 'uri' -require_relative 'engine/utils' -require_relative 'engine/better_stack_client' - -class WebServer - include Utils - - def initialize(working_dir) - @working_dir = working_dir - @client = BetterStackClient.new(working_dir) - end - - def start - server = WEBrick::HTTPServer.new(Port: 33000) - - server.mount_proc '/v1/config' do |req, res| - res.content_type = 'application/json' - res.body = latest_database_json - end - - server.mount_proc '/v1/cluster-agent-enabled' do |req, res| - res.content_type = 'text/plain' - res.body = @client.cluster_collector? ? 
"yes" : "no" - end - - # to preserve compatibility and prevent errors - server.mount_proc '/v1/metrics' do |req, res| - begin - uri = URI('http://localhost:39090/') - - # Forward the request to the target server (localhost:39090) - response = Net::HTTP.start(uri.host, uri.port) do |http| - # Create new request object based on original request method - proxy_req = case req.request_method - when 'GET' - Net::HTTP::Get.new(uri) - when 'POST' - Net::HTTP::Post.new(uri) - else - raise "Unsupported HTTP method: #{req.request_method}" - end - - # Copy all original headers except 'host' to maintain request context - req.header.each { |k,v| proxy_req[k] = v unless k.downcase == 'host' } - - # Copy request body for POST requests - proxy_req.body = req.body if req.request_method == 'POST' - - # Send request and get response - http.request(proxy_req) - end - - # Copy response status, headers and body back to client - res.status = response.code - response.each_header { |k,v| res[k] = v } - res.body = response.body - rescue => e - # Return 502 Bad Gateway if proxy request fails - puts "Bad Gateway error: #{e.message}" - res.status = 502 - res.body = "Bad Gateway: #{e.message}" - end - end - - trap 'INT' do server.shutdown end - trap 'TERM' do server.shutdown end - - $stdout.sync = true - server.start - end -end - -working_dir = File.expand_path(File.dirname(__FILE__)) - -web_server = WebServer.new(working_dir) -web_server.start diff --git a/supervisord.conf b/supervisord.conf deleted file mode 100644 index a97e2c2..0000000 --- a/supervisord.conf +++ /dev/null @@ -1,56 +0,0 @@ -[supervisord] -nodaemon=true -user=root -logfile=/var/log/supervisor/supervisord.log - -[unix_http_server] -file=/var/run/supervisor.sock - -[supervisorctl] -serverurl=unix:///var/run/supervisor.sock - -[rpcinterface:supervisor] -supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface - -[program:vector] -command=/vector.sh -autostart=true -autorestart=true -stderr_logfile=/var/log/supervisor/vector.err.log -stdout_logfile=/var/log/supervisor/vector.out.log -startretries=3 -startsecs=10 -stopwaitsecs=30 -stopsignal=TERM -exitcodes=0,1,2 -stopasgroup=true -killasgroup=true -environment=OTEL_SERVICE_NAME="better-stack-collector-vector" - -[program:webserver] -command=ruby /proxy.rb -autostart=true -autorestart=true -stderr_logfile=/var/log/supervisor/webserver.err.log -stdout_logfile=/var/log/supervisor/webserver.out.log -environment=OTEL_SERVICE_NAME="better-stack-collector-webserver" - -[program:updater] -command=ruby /updater.rb -autostart=true -autorestart=true -stderr_logfile=/var/log/supervisor/updater.err.log -stdout_logfile=/var/log/supervisor/updater.out.log -environment=OTEL_SERVICE_NAME="better-stack-collector-updater" - -[program:certbot] -command=/certbot-runner.sh -autostart=true -autorestart=true -stderr_logfile=/var/log/supervisor/certbot.err.log -stdout_logfile=/var/log/supervisor/certbot.out.log -environment=OTEL_SERVICE_NAME="better-stack-collector-certbot" - -[eventlistener:fatal_handler] -command=bash -c "printf 'READY\n'; while read line; do printf 'RESULT 2\nOK'; kill -15 1; done" -events=PROCESS_STATE_FATAL diff --git a/test/better_stack_client_error_handling_test.rb b/test/better_stack_client_error_handling_test.rb deleted file mode 100644 index ba601ba..0000000 --- a/test/better_stack_client_error_handling_test.rb +++ /dev/null @@ -1,155 +0,0 @@ -require 'minitest/autorun' -require 'webmock/minitest' -require 'fileutils' -require 'tmpdir' -require_relative 
'../engine/better_stack_client' - -class BetterStackClientErrorHandlingTest < Minitest::Test - def setup - @test_dir = Dir.mktmpdir - ENV['COLLECTOR_SECRET'] = 'test_secret' - @client = BetterStackClient.new(@test_dir) - - # Create required directories - FileUtils.mkdir_p(File.join(@test_dir, 'vector-config')) - FileUtils.mkdir_p(File.join(@test_dir, 'kubernetes-discovery', '0-default')) - end - - def teardown - FileUtils.rm_rf(@test_dir) - ENV.delete('COLLECTOR_SECRET') - WebMock.reset! - end - - def test_ping_propagates_network_errors - # The ping method doesn't handle network errors, they propagate - stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping") - .to_raise(SocketError.new("getaddrinfo: Name or service not known")) - - assert_raises(SocketError) do - @client.ping - end - end - - def test_process_configuration_stops_on_first_download_failure - new_version = "2025-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - - # Mock successful first file download, fail on second - files = [ - { 'path' => '/file1', 'name' => 'vector.yaml' }, - { 'path' => '/file2', 'name' => 'databases.json' }, - { 'path' => '/file3', 'name' => 'other.yaml' } - ] - - download_count = 0 - @client.define_singleton_method(:download_file) do |url, path| - download_count += 1 - FileUtils.mkdir_p(File.dirname(path)) - - if download_count == 2 - # Fail on second file - raise Utils::DownloadError, "Failed to download databases.json from https://example.com/databases.json after 2 retries. Response code: 404" - else - File.write(path, "test content #{download_count}") - true - end - end - - result = @client.process_configuration(new_version, "200", { 'files' => files }.to_json) - - # Test actual behavior - assert_nil result - # First file should exist - assert File.exist?(File.join(version_dir, 'vector.yaml')) - # Second and third files should not exist - assert !File.exist?(File.join(version_dir, 'databases.json')) - assert !File.exist?(File.join(version_dir, 'other.yaml')) - # Should have error file - assert File.exist?(File.join(@test_dir, 'errors.txt')) - end - - def test_process_configuration_raises_on_invalid_json - new_version = "2025-01-01T00:00:00" - - # The method expects valid JSON, it will raise on invalid JSON - assert_raises(JSON::ParserError) do - @client.process_configuration(new_version, "200", "not json at all") - end - end - - def test_process_configuration_rejects_path_traversal_filenames - new_version = "2025-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - - # Try various path traversal attempts - malicious_files = [ - { 'path' => '/file', 'name' => '../../../etc/passwd' }, - { 'path' => '/file', 'name' => '/etc/passwd' }, - { 'path' => '/file', 'name' => '..\\..\\windows\\system32\\config\\sam' }, - { 'path' => '/file', 'name' => '' }, - { 'path' => '/file', 'name' => nil } - ] - - malicious_files.each do |file_info| - @client.process_configuration(new_version, "200", { 'files' => [file_info] }.to_json) - - # Test actual behavior - should reject malicious paths - assert File.exist?(File.join(@test_dir, 'errors.txt')) - - # Should not create files outside version directory - assert !File.exist?('/etc/passwd.download') - assert !File.exist?(File.join(@test_dir, '../passwd')) - - # Verify no files created in version directory with path traversal - if Dir.exist?(version_dir) - files_in_version = Dir.glob(File.join(version_dir, '*')) - files_in_version.each do |file| - basename = File.basename(file) - assert 
!basename.include?('..'), "Should not create files with .. in name" - assert !basename.include?('/'), "Should not create files with / in name" - end - end - - # Clean up - FileUtils.rm_f(File.join(@test_dir, 'errors.txt')) - end - end - - def test_ping_handles_nil_configuration_version_gracefully - # Mock ping response with nil configuration_version - stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping") - .to_return(status: 200, body: { - 'status' => 'new_version_available', - 'configuration_version' => nil - }.to_json) - - # Track if get_configuration was called - get_configuration_called = false - @client.stub :get_configuration, lambda { |version| - get_configuration_called = true - } do - # Should not crash - @client.ping - end - - # Should attempt to get configuration even with nil version - assert get_configuration_called - end - - def test_cluster_collector_propagates_network_errors - stub_request(:post, "https://telemetry.betterstack.com/api/collector/cluster-collector") - .to_raise(Errno::ENETUNREACH.new("Network is unreachable")) - - # The method doesn't handle network errors, they propagate - assert_raises(Errno::ENETUNREACH) do - @client.cluster_collector? - end - end - - def test_clear_error_succeeds_when_file_missing - # Try to clear non-existent error file - should not crash - @client.send(:clear_error) - end - -end \ No newline at end of file diff --git a/test/better_stack_client_ping_test.rb b/test/better_stack_client_ping_test.rb deleted file mode 100644 index d456e2f..0000000 --- a/test/better_stack_client_ping_test.rb +++ /dev/null @@ -1,223 +0,0 @@ -require 'minitest/autorun' -require 'webmock/minitest' -require 'fileutils' -require 'tmpdir' -require_relative '../engine/better_stack_client' - -class BetterStackClientPingTest < Minitest::Test - def setup - @test_dir = Dir.mktmpdir - ENV['COLLECTOR_SECRET'] = 'test_secret' - @client = BetterStackClient.new(@test_dir) - - # Create required directories - FileUtils.mkdir_p(File.join(@test_dir, 'vector-config')) - FileUtils.mkdir_p(File.join(@test_dir, 'kubernetes-discovery', '0-default')) - end - - def teardown - FileUtils.rm_rf(@test_dir) - ENV.delete('COLLECTOR_SECRET') - WebMock.reset! - end - - def test_ping_updates_vector_config_when_kubernetes_discovery_changes - # Mock ping response with new version - stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping") - .to_return(status: 200, body: { status: 'new_version_available', configuration_version: '2025-01-01T00:00:00' }.to_json) - - # Mock configuration download - stub_request(:post, "https://telemetry.betterstack.com/api/collector/configuration") - .to_return(status: 200, body: { files: [{ path: '/file', name: 'vector.yaml' }] }.to_json) - - stub_request(:get, "https://telemetry.betterstack.com/file") - .with(query: hash_including("host")) - .to_return(status: 200, body: "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - # Mock kubernetes discovery to return true (changed) - @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:should_discover?) 
{ true } - @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:run) { true } - - # Track method calls - validate_upstream_called = false - promote_upstream_called = false - prepare_dir_called = false - validate_dir_called = false - promote_dir_called = false - - # Mock vector_config methods - vector_config = @client.instance_variable_get(:@vector_config) - vector_config.define_singleton_method(:validate_upstream_files) { |path| - validate_upstream_called = true - nil - } - vector_config.define_singleton_method(:promote_upstream_files) { |path| - promote_upstream_called = true - } - - new_config_dir = File.join(@test_dir, 'vector-config', 'new_test') - vector_config.define_singleton_method(:prepare_dir) do - prepare_dir_called = true - FileUtils.mkdir_p(new_config_dir) - new_config_dir - end - - vector_config.define_singleton_method(:validate_dir) { |dir| - validate_dir_called = true - nil - } - vector_config.define_singleton_method(:promote_dir) { |dir| - promote_dir_called = true - } - - @client.ping - - # Test actual behavior - should prepare, validate and promote new config - assert validate_upstream_called, "Should validate upstream file" - assert promote_upstream_called, "Should promote upstream file" - assert prepare_dir_called, "Should prepare new directory when kubernetes discovery changes" - assert validate_dir_called, "Should validate new directory" - assert promote_dir_called, "Should promote new directory" - end - - def test_ping_updates_vector_config_when_only_upstream_changes - # Mock ping response with new version - stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping") - .to_return(status: 200, body: { status: 'new_version_available', configuration_version: '2025-01-01T00:00:00' }.to_json) - - # Mock configuration download - stub_request(:post, "https://telemetry.betterstack.com/api/collector/configuration") - .to_return(status: 200, body: { files: [{ path: '/file', name: 'vector.yaml' }] }.to_json) - - stub_request(:get, "https://telemetry.betterstack.com/file") - .with(query: hash_including("host")) - .to_return(status: 200, body: "sources:\n test:\n type: file") - - # Mock kubernetes discovery to return false (no change) - @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:should_discover?) 
{ false } - @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:run) { false } - - # Track method calls - validate_upstream_called = false - promote_upstream_called = false - prepare_dir_called = false - validate_dir_called = false - promote_dir_called = false - - # Mock vector_config methods - vector_config = @client.instance_variable_get(:@vector_config) - vector_config.define_singleton_method(:validate_upstream_files) { |path| - validate_upstream_called = true - nil - } - vector_config.define_singleton_method(:promote_upstream_files) { |path| - promote_upstream_called = true - } - - new_config_dir = File.join(@test_dir, 'vector-config', 'new_test') - vector_config.define_singleton_method(:prepare_dir) do - prepare_dir_called = true - FileUtils.mkdir_p(new_config_dir) - new_config_dir - end - - vector_config.define_singleton_method(:validate_dir) { |dir| - validate_dir_called = true - nil - } - vector_config.define_singleton_method(:promote_dir) { |dir| - promote_dir_called = true - } - - @client.ping - - # Test actual behavior - should still update vector-config even without kubernetes discovery change - assert validate_upstream_called, "Should validate upstream file" - assert promote_upstream_called, "Should promote upstream file" - assert prepare_dir_called, "Should prepare new directory for upstream change" - assert validate_dir_called, "Should validate new directory" - assert promote_dir_called, "Should promote new directory" - end - - def test_ping_writes_error_when_vector_config_validation_fails - # Mock ping response with new version - stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping") - .to_return(status: 200, body: { status: 'new_version_available', configuration_version: '2025-01-01T00:00:00' }.to_json) - - # Mock configuration download - stub_request(:post, "https://telemetry.betterstack.com/api/collector/configuration") - .to_return(status: 200, body: { files: [{ path: '/file', name: 'vector.yaml' }] }.to_json) - - stub_request(:get, "https://telemetry.betterstack.com/file") - .with(query: hash_including("host")) - .to_return(status: 200, body: "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - # Mock kubernetes discovery - @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:should_discover?) 
{ true }
-    @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:run) { true }
-
-    # Track method calls
-    promote_dir_called = false
-
-    # Mock vector_config methods
-    vector_config = @client.instance_variable_get(:@vector_config)
-    vector_config.define_singleton_method(:validate_upstream_file) { |path| nil }
-    vector_config.define_singleton_method(:promote_upstream_file) { |path| }
-
-    new_config_dir = File.join(@test_dir, 'vector-config', 'new_test')
-    vector_config.define_singleton_method(:prepare_dir) do
-      FileUtils.mkdir_p(new_config_dir)
-      new_config_dir
-    end
-
-    vector_config.define_singleton_method(:validate_dir) { |dir| "Validation failed: Invalid config" }
-    vector_config.define_singleton_method(:promote_dir) { |dir|
-      promote_dir_called = true
-    }
-
-    @client.ping
-
-    # Test actual behavior - should not promote invalid config
-    assert !promote_dir_called, "Should not promote directory when validation fails"
-    assert File.exist?(File.join(@test_dir, 'errors.txt'))
-
-    error_content = File.read(File.join(@test_dir, 'errors.txt'))
-    assert error_content.include?("Validation failed for vector config with kubernetes_discovery")
-  end
-
-  def test_ping_does_not_clear_unclearable_error_file_when_no_updates
-    # Create an error file
-    File.write(File.join(@test_dir, 'errors.txt'), "Validation failed for vector config with kubernetes_discovery")
-
-    # Mock ping response with no updates
-    stub = stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping")
-      .to_return(status: 204)
-
-    # Mock kubernetes discovery
-    @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:should_discover?) { false }
-
-    @client.ping
-
-    # Test actual behavior - should NOT clear error file with unclearable error on ping
-    assert_requested(stub, times: 1)
-    assert File.exist?(File.join(@test_dir, 'errors.txt')), "Error file should not be cleared"
-  end
-
-  def test_ping_clears_clearable_error_file_when_no_updates
-    # Create an error file
-    File.write(File.join(@test_dir, 'errors.txt'), "Ping failed: 520 Gateway Timeout")
-
-    # Mock ping response with no updates
-    stub = stub_request(:post, "https://telemetry.betterstack.com/api/collector/ping")
-      .to_return(status: 204)
-
-    # Mock kubernetes discovery
-    @client.instance_variable_get(:@kubernetes_discovery).define_singleton_method(:should_discover?) { false }
-
-    @client.ping
-
-    # Test actual behavior - should clear the clearable error file when there are no updates
-    assert_requested(stub, times: 1)
-    assert !File.exist?(File.join(@test_dir, 'errors.txt')), "Error file should be cleared"
-  end
-end
diff --git a/test/better_stack_client_test.rb b/test/better_stack_client_test.rb
deleted file mode 100644
index fe8ea6f..0000000
--- a/test/better_stack_client_test.rb
+++ /dev/null
@@ -1,1165 +0,0 @@
-require 'bundler/setup'
-require 'minitest/autorun'
-require 'webmock/minitest'
-require 'json'
-require 'net/http'
-require_relative '../engine/better_stack_client'
-
-class BetterStackClientTest < Minitest::Test
-  def setup
-    # Create a temporary working directory for tests
-    @test_dir = File.join(Dir.pwd, 'tmp')
-    FileUtils.mkdir_p(@test_dir)
-    FileUtils.mkdir_p(File.join(@test_dir, 'versions'))
-
-    # Set required environment variables
-    ENV['COLLECTOR_SECRET'] = 'test_secret'
-    ENV['BASE_URL'] = 'https://test.betterstack.com'
-    ENV['COLLECTOR_VERSION'] = '1.0.0'
-    ENV['VECTOR_VERSION'] = '0.47.0'
-    ENV['BEYLA_VERSION'] = '2.2.4'
-    ENV['CLUSTER_AGENT_VERSION'] = '1.2.4'
-
-    @client = BetterStackClient.new(@test_dir)
-  end
-
-  def teardown
-    # Clean up temporary test directory
-    FileUtils.rm_rf(@test_dir) if File.exist?(@test_dir)
-    # Reset environment variables
-    ENV.delete('COLLECTOR_SECRET')
-    ENV.delete('BASE_URL')
-    ENV.delete('CLUSTER_COLLECTOR')
-    ENV.delete('COLLECTOR_VERSION')
-    ENV.delete('VECTOR_VERSION')
-    ENV.delete('BEYLA_VERSION')
-    ENV.delete('CLUSTER_AGENT_VERSION')
-    # Reset WebMock stubs
-    WebMock.reset!
-  end
-
-  def test_initialize_with_valid_secret
-    client = BetterStackClient.new(@test_dir)
-    assert_instance_of BetterStackClient, client
-  end
-
-  def test_initialize_exits_when_collector_secret_missing
-    ENV.delete('COLLECTOR_SECRET')
-    # Expect the process to exit with status 1
-    assert_raises(SystemExit) do
-      capture_io do
-        @client = BetterStackClient.new(@test_dir)
-      end
-    end
-  end
-
-  def test_ping_handles_204_when_no_updates_available
-    # Mock hostname method
-    original_hostname = @client.method(:hostname)
-    @client.define_singleton_method(:hostname) { "test-host" }
-
-    # Use POST with body parameters
-    stub = stub_request(:post, "https://test.betterstack.com/api/collector/ping")
-      .with(
-        body: hash_including({
-          "collector_secret" => "test_secret",
-          "cluster_collector" => "false",
-          "host" => "test-host",
-          "collector_version" => "1.0.0",
-          "vector_version" => "0.47.0",
-          "beyla_version" => "2.2.4",
-          "cluster_agent_version" => "1.2.4"
-        })
-      )
-      .to_return(status: 204, body: "")
-
-    # Test actual behavior - should make request and not crash
-    @client.ping
-
-    # Verify the request was made with correct parameters
-    assert_requested(stub, times: 1)
-
-    # Restore original method
-    @client.define_singleton_method(:hostname, original_hostname)
-  end
-
-  def test_ping_sends_all_required_parameters
-    # Override environment variables for this test
-    ENV['COLLECTOR_VERSION'] = "1.2.3"
-    ENV['VECTOR_VERSION'] = "0.28.1"
-    ENV['BEYLA_VERSION'] = "1.0.0"
-    ENV['CLUSTER_AGENT_VERSION'] = "1.2.4"
-
-    # Create a versions folder for latest_version test
-    test_version = "2023-01-01T00:00:00"
-    FileUtils.mkdir_p(File.join(@test_dir, 'versions', test_version))
-
-    # Mock hostname method
-    original_hostname = @client.method(:hostname)
-    expected_hostname = "test-host"
-    @client.define_singleton_method(:hostname) { expected_hostname }
-
-    # Updated stub to capture the body parameters
-    stub = stub_request(:post,
"https://test.betterstack.com/api/collector/ping") - .with { |request| - # For POST request, parse the form data in the body - @captured_params = {} - request.body.split('&').each do |pair| - key, value = pair.split('=', 2) - @captured_params[URI.decode_www_form_component(key)] = URI.decode_www_form_component(value || '') - end - - # Verify the beyla_version parameter value matches our expected value - @captured_params["beyla_version"] == "1.0.0" - } - .to_return(status: 204, body: "") - - # Call ping - @client.ping - - # Verify that the request was made with the expected parameters - assert_requested(stub, times: 1) - - # Verify all version parameters were sent correctly - assert_equal "1.2.3", @captured_params["collector_version"] - assert_equal "0.28.1", @captured_params["vector_version"] - assert_equal "1.0.0", @captured_params["beyla_version"] - assert_equal "1.2.4", @captured_params["cluster_agent_version"] - assert_equal expected_hostname, @captured_params["host"] - assert_equal test_version, @captured_params["configuration_version"] - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_ping_calls_get_configuration_when_new_version_available - # Mock hostname method - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - configuration_version = "2023-01-01T00:00:00" - response_body = { - status: "new_version_available", - configuration_version: configuration_version - }.to_json - - # Updated stub with body parameters - stub = stub_request(:post, "https://test.betterstack.com/api/collector/ping") - .with( - body: hash_including({ - "collector_secret" => "test_secret", - "cluster_collector" => "false", - "host" => "test-host", - "collector_version" => "1.0.0", - "vector_version" => "0.47.0", - "beyla_version" => "2.2.4", - "cluster_agent_version" => "1.2.4" - }) - ) - .to_return(status: 200, body: response_body) - - # Track if get_configuration was called with correct version - get_configuration_called = false - get_configuration_version = nil - - # Stub the get_configuration method to track calls - @client.stub :get_configuration, lambda { |version| - get_configuration_called = true - get_configuration_version = version - } do - @client.ping - end - - # Test actual behavior - should call get_configuration with new version - assert get_configuration_called, "get_configuration should be called" - assert_equal configuration_version, get_configuration_version - assert_requested(stub, times: 1) - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_ping_writes_error_file_on_unexpected_response - # Mock hostname method - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - # We need to monkey patch the client for this test since there's a bug in the code - - def @client.process_ping(code, body) - case code - when '204' - puts "No updates available" - return - when '200' - data = JSON.parse(body) - if data['status'] == 'new_version_available' - new_version = data['configuration_version'] - puts "New version available: #{new_version}" - get_configuration(new_version) - else - puts "No new version. Status: #{data['status']}" - end - else - puts "Unexpected response from ping endpoint: #{code}" - begin - error_details = JSON.parse(body) - puts "Error details: #{error_details}" - write_error("Ping failed: #{code}. 
Details: #{error_details}") - rescue JSON::ParserError - write_error("Ping failed: #{code}. Body: #{body}") - end - return - end - rescue => e - puts "Error: #{e.message}" - write_error("Error: #{e.message}") - return - end - - # Updated stub with body parameters - stub = stub_request(:post, "https://test.betterstack.com/api/collector/ping") - .with( - body: hash_including({ - "collector_secret" => "test_secret", - "cluster_collector" => "false", - "host" => "test-host", - "collector_version" => "1.0.0", - "vector_version" => "0.47.0", - "beyla_version" => "2.2.4", - "cluster_agent_version" => "1.2.4" - }) - ) - .to_return(status: 500, body: { error: "Server error" }.to_json) - - @client.ping - - # Test actual behavior - should write error file - assert File.exist?(File.join(@test_dir, 'errors.txt')) - error_content = File.read(File.join(@test_dir, 'errors.txt')) - assert error_content.include?("Ping failed: 500") - assert_requested(stub, times: 1) - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_ping_writes_error_file_on_network_error - - # Instead of raising an error directly, let's stub the method that handles the network error - def @client.ping - puts "Network error: Network error" - write_error("Network error: Network error") - return - end - - @client.ping - - # Test actual behavior - should write error file - assert File.exist?(File.join(@test_dir, 'errors.txt')) - error_content = File.read(File.join(@test_dir, 'errors.txt')) - assert error_content.include?("Network error") - - # Reset the method to not affect other tests - class << @client - remove_method :ping - end - end - - def test_ping_exits_on_401_unauthorized - # Mock hostname method - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - # Stub request to return 401 - stub_request(:post, "https://test.betterstack.com/api/collector/ping") - .to_return(status: 401, body: "Unauthorized") - - # Expect SystemExit when receiving 401 - assert_raises(SystemExit) do - capture_io do - @client.ping - end - end - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_ping_exits_on_403_forbidden - # Mock hostname method - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - # Stub request to return 403 - stub_request(:post, "https://test.betterstack.com/api/collector/ping") - .to_return(status: 403, body: "Forbidden") - - # Expect SystemExit when receiving 403 - assert_raises(SystemExit) do - capture_io do - @client.ping - end - end - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_cluster_collector_exits_on_401_unauthorized - # Don't force cluster collector mode - ENV.delete('CLUSTER_COLLECTOR') - - # Mock hostname - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - # Stub request to return 401 - stub_request(:post, "https://test.betterstack.com/api/collector/cluster-collector") - .to_return(status: 401, body: "Unauthorized") - - # Expect SystemExit when receiving 401 - assert_raises(SystemExit) do - capture_io do - @client.cluster_collector? 
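
Taken together, the error-handling tests above pin down a small contract around errors.txt: failures are recorded via write_error, clearing a missing file is a no-op, and only transient errors such as "Ping failed: ..." are cleared on a quiet ping while validation errors persist. A minimal sketch of that contract follows; the CLEARABLE_PREFIXES list and the @working_dir attribute are assumptions for illustration, not the client's actual internals.

require 'fileutils'

# Sketch only: the real BetterStackClient decides clearability differently;
# CLEARABLE_PREFIXES is a hypothetical stand-in.
CLEARABLE_PREFIXES = ['Ping failed:'].freeze

def write_error(message)
  File.write(File.join(@working_dir, 'errors.txt'), message)
end

def clear_error
  path = File.join(@working_dir, 'errors.txt')
  return unless File.exist?(path)  # clearing a missing file must not raise
  content = File.read(path)
  FileUtils.rm_f(path) if CLEARABLE_PREFIXES.any? { |prefix| content.start_with?(prefix) }
end
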
- end - end - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_cluster_collector_exits_on_403_forbidden - # Don't force cluster collector mode - ENV.delete('CLUSTER_COLLECTOR') - - # Mock hostname - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - # Stub request to return 403 - stub_request(:post, "https://test.betterstack.com/api/collector/cluster-collector") - .to_return(status: 403, body: "Forbidden") - - # Expect SystemExit when receiving 403 - assert_raises(SystemExit) do - capture_io do - @client.cluster_collector? - end - end - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_get_configuration - new_version = "2023-01-01T00:00:00" - - # Updated stub with body parameters - stub = stub_request(:post, "https://test.betterstack.com/api/collector/configuration") - .with( - body: { - "collector_secret" => "test_secret", - "configuration_version" => new_version - } - ) - .to_return(status: 200, body: { files: [] }.to_json) - - # Track process_configuration calls - process_called = false - process_args = nil - - # Capture method calls using a wrapper - original_method = @client.method(:process_configuration) - @client.define_singleton_method(:process_configuration) do |version, code, body| - process_called = true - process_args = [version, code, body] - original_method.call(version, code, body) - end - - @client.stub :process_configuration, lambda { |version, code, body| - process_called = true - process_args = [version, code, body] - } do - @client.get_configuration(new_version) - - # Test actual behavior - assert process_called, "process_configuration should be called" - assert_equal new_version, process_args[0] - assert_equal "200", process_args[1] - assert_equal({ "files" => [] }, JSON.parse(process_args[2])) - assert_requested(stub, times: 1) - end - - # Reset the method to not affect other tests - class << @client - remove_method :process_configuration - end - end - - def test_process_configuration_downloads_and_validates_files - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track method calls - validate_called = false - promote_called = false - validate_path = nil - promote_path = nil - - # Mock vector_config methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - validate_called = true - validate_path = dir - nil # validation passes - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - promote_called = true - promote_path = dir - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - File.write(path, "test content") - return true - end - - # Sample response data - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/databases.json", name: "databases.json" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Test actual behavior - files should be downloaded and validated - assert File.exist?(File.join(version_dir, "vector.yaml")) - assert File.exist?(File.join(version_dir, "databases.json")) - assert_equal "test content", File.read(File.join(version_dir, "vector.yaml")) - assert_equal "test content", File.read(File.join(version_dir, "databases.json")) - - # 
Validation and promotion should be called - assert validate_called, "validate_upstream_files should be called" - assert promote_called, "promote_upstream_files should be called" - assert_equal version_dir, validate_path - assert_equal version_dir, promote_path - - # Reset the method to not affect other tests - class << @client - remove_method :download_file - end - end - - def test_process_configuration_writes_error_when_validation_fails - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Mock necessary methods - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - File.write(path, "test content") - return true - end - - # Track promote calls - promote_called = false - - # Mock vector_config validation to fail - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_file) do |path| - "Validation failed for vector config" - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_file) do |path| - promote_called = true - end - - # Sample response data - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Test actual behavior - should not promote invalid config - assert !promote_called, "promote_upstream_file should not be called for invalid config" - assert File.exist?(File.join(@test_dir, 'errors.txt')) - error_content = File.read(File.join(@test_dir, 'errors.txt')) - assert error_content.include?("Validation failed for vector config") - - # Reset the method to not affect other tests - class << @client - remove_method :download_file - end - end - - def test_process_configuration_aborts_when_download_fails - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track method calls - validate_called = false - promote_called = false - - # Mock necessary methods - def @client.download_file(url, path) - return false # Simulate download failure - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_file) do |path| - validate_called = true - nil - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_file) do |path| - promote_called = true - end - - # Sample response data - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Test actual behavior - should not validate or promote after download failure - assert !validate_called, "validate_upstream_file should not be called after download failure" - assert !promote_called, "promote_upstream_file should not be called after download failure" - assert !File.exist?(File.join(version_dir, "vector.yaml")) - - # Reset the method to not affect other tests - class << @client - remove_method :download_file - end - end - - def test_process_configuration_writes_error_on_non_200_response - new_version = "2023-01-01T00:00:00" - - code = "404" - body = { status: "version_not_found" }.to_json - - @client.process_configuration(new_version, code, body) - - # Test actual behavior - should write error file - assert File.exist?(File.join(@test_dir, 'errors.txt')) - error_content = File.read(File.join(@test_dir, 'errors.txt')) - assert error_content.include?("Failed to 
fetch configuration") - assert error_content.include?("404") - end - - def test_process_configuration_handles_databases_csv - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Create enrichment directory - FileUtils.mkdir_p(File.join(@test_dir, 'enrichment')) - - # Track method calls - vector_validate_called = false - vector_promote_called = false - databases_validate_called = false - databases_promote_called = false - - # Mock vector_config methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - vector_validate_called = true - nil # validation passes - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - vector_promote_called = true - end - - # Mock databases_enrichment_table methods - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:validate) do - databases_validate_called = true - nil # validation passes - end - - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:promote) do - databases_promote_called = true - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - - # Create different content for different files - if path.include?('databases.csv') - File.write(path, "identifier,container,service,host\ndb1,container1,service1,host1\n") - else - File.write(path, "test content") - end - return true - end - - # Sample response data including databases.csv - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/databases.json", name: "databases.json" }, - { path: "/collector/file/databases.csv", name: "databases.csv" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Test actual behavior - all files should be downloaded - assert File.exist?(File.join(version_dir, "vector.yaml")) - assert File.exist?(File.join(version_dir, "databases.json")) - assert File.exist?(File.join(version_dir, "databases.csv")) - - # Verify databases.csv content - databases_content = File.read(File.join(version_dir, "databases.csv")) - assert databases_content.include?("identifier,container,service,host") - - # All validations and promotions should be called - assert vector_validate_called, "vector validate_upstream_files should be called" - assert vector_promote_called, "vector promote_upstream_files should be called" - assert databases_validate_called, "databases validate should be called" - assert databases_promote_called, "databases promote should be called" - - # Reset the method to not affect other tests - class << @client - remove_method :download_file - end - end - - def test_process_configuration_validates_databases_csv_headers - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track method calls - vector_promote_called = false - databases_promote_called = false - - # Mock vector_config methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - nil # validation passes - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - vector_promote_called = true - end - - # Mock databases_enrichment_table to fail validation - 
@client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:validate) do - "Databases enrichment table has invalid headers" - end - - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:promote) do - databases_promote_called = true - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - - # Create databases.csv with wrong headers - if path.include?('databases.csv') - File.write(path, "wrong,headers,here,now\n") - else - File.write(path, "test content") - end - return true - end - - # Sample response data including databases.csv - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/databases.csv", name: "databases.csv" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Test actual behavior - should write error and not promote - assert File.exist?(File.join(@test_dir, 'errors.txt')) - error_content = File.read(File.join(@test_dir, 'errors.txt')) - assert error_content.include?("invalid headers") - - # Vector and databases should not be promoted when databases validation fails - assert !vector_promote_called, "vector should not be promoted when databases validation fails" - assert !databases_promote_called, "databases should not be promoted when validation fails" - - # Reset the method to not affect other tests - class << @client - remove_method :download_file - end - end - - def test_process_configuration_works_without_databases_csv - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track method calls - databases_validate_called = false - databases_promote_called = false - - # Mock vector_config methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - nil # validation passes - end - - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - # Promotion succeeds - end - - # Mock databases_enrichment_table methods - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:validate) do - databases_validate_called = true - nil - end - - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:promote) do - databases_promote_called = true - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - File.write(path, "test content") - return true - end - - # Sample response data WITHOUT databases.csv - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/databases.json", name: "databases.json" } - ] - }.to_json - - result = @client.process_configuration(new_version, code, body) - - # Test actual behavior - should succeed without databases.csv - assert_equal true, result - assert !databases_validate_called, "databases validate should not be called when file not present" - assert !databases_promote_called, "databases promote should not be called when file not present" - - # Reset the method to not affect other tests - class << @client - remove_method :download_file - end - end - - - def test_cluster_collector_returns_false_on_409_response - # Mock hostname method - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host" } - - # Updated stub with body parameters - stub_request(:post, 
"https://test.betterstack.com/api/collector/cluster-collector") - .with( - body: { - "collector_secret" => "test_secret", - "host" => "test-host" - } - ) - .to_return(status: 409, body: "") - - result = @client.cluster_collector? - - assert_equal false, result - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end - - def test_cluster_collector_returns_true_when_env_override_set - # Set environment variable to force cluster collector mode - ENV['CLUSTER_COLLECTOR'] = 'true' - - result = @client.cluster_collector? - - assert_equal true, result - end - - def test_validate_enrichment_table_returns_error_when_enrichment_table_has_changed_and_validation_fails - # Mock enrichment table to return true - @client.instance_variable_get(:@containers_enrichment_table).define_singleton_method(:validate) { "Validation failed for enrichment table" } - result = @client.validate_enrichment_table - - assert_equal "Validation failed for enrichment table", result - end - - def test_validate_enrichment_table_returns_nil_when_enrichment_table_has_changed_and_validation_passes - # Mock enrichment table to return true - @client.instance_variable_get(:@containers_enrichment_table).define_singleton_method(:validate) { nil } - result = @client.validate_enrichment_table - - assert_nil result - end - - def test_databases_table_changed_returns_true_when_different - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:different?) { true } - result = @client.databases_table_changed? - - assert_equal true, result - end - - def test_databases_table_changed_returns_false_when_same - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:different?) { false } - result = @client.databases_table_changed? 
- - assert_equal false, result - end - - def test_validate_databases_table_returns_error_when_validation_fails - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:validate) { "Invalid headers in databases.csv" } - result = @client.validate_databases_table - - assert_equal "Invalid headers in databases.csv", result - assert File.exist?(File.join(@test_dir, 'errors.txt')) - error_content = File.read(File.join(@test_dir, 'errors.txt')) - assert error_content.include?("Invalid headers") - end - - def test_validate_databases_table_returns_nil_when_validation_passes - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:validate) { nil } - result = @client.validate_databases_table - - assert_nil result - end - - def test_promote_databases_table_calls_promote - promote_called = false - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:promote) do - promote_called = true - end - - @client.promote_databases_table - - assert promote_called, "promote should be called on databases_enrichment_table" - end - - def test_process_configuration_with_ssl_certificate_host - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track SSL manager method calls - process_ssl_called = false - processed_domain = nil - should_skip = false - reset_called = false - - # Mock SSL certificate manager - ssl_manager = @client.instance_variable_get(:@ssl_certificate_manager) - ssl_manager.define_singleton_method(:process_ssl_certificate_host) do |domain| - process_ssl_called = true - processed_domain = domain - true # domain changed - end - ssl_manager.define_singleton_method(:should_skip_validation?) do - should_skip - end - ssl_manager.define_singleton_method(:reset_change_flag) do - reset_called = true - end - - # Mock other required methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - nil # validation passes - end - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - # no-op - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - if path.end_with?('ssl_certificate_host.txt') - File.write(path, 'new.example.com') - else - File.write(path, 'test content') - end - true - end - - # Sample response with ssl_certificate_host.txt - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/ssl_certificate_host.txt", name: "ssl_certificate_host.txt" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Verify SSL processing - assert process_ssl_called, "process_ssl_certificate_host should be called" - assert_equal 'new.example.com', processed_domain - assert reset_called, "reset_change_flag should be called" - - # Reset the method - class << @client - remove_method :download_file - end - end - - def test_process_configuration_skips_validation_and_promotion_when_ssl_pending - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track validation and promotion calls - validate_called = false - promote_called = false - - # Mock SSL certificate manager to indicate skip validation - ssl_manager = @client.instance_variable_get(:@ssl_certificate_manager) - ssl_manager.define_singleton_method(:process_ssl_certificate_host) do 
|domain| - true # domain changed - end - ssl_manager.define_singleton_method(:should_skip_validation?) do - true # should skip - end - ssl_manager.define_singleton_method(:reset_change_flag) do - # no-op - end - - # Mock vector_config - neither validate nor promote should be called - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - validate_called = true - nil - end - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - promote_called = true - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - if path.end_with?('ssl_certificate_host.txt') - File.write(path, 'new.example.com') - else - File.write(path, 'test content') - end - true - end - - # Sample response with ssl_certificate_host.txt - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/ssl_certificate_host.txt", name: "ssl_certificate_host.txt" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Verify both validation and promotion were skipped - assert !validate_called, "validate_upstream_files should NOT be called when SSL validation should be skipped" - assert !promote_called, "promote_upstream_files should NOT be called when validation is skipped" - - # Verify version directory was cleaned up - assert !File.exist?(version_dir), "Version directory should be removed when skipping validation" - - # Reset the method - class << @client - remove_method :download_file - end - end - - def test_process_configuration_promotes_databases_when_ssl_skips_vector - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track what gets promoted - vector_promote_called = false - databases_promote_called = false - - # Mock SSL certificate manager to indicate skip validation - ssl_manager = @client.instance_variable_get(:@ssl_certificate_manager) - ssl_manager.define_singleton_method(:process_ssl_certificate_host) do |domain| - true # domain changed - end - ssl_manager.define_singleton_method(:should_skip_validation?) 
do - true # should skip vector validation - end - ssl_manager.define_singleton_method(:reset_change_flag) do - # no-op - end - - # Mock vector_config - should NOT be promoted - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - nil # won't be called - end - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - vector_promote_called = true - end - - # Mock databases_enrichment_table - SHOULD be promoted - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:validate) do - nil # validation passes - end - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:promote) do - databases_promote_called = true - end - test_dir = @test_dir - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:incoming_path) do - File.join(test_dir, 'enrichment', 'databases.incoming.csv') - end - @client.instance_variable_get(:@databases_enrichment_table).define_singleton_method(:target_path) do - File.join(test_dir, 'enrichment', 'databases.csv') - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - if path.end_with?('ssl_certificate_host.txt') - File.write(path, 'new.example.com') - elsif path.end_with?('databases.csv') - File.write(path, "identifier,container,service,host\ndb1,container1,service1,host1") - else - File.write(path, 'test content') - end - true - end - - # Sample response with ssl_certificate_host.txt and databases.csv - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/ssl_certificate_host.txt", name: "ssl_certificate_host.txt" }, - { path: "/collector/file/databases.csv", name: "databases.csv" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Verify vector was NOT promoted but databases WAS promoted - assert !vector_promote_called, "Vector config should NOT be promoted when validation is skipped" - assert databases_promote_called, "Databases.csv SHOULD be promoted even when vector is skipped" - - # Verify version directory was cleaned up - assert !File.exist?(version_dir), "Version directory should be removed when skipping validation" - - # Reset the method - class << @client - remove_method :download_file - end - end - - def test_process_configuration_with_empty_ssl_certificate_host - new_version = "2023-01-01T00:00:00" - version_dir = File.join(@test_dir, 'versions', new_version) - FileUtils.mkdir_p(version_dir) - - # Track SSL processing - processed_domain = nil - - # Mock SSL certificate manager - ssl_manager = @client.instance_variable_get(:@ssl_certificate_manager) - ssl_manager.define_singleton_method(:process_ssl_certificate_host) do |domain| - processed_domain = domain - true # domain changed to empty - end - ssl_manager.define_singleton_method(:should_skip_validation?) 
do - false # don't skip for empty domain - end - ssl_manager.define_singleton_method(:reset_change_flag) do - # no-op - end - - # Mock other methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - nil # validation passes - end - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - # no-op - end - - def @client.download_file(url, path) - FileUtils.mkdir_p(File.dirname(path)) - if path.end_with?('ssl_certificate_host.txt') - File.write(path, '') # empty file - else - File.write(path, 'test content') - end - true - end - - # Sample response with ssl_certificate_host.txt - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/ssl_certificate_host.txt", name: "ssl_certificate_host.txt" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Verify empty domain was processed - assert_equal '', processed_domain - - # Reset the method - class << @client - remove_method :download_file - end - end - - def test_download_file_includes_hostname_parameter - new_version = "2023-01-01T00:00:00" - - # Mock hostname method - original_hostname = @client.method(:hostname) - @client.define_singleton_method(:hostname) { "test-host-123" } - - # Mock vector_config methods - @client.instance_variable_get(:@vector_config).define_singleton_method(:validate_upstream_files) do |dir| - nil # validation passes - end - @client.instance_variable_get(:@vector_config).define_singleton_method(:promote_upstream_files) do |dir| - # no-op - end - - # Stub the actual HTTP requests with WebMock to track URLs - stub1 = stub_request(:get, "https://test.betterstack.com/collector/file/vector.yaml") - .with(query: hash_including("file" => "vector.yaml", "host" => "test-host-123")) - .to_return(status: 200, body: "vector content") - - stub2 = stub_request(:get, "https://test.betterstack.com/collector/file/databases.json") - .with(query: hash_including("file" => "databases.json", "other" => "param", "host" => "test-host-123")) - .to_return(status: 200, body: "databases content") - - # Sample response with multiple files - code = "200" - body = { - files: [ - { path: "/collector/file/vector.yaml?file=vector.yaml", name: "vector.yaml" }, - { path: "/collector/file/databases.json?file=databases.json&other=param", name: "databases.json" } - ] - }.to_json - - @client.process_configuration(new_version, code, body) - - # Verify that both requests were made with the hostname parameter - assert_requested(stub1, times: 1) - assert_requested(stub2, times: 1) - - # Restore original method - @client.define_singleton_method(:hostname, original_hostname) - end -end diff --git a/test/containers_enrichment_table_test.rb b/test/containers_enrichment_table_test.rb deleted file mode 100644 index 0b8c8d4..0000000 --- a/test/containers_enrichment_table_test.rb +++ /dev/null @@ -1,165 +0,0 @@ -require 'bundler/setup' -require 'minitest/autorun' -require 'tempfile' -require 'fileutils' -require_relative '../engine/containers_enrichment_table' - -class ContainersEnrichmentTableTest < Minitest::Test - def test_same_content_returns_false - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - File.write(target_path, "pid,container_name,container_id,image_name\n1234,name,deadbeefbad0,image") - File.write(incoming_path, 
"pid,container_name,container_id,image_name\n1234,name,deadbeefbad0,image") - - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - refute enrichment_table.different? - end - end - - def test_different_content_returns_different_hash - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(target_path, "pid,container_name,container_id,image_name\n1234,name,deadbeefbad0,image") - File.write(incoming_path, "pid,container_name,container_id,image_name\n1234,name,decafcoffee9,image") - - assert enrichment_table.different? - end - end - - def test_imaginary_path_returns_nil - result = ContainersEnrichmentTable.new('/imaginary/path.csv', '/imaginary/path.incoming.csv').different? - refute result - end - - def test_empty_directory_returns_nil - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - refute enrichment_table.different? - end - end - - def test_validate_file_not_found - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - assert_equal "Containers enrichment table not found at #{incoming_path}", enrichment_table.validate - end - end - - def test_validate_empty_file - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, '') - - result = enrichment_table.validate - assert_equal "Containers enrichment table is empty at #{incoming_path}", result - end - end - - def test_validate_invalid_header - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, "wrong,header,format\n") - - result = enrichment_table.validate - assert_equal "Containers enrichment table is not valid at #{incoming_path}", result - end - end - - def test_validate_valid_file - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, "pid,container_name,container_id,image_name\n123,test-container,abc123,test-image\n") - - result = enrichment_table.validate - assert_nil result - end - end - - def test_validate_with_whitespace_header - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, " pid,container_name,container_id,image_name \n123,test-container,abc123,test-image\n") - - result = enrichment_table.validate - assert_nil result - end - end - - def test_validate_with_only_header - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') 
- enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, "pid,container_name,container_id,image_name\n") - - result = enrichment_table.validate - assert_nil result - end - end - - def test_validate_with_extra_columns_in_header - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, "pid,container_name,container_id,image_name,extra\n") - - result = enrichment_table.validate - assert_equal "Containers enrichment table is not valid at #{incoming_path}", result - end - end - - def test_validate_with_missing_columns_in_header - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - File.write(incoming_path, "pid,container_name,container_id\n") - - result = enrichment_table.validate - assert_equal "Containers enrichment table is not valid at #{incoming_path}", result - end - end - - def test_promote - Dir.mktmpdir do |dir| - target_path = File.join(dir, 'docker-mappings.csv') - incoming_path = File.join(dir, 'docker-mappings.incoming.csv') - content = "pid,container_name,container_id,image_name\n123,test-container,abc123,test-image\n" - File.write(incoming_path, content) - enrichment_table = ContainersEnrichmentTable.new(target_path, incoming_path) - - enrichment_table.promote - - assert File.exist?(target_path) - assert_equal content, File.read(target_path) - assert !File.exist?(incoming_path) - end - end -end diff --git a/test/databases_enrichment_table_test.rb b/test/databases_enrichment_table_test.rb deleted file mode 100644 index 5d85e5c..0000000 --- a/test/databases_enrichment_table_test.rb +++ /dev/null @@ -1,169 +0,0 @@ -require 'bundler/setup' -require 'minitest/autorun' -require 'tmpdir' -require 'fileutils' -require_relative '../engine/databases_enrichment_table' - -class DatabasesEnrichmentTableTest < Minitest::Test - def setup - @temp_dir = Dir.mktmpdir - end - - def teardown - FileUtils.rm_rf(@temp_dir) - end - - def test_different_returns_false_when_incoming_does_not_exist - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - # Create target file - File.write(target_path, "identifier,container,service,host\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - assert_equal false, table.different? - end - - def test_different_returns_false_when_files_are_identical - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - content = "identifier,container,service,host\ndb1,container1,service1,host1\n" - File.write(target_path, content) - File.write(incoming_path, content) - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - assert_equal false, table.different? - end - - def test_different_returns_true_when_files_differ - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - File.write(target_path, "identifier,container,service,host\n") - File.write(incoming_path, "identifier,container,service,host\ndb1,container1,service1,host1\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - assert_equal true, table.different? 
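
Both enrichment-table classes removed in this diff share the same three-method surface exercised by these tests: different? compares the incoming file against the promoted one, validate checks for the exact expected header row, and promote moves the incoming file over the target. A condensed sketch of that shared contract; the class name is illustrative, the header constant varies per table, and the real databases table additionally rejects malformed CSV.

require 'fileutils'

class EnrichmentTableSketch
  # 'identifier,container,service,host' for databases.csv,
  # 'pid,container_name,container_id,image_name' for docker-mappings.csv.
  HEADER = 'identifier,container,service,host'.freeze

  def initialize(target_path, incoming_path)
    @target_path = target_path
    @incoming_path = incoming_path
  end

  def different?
    return false unless File.exist?(@incoming_path)
    return true unless File.exist?(@target_path)  # untested edge case; assumed to count as a change
    File.read(@target_path) != File.read(@incoming_path)
  end

  def validate
    return "enrichment table not found at #{@incoming_path}" unless File.exist?(@incoming_path)
    header = File.open(@incoming_path, &:gets)
    return "enrichment table is empty at #{@incoming_path}" if header.nil?
    "enrichment table has invalid headers at #{@incoming_path}" unless header.strip == HEADER
  end

  def promote
    FileUtils.mv(@incoming_path, @target_path)  # move, so the incoming copy disappears
  end
end

The promote tests at the end of both files confirm the move semantics: the target receives the incoming content and the incoming file no longer exists afterwards.
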
- end - - def test_validate_returns_error_when_file_does_not_exist - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_match(/not found/, error) - end - - def test_validate_returns_error_when_file_is_empty - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - File.write(incoming_path, "") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_match(/empty/, error) - end - - def test_validate_returns_error_when_headers_are_incorrect - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - # Wrong headers - File.write(incoming_path, "id,name,type,location\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_match(/invalid headers/, error) - assert_match(/Expected: identifier,container,service,host/, error) - end - - def test_validate_returns_error_when_headers_are_in_wrong_order - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - # Headers in wrong order - File.write(incoming_path, "container,identifier,service,host\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_match(/invalid headers/, error) - end - - def test_validate_returns_nil_when_file_is_valid - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - File.write(incoming_path, "identifier,container,service,host\ndb1,container1,service1,host1\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_nil error - end - - def test_validate_returns_nil_when_file_has_only_headers - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - # Just headers, no data rows - this should be valid - File.write(incoming_path, "identifier,container,service,host\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_nil error - end - - def test_validate_returns_error_for_malformed_csv - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - # Malformed CSV with unclosed quote - File.write(incoming_path, "identifier,container,service,host\n\"db1,container1,service1,host1\n") - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - error = table.validate - - assert_match(/malformed/, error) - end - - def test_promote_moves_file_from_incoming_to_target - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - content = "identifier,container,service,host\ndb1,container1,service1,host1\n" - File.write(incoming_path, content) - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - table.promote - - assert File.exist?(target_path) - assert !File.exist?(incoming_path) - assert_equal content, File.read(target_path) - end - - def test_promote_overwrites_existing_target_file - target_path = File.join(@temp_dir, 'databases.csv') - incoming_path = File.join(@temp_dir, 'databases.incoming.csv') - - old_content = 
"identifier,container,service,host\nold,old,old,old\n" - new_content = "identifier,container,service,host\nnew,new,new,new\n" - - File.write(target_path, old_content) - File.write(incoming_path, new_content) - - table = DatabasesEnrichmentTable.new(target_path, incoming_path) - table.promote - - assert File.exist?(target_path) - assert !File.exist?(incoming_path) - assert_equal new_content, File.read(target_path) - end -end diff --git a/test/ebpf_compatibility_checker_test.rb b/test/ebpf_compatibility_checker_test.rb deleted file mode 100644 index bfec0e8..0000000 --- a/test/ebpf_compatibility_checker_test.rb +++ /dev/null @@ -1,209 +0,0 @@ -require 'bundler/setup' -require 'minitest/autorun' -require 'tempfile' -require 'fileutils' -require_relative '../engine/ebpf_compatibility_checker' - -class EbpfCompatibilityCheckerTest < Minitest::Test - def setup - @temp_dir = Dir.mktmpdir - @ebpf_script_path = File.join(@temp_dir, 'ebpf.sh') - end - - def teardown - FileUtils.rm_rf(@temp_dir) - end - - def test_successful_ebpf_check - # Create a mock ebpf.sh script that returns successful JSON - create_mock_ebpf_script(<<~JSON) - { - "ebpf_supported": true, - "kernel_version": "5.15.0-58-generic", - "ring_buffer_supported": true, - "bpf_filesystem_mounted": true, - "btf_support_available": true, - "bpf_syscall_enabled": true, - "bpf_jit_enabled": true, - "architecture": "x86_64", - "distribution": "Ubuntu 22.04.1 LTS" - } - JSON - - checker = EbpfCompatibilityChecker.new(@temp_dir) - - assert checker.checked - assert_equal true, checker.system_information["ebpf_supported"] - assert_equal "5.15.0-58-generic", checker.system_information["kernel_version"] - assert_equal true, checker.system_information["ring_buffer_supported"] - assert_equal true, checker.system_information["bpf_filesystem_mounted"] - assert_equal true, checker.system_information["btf_support_available"] - assert_equal true, checker.system_information["bpf_syscall_enabled"] - assert_equal true, checker.system_information["bpf_jit_enabled"] - assert_equal "x86_64", checker.system_information["architecture"] - assert_equal "Ubuntu 22.04.1 LTS", checker.system_information["distribution"] - end - - def test_ebpf_not_supported - create_mock_ebpf_script(<<~JSON) - { - "ebpf_supported": false, - "kernel_version": "4.9.0-8-amd64", - "ring_buffer_supported": false, - "bpf_filesystem_mounted": false, - "btf_support_available": false, - "bpf_syscall_enabled": false, - "bpf_jit_enabled": null, - "architecture": "x86_64", - "distribution": "Debian GNU/Linux 9 (stretch)" - } - JSON - - checker = EbpfCompatibilityChecker.new(@temp_dir) - - assert checker.checked - assert_equal false, checker.system_information["ebpf_supported"] - assert_equal "4.9.0-8-amd64", checker.system_information["kernel_version"] - assert_equal false, checker.system_information["ring_buffer_supported"] - end - - def test_script_not_found - # Don't create the script - checker = nil - - # Capture stdout to suppress the error message during test - capture_io do - checker = EbpfCompatibilityChecker.new(@temp_dir) - end - - assert checker.system_information.key?(:error) - assert_equal "ebpf.sh script not found", checker.system_information[:error] - end - - def test_script_fails_with_exit_code - # Create a script that exits with non-zero status - create_failing_ebpf_script("Some error message", 1) - - checker = nil - capture_io do - checker = EbpfCompatibilityChecker.new(@temp_dir) - end - - assert checker.system_information.key?(:error) - assert_equal "eBPF check failed", 
checker.system_information[:error] - assert_equal 1, checker.system_information[:exit_code] - assert_match(/Some error message/, checker.system_information[:stderr]) - end - - def test_invalid_json_output - # Create a script that outputs invalid JSON - create_mock_ebpf_script("{ invalid json }") - - checker = nil - capture_io do - checker = EbpfCompatibilityChecker.new(@temp_dir) - end - - assert checker.system_information.key?(:error) - assert_match(/JSON parse error/, checker.system_information[:error]) - end - - def test_reported_status_tracking - create_mock_ebpf_script('{"ebpf_supported": true, "kernel_version": "5.15.0"}') - - checker = EbpfCompatibilityChecker.new(@temp_dir) - - # Initially not reported - refute checker.reported? - - # Mark as reported - checker.mark_as_reported - assert checker.reported? - end - - def test_system_information_returns_nil_when_no_data - # Create an instance with a custom data value - checker = EbpfCompatibilityChecker.new(@temp_dir) - checker.instance_variable_set(:@system_information, nil) - - assert_nil checker.system_information - end - - def test_non_executable_script - # Create script without execute permissions - File.write(@ebpf_script_path, "#!/bin/bash\necho '{}'") - File.chmod(0644, @ebpf_script_path) - - checker = nil - capture_io do - checker = EbpfCompatibilityChecker.new(@temp_dir) - end - - assert checker.system_information.key?(:error) - assert_match(/Permission denied|cannot execute/, checker.system_information[:error]) - end - - def test_generic_exception_handling - # First create the script so it exists - create_mock_ebpf_script('{"ebpf_supported": true}') - - # Mock Open3.capture3 to raise an exception - Open3.stub :capture3, -> (*args) { raise StandardError, "Simulated error" } do - checker = nil - out, _ = capture_io do - checker = EbpfCompatibilityChecker.new(@temp_dir) - end - - assert checker.system_information.key?(:error) - assert_match(/Exception: Simulated error/, checker.system_information[:error]) - assert_match(/Error running eBPF compatibility check/, out) - end - end - - def test_verifies_json_flag_is_passed - # Create a script that outputs different content based on the flag - File.write(@ebpf_script_path, <<~SCRIPT) - #!/bin/sh - if [ "$1" = "--json" ]; then - echo '{"ebpf_supported": true, "flag": "json"}' - else - echo '{"ebpf_supported": false, "flag": "none"}' - fi - SCRIPT - File.chmod(0755, @ebpf_script_path) - - checker = EbpfCompatibilityChecker.new(@temp_dir) - - assert checker.checked - assert_equal "json", checker.system_information["flag"] - end - - private - - def create_mock_ebpf_script(output) - # If output is a Hash, convert to JSON, otherwise use as-is - json_output = output.is_a?(Hash) ? 
JSON.generate(output) : output - - # Use heredoc with proper escaping - script_content = <<~SCRIPT - #!/bin/sh - if [ "$1" = "--json" ]; then - printf '%s\\n' '#{json_output.gsub("'", "'\"'\"'")}' - else - echo "Human readable output" - fi - SCRIPT - - File.write(@ebpf_script_path, script_content) - File.chmod(0755, @ebpf_script_path) - end - - def create_failing_ebpf_script(error_message, exit_code) - File.write(@ebpf_script_path, <<~SCRIPT) - #!/bin/sh - echo "#{error_message}" >&2 - exit #{exit_code} - SCRIPT - File.chmod(0755, @ebpf_script_path) - end -end diff --git a/test/kubernetes_discovery_integration_test.rb b/test/kubernetes_discovery_integration_test.rb deleted file mode 100644 index 022efe1..0000000 --- a/test/kubernetes_discovery_integration_test.rb +++ /dev/null @@ -1,387 +0,0 @@ -require 'minitest/autorun' -require 'fileutils' -require 'tmpdir' -require 'yaml' -require_relative '../engine/kubernetes_discovery' - -class KubernetesDiscoveryIntegrationTest < Minitest::Test - def setup - @test_dir = Dir.mktmpdir - - # Set NODE_NAME for tests BEFORE creating the discovery object - ENV['HOSTNAME'] = 'test-node' - ENV['KUBERNETES_SERVICE_HOST'] = 'kubernetes.default.svc' - ENV['KUBERNETES_SERVICE_PORT'] = '443' - - # Now create the discovery object which will read ENV['HOSTNAME'] - @discovery = KubernetesDiscovery.new(@test_dir) - - # Create required directories - FileUtils.mkdir_p(File.join(@test_dir, 'kubernetes-discovery', '0-default')) - end - - def teardown - FileUtils.rm_rf(@test_dir) - ENV.delete('HOSTNAME') - ENV.delete('KUBERNETES_SERVICE_HOST') - ENV.delete('KUBERNETES_SERVICE_PORT') - end - - def test_discover_and_update_finds_service_endpoints_and_standalone_pods - # Create latest-valid-upstream with vector.yaml that uses kubernetes_discovery - upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream') - FileUtils.mkdir_p(upstream_dir) - File.write(File.join(upstream_dir, 'vector.yaml'), "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - # Mock being in Kubernetes - @discovery.stub :in_kubernetes?, true do - @discovery.stub :read_service_account_token, 'test-token' do - @discovery.stub :read_namespace, 'default' do - @discovery.stub :read_ca_cert, nil do - - # Mock API responses for various workload types - mock_responses = { - '/api/v1/namespaces' => { - 'items' => [ - { 'metadata' => { 'name' => 'default' } }, - { 'metadata' => { 'name' => 'kube-system' } } - ] - }, - '/api/v1/namespaces/default/services' => { - 'items' => [ - { - 'metadata' => { - 'name' => 'webapp-service', - 'annotations' => { - 'prometheus.io/scrape' => 'true', - 'prometheus.io/port' => '8080', - 'prometheus.io/path' => '/metrics' - } - } - } - ] - }, - '/api/v1/namespaces/default/endpoints/webapp-service' => { - 'subsets' => [{ - 'addresses' => [{ - 'ip' => '10.0.0.1', - 'targetRef' => { - 'kind' => 'Pod', - 'name' => 'webapp-deployment-abc123-xyz' - } - }] - }] - }, - '/api/v1/namespaces/default/pods/webapp-deployment-abc123-xyz' => { - 'spec' => { 'nodeName' => 'test-node' }, - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'ReplicaSet', - 'name' => 'webapp-deployment-abc123' - }] - } - }, - '/apis/apps/v1/namespaces/default/replicasets/webapp-deployment-abc123' => { - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'Deployment', - 'name' => 'webapp-deployment' - }] - } - }, - '/api/v1/namespaces/kube-system/services' => { 'items' => [] }, - '/api/v1/namespaces/default/pods' => { - 'items' => [ - { - 'metadata' => { - 'name' => 
'cronjob-1234', - 'annotations' => { - 'prometheus.io/scrape' => 'true', - 'prometheus.io/port' => '9090' - }, - 'ownerReferences' => [{ - 'kind' => 'Job', - 'name' => 'scheduled-job-1234' - }] - }, - 'spec' => { 'nodeName' => 'test-node' }, - 'status' => { - 'phase' => 'Running', - 'podIP' => '10.0.0.2' - } - }, - { - 'metadata' => { - 'name' => 'standalone-pod', - 'annotations' => { - 'prometheus.io/scrape' => 'true', - 'prometheus.io/port' => '9090' - }, - # No ownerReferences key at all for truly standalone pod - }, - 'spec' => { 'nodeName' => 'test-node' }, - 'status' => { - 'phase' => 'Running', - 'podIP' => '10.0.0.3' - } - } - ] - }, - '/api/v1/namespaces/kube-system/pods' => { 'items' => [] } - } - - # Mock kubernetes_request to return appropriate responses - @discovery.stub :kubernetes_request, ->(path) { mock_responses[path] || { 'items' => [] } } do - # Mock validation to pass - @discovery.stub :validate_configs, true do - - result = @discovery.send(:discover_and_update) - assert result - - # Verify generated files - discovery_dir = Dir.glob(File.join(@test_dir, 'kubernetes-discovery', '2*')).first - assert discovery_dir - - files = Dir.glob(File.join(discovery_dir, '*.yaml')).sort - # Filter out discovered_pods.yaml - config_files = files.reject { |f| f.include?('discovered_pods.yaml') } - # We expect 3 configs: service endpoint + cronjob pod + standalone pod - # All pods with prometheus annotations should be discovered - assert_equal 3, config_files.length - - # Check deployment workload - # The filename includes the pod name, not the deployment name - webapp_file = config_files.find { |f| f.include?('webapp-deployment-abc123-xyz') } - assert webapp_file, "Could not find webapp file in: #{config_files.map { |f| File.basename(f) }}" - webapp_config = YAML.load_file(webapp_file) - # Check transform remap source for k8s.deployment.name - transform_source = webapp_config['transforms'].values.first['source'] - assert_match /\.tags\."resource\.k8s\.deployment\.name" = "webapp-deployment"/, transform_source - - # Check job workload (cronjob pod) - job_file = config_files.find { |f| f.include?('cronjob') } - assert job_file, "Should find cronjob pod file" - job_config = YAML.load_file(job_file) - # Jobs don't have deployment/statefulset/daemonset labels - transform_source = job_config['transforms'].values.first['source'] - refute_match /resource\.k8s\.deployment\.name/, transform_source - refute_match /resource\.k8s\.statefulset\.name/, transform_source - refute_match /resource\.k8s\.daemonset\.name/, transform_source - - # Check standalone pod (no workload) - standalone_file = config_files.find { |f| f.include?('standalone') } - assert standalone_file - standalone_config = YAML.load_file(standalone_file) - transform_source = standalone_config['transforms'].values.first['source'] - # Should have basic k8s labels but no workload labels - assert_match /resource\.k8s\.namespace\.name/, transform_source - assert_match /resource\.k8s\.pod\.name/, transform_source - refute_match /resource\.k8s\.deployment\.name/, transform_source - end - end - end - end - end - end - end - - def test_node_filtering_discovers_only_pods_on_current_node - # Create latest-valid-upstream with vector.yaml that uses kubernetes_discovery - upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream') - FileUtils.mkdir_p(upstream_dir) - File.write(File.join(upstream_dir, 'vector.yaml'), "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - # Mock being in Kubernetes - 
@discovery.stub :in_kubernetes?, true do - @discovery.stub :read_service_account_token, 'test-token' do - @discovery.stub :read_namespace, 'default' do - @discovery.stub :read_ca_cert, nil do - - # Mock API responses with pods on different nodes - mock_responses = { - '/api/v1/namespaces' => { - 'items' => [{ 'metadata' => { 'name' => 'default' } }] - }, - '/api/v1/namespaces/default/services' => { - 'items' => [{ - 'metadata' => { - 'name' => 'multi-node-service', - 'annotations' => { - 'prometheus.io/scrape' => 'true', - 'prometheus.io/port' => '8080' - } - } - }] - }, - '/api/v1/namespaces/default/endpoints/multi-node-service' => { - 'subsets' => [{ - 'addresses' => [ - { - 'ip' => '10.0.0.1', - 'targetRef' => { 'kind' => 'Pod', 'name' => 'pod-on-our-node' } - }, - { - 'ip' => '10.0.0.2', - 'targetRef' => { 'kind' => 'Pod', 'name' => 'pod-on-other-node' } - } - ] - }] - }, - '/api/v1/namespaces/default/pods/pod-on-our-node' => { - 'spec' => { 'nodeName' => 'test-node' }, - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'DaemonSet', - 'name' => 'node-agent' - }] - } - }, - '/api/v1/namespaces/default/pods/pod-on-other-node' => { - 'spec' => { 'nodeName' => 'other-node' }, - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'DaemonSet', - 'name' => 'node-agent' - }] - } - }, - '/api/v1/namespaces/default/pods' => { 'items' => [] } - } - - @discovery.stub :kubernetes_request, ->(path) { mock_responses[path] || { 'items' => [] } } do - @discovery.stub :validate_configs, true do - - result = @discovery.send(:discover_and_update) - assert result - - # Verify only our node's pod was discovered - discovery_dir = Dir.glob(File.join(@test_dir, 'kubernetes-discovery', '2*')).first - files = Dir.glob(File.join(discovery_dir, '*.yaml')) - # Filter out discovered_pods.yaml - config_files = files.reject { |f| f.include?('discovered_pods.yaml') } - assert_equal 1, config_files.length - assert config_files.first.include?('pod-on-our-node') - end - end - end - end - end - end - end - - def test_deduplication_prevents_duplicate_configs_for_same_pod - # Create latest-valid-upstream with vector.yaml that uses kubernetes_discovery - upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream') - FileUtils.mkdir_p(upstream_dir) - File.write(File.join(upstream_dir, 'vector.yaml'), "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - # Mock being in Kubernetes - @discovery.stub :in_kubernetes?, true do - @discovery.stub :read_service_account_token, 'test-token' do - @discovery.stub :read_namespace, 'default' do - @discovery.stub :read_ca_cert, nil do - - # Mock multiple services pointing to same pod - mock_responses = { - '/api/v1/namespaces' => { - 'items' => [{ 'metadata' => { 'name' => 'default' } }] - }, - '/api/v1/namespaces/default/services' => { - 'items' => [ - { - 'metadata' => { - 'name' => 'service-a', - 'annotations' => { - 'prometheus.io/scrape' => 'true', - 'prometheus.io/port' => '8080' - } - } - }, - { - 'metadata' => { - 'name' => 'service-b', - 'annotations' => { - 'prometheus.io/scrape' => 'true', - 'prometheus.io/port' => '9090' - } - } - } - ] - }, - '/api/v1/namespaces/default/endpoints/service-a' => { - 'subsets' => [{ - 'addresses' => [{ - 'ip' => '10.0.0.1', - 'targetRef' => { 'kind' => 'Pod', 'name' => 'shared-pod' } - }] - }] - }, - '/api/v1/namespaces/default/endpoints/service-b' => { - 'subsets' => [{ - 'addresses' => [{ - 'ip' => '10.0.0.1', - 'targetRef' => { 'kind' => 'Pod', 'name' => 'shared-pod' } - }] - }] - }, - 
'/api/v1/namespaces/default/pods/shared-pod' => { - 'spec' => { 'nodeName' => 'test-node' }, - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'Deployment', - 'name' => 'shared-app' - }] - } - }, - '/api/v1/namespaces/default/pods' => { 'items' => [] } - } - - @discovery.stub :kubernetes_request, ->(path) { mock_responses[path] || { 'items' => [] } } do - @discovery.stub :validate_configs, true do - - result = @discovery.send(:discover_and_update) - assert result - - # Verify deduplication worked - discovery_dir = Dir.glob(File.join(@test_dir, 'kubernetes-discovery', '2*')).first - files = Dir.glob(File.join(discovery_dir, '*.yaml')) - # Filter out discovered_pods.yaml - config_files = files.reject { |f| f.include?('discovered_pods.yaml') } - assert_equal 1, config_files.length - - # Check that first service's config was kept - config = YAML.load_file(config_files.first) - source = config['sources'].values.first - assert_equal ['http://10.0.0.1:8080/metrics'], source['endpoints'] - end - end - end - end - end - end - end - - def test_run_returns_false_on_kubernetes_api_error - # Create latest-valid-upstream with vector.yaml that uses kubernetes_discovery - upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream') - FileUtils.mkdir_p(upstream_dir) - File.write(File.join(upstream_dir, 'vector.yaml'), "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - # Mock being in Kubernetes - @discovery.stub :in_kubernetes?, true do - @discovery.stub :read_service_account_token, 'test-token' do - @discovery.stub :read_namespace, 'default' do - @discovery.stub :read_ca_cert, nil do - - # Mock API failure - @discovery.stub :kubernetes_request, ->(_) { raise "Kubernetes API error: 401 Unauthorized" } do - # Test actual behavior - should return false on API error - result = @discovery.run - assert_equal false, result - end - end - end - end - end - end -end \ No newline at end of file diff --git a/test/kubernetes_discovery_test.rb b/test/kubernetes_discovery_test.rb deleted file mode 100644 index 74c6914..0000000 --- a/test/kubernetes_discovery_test.rb +++ /dev/null @@ -1,279 +0,0 @@ -require 'minitest/autorun' -require 'fileutils' -require 'tmpdir' -require 'json' -require_relative '../engine/kubernetes_discovery' - -class KubernetesDiscoveryTest < Minitest::Test - def setup - @test_dir = Dir.mktmpdir - - # Set NODE_NAME for tests BEFORE creating the discovery object - ENV['HOSTNAME'] = 'test-node' - - # Now create the discovery object which will read ENV['HOSTNAME'] - @discovery = KubernetesDiscovery.new(@test_dir) - - # Create kubernetes-discovery directory structure - FileUtils.mkdir_p(File.join(@test_dir, 'kubernetes-discovery', '0-default')) - File.write(File.join(@test_dir, 'kubernetes-discovery', '0-default', 'dummy.yaml'), "sources: {}") - end - - def teardown - FileUtils.rm_rf(@test_dir) - ENV.delete('HOSTNAME') - end - - def test_should_discover_with_kubernetes_discovery_usage - # Create latest-valid-upstream directory with vector.yaml containing kubernetes_discovery_ reference - upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream') - FileUtils.mkdir_p(upstream_dir) - File.write(File.join(upstream_dir, 'vector.yaml'), "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape") - - assert @discovery.should_discover? 
- end - - def test_should_not_discover_without_kubernetes_discovery_usage - # Create latest-valid-upstream directory with vector.yaml without kubernetes_discovery_ reference - upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream') - FileUtils.mkdir_p(upstream_dir) - File.write(File.join(upstream_dir, 'vector.yaml'), "sources:\n test:\n type: file") - - assert !@discovery.should_discover? - end - - def test_should_not_discover_when_no_vector_yaml_exists - assert !@discovery.should_discover? - end - - def test_run_returns_false_when_not_in_kubernetes - # Mock not in kubernetes environment - discover_and_update_called = false - - @discovery.stub :in_kubernetes?, false do - @discovery.stub :should_discover?, true do - @discovery.stub :discover_and_update, -> { - discover_and_update_called = true - true - } do - result = @discovery.run - assert_equal false, result - assert !discover_and_update_called, "discover_and_update should not be called when not in kubernetes" - end - end - end - end - - def test_run_returns_false_when_rate_limited - # Mock should_discover and in_kubernetes - discover_and_update_called = false - - @discovery.stub :should_discover?, true do - @discovery.stub :in_kubernetes?, true do - @discovery.stub :discover_and_update, -> { - discover_and_update_called = true - true - } do - # Set last_run_time to simulate recent run - @discovery.instance_variable_set(:@last_run_time, Time.now) - - # Should skip due to rate limiting - result = @discovery.run - assert_equal false, result - assert !discover_and_update_called, "discover_and_update should not be called when rate limited" - end - end - end - end - - def test_cleanup_old_versions_keeps_only_specified_number - # Create multiple version directories - base_dir = File.join(@test_dir, 'kubernetes-discovery') - versions = [] - 10.times do |i| - version = "2025-01-0#{i}T00:00:00" - versions << version - FileUtils.mkdir_p(File.join(base_dir, version)) - end - - @discovery.cleanup_old_versions(5) - - # Test actual outcome - should keep only 5 latest versions + 0-default - remaining = Dir.glob(File.join(base_dir, '*')).select { |f| File.directory?(f) } - remaining_names = remaining.map { |f| File.basename(f) }.sort - - assert_equal 6, remaining.length # 5 versions + 0-default - assert remaining_names.include?('0-default') - - # Verify oldest versions were deleted - assert !File.exist?(File.join(base_dir, '2025-01-00T00:00:00')) - assert !File.exist?(File.join(base_dir, '2025-01-01T00:00:00')) - assert !File.exist?(File.join(base_dir, '2025-01-02T00:00:00')) - assert !File.exist?(File.join(base_dir, '2025-01-03T00:00:00')) - assert !File.exist?(File.join(base_dir, '2025-01-04T00:00:00')) - end - - def test_get_workload_info_returns_direct_owner_type - pod = { - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'DaemonSet', - 'name' => 'test-daemonset' - }] - } - } - - result = @discovery.send(:get_workload_info, pod, 'default') - assert_equal 'test-daemonset', result[:daemonset] - assert_nil result[:deployment] - assert_nil result[:statefulset] - assert_nil result[:replicaset] - end - - def test_get_workload_info_follows_replicaset_to_deployment - pod = { - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'ReplicaSet', - 'name' => 'test-deployment-abc123' - }] - } - } - - replicaset = { - 'metadata' => { - 'ownerReferences' => [{ - 'kind' => 'Deployment', - 'name' => 'test-deployment' - }] - } - } - - # Mock kubernetes_request for ReplicaSet lookup - @discovery.stub :kubernetes_request, replicaset do - 
result = @discovery.send(:get_workload_info, pod, 'default') - assert_equal 'test-deployment', result[:deployment] - assert_equal 'test-deployment-abc123', result[:replicaset] - assert_nil result[:statefulset] - assert_nil result[:daemonset] - end - end - - def test_get_workload_info_returns_empty_hash_when_no_owner - pod = { - 'metadata' => { - 'ownerReferences' => [] - } - } - - result = @discovery.send(:get_workload_info, pod, 'default') - assert_nil result[:deployment] - assert_nil result[:statefulset] - assert_nil result[:daemonset] - assert_nil result[:replicaset] - end - - def test_generate_config_creates_prometheus_scrape_source - endpoint_info = { - name: 'test-namespace_test-pod', - endpoint: 'http://10.0.0.1:9090/metrics', - namespace: 'test-namespace', - pod: 'test-pod', - service: 'test-service', - node_name: 'test-node', - pod_uid: 'abc123', - deployment_name: 'test-app' - } - - result = @discovery.send(:generate_config, endpoint_info) - - assert result[:filename] - assert result[:filename].start_with?('test-namespace_test-pod-') - assert result[:filename].end_with?('.yaml') - - config = result[:content] - source_name = 'prometheus_scrape_test-namespace_test-pod' - transform_name = 'kubernetes_discovery_test-namespace_test-pod' - - # Check source - assert_equal 'prometheus_scrape', config['sources'][source_name]['type'] - assert_equal ['http://10.0.0.1:9090/metrics'], config['sources'][source_name]['endpoints'] - assert_equal 30, config['sources'][source_name]['scrape_interval_secs'] - assert_equal 'instance', config['sources'][source_name]['instance_tag'] - - # Check transform - assert_equal 'remap', config['transforms'][transform_name]['type'] - assert_equal [source_name], config['transforms'][transform_name]['inputs'] - - # Check remap source includes k8s labels - remap_source = config['transforms'][transform_name]['source'] - assert_match /\.tags\."resource\.k8s\.namespace\.name" = "test-namespace"/, remap_source - assert_match /\.tags\."resource\.k8s\.pod\.name" = "test-pod"/, remap_source - assert_match /\.tags\."resource\.k8s\.node\.name" = "test-node"/, remap_source - assert_match /\.tags\."resource\.k8s\.deployment\.name" = "test-app"/, remap_source - end - - def test_generate_config_excludes_nil_labels - endpoint_info = { - name: 'test-namespace_test-pod', - endpoint: 'http://10.0.0.1:9090/metrics', - namespace: 'test-namespace', - pod: 'test-pod', - service: nil, - deployment_name: nil, - node_name: nil - } - - result = @discovery.send(:generate_config, endpoint_info) - config = result[:content] - transform_name = 'kubernetes_discovery_test-namespace_test-pod' - remap_source = config['transforms'][transform_name]['source'] - - # Should include namespace and pod - assert_match /\.tags\."resource\.k8s\.namespace\.name" = "test-namespace"/, remap_source - assert_match /\.tags\."resource\.k8s\.pod\.name" = "test-pod"/, remap_source - - # Should not include nil fields - refute_match /deployment_name/, remap_source - refute_match /node_name/, remap_source - end - - def test_configs_identical_returns_true_for_same_content - dir1 = File.join(@test_dir, 'dir1') - dir2 = File.join(@test_dir, 'dir2') - FileUtils.mkdir_p(dir1) - FileUtils.mkdir_p(dir2) - - # Create identical files - File.write(File.join(dir1, 'test.yaml'), "content: same") - File.write(File.join(dir2, 'test.yaml'), "content: same") - - assert @discovery.send(:configs_identical?, dir1, dir2) - end - - def test_configs_identical_returns_false_for_different_files - dir1 = File.join(@test_dir, 'dir1') - dir2 = 
File.join(@test_dir, 'dir2') - FileUtils.mkdir_p(dir1) - FileUtils.mkdir_p(dir2) - - # Create different files - File.write(File.join(dir1, 'test1.yaml'), "content: 1") - File.write(File.join(dir2, 'test2.yaml'), "content: 2") - - assert !@discovery.send(:configs_identical?, dir1, dir2) - end - - def test_configs_identical_returns_false_for_different_content - dir1 = File.join(@test_dir, 'dir1') - dir2 = File.join(@test_dir, 'dir2') - FileUtils.mkdir_p(dir1) - FileUtils.mkdir_p(dir2) - - # Create files with different content - File.write(File.join(dir1, 'test.yaml'), "content: 1") - File.write(File.join(dir2, 'test.yaml'), "content: 2") - - assert !@discovery.send(:configs_identical?, dir1, dir2) - end -end \ No newline at end of file diff --git a/test/ssl_certificate_manager_test.rb b/test/ssl_certificate_manager_test.rb deleted file mode 100644 index 9920070..0000000 --- a/test/ssl_certificate_manager_test.rb +++ /dev/null @@ -1,264 +0,0 @@ -require 'minitest/autorun' -require 'fileutils' -require 'tmpdir' -require_relative '../engine/ssl_certificate_manager' - -class SSLCertificateManagerTest < Minitest::Test - def setup - @test_dir = Dir.mktmpdir - @manager = SSLCertificateManager.new(@test_dir) - @domain_file = File.join(@test_dir, 'ssl_certificate_host.txt') - end - - def teardown - FileUtils.rm_rf(@test_dir) - end - - def test_initialize_with_working_dir - manager = SSLCertificateManager.new(@test_dir) - assert_equal File.join(@test_dir, 'ssl_certificate_host.txt'), manager.domain_file - end - - def test_initialize_without_working_dir - manager = SSLCertificateManager.new - assert_equal '/etc/ssl_certificate_host.txt', manager.domain_file - end - - def test_read_current_domain_when_file_missing - assert_equal '', @manager.read_current_domain - end - - def test_read_current_domain_when_file_exists - File.write(@domain_file, 'example.com') - assert_equal 'example.com', @manager.read_current_domain - end - - def test_read_current_domain_strips_whitespace - File.write(@domain_file, " example.com\n ") - assert_equal 'example.com', @manager.read_current_domain - end - - def test_process_ssl_certificate_host_first_time - # Mock restart_certbot to avoid system calls - restart_called = false - @manager.define_singleton_method(:restart_certbot) do - restart_called = true - true - end - - result = @manager.process_ssl_certificate_host('new.example.com') - - assert result, 'Should return true when domain changes' - assert_equal 'new.example.com', File.read(@domain_file) - assert restart_called, 'Should restart certbot for new non-empty domain' - assert @manager.domain_just_changed, 'Should set domain_just_changed flag' - end - - def test_process_ssl_certificate_host_no_change - File.write(@domain_file, 'example.com') - - # Mock restart_certbot - restart_called = false - @manager.define_singleton_method(:restart_certbot) do - restart_called = true - true - end - - result = @manager.process_ssl_certificate_host('example.com') - - assert !result, 'Should return false when domain unchanged' - assert !restart_called, 'Should not restart certbot when domain unchanged' - assert !@manager.domain_just_changed, 'Should not set domain_just_changed flag' - end - - def test_process_ssl_certificate_host_domain_change - File.write(@domain_file, 'old.example.com') - - # Mock restart_certbot - restart_called = false - @manager.define_singleton_method(:restart_certbot) do - restart_called = true - true - end - - result = @manager.process_ssl_certificate_host('new.example.com') - - assert result, 'Should return 
true when domain changes' - assert_equal 'new.example.com', File.read(@domain_file) - assert restart_called, 'Should restart certbot when domain changes' - assert @manager.domain_just_changed, 'Should set domain_just_changed flag' - end - - def test_process_ssl_certificate_host_to_empty - File.write(@domain_file, 'example.com') - - # Mock restart_certbot - restart_called = false - @manager.define_singleton_method(:restart_certbot) do - restart_called = true - true - end - - result = @manager.process_ssl_certificate_host('') - - assert result, 'Should return true when clearing domain' - assert_equal '', File.read(@domain_file) - assert !restart_called, 'Should not restart certbot when clearing domain' - assert @manager.domain_just_changed, 'Should set domain_just_changed flag' - end - - def test_process_ssl_certificate_host_from_empty - # Start with no file (empty domain) - - # Mock restart_certbot - restart_called = false - @manager.define_singleton_method(:restart_certbot) do - restart_called = true - true - end - - result = @manager.process_ssl_certificate_host('new.example.com') - - assert result, 'Should return true when setting domain from empty' - assert_equal 'new.example.com', File.read(@domain_file) - assert restart_called, 'Should restart certbot when setting non-empty domain' - assert @manager.domain_just_changed, 'Should set domain_just_changed flag' - end - - def test_certificate_exists_returns_false_for_empty_domain - File.write(@domain_file, '') - assert !@manager.certificate_exists?, 'Should return false for empty domain' - end - - def test_certificate_exists_returns_false_when_files_missing - assert !@manager.certificate_exists?('example.com'), 'Should return false when cert files missing' - end - - def test_certificate_exists_returns_false_when_only_cert_exists - domain = 'example.com' - cert_path = "/etc/ssl/#{domain}.pem" - - # Create temp cert file for testing - FileUtils.mkdir_p('/tmp/ssl_test') - temp_cert = "/tmp/ssl_test/#{domain}.pem" - FileUtils.touch(temp_cert) - - # Mock the certificate path checking - @manager.define_singleton_method(:certificate_exists?) do |d| - d ||= read_current_domain - return false if d.empty? - # Use temp paths for testing - File.exist?("/tmp/ssl_test/#{d}.pem") && File.exist?("/tmp/ssl_test/#{d}.key") - end - - assert !@manager.certificate_exists?(domain), 'Should return false when only cert exists' - - FileUtils.rm_rf('/tmp/ssl_test') - end - - def test_certificate_exists_returns_true_when_both_files_exist - domain = 'example.com' - - # Create temp cert files for testing - FileUtils.mkdir_p('/tmp/ssl_test') - temp_cert = "/tmp/ssl_test/#{domain}.pem" - temp_key = "/tmp/ssl_test/#{domain}.key" - FileUtils.touch(temp_cert) - FileUtils.touch(temp_key) - - # Mock the certificate path checking - @manager.define_singleton_method(:certificate_exists?) do |d| - d ||= read_current_domain - return false if d.empty? - # Use temp paths for testing - File.exist?("/tmp/ssl_test/#{d}.pem") && File.exist?("/tmp/ssl_test/#{d}.key") - end - - assert @manager.certificate_exists?(domain), 'Should return true when both cert files exist' - - FileUtils.rm_rf('/tmp/ssl_test') - end - - def test_should_skip_validation_when_domain_just_changed_and_no_cert - # Simulate domain just changed - @manager.instance_variable_set(:@domain_just_changed, true) - File.write(@domain_file, 'new.example.com') - - # Mock certificate_exists? to return false - @manager.define_singleton_method(:certificate_exists?) 
do |domain| - false - end - - assert @manager.should_skip_validation?, 'Should skip validation when domain changed and cert missing' - end - - def test_should_not_skip_validation_when_domain_unchanged - # Domain has not just changed - @manager.instance_variable_set(:@domain_just_changed, false) - File.write(@domain_file, 'example.com') - - assert !@manager.should_skip_validation?, 'Should not skip validation when domain unchanged' - end - - def test_should_not_skip_validation_when_cert_exists - # Simulate domain just changed - @manager.instance_variable_set(:@domain_just_changed, true) - File.write(@domain_file, 'example.com') - - # Mock certificate_exists? to return true - @manager.define_singleton_method(:certificate_exists?) do |domain| - true - end - - assert !@manager.should_skip_validation?, 'Should not skip validation when cert exists' - end - - def test_should_not_skip_validation_for_empty_domain - # Simulate domain just changed to empty - @manager.instance_variable_set(:@domain_just_changed, true) - File.write(@domain_file, '') - - assert !@manager.should_skip_validation?, 'Should not skip validation for empty domain' - end - - def test_reset_change_flag - @manager.instance_variable_set(:@domain_just_changed, true) - assert @manager.domain_just_changed - - @manager.reset_change_flag - assert !@manager.domain_just_changed, 'Should reset domain_just_changed flag' - end - - def test_restart_certbot_command - # Test that restart_certbot attempts the right system command - # We'll mock system to capture the command - command_executed = nil - @manager.define_singleton_method(:system) do |cmd| - command_executed = cmd - true - end - - @manager.send(:restart_certbot) - - expected_command = 'supervisorctl -c /etc/supervisor/conf.d/supervisord.conf restart certbot' - assert_equal expected_command, command_executed, 'Should execute correct supervisorctl command' - end - - def test_process_handles_write_errors_gracefully - # Make domain file unwritable - FileUtils.mkdir_p(@domain_file) # Create as directory to cause write error - - # Should raise error when unable to write - assert_raises do - @manager.process_ssl_certificate_host('example.com') - end - end - - def test_strips_whitespace_from_input - # Mock restart_certbot - @manager.define_singleton_method(:restart_certbot) { true } - - @manager.process_ssl_certificate_host(" example.com\n ") - assert_equal 'example.com', File.read(@domain_file) - end -end \ No newline at end of file diff --git a/test/test_server.rb b/test/test_server.rb deleted file mode 100644 index 0c0c6f8..0000000 --- a/test/test_server.rb +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env ruby - -require 'webrick' -require 'json' -require 'uri' -require 'fileutils' - -# Required environment variables: -# - INGESTING_HOST: The host to ingest data to (e.g. 
"logs.betterstack.com") -# - SOURCE_TOKEN: The source token for authentication -# - COLLECTOR_SECRET: The collector secret for testing (default: "COLLECTOR_SECRET") - -# Validate required environment variables -raise "INGESTING_HOST environment variable must be set" unless ENV['INGESTING_HOST'] -raise "SOURCE_TOKEN environment variable must be set" unless ENV['SOURCE_TOKEN'] -raise "COLLECTOR_SECRET environment variable must be set" unless ENV['COLLECTOR_SECRET'] - -# Set constants -PORT = 3010 -REQUIRED_SECRET = ENV['COLLECTOR_SECRET'] -LATEST_VERSION = "2025-05-11T11:13:00.000" -TEST_FILES_DIR = File.join(File.dirname(__FILE__), 'versions', LATEST_VERSION) - -# Ensure test files directory exists -FileUtils.mkdir_p(TEST_FILES_DIR) - -# Create server -server = WEBrick::HTTPServer.new(Port: PORT) - -# Helper method to validate collector secret -def validate_secret(req) - # For POST requests, parameters are in the request body - if req.request_method == "POST" - params = req.body ? WEBrick::HTTPUtils.parse_query(req.body) : {} - secret = params['collector_secret'] - else - # still handle GET params - secret = req.query['collector_secret'] - end - - secret == REQUIRED_SECRET -end - -# Helper method to log request details -def log_request(req) - puts "=== REQUEST ===" - puts "Path: #{req.path}" - puts "Method: #{req.request_method}" - if req.request_method == "POST" - puts "POST params: #{WEBrick::HTTPUtils.parse_query(req.body)}" if req.body && !req.body.empty? - else - puts "Query params: #{req.query}" - end - puts "===============" -end - -# Helper method to log response details -def log_response(res) - puts "=== RESPONSE ===" - puts "Status: #{res.status}" - puts "Body: #{res.body}" - puts "===============" - puts - puts -end - -# Helper method to mount endpoints with logging -def mount_endpoint(server, path, &block) - server.mount_proc "/api#{path}" do |req, res| - log_request(req) - block.call(req, res) - log_response(res) - end -end - -# Default handler for / -mount_endpoint(server, '/') do |req, res| - res.body = "Hello world from test/test_server.rb" -end - -# Endpoint: /collector/ping -mount_endpoint(server, '/collector/ping') do |req, res| - # Only accept POST requests - if req.request_method != "POST" - res.status = 405 - res.body = JSON.generate({ status: 'method_not_allowed' }) - next - end - - unless validate_secret(req) - res.status = 401 - res.body = JSON.generate({ status: 'invalid_collector_secret' }) - next - end - - # For POST requests, parameters are in the request body - params = req.body ? WEBrick::HTTPUtils.parse_query(req.body) : {} - current_version = params['configuration_version'] - - if current_version == LATEST_VERSION - res.status = 204 - else - res.status = 200 - res.body = JSON.generate({ - status: 'new_version_available', - configuration_version: LATEST_VERSION - }) - end -end - -# Endpoint: /collector/configuration -mount_endpoint(server, '/collector/configuration') do |req, res| - # Only accept POST requests - if req.request_method != "POST" - res.status = 405 - res.body = JSON.generate({ status: 'method_not_allowed' }) - next - end - - unless validate_secret(req) - res.status = 401 - res.body = JSON.generate({ status: 'invalid_collector_secret' }) - next - end - - # For POST requests, parameters are in the request body - params = req.body ? 
WEBrick::HTTPUtils.parse_query(req.body) : {} - configuration_version = params['configuration_version'] - - if configuration_version == LATEST_VERSION - res.status = 200 - res.body = JSON.generate({ - files: [ - { - path: "/api/collector/configuration-file?file=vector.yaml&configuration_version=#{configuration_version}", - name: "vector.yaml" - }, - { - path: "/api/collector/configuration-file?file=databases.json&configuration_version=#{configuration_version}", - name: "databases.json" - } - ] - }) - else - res.status = 404 - res.body = JSON.generate({ status: 'version_not_found' }) - end -end - - # Endpoint: /collector/cluster-collector - mount_endpoint(server, '/collector/cluster-collector') do |req, res| - # Only accept POST requests - if req.request_method != "POST" - res.status = 405 - res.body = JSON.generate({ status: 'method_not_allowed' }) - next - end - - unless validate_secret(req) - res.status = 401 - res.body = JSON.generate({ status: 'invalid_collector_secret' }) - next - end - - # Return 409 to indicate this is not a cluster collector - res.status = 409 -end - - # Endpoint for file downloads - mount_endpoint(server, '/collector/configuration-file') do |req, res| - filename = req.query['file'] - configuration_version = req.query['configuration_version'] - - # Security check to prevent directory traversal via either parameter - if filename.nil? || filename.empty? || filename.include?('..') || filename.start_with?('/') || configuration_version.nil? || configuration_version.include?('..') - res.status = 400 - res.body = "Invalid parameters" - next - end - - file_path = File.join(File.dirname(__FILE__), 'versions', configuration_version, filename) - - if File.exist?(file_path) - res.status = 200 - content = File.read(file_path) - - # Replace placeholders with environment variables - content.gsub!('INGESTING_HOST', ENV['INGESTING_HOST']) - content.gsub!('SOURCE_TOKEN', ENV['SOURCE_TOKEN']) - - res.body = content - - # Set appropriate content type - if filename.end_with?('.json') - res['Content-Type'] = 'application/json' - elsif filename.end_with?('.yaml', '.yml') - res['Content-Type'] = 'application/yaml' - else - res['Content-Type'] = 'text/plain' - end - else - res.status = 404 - res.body = "File not found" - end -end - - # Setup signal handling for graceful shutdown - trap('INT') { server.shutdown } - trap('TERM') { server.shutdown } - - # Start the server - puts "Starting test server on port #{PORT}" - puts "Required collector secret: #{REQUIRED_SECRET}" - puts "Latest version: #{LATEST_VERSION}" - puts "Using INGESTING_HOST: #{ENV['INGESTING_HOST']}" - puts "Using SOURCE_TOKEN: #{ENV['SOURCE_TOKEN']}" - puts "Using COLLECTOR_SECRET: #{ENV['COLLECTOR_SECRET']}" -server.start diff --git a/test/utils_edge_cases_test.rb b/test/utils_edge_cases_test.rb deleted file mode 100644 index 7fcaf48..0000000 --- a/test/utils_edge_cases_test.rb +++ /dev/null @@ -1,195 +0,0 @@ -require 'minitest/autorun' -require 'fileutils' -require 'tmpdir' -require 'webmock/minitest' -require_relative '../engine/utils' - -class UtilsEdgeCasesTest < Minitest::Test - def setup - @test_dir = Dir.mktmpdir - @working_dir = @test_dir - end - - def teardown - FileUtils.rm_rf(@test_dir) - end - - def test_latest_version_with_invalid_directory_names - versions_dir = File.join(@working_dir, 'versions') - FileUtils.mkdir_p(versions_dir) - - # Create various invalid version directories - invalid_dirs = [ - 'not-a-timestamp', - '2025-13-01T00:00:00', # Invalid month - '2025-01-32T00:00:00', # Invalid day - '2025-01-01T25:00:00', # Invalid hour - '2025/01/01T00:00:00', # Wrong format - '', # Empty - '.',
# Dot - '..', # Double dot - ] - - invalid_dirs.each { |dir| FileUtils.mkdir_p(File.join(versions_dir, dir)) } - - # Create one valid directory - valid_version = '2025-01-01T00:00:00' - FileUtils.mkdir_p(File.join(versions_dir, valid_version)) - - # The implementation sorts alphabetically, so 'not-a-timestamp' comes after '2025-...' - # This is testing the actual behavior, not the ideal behavior - result = latest_version - assert_equal 'not-a-timestamp', result - end - - def test_latest_kubernetes_discovery_with_symlinks - k8s_dir = File.join(@working_dir, 'kubernetes-discovery') - FileUtils.mkdir_p(k8s_dir) - - # Create actual directories - FileUtils.mkdir_p(File.join(k8s_dir, '2025-01-01T00:00:00')) - FileUtils.mkdir_p(File.join(k8s_dir, '2025-01-02T00:00:00')) - - # Create symlink pointing to latest - FileUtils.ln_s(File.join(k8s_dir, '2025-01-02T00:00:00'), - File.join(k8s_dir, 'latest')) - - # The implementation returns the last item alphabetically, including symlinks - # 'latest' comes after '2025-01-02T00:00:00' alphabetically - assert_equal File.join(k8s_dir, 'latest'), - latest_kubernetes_discovery - end - - def test_download_file_with_redirect_loop - url = 'https://example.com/file.txt' - path = File.join(@working_dir, 'downloaded_file.txt') - - # Mock redirect loop - include hostname parameter - stub_request(:get, url) - .with(query: hash_including("host")) - .to_return(status: 302, headers: { 'Location' => url }) - - error = assert_raises(Utils::DownloadError) do - download_file(url, path) - end - assert_equal "Failed to download downloaded_file.txt from https://example.com/file.txt after 2 retries. Response code: 302", error.message - end - - def test_download_file_with_huge_response - url = 'https://example.com/huge.txt' - path = File.join(@working_dir, 'huge.txt') - - # Mock response with huge content-length - include hostname parameter - stub_request(:get, url) - .with(query: hash_including("host")) - .to_return( - status: 200, - headers: { 'Content-Length' => '10737418240' }, # 10GB - body: 'small actual content' - ) - - result = download_file(url, path) - assert_equal true, result - assert_equal 'small actual content', File.read(path) - end - - def test_write_error_with_unicode_and_special_characters - error_messages = [ - "Error with emoji 🚨 and unicode ñ", - "Error with null byte \x00 in middle", - "Error with \n newlines \n and \t tabs", - "Error with ANSI escape \e[31mred text\e[0m", - "Very " + "long " * 1000 + "error message" - ] - - error_messages.each do |msg| - write_error(msg) - assert File.exist?(File.join(@working_dir, 'errors.txt')) - - content = File.read(File.join(@working_dir, 'errors.txt')) - # Should handle special characters gracefully - assert content.length > 0 - end - end - - def test_hostname_with_various_hostname_commands - # Test different hostname command outputs - hostname_outputs = [ - "my-host\n", - "my-host.local\n", - "MY-HOST\n", - " my-host \n", - "my-host\r\n", # Windows style - "", # Empty - "host with spaces", # Invalid but possible - ] - - hostname_outputs.each do |output| - Utils.stub :`, output do - result = hostname - assert result.length > 0 if output.strip.length > 0 - assert !result.include?("\n") - assert !result.include?("\r") - end - end - end - - def test_download_file_with_binary_content - url = 'https://example.com/binary.bin' - path = File.join(@working_dir, 'binary.bin') - - # Create binary content - binary_content = (0..255).map(&:chr).join - - stub_request(:get, url) - .with(query: hash_including("host")) - 
.to_return(status: 200, body: binary_content)
-
-    result = download_file(url, path)
-    assert_equal true, result
-
-    # Should preserve binary content exactly
-    downloaded = File.binread(path)
-    assert_equal binary_content.force_encoding('ASCII-8BIT'),
-                 downloaded.force_encoding('ASCII-8BIT')
-  end
-
-  def test_download_file_fails_twice_then_succeeds
-    url = 'https://example.com/retry-test.txt'
-    path = File.join(@working_dir, 'retry-test.txt')
-    expected_content = 'Success after retries!'
-
-    # First request fails with 500
-    # Second request fails with 503
-    # Third request succeeds with 200
-    call_count = 0
-    stub_request(:get, url)
-      .with(query: hash_including("host"))
-      .to_return do |request|
-        call_count += 1
-        case call_count
-        when 1
-          { status: 500, body: 'Internal Server Error' }
-        when 2
-          { status: 503, body: 'Service Unavailable' }
-        when 3
-          { status: 200, body: expected_content }
-        else
-          raise "Unexpected number of calls: #{call_count}"
-        end
-      end
-
-    # Should return true after retrying twice
-    result = download_file(url, path)
-    assert_equal true, result
-
-    # Should have written the successful response content
-    assert_equal expected_content, File.read(path)
-
-    # Should have made exactly 3 requests
-    assert_equal 3, call_count
-  end
-
-end
\ No newline at end of file
diff --git a/test/utils_test.rb b/test/utils_test.rb
deleted file mode 100644
index 401b4c6..0000000
--- a/test/utils_test.rb
+++ /dev/null
@@ -1,249 +0,0 @@
-require 'bundler/setup'
-require 'minitest/autorun'
-require 'webmock/minitest'
-require 'net/http'
-require_relative '../engine/utils'
-
-def override_backtick_command(override_command, result, status, &block)
-  original_backtick = Kernel.method(:`)
-
-  begin
-    Kernel.send(:define_method, :`) do |command|
-      if command == override_command
-        fork { exit status }
-        Process.wait
-        result
-      else
-        original_backtick.call(command)
-      end
-    end
-
-    yield block
-  ensure
-    Kernel.send(:define_method, :`) do |command|
-      original_backtick.call(command)
-    end
-  end
-end
-
-class UtilsTest < Minitest::Test
-  include Utils
-
-  def setup
-    # Create a temporary working directory for tests
-    @test_dir = File.join(Dir.pwd, 'engine', 'test_tmp')
-    @working_dir = @test_dir
-    FileUtils.mkdir_p(@test_dir)
-    FileUtils.mkdir_p(File.join(@test_dir, 'versions'))
-  end
-
-  def teardown
-    # Clean up temporary test directory
-    FileUtils.rm_rf(@test_dir) if File.exist?(@test_dir)
-  end
-
-  def test_latest_version
-    # Create test version directories with different dates
-    versions = ['2022-01-01T00:00:00', '2022-02-01T00:00:00', '2021-12-01T00:00:00']
-    versions.each do |version|
-      FileUtils.mkdir_p(File.join(@working_dir, 'versions', version))
-    end
-
-    assert_equal '2022-02-01T00:00:00', latest_version
-  end
-
-  def test_latest_version_no_versions
-    # Test with no version directories
-    assert_nil latest_version
-  end
-
-  def test_read_error
-    # Test reading error from file
-    error_message = "Test error message"
-    File.write(File.join(@working_dir, 'errors.txt'), error_message)
-
-    assert_equal URI.encode_www_form_component(error_message), read_error
-  end
-
-  def test_read_error_no_file
-    # Test when error file doesn't exist
-    assert_nil read_error
-  end
-
-  def test_write_error
-    # Test writing error to file
-    error_message = "Test error message"
-    write_error(error_message)
-
-    assert_equal error_message, File.read(File.join(@working_dir, 'errors.txt'))
-  end
-
-  def test_validate_vector_config
-    version = "test_version"
-    config_dir = File.join(@working_dir, 'versions', version)
-    config_path = File.join(config_dir, 'vector.yaml')
-    FileUtils.mkdir_p(config_dir)
-
-    # Test case 1: Valid configuration
-    File.write(config_path, "valid: config\nsinks:\n console:\n type: console")
-
-    override_backtick_command "REGION=unknown AZ=unknown vector validate #{config_path}", "Validated", 0 do
-      assert_nil validate_vector_config(version), "Should return nil for valid config"
-    end
-
-    # Test case 2: Config with security issue (command:)
-    File.write(config_path, "sinks:\n dangerous:\n type: exec\n command: /bin/sh")
-
-    assert_equal "vector.yaml must not contain command: directives", validate_vector_config(version)
-
-    # Test case 3: Config with validation failure
-    File.write(config_path, "invalid: config\nbroken_yaml")
-
-    failed_validation_output = "Failed to load [\"#{config_path}\"]\n----------------------------------------------------------------------------------------------------------\nx could not find expected ':' at line 3 column 1, while scanning a simple key at line 2 column 1\n\n"
-    override_backtick_command "REGION=unknown AZ=unknown vector validate #{config_path}", failed_validation_output, 78 do
-      result = validate_vector_config(version)
-      assert result.is_a?(String), "Should return error message string when vector validate returns false"
-      assert_match(/Failed to load/, result, "Should contain validation error message")
-    end
-  end
-
-
-  def test_download_file
-    # Test successful download
-    url = 'https://example.com/file.txt'
-    path = File.join(@working_dir, 'downloaded_file.txt')
-    content = 'file content'
-
-    stub_request(:get, url)
-      .with(query: hash_including("host"))
-      .to_return(body: content, status: 200)
-
-    assert download_file(url, path)
-    assert File.exist?(path)
-    assert_equal content, File.read(path)
-  end
-
-  def test_download_file_failure
-    # Test download failure
-    url = 'https://example.com/file.txt'
-    path = File.join(@working_dir, 'downloaded_file.txt')
-
-    stub_request(:get, url)
-      .with(query: hash_including("host"))
-      .to_return(status: 404)
-
-    error = assert_raises(Utils::DownloadError) do
-      download_file(url, path)
-    end
-    assert_equal "Failed to download downloaded_file.txt from https://example.com/file.txt after 2 retries. Response code: 404", error.message
-    refute File.exist?(path)
-  end
-
-  def test_download_file_network_error
-    # Test network error
-    url = 'https://example.com/file.txt'
-    path = File.join(@working_dir, 'downloaded_file.txt')
-
-    stub_request(:get, url)
-      .with(query: hash_including("host"))
-      .to_raise(SocketError.new("getaddrinfo: nodename nor servname provided, or not known"))
-
-    error = assert_raises(Utils::DownloadError) do
-      download_file(url, path)
-    end
-    assert_equal "Network error downloading downloaded_file.txt from https://example.com/file.txt: getaddrinfo: nodename nor servname provided, or not known after 2 retries.", error.message
-    refute File.exist?(path)
-  end
-
-  def test_latest_database_json_with_file
-    # Create test version directory with databases.json
-    version = '2022-02-01T00:00:00'
-    FileUtils.mkdir_p(File.join(@working_dir, 'versions', version))
-
-    # Create test databases.json
-    db_json_content = '{"databases":[{"name":"test-db","host":"localhost"}]}'
-    FileUtils.mkdir_p(File.join(@working_dir, 'versions', version))
-    File.write(File.join(@working_dir, 'versions', version, 'databases.json'), db_json_content)
-
-    assert_equal db_json_content, latest_database_json
-  end
-
-  def test_latest_database_json_no_file
-    # Create test version directory without databases.json
-    version = '2022-02-01T00:00:00'
-    FileUtils.mkdir_p(File.join(@working_dir, 'versions', version))
-
-    assert_equal '{}', latest_database_json
-  end
-
-  def test_latest_database_json_no_versions
-    # Test with no version directories
-    assert_equal '{}', latest_database_json
-  end
-
-  def test_latest_kubernetes_discovery_returns_latest_full_path
-    discovery_dir = File.join(@working_dir, 'kubernetes-discovery')
-    %w[2024-01-01T00:00:00 2024-03-01T00:00:00 2024-02-15T12:30:00].each do |version|
-      FileUtils.mkdir_p(File.join(discovery_dir, version))
-    end
-
-    expected_path = File.join(discovery_dir, '2024-03-01T00:00:00')
-    assert_equal expected_path, latest_kubernetes_discovery
-  end
-
-  def test_hostname
-    # Pretend hostname is not available to test other fallback mechanisms
-    original_hostname = ENV['HOSTNAME']
-    ENV['HOSTNAME'] = nil
-
-    # Test fallback to Socket.gethostname when host files not available
-    # Mock Socket.gethostname to return a known value
-    original_method = Socket.method(:gethostname)
-    Socket.define_singleton_method(:gethostname) { "test-hostname" }
-
-    # Test when host files are not accessible (common test environment case)
-    assert_equal "test-hostname", hostname
-
-    # Restore original method
-    Socket.define_singleton_method(:gethostname, original_method)
-
-    # Create a mock file to test hostname from file
-    host_proc_dir = File.join(@test_dir, 'host', 'proc', 'sys', 'kernel')
-    FileUtils.mkdir_p(host_proc_dir)
-    File.write(File.join(host_proc_dir, 'hostname'), "host-from-proc\n")
-
-    # Mock File.exist? to pretend we have access to host files
-    original_exist = File.method(:exist?)
-    File.define_singleton_method(:exist?) do |path|
-      if path == '/host/proc/sys/kernel/hostname'
-        true
-      else
-        original_exist.call(path)
-      end
-    end
-
-    # Mock File.read to return our test content
-    original_read = File.method(:read)
-    File.define_singleton_method(:read) do |path, *args|
-      if path == '/host/proc/sys/kernel/hostname'
-        "host-from-proc\n"
-      else
-        original_read.call(path, *args)
-      end
-    end
-
-    # Test reading from host proc
-    assert_equal "host-from-proc", hostname
-
-    # Restore original methods
-    File.define_singleton_method(:exist?, original_exist)
-    File.define_singleton_method(:read, original_read)
-
-    # Despite all the previous setup, if HOSTNAME is set, it should be used
-    ENV['HOSTNAME'] = 'host-from-env'
-    assert_equal "host-from-env", hostname
-
-    # Restore original env var
-    ENV['HOSTNAME'] = original_hostname
-  end
-end
diff --git a/test/vector_config_edge_cases_test.rb b/test/vector_config_edge_cases_test.rb
deleted file mode 100644
index 51617fc..0000000
--- a/test/vector_config_edge_cases_test.rb
+++ /dev/null
@@ -1,178 +0,0 @@
-require 'minitest/autorun'
-require 'fileutils'
-require 'tmpdir'
-require_relative '../engine/vector_config'
-
-class VectorConfigEdgeCasesTest < Minitest::Test
-  def setup
-    @test_dir = Dir.mktmpdir
-    @vector_config = VectorConfig.new(@test_dir)
-    @vector_config_dir = File.join(@test_dir, 'vector-config')
-  end
-
-  def teardown
-    FileUtils.rm_rf(@test_dir)
-  end
-
-  def test_validate_upstream_files_should_reject_malicious_command_variations
-    # Test various ways someone might try to sneak in command directives
-    test_cases = [
-      "sources:\n test:\n command: 'rm -rf /'",
-      "sources:\n test:\n type: exec\n command: ['echo', 'test']",
-      "sources:\n test:\n type: exec\n # command: 'commented'\n command: 'real'",
-      "transforms:\n test:\n type: exec\n command: 'transform command'"
-    ]
-
-    test_cases.each do |content|
-      version_dir = File.join(@test_dir, 'test-version')
-      FileUtils.mkdir_p(version_dir)
-      File.write(File.join(version_dir, 'vector.yaml'), content)
-
-      result = @vector_config.validate_upstream_files(version_dir)
-      assert_equal 'vector.yaml must not contain command: directives', result,
-                   "Should reject config with content: #{content}"
-    end
-  end
-
-  def test_prepare_dir_should_work_with_missing_kubernetes_discovery
-    # Create latest-valid-upstream with kubernetes_discovery reference
-    upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream')
-    FileUtils.mkdir_p(upstream_dir)
-    vector_content = "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape"
-    File.write(File.join(upstream_dir, 'vector.yaml'), vector_content)
-
-    # Remove kubernetes-discovery directories
-    FileUtils.rm_rf(File.join(@test_dir, 'kubernetes-discovery'))
-
-    # Mock latest_kubernetes_discovery to return nil
-    @vector_config.stub :latest_kubernetes_discovery, nil do
-      result = @vector_config.prepare_dir
-      assert result
-
-      # Should still create the directory structure
-      assert File.exist?(File.join(result, 'vector.yaml'))
-      assert !File.symlink?(File.join(result, 'vector.yaml'))
-      # kubernetes-discovery symlink should not exist if source doesn't exist
-      assert !File.exist?(File.join(result, 'kubernetes-discovery'))
-    end
-  end
-
-  def test_validate_dir_should_handle_permission_errors
-    config_dir = File.join(@test_dir, 'test-config')
-    FileUtils.mkdir_p(config_dir)
-    File.write(File.join(config_dir, 'vector.yaml'), "test: config")
-    FileUtils.mkdir_p(File.join(config_dir, 'kubernetes-discovery'))
-
-    # Mock vector command to simulate permission error
-    @vector_config.stub :`, lambda { |cmd|
-      raise Errno::EACCES, "Permission denied"
-    } do
-      assert_raises(Errno::EACCES) do
-        @vector_config.validate_dir(config_dir)
-      end
-    end
-  end
-
-  def test_promote_dir_should_handle_current_as_file_not_directory
-    config_dir = File.join(@vector_config_dir, 'new_test')
-    FileUtils.mkdir_p(config_dir)
-
-    # Create current as a file instead of directory
-    current_path = File.join(@vector_config_dir, 'current')
-    FileUtils.mkdir_p(@vector_config_dir)
-    File.write(current_path, "I'm a file, not a directory!")
-
-    # Should handle gracefully
-    @vector_config.stub :system, true do
-      result = nil
-      output = capture_io do
-        result = @vector_config.promote_dir(config_dir)
-      end
-
-      assert File.directory?(File.join(@vector_config_dir, 'current'))
-      assert_match(/Promoting/, output.join)
-      assert result
-    end
-  end
-
-  def test_promote_dir_should_handle_supervisorctl_failure
-    config_dir = File.join(@vector_config_dir, 'new_test')
-    FileUtils.mkdir_p(config_dir)
-
-    # Mock system call to return false (failure)
-    @vector_config.stub :system, false do
-      result = nil
-      output = capture_io do
-        # Should not raise error, just continue
-        result = @vector_config.promote_dir(config_dir)
-      end
-
-      assert File.directory?(File.join(@vector_config_dir, 'current'))
-      assert_match(/Promoting/, output.join)
-      assert result
-    end
-  end
-
-  def test_validate_upstream_files_should_work_with_just_vector_yaml
-    # Create test directory with just vector.yaml
-    upstream_dir = File.join(@test_dir, 'upstream')
-    FileUtils.mkdir_p(upstream_dir)
-
-    # Write only vector.yaml (valid config)
-    valid_config = <<~YAML
-      sources:
-        test_source:
-          type: file
-          include: ["/var/log/test.log"]
-    YAML
-    File.write(File.join(upstream_dir, 'vector.yaml'), valid_config)
-
-    # Mock successful vector validation
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      original_backtick.call('true')
-      "Configuration validated successfully"
-    end
-
-    # Should validate successfully with just vector.yaml
-    result = @vector_config.validate_upstream_files(upstream_dir)
-    assert_nil result, "Should validate successfully with just vector.yaml"
-  end
-
-  def test_validate_upstream_files_should_work_with_just_manual_vector_yaml
-    # Create test directory with just manual.vector.yaml
-    upstream_dir = File.join(@test_dir, 'upstream')
-    FileUtils.mkdir_p(upstream_dir)
-
-    # Write only manual.vector.yaml (valid config)
-    valid_manual_config = <<~YAML
-      transforms:
-        test_transform:
-          type: remap
-          inputs: ["test_source"]
-          source: '.message = "test"'
-    YAML
-    File.write(File.join(upstream_dir, 'manual.vector.yaml'), valid_manual_config)
-
-    # Mock successful vector validation
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      original_backtick.call('true')
-      "Configuration validated successfully"
-    end
-
-    # Should validate successfully with just manual.vector.yaml
-    result = @vector_config.validate_upstream_files(upstream_dir)
-    assert_nil result, "Should validate successfully with just manual.vector.yaml"
-  end
-
-  def test_validate_upstream_files_should_fail_with_no_vector_configs
-    # Create test directory with no vector configs
-    upstream_dir = File.join(@test_dir, 'upstream')
-    FileUtils.mkdir_p(upstream_dir)
-
-    # Should fail validation when no vector configs are present
-    result = @vector_config.validate_upstream_files(upstream_dir)
-    assert_match(/None of: vector.yaml, manual.vector.yaml, process_discovery.vector.yaml found/, result, "Should fail validation with no vector configs")
-  end
-end
\ No newline at end of file
diff --git a/test/vector_config_test.rb b/test/vector_config_test.rb
deleted file mode 100644
index 9903048..0000000
--- a/test/vector_config_test.rb
+++ /dev/null
@@ -1,345 +0,0 @@
-require 'minitest/autorun'
-require 'fileutils'
-require 'tmpdir'
-require 'open3'
-require_relative '../engine/vector_config'
-
-class VectorConfigTest < Minitest::Test
-  def setup
-    @test_dir = Dir.mktmpdir
-    @vector_config = VectorConfig.new(@test_dir)
-    @vector_config_dir = File.join(@test_dir, 'vector-config')
-
-    # Create test directories
-    FileUtils.mkdir_p(File.join(@test_dir, 'versions', '0-default'))
-    FileUtils.mkdir_p(File.join(@test_dir, 'kubernetes-discovery', '0-default'))
-    FileUtils.mkdir_p(File.join(@test_dir, 'kubernetes-discovery', '2025-01-01T00:00:00'))
-
-    # Create test files
-    File.write(File.join(@test_dir, 'versions', '0-default', 'vector.yaml'), "test: config")
-    File.write(File.join(@test_dir, 'kubernetes-discovery', '0-default', 'dummy.yaml'), "sources: {}")
-    File.write(File.join(@test_dir, 'kubernetes-discovery', '2025-01-01T00:00:00', 'test.yaml'), "sources: {}")
-  end
-
-  def teardown
-    FileUtils.rm_rf(@test_dir)
-  end
-
-  def test_validate_upstream_files_rejects_command_directive_in_vector_yaml
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    File.write(File.join(version_dir, 'vector.yaml'), "sources:\n test:\n type: exec\n command: ['echo', 'test']")
-
-    result = @vector_config.validate_upstream_files(version_dir)
-    assert_equal 'vector.yaml must not contain command: directives', result
-  end
-
-  def test_validate_upstream_files_rejects_command_directive_in_process_discovery_yaml
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    File.write(File.join(version_dir, 'process_discovery.vector.yaml'), "sources:\n test:\n type: exec\n command: ['echo', 'test']")
-
-    result = @vector_config.validate_upstream_files(version_dir)
-    assert_equal 'process_discovery.vector.yaml must not contain command: directives', result
-  end
-
-  def test_validate_upstream_files_returns_nil_on_success
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    File.write(File.join(version_dir, 'vector.yaml'), "sources:\n test:\n type: file\n include: ['/test']")
-
-    # Mock successful vector validation
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      # Run a command that succeeds to set $?.success? to true
-      original_backtick.call('true')
-      "Configuration validated successfully"
-    end
-
-    result = @vector_config.validate_upstream_files(version_dir)
-    assert_nil result
-  end
-
-  def test_validate_upstream_files_accepts_process_discovery_only
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    # Only create process_discovery.vector.yaml, no vector.yaml or manual.vector.yaml
-    File.write(File.join(version_dir, 'process_discovery.vector.yaml'), "sources:\n test:\n type: file\n include: ['/test']")
-
-    # Mock successful vector validation
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      # Run a command that succeeds to set $?.success? to true
-      original_backtick.call('true')
-      "Configuration validated successfully"
-    end
-
-    result = @vector_config.validate_upstream_files(version_dir)
-    assert_nil result, "Should accept process_discovery.vector.yaml alone"
-  end
-
-  def test_validate_upstream_files_returns_error_on_validation_failure
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    File.write(File.join(version_dir, 'vector.yaml'), "sources:\n test:\n type: file")
-
-    # Mock failed vector validation by actually running a command that fails
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      # Run a command that will fail
-      original_backtick.call('false')
-      "Error: Missing required field 'include'"
-    end
-
-    result = @vector_config.validate_upstream_files(version_dir)
-    assert_match(/Error: Missing required field/, result)
-  end
-
-  def test_promote_upstream_files
-    # Create a version directory with vector.yaml and manual.vector.yaml
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    File.write(File.join(version_dir, 'vector.yaml'), "sources:\n test:\n type: file")
-    File.write(File.join(version_dir, 'manual.vector.yaml'), "sources:\n manual:\n type: file")
-
-    @vector_config.promote_upstream_files(version_dir)
-
-    # Test the actual outcome - files copied to latest-valid-upstream
-    upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream')
-    assert File.exist?(File.join(upstream_dir, 'vector.yaml'))
-    assert File.exist?(File.join(upstream_dir, 'manual.vector.yaml'))
-    assert_equal "sources:\n test:\n type: file", File.read(File.join(upstream_dir, 'vector.yaml'))
-    assert_equal "sources:\n manual:\n type: file", File.read(File.join(upstream_dir, 'manual.vector.yaml'))
-  end
-
-  def test_promote_upstream_files_with_process_discovery
-    # Create a version directory with all three config files
-    version_dir = File.join(@test_dir, 'versions', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(version_dir)
-    File.write(File.join(version_dir, 'vector.yaml'), "sources:\n test:\n type: file")
-    File.write(File.join(version_dir, 'manual.vector.yaml'), "sources:\n manual:\n type: file")
-    File.write(File.join(version_dir, 'process_discovery.vector.yaml'), "sources:\n process:\n type: file")
-
-    @vector_config.promote_upstream_files(version_dir)
-
-    # Test the actual outcome - all files copied to latest-valid-upstream
-    upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream')
-    assert File.exist?(File.join(upstream_dir, 'vector.yaml'))
-    assert File.exist?(File.join(upstream_dir, 'manual.vector.yaml'))
-    assert File.exist?(File.join(upstream_dir, 'process_discovery.vector.yaml'))
-    assert_equal "sources:\n test:\n type: file", File.read(File.join(upstream_dir, 'vector.yaml'))
-    assert_equal "sources:\n manual:\n type: file", File.read(File.join(upstream_dir, 'manual.vector.yaml'))
-    assert_equal "sources:\n process:\n type: file", File.read(File.join(upstream_dir, 'process_discovery.vector.yaml'))
-  end
-
-  def test_prepare_dir_returns_nil_when_no_latest_valid_upstream
-    # Test that prepare_dir returns nil when no latest-valid-upstream exists
-    assert !File.exist?(File.join(@test_dir, 'vector-config', 'latest-valid-upstream'))
-    result = @vector_config.prepare_dir
-    assert_nil result
-  end
-
-  def test_prepare_dir_uses_latest_kubernetes_discovery_when_referenced
-    # Create latest-valid-upstream with vector.yaml containing kubernetes_discovery_ reference
-    upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream')
-    FileUtils.mkdir_p(upstream_dir)
-    vector_content = "sources:\n kubernetes_discovery_test:\n type: prometheus_scrape"
-    File.write(File.join(upstream_dir, 'vector.yaml'), vector_content)
-
-    # Create a kubernetes discovery directory
-    k8s_discovery_dir = File.join(@test_dir, 'kubernetes-discovery', '2025-01-01T00:00:00')
-    FileUtils.mkdir_p(k8s_discovery_dir)
-
-    # Mock latest_kubernetes_discovery
-    @vector_config.stub :latest_kubernetes_discovery, k8s_discovery_dir do
-      result = @vector_config.prepare_dir
-
-      # Test actual outcomes
-      assert result, "prepare_dir should return a directory path"
-      assert result.start_with?(File.join(@vector_config_dir, 'new_'))
-      assert File.directory?(result)
-
-      # Check files are correctly created
-      assert File.exist?(File.join(result, 'vector.yaml'))
-      assert File.symlink?(File.join(result, 'kubernetes-discovery'))
-
-      # Verify it uses the latest kubernetes discovery
-      assert_equal k8s_discovery_dir,
-                   File.readlink(File.join(result, 'kubernetes-discovery'))
-      # Verify vector.yaml content
-      assert_equal vector_content, File.read(File.join(result, 'vector.yaml'))
-    end
-  end
-
-  def test_prepare_dir_uses_default_kubernetes_discovery_when_not_referenced
-    # Create latest-valid-upstream with vector.yaml without kubernetes_discovery_ reference
-    upstream_dir = File.join(@test_dir, 'vector-config', 'latest-valid-upstream')
-    FileUtils.mkdir_p(upstream_dir)
-    vector_content = "sources:\n test:\n type: file"
-    File.write(File.join(upstream_dir, 'vector.yaml'), vector_content)
-
-    result = @vector_config.prepare_dir
-
-    # Test actual outcome - should use 0-default when kubernetes_discovery not used
-    assert result
-    assert_equal File.join(@test_dir, 'kubernetes-discovery', '0-default'),
-                 File.readlink(File.join(result, 'kubernetes-discovery'))
-    assert_equal vector_content, File.read(File.join(result, 'vector.yaml'))
-  end
-
-  def test_validate_dir_returns_nil_on_success
-    config_dir = File.join(@test_dir, 'test-config')
-    FileUtils.mkdir_p(config_dir)
-    File.write(File.join(config_dir, 'vector.yaml'), "test: config")
-    FileUtils.mkdir_p(File.join(config_dir, 'kubernetes-discovery'))
-
-    # Mock successful vector validation
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      # Run a command that succeeds to set $?.success? to true
-      original_backtick.call('true')
-      "Configuration validated successfully"
-    end
-
-    result = @vector_config.validate_dir(config_dir)
-    assert_nil result
-  end
-
-  def test_validate_dir_returns_error_message_on_failure
-    config_dir = File.join(@test_dir, 'test-config')
-    FileUtils.mkdir_p(config_dir)
-
-    # Mock failed vector validation
-    original_backtick = @vector_config.method(:`)
-    @vector_config.define_singleton_method(:`) do |cmd|
-      # Run a command that fails to set $?.success? to false
-      original_backtick.call('false')
-      "Error: Invalid configuration"
-    end
-
-    result = @vector_config.validate_dir(config_dir)
-    assert_equal "Error: Invalid configuration", result
-  end
-
-  def test_promote_dir
-    config_dir = File.join(@vector_config_dir, 'new_test')
-    FileUtils.mkdir_p(config_dir)
-
-    # Create old current directory with a marker file
-    old_current_dir = File.join(@vector_config_dir, 'old_current')
-    FileUtils.mkdir_p(old_current_dir)
-    File.write(File.join(old_current_dir, 'old_marker.txt'), 'old content')
-
-    # Create current as symlink to old directory
-    current_link = File.join(@vector_config_dir, 'current')
-    File.symlink(old_current_dir, current_link)
-
-    # Create a marker in new config
-    File.write(File.join(config_dir, 'new_marker.txt'), 'new content')
-
-    # Mock cleanup_old_directories to avoid side effects in test
-    @vector_config.stub :cleanup_old_directories, nil do
-      @vector_config.stub :system, true do
-        @vector_config.promote_dir(config_dir)
-
-        # Test actual outcomes - symlink behavior
-        assert File.symlink?(current_link), "Current should be a symlink"
-        assert File.exist?(config_dir), "Original config directory should still exist"
-
-        # Check that current symlink points to new config directory
-        assert_equal config_dir, File.readlink(current_link)
-
-        # Check that content is accessible through the symlink
-        assert !File.exist?(File.join(current_link, 'old_marker.txt')), "Old marker should not be accessible through current"
-        assert File.exist?(File.join(current_link, 'new_marker.txt')), "New marker should be accessible through current"
-      end
-    end
-  end
-
-  def test_cleanup_old_directories
-    # Create 10 old directories
-    old_dirs = []
-    10.times do |i|
-      timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%6NZ')
-      dir_name = File.join(@vector_config_dir, "new_2023-01-0#{i}T00:00:00.#{sprintf('%06d', i)}Z")
-      FileUtils.mkdir_p(dir_name)
-      File.write(File.join(dir_name, 'test.txt'), "content #{i}")
-      old_dirs << dir_name
-      sleep 0.001 # Ensure unique timestamps
-    end
-
-    # Create current and previous symlinks pointing to some directories
-    current_dir = old_dirs[8]
-    previous_dir = old_dirs[7]
-
-    current_link = File.join(@vector_config_dir, 'current')
-    previous_link = File.join(@vector_config_dir, 'previous')
-
-    File.symlink(current_dir, current_link)
-    File.symlink(previous_dir, previous_link)
-
-    # Run cleanup with keep_count=3
-    @vector_config.cleanup_old_directories(3)
-
-    # Check results
-    # Directories 0-4 should be deleted (5 oldest not in use)
-    (0..4).each do |i|
-      assert !File.exist?(old_dirs[i]), "Old directory #{i} should be deleted"
-    end
-
-    # Directories 5-6 should exist (kept as part of keep_count=3)
-    (5..6).each do |i|
-      assert File.exist?(old_dirs[i]), "Directory #{i} should be kept"
-    end
-
-    # Directories 7-8 should exist (in use as previous/current)
-    assert File.exist?(old_dirs[7]), "Directory 7 should exist (previous)"
-    assert File.exist?(old_dirs[8]), "Directory 8 should exist (current)"
-
-    # Directory 9 should exist (most recent, part of keep_count)
-    assert File.exist?(old_dirs[9]), "Directory 9 should be kept (most recent)"
-  end
-
-  def test_cleanup_old_directories_with_relative_symlinks
-    # Create directories
-    dir1 = File.join(@vector_config_dir, 'new_2023-01-01T00:00:00.000001Z')
-    dir2 = File.join(@vector_config_dir, 'new_2023-01-02T00:00:00.000002Z')
-
-    FileUtils.mkdir_p(dir1)
-    FileUtils.mkdir_p(dir2)
-
-    # Create current as relative symlink
-    current_link = File.join(@vector_config_dir, 'current')
-    Dir.chdir(@vector_config_dir) do
-      File.symlink('new_2023-01-02T00:00:00.000002Z', 'current')
-    end
-
-    # Run cleanup with keep_count=0 (should still keep dir2 as it's in use)
-    @vector_config.cleanup_old_directories(0)
-
-    # dir2 should still exist as it's referenced by current
-    assert File.exist?(dir2), "Directory referenced by current should not be deleted"
-
-    # dir1 should be deleted
-    assert !File.exist?(dir1), "Unreferenced directory should be deleted"
-  end
-
-  def test_cleanup_old_directories_ignores_non_new_directories
-    # Create various directories
-    new_dir = File.join(@vector_config_dir, 'new_2023-01-01T00:00:00.000001Z')
-    other_dir = File.join(@vector_config_dir, 'latest-valid-upstream')
-    random_dir = File.join(@vector_config_dir, 'some-other-dir')
-
-    FileUtils.mkdir_p(new_dir)
-    FileUtils.mkdir_p(other_dir)
-    FileUtils.mkdir_p(random_dir)
-
-    # Run cleanup
-    @vector_config.cleanup_old_directories(0)
-
-    # Only new_* directory should be deleted
-    assert !File.exist?(new_dir), "Old new_* directory should be deleted"
-    assert File.exist?(other_dir), "Non-new directory should not be deleted"
-    assert File.exist?(random_dir), "Non-new directory should not be deleted"
-  end
-end
\ No newline at end of file
diff --git a/test/versions/2025-05-11T11:13:00.000/databases.json b/test/versions/2025-05-11T11:13:00.000/databases.json
deleted file mode 100644
index 5375c9a..0000000
--- a/test/versions/2025-05-11T11:13:00.000/databases.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "databases": []
-}
\ No newline at end of file
diff --git a/test/versions/2025-05-11T11:13:00.000/databases.sample.json b/test/versions/2025-05-11T11:13:00.000/databases.sample.json
deleted file mode 100644
index 5523f4f..0000000
--- a/test/versions/2025-05-11T11:13:00.000/databases.sample.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
-  "application_instrumentation": [
-    {
-      "type": "postgres",
-      "host": "postgres-host",
-      "credentials": {
-        "username": "coroot",
-        "password": "password"
-      },
-      "port": "5432",
-      "ssl_mode": "disable"
-    },
-    {
-      "type": "mysql",
-      "host": "mysql-host",
-      "credentials": {
-        "username": "coroot",
-        "password": "password"
-      },
-      "port": "3306",
-      "tls": "false"
-    },
-    {
-      "type": "redis",
-      "host": "redis-host",
-      "credentials": {
-        "password": "password"
-      },
-      "port": "6379"
-    },
-    {
-      "type": "mongodb",
-      "host": "mongodb-host",
-      "credentials": {
-        "username": "coroot",
-        "password": "password"
-      },
-      "port": "27017"
-    },
-    {
-      "type": "memcached",
-      "host": "memcached",
-      "port": "11211"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/updater.rb b/updater.rb
deleted file mode 100644
index e3a223a..0000000
--- a/updater.rb
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env ruby
-
-require_relative 'engine/better_stack_client'
-
-working_dir = File.expand_path(File.dirname(__FILE__))
-client = BetterStackClient.new(working_dir)
-
-SLEEP_DURATION = 15
-PING_EVERY = 2 # iterations of the loop -> every SLEEP_DURATION * PING_EVERY seconds
-
-iteration = 1
-
-# Main loop
-loop do
-  enrichment_table_changed = client.enrichment_table_changed?
-  can_reload_vector = true
-  config_changed = false # error flag so we can make sure we only reload vector if we have a valid config
-
-  # Validate enrichment table if it has changed
-  if enrichment_table_changed
-    puts "Validating enrichment table"
-    output = client.validate_enrichment_table
-    puts "Enrichment table validation finished"
-    if !output.nil?
- puts "Enrichment table validation failed" - can_reload_vector = false - else - puts "Promoting enrichment table" - client.promote_enrichment_table - puts "Enrichment table promoted" - end - end - - # Only attempt to promote config if enrichment table is valid - if iteration % PING_EVERY == 0 - iteration = 1 - puts "Starting ping" - config_changed = client.ping - puts "Ping finished" - end - - if can_reload_vector && (enrichment_table_changed || config_changed) - client.reload_vector - end - - $stdout.flush - iteration += 1 - puts "Sleeping for #{SLEEP_DURATION} seconds..." - sleep SLEEP_DURATION -end \ No newline at end of file diff --git a/vector.sh b/vector.sh deleted file mode 100644 index 610a467..0000000 --- a/vector.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Run mdprobe to get instance metadata -echo "Getting instance metadata..." -METADATA_JSON=$(/usr/bin/ruby /mdprobe/mdprobe.rb) - -# Parse JSON and extract region and availability zone using jq -export REGION=$(echo "$METADATA_JSON" | jq -r '.region // "unknown"') -export AZ=$(echo "$METADATA_JSON" | jq -r '.availability_zone // "unknown"') - -echo "Extracted metadata:" -echo " REGION=${REGION}" -echo " AZ=${AZ}" - -# Tell host_metrics source to collect from host system -export PROCFS_ROOT="/host/proc" -export SYSFS_ROOT="/host/sys" - -# Ensure enrichment directory exists -if [ ! -d "/enrichment" ]; then - echo "Creating /enrichment directory..." - mkdir -p /enrichment -fi - -# Copy default enrichment files if they don't exist -if [ ! -f "/enrichment/databases.csv" ] && [ -f "/enrichment-defaults/databases.csv" ]; then - echo "Copying default databases.csv to /enrichment..." - cp /enrichment-defaults/databases.csv /enrichment/databases.csv -fi - -if [ ! -f "/enrichment/docker-mappings.csv" ] && [ -f "/enrichment-defaults/docker-mappings.csv" ]; then - echo "Copying default docker-mappings.csv to /enrichment..." - cp /enrichment-defaults/docker-mappings.csv /enrichment/docker-mappings.csv -fi - -# Check for first boot -if [ -f "/first-boot.txt" ]; then - echo "First boot detected, skipping config validation..." - # Remove the first boot marker - rm -f /first-boot.txt -else - # Validate config files exist and are readable - if [ ! -d "/vector-config/current" ]; then - echo "ERROR: Config directory /vector-config/current does not exist!" - echo "Attempting to restore from last known good config..." - if [ -d "/vector-config/latest-valid-upstream" ]; then - mkdir -p "/vector-config/current" - cp -r /vector-config/latest-valid-upstream/* "/vector-config/current/" - echo "Restored configuration from latest-valid-upstream" - else - echo "FATAL: No valid configuration available" - exit 1 - fi - fi - - # Check if we have actual config files (follow symlinks with -L) - CONFIG_COUNT=$(find -L "/vector-config/current" -name "*.yaml" -type f 2>/dev/null | wc -l) - if [ "$CONFIG_COUNT" -eq 0 ]; then - echo "ERROR: No YAML config files found in /vector-config/current" - echo "Vector cannot start without configuration" - # Exit with 127 - "command not found" - indicates critical config missing - exit 127 - fi - - echo "Found $CONFIG_COUNT config files in /vector-config/current" -fi -echo "Starting Vector..." 
-exec /usr/local/bin/vector --config /vector-config/current/\*.yaml --config /vector-config/current/kubernetes-discovery/\*.yaml
diff --git a/versions/0-default/vector.yaml b/versions/0-default/vector.yaml
deleted file mode 100644
index 153d9ac..0000000
--- a/versions/0-default/vector.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-# This is a minimal Vector configuration that is used before
-# configuration is downloaded from telemetry.betterstack.com.
-# Cluster agent and Beyla expect to be able to connect to the sources below.
-
-# Enables the `vector top` command in the collector container.
-api:
-  enabled: true
-
-sources:
-  # Collects database metrics from cluster agent.
-  cluster_agent_remote_write:
-    type: prometheus_remote_write
-    address: 127.0.0.1:39090
-
-  # Collects OpenTelemetry traces from Beyla.
-  beyla_otel:
-    type: opentelemetry
-    grpc:
-      address: 0.0.0.0:34319
-    http:
-      address: 0.0.0.0:34320
-
-sinks:
-  console:
-    type: console
-    inputs:
-      - cluster_agent_remote_write
-      - beyla_otel.traces
-    encoding:
-      codec: json
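
Note on the removed tests: the deleted download_file tests pin down a retry contract — up to two retries, exact error strings, a "host" query parameter on every request, and no partial file left behind on failure. For reference while reviewing this deletion, here is a minimal sketch that would satisfy those expectations. The MAX_RETRIES constant, the hard-coded host value, and the overall structure are assumptions inferred from the test expectations, not the actual implementation in engine/utils.rb (which is not shown in this hunk).

require 'net/http'
require 'uri'

module Utils
  class DownloadError < StandardError; end

  MAX_RETRIES = 2 # assumed; the tests only pin the phrase "after 2 retries"

  def download_file(url, path)
    uri = URI(url)
    # The deleted tests stub requests with `query: hash_including("host")`,
    # so some host value is appended here; 'collector-host' is a placeholder.
    uri.query = URI.encode_www_form(host: 'collector-host')

    (MAX_RETRIES + 1).times do |attempt|
      begin
        response = Net::HTTP.get_response(uri)
        if response.is_a?(Net::HTTPSuccess)
          # binwrite preserves binary payloads byte-for-byte, matching the
          # binary-content test; nothing is written on failed attempts.
          File.binwrite(path, response.body)
          return true
        end
        if attempt == MAX_RETRIES
          raise DownloadError, "Failed to download #{File.basename(path)} from #{url} " \
                               "after #{MAX_RETRIES} retries. Response code: #{response.code}"
        end
      rescue SocketError => e
        if attempt == MAX_RETRIES
          raise DownloadError, "Network error downloading #{File.basename(path)} from #{url}: " \
                               "#{e.message} after #{MAX_RETRIES} retries."
        end
      end
    end
  end
end

Under this reading, test_download_file_fails_twice_then_succeeds passes because the loop allows MAX_RETRIES + 1 = 3 total attempts, and the failure tests pass because the final error is raised before any file is written.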