Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
86765ce
Fix authentication issues in test-ros-ocp-dataflow.sh and update to i…
masayag Sep 10, 2025
6cff5fa
Update Kubernetes configurations for insights-ros-ingress migration
masayag Sep 10, 2025
311ddf1
Update GitHub Actions workflows for insights-ros-ingress authentication
masayag Sep 10, 2025
927c511
Add cluster_alias configuration for insights-ros-ingress service
masayag Sep 10, 2025
3ed664b
Fix CSV validation failure by creating proper ROS-OCP test data
masayag Sep 10, 2025
f6d425f
Remove useless messages
masayag Sep 10, 2025
28d3e07
Fix GitHub Actions workflow: reorder steps and eliminate env var dupl…
masayag Sep 11, 2025
5c2a9cf
Fix GitHub Actions: configure rootless KIND with systemd delegation
masayag Sep 11, 2025
6fc9949
Remove unsupported argument by podman port
masayag Sep 11, 2025
4d23d4a
Fix authentication issues in test-ros-ocp-dataflow.sh
masayag Sep 11, 2025
88ee402
Ensure GitHub Actions fails when upload test fails
masayag Sep 11, 2025
a5c14a9
Fix kubeconfig authentication in GitHub Actions workflow
masayag Sep 11, 2025
1e3c33e
Fix warning for watching the processor logs
masayag Sep 11, 2025
f8ed7e4
Fix ingress service port configuration in deploy-kind.sh
masayag Sep 11, 2025
d853305
Improve test-k8s-dataflow.sh with proper manifest.json format
masayag Sep 11, 2025
03231b8
Add JWT_SECRET environment variable to ingress deployment
masayag Sep 11, 2025
5b52a4a
Changelog:
jordigilh Sep 24, 2025
14f04dc
Disable docker-compose-test-yml workflow for now as it is unclear wha…
jordigilh Sep 25, 2025
f5b8431
fix: Update Kubernetes health checks and resolve test script issues
jordigilh Sep 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 135 additions & 35 deletions .github/workflows/docker-compose-test.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
name: Docker Compose Deployment Test

# DISABLED: Temporarily disabled due to authentication setup complexity in CI
# Re-enable after authentication issues are resolved in PR #24
on:
pull_request:
paths:
- 'deployment/docker-compose/**'
- '.github/workflows/docker-compose-test.yml'
workflow_dispatch:
# pull_request:
# paths:
# - 'deployment/docker-compose/**'
# - '.github/workflows/docker-compose-test.yml'
# - 'internal/**' # Include internal code changes that might affect data processing
workflow_dispatch: # Keep manual trigger available

jobs:
docker-compose-test:
Expand All @@ -25,10 +28,31 @@ jobs:
# Enable lingering for systemd user services
sudo loginctl enable-linger $USER || true

# Configure systemd for rootless KIND
sudo mkdir -p /etc/systemd/system/user@$(id -u).service.d/
echo -e "[Service]\nDelegate=yes" | sudo tee /etc/systemd/system/user@$(id -u).service.d/delegate.conf
sudo systemctl daemon-reload

- name: Install additional dependencies
run: |
sudo apt-get install -y uuid-runtime curl

KIND_VERSION=$(curl -s https://api.github.com/repos/kubernetes-sigs/kind/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
echo "Latest KIND version: $KIND_VERSION"

# Download and install the latest KIND version
curl -Lo ./kind "https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-linux-amd64"
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind

# Verify installation
kind version


curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x kubectl
sudo mv kubectl /usr/local/bin/kubectl

- name: Configure environment variables
run: |
echo "INGRESS_PORT=3000" >> $GITHUB_ENV
Expand All @@ -42,33 +66,62 @@ jobs:
# Set DOCKER_HOST for podman-compose compatibility
echo "DOCKER_HOST=unix:///run/user/$UID/podman/podman.sock" >> $GITHUB_ENV

# Verify podman socket is running
systemctl --user status podman.socket || true

# Also set KIND to use podman provider globally
echo "KIND_EXPERIMENTAL_PROVIDER=podman" >> $GITHUB_ENV

- name: Verify podman installation
run: |
podman --version
podman-compose --version
echo "Podman info:"
podman info

- name: Setup authentication for insights-ros-ingress
working-directory: deployment/docker-compose
run: |
# Make authentication setup script executable
chmod +x ../../scripts/setup-ingress-auth.sh

# Set up KIND cluster and authentication for insights-ros-ingress
echo "Setting up Kubernetes authentication for insights-ros-ingress..."
echo "Using KIND with podman rootless provider (KIND_EXPERIMENTAL_PROVIDER=$KIND_EXPERIMENTAL_PROVIDER)"
(cd ../../scripts && ./setup-ingress-auth.sh)

# Verify authentication setup
if [ -f "../../scripts/.ingress-auth.env" ]; then
echo "✅ Authentication environment created successfully"
echo "Auth file contents (without sensitive data):"
grep -v "DEV_SERVICE_ACCOUNT_TOKEN" ../../scripts/.ingress-auth.env || true

# Source the auth environment and export KUBECONFIG
source ../../scripts/.ingress-auth.env
echo "KUBECONFIG=$KUBECONFIG" >> $GITHUB_ENV
echo "✅ KUBECONFIG exported for subsequent steps"
else
echo "❌ Authentication setup failed - environment file not found"
exit 1
fi

- name: Pull required container images
working-directory: deployment/docker-compose
run: |
# Pre-pull images to avoid timeout issues during compose up
podman-compose pull --ignore-pull-failures || true
podman-compose pull || true

- name: Start services with podman-compose
working-directory: deployment/docker-compose
run: |
# Export environment variables
export INGRESS_PORT=${{ env.INGRESS_PORT }}
export MINIO_ACCESS_KEY=${{ env.MINIO_ACCESS_KEY }}
export MINIO_SECRET_KEY=${{ env.MINIO_SECRET_KEY }}

echo "Starting services with podman-compose..."
echo "KUBECONFIG is set to: $KUBECONFIG"
echo "Environment variables: INGRESS_PORT=$INGRESS_PORT, MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY"
podman-compose up -d

echo "Waiting for services to initialize..."
sleep 30

echo "Service status:"
podman-compose ps

Expand All @@ -80,15 +133,15 @@ jobs:
timeout 300 bash -c 'until podman exec db-ros_1 pg_isready -U postgres; do sleep 5; done'
timeout 300 bash -c 'until podman exec db-kruize_1 pg_isready -U postgres; do sleep 5; done'
timeout 300 bash -c 'until podman exec db-sources_1 pg_isready -U postgres; do sleep 5; done'

# Wait for Kafka
echo "Waiting for Kafka..."
timeout 300 bash -c 'until podman exec kafka_1 kafka-broker-api-versions --bootstrap-server localhost:29092 &>/dev/null; do sleep 5; done'

# Wait for MinIO
echo "Waiting for MinIO..."
timeout 180 bash -c 'until curl -f http://localhost:9000/minio/health/live &>/dev/null; do sleep 5; done'

# Wait for Redis
echo "Waiting for Redis..."
timeout 180 bash -c 'until podman exec redis_1 redis-cli ping &>/dev/null; do sleep 5; done'
Expand All @@ -97,25 +150,25 @@ jobs:
working-directory: deployment/docker-compose
run: |
# Get actual ingress port
ACTUAL_INGRESS_PORT=$(podman port ingress_1 3000 2>/dev/null | cut -d: -f2 || echo "$INGRESS_PORT")
ACTUAL_INGRESS_PORT=$(podman port ingress_1 2>/dev/null | cut -d: -f2 || echo "$INGRESS_PORT")
echo "Using ingress port: $ACTUAL_INGRESS_PORT"

# Wait for application services
echo "Waiting for Ingress service..."
timeout 300 bash -c "until curl -f http://localhost:${ACTUAL_INGRESS_PORT}/api/ingress/v1/version &>/dev/null; do sleep 5; done"
timeout 300 bash -c "until curl -f http://localhost:${ACTUAL_INGRESS_PORT}/health &>/dev/null; do sleep 5; done"

echo "Waiting for Kruize service..."
timeout 300 bash -c 'until curl -f http://localhost:8080/listPerformanceProfiles &>/dev/null; do sleep 5; done'

echo "Waiting for Sources API..."
timeout 300 bash -c 'until curl -f http://localhost:8002/api/sources/v1.0/source_types &>/dev/null; do sleep 5; done'

echo "Waiting for ROS-OCP API..."
timeout 300 bash -c 'until curl -f http://localhost:8001/status &>/dev/null; do sleep 5; done'

echo "Waiting for processor to start..."
timeout 300 bash -c 'until podman logs rosocp-processor_1 2>/dev/null | grep -q "Starting processor"; do sleep 5; done'

echo "Waiting for recommendation poller to start..."
timeout 300 bash -c 'until podman logs rosocp-recommendation-poller_1 2>/dev/null | grep -q "Starting recommendation-poller"; do sleep 5; done'

Expand All @@ -124,8 +177,47 @@ jobs:
run: |
# Make test script executable
chmod +x test-ros-ocp-dataflow.sh

# Run the test script

# Verify new test data is available
echo "Checking for ROS-OCP test data..."
if [ -f "samples/ros-ocp-test-data.tar.gz" ]; then
echo "✅ Found ros-ocp-test-data.tar.gz (proper 37-column CSV format)"
else
echo "❌ Missing ros-ocp-test-data.tar.gz - test may fail with CSV validation errors"
fi

# Fix kubeconfig IP address for GitHub Actions environment
echo "Fixing kubeconfig for container network access..."
if [ -f "/tmp/ros-ingress-kubeconfig" ]; then
echo "Current kubeconfig server:"
grep "server:" /tmp/ros-ingress-kubeconfig || true

# Get KIND container IP
KIND_CONTAINER_IP=$(podman inspect ros-ingress-dev-control-plane 2>/dev/null | grep -o '"IPAddress": "[^"]*"' | grep -v '""' | head -1 | cut -d'"' -f4 || echo "")

if [ -n "$KIND_CONTAINER_IP" ]; then
echo "Updating kubeconfig to use KIND container IP: $KIND_CONTAINER_IP"
sed -i.bak "s|server: https://.*:6443|server: https://${KIND_CONTAINER_IP}:6443|" /tmp/ros-ingress-kubeconfig
echo "Updated kubeconfig server:"
grep "server:" /tmp/ros-ingress-kubeconfig || true

# Restart ingress service to pick up the corrected kubeconfig
echo "Restarting ingress service..."
podman-compose restart ingress
sleep 10
else
echo "❌ Could not determine KIND container IP"
exit 1
fi
else
echo "❌ Kubeconfig file not found at /tmp/ros-ingress-kubeconfig"
exit 1
fi

# Environment variables are automatically available from GITHUB_ENV
echo "Environment: INGRESS_PORT=$INGRESS_PORT, MINIO_ACCESS_KEY=$MINIO_ACCESS_KEY"

# Run the test script (it will use ros-ocp-test-data.tar.gz for proper validation)
./test-ros-ocp-dataflow.sh

- name: Check service health after test
Expand All @@ -134,17 +226,17 @@ jobs:
run: |
echo "=== Final service status ==="
podman-compose ps

echo "=== Service logs (last 20 lines each) ==="
echo "--- Ingress logs ---"
podman-compose logs --tail=20 ingress || true

echo "--- ROS-OCP API logs ---"
podman-compose logs --tail=20 rosocp-api || true

echo "--- ROS-OCP Processor logs ---"
podman-compose logs --tail=20 rosocp-processor || true

echo "--- Kruize logs ---"
podman-compose logs --tail=20 kruize-autotune || true

Expand All @@ -154,10 +246,16 @@ jobs:
run: |
echo "Cleaning up services..."
podman-compose down -v || true


# Clean up authentication resources
echo "Cleaning up authentication resources..."
kind delete cluster --name ros-ingress-dev || true
rm -f /tmp/ros-ingress-kubeconfig || true
rm -f ../../scripts/.ingress-auth.env || true

# Clean up any remaining containers
podman container prune -f || true

# Clean up any remaining volumes
podman volume prune -f || true

Expand All @@ -177,12 +275,14 @@ jobs:
echo "### Tested Components" >> $GITHUB_STEP_SUMMARY
echo "- PostgreSQL databases (ROS, Kruize, Sources)" >> $GITHUB_STEP_SUMMARY
echo "- Kafka message broker" >> $GITHUB_STEP_SUMMARY
echo "- MinIO object storage" >> $GITHUB_STEP_SUMMARY
echo "- MinIO object storage (ros-data bucket)" >> $GITHUB_STEP_SUMMARY
echo "- Redis cache" >> $GITHUB_STEP_SUMMARY
echo "- Ingress service" >> $GITHUB_STEP_SUMMARY
echo "- **insights-ros-ingress service** (with Kubernetes authentication)" >> $GITHUB_STEP_SUMMARY
echo "- ROS-OCP API service" >> $GITHUB_STEP_SUMMARY
echo "- ROS-OCP Processor service" >> $GITHUB_STEP_SUMMARY
echo "- ROS-OCP Recommendation Poller" >> $GITHUB_STEP_SUMMARY
echo "- Kruize Autotune service" >> $GITHUB_STEP_SUMMARY
echo "- Sources API service" >> $GITHUB_STEP_SUMMARY
echo "- Complete data flow (upload → kafka → processing → database)" >> $GITHUB_STEP_SUMMARY
echo "- **Authentication flow** (KIND cluster + service account tokens)" >> $GITHUB_STEP_SUMMARY
echo "- **Complete ROS-OCP data flow** (authenticated upload → CSV extraction → MinIO ros-data bucket → Kafka → processing → database)" >> $GITHUB_STEP_SUMMARY
echo "- **CSV validation and processing** (37-column ROS-OCP format)" >> $GITHUB_STEP_SUMMARY
39 changes: 38 additions & 1 deletion .github/workflows/helm-chart-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,36 @@ jobs:
kubectl get services -n $NAMESPACE
kubectl get ingress -n $NAMESPACE

- name: Wait for services to stabilize
- name: Wait for services to stabilize and verify authentication
run: |
echo "Waiting for services to stabilize..."
sleep 60

# Check whether deploy-kind.sh created the authentication kubeconfig (diagnostic only; a missing file is reported but does not fail this step)
if [ -f "/tmp/dev-kubeconfig" ]; then
echo "✅ Authentication kubeconfig found at /tmp/dev-kubeconfig"
else
echo "❌ Authentication kubeconfig not found"
echo "Expected file: /tmp/dev-kubeconfig"
echo "This should have been created by deploy-kind.sh"
ls -la /tmp/ | grep -E "(kubeconfig|auth)" || true
fi

# Check if service account exists
if kubectl get serviceaccount insights-ros-ingress -n ${{ env.NAMESPACE }} >/dev/null 2>&1; then
echo "✅ insights-ros-ingress service account found"

# Check if token secret exists
if kubectl get secret insights-ros-ingress-token -n ${{ env.NAMESPACE }} >/dev/null 2>&1; then
echo "✅ insights-ros-ingress-token secret found"
else
echo "❌ insights-ros-ingress-token secret not found"
kubectl get secrets -n ${{ env.NAMESPACE }} | head -10 || true
fi
else
echo "❌ insights-ros-ingress service account not found"
kubectl get serviceaccounts -n ${{ env.NAMESPACE }} || true
fi

# Wait (up to 600s overall) for all pods of the Helm release to report the Ready condition
timeout 600 bash -c "until kubectl wait --for=condition=ready pod -l 'app.kubernetes.io/instance=${{ env.HELM_RELEASE_NAME }}' -n ${{ env.NAMESPACE }} --timeout=30s; do echo 'Waiting for pods...'; sleep 10; done"
Expand Down Expand Up @@ -199,6 +226,16 @@ jobs:
kubectl get nodes -o wide || true
kubectl describe nodes || true

echo "=== Authentication Status ==="
echo "Service Accounts:"
kubectl get serviceaccounts -n ${{ env.NAMESPACE }} || true
echo "Secrets:"
kubectl get secrets -n ${{ env.NAMESPACE }} | grep -E "(insights-ros-ingress|token)" || true
echo "Authentication files:"
ls -la /tmp/ | grep -E "(kubeconfig|auth)" || true

echo "=== Ingress Service Logs ==="
kubectl logs -n ${{ env.NAMESPACE }} -l app.kubernetes.io/name=ingress --tail=30 || true
echo "=== Recent Logs ==="
for pod in $(kubectl get pods -n ${{ env.NAMESPACE }} -o name | head -5); do
echo "--- Logs for $pod ---"
Expand Down
40 changes: 37 additions & 3 deletions deployment/docker-compose/docker-compose.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,49 @@ services:
depends_on:
- kafka

# Override Insights Ingress to use insights-onprem image
# Override Insights Ingress to use insights-ros-ingress image
ingress:
image: quay.io/insights-onprem/insights-ingress:latest
image: quay.io/insights-onprem/insights-ros-ingress:latest
# Settings not listed below are inherited from the base compose file; the keys below override or extend it
depends_on:
- minio
- kafka
ports:
- $INGRESS_PORT:8080
environment:
- INGRESS_VALID_UPLOAD_TYPES=hccm,rosocp
- SERVER_PORT=8080
- UPLOAD_MAX_SIZE=104857600
- UPLOAD_MAX_MEMORY=33554432
- UPLOAD_TEMP_DIR=/tmp
- UPLOAD_ALLOWED_TYPES=application/vnd.redhat.hccm.upload
- UPLOAD_REQUIRE_AUTH=true
- STORAGE_ENDPOINT=minio:9000
- STORAGE_BUCKET=ros-data
- STORAGE_USE_SSL=false
- STORAGE_URL_EXPIRATION=172800
- STORAGE_PATH_PREFIX=ros
- STORAGE_ACCESS_KEY=$MINIO_ACCESS_KEY
- STORAGE_SECRET_KEY=$MINIO_SECRET_KEY
- KAFKA_BROKERS=kafka:29092
- KAFKA_ROS_TOPIC=hccm.ros.events
- KAFKA_SECURITY_PROTOCOL=PLAINTEXT
- KAFKA_CLIENT_ID=insights-ros-ingress
- KAFKA_BATCH_SIZE=16384
- KAFKA_RETRIES=3
- AUTH_ENABLED=true
- LOG_LEVEL=info
- LOG_FORMAT=json
- LOG_OUTPUT=stdout
- METRICS_ENABLED=true
- METRICS_PATH=/metrics
- METRICS_PORT=8080
- KUBECONFIG=/tmp/ros-ingress-kubeconfig
- JWT_SECRET=dev-jwt-secret-key-for-ros-ingress
- DEFAULT_CLUSTER_ALIAS=test-cluster
- CLUSTER_ALIAS=test-cluster
- ROS_CLUSTER_ALIAS=test-cluster
volumes:
- /tmp/ros-ingress-kubeconfig:/tmp/ros-ingress-kubeconfig:ro

# Add health check for kruize-autotune service
kruize-autotune:
Expand Down
Loading
Loading