From 6925db00e63f51d3cff39096f6cecf36c5a60ccc Mon Sep 17 00:00:00 2001 From: Francisco Herrera Date: Thu, 23 Oct 2025 16:59:01 +0200 Subject: [PATCH 1/5] Adding Mapt script tool Signed-off-by: Francisco Herrera --- .gitignore | 3 + README.md | 60 +++ mapt_cluster/README.md | 342 +++++++++++++ mapt_cluster/create_mapt_cluster.sh | 760 ++++++++++++++++++++++++++++ 4 files changed, 1165 insertions(+) create mode 100644 mapt_cluster/README.md create mode 100755 mapt_cluster/create_mapt_cluster.sh diff --git a/.gitignore b/.gitignore index aaadf73..ef0190b 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,9 @@ go.work.sum # env file .env +# Pull secret file +pull-secret.json + # Editor/IDE # .idea/ # .vscode/ diff --git a/README.md b/README.md index 159a969..51f84bf 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,62 @@ # ci-utils Shared utilities to standardize and simplify build, test, and deployment pipelines. + +## Overview +This repository contains a collection of scripts, configurations, and tools designed to streamline continuous integration (CI) processes across various projects. By utilizing these shared utilities, teams can ensure consistency, reduce duplication of effort, and accelerate development workflows. + +## Tools Available + +### Report Portal Integration +**Location**: `report_portal/` + +Generic tool for sending JUnit XML test results to any Report Portal instance via Data Router. Supports both local development and CI/CD environments with secure credential handling. + +[View Documentation](report_portal/README.md) + +### MAPT Cluster Management +**Location**: `mapt_cluster/` + +Standardized tool for creating and managing OpenShift clusters using MAPT across different environments. Automatically detects local vs CI environments and handles S3 bucket management. + +[View Documentation](mapt_cluster/README.md) + +## Getting Started + +1. Clone the repository and navigate to the desired tool directory +2. Make the script executable: `chmod +x script_name.sh` +3. Configure required environment variables (see individual tool documentation) +4. Test with `--dry-run --verbose` to verify configuration +5. Run the tool according to its documentation + +## Security Considerations + +All tools in this repository follow security best practices: + +- **No credential logging**: Sensitive information is never logged and appears as `[REDACTED]` +- **Automatic cleanup**: Credentials and temporary resources are cleaned up automatically +- **Secure environment variables**: Preferred over file-based credential storage +- **Error handling**: Proper cleanup occurs even when operations fail +- **Restricted file permissions**: Log files and sensitive data have appropriate permissions + +## Contributing + +When adding new tools or improving existing ones: + +1. **Follow the established patterns**: Look at existing tools for structure and security practices +2. **Include comprehensive documentation**: README with examples, security notes, and troubleshooting +3. **Add help functionality**: `--help` flag with usage examples +4. **Implement dry-run mode**: `--dry-run` for testing without executing operations +5. **Security first**: Never log credentials, implement automatic cleanup +6. **Test in multiple environments**: Verify local and CI environment compatibility + +## Support + +For issues, questions, or contributions: + +1. Check the individual tool's README for specific guidance +2. Look at the troubleshooting sections in each tool's documentation +3. 
Open an issue in this repository with: + - Tool name and version + - Environment details (local/CI platform) + - Error logs (with credentials redacted) + - Steps to reproduce \ No newline at end of file diff --git a/mapt_cluster/README.md b/mapt_cluster/README.md new file mode 100644 index 0000000..6db0214 --- /dev/null +++ b/mapt_cluster/README.md @@ -0,0 +1,342 @@ +# MAPT Cluster Management Tool + +A standardized, reusable tool for creating and managing OpenShift clusters using MAPT (Managed Application Platform Tools) across different environments - from local development to CI/CD pipelines. + +## Features + +- **Multi-Environment Support**: Works seamlessly in local development and CI/CD environments +- **Automatic Environment Detection**: Auto-detects CI vs local environments and adjusts configuration +- **Flexible Backing URLs**: Supports both S3 (for CI) and local file-based backing +- **S3 Bucket Management**: Automatic creation and cleanup of S3 buckets for CI environments +- **Comprehensive Logging**: Timestamped logs with separate files for different operations and live log streaming in verbose mode +- **Secure Credential Handling**: No credentials logged, automatic cleanup on exit +- **Configurable Cluster Specs**: CPU, memory, version, and other cluster parameters +- **Error Handling**: Proper cleanup on failures with detailed error reporting +- **Container Engine Flexibility**: Supports both Podman and Docker + +## Prerequisites + +### Required Tools +- **Container Engine**: Podman (recommended) or Docker +- **AWS CLI**: Required when using S3 backing (CI environments) +- **Pull Secret**: OpenShift pull secret file for cluster creation + +### Required Credentials +- **AWS Access Key ID**: For S3 operations and cluster provisioning +- **AWS Secret Access Key**: For S3 operations and cluster provisioning +- **OpenShift Pull Secret**: Valid pull secret file for cluster creation + +## Installation + +1. Copy the script to your desired location +2. Make it executable: `chmod +x create_mapt_cluster.sh` +3. 
Ensure prerequisites are installed and available in PATH + +## Usage + +### Basic Usage + +```bash +# Set required environment variables +export AWS_ACCESS_KEY_ID="your_access_key" +export AWS_SECRET_ACCESS_KEY="your_secret_key" + +# Ensure pull secret is available +cp /path/to/your/pull-secret.json ./pull-secret.json + +# Create and destroy cluster (default behavior) +./create_mapt_cluster.sh +``` + +### Advanced Usage + +```bash +# Create cluster only (for development/testing/ci) +./create_mapt_cluster.sh --create-only --verbose + +# Delete existing cluster only +export CLUSTER_NAME="my-existing-cluster" +./create_mapt_cluster.sh --delete-only + +# Custom cluster configuration +export CLUSTER_NAME="my-test-cluster" +export CLUSTER_VERSION="4.18.0" +export CLUSTER_CPUS=8 +export CLUSTER_MEMORY=32 +export CLUSTER_SPOT=false +./create_mapt_cluster.sh --verbose + +# Test configuration without execution +./create_mapt_cluster.sh --dry-run --verbose +``` + +## Environment Variables + +### Required + +| Variable | Description | Example | +|----------|-------------|---------| +| `AWS_ACCESS_KEY_ID` | AWS access key for S3 and cluster provisioning | `AKIA...` | +| `AWS_SECRET_ACCESS_KEY` | AWS secret key for S3 and cluster provisioning | `abcd1234...` | + +### Optional Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `AWS_DEFAULT_REGION` | `us-east-1` | AWS region for resources | +| `CLUSTER_NAME` | `mapt-cluster-{timestamp}` | Name of the cluster project | +| `CLUSTER_VERSION` | `4.19.0` | OpenShift version to deploy | +| `CLUSTER_CPUS` | `16` | Number of CPUs for the cluster | +| `CLUSTER_MEMORY` | `64` | Memory in GB for the cluster | +| `CLUSTER_SPOT` | `true` | Use spot instances (true/false) | +| `CLUSTER_TIMEOUT` | `60` | Timeout for cluster operations in minutes | +| `PULL_SECRET_FILE` | `./pull-secret.json` | Path to pull secret file | +| `CLUSTER_TAGS` | Basic tags | Additional tags for AWS resources | +| `BACKED_URL_TYPE` | Auto-detected | Backing URL type: "s3" or "file" | +| `S3_BUCKET_PREFIX` | `mapt-cluster` | Prefix for S3 bucket names | +| `CONTAINER_ENGINE` | `podman` | Container engine: "podman" or "docker" | +| `MAPT_IMAGE` | `quay.io/redhat-developer/mapt:v0.9.4` | MAPT container image | +| `LOG_LEVEL` | `normal` | Logging verbosity: "verbose" or "normal" | + +## Command Line Options + +| Option | Description | +|--------|-------------| +| `-c, --create-only` | Create cluster only (don't delete) | +| `-d, --delete-only` | Delete cluster only (don't create) | +| `-b, --both` | Create and delete cluster (default) | +| `-h, --help` | Show help message and exit | +| `-v, --verbose` | Enable verbose logging | +| `--dry-run` | Show what would be executed without running | + +## Environment Detection + +The script automatically detects the environment and adjusts its behavior: + +### CI Environment Detection +The script detects CI environments by checking for these variables: +- `CI=true` (generic CI indicator) + +### CI Environment Behavior +- **Backing URL**: Uses S3 with auto-generated bucket names +- **Cluster Name**: Auto-generates with timestamp if not provided +- **S3 Management**: Automatically creates and cleans up S3 buckets +- **Logging**: Enhanced logging for CI debugging + +### Local Environment Behavior +- **Backing URL**: Uses local file-based backing (`file:///workspace`) +- **Cluster Name**: Uses provided name or generates timestamped name +- **S3 Management**: No S3 operations (unless explicitly configured) +- 
**Logging**: Standard logging to local files + +## Examples + +### Local Development + +```bash +# Basic local cluster for development +export AWS_ACCESS_KEY_ID="your_key" +export AWS_SECRET_ACCESS_KEY="your_secret" +export CLUSTER_NAME="dev-cluster" +./create_mapt_cluster.sh --create-only --verbose +``` + +### CI/CD Pipeline Usage + +#### GitHub Actions + +```yaml +- name: Create OpenShift cluster + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + CI: true + CLUSTER_NAME: "gh-${{ github.run_id }}" + CLUSTER_VERSION: "4.19.0" + PULL_SECRET_FILE: "./pull-secret.json" + run: | + echo "${{ secrets.PULL_SECRET_CONTENT }}" > pull-secret.json + ./mapt_cluster/create_mapt_cluster.sh --create-only --verbose + +- name: Run tests + env: + KUBECONFIG: ./kubeconfig + run: | + # Your test commands here + make test.e2e.ocp + +- name: Cleanup cluster + if: always() + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + CI: true + CLUSTER_NAME: "gh-${{ github.run_id }}" + run: | + ./mapt_cluster/create_mapt_cluster.sh --delete-only +``` + +### Custom Configuration Examples + +```bash +# High-spec cluster for performance testing +export CLUSTER_CPUS=32 +export CLUSTER_MEMORY=128 +export CLUSTER_SPOT=false +export CLUSTER_TIMEOUT=90 +./create_mapt_cluster.sh + +# Minimal cluster for quick testing +export CLUSTER_CPUS=8 +export CLUSTER_MEMORY=32 +export CLUSTER_VERSION="4.18.0" +./create_mapt_cluster.sh --create-only + +# Custom tags and S3 configuration +export CLUSTER_TAGS="project=myproject,team=platform,environment=testing" +export S3_BUCKET_PREFIX="mycompany-mapt" +./create_mapt_cluster.sh + +# Using custom MAPT image version +export MAPT_IMAGE="quay.io/redhat-developer/mapt:v0.9.3" +export CONTAINER_ENGINE="docker" +./create_mapt_cluster.sh --verbose +``` + +## Logging + +The script creates comprehensive logs for all operations: + +### Log Files + +| File | Content | +|------|---------| +| `mapt_cluster_YYYYMMDD_HHMMSS.log` | Main execution log with timestamps | +| `mapt_create_YYYYMMDD_HHMMSS.log` | Cluster creation container logs | +| `mapt_destroy_YYYYMMDD_HHMMSS.log` | Cluster destruction container logs | + +### Log Levels + +- **Normal**: Standard information and error messages with logs saved to files +- **Verbose**: Detailed configuration and operation information with live container logs displayed in real-time + +### Log Security + +- No AWS credentials are ever logged +- Sensitive information is redacted in logs +- Log files are created with restricted permissions (600) + +## Troubleshooting + +### Common Issues + +1. **Container engine not found** + ```bash + # Install Podman (recommended) + # Ubuntu/Debian + sudo apt-get install podman + + # RHEL/CentOS/Fedora + sudo dnf install podman + + # macOS + brew install podman + ``` + +2. **AWS CLI not found** (for S3 backing) + ```bash + # Install AWS CLI + # Using pip + pip install awscli + + # Using package manager + # Ubuntu/Debian + sudo apt-get install awscli + ``` + +3. **Pull secret file not found** + ```bash + # Download from Red Hat Cloud Console + # https://console.redhat.com/openshift/install/pull-secret + + # Or set custom location + export PULL_SECRET_FILE="/path/to/your/pull-secret.json" + ``` + +4. **Cluster creation timeout** + ```bash + # Increase timeout for slow environments + export CLUSTER_TIMEOUT=120 # 2 hours + ``` + +5. 
**S3 bucket creation fails** + ```bash + # Check AWS credentials and permissions + aws sts get-caller-identity + + # Use custom bucket prefix to avoid conflicts + export S3_BUCKET_PREFIX="yourcompany-mapt" + ``` + +6. **S3 bucket left after failed cluster creation** + ```bash + # This is NORMAL and EXPECTED behavior! + # The S3 bucket contains Pulumi state files needed for cleanup + + # First, try to destroy the cluster (even if creation failed) + export CLUSTER_NAME="your-failed-cluster-name" + ./create_mapt_cluster.sh --delete-only + + # Only delete S3 bucket AFTER cluster destruction succeeds + # The script will do this automatically, or you can do it manually: + aws s3 rb s3://your-bucket-name --force + ``` + +### Debug Mode + +Use verbose mode and dry-run for debugging: + +```bash +./create_mapt_cluster.sh --dry-run --verbose +``` + +This will show: +- Complete configuration +- Commands that would be executed +- Environment detection results +- File paths and permissions + +### Manual Cleanup + +If the script fails to clean up resources: + +```bash +# Remove containers +podman rm -f mapt-create-* mapt-destroy-* + +# Remove S3 bucket (if created) +aws s3 rb s3://your-bucket-name --force + +# Remove local state files +rm -f kubeconfig *.log +``` + +## Security Considerations + +### Credential Security +- AWS credentials are passed securely via environment variables +- Credentials are automatically cleared from memory on script exit +- No credentials are logged in any log files +- Log files are created with restricted permissions + +### Network Security +- Clusters are created with default AWS security groups + +### Resource Cleanup +- Automatic cleanup on script exit (success or failure) +- **S3 buckets are preserved until cluster is successfully destroyed** (critical for cleanup) +- Container cleanup prevents resource leaks +- Failed operations trigger cleanup procedures +- **Important**: S3 buckets contain Pulumi state files needed for cluster destruction + +## More Information diff --git a/mapt_cluster/create_mapt_cluster.sh b/mapt_cluster/create_mapt_cluster.sh new file mode 100755 index 0000000..af3b0b3 --- /dev/null +++ b/mapt_cluster/create_mapt_cluster.sh @@ -0,0 +1,760 @@ +#!/bin/bash + +# For information regarding this script, please refer to the README.md file in the same directory. + +set -euo pipefail + +# --- Global Variables --- +SCRIPT_START_TIME=$(date '+%Y%m%d_%H%M%S') +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" +VERBOSE=false +DRY_RUN=false + +# Operation modes +CREATE_CLUSTER=true +DELETE_CLUSTER=true + +# State tracking +cluster_created=false +s3_bucket_created=false +cluster_destroyed=false +cleanup_done=false + +# Log files +MAIN_LOG_FILE="mapt_cluster_${SCRIPT_START_TIME}.log" +CREATE_LOG_FILE="mapt_create_${SCRIPT_START_TIME}.log" +DESTROY_LOG_FILE="mapt_destroy_${SCRIPT_START_TIME}.log" + +# Container names (unique per execution) +CREATE_CONTAINER_NAME="mapt-create-${SCRIPT_START_TIME}" +DESTROY_CONTAINER_NAME="mapt-destroy-${SCRIPT_START_TIME}" + +# --- Security and Cleanup Functions --- + +# Security function to clear sensitive variables +cleanup_credentials() { + unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY 2>/dev/null || true +} + +# Comprehensive cleanup function +cleanup() { + # Disable ERR trap to prevent infinite loops during cleanup + trap - ERR + + # Prevent cleanup from running multiple times + if [ "$cleanup_done" = true ]; then + log_info "Cleanup already completed, skipping..." 
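+        # (expected when the EXIT trap fires after the ERR/INT handler has already run cleanup)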
+ return 0 + fi + + log_info "Starting cleanup process..." + cleanup_done=true + + # Determine if we should destroy the cluster + should_destroy=false + + if [ "$DELETE_CLUSTER" = true ]; then + log_verbose "Delete mode enabled, will destroy cluster if it exists" + should_destroy=true + elif [ "$cluster_created" = true ] && [ "$?" -ne 0 ]; then + log_info "Error detected and cluster was created, will destroy for cleanup" + should_destroy=true + fi + + # Check if cluster exists + cluster_exists=false + if [ "$cluster_created" = true ] || container_exists "$CREATE_CONTAINER_NAME"; then + cluster_exists=true + elif [ "$BACKED_URL_TYPE" = "s3" ] && [ "$DELETE_CLUSTER" = true ]; then + # For delete-only operations, check if Pulumi state exists in S3 + if aws s3 ls "s3://$S3_BUCKET_NAME/.pulumi/" &>/dev/null; then + log_verbose "Found Pulumi state files in S3, cluster exists for deletion" + cluster_exists=true + fi + fi + + # Destroy cluster if needed + if [ "$should_destroy" = true ] && [ "$cluster_exists" = true ]; then + if [ "$DRY_RUN" = true ]; then + log_info "DRY RUN: Would destroy cluster: $CLUSTER_NAME" + else + destroy_cluster + fi + fi + + # Only clean up S3 bucket if cluster was successfully destroyed + # This preserves the Pulumi state files needed for cluster cleanup + if [ "$BACKED_URL_TYPE" = "s3" ] && [ "$DRY_RUN" = false ]; then + if [ "$cluster_destroyed" = true ]; then + log_info "Cluster successfully destroyed, cleaning up S3 bucket: $S3_BUCKET_NAME" + if aws s3 rb "s3://$S3_BUCKET_NAME" --force 2>/dev/null; then + log_info "S3 bucket deleted successfully" + else + log_warn "Failed to delete S3 bucket - may need manual cleanup" + fi + else + log_warn "Preserving S3 bucket for cluster cleanup: $S3_BUCKET_NAME" + if [ "$cluster_created" = true ]; then + log_warn "IMPORTANT: Cluster resources may still be running in AWS!" + log_warn "To properly clean up:" + log_warn " 1. First destroy the cluster: $0 --delete-only" + log_warn " 2. Then delete the S3 bucket: aws s3 rb s3://$S3_BUCKET_NAME --force" + else + log_info "Cluster was not created, but S3 bucket may contain state files" + log_info "You can safely delete the S3 bucket: aws s3 rb s3://$S3_BUCKET_NAME --force" + fi + fi + fi + + # Clean up containers (force removal to ensure cleanup) + cleanup_containers + + # Clear credentials + cleanup_credentials + + log_info "Cleanup completed" +} + +# Set up traps for cleanup +trap 'log_error "Script aborted due to error. Running cleanup..."; cleanup' ERR +trap 'log_warn "Script interrupted by user. Running cleanup..."; cleanup' INT TERM +trap 'cleanup' EXIT + +# --- Logging Functions --- + +log_with_timestamp() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$MAIN_LOG_FILE" +} + +log_info() { + log_with_timestamp "[INFO] $*" +} + +log_warn() { + log_with_timestamp "[WARN] $*" >&2 +} + +log_error() { + log_with_timestamp "[ERROR] $*" >&2 +} + +log_verbose() { + if [ "$VERBOSE" = true ]; then + log_with_timestamp "[VERBOSE] $*" + fi +} + +log_continue() { + echo "$*" | tee -a "$MAIN_LOG_FILE" +} + +# --- Helper Functions --- + +show_help() { + cat << EOF +Generic MAPT Cluster Management Tool + +USAGE: + $0 [OPTIONS] + +DESCRIPTION: + Standardized tool for creating and managing OpenShift clusters using MAPT + across different environments (local development and CI/CD pipelines). 
+ +OPTIONS: + -c, --create-only Create cluster only (don't delete) + -d, --delete-only Delete cluster only (don't create) + -b, --both Create and delete cluster (default) + -h, --help Show this help message and exit + -v, --verbose Enable verbose logging + --dry-run Show what would be executed without running + +REQUIRED ENVIRONMENT VARIABLES: + AWS_ACCESS_KEY_ID AWS access key for S3 and cluster provisioning + AWS_SECRET_ACCESS_KEY AWS secret key for S3 and cluster provisioning + +OPTIONAL ENVIRONMENT VARIABLES: + AWS_DEFAULT_REGION AWS region for resources (default: us-east-1) + CLUSTER_NAME Name of the cluster project (default: auto-generated) + CLUSTER_VERSION OpenShift version (default: 4.19.0) + CLUSTER_CPUS Number of CPUs (default: 16) + CLUSTER_MEMORY Memory in GB (default: 64) + CLUSTER_SPOT Use spot instances (default: true) + CLUSTER_TIMEOUT Timeout in minutes (default: 60) + PULL_SECRET_FILE Path to pull secret file (default: ./pull-secret.json) + CLUSTER_TAGS Additional tags (default: basic tags) + BACKED_URL_TYPE Backing URL type: "s3" or "file" (auto-detected) + S3_BUCKET_PREFIX S3 bucket prefix (default: mapt-cluster) + CONTAINER_ENGINE Container engine: "podman" or "docker" (default: podman) + MAPT_IMAGE MAPT container image (default: latest stable) + LOG_LEVEL Logging: "verbose" or "normal" (default: normal) + +EXAMPLES: + # Basic usage (create and delete) + export AWS_ACCESS_KEY_ID="your_key" + export AWS_SECRET_ACCESS_KEY="your_secret" + $0 + + # Create cluster only for testing + $0 --create-only --verbose + + # Custom cluster configuration + export CLUSTER_NAME="my-test-cluster" + export CLUSTER_VERSION="4.18.0" + export CLUSTER_CPUS=8 + export CLUSTER_MEMORY=32 + $0 + + # Delete existing cluster + export CLUSTER_NAME="existing-cluster" + $0 --delete-only + +ENVIRONMENT DETECTION: + The script automatically detects the environment: + - CI Environment: Set CI=true to use S3 backing with auto-generated bucket names + - Local Environment: Uses file-based backing in current directory + +LOGS: + All operations are logged to timestamped files: + - $MAIN_LOG_FILE (main log) + - mapt_create_*.log (cluster creation) + - mapt_destroy_*.log (cluster destruction) + +EOF +} + +# Check if container exists +container_exists() { + local container_name="$1" + local engine="${CONTAINER_ENGINE:-podman}" + $engine ps -a --format "{{.Names}}" | grep -q "^${container_name}$" 2>/dev/null +} + +# Clean up containers +cleanup_containers() { + log_verbose "Cleaning up containers..." + + # Use default container engine if not set yet + local engine="${CONTAINER_ENGINE:-podman}" + + # Clean up specific containers by name + for container_name in "$CREATE_CONTAINER_NAME" "$DESTROY_CONTAINER_NAME"; do + if $engine ps -a --format "{{.Names}}" | grep -q "^${container_name}$" 2>/dev/null; then + log_verbose "Removing container: $container_name" + # First try to stop the container if it's running + $engine stop "$container_name" &>/dev/null || true + # Then force remove it + $engine rm -f "$container_name" &>/dev/null || true + fi + done + + # Also clean up any containers that might have been left from previous runs + # Look for containers with the mapt-create or mapt-destroy pattern + local old_containers + old_containers=$($engine ps -a --format "{{.Names}}" | grep -E "^mapt-(create|destroy)-" 2>/dev/null || true) + + if [ -n "$old_containers" ]; then + log_verbose "Found old MAPT containers, cleaning up..." 
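+        # Note: piping into the loop runs it in a subshell; that is fine here since nothing needs to persist after it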
+ echo "$old_containers" | while read -r container_name; do + if [ -n "$container_name" ]; then + log_verbose "Removing old container: $container_name" + $engine stop "$container_name" &>/dev/null || true + $engine rm -f "$container_name" &>/dev/null || true + fi + done + fi +} + +# Detect environment and set defaults +detect_environment() { + # Detect if running in CI environment + if [ "${CI:-}" = "true" ]; then + log_info "CI environment detected" + IS_CI=true + + # Auto-generate cluster name for CI if not provided + if [ -z "${CLUSTER_NAME:-}" ]; then + CLUSTER_NAME="mapt-ci-${SCRIPT_START_TIME}" + fi + + # Set S3 backing for CI + BACKED_URL_TYPE="s3" + else + log_info "Local environment detected" + IS_CI=false + BACKED_URL_TYPE="${BACKED_URL_TYPE:-file}" + fi +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + -c|--create-only) + CREATE_CLUSTER=true + DELETE_CLUSTER=false + log_verbose "Mode: Create cluster only" + shift + ;; + -d|--delete-only) + CREATE_CLUSTER=false + DELETE_CLUSTER=true + log_verbose "Mode: Delete cluster only" + shift + ;; + -b|--both) + CREATE_CLUSTER=true + DELETE_CLUSTER=true + log_verbose "Mode: Create and delete cluster" + shift + ;; + -h|--help) + show_help + exit 0 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + --dry-run) + DRY_RUN=true + log_info "DRY RUN mode enabled - no actual operations will be performed" + shift + ;; + *) + log_error "Unknown option: $1" + show_help + exit 1 + ;; + esac + done +} + +# Validate required environment variables +validate_environment() { + local missing_vars=() + + if [ -z "${AWS_ACCESS_KEY_ID:-}" ]; then + missing_vars+=("AWS_ACCESS_KEY_ID") + fi + + if [ -z "${AWS_SECRET_ACCESS_KEY:-}" ]; then + missing_vars+=("AWS_SECRET_ACCESS_KEY") + fi + + if [ ${#missing_vars[@]} -gt 0 ]; then + log_error "Missing required environment variables:" + for var in "${missing_vars[@]}"; do + log_error " - $var" + done + log_error "" + log_error "Please set the required variables and try again." + log_error "Use --help for more information." 
+ exit 1 + fi +} + +# Set default values for optional environment variables +set_defaults() { + # Detect environment first + detect_environment + + readonly AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-"us-east-1"} + readonly CLUSTER_NAME=${CLUSTER_NAME:-"mapt-cluster-${SCRIPT_START_TIME}"} + readonly CLUSTER_VERSION=${CLUSTER_VERSION:-"4.19.13"} + readonly CLUSTER_CPUS=${CLUSTER_CPUS:-16} + readonly CLUSTER_MEMORY=${CLUSTER_MEMORY:-64} + readonly CLUSTER_SPOT=${CLUSTER_SPOT:-true} + readonly CLUSTER_TIMEOUT=${CLUSTER_TIMEOUT:-60} + readonly PULL_SECRET_FILE=${PULL_SECRET_FILE:-"./pull-secret.json"} + readonly CLUSTER_TAGS=${CLUSTER_TAGS:-"tool=mapt"} + readonly S3_BUCKET_PREFIX=${S3_BUCKET_PREFIX:-"mapt-cluster"} + readonly CONTAINER_ENGINE=${CONTAINER_ENGINE:-"podman"} + readonly MAPT_IMAGE=${MAPT_IMAGE:-"quay.io/redhat-developer/mapt:v0.9.9"} + + # Set backing URL based on environment + if [ "$BACKED_URL_TYPE" = "s3" ]; then + readonly S3_BUCKET_NAME="${S3_BUCKET_PREFIX}-${CLUSTER_NAME}" + readonly BACKED_URL="s3://${S3_BUCKET_NAME}" + else + readonly BACKED_URL="file:///workspace" + fi + + # Enable verbose if LOG_LEVEL is set to verbose + if [ "${LOG_LEVEL:-}" = "verbose" ]; then + VERBOSE=true + fi + + log_verbose "Configuration loaded:" + if [ "$IS_CI" = true ]; then + log_verbose " Environment: CI" + else + log_verbose " Environment: Local" + fi + log_verbose " Cluster Name: $CLUSTER_NAME" + log_verbose " Cluster Version: $CLUSTER_VERSION" + log_verbose " Cluster CPUs: $CLUSTER_CPUS" + log_verbose " Cluster Memory: ${CLUSTER_MEMORY}GB" + log_verbose " Spot Instances: $CLUSTER_SPOT" + log_verbose " Timeout: ${CLUSTER_TIMEOUT}m" + log_verbose " Backing URL: $BACKED_URL" + log_verbose " Container Engine: $CONTAINER_ENGINE" + log_verbose " MAPT Image: $MAPT_IMAGE" + log_verbose " Pull Secret: $PULL_SECRET_FILE" + log_verbose " Cluster Tags: $CLUSTER_TAGS" +} + +# --- Core Functions --- + +# Create S3 bucket if needed +create_s3_bucket() { + if [ "$BACKED_URL_TYPE" = "s3" ]; then + log_info "Creating S3 bucket: $S3_BUCKET_NAME" + + if [ "$DRY_RUN" = true ]; then + log_info "DRY RUN: Would create S3 bucket: $S3_BUCKET_NAME" + return 0 + fi + + if aws s3api create-bucket --bucket "$S3_BUCKET_NAME" --region "$AWS_DEFAULT_REGION" 2>/dev/null; then + s3_bucket_created=true + log_info "S3 bucket created successfully" + else + # Bucket might already exist, check if we can access it + if aws s3 ls "s3://$S3_BUCKET_NAME" &>/dev/null; then + log_info "S3 bucket already exists and is accessible" + else + log_error "Failed to create or access S3 bucket: $S3_BUCKET_NAME" + exit 1 + fi + fi + fi +} + +# Verify prerequisites +verify_prerequisites() { + log_verbose "Verifying prerequisites..." + + # Check container engine + if ! command -v "$CONTAINER_ENGINE" &> /dev/null; then + log_error "Container engine '$CONTAINER_ENGINE' not found. Please install $CONTAINER_ENGINE." + exit 1 + fi + + # Check AWS CLI if using S3 backing + if [ "$BACKED_URL_TYPE" = "s3" ] && ! command -v aws &> /dev/null; then + log_error "AWS CLI not found but S3 backing is enabled. Please install AWS CLI." + exit 1 + fi + + # Check pull secret file + if [ "$CREATE_CLUSTER" = true ] && [ ! -f "$PULL_SECRET_FILE" ]; then + log_error "Pull secret file not found: $PULL_SECRET_FILE" + log_error "Please ensure the pull secret file exists or set PULL_SECRET_FILE environment variable." 
+ exit 1 + fi + + log_verbose "Prerequisites verified successfully" +} + +# Create cluster +create_cluster() { + log_info "Creating OpenShift cluster: $CLUSTER_NAME" + + if [ "$DRY_RUN" = true ]; then + log_info "DRY RUN: Would create cluster with the following configuration:" + log_info " Name: $CLUSTER_NAME" + log_info " Version: $CLUSTER_VERSION" + log_info " CPUs: $CLUSTER_CPUS" + log_info " Memory: ${CLUSTER_MEMORY}GB" + log_info " Spot: $CLUSTER_SPOT" + log_info " Backing URL: $BACKED_URL" + log_info " Tags: $CLUSTER_TAGS" + return 0 + fi + + # Prepare spot argument + local spot_arg="" + if [ "$CLUSTER_SPOT" = true ]; then + spot_arg="--spot" + fi + + log_info "Starting cluster creation container..." + $CONTAINER_ENGINE run -d --name "$CREATE_CONTAINER_NAME" \ + -v "${PWD}:/workspace:z" \ + -e AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ + -e AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ + -e AWS_DEFAULT_REGION="$AWS_DEFAULT_REGION" \ + "$MAPT_IMAGE" aws openshift-snc create \ + --backed-url "$BACKED_URL" \ + --conn-details-output "/workspace" \ + --pull-secret-file "/workspace/$(basename "$PULL_SECRET_FILE")" \ + --project-name "$CLUSTER_NAME" \ + --tags "$CLUSTER_TAGS" \ + --version "$CLUSTER_VERSION" \ + $spot_arg \ + --timeout "${CLUSTER_TIMEOUT}m" \ + --cpus "$CLUSTER_CPUS" \ + --memory "$CLUSTER_MEMORY" + + # Wait for creation to complete + log_info "Waiting for cluster creation to complete (timeout: ${CLUSTER_TIMEOUT}m)..." + local container_id + container_id=$($CONTAINER_ENGINE ps -q --filter "name=$CREATE_CONTAINER_NAME") + + if [ -z "$container_id" ]; then + log_error "Create container did not start properly" + exit 1 + fi + + # Show live logs if verbose mode is enabled + if [ "$VERBOSE" = true ]; then + log_info "Showing live container logs (press Ctrl+C to stop following logs):" + log_info "Container logs will continue in background..." + + # Start following logs in background and save to file + ($CONTAINER_ENGINE logs -f "$container_id" 2>&1 | tee "$CREATE_LOG_FILE") & + local logs_pid=$! 
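+        # $! is the PID of the backgrounded log follower, kept so it can be stopped once the container finishes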
+ + # Convert timeout to seconds + local timeout_seconds=$((CLUSTER_TIMEOUT * 60)) + + # Wait for container to complete + if timeout "$timeout_seconds" $CONTAINER_ENGINE wait "$container_id"; then + # Stop following logs + kill $logs_pid 2>/dev/null || true + wait $logs_pid 2>/dev/null || true + + local exit_code + exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + + log_info "Container execution completed with exit code: $exit_code" + else + # Stop following logs on timeout + kill $logs_pid 2>/dev/null || true + wait $logs_pid 2>/dev/null || true + log_error "Timeout waiting for cluster creation to complete" + # Clean up container + $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + exit 1 + fi + else + # Convert timeout to seconds + local timeout_seconds=$((CLUSTER_TIMEOUT * 60)) + + if timeout "$timeout_seconds" $CONTAINER_ENGINE wait "$container_id"; then + local exit_code + exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + + # Save logs + log_info "Saving cluster creation logs to $CREATE_LOG_FILE" + $CONTAINER_ENGINE logs "$container_id" > "$CREATE_LOG_FILE" 2>&1 + else + log_error "Timeout waiting for cluster creation to complete" + # Save logs even on timeout + $CONTAINER_ENGINE logs "$container_id" > "$CREATE_LOG_FILE" 2>&1 + # Clean up container + $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + exit 1 + fi + fi + + local exit_code + exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + + if [ "$exit_code" -eq 0 ]; then + cluster_created=true + log_info "Cluster created successfully: $CLUSTER_NAME" + + # Verify kubeconfig was created + if [ -f "./kubeconfig" ]; then + log_info "Kubeconfig file created successfully" + else + log_warn "Kubeconfig file not found after cluster creation" + fi + else + log_error "Cluster creation failed with exit code: $exit_code" + log_error "Check $CREATE_LOG_FILE for detailed error information" + # Clean up container + $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + exit 1 + fi + + # Clean up the container after successful execution + log_verbose "Cleaning up creation container: $CREATE_CONTAINER_NAME" + $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true +} + +# Destroy cluster +destroy_cluster() { + log_info "Destroying cluster: $CLUSTER_NAME" + + if [ "$DRY_RUN" = true ]; then + log_info "DRY RUN: Would destroy cluster: $CLUSTER_NAME" + return 0 + fi + + log_info "Starting cluster destruction container..." + $CONTAINER_ENGINE run -d --name "$DESTROY_CONTAINER_NAME" \ + -v "${PWD}:/workspace:z" \ + -e AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ + -e AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ + -e AWS_DEFAULT_REGION="$AWS_DEFAULT_REGION" \ + "$MAPT_IMAGE" aws openshift-snc destroy \ + --project-name "$CLUSTER_NAME" \ + --backed-url "$BACKED_URL" + + # Wait for destruction to complete + log_info "Waiting for cluster destruction to complete (timeout: 20m)..." + local container_id + container_id=$($CONTAINER_ENGINE ps -q --filter "name=$DESTROY_CONTAINER_NAME") + + if [ -n "$container_id" ]; then + # Show live logs if verbose mode is enabled + if [ "$VERBOSE" = true ]; then + log_info "Showing live destruction logs:" + + # Start following logs in background and save to file + ($CONTAINER_ENGINE logs -f "$container_id" 2>&1 | tee "$DESTROY_LOG_FILE") & + local logs_pid=$! 
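+            # As above, keep the log follower's PID; the 1200 s wait below matches the 20-minute timeout logged earlier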
+ + # Wait for container to complete + if timeout 1200 $CONTAINER_ENGINE wait "$container_id"; then + # Stop following logs + kill $logs_pid 2>/dev/null || true + wait $logs_pid 2>/dev/null || true + + local exit_code + exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + + log_info "Destruction container completed with exit code: $exit_code" + else + # Stop following logs on timeout + kill $logs_pid 2>/dev/null || true + wait $logs_pid 2>/dev/null || true + log_warn "Timeout waiting for cluster destruction to complete" + log_warn "Cluster destruction may not have completed" + fi + else + if timeout 1200 $CONTAINER_ENGINE wait "$container_id"; then + local exit_code + exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + + # Save logs + log_info "Saving cluster destruction logs to $DESTROY_LOG_FILE" + $CONTAINER_ENGINE logs "$container_id" > "$DESTROY_LOG_FILE" 2>&1 + else + log_warn "Timeout waiting for cluster destruction to complete" + # Save logs even on timeout + $CONTAINER_ENGINE logs "$container_id" > "$DESTROY_LOG_FILE" 2>&1 + log_warn "Cluster destruction may not have completed" + fi + fi + + local exit_code + exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + + if [ "$exit_code" -eq 0 ]; then + cluster_destroyed=true + log_info "Cluster destroyed successfully: $CLUSTER_NAME" + else + log_warn "Cluster destruction may have failed, exit code: $exit_code" + log_warn "Check $DESTROY_LOG_FILE for detailed information" + log_warn "S3 bucket will be preserved to allow manual cleanup" + fi + + # Clean up the container after execution + log_verbose "Cleaning up destruction container: $DESTROY_CONTAINER_NAME" + $CONTAINER_ENGINE rm -f "$DESTROY_CONTAINER_NAME" &>/dev/null || true + else + log_warn "Could not find destroy container. 
Manual cleanup may be required for project: $CLUSTER_NAME" + fi +} + +# --- Main Execution --- + +main() { + # Initialize log file with proper permissions + touch "$MAIN_LOG_FILE" + chmod 600 "$MAIN_LOG_FILE" + + log_info "=== MAPT Cluster Management Tool Started ===" + log_info "Script version: 1.0.0" + log_info "Start time: $(date)" + log_info "Log file: $MAIN_LOG_FILE" + + # Parse command line arguments + parse_args "$@" + + # Validate and set up environment + validate_environment + set_defaults + + log_info "Operation modes - Create: $CREATE_CLUSTER, Delete: $DELETE_CLUSTER" + log_info "Cluster name: $CLUSTER_NAME" + if [ "$IS_CI" = true ]; then + log_info "Environment: CI" + else + log_info "Environment: Local" + fi + + # Record start time for duration calculation + local start_time + start_time=$(date +%s) + + # Verify prerequisites + verify_prerequisites + + # Create S3 bucket if needed + if [ "$CREATE_CLUSTER" = true ]; then + create_s3_bucket + fi + + # Execute requested operations + if [ "$CREATE_CLUSTER" = true ]; then + create_cluster + fi + + # Note: Cluster deletion is handled in cleanup function + # This ensures proper cleanup even on errors + + # Calculate and display execution summary + local end_time elapsed hours minutes seconds + end_time=$(date +%s) + elapsed=$((end_time - start_time)) + hours=$((elapsed / 3600)) + minutes=$(((elapsed % 3600) / 60)) + seconds=$((elapsed % 60)) + + log_info "" + log_info "=== EXECUTION SUMMARY ===" + if [ "$CREATE_CLUSTER" = true ] && [ "$cluster_created" = true ]; then + log_info "✓ Cluster created successfully: $CLUSTER_NAME" + elif [ "$CREATE_CLUSTER" = true ]; then + log_info "✗ Cluster creation failed or incomplete" + fi + + if [ "$DELETE_CLUSTER" = true ]; then + log_info "✓ Cluster deletion scheduled for cleanup" + elif [ "$cluster_created" = true ]; then + log_info "ℹ Cluster preserved (use '$0 --delete-only' to delete later)" + fi + + log_info "Total execution time: $(printf "%02d:%02d:%02d" $hours $minutes $seconds)" + log_info "Log files created:" + log_info " - $MAIN_LOG_FILE (main execution log)" + if [ -f "$CREATE_LOG_FILE" ]; then + log_info " - $CREATE_LOG_FILE (cluster creation log)" + fi + if [ -f "$DESTROY_LOG_FILE" ]; then + log_info " - $DESTROY_LOG_FILE (cluster destruction log)" + fi + log_info "=========================" + + log_info "Script execution completed. Cleanup will run automatically." 
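+    # (the EXIT trap registered at the top of the script triggers cleanup from here)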
+} + +# Only run main if script is executed directly (not sourced) +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi \ No newline at end of file From 09c4dc96e3c431089790959803b0c65fc859242e Mon Sep 17 00:00:00 2001 From: Francisco Herrera Date: Mon, 27 Oct 2025 15:38:26 +0100 Subject: [PATCH 2/5] Fix error with timeout value Signed-off-by: Francisco Herrera --- .gitignore | 10 ++++++++++ mapt_cluster/create_mapt_cluster.sh | 12 ++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index ef0190b..6858037 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,16 @@ go.work.sum # Pull secret file pull-secret.json +# Resulting cluster files +host +id_rsa +kubeconfig +kubeadmin_pass +developer_pass +username +.pulumi +*.log + # Editor/IDE # .idea/ # .vscode/ diff --git a/mapt_cluster/create_mapt_cluster.sh b/mapt_cluster/create_mapt_cluster.sh index af3b0b3..df345b9 100755 --- a/mapt_cluster/create_mapt_cluster.sh +++ b/mapt_cluster/create_mapt_cluster.sh @@ -180,7 +180,6 @@ OPTIONAL ENVIRONMENT VARIABLES: CLUSTER_CPUS Number of CPUs (default: 16) CLUSTER_MEMORY Memory in GB (default: 64) CLUSTER_SPOT Use spot instances (default: true) - CLUSTER_TIMEOUT Timeout in minutes (default: 60) PULL_SECRET_FILE Path to pull secret file (default: ./pull-secret.json) CLUSTER_TAGS Additional tags (default: basic tags) BACKED_URL_TYPE Backing URL type: "s3" or "file" (auto-detected) @@ -361,11 +360,10 @@ set_defaults() { readonly AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-"us-east-1"} readonly CLUSTER_NAME=${CLUSTER_NAME:-"mapt-cluster-${SCRIPT_START_TIME}"} - readonly CLUSTER_VERSION=${CLUSTER_VERSION:-"4.19.13"} + readonly CLUSTER_VERSION=${CLUSTER_VERSION:-"4.20.0"} readonly CLUSTER_CPUS=${CLUSTER_CPUS:-16} readonly CLUSTER_MEMORY=${CLUSTER_MEMORY:-64} readonly CLUSTER_SPOT=${CLUSTER_SPOT:-true} - readonly CLUSTER_TIMEOUT=${CLUSTER_TIMEOUT:-60} readonly PULL_SECRET_FILE=${PULL_SECRET_FILE:-"./pull-secret.json"} readonly CLUSTER_TAGS=${CLUSTER_TAGS:-"tool=mapt"} readonly S3_BUCKET_PREFIX=${S3_BUCKET_PREFIX:-"mapt-cluster"} @@ -396,7 +394,6 @@ set_defaults() { log_verbose " Cluster CPUs: $CLUSTER_CPUS" log_verbose " Cluster Memory: ${CLUSTER_MEMORY}GB" log_verbose " Spot Instances: $CLUSTER_SPOT" - log_verbose " Timeout: ${CLUSTER_TIMEOUT}m" log_verbose " Backing URL: $BACKED_URL" log_verbose " Container Engine: $CONTAINER_ENGINE" log_verbose " MAPT Image: $MAPT_IMAGE" @@ -493,12 +490,11 @@ create_cluster() { --tags "$CLUSTER_TAGS" \ --version "$CLUSTER_VERSION" \ $spot_arg \ - --timeout "${CLUSTER_TIMEOUT}m" \ --cpus "$CLUSTER_CPUS" \ --memory "$CLUSTER_MEMORY" # Wait for creation to complete - log_info "Waiting for cluster creation to complete (timeout: ${CLUSTER_TIMEOUT}m)..." + log_info "Waiting for cluster creation to complete..." local container_id container_id=$($CONTAINER_ENGINE ps -q --filter "name=$CREATE_CONTAINER_NAME") @@ -517,7 +513,7 @@ create_cluster() { local logs_pid=$! 
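+        # NOTE: the wait below is now a fixed 1200 seconds (20 minutes) rather than derived from CLUSTER_TIMEOUT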
# Convert timeout to seconds - local timeout_seconds=$((CLUSTER_TIMEOUT * 60)) + local timeout_seconds=1200 # Wait for container to complete if timeout "$timeout_seconds" $CONTAINER_ENGINE wait "$container_id"; then @@ -540,7 +536,7 @@ create_cluster() { fi else # Convert timeout to seconds - local timeout_seconds=$((CLUSTER_TIMEOUT * 60)) + local timeout_seconds=1200 if timeout "$timeout_seconds" $CONTAINER_ENGINE wait "$container_id"; then local exit_code From 989f07ec24edec36b7c4724cf62134f6d3ff6e3b Mon Sep 17 00:00:00 2001 From: Francisco Herrera Date: Wed, 29 Oct 2025 14:56:08 +0100 Subject: [PATCH 3/5] Update from review Signed-off-by: Francisco Herrera --- mapt_cluster/README.md | 38 +++++++++---- mapt_cluster/create_mapt_cluster.sh | 88 ++++++++++++++--------------- 2 files changed, 67 insertions(+), 59 deletions(-) diff --git a/mapt_cluster/README.md b/mapt_cluster/README.md index 6db0214..4ff4b82 100644 --- a/mapt_cluster/README.md +++ b/mapt_cluster/README.md @@ -19,7 +19,6 @@ A standardized, reusable tool for creating and managing OpenShift clusters using ### Required Tools - **Container Engine**: Podman (recommended) or Docker - **AWS CLI**: Required when using S3 backing (CI environments) -- **Pull Secret**: OpenShift pull secret file for cluster creation ### Required Credentials - **AWS Access Key ID**: For S3 operations and cluster provisioning @@ -52,11 +51,11 @@ cp /path/to/your/pull-secret.json ./pull-secret.json ```bash # Create cluster only (for development/testing/ci) -./create_mapt_cluster.sh --create-only --verbose +./create_mapt_cluster.sh --create --verbose # Delete existing cluster only export CLUSTER_NAME="my-existing-cluster" -./create_mapt_cluster.sh --delete-only +./create_mapt_cluster.sh --delete # Custom cluster configuration export CLUSTER_NAME="my-test-cluster" @@ -85,7 +84,7 @@ export CLUSTER_SPOT=false |----------|---------|-------------| | `AWS_DEFAULT_REGION` | `us-east-1` | AWS region for resources | | `CLUSTER_NAME` | `mapt-cluster-{timestamp}` | Name of the cluster project | -| `CLUSTER_VERSION` | `4.19.0` | OpenShift version to deploy | +| `CLUSTER_VERSION` | `4.20.0` | OpenShift version to deploy (full version with patch required, e.g., `4.20.0`) | | `CLUSTER_CPUS` | `16` | Number of CPUs for the cluster | | `CLUSTER_MEMORY` | `64` | Memory in GB for the cluster | | `CLUSTER_SPOT` | `true` | Use spot instances (true/false) | @@ -102,13 +101,18 @@ export CLUSTER_SPOT=false | Option | Description | |--------|-------------| -| `-c, --create-only` | Create cluster only (don't delete) | -| `-d, --delete-only` | Delete cluster only (don't create) | -| `-b, --both` | Create and delete cluster (default) | +| `--create` | Create cluster only (don't delete) | +| `--delete` | Delete cluster only (don't create) | +| Default (no options) | Create cluster, then delete it after completion | | `-h, --help` | Show help message and exit | | `-v, --verbose` | Enable verbose logging | | `--dry-run` | Show what would be executed without running | +**Note**: The default behavior creates a cluster and then immediately deletes it. This is useful for end-to-end testing of the cluster lifecycle. For typical CI/CD workflows, you would: +1. Use `--create` to create the cluster +2. Run your tests or workloads +3. 
Use `--delete` to clean up the cluster after tests complete
+
 ## Environment Detection
 
 The script automatically detects the environment and adjusts its behavior:
@@ -138,7 +142,7 @@ The script detects CI environments by checking for these variables:
 export AWS_ACCESS_KEY_ID="your_key"
 export AWS_SECRET_ACCESS_KEY="your_secret"
 export CLUSTER_NAME="dev-cluster"
-./create_mapt_cluster.sh --create-only --verbose
+./create_mapt_cluster.sh --create --verbose
 ```
 
 ### CI/CD Pipeline Usage
 
@@ -156,7 +160,7 @@ export CLUSTER_NAME="dev-cluster"
       PULL_SECRET_FILE: "./pull-secret.json"
     run: |
       echo "${{ secrets.PULL_SECRET_CONTENT }}" > pull-secret.json
-      ./mapt_cluster/create_mapt_cluster.sh --create-only --verbose
+      ./mapt_cluster/create_mapt_cluster.sh --create --verbose
 
 - name: Run tests
   env:
@@ -173,7 +177,7 @@ export CLUSTER_NAME="dev-cluster"
       CI: true
       CLUSTER_NAME: "gh-${{ github.run_id }}"
     run: |
-      ./mapt_cluster/create_mapt_cluster.sh --delete-only
+      ./mapt_cluster/create_mapt_cluster.sh --delete
 ```
 
 ### Custom Configuration Examples
 
@@ -190,7 +194,7 @@ export CLUSTER_TIMEOUT=90
 export CLUSTER_CPUS=8
 export CLUSTER_MEMORY=32
 export CLUSTER_VERSION="4.18.0"
-./create_mapt_cluster.sh --create-only
+./create_mapt_cluster.sh --create
 
 # Custom tags and S3 configuration
 export CLUSTER_TAGS="project=myproject,team=platform,environment=testing"
@@ -285,13 +289,23 @@ The script creates comprehensive logs for all operations:
 
     # First, try to destroy the cluster (even if creation failed)
    export CLUSTER_NAME="your-failed-cluster-name"
-    ./create_mapt_cluster.sh --delete-only
+    ./create_mapt_cluster.sh --delete
 
     # Only delete S3 bucket AFTER cluster destruction succeeds
     # The script will do this automatically, or you can do it manually:
     aws s3 rb s3://your-bucket-name --force
     ```
 
+**Finding Available OpenShift Versions**: To find available OpenShift versions with patch numbers, you can:
+- Check, using the AWS CLI, which `openshift-local` AMIs are available to your account:
+```bash
+aws ec2 describe-images --filters "Name=name,Values=openshift-local-*" --query 'Images[*].[Name]' --output text | sort -V
+openshift-local-4.19.0-arm64
+openshift-local-4.19.0-x86_64
+openshift-local-4.20.0-x86_64-d3cd1dd
+```
+Note: The OpenShift Local team regularly updates these AMIs with the latest patches. There may be additional images available that have not yet been copied to the shared account.
+
 ### Debug Mode
 
 Use verbose mode and dry-run for debugging:
diff --git a/mapt_cluster/create_mapt_cluster.sh b/mapt_cluster/create_mapt_cluster.sh
index df345b9..9d5e9e5 100755
--- a/mapt_cluster/create_mapt_cluster.sh
+++ b/mapt_cluster/create_mapt_cluster.sh
@@ -97,7 +97,7 @@ cleanup() {
             if [ "$cluster_created" = true ]; then
                 log_warn "IMPORTANT: Cluster resources may still be running in AWS!"
                 log_warn "To properly clean up:"
-                log_warn "  1. First destroy the cluster: $0 --delete-only"
+                log_warn "  1. First destroy the cluster: $0 --delete"
                 log_warn "  2. Then delete the S3 bucket: aws s3 rb s3://$S3_BUCKET_NAME --force"
             else
                 log_info "Cluster was not created, but S3 bucket may contain state files"
@@ -162,9 +162,9 @@ DESCRIPTION:
     across different environments (local development and CI/CD pipelines).
OPTIONS: - -c, --create-only Create cluster only (don't delete) - -d, --delete-only Delete cluster only (don't create) - -b, --both Create and delete cluster (default) + --create Create cluster only (don't delete) + --delete Delete cluster only (don't create) + Default (no options) Create cluster, then delete it after completion -h, --help Show this help message and exit -v, --verbose Enable verbose logging --dry-run Show what would be executed without running @@ -195,7 +195,7 @@ EXAMPLES: $0 # Create cluster only for testing - $0 --create-only --verbose + $0 --create --verbose # Custom cluster configuration export CLUSTER_NAME="my-test-cluster" @@ -206,7 +206,7 @@ EXAMPLES: # Delete existing cluster export CLUSTER_NAME="existing-cluster" - $0 --delete-only + $0 --delete ENVIRONMENT DETECTION: The script automatically detects the environment: @@ -226,7 +226,7 @@ EOF container_exists() { local container_name="$1" local engine="${CONTAINER_ENGINE:-podman}" - $engine ps -a --format "{{.Names}}" | grep -q "^${container_name}$" 2>/dev/null + "$engine" ps -a --format "{{.Names}}" | grep -q "^${container_name}$" 2>/dev/null } # Clean up containers @@ -238,27 +238,27 @@ cleanup_containers() { # Clean up specific containers by name for container_name in "$CREATE_CONTAINER_NAME" "$DESTROY_CONTAINER_NAME"; do - if $engine ps -a --format "{{.Names}}" | grep -q "^${container_name}$" 2>/dev/null; then + if "$engine" ps -a --format "{{.Names}}" | grep -q "^${container_name}$" 2>/dev/null; then log_verbose "Removing container: $container_name" # First try to stop the container if it's running - $engine stop "$container_name" &>/dev/null || true + "$engine" stop "$container_name" &>/dev/null || true # Then force remove it - $engine rm -f "$container_name" &>/dev/null || true + "$engine" rm -f "$container_name" &>/dev/null || true fi done # Also clean up any containers that might have been left from previous runs # Look for containers with the mapt-create or mapt-destroy pattern local old_containers - old_containers=$($engine ps -a --format "{{.Names}}" | grep -E "^mapt-(create|destroy)-" 2>/dev/null || true) + old_containers=$("$engine" ps -a --format "{{.Names}}" | grep -E "^mapt-(create|destroy)-" 2>/dev/null || true) if [ -n "$old_containers" ]; then log_verbose "Found old MAPT containers, cleaning up..." echo "$old_containers" | while read -r container_name; do if [ -n "$container_name" ]; then log_verbose "Removing old container: $container_name" - $engine stop "$container_name" &>/dev/null || true - $engine rm -f "$container_name" &>/dev/null || true + "$engine" stop "$container_name" &>/dev/null || true + "$engine" rm -f "$container_name" &>/dev/null || true fi done fi @@ -289,24 +289,18 @@ detect_environment() { parse_args() { while [[ $# -gt 0 ]]; do case $1 in - -c|--create-only) + --create) CREATE_CLUSTER=true DELETE_CLUSTER=false log_verbose "Mode: Create cluster only" shift ;; - -d|--delete-only) + --delete) CREATE_CLUSTER=false DELETE_CLUSTER=true log_verbose "Mode: Delete cluster only" shift ;; - -b|--both) - CREATE_CLUSTER=true - DELETE_CLUSTER=true - log_verbose "Mode: Create and delete cluster" - shift - ;; -h|--help) show_help exit 0 @@ -477,7 +471,7 @@ create_cluster() { fi log_info "Starting cluster creation container..." 
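+    # Quoting "$CONTAINER_ENGINE" prevents word splitting if the value ever contains spaces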
- $CONTAINER_ENGINE run -d --name "$CREATE_CONTAINER_NAME" \ + "$CONTAINER_ENGINE" run -d --name "$CREATE_CONTAINER_NAME" \ -v "${PWD}:/workspace:z" \ -e AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ -e AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ @@ -496,7 +490,7 @@ create_cluster() { # Wait for creation to complete log_info "Waiting for cluster creation to complete..." local container_id - container_id=$($CONTAINER_ENGINE ps -q --filter "name=$CREATE_CONTAINER_NAME") + container_id=$("$CONTAINER_ENGINE" ps -q --filter "name=$CREATE_CONTAINER_NAME") if [ -z "$container_id" ]; then log_error "Create container did not start properly" @@ -509,20 +503,20 @@ create_cluster() { log_info "Container logs will continue in background..." # Start following logs in background and save to file - ($CONTAINER_ENGINE logs -f "$container_id" 2>&1 | tee "$CREATE_LOG_FILE") & + ("$CONTAINER_ENGINE" logs -f "$container_id" 2>&1 | tee "$CREATE_LOG_FILE") & local logs_pid=$! # Convert timeout to seconds local timeout_seconds=1200 # Wait for container to complete - if timeout "$timeout_seconds" $CONTAINER_ENGINE wait "$container_id"; then + if timeout "$timeout_seconds" "$CONTAINER_ENGINE" wait "$container_id"; then # Stop following logs kill $logs_pid 2>/dev/null || true wait $logs_pid 2>/dev/null || true local exit_code - exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + exit_code=$("$CONTAINER_ENGINE" inspect "$container_id" --format '{{.State.ExitCode}}') log_info "Container execution completed with exit code: $exit_code" else @@ -531,32 +525,32 @@ create_cluster() { wait $logs_pid 2>/dev/null || true log_error "Timeout waiting for cluster creation to complete" # Clean up container - $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + "$CONTAINER_ENGINE" rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true exit 1 fi else # Convert timeout to seconds local timeout_seconds=1200 - if timeout "$timeout_seconds" $CONTAINER_ENGINE wait "$container_id"; then + if timeout "$timeout_seconds" "$CONTAINER_ENGINE" wait "$container_id"; then local exit_code - exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + exit_code=$("$CONTAINER_ENGINE" inspect "$container_id" --format '{{.State.ExitCode}}') # Save logs log_info "Saving cluster creation logs to $CREATE_LOG_FILE" - $CONTAINER_ENGINE logs "$container_id" > "$CREATE_LOG_FILE" 2>&1 + "$CONTAINER_ENGINE" logs "$container_id" > "$CREATE_LOG_FILE" 2>&1 else log_error "Timeout waiting for cluster creation to complete" # Save logs even on timeout - $CONTAINER_ENGINE logs "$container_id" > "$CREATE_LOG_FILE" 2>&1 + "$CONTAINER_ENGINE" logs "$container_id" > "$CREATE_LOG_FILE" 2>&1 # Clean up container - $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + "$CONTAINER_ENGINE" rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true exit 1 fi fi local exit_code - exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + exit_code=$("$CONTAINER_ENGINE" inspect "$container_id" --format '{{.State.ExitCode}}') if [ "$exit_code" -eq 0 ]; then cluster_created=true @@ -572,13 +566,13 @@ create_cluster() { log_error "Cluster creation failed with exit code: $exit_code" log_error "Check $CREATE_LOG_FILE for detailed error information" # Clean up container - $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + "$CONTAINER_ENGINE" rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true exit 1 fi # Clean up the container after successful execution 
log_verbose "Cleaning up creation container: $CREATE_CONTAINER_NAME" - $CONTAINER_ENGINE rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true + "$CONTAINER_ENGINE" rm -f "$CREATE_CONTAINER_NAME" &>/dev/null || true } # Destroy cluster @@ -591,7 +585,7 @@ destroy_cluster() { fi log_info "Starting cluster destruction container..." - $CONTAINER_ENGINE run -d --name "$DESTROY_CONTAINER_NAME" \ + "$CONTAINER_ENGINE" run -d --name "$DESTROY_CONTAINER_NAME" \ -v "${PWD}:/workspace:z" \ -e AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \ -e AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \ @@ -603,7 +597,7 @@ destroy_cluster() { # Wait for destruction to complete log_info "Waiting for cluster destruction to complete (timeout: 20m)..." local container_id - container_id=$($CONTAINER_ENGINE ps -q --filter "name=$DESTROY_CONTAINER_NAME") + container_id=$("$CONTAINER_ENGINE" ps -q --filter "name=$DESTROY_CONTAINER_NAME") if [ -n "$container_id" ]; then # Show live logs if verbose mode is enabled @@ -611,17 +605,17 @@ destroy_cluster() { log_info "Showing live destruction logs:" # Start following logs in background and save to file - ($CONTAINER_ENGINE logs -f "$container_id" 2>&1 | tee "$DESTROY_LOG_FILE") & + ("$CONTAINER_ENGINE" logs -f "$container_id" 2>&1 | tee "$DESTROY_LOG_FILE") & local logs_pid=$! # Wait for container to complete - if timeout 1200 $CONTAINER_ENGINE wait "$container_id"; then + if timeout 1200 "$CONTAINER_ENGINE" wait "$container_id"; then # Stop following logs kill $logs_pid 2>/dev/null || true wait $logs_pid 2>/dev/null || true local exit_code - exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + exit_code=$("$CONTAINER_ENGINE" inspect "$container_id" --format '{{.State.ExitCode}}') log_info "Destruction container completed with exit code: $exit_code" else @@ -632,23 +626,23 @@ destroy_cluster() { log_warn "Cluster destruction may not have completed" fi else - if timeout 1200 $CONTAINER_ENGINE wait "$container_id"; then + if timeout 1200 "$CONTAINER_ENGINE" wait "$container_id"; then local exit_code - exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + exit_code=$("$CONTAINER_ENGINE" inspect "$container_id" --format '{{.State.ExitCode}}') # Save logs log_info "Saving cluster destruction logs to $DESTROY_LOG_FILE" - $CONTAINER_ENGINE logs "$container_id" > "$DESTROY_LOG_FILE" 2>&1 + "$CONTAINER_ENGINE" logs "$container_id" > "$DESTROY_LOG_FILE" 2>&1 else log_warn "Timeout waiting for cluster destruction to complete" # Save logs even on timeout - $CONTAINER_ENGINE logs "$container_id" > "$DESTROY_LOG_FILE" 2>&1 + "$CONTAINER_ENGINE" logs "$container_id" > "$DESTROY_LOG_FILE" 2>&1 log_warn "Cluster destruction may not have completed" fi fi local exit_code - exit_code=$($CONTAINER_ENGINE inspect "$container_id" --format '{{.State.ExitCode}}') + exit_code=$("$CONTAINER_ENGINE" inspect "$container_id" --format '{{.State.ExitCode}}') if [ "$exit_code" -eq 0 ]; then cluster_destroyed=true @@ -661,7 +655,7 @@ destroy_cluster() { # Clean up the container after execution log_verbose "Cleaning up destruction container: $DESTROY_CONTAINER_NAME" - $CONTAINER_ENGINE rm -f "$DESTROY_CONTAINER_NAME" &>/dev/null || true + "$CONTAINER_ENGINE" rm -f "$DESTROY_CONTAINER_NAME" &>/dev/null || true else log_warn "Could not find destroy container. 
Manual cleanup may be required for project: $CLUSTER_NAME" fi @@ -733,7 +727,7 @@ main() { if [ "$DELETE_CLUSTER" = true ]; then log_info "✓ Cluster deletion scheduled for cleanup" elif [ "$cluster_created" = true ]; then - log_info "ℹ Cluster preserved (use '$0 --delete-only' to delete later)" + log_info "ℹ Cluster preserved (use '$0 --delete' to delete later)" fi log_info "Total execution time: $(printf "%02d:%02d:%02d" $hours $minutes $seconds)" From bffbf31f99b8e62c48093a8f4f607e47ca5b0b78 Mon Sep 17 00:00:00 2001 From: Francisco Herrera Date: Wed, 29 Oct 2025 14:59:51 +0100 Subject: [PATCH 4/5] Changes from review Signed-off-by: Francisco Herrera --- mapt_cluster/README.md | 2 +- mapt_cluster/create_mapt_cluster.sh | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mapt_cluster/README.md b/mapt_cluster/README.md index 4ff4b82..66be389 100644 --- a/mapt_cluster/README.md +++ b/mapt_cluster/README.md @@ -91,7 +91,7 @@ export CLUSTER_SPOT=false | `CLUSTER_TIMEOUT` | `60` | Timeout for cluster operations in minutes | | `PULL_SECRET_FILE` | `./pull-secret.json` | Path to pull secret file | | `CLUSTER_TAGS` | Basic tags | Additional tags for AWS resources | -| `BACKED_URL_TYPE` | Auto-detected | Backing URL type: "s3" or "file" | +| `BACKEND_URL_TYPE` | Auto-detected | Backing URL type: "s3" or "file" | | `S3_BUCKET_PREFIX` | `mapt-cluster` | Prefix for S3 bucket names | | `CONTAINER_ENGINE` | `podman` | Container engine: "podman" or "docker" | | `MAPT_IMAGE` | `quay.io/redhat-developer/mapt:v0.9.4` | MAPT container image | diff --git a/mapt_cluster/create_mapt_cluster.sh b/mapt_cluster/create_mapt_cluster.sh index 9d5e9e5..09aa8fc 100755 --- a/mapt_cluster/create_mapt_cluster.sh +++ b/mapt_cluster/create_mapt_cluster.sh @@ -65,7 +65,7 @@ cleanup() { cluster_exists=false if [ "$cluster_created" = true ] || container_exists "$CREATE_CONTAINER_NAME"; then cluster_exists=true - elif [ "$BACKED_URL_TYPE" = "s3" ] && [ "$DELETE_CLUSTER" = true ]; then + elif [ "$BACKEND_URL_TYPE" = "s3" ] && [ "$DELETE_CLUSTER" = true ]; then # For delete-only operations, check if Pulumi state exists in S3 if aws s3 ls "s3://$S3_BUCKET_NAME/.pulumi/" &>/dev/null; then log_verbose "Found Pulumi state files in S3, cluster exists for deletion" @@ -84,7 +84,7 @@ cleanup() { # Only clean up S3 bucket if cluster was successfully destroyed # This preserves the Pulumi state files needed for cluster cleanup - if [ "$BACKED_URL_TYPE" = "s3" ] && [ "$DRY_RUN" = false ]; then + if [ "$BACKEND_URL_TYPE" = "s3" ] && [ "$DRY_RUN" = false ]; then if [ "$cluster_destroyed" = true ]; then log_info "Cluster successfully destroyed, cleaning up S3 bucket: $S3_BUCKET_NAME" if aws s3 rb "s3://$S3_BUCKET_NAME" --force 2>/dev/null; then @@ -182,7 +182,7 @@ OPTIONAL ENVIRONMENT VARIABLES: CLUSTER_SPOT Use spot instances (default: true) PULL_SECRET_FILE Path to pull secret file (default: ./pull-secret.json) CLUSTER_TAGS Additional tags (default: basic tags) - BACKED_URL_TYPE Backing URL type: "s3" or "file" (auto-detected) + BACKEND_URL_TYPE Backing URL type: "s3" or "file" (auto-detected) S3_BUCKET_PREFIX S3 bucket prefix (default: mapt-cluster) CONTAINER_ENGINE Container engine: "podman" or "docker" (default: podman) MAPT_IMAGE MAPT container image (default: latest stable) @@ -277,11 +277,11 @@ detect_environment() { fi # Set S3 backing for CI - BACKED_URL_TYPE="s3" + BACKEND_URL_TYPE="s3" else log_info "Local environment detected" IS_CI=false - BACKED_URL_TYPE="${BACKED_URL_TYPE:-file}" + 
BACKEND_URL_TYPE="${BACKEND_URL_TYPE:-file}" fi } @@ -365,7 +365,7 @@ set_defaults() { readonly MAPT_IMAGE=${MAPT_IMAGE:-"quay.io/redhat-developer/mapt:v0.9.9"} # Set backing URL based on environment - if [ "$BACKED_URL_TYPE" = "s3" ]; then + if [ "$BACKEND_URL_TYPE" = "s3" ]; then readonly S3_BUCKET_NAME="${S3_BUCKET_PREFIX}-${CLUSTER_NAME}" readonly BACKED_URL="s3://${S3_BUCKET_NAME}" else @@ -399,7 +399,7 @@ set_defaults() { # Create S3 bucket if needed create_s3_bucket() { - if [ "$BACKED_URL_TYPE" = "s3" ]; then + if [ "$BACKEND_URL_TYPE" = "s3" ]; then log_info "Creating S3 bucket: $S3_BUCKET_NAME" if [ "$DRY_RUN" = true ]; then @@ -433,7 +433,7 @@ verify_prerequisites() { fi # Check AWS CLI if using S3 backing - if [ "$BACKED_URL_TYPE" = "s3" ] && ! command -v aws &> /dev/null; then + if [ "$BACKEND_URL_TYPE" = "s3" ] && ! command -v aws &> /dev/null; then log_error "AWS CLI not found but S3 backing is enabled. Please install AWS CLI." exit 1 fi From 61f024d6191163c74a316a425fe5261d35cfcd79 Mon Sep 17 00:00:00 2001 From: Francisco Herrera Date: Wed, 29 Oct 2025 15:02:03 +0100 Subject: [PATCH 5/5] Adding some comments in the read me Signed-off-by: Francisco Herrera --- mapt_cluster/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapt_cluster/README.md b/mapt_cluster/README.md index 66be389..3063a90 100644 --- a/mapt_cluster/README.md +++ b/mapt_cluster/README.md @@ -101,8 +101,8 @@ export CLUSTER_SPOT=false | Option | Description | |--------|-------------| -| `--create` | Create cluster only (don't delete) | -| `--delete` | Delete cluster only (don't create) | +| `--create` | Create cluster and S3 bucket only (don't delete the cluster or S3 bucket) | +| `--delete` | Delete cluster and S3 bucket only (don't create anything). Should be used for cleanup | | Default (no options) | Create cluster, then delete it after completion | | `-h, --help` | Show help message and exit | | `-v, --verbose` | Enable verbose logging |