diff --git a/jenkins/README.md b/jenkins/README.md
new file mode 100644
index 0000000..ab12a39
--- /dev/null
+++ b/jenkins/README.md
@@ -0,0 +1,113 @@
+# Jenkins Pipeline Configuration
+
+This directory contains Jenkins pipeline definitions for the dremio-diagnostic-collector project.
+
+## Pipelines
+
+- **build.Jenkinsfile** - Main build pipeline that creates a K3s cluster and runs tests
+- **release.Jenkinsfile** - Release pipeline for publishing releases
+- **delete.Jenkinsfile** - Cleanup pipeline
+- **agent.yaml** - Kubernetes pod template for Jenkins agents
+
+## Build Pipeline Configuration
+
+The `build.Jenkinsfile` uses **build parameters** that appear in the Jenkins UI when you click "Build with Parameters".
+
+### Build Parameters
+
+After the first build, Jenkins will show a "Build with Parameters" button with the following options:
+
+| Parameter | Description | Default Value |
+|-----------|-------------|---------------|
+| `GCP_PROJECT_ID` | Google Cloud Project ID | `your-gcp-project` |
+| `GCP_ZONE` | GCP zone for VM instances | `us-west1-b` |
+| `GCP_SERVICE_ACCOUNT` | GCP service account email | `your-sa@developer.gserviceaccount.com` |
+| `GCP_NETWORK_SUBNET` | GCP subnet name | `primary-west` |
+| `GCP_MACHINE_TYPE` | GCE instance machine type | `e2-standard-16` |
+| `GCP_DISK_SIZE` | Boot disk size in GB | `100` |
+| `GCP_DISK_POLICY` | (Optional) Disk resource policy | _(empty)_ |
+| `GCP_IMAGE` | GCE boot disk image | `projects/debian-cloud/global/images/debian-12-bookworm-v20240910` |
+| `CLEANUP_INSTANCES` | Delete instances after build? | `true` |
+
+**Note:** On the first build, click "Build Now" to let Jenkins discover the parameters. After that, you'll see "Build with Parameters" instead.
+
+### Required Vault Secrets
+
+The build pipeline requires Google Cloud service account credentials stored in HashiCorp Vault:
+
+- **Path:** `secret/support/private/gcloud-service-account`
+- **Key:** `credentials-file`
+- **Value:** Complete JSON content of the GCP service account key file
+
+### How to Use
+
+1. **First Build**: Click "Build Now" - Jenkins will scan the Jenkinsfile and discover the parameters
+2. **Subsequent Builds**: Click "Build with Parameters" - You'll see a form with all the parameters
+3. **Fill in your values** (or use the defaults)
+4. **Click Build**
+
+**Example values for Dremio:**
+```
+GCP_PROJECT_ID: dremio-1093
+GCP_ZONE: us-west1-b
+GCP_SERVICE_ACCOUNT: 73420150722-compute@developer.gserviceaccount.com
+GCP_NETWORK_SUBNET: primary-west
+GCP_DISK_POLICY: projects/dremio-1093/regions/us-west1/resourcePolicies/regression-spark3hive
+CLEANUP_INSTANCES: true
+```
+
+## Pipeline Stages
+
+The build pipeline consists of the following stages:
+
+1. **Setup** - Install basic dependencies (bash, curl, gcloud SDK)
+2. **Install k3sup** - Download and install k3sup tool for K3s cluster management
+3. **GCloud Auth & SSH Setup** - Authenticate with GCP and generate SSH keys
+4. **Create GCE Instances** - Spin up 4 GCE instances in parallel
+5. **Setup K3s Cluster** - Install K3s master node and join worker nodes
+6. **Install Build Tools** - Download Go and kubectl
+7. **Build** - Execute the actual build via `./script/cibuild`
+
+## Automatic Cleanup
+
+The build pipeline can **automatically clean up** all GCE instances after the build completes.
This is controlled by the `CLEANUP_INSTANCES` parameter:
+
+- **`CLEANUP_INSTANCES=true`** (default): Deletes all 4 VMs after build completes (success or failure)
+- **`CLEANUP_INSTANCES=false`**: Leaves VMs running for debugging/investigation
+
+### When cleanup is enabled:
+
+- No VMs are left running (avoiding unnecessary costs)
+- Cleanup happens even if the build fails
+- All 4 instances are deleted in parallel for speed
+
+The cleanup stage will show output like:
+```
+Cleanup enabled - deleting GCE instances...
+Starting cleanup of GCE instances...
+Deleting k8s-ddc-ci-1-123
+Deleting k8s-ddc-ci-2-123
+Deleting k8s-ddc-ci-3-123
+Deleting k8s-ddc-ci-4-123
+ ✓ Successfully deleted k8s-ddc-ci-1-123
+ ✓ Successfully deleted k8s-ddc-ci-2-123
+ ✓ Successfully deleted k8s-ddc-ci-3-123
+ ✓ Successfully deleted k8s-ddc-ci-4-123
+Deletion complete
+```
+
+### When cleanup is disabled:
+
+```
+Cleanup disabled - GCE instances will remain running
+Instance names: k8s-ddc-ci-{1..4}-123
+To delete manually, run the delete.Jenkinsfile job
+```
+
+## Security Notes
+
+- Never commit GCP credentials, project IDs, or service account emails directly to the repository
+- Use environment variables or Vault for all sensitive configuration
+- Rotate service account keys regularly
+- Use least-privilege service accounts with only necessary permissions
+
diff --git a/jenkins/agent.yaml b/jenkins/agent.yaml
new file mode 100644
index 0000000..8c41ffc
--- /dev/null
+++ b/jenkins/agent.yaml
@@ -0,0 +1,28 @@
+# Pod template for the Jenkins agent; the Jenkinsfiles in this directory load it via yamlFile.
+apiVersion: v1
+kind: Pod
+spec:
+  serviceAccountName: support-tools-sa
+  restartPolicy: Never
+  # UID/GID 0 is root; the pipelines' Setup stages run `apk add` inside the container.
+  securityContext:
+    runAsUser: 0
+    runAsGroup: 0
+    fsGroup: 0
+  containers:
+    - name: agent
+      image: us-west1-docker.pkg.dev/dremio-1093/dockerhub/alpine:3.23.2
+      imagePullPolicy: Always
+      workingDir: /workspace
+      # `tail -f /dev/null` never exits, so the container stays up until the pod is removed.
+      command:
+        - "tail"
+        - "-f"
+        - "/dev/null"
+      resources:
+        requests:
+          cpu: 3.5
+          memory: 4G
+        limits:
+          cpu: 3.5
+          memory: 4G
diff --git a/jenkins/build.Jenkinsfile
b/jenkins/build.Jenkinsfile
new file mode 100644
index 0000000..26c9f1d
--- /dev/null
+++ b/jenkins/build.Jenkinsfile
@@ -0,0 +1,277 @@
+// Build pipeline: creates four GCE VMs, installs a K3s cluster on them with k3sup,
+// runs ./script/cibuild against that cluster, and optionally deletes the VMs afterwards.
+pipeline {
+    agent {
+        kubernetes {
+            agentInjection true
+            defaultContainer 'agent'
+            cloud 'kubernetes'
+            yamlFile 'jenkins/agent.yaml'
+        }
+    }
+
+    parameters {
+        string(
+            name: 'GCP_PROJECT_ID',
+            defaultValue: 'your-gcp-project',
+            description: 'GCP Project ID'
+        )
+        string(
+            name: 'GCP_ZONE',
+            defaultValue: 'us-west1-b',
+            description: 'GCP Zone for VM instances'
+        )
+        string(
+            name: 'GCP_NETWORK_SUBNET',
+            defaultValue: 'primary-west',
+            description: 'GCP Network Subnet name'
+        )
+        string(
+            name: 'GCP_MACHINE_TYPE',
+            defaultValue: 'e2-standard-16',
+            description: 'GCE instance machine type'
+        )
+        string(
+            name: 'GCP_DISK_SIZE',
+            defaultValue: '100',
+            description: 'Boot disk size in GB'
+        )
+        string(
+            name: 'GCP_DISK_POLICY',
+            defaultValue: '',
+            description: 'Disk resource policy (optional, leave empty if not needed)'
+        )
+        string(
+            name: 'GCP_IMAGE',
+            defaultValue: 'projects/debian-cloud/global/images/debian-12-bookworm-v20240910',
+            description: 'GCE boot disk image'
+        )
+        choice(
+            name: 'CLEANUP_INSTANCES',
+            choices: ['true', 'false'],
+            description: 'Automatically delete GCE instances after build completes?'
+        )
+    }
+
+    environment {
+        // GCP configuration: build parameters re-exported as environment variables for the shell steps
+        GCP_PROJECT_ID = "${params.GCP_PROJECT_ID}"
+        GCP_ZONE = "${params.GCP_ZONE}"
+        GCP_NETWORK_SUBNET = "${params.GCP_NETWORK_SUBNET}"
+        GCP_MACHINE_TYPE = "${params.GCP_MACHINE_TYPE}"
+        GCP_DISK_SIZE = "${params.GCP_DISK_SIZE}"
+        GCP_DISK_POLICY = "${params.GCP_DISK_POLICY}"
+        GCP_IMAGE = "${params.GCP_IMAGE}"
+
+        // K3sup version
+        K3SUP_VERSION = "0.13.9"
+
+        // Go and kubectl versions
+        GO_VERSION = "1.24.3"
+        KUBECTL_VERSION = "v1.32.0"
+    }
+
+    options {
+        timeout(time: 60, unit: 'MINUTES')
+    }
+
+    stages {
+        stage('Setup') {
+            steps {
+                sh '''
+                    apk add bash curl python3 py3-pip openssh-client
+
+                    # Install gcloud SDK (-f makes curl fail on an HTTP error instead of saving the error page)
+                    curl -fO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-458.0.1-linux-x86_64.tar.gz
+                    tar -xzf google-cloud-sdk-458.0.1-linux-x86_64.tar.gz
+                    ./google-cloud-sdk/install.sh --quiet --path-update=false
+
+                    # Verify gcloud is installed
+                    ./google-cloud-sdk/bin/gcloud version
+
+                    # Generate SSH key for VM access (create the key directory first in case it is missing)
+                    mkdir -p "$HOME/.ssh"
+                    ssh-keygen -t ed25519 -f "$HOME/.ssh/id_ed25519" -q -P ""
+                '''
+            }
+        }
+
+        stage('Install k3sup') {
+            steps {
+                sh '''
+                    curl -fL -O https://github.com/alexellis/k3sup/releases/download/${K3SUP_VERSION}/k3sup
+                    chmod +x k3sup
+                    mkdir -p $HOME/bin
+                    mv k3sup $HOME/bin/
+                '''
+            }
+        }
+
+        stage('Create GCE Instances') {
+            steps {
+                sh '''#!/bin/bash
+                    # A custom shebang replaces the default "sh -xe" invocation, so turn on strict mode explicitly.
+                    set -euo pipefail
+
+                    # Print the first SSH public key found in $HOME/.ssh on stdout; return 1 if there is none.
+                    get_ssh_public_key() {
+                        local ssh_dir="$HOME/.ssh"
+                        local public_key=""
+
+                        # Look for common SSH public key files in order of preference
+                        for key_file in "id_ed25519.pub" "id_rsa.pub" "id_ecdsa.pub" "id_dsa.pub"; do
+                            if [ -f "$ssh_dir/$key_file" ]; then
+                                public_key=$(tr -d '\\n\\r' < "$ssh_dir/$key_file")
+                                echo "Found SSH public key: $ssh_dir/$key_file" >&2
+                                break
+                            fi
+                        done
+
+                        if [ -z "$public_key" ]; then
+                            echo "Error: No SSH public key found in $ssh_dir" >&2
+                            echo "Please ensure you have one of the following files:" >&2
+                            echo " - $ssh_dir/id_ed25519.pub" >&2
+                            echo " - $ssh_dir/id_rsa.pub" >&2
+                            echo " - $ssh_dir/id_ecdsa.pub" >&2
+                            echo " - $ssh_dir/id_dsa.pub" >&2
+                            return 1
+                        fi
+
+                        echo "$public_key"
+                    }
+
+                    # With errexit enabled, a failed lookup aborts here instead of creating VMs with an empty key.
+                    SSH_PUBLIC_KEY="$(get_ssh_public_key)"
+
+                    # Build disk policy parameter if set
+                    DISK_POLICY_PARAM=""
+                    if [ -n "${GCP_DISK_POLICY:-}" ]; then
+                        DISK_POLICY_PARAM="disk-resource-policy=${GCP_DISK_POLICY},"
+                    fi
+
+                    # Create the four VMs in parallel, remembering every background PID.
+                    pids=()
+                    for n in {1..4}; do
+                        node_name="k8s-ddc-ci-$n-$BUILD_NUMBER"
+                        ./google-cloud-sdk/bin/gcloud compute instances create "$node_name" \\
+                            --project="${GCP_PROJECT_ID}" \\
+                            --zone="${GCP_ZONE}" \\
+                            --machine-type="${GCP_MACHINE_TYPE}" \\
+                            --network-interface="network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=${GCP_NETWORK_SUBNET}" \\
+                            --maintenance-policy=MIGRATE \\
+                            --provisioning-model=STANDARD \\
+                            --metadata="ssh-keys=jenkins:${SSH_PUBLIC_KEY}" \\
+                            --create-disk="auto-delete=yes,boot=yes,device-name=$node_name,${DISK_POLICY_PARAM}image=${GCP_IMAGE},mode=rw,size=${GCP_DISK_SIZE},type=pd-balanced" \\
+                            --no-shielded-secure-boot \\
+                            --shielded-vtpm \\
+                            --shielded-integrity-monitoring \\
+                            --labels=goog-ec-src=vm_add-gcloud \\
+                            --reservation-affinity=any &
+                        pids+=("$!")
+                    done
+
+                    # A bare "wait" always returns 0; wait on each PID so that a failed create fails the stage.
+                    failed=0
+                    for pid in "${pids[@]}"; do
+                        wait "$pid" || failed=1
+                    done
+                    if [ "$failed" -ne 0 ]; then
+                        echo "Error: one or more GCE instances could not be created" >&2
+                        exit 1
+                    fi
+
+                    # Fixed pause before the next stage connects to the new VMs over SSH.
+                    sleep 60
+                '''
+            }
+        }
+
+        stage('Setup K3s Cluster') {
+            steps {
+                sh '''#!/bin/bash
+                    set -euo pipefail
+
+                    # Print the primary internal IP of the instance named $1 (same project/zone as at creation).
+                    node_ip() {
+                        ./google-cloud-sdk/bin/gcloud compute instances describe "$1" \\
+                            --project="${GCP_PROJECT_ID}" \\
+                            --zone="${GCP_ZONE}" \\
+                            --format='get(networkInterfaces[0].networkIP)'
+                    }
+
+                    # Node 1 gets "k3sup install"; nodes 2-4 join it with "k3sup join".
+                    for n in {1..4}; do
+                        node_name="k8s-ddc-ci-$n-$BUILD_NUMBER"
+                        if [ "$n" -eq 1 ]; then
+                            MASTER_IP=$(node_ip "$node_name")
+                            $HOME/bin/k3sup install --ip "$MASTER_IP" --user jenkins --ssh-key $HOME/.ssh/id_ed25519
+                        else
+                            IP=$(node_ip "$node_name")
+                            $HOME/bin/k3sup join --ip "$IP" --server-ip "$MASTER_IP" --user jenkins --ssh-key $HOME/.ssh/id_ed25519
+                        fi
+                    done
+
+                    mkdir -p $HOME/.kube
+                    mv kubeconfig $HOME/.kube/config
+                '''
+            }
+        }
+
+        stage('Install Build Tools') {
+            steps {
+                sh '''
+                    wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz
+                    tar -C ../ -xzf go${GO_VERSION}.linux-amd64.tar.gz
+                    curl -fLO https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl
+                    chmod +x kubectl
+                    mv kubectl $HOME/bin
+                '''
+            }
+        }
+
+        stage('Build') {
+            environment {
+                KUBECONFIG = "${env.HOME}/.kube/config"
+            }
+            steps {
+                sh './script/cibuild'
+            }
+        }
+    }
+
+    post {
+        always {
+            script {
+                // Cleanup: Delete GCE instances if CLEANUP_INSTANCES is true
+                if (params.CLEANUP_INSTANCES == 'true') {
+                    echo "Cleanup enabled - deleting GCE instances..."
+                    sh '''#!/bin/bash
+                        echo "Starting cleanup of GCE instances..."
+
+                        for n in {1..4}; do
+                            node_name=k8s-ddc-ci-$n-$BUILD_NUMBER
+                            echo "Deleting $node_name"
+                            # Best effort: delete in the background and report the result per instance
+                            (
+                                if ./google-cloud-sdk/bin/gcloud compute instances delete "$node_name" \\
+                                    --project="${GCP_PROJECT_ID}" \\
+                                    --zone="${GCP_ZONE}" \\
+                                    --quiet 2>/dev/null; then
+                                    echo " ✓ Successfully deleted $node_name"
+                                else
+                                    echo " ✗ Failed to delete $node_name (may not exist)"
+                                fi
+                            ) &
+                        done
+                        wait
+                        echo "Deletion complete"
+                    '''
+                } else {
+                    echo "Cleanup disabled - GCE instances will remain running"
+                    echo "Instance names: k8s-ddc-ci-{1..4}-${BUILD_NUMBER}"
+                    echo "To delete manually, run the delete.Jenkinsfile job"
+                }
+            }
+        }
+    }
+}
diff --git a/jenkins/delete.Jenkinsfile b/jenkins/delete.Jenkinsfile
new file mode 100644
index 0000000..6cd4841
--- /dev/null
+++ b/jenkins/delete.Jenkinsfile
@@ -0,0 +1,22 @@
+// Cleanup pipeline. At present it only installs bash and curl on the agent;
+// it contains no step that deletes GCE instances.
+pipeline {
+    agent {
+        kubernetes {
+            agentInjection true
+            defaultContainer 'agent'
+            cloud 'kubernetes'
+            yamlFile 'jenkins/agent.yaml'
+        }
+    }
+    options {
+        timeout(time: 20, unit: 'MINUTES')
+    }
+    stages {
+        stage('Setup') {
+            steps {
+                sh 'apk add bash curl'
+            }
+        }
+    }
+}
diff --git a/jenkins/release.Jenkinsfile b/jenkins/release.Jenkinsfile
new file mode 100644
index 0000000..7cef9c8
--- /dev/null
+++ b/jenkins/release.Jenkinsfile
@@ -0,0 +1,36 @@
+// Release pipeline. At present it only reads the GitHub release token from Vault;
+// it contains no step that publishes a release.
+pipeline {
+    agent {
+        kubernetes {
+            agentInjection true
+            defaultContainer 'agent'
+            cloud 'kubernetes'
+            yamlFile 'jenkins/agent.yaml'
+        }
+    }
+    options {
+        timeout(time: 20, unit: 'MINUTES')
+    }
+    stages {
+        stage('Setup') {
+            steps {
+                sh 'apk add bash curl'
+            }
+        }
+        stage('Release') {
+            steps {
+                withVault(vaultSecrets: [[
+                    path: 'secret/support/private/ddc-gh-pat',
+                    secretValues: [
+                        [envVar: 'GITHUB_RELEASE_TOKEN', vaultKey: 'ddc-gh-pat'],
+                    ]
+                ]]) {
+                    // Do not interpolate the secret into a Groovy string or print it; steps in
+                    // this block can read it from the GITHUB_RELEASE_TOKEN environment variable.
+                    echo 'GITHUB_RELEASE_TOKEN has been loaded from Vault'
+                }
+            }
+        }
+    }
+}