diff --git a/Makefile b/Makefile index dfd597dab5d..75f405a8b6a 100644 --- a/Makefile +++ b/Makefile @@ -116,6 +116,12 @@ test: ## Run smoke tests ./scripts/helm-deploy.sh test -r $(RELEASE_NAME) -n $(NAMESPACE) $(call log_success,"Tests completed successfully") +.PHONY: validate-init +validate-init: ## Validate Accumulo initialization with Alluxio + $(call log_info,"Validating Accumulo initialization...") + ./scripts/validate-accumulo-init.sh $(RELEASE_NAME) $(NAMESPACE) + $(call log_success,"Validation completed") + .PHONY: status status: ## Show deployment status ./scripts/helm-deploy.sh status -r $(RELEASE_NAME) -n $(NAMESPACE) diff --git a/charts/accumulo/DEPLOYMENT.md b/charts/accumulo/DEPLOYMENT.md index b80f62b0437..b814073eac6 100644 --- a/charts/accumulo/DEPLOYMENT.md +++ b/charts/accumulo/DEPLOYMENT.md @@ -122,7 +122,27 @@ helm install accumulo-dev ./charts/accumulo \ kubectl wait --for=condition=Ready pod --all --timeout=600s ``` -### 3. Run Smoke Tests +### 3. Validate Initialization + +Before running tests, validate that Accumulo initialized correctly with Alluxio: + +```bash +# Run the validation script +./scripts/validate-accumulo-init.sh accumulo-dev default + +# Or use the Makefile target +make validate-init RELEASE_NAME=accumulo-dev +``` + +The validation script checks: +- All pods are running (ZooKeeper, Alluxio, Accumulo components) +- Services have endpoints +- Alluxio Master is accessible +- Accumulo instance is properly initialized in ZooKeeper +- Accumulo data directories exist in Alluxio filesystem +- Alluxio client libraries are available + +### 4. Run Smoke Tests ```bash # Run the built-in smoke tests @@ -132,7 +152,14 @@ helm test accumulo-dev kubectl logs accumulo-dev-smoke-test ``` -### 4. 
Access Services +The smoke tests validate: +- All services are accessible (ZooKeeper, Alluxio, Accumulo) +- Accumulo table operations work correctly +- Data can be written and read from Alluxio +- Alluxio filesystem integration is functional +- Monitor web interface is available + +### 5. Access Services ```bash # Access Accumulo Monitor (web UI) @@ -148,7 +175,7 @@ kubectl port-forward svc/accumulo-dev-minio 9001:9001 & echo "MinIO Console: http://localhost:9001 (minioadmin/minioadmin)" ``` -### 5. Connect with Accumulo Shell +### 6. Connect with Accumulo Shell ```bash # Get a shell into the manager pod diff --git a/charts/accumulo/README.md b/charts/accumulo/README.md index 62b0c1ab580..f388def13fb 100644 --- a/charts/accumulo/README.md +++ b/charts/accumulo/README.md @@ -231,6 +231,25 @@ curl http://localhost:19999/ kubectl exec -it deployment/accumulo-manager -- /opt/accumulo/bin/accumulo shell -u root ``` +### Initialization Validation + +Validate that Accumulo initialized correctly with Alluxio: + +```bash +# Using the validation script +./scripts/validate-accumulo-init.sh + +# Or using Make +make validate-init RELEASE_NAME= NAMESPACE= +``` + +The validation checks: +- All pods and services are running +- Alluxio Master is accessible and serving +- Accumulo instance is initialized in ZooKeeper +- Accumulo data directories exist in Alluxio +- Alluxio client integration is working + ### Smoke Tests Run the built-in smoke tests to validate deployment: @@ -241,8 +260,9 @@ helm test The smoke test validates: - All services are accessible -- Accumulo table operations work -- Alluxio integration is functional +- Accumulo table operations work correctly +- Data persistence through Alluxio +- Alluxio filesystem integration - Monitor web interface is available ## Upgrade Guide diff --git a/charts/accumulo/VALIDATION.md b/charts/accumulo/VALIDATION.md new file mode 100644 index 00000000000..21a61113440 --- /dev/null +++ b/charts/accumulo/VALIDATION.md @@ -0,0 +1,435 @@ 
+ + +# Accumulo Initialization Validation + +This document describes the validation mechanisms implemented to ensure Apache Accumulo initializes correctly with Alluxio storage in Kubernetes. + +## Overview + +The Accumulo Helm chart includes comprehensive validation to ensure: + +1. **Alluxio is operational** before Accumulo initialization begins +2. **Alluxio filesystem is accessible** and writable +3. **Accumulo instance is properly initialized** in ZooKeeper +4. **Data directories are created** in Alluxio +5. **Integration between Accumulo and Alluxio** works correctly + +## Prerequisites + +The validation mechanisms require: + +- **Docker Image**: Accumulo image must include the Alluxio client binaries at `/opt/alluxio/client/bin/alluxio` + - The official image includes Alluxio 2.9.4 client, patched for Java 17 compatibility + - See [Docker README](../../../docker/README.md) for details on the Alluxio client installation + - Note: The Alluxio CLI script is patched during image build to accept Java 17 (required by Accumulo 4.x) +- **Kubernetes**: Version 1.19+ with proper network connectivity between pods +- **Helm**: Version 3.2.0+ for deploying the chart + +## Validation Layers + +### 1. 
Init Container Validation + +The `init-accumulo` container in the manager deployment performs pre-initialization checks: + +**Location**: `charts/accumulo/templates/accumulo-manager-deployment.yaml` + +**Checks Performed**: +- ✓ Alluxio Master web UI is accessible (HTTP GET to port 19999) +- ✓ Alluxio filesystem is accessible (required - fails if not accessible) +- ✓ Accumulo instance doesn't already exist (idempotent check) +- ✓ If instance exists: instance_id file must be present in Alluxio (critical check - fails if missing) +- ✓ Write permissions to Alluxio filesystem +- ✓ Accumulo initialization completes successfully +- ✓ instance_id file is created in Alluxio (critical check - fails if missing) + +**Example Output (New Installation)**: +``` +=== Accumulo Initialization Validation === +Validating Alluxio connectivity... +✓ Alluxio master web UI is accessible +Validating Alluxio filesystem accessibility... +Alluxio root path: alluxio://accumulo-alluxio-master:19998/accumulo +✓ Alluxio filesystem is accessible +Checking if Accumulo instance already exists... +Initializing new Accumulo instance 'accumulo'... +Creating Accumulo directory structure in Alluxio... +Running accumulo init... +✓ Accumulo initialization completed successfully +Verifying Accumulo instance_id file in Alluxio... +✓ Accumulo instance_id file successfully created in Alluxio +=== Accumulo Initialization Validation Complete === +``` + +**Example Output (Existing Installation)**: +``` +=== Accumulo Initialization Validation === +Validating Alluxio connectivity... +✓ Alluxio master web UI is accessible +Validating Alluxio filesystem accessibility... +Alluxio root path: alluxio://accumulo-alluxio-master:19998/accumulo +✓ Alluxio filesystem is accessible +Checking if Accumulo instance already exists... +✓ Accumulo instance 'accumulo' already exists in ZooKeeper +Verifying instance_id file exists in Alluxio... 
+✓ Accumulo instance_id file found in Alluxio at alluxio://accumulo-alluxio-master:19998/accumulo/instance_id +``` + +### 2. Helm Smoke Tests + +The Helm smoke test validates the deployed system end-to-end. + +**Location**: `charts/accumulo/templates/tests/smoke-test.yaml` + +**Tests Performed**: +- Service availability (ZooKeeper, Alluxio, Accumulo components) +- Accumulo table operations (create, insert, scan, delete) +- Alluxio filesystem integration +- Alluxio cache statistics +- Monitor web interface accessibility + +**Running the Test**: +```bash +helm test +kubectl logs -smoke-test +``` + +### 3. Standalone Validation Script + +A comprehensive validation script for manual or automated testing. + +**Location**: `scripts/validate-accumulo-init.sh` + +**Usage**: +```bash +./scripts/validate-accumulo-init.sh + +# Or using Make +make validate-init RELEASE_NAME=accumulo-dev NAMESPACE=default +``` + +**Validation Categories**: + +| Category | Checks | +|----------|--------| +| **Environment** | kubectl connectivity, Helm release exists | +| **Pod Status** | All pods running (ZooKeeper, Alluxio, Accumulo) | +| **Services** | All services have endpoints | +| **Alluxio** | Master accessible, filesystem responding | +| **Accumulo Init** | Instance in ZooKeeper, init logs successful | +| **Integration** | Alluxio client available, data directories exist | +| **Functionality** | Table operations work | + +**Example Output**: +``` +============================================== + Accumulo Initialization Validation +============================================== +Release Name: accumulo-dev +Namespace: default +Instance: accumulo +============================================== + +[INFO] Checking kubectl connectivity... +[✓] Kubernetes cluster is accessible +[INFO] Checking if Helm release exists... +[✓] Helm release 'accumulo-dev' exists in namespace 'default' +[INFO] Checking pod status... 
+[✓] ZooKeeper pod is running +[✓] Alluxio Master pod is running +[✓] Alluxio Worker pods running: 3 +[✓] Accumulo Manager pod is running +[✓] Accumulo TabletServer pods running: 3 +[INFO] Checking service endpoints... +[✓] ZooKeeper service has endpoints +[✓] Alluxio Master service has endpoints +[✓] Accumulo Manager service has endpoints +[INFO] Checking Alluxio Master accessibility... +[✓] Alluxio Master web UI is accessible +[INFO] Checking Accumulo initialization... +[✓] Accumulo instance 'accumulo' exists in ZooKeeper +[✓] Accumulo initialization validation completed +[INFO] Checking Alluxio filesystem integration... +[✓] Alluxio client directory exists in Manager pod +[✓] Accumulo directory found in Alluxio filesystem + +============================================== + Validation Summary +============================================== +[✓] Passed: 14 +[⚠] Warnings: 0 +[✗] Failed: 0 +============================================== + +[✓] All critical validations passed! +``` + +## Validation Flow + +``` +┌─────────────────────────────────────────────┐ +│ Helm Install/Upgrade │ +└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Wait for ZooKeeper (initContainer) │ +│ ✓ TCP connectivity to ZooKeeper:2181 │ +└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Wait for Alluxio Master (initContainer) │ +│ ✓ TCP connectivity to Alluxio:19998 │ +└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Validate Alluxio (init-accumulo) │ +│ ✓ HTTP GET Alluxio master:19999/ │ +│ ✓ Alluxio filesystem ls / │ +│ ✓ Create test directory in Alluxio │ +└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Initialize Accumulo (init-accumulo) │ +│ ✓ Check if instance exists │ +│ ✓ Run accumulo init if needed │ +│ ✓ Verify instance_id in Alluxio │ 
+└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Start Accumulo Manager │ +│ • Connects to ZooKeeper │ +│ • Connects to Alluxio via alluxio:// │ +│ • Manages tablet servers │ +└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Validation Script (Optional) │ +│ • Comprehensive system check │ +│ • Reports pass/fail/warning status │ +└──────────────┬──────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Helm Smoke Test (helm test) │ +│ • End-to-end functionality test │ +│ • Create/read/delete tables │ +│ • Verify data in Alluxio │ +└─────────────────────────────────────────────┘ +``` + +## Troubleshooting + +### Init Container Fails + +**Check init container logs**: +```bash +kubectl logs -c init-accumulo +``` + +**Common issues**: +- **Alluxio filesystem not accessible**: Initialization will fail immediately. Ensure Alluxio is fully started and accessible. +- **Instance exists but instance_id file missing**: Critical error indicating corrupted state. See detailed resolution below. +- **ZooKeeper not accessible**: Check ZooKeeper pod and service +- **Permissions issues**: Verify service account has proper RBAC permissions +- **Storage backend not configured**: Check Alluxio mount configuration + +#### Critical Error: Instance exists in ZooKeeper but instance_id file not found in Alluxio + +This error occurs when Accumulo is registered in ZooKeeper but the critical `instance_id` file is missing from Alluxio: + +```bash +✗ ERROR: Instance exists in ZooKeeper but instance_id file not found in Alluxio +Expected file: alluxio://accumulo-alluxio-master:19998/accumulo/instance_id +This indicates a corrupted or incomplete Accumulo installation +``` + +**Root Causes**: +1. Alluxio storage backend was cleared/reset while ZooKeeper data remained +2. Alluxio mount configuration changed after Accumulo was initialized +3. 
Storage backend credentials or permissions changed +4. Different storage backend is being used than during initialization +5. Previous incomplete initialization + +**Resolution Steps**: + +```bash +# Step 1: Verify current state +kubectl exec deployment/accumulo-manager -c manager -- \ + /opt/accumulo/bin/accumulo org.apache.accumulo.server.util.ListInstances + +kubectl exec deployment/accumulo-manager -- \ + /opt/alluxio/client/bin/alluxio fs ls alluxio://accumulo-alluxio-master:19998/accumulo + +# Step 2: Choose resolution approach + +# Option A: Clean reinstall (DESTROYS ALL DATA) +kubectl delete pvc -l app.kubernetes.io/name=zookeeper +kubectl delete pvc -l app.kubernetes.io/name=minio # if using MinIO +helm uninstall accumulo +helm install accumulo ./charts/accumulo -f values.yaml + +# Option B: Fix Alluxio mount (if storage backend exists but mount is wrong) +# Update values.yaml with correct storage configuration +helm upgrade accumulo ./charts/accumulo -f corrected-values.yaml +kubectl delete pod -l app.kubernetes.io/component=alluxio-master +kubectl delete pod -l app.kubernetes.io/component=manager + +# Option C: Restore from backup +# Restore instance_id and other Accumulo files to Alluxio storage backend +``` + +#### Error: Unable to resolve host accumulo-alluxio-worker-XXXXX + +This error occurs during Accumulo initialization when trying to write data via Alluxio workers: + +```bash +java.io.IOException: Failed to cache: Unable to resolve host accumulo-alluxio-worker-nbb7g +``` + +**Root Cause**: Alluxio workers are advertising individual pod hostnames, but Kubernetes DNS resolution requires fully qualified domain names (FQDNs) for pods behind a headless service. + +**Solution**: The chart configures Alluxio workers to use FQDNs in the format: +``` +...svc.cluster.local +``` + +This configuration is already included in the chart. If you encounter this error: + +1. 
**Verify headless service exists**: +```bash +kubectl get svc -l app.kubernetes.io/component=alluxio-worker +# Should show clusterIP: None +``` + +2. **Check worker configuration**: +```bash +kubectl exec daemonset/accumulo-alluxio-worker -- \ + cat /opt/alluxio/conf/alluxio-site.properties | grep worker.hostname +# Should show: alluxio.worker.hostname=.accumulo-alluxio-worker..svc.cluster.local +``` + +3. **Test DNS resolution**: +```bash +kubectl run test --rm -it --image=busybox --restart=Never -- \ + nslookup .accumulo-alluxio-worker..svc.cluster.local +``` + +4. **If DNS fails**, ensure: + - CoreDNS or kube-dns is running properly + - Network policies allow DNS queries + - Pod's `/etc/resolv.conf` has correct search domains + +5. **Restart workers if needed**: +```bash +kubectl rollout restart daemonset accumulo-alluxio-worker +``` +``` + +### Smoke Test Fails + +**Check test logs**: +```bash +kubectl logs -smoke-test +``` + +**Common issues**: +- Services not ready: Wait longer for all pods to be Running +- Authentication failures: Verify instance secret is correct +- Alluxio mount failures: Check storage backend configuration +- Network policies: Ensure pods can communicate + +### Validation Script Warnings + +**Review specific warnings**: +- Warnings typically indicate non-critical issues +- Check if reduced functionality is acceptable +- Some checks may fail in restricted environments + +## Best Practices + +1. **Always run validation** after deployment: + ```bash + make validate-init RELEASE_NAME= + ``` + +2. **Check init logs** if there are issues: + ```bash + kubectl logs -c init-accumulo + ``` + +3. **Run smoke tests** to verify functionality: + ```bash + helm test + ``` + +4. **Monitor Alluxio** cache hit rates: + ```bash + kubectl port-forward svc/-alluxio-master 19999:19999 + curl http://localhost:19999/metrics + ``` + +5. 
**Validate before upgrades**: + - Run validation script before upgrading + - Back up ZooKeeper data + - Verify Alluxio storage is accessible + +## Integration with CI/CD + +### GitHub Actions Example + +```yaml +- name: Deploy Accumulo + run: | + make deploy-dev + +- name: Validate Initialization + run: | + make validate-init RELEASE_NAME=accumulo-dev + +- name: Run Smoke Tests + run: | + make test RELEASE_NAME=accumulo-dev +``` + +### GitLab CI Example + +```yaml +test: + script: + - make deploy-dev + - make validate-init RELEASE_NAME=accumulo-dev + - make test RELEASE_NAME=accumulo-dev + artifacts: + when: on_failure + paths: + - validation-results.log +``` + +## Additional Resources + +- [Deployment Guide](DEPLOYMENT.md) +- [README](README.md) +- [Apache Accumulo Documentation](https://accumulo.apache.org/docs/) +- [Alluxio Documentation](https://docs.alluxio.io/) diff --git a/charts/accumulo/templates/_helpers.tpl b/charts/accumulo/templates/_helpers.tpl index ba7670f8223..7ca191cc8a0 100644 --- a/charts/accumulo/templates/_helpers.tpl +++ b/charts/accumulo/templates/_helpers.tpl @@ -181,4 +181,12 @@ Common environment variables for Accumulo containers value: {{ include "accumulo.zookeeperHosts" . 
| quote }} - name: ACCUMULO_LOG_DIR value: "/opt/accumulo/logs" +- name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name +- name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace {{- end }} \ No newline at end of file diff --git a/charts/accumulo/templates/accumulo-manager-deployment.yaml b/charts/accumulo/templates/accumulo-manager-deployment.yaml index 33522b921a7..f13ae3e7924 100644 --- a/charts/accumulo/templates/accumulo-manager-deployment.yaml +++ b/charts/accumulo/templates/accumulo-manager-deployment.yaml @@ -84,16 +84,81 @@ spec: - /bin/sh - -c - | - # Check if instance is already initialized - if /opt/accumulo/bin/accumulo org.apache.accumulo.server.util.ListInstances | grep -q "{{ .Values.accumulo.instance.name }}"; then - echo "Accumulo instance '{{ .Values.accumulo.instance.name }}' already exists" - exit 0 + set -e + echo "=== Accumulo Initialization Validation ===" + + # Validate Alluxio is accessible + echo "Validating Alluxio connectivity..." + ALLUXIO_MASTER="{{ include "accumulo.fullname" . }}-alluxio-master:19998" + MAX_RETRIES=30 + RETRY_COUNT=0 + + while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do + if curl -f -s "http://{{ include "accumulo.fullname" . }}-alluxio-master:19999/" > /dev/null 2>&1; then + echo "✓ Alluxio master web UI is accessible" + break + fi + RETRY_COUNT=$((RETRY_COUNT + 1)) + echo "Waiting for Alluxio master web UI... (attempt $RETRY_COUNT/$MAX_RETRIES)" + sleep 5 + done + + if [ $RETRY_COUNT -eq $MAX_RETRIES ]; then + echo "✗ ERROR: Alluxio master web UI not accessible after $MAX_RETRIES attempts" + exit 1 fi - - echo "Initializing Accumulo instance '{{ .Values.accumulo.instance.name }}'" - /opt/accumulo/bin/accumulo init \ - --instance-name {{ .Values.accumulo.instance.name }} \ - --password {{ .Values.accumulo.instance.secret }} + # Check if instance is already initialized in ZooKeeper + echo "Checking if Accumulo instance already exists..." 
+          # Alluxio root for Accumulo data.
+          # Fix: $ALLUXIO_ROOT was referenced below (mkdir / instance_id test) but never
+          # defined, so it expanded empty and the instance_id verification always failed.
+          # Path matches the documented root in VALIDATION.md.
+          ALLUXIO_ROOT="alluxio://{{ include "accumulo.fullname" . }}-alluxio-master:19998/accumulo"
+
+          INSTANCE_EXISTS=false
+
+          # Use ListInstances to check ZooKeeper (works with Java 17)
+          if /opt/accumulo/bin/accumulo org.apache.accumulo.server.util.ListInstances 2>/dev/null | grep -q "{{ .Values.accumulo.instance.name }}"; then
+            echo "✓ Accumulo instance '{{ .Values.accumulo.instance.name }}' already exists in ZooKeeper"
+            INSTANCE_EXISTS=true
+
+            # For existing instances, we trust that if it's in ZooKeeper, the initialization completed
+            # The Manager will fail to start if instance_id is missing from Alluxio anyway
+            echo "Skipping initialization - instance already configured"
+          else
+            echo "Instance not found in ZooKeeper, will initialize..."
+          fi
+
+          # Initialize Accumulo only if it doesn't exist
+          if [ "$INSTANCE_EXISTS" = "false" ]; then
+            echo "Initializing new Accumulo instance '{{ .Values.accumulo.instance.name }}'..."
+
+            # Pre-create the accumulo directory in Alluxio to validate write permissions
+            # NOTE(review): Alluxio 'fs mkdir' creates parent dirs by default; confirm the
+            # 2.9.4 CLI accepts '-p' (an unknown flag would make this a no-op via the fallback).
+            echo "Creating Accumulo directory structure in Alluxio..."
+            /opt/alluxio/client/bin/alluxio fs mkdir -p "$ALLUXIO_ROOT" 2>/dev/null || echo "Directory may already exist"
+
+            # Initialize Accumulo
+            echo "Running accumulo init..."
+            if /opt/accumulo/bin/accumulo init \
+              --instance-name {{ .Values.accumulo.instance.name }} \
+              --password {{ .Values.accumulo.instance.secret }}; then
+              echo "✓ Accumulo initialization completed successfully"
+
+              # Verify initialization created expected structure
+              echo "Verifying Accumulo instance_id file in Alluxio..."
+ sleep 3 + + if /opt/alluxio/client/bin/alluxio fs test -e "$ALLUXIO_ROOT/instance_id" 2>/dev/null; then + echo "✓ Accumulo instance_id file successfully created in Alluxio" + else + echo "✗ ERROR: Accumulo initialization completed but instance_id file not found in Alluxio" + echo "Expected file: $ALLUXIO_ROOT/instance_id" + echo "This may indicate Alluxio mount issues or permissions problems" + exit 1 + fi + else + echo "✗ ERROR: Accumulo initialization failed" + exit 1 + fi + else + echo "Skipping initialization - instance already exists" + fi + + echo "=== Accumulo Initialization Validation Complete ===" env: {{- include "accumulo.commonEnv" . | nindent 8 }} - name: ACCUMULO_HOME @@ -110,6 +175,9 @@ spec: subPath: log4j2-service.properties - name: alluxio-client mountPath: /opt/alluxio/client + - name: hadoop-config + mountPath: /opt/hadoop/etc/hadoop/core-site.xml + subPath: core-site.xml containers: - name: manager image: {{ include "accumulo.image" . }} diff --git a/charts/accumulo/templates/alluxio-worker-daemonset.yaml b/charts/accumulo/templates/alluxio-worker-daemonset.yaml index 3e03856b055..7eaaea74d1a 100644 --- a/charts/accumulo/templates/alluxio-worker-daemonset.yaml +++ b/charts/accumulo/templates/alluxio-worker-daemonset.yaml @@ -61,6 +61,19 @@ spec: sleep 5 done + # Set worker hostname to FQDN for Kubernetes DNS resolution + WORKER_FQDN="${POD_NAME}.{{ include "accumulo.fullname" . }}-alluxio-worker.${POD_NAMESPACE}.svc.cluster.local" + echo "Setting Alluxio worker hostname to: ${WORKER_FQDN}" + + # Export as environment variables for Alluxio to pick up + export ALLUXIO_WORKER_HOSTNAME="${WORKER_FQDN}" + export ALLUXIO_JOB_WORKER_HOSTNAME="${WORKER_FQDN}" + + # Add to alluxio-site.properties dynamically + cat >> /opt/alluxio/conf/alluxio-site.properties <fs.AbstractFileSystem.alluxio.impl alluxio.hadoop.AlluxioFileSystem + + + alluxio.master.hostname + {{ include "accumulo.fullname" . 
}}-alluxio-master + Alluxio master hostname + + + alluxio.master.rpc.port + 19998 + Alluxio master RPC port + + + alluxio.user.network.netty.channel + EPOLL + Use EPOLL for better network performance + + + alluxio.user.network.writer.close.timeout + 120s + Timeout for closing network connections + diff --git a/charts/accumulo/templates/tests/smoke-test.yaml b/charts/accumulo/templates/tests/smoke-test.yaml index 7deeb0ce288..ac28c3f5379 100644 --- a/charts/accumulo/templates/tests/smoke-test.yaml +++ b/charts/accumulo/templates/tests/smoke-test.yaml @@ -124,8 +124,36 @@ spec: # Check if data is being stored in Alluxio echo "Checking Alluxio master status..." - curl -f {{ include "accumulo.fullname" . }}-alluxio-master:19999/ > /dev/null - echo "SUCCESS: Alluxio master is accessible" + if curl -f {{ include "accumulo.fullname" . }}-alluxio-master:19999/ > /dev/null 2>&1; then + echo "SUCCESS: Alluxio master web interface is accessible" + else + echo "FAILED: Alluxio master web interface is not accessible" + exit 1 + fi + + # Verify Alluxio filesystem has Accumulo data + echo "Verifying Accumulo data in Alluxio filesystem..." + ALLUXIO_ROOT="{{ .Values.accumulo.instance.volumes }}" + echo "Checking Alluxio path: $ALLUXIO_ROOT" + + # Check if Accumulo instance directory exists + if /opt/alluxio/client/bin/alluxio fs test -d "$ALLUXIO_ROOT/instance_id" 2>/dev/null; then + echo "SUCCESS: Accumulo instance data found in Alluxio at $ALLUXIO_ROOT" + else + echo "WARNING: Accumulo instance directory not found at expected location" + fi + + # List Accumulo directories in Alluxio + echo "Listing Accumulo directories in Alluxio:" + /opt/alluxio/client/bin/alluxio fs ls "$ALLUXIO_ROOT" 2>/dev/null || echo "Could not list directory" + + # Check Alluxio cache statistics + echo "Checking Alluxio cache statistics..." + if curl -s {{ include "accumulo.fullname" . 
}}-alluxio-master:19999/metrics 2>/dev/null | grep -q "alluxio"; then + echo "SUCCESS: Alluxio metrics are available" + else + echo "WARNING: Could not retrieve Alluxio metrics" + fi echo "=== Testing Monitor Web Interface ===" @@ -153,9 +181,20 @@ spec: - name: accumulo-config mountPath: /opt/accumulo/conf/accumulo-env.sh subPath: accumulo-env.sh + - name: alluxio-client + mountPath: /opt/alluxio/client + - name: hadoop-config + mountPath: /opt/hadoop/etc/hadoop/core-site.xml + subPath: core-site.xml volumes: - name: accumulo-config configMap: name: {{ include "accumulo.fullname" . }}-config defaultMode: 0755 + - name: alluxio-client + hostPath: + path: /srv/alluxio/client + - name: hadoop-config + configMap: + name: {{ include "accumulo.fullname" . }}-core-site {{- end }} \ No newline at end of file diff --git a/charts/accumulo/tests/README.md b/charts/accumulo/tests/README.md new file mode 100644 index 00000000000..5fe4994f447 --- /dev/null +++ b/charts/accumulo/tests/README.md @@ -0,0 +1,194 @@ + + +# Accumulo Helm Chart Tests + +This directory contains documentation for Helm test manifests for validating the Accumulo deployment with Alluxio integration. + +The actual test manifests are located in `templates/tests/` directory. + +## Available Tests + +### Smoke Test (`templates/tests/smoke-test.yaml`) + +The smoke test performs comprehensive validation of the Accumulo cluster with Alluxio storage. + +**Test Scope**: +- Service connectivity and availability +- Accumulo table operations (CRUD) +- Alluxio filesystem integration +- Data persistence through Alluxio +- Monitor web interface + +**Running the Test**: +```bash +# Run all tests +helm test + +# Run tests with timeout +helm test --timeout 10m + +# View test logs +kubectl logs -smoke-test + +# Clean up test pods +kubectl delete pod -smoke-test +``` + +## Test Execution Flow + +``` +1. 
Wait for Services (init container) + ├─ ZooKeeper ready (port 2181) + ├─ Alluxio master ready (port 19998) + ├─ Accumulo manager ready (port 9999) + └─ TabletServer ready (port 9997) + +2. Test Accumulo Operations (main container) + ├─ Create test table + ├─ Insert test data (3 rows) + ├─ Scan and verify data + ├─ Flush table + └─ Compact table + +3. Test Alluxio Integration + ├─ Check Alluxio master web UI + ├─ Verify data directories in Alluxio + ├─ List Accumulo directories + └─ Check Alluxio cache metrics + +4. Test Monitor Interface + └─ Access Monitor web UI (port 9995) + +5. Cleanup + └─ Delete test table +``` + +## Test Configuration + +Tests are configured through Helm values: + +```yaml +dev: + smokeTest: + enabled: true + image: + registry: docker.io + repository: accumulo/accumulo + tag: "4.0.0-SNAPSHOT" +``` + +To disable tests: +```yaml +dev: + smokeTest: + enabled: false +``` + +## Test Results + +### Success Criteria + +All of the following must pass: +- ✓ All services are accessible +- ✓ Test table created successfully +- ✓ 3 test rows inserted and verified +- ✓ Table operations (flush, compact) complete +- ✓ Alluxio master responds to HTTP requests +- ✓ Accumulo directories exist in Alluxio +- ✓ Monitor web interface is accessible +- ✓ Test table deleted successfully + +### Common Test Failures + +#### 1. Service Timeout +``` +Waiting for ... +Error: timed out waiting for the condition +``` + +**Resolution**: +- Increase test timeout: `helm test --timeout 15m` +- Check pod status: `kubectl get pods` +- Verify services: `kubectl get endpoints` + +#### 2. Table Creation Fails +``` +FAILED: Could not create table +``` + +**Resolution**: +- Check manager logs: `kubectl logs deployment/-manager` +- Verify ZooKeeper connectivity +- Check Alluxio mount status + +#### 3. 
Alluxio Integration Fails +``` +WARNING: Could not verify Accumulo data in Alluxio +``` + +**Resolution**: +- Check Alluxio master logs: `kubectl logs deployment/-alluxio-master` +- Verify storage backend configuration +- Check Alluxio mount: `kubectl exec -- /opt/alluxio/client/bin/alluxio fs ls /` + +#### 4. Data Verification Fails +``` +FAILED: Expected 3 rows, found +``` + +**Resolution**: +- Check TabletServer logs: `kubectl logs deployment/-tserver` +- Verify write operations completed +- Check for compaction issues + +## Manual Testing + +For manual testing outside of Helm tests: + +```bash +# Access Accumulo shell +kubectl exec -it deployment/-manager -- \ + /opt/accumulo/bin/accumulo shell -u root -p + +# Run Accumulo commands +createtable testtable +insert row1 cf1 cq1 value1 +scan +deletetable -f testtable +quit + +# Check Alluxio filesystem +kubectl exec deployment/-manager -- \ + /opt/alluxio/client/bin/alluxio fs ls /accumulo + +# Test Alluxio master +kubectl port-forward svc/-alluxio-master 19999:19999 +curl http://localhost:19999/ + +# Test Monitor +kubectl port-forward svc/-monitor 9995:9995 +curl http://localhost:9995/ +``` + +## Related Documentation + +- [VALIDATION.md](../VALIDATION.md) - Accumulo Initialization Validation +- [DEPLOYMENT.md](../DEPLOYMENT.md) - Deployment Guide +- [README.md](../README.md) - Apache Accumulo Helm Chart diff --git a/docker/README.md b/docker/README.md index 6d0a1757672..8a5605f4808 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1 +1,75 @@ +# Accumulo Docker Image + +This directory contains the Dockerfile and scripts for building Apache Accumulo container images with Alluxio integration. 
+ +## Features + +The Docker image includes: +- Apache Accumulo 4.0.0-SNAPSHOT +- Hadoop 3.3.6 client libraries +- ZooKeeper 3.8.4 client libraries +- **Alluxio 2.9.4 client (binaries + JARs)** for filesystem integration, patched for Java 17 compatibility +- Java 17 (Eclipse Temurin) + +**Note**: Alluxio 2.9.4 is patched to accept Java 17. The original version only officially supports Java 8 or 11, but the client JARs work correctly with Java 17 for basic filesystem operations. The alluxio CLI script is patched during image build to accept Java 17. + +### Alluxio Client Integration + +The image includes the full Alluxio client installation at `/opt/alluxio/client`: +- **CLI Tool**: `/opt/alluxio/client/bin/alluxio` - Command-line interface for Alluxio filesystem operations +- **Client JARs**: `/opt/alluxio/client/*.jar` - Alluxio client libraries copied to Accumulo's classpath +- **Libraries**: `/opt/alluxio/client/lib/` - Required dependencies for the CLI tool +- **Configuration**: `/opt/alluxio/client/conf/` - Alluxio configuration files +- **Scripts**: `/opt/alluxio/client/libexec/` - Helper scripts required by the CLI (alluxio-config.sh, etc.) + +This enables: +1. Accumulo to read/write data to Alluxio filesystem via the client JARs +2. Init containers to validate Alluxio connectivity using the `alluxio fs` CLI commands +3. Troubleshooting and debugging with direct Alluxio filesystem access + +## Building the Image + +```bash +# Build from the repository root +cd /path/to/veculo +./scripts/build-docker.sh -r accumulo -t 4.0.0-SNAPSHOT + +# Or build manually +cd docker/accumulo +docker build -t accumulo/accumulo:4.0.0-SNAPSHOT . 
+``` + +## Loading into Minikube + +```bash minikube image load accumulo/accumulo:4.0.0-SNAPSHOT +``` + +## Environment Variables + +- `ACCUMULO_HOME=/opt/accumulo` +- `HADOOP_HOME=/opt/hadoop/hadoop-3.3.6` +- `ZOOKEEPER_HOME=/opt/zookeeper` +- `ALLUXIO_HOME=/opt/alluxio/client` +- `ALLUXIO_CLIENT_HOME=/opt/alluxio/client` +- `JAVA_HOME=/opt/java/openjdk` + +## Using the Alluxio Client + +Inside the container: + +```bash +# List Alluxio filesystem +/opt/alluxio/client/bin/alluxio fs ls / + +# Check if a file exists +/opt/alluxio/client/bin/alluxio fs test -e /path/to/file + +# Create directory +/opt/alluxio/client/bin/alluxio fs mkdir /path/to/dir +``` + +## Related Documentation + +- [Helm Chart Validation](../charts/accumulo/VALIDATION.md) - Uses Alluxio CLI for initialization validation +- [Deployment Guide](../charts/accumulo/DEPLOYMENT.md) - Deployment procedures diff --git a/docker/accumulo/Dockerfile b/docker/accumulo/Dockerfile index 6d472880930..eab6f75949d 100644 --- a/docker/accumulo/Dockerfile +++ b/docker/accumulo/Dockerfile @@ -31,6 +31,7 @@ ENV HADOOP_HOME=/opt/hadoop/hadoop-${HADOOP_VERSION} ENV ZOOKEEPER_HOME=/opt/zookeeper ENV JAVA_HOME=/opt/java/openjdk ENV ALLUXIO_CLIENT_HOME=/opt/alluxio/client +ENV ALLUXIO_HOME=/opt/alluxio/client # Install required packages @@ -73,23 +74,38 @@ RUN set -eux; \ chown -R accumulo:accumulo ${ZOOKEEPER_HOME}; \ echo "✅ ZooKeeper installed at ${ZOOKEEPER_HOME} with lib populated" -# Download and install Alluxio client JARs +# Download and install Alluxio client (binaries + JARs) +# Using 2.9.4 with Java version check patched for Java 17 compatibility ENV ALLUXIO_VERSION=2.9.4 -# Download and install Alluxio client jars RUN set -eux; \ mkdir -p /opt/alluxio/client; \ wget -q https://downloads.alluxio.io/downloads/files/${ALLUXIO_VERSION}/alluxio-${ALLUXIO_VERSION}-bin.tar.gz; \ tar -xzf alluxio-${ALLUXIO_VERSION}-bin.tar.gz -C /opt; \ - if [ -d /opt/alluxio-${ALLUXIO_VERSION}/assembly/client ]; then \ - cp 
/opt/alluxio-${ALLUXIO_VERSION}/assembly/client/*.jar /opt/alluxio/client/; \ - elif [ -d /opt/alluxio-${ALLUXIO_VERSION}/client ]; then \ - cp /opt/alluxio-${ALLUXIO_VERSION}/client/*.jar /opt/alluxio/client/; \ - else \ - echo "ERROR: Alluxio client jars not found" && ls -R /opt/alluxio-${ALLUXIO_VERSION} && exit 1; \ + mv /opt/alluxio-${ALLUXIO_VERSION} /opt/alluxio-full; \ + # Copy client JARs for Accumulo integration \ + if [ -d /opt/alluxio-full/client/alluxio-*-client.jar ]; then \ + cp /opt/alluxio-full/client/alluxio-*-client.jar /opt/alluxio/client/; \ fi; \ - rm -rf /opt/alluxio-${ALLUXIO_VERSION} alluxio-${ALLUXIO_VERSION}-bin.tar.gz; \ + # Copy all client JARs from client directory \ + if [ -d /opt/alluxio-full/client ] && [ "$(ls -A /opt/alluxio-full/client/*.jar 2>/dev/null)" ]; then \ + cp /opt/alluxio-full/client/*.jar /opt/alluxio/client/ 2>/dev/null || true; \ + fi; \ + # Copy necessary directories for CLI to work \ + mkdir -p /opt/alluxio/client/bin /opt/alluxio/client/lib /opt/alluxio/client/conf /opt/alluxio/client/libexec; \ + cp /opt/alluxio-full/bin/alluxio /opt/alluxio/client/bin/; \ + cp -r /opt/alluxio-full/lib/* /opt/alluxio/client/lib/ 2>/dev/null || true; \ + cp -r /opt/alluxio-full/conf/* /opt/alluxio/client/conf/ 2>/dev/null || true; \ + cp -r /opt/alluxio-full/libexec/* /opt/alluxio/client/libexec/ 2>/dev/null || true; \ + # Patch the alluxio script to accept Java 17 \ + sed -i 's/JAVA_VERSION="\${JAVA_VERSION:-8}"/JAVA_VERSION="${JAVA_VERSION:-17}"/' /opt/alluxio/client/bin/alluxio; \ + sed -i 's/java_version_check 8 11/java_version_check 8 11 17/' /opt/alluxio/client/bin/alluxio; \ + # Clean up full installation, keep only client \ + rm -rf /opt/alluxio-full alluxio-${ALLUXIO_VERSION}-bin.tar.gz; \ + # Verify the CLI exists and required files \ + test -f /opt/alluxio/client/bin/alluxio || (echo "ERROR: Alluxio CLI not found" && exit 1); \ + test -f /opt/alluxio/client/libexec/alluxio-config.sh || (echo "ERROR: alluxio-config.sh 
not found" && exit 1); \ chown -R accumulo:accumulo /opt/alluxio; \ - echo "✅ Alluxio client installed at /opt/alluxio/client" + echo "✅ Alluxio client (binaries + JARs) installed at /opt/alluxio/client" # Add missing XML parser dependencies for Alluxio client RUN curl -L -o /opt/alluxio/client/woodstox-core-6.4.0.jar https://repo1.maven.org/maven2/com/fasterxml/woodstox/woodstox-core/6.4.0/woodstox-core-6.4.0.jar && \ @@ -137,7 +153,7 @@ USER accumulo WORKDIR $ACCUMULO_HOME # Set default environment variables -ENV PATH=$ACCUMULO_HOME/bin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$PATH +ENV PATH=$ACCUMULO_HOME/bin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$ALLUXIO_HOME/bin:$PATH ENV ACCUMULO_LOG_DIR=$ACCUMULO_HOME/logs ENV HADOOP_CONF_DIR=$ACCUMULO_HOME/conf ENV ACCUMULO_CONF_DIR=$ACCUMULO_HOME/conf diff --git a/scripts/README.md b/scripts/README.md index f9f7d239bee..595db03228f 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -91,6 +91,36 @@ Comprehensive Helm deployment helper with dependency management. - Support for all Helm operations (install, upgrade, uninstall, test, status) - Comprehensive error handling and logging +### `validate-accumulo-init.sh` +Validates that Accumulo has been properly initialized with Alluxio storage. 
+ +**Usage:** +```bash +# Validate with default parameters +./scripts/validate-accumulo-init.sh + +# Validate specific release +./scripts/validate-accumulo-init.sh accumulo-prod production accumulo-instance + +# Use with Make +make validate-init RELEASE_NAME=accumulo-dev NAMESPACE=default +``` + +**Validates:** +- Kubernetes cluster connectivity +- Helm release exists +- All required pods are running (ZooKeeper, Alluxio, Accumulo components) +- Services have proper endpoints +- Alluxio Master is accessible +- Accumulo instance is initialized in ZooKeeper +- Alluxio filesystem integration is working +- Accumulo data directories exist in Alluxio +- Alluxio client libraries are available + +**Exit Codes:** +- `0`: All critical validations passed +- `1`: One or more critical validations failed + ## Quick Start Workflow ### 1. Development Setup diff --git a/scripts/validate-accumulo-init.sh b/scripts/validate-accumulo-init.sh new file mode 100755 index 00000000000..6c00418a92e --- /dev/null +++ b/scripts/validate-accumulo-init.sh @@ -0,0 +1,318 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
#

# Accumulo Initialization Validation Script
# This script validates that Accumulo has been properly initialized with Alluxio storage
#
# Usage: validate-accumulo-init.sh [release-name] [namespace] [instance-name]
#   Exit code 0: all critical validations passed; 1: at least one failed.

set -euo pipefail

# Script configuration (positional arguments, all optional)
RELEASE_NAME="${1:-accumulo-dev}"
NAMESPACE="${2:-default}"
INSTANCE_NAME="${3:-accumulo}"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging functions
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[✓]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[⚠]${NC} $1"
}

log_error() {
    echo -e "${RED}[✗]${NC} $1"
}

# Validation result counters
VALIDATION_PASSED=0
VALIDATION_FAILED=0
VALIDATION_WARNING=0

# Record and log the outcome of a single check.
#   $1 - human-readable check description
#   $2 - "pass" | "warn" | anything else counts as a failure
validate_check() {
    local check_name="$1"
    local status="$2"

    if [ "$status" = "pass" ]; then
        log_success "$check_name"
        VALIDATION_PASSED=$((VALIDATION_PASSED + 1))
    elif [ "$status" = "warn" ]; then
        log_warning "$check_name"
        VALIDATION_WARNING=$((VALIDATION_WARNING + 1))
    else
        log_error "$check_name"
        VALIDATION_FAILED=$((VALIDATION_FAILED + 1))
    fi
}

# --- kubectl helpers --------------------------------------------------------

# Succeed if at least one pod matching the label selector ($1) is Running.
pod_running() {
    kubectl get pods -n "$NAMESPACE" -l "$1" \
        -o jsonpath='{.items[*].status.phase}' 2>/dev/null | grep -q "Running"
}

# Print the number of Running pods matching the label selector ($1).
# BUGFIX: the original inline `... | grep -o Running | wc -l` pipeline fails
# under `set -euo pipefail` when there are zero matches (grep exits 1), which
# aborted the whole script instead of reporting a warn/fail. The `|| true`
# keeps the pipeline from killing the script; wc has already printed 0.
count_running() {
    kubectl get pods -n "$NAMESPACE" -l "$1" \
        -o jsonpath='{.items[*].status.phase}' 2>/dev/null \
        | grep -o "Running" | wc -l | tr -d '[:space:]' || true
}

# Print the name of the first pod matching the label selector ($1), or nothing
# if no such pod exists. kubectl's jsonpath error on an empty list is
# suppressed so callers can test for an empty result instead of the script
# exiting under `set -e`.
first_pod() {
    kubectl get pods -n "$NAMESPACE" -l "$1" \
        -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
}

# Succeed if the named service ($1) has at least one endpoint address.
service_has_endpoints() {
    kubectl get endpoints "$1" -n "$NAMESPACE" \
        -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null | grep -q "."
}

# --- checks -----------------------------------------------------------------

# Check kubectl connectivity; abort immediately if the cluster is unreachable.
check_kubectl() {
    log_info "Checking kubectl connectivity..."
    if kubectl cluster-info &>/dev/null; then
        validate_check "Kubernetes cluster is accessible" "pass"
    else
        validate_check "Kubernetes cluster is NOT accessible" "fail"
        exit 1
    fi
}

# Check if the Helm release exists; abort immediately if it does not.
check_release() {
    log_info "Checking if Helm release exists..."
    if helm status "$RELEASE_NAME" -n "$NAMESPACE" &>/dev/null; then
        validate_check "Helm release '$RELEASE_NAME' exists in namespace '$NAMESPACE'" "pass"
    else
        validate_check "Helm release '$RELEASE_NAME' does NOT exist in namespace '$NAMESPACE'" "fail"
        exit 1
    fi
}

# Check that every required component pod is Running.
check_pods() {
    log_info "Checking pod status..."

    if pod_running "app.kubernetes.io/name=zookeeper"; then
        validate_check "ZooKeeper pod is running" "pass"
    else
        validate_check "ZooKeeper pod is NOT running" "fail"
    fi

    if pod_running "app.kubernetes.io/component=alluxio-master"; then
        validate_check "Alluxio Master pod is running" "pass"
    else
        validate_check "Alluxio Master pod is NOT running" "fail"
    fi

    # Missing workers only degrade the deployment, so this is a warning.
    local worker_count
    worker_count=$(count_running "app.kubernetes.io/component=alluxio-worker")
    if [ "${worker_count:-0}" -gt 0 ]; then
        validate_check "Alluxio Worker pods running: $worker_count" "pass"
    else
        validate_check "No Alluxio Worker pods are running" "warn"
    fi

    if pod_running "app.kubernetes.io/component=manager"; then
        validate_check "Accumulo Manager pod is running" "pass"
    else
        validate_check "Accumulo Manager pod is NOT running" "fail"
    fi

    local tserver_count
    tserver_count=$(count_running "app.kubernetes.io/component=tserver")
    if [ "${tserver_count:-0}" -gt 0 ]; then
        validate_check "Accumulo TabletServer pods running: $tserver_count" "pass"
    else
        validate_check "No Accumulo TabletServer pods are running" "fail"
    fi
}

# Check that the core services have endpoints.
check_services() {
    log_info "Checking service endpoints..."

    if service_has_endpoints "$RELEASE_NAME-zookeeper"; then
        validate_check "ZooKeeper service has endpoints" "pass"
    else
        validate_check "ZooKeeper service has NO endpoints" "fail"
    fi

    if service_has_endpoints "$RELEASE_NAME-alluxio-master"; then
        validate_check "Alluxio Master service has endpoints" "pass"
    else
        validate_check "Alluxio Master service has NO endpoints" "fail"
    fi

    if service_has_endpoints "$RELEASE_NAME-manager"; then
        validate_check "Accumulo Manager service has endpoints" "pass"
    else
        validate_check "Accumulo Manager service has NO endpoints" "fail"
    fi
}

# Check Alluxio master accessibility via a temporary port-forward.
check_alluxio_master() {
    log_info "Checking Alluxio Master accessibility..."

    # Port-forward temporarily to check the master web UI.
    kubectl port-forward -n "$NAMESPACE" "svc/$RELEASE_NAME-alluxio-master" 19999:19999 &>/dev/null &
    local pf_pid=$!

    # Poll rather than a single fixed sleep: port-forward start-up time varies.
    local reachable=no attempt
    for attempt in 1 2 3 4 5; do
        sleep 1
        if curl -f -s http://localhost:19999/ > /dev/null 2>&1; then
            reachable=yes
            break
        fi
    done

    if [ "$reachable" = "yes" ]; then
        validate_check "Alluxio Master web UI is accessible" "pass"
    else
        # Only a warning: the UI may be unreachable while the master is healthy.
        validate_check "Alluxio Master web UI is NOT accessible" "warn"
    fi

    kill "$pf_pid" 2>/dev/null || true
    wait "$pf_pid" 2>/dev/null || true
}

# Check Accumulo initialization (ZooKeeper registration + init container logs).
check_accumulo_init() {
    log_info "Checking Accumulo initialization..."

    local manager_pod
    manager_pod=$(first_pod "app.kubernetes.io/component=manager")

    if [ -z "$manager_pod" ]; then
        validate_check "Could not find Accumulo Manager pod" "fail"
        return
    fi

    # A successfully initialized instance is registered in ZooKeeper.
    if kubectl exec -n "$NAMESPACE" "$manager_pod" -- /opt/accumulo/bin/accumulo org.apache.accumulo.server.util.ListInstances 2>/dev/null | grep -q "$INSTANCE_NAME"; then
        validate_check "Accumulo instance '$INSTANCE_NAME' exists in ZooKeeper" "pass"
    else
        validate_check "Accumulo instance '$INSTANCE_NAME' NOT found in ZooKeeper" "fail"
    fi

    # Inspect init container logs for the success (or already-initialized) marker.
    local init_logs
    init_logs=$(kubectl logs -n "$NAMESPACE" "$manager_pod" -c init-accumulo 2>/dev/null || echo "")
    if echo "$init_logs" | grep -q "Accumulo Initialization Validation Complete"; then
        validate_check "Accumulo initialization validation completed" "pass"
    elif echo "$init_logs" | grep -q "already exists"; then
        validate_check "Accumulo instance was previously initialized" "pass"
    else
        validate_check "Could not verify Accumulo initialization" "warn"
    fi

    # Surface (but do not fail on) any errors logged during initialization.
    if echo "$init_logs" | grep -q "ERROR"; then
        validate_check "Found errors in initialization logs" "warn"
    fi
}

# Check Alluxio filesystem integration from inside the Manager pod.
check_alluxio_filesystem() {
    log_info "Checking Alluxio filesystem integration..."

    local manager_pod
    manager_pod=$(first_pod "app.kubernetes.io/component=manager")

    if [ -z "$manager_pod" ]; then
        validate_check "Could not find Accumulo Manager pod for filesystem check" "warn"
        return
    fi

    # The image is expected to ship the Alluxio client at /opt/alluxio/client.
    if kubectl exec -n "$NAMESPACE" "$manager_pod" -- test -d /opt/alluxio/client 2>/dev/null; then
        validate_check "Alluxio client directory exists in Manager pod" "pass"
    else
        validate_check "Alluxio client directory NOT found in Manager pod" "warn"
    fi

    # List the Alluxio root and look for the Accumulo data directory.
    if kubectl exec -n "$NAMESPACE" "$manager_pod" -- /opt/alluxio/client/bin/alluxio fs ls / 2>/dev/null | grep -q "accumulo"; then
        validate_check "Accumulo directory found in Alluxio filesystem" "pass"
    else
        validate_check "Could not verify Accumulo directory in Alluxio filesystem" "warn"
    fi
}

# Basic connectivity check: try listing tables through the Accumulo shell.
check_accumulo_tables() {
    log_info "Checking Accumulo tables..."

    local manager_pod
    manager_pod=$(first_pod "app.kubernetes.io/component=manager")

    if [ -z "$manager_pod" ]; then
        validate_check "Could not find Accumulo Manager pod for table check" "warn"
        return
    fi

    # May legitimately fail when credentials are required, so warn only.
    if kubectl exec -n "$NAMESPACE" "$manager_pod" -- /opt/accumulo/bin/accumulo shell -u root -e "tables" 2>/dev/null | grep -q "accumulo"; then
        validate_check "Successfully connected to Accumulo and listed tables" "pass"
    else
        validate_check "Could not list Accumulo tables (may need credentials)" "warn"
    fi
}

# Print the final pass/warn/fail totals and exit 0 (success) or 1 (failure).
print_summary() {
    echo ""
    echo "=============================================="
    echo " Validation Summary"
    echo "=============================================="
    log_success "Passed: $VALIDATION_PASSED"
    log_warning "Warnings: $VALIDATION_WARNING"
    log_error "Failed: $VALIDATION_FAILED"
    echo "=============================================="

    if [ "$VALIDATION_FAILED" -eq 0 ]; then
        echo ""
        log_success "All critical validations passed!"
        if [ "$VALIDATION_WARNING" -gt 0 ]; then
            log_warning "There are $VALIDATION_WARNING warning(s) that should be reviewed"
        fi
        exit 0
    else
        echo ""
        log_error "$VALIDATION_FAILED critical validation(s) failed!"
        exit 1
    fi
}

# Main execution: run every check in order, then summarize and set exit code.
main() {
    echo ""
    echo "=============================================="
    echo " Accumulo Initialization Validation"
    echo "=============================================="
    echo "Release Name: $RELEASE_NAME"
    echo "Namespace: $NAMESPACE"
    echo "Instance: $INSTANCE_NAME"
    echo "=============================================="
    echo ""

    check_kubectl
    check_release
    check_pods
    check_services
    check_alluxio_master
    check_accumulo_init
    check_alluxio_filesystem
    check_accumulo_tables

    print_summary
}

# Run main function
main "$@"