add test for mlflow

diamonwiggins · diamonwiggins · commit c3b2141ee4bc · 2025-04-09T20:01:49.000-04:00
diff --git a/.github/workflows/mlflow-ci.yml b/.github/workflows/mlflow-ci.yml
@@ -5,13 +5,15 @@ on:
     paths:
       - 'applications/mlflow/charts/**'
       - 'applications/mlflow/kots/**'
+      - 'applications/mlflow/tests/**'
       - '.github/workflows/mlflow-ci.yml'
   push:
     branches:
       - main
     paths:
       - 'applications/mlflow/charts/**'
       - 'applications/mlflow/kots/**'
+      - 'applications/mlflow/tests/**'
       - '.github/workflows/mlflow-ci.yml'
 
 env:
@@ -148,6 +150,10 @@ jobs:
           #  version: 1.31
           #- distribution: kind
           #  version: 1.30
+        config:
+          - name: nodeport-ingress-disabled
+            values_file: tests/helm/nodeport-ingress-disabled.yaml
+            port: 30080
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -205,10 +211,21 @@ jobs:
           api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
           kubernetes-distribution: ${{ matrix.cluster.distribution }}
           kubernetes-version: ${{ matrix.cluster.version }}
-          cluster-name: mlflow-ci-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
+          cluster-name: mlflow-ci-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}
           ttl: 1h
           export-kubeconfig: true
 
+      # Expose the port of the current matrix config on the cluster created by
+      # the create-cluster step. The step id lets later steps read this step's
+      # outputs (the application test step reads outputs.hostname).
+      - name: Expose Application Port
+        id: expose-port
+        # NOTE(review): this uses the moving `main` ref, while the remove-cluster
+        # action in this workflow is pinned to v1.17.0 - consider pinning a tag.
+        uses: replicatedhq/replicated-actions/expose-port@main
+        with:
+          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
+          cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
+          port: '${{ matrix.config.port }}'
+          protocols: 'http,https'
+          wildcard: 'false'
+          timeout-minutes: '5'
+
       - name: Add Helm repositories
         run: |
           cd applications/mlflow
@@ -231,22 +248,44 @@ jobs:
         env:
           REPLICATED_LICENSE_ID: ${{ steps.get-license.outputs.license_id }}
 
-      - name: Run Helm installation test with charts from Replicated registry
+      - name: Run Helm installation test with chart-testing
         run: |
           cd applications/mlflow
           # Save kubeconfig to a file
           KUBECONFIG_FILE="/tmp/kubeconfig-${{ github.run_id }}"
           echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
           echo "Saved kubeconfig to $KUBECONFIG_FILE"
           
-          # Pass env vars directly to make
-          KUBECONFIG="$KUBECONFIG_FILE" REPLICATED_APP="$REPLICATED_APP" REPLICATED_CHANNEL="$REPLICATED_CHANNEL" REPLICATED_LICENSE_ID="$REPLICATED_LICENSE_ID" make test-replicated-helm
+          # Set up environment for the make target
+          export KUBECONFIG="$KUBECONFIG_FILE"
+          export REPLICATED_APP="${REPLICATED_APP}"
+          export REPLICATED_CHANNEL="${REPLICATED_CHANNEL}"
+          export REPLICATED_LICENSE_ID="${REPLICATED_LICENSE_ID}"
+          
+          # Use test-specific values file
+          export MLFLOW_VALUES="${{ matrix.config.values_file }}"
+          
+          echo "Running test '${{ matrix.config.name }}' with MLflow values file: $MLFLOW_VALUES"
+          
+          # Run chart testing installation using our updated make target that uses 'ct'
+          make test-replicated-helm-with-values
         env:
           KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
           REPLICATED_APP: ${{ env.APP_SLUG }}
           REPLICATED_CHANNEL: ${{ needs.create-release.outputs.channel-slug }}
           REPLICATED_LICENSE_ID: ${{ steps.get-license.outputs.license_id }}
 
+      # Application testing with our consolidated test file.
+      # tests/mlflow_test.py takes the server hostname as its positional argument
+      # and the port via --port, so the exposed hostname is passed explicitly.
+      - name: Run Application Tests
+        run: |
+          cd applications/mlflow
+          echo "Installing Python dependencies for tests..."
+          pip install mlflow pandas scikit-learn
+
+          echo "Running MLflow application tests against ${{ steps.expose-port.outputs.hostname }}"
+          python tests/mlflow_test.py "${{ steps.expose-port.outputs.hostname }}" --port 443 --protocol https
+        if: false  # Disabled for now until we're ready to implement application tests
+
       - name: Install troubleshoot
         run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -
         if: failure()
@@ -258,7 +297,7 @@ jobs:
           echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
           echo "Saved kubeconfig to $KUBECONFIG_FILE"
           
-          ./support-bundle --kubeconfig="$KUBECONFIG_FILE" --interactive=false -o ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }} https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml
+          ./support-bundle --kubeconfig="$KUBECONFIG_FILE" --interactive=false -o ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }} https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml
         if: failure()
         env:
           KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
@@ -267,8 +306,8 @@ jobs:
         uses: actions/upload-artifact@v4
         if: failure()
         with:
-          name: mlflow-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
-          path: 'ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}.tar.gz'
+          name: mlflow-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}
+          path: 'ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}.tar.gz'
 
       - name: Remove Cluster
         uses: replicatedhq/replicated-actions/remove-cluster@v1.17.0
diff --git a/applications/mlflow/Makefile b/applications/mlflow/Makefile
@@ -140,6 +140,44 @@ test-replicated-helm: registry-login
 	rm -f ct-oci.yaml; \
 	echo "Replicated Helm installation test completed successfully."
 
+# Target for testing with chart-testing (ct) and custom values.
+#
+# Environment:
+#   REPLICATED_APP, REPLICATED_CHANNEL  required; used to build the OCI registry URL.
+#   MLFLOW_VALUES                       optional; path to a values file, written to
+#                                       the ct config as "helm-extra-args: --values <file>".
+#
+# The whole recipe runs in a single shell under `set -e`, so a failing `ct install`
+# fails the target instead of being masked by the exit status of the final echo.
+# The temporary ct-oci.yaml is removed by an EXIT trap on success and on failure.
+.PHONY: test-replicated-helm-with-values
+test-replicated-helm-with-values: registry-login
+	set -e; \
+	trap 'rm -f ct-oci.yaml' EXIT; \
+	echo "Running Helm installation test with custom values using chart-testing..."; \
+	echo "Note: This requires REPLICATED_APP and REPLICATED_CHANNEL env vars."; \
+	if [ -z "$$REPLICATED_APP" ] || [ -z "$$REPLICATED_CHANNEL" ]; then \
+		echo "ERROR: REPLICATED_APP and REPLICATED_CHANNEL must be set"; \
+		exit 1; \
+	fi; \
+	OCI_URL="oci://registry.replicated.com/$$REPLICATED_APP/$$REPLICATED_CHANNEL"; \
+	echo "Creating temporary ct-oci.yaml config file with custom values..."; \
+	echo "chart-repos:" > ct-oci.yaml; \
+	echo "  - replicated=$$OCI_URL" >> ct-oci.yaml; \
+	echo "debug: true" >> ct-oci.yaml; \
+	\
+	if [ -n "$$MLFLOW_VALUES" ]; then \
+		echo "Using MLflow values file: $$MLFLOW_VALUES"; \
+		echo "helm-extra-args: --values $$MLFLOW_VALUES" >> ct-oci.yaml; \
+	fi; \
+	\
+	cat ct-oci.yaml; \
+	\
+	echo "Installing infra chart with chart-testing..."; \
+	ct install --config ct-oci.yaml \
+		--charts "infra" \
+		--namespace default \
+		--release-label "ci-test"; \
+	\
+	echo "Installing mlflow chart with chart-testing..."; \
+	ct install --config ct-oci.yaml \
+		--charts "mlflow" \
+		--namespace default \
+		--release-label "ci-test"; \
+	\
+	echo "Helm installation with custom values completed successfully."
+
 # Example target to check versions (optional)
 .PHONY: check-versions
 check-versions:
diff --git a/applications/mlflow/tests/helm/nodeport-ingress-disabled.yaml b/applications/mlflow/tests/helm/nodeport-ingress-disabled.yaml
@@ -0,0 +1,15 @@
+# Test values for the MLflow CI pipeline.
+# These values configure the MLflow service as a NodePort service for testing.
+
+mlflow:
+  # Service configuration for MLflow
+  service:
+    # Use NodePort to expose the service on a specific port
+    type: NodePort
+    # Service port number (internal)
+    port: 5000
+    # Hardcoded nodePort for consistent access. Keep in sync with the `port`
+    # of the matching matrix config in .github/workflows/mlflow-ci.yml (30080),
+    # which is the port the workflow exposes on the cluster.
+    # Note: Must be between 30000-32767
+    nodePort: 30080
+    # Service port name
+    name: http
diff --git a/applications/mlflow/tests/mlflow_test.py b/applications/mlflow/tests/mlflow_test.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import argparse
+import subprocess
+import mlflow
+from mlflow.models import infer_signature
+
+import pandas as pd
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+
+def run_mlflow_test(tracking_uri):
+    """
+    Run MLflow test with the specified tracking URI
+    
+    Args:
+        tracking_uri: The URI to use for the MLflow tracking server
+        
+    Returns:
+        True if the test passed, False otherwise
+    """
+    try:
+        print(f"Setting MLflow tracking URI to: {tracking_uri}")
+        mlflow.set_tracking_uri(tracking_uri)
+        
+        # Load the Iris dataset
+        X, y = datasets.load_iris(return_X_y=True)
+        
+        # Split the data into training and test sets
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.2, random_state=42
+        )
+        
+        # Define the model hyperparameters
+        params = {
+            "solver": "lbfgs",
+            "max_iter": 1000,
+            "multi_class": "auto",
+            "random_state": 8888,
+        }
+        
+        # Train the model
+        lr = LogisticRegression(**params)
+        lr.fit(X_train, y_train)
+        
+        # Predict on the test set
+        y_pred = lr.predict(X_test)
+        
+        # Calculate metrics
+        accuracy = accuracy_score(y_test, y_pred)
+        
+        print("Current tracking URI:", mlflow.get_tracking_uri())
+        
+        # Create a new MLflow Experiment
+        mlflow.set_experiment("MLflow CI Test")
+        
+        # Start an MLflow run
+        with mlflow.start_run():
+            # Log the hyperparameters
+            mlflow.log_params(params)
+            
+            # Log the loss metric
+            mlflow.log_metric("accuracy", accuracy)
+            
+            # Set a tag that we can use to remind ourselves what this run was for
+            mlflow.set_tag("Training Info", "CI Test for MLflow")
+            
+            # Infer the model signature
+            signature = infer_signature(X_train, lr.predict(X_train))
+            
+            # Log the model
+            model_info = mlflow.sklearn.log_model(
+                sk_model=lr,
+                artifact_path="iris_model",
+                registered_model_name="ci-test-model",
+                signature=signature
+            )
+            
+            print(f"Model URI: {model_info.model_uri}")
+            
+        # Load the model back for predictions as a generic Python Function model
+        try:
+            loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
+            predictions = loaded_model.predict(X_test[:3])
+            print(f"Test predictions: {predictions}")
+            return True
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            return False
+            
+    except Exception as e:
+        print(f"Test failed with error: {e}")
+        return False
+
+def ensure_dependencies():
+    """Ensure required packages are installed."""
+    try:
+        import mlflow
+        import pandas
+        import sklearn
+    except ImportError:
+        print("Installing required dependencies...")
+        subprocess.check_call([
+            sys.executable, "-m", "pip", "install", 
+            "mlflow", "pandas", "scikit-learn"
+        ])
+
+def main():
+    parser = argparse.ArgumentParser(description="MLflow CI testing tool")
+    parser.add_argument("hostname", help="Hostname of the MLflow server")
+    parser.add_argument("--port", type=int, help="Port number (if not included in hostname)")
+    parser.add_argument("--protocol", default="https", help="Protocol (http or https, default: https)")
+    
+    args = parser.parse_args()
+    
+    # Build the tracking URI
+    tracking_uri = f"{args.protocol}://{args.hostname}"
+    if args.port:
+        tracking_uri += f":{args.port}"
+    
+    # Ensure dependencies are installed
+    ensure_dependencies()
+    
+    # Run the test
+    success = run_mlflow_test(tracking_uri)
+    
+    if success:
+        print("✅ MLflow test completed successfully")
+        sys.exit(0)
+    else:
+        print("❌ MLflow test failed")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()