diff --git a/.github/workflows/integration-test-docker.yml b/.github/workflows/integration-test-docker.yml
index 7b642fe3f..3045dd103 100644
--- a/.github/workflows/integration-test-docker.yml
+++ b/.github/workflows/integration-test-docker.yml
@@ -30,16 +30,43 @@ jobs:
       - name: Check out the repo
         uses: actions/checkout@v4
 
-      - name: Free up disk space
+      - name: Setup model storage on /mnt
         run: |
-          echo "Disk space before cleanup:"
-          df -h
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /usr/local/share/boost
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          echo "Disk space after cleanup:"
-          df -h
+          # Use /mnt for model storage (has ~75GB vs ~14GB on root)
+          # This helps prevent "no space left on device" errors
+          echo "Disk space before setup:"
+          df -h / && df -h /mnt
+
+          # Create /mnt/models directory if it doesn't exist
+          sudo mkdir -p /mnt/models
+          sudo chown -R "$(id -u):$(id -g)" /mnt/models
+
+          # If models directory already exists in workspace, move its contents to /mnt
+          if [ -d "models" ] && [ ! -L "models" ]; then
+            echo "Moving existing models directory to /mnt/models..."
+            # /mnt/models already exists (created above), so "mv models /mnt/models"
+            # would nest the data as /mnt/models/models. Copy the contents instead
+            # (models/. includes dotfiles) and delete the source only on success.
+            if [ -n "$(ls -A /mnt/models 2>/dev/null)" ]; then
+              echo "Warning: /mnt/models already has content, merging..."
+            fi
+            cp -a models/. /mnt/models/ || { echo "Error: failed to copy models to /mnt/models" >&2; exit 1; }
+            rm -rf models
+          fi
+
+          # Create symlink from models/ to /mnt/models/ so existing code continues to work
+          if [ ! -e "models" ]; then
+            ln -s /mnt/models models
+            echo "Created symlink: models -> /mnt/models"
+          elif [ -L "models" ]; then
+            echo "Symlink already exists: models -> $(readlink models)"
+          else
+            echo "Warning: models exists but is not a symlink"
+          fi
+
+          echo "Disk space after setup:"
+          df -h / && df -h /mnt
+          echo "Models directory setup complete. Models will be stored in /mnt/models"
 
       - name: Set up Python
         uses: actions/setup-python@v5
diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
index 7e3442423..330c904eb 100644
--- a/.github/workflows/integration-test-k8s.yml
+++ b/.github/workflows/integration-test-k8s.yml
@@ -77,16 +77,5 @@ jobs:
         run: |
           make build-e2e
 
-      - name: Free up disk space
-        run: |
-          # Remove unnecessary toolchains to free ~25GB disk space
-          # This helps prevent "no space left on device" errors
-          echo "Disk before cleanup:"
-          df -h /
-          # Note: Do NOT remove $AGENT_TOOLSDIRECTORY - it contains Go/Rust from setup actions
-          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost /usr/local/lib/android /opt/hostedtoolcache/CodeQL || true
-          echo "Disk after cleanup:"
-          df -h /
-
       - name: Run Integration E2E tests (${{ matrix.profile }})
         id: e2e-test
diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index 18ed118e0..db3a337a5 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -80,6 +80,44 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-go-
 
+      - name: Setup model storage on /mnt
+        run: |
+          # Use /mnt for model storage (has ~75GB vs ~14GB on root)
+          # This helps prevent "no space left on device" errors
+          echo "Disk space before setup:"
+          df -h / && df -h /mnt
+
+          # Create /mnt/models directory if it doesn't exist
+          sudo mkdir -p /mnt/models
+          sudo chown -R "$(id -u):$(id -g)" /mnt/models
+
+          # If models directory already exists in workspace, move its contents to /mnt
+          if [ -d "models" ] && [ ! -L "models" ]; then
+            echo "Moving existing models directory to /mnt/models..."
+            # /mnt/models already exists (created above), so "mv models /mnt/models"
+            # would nest the data as /mnt/models/models. Copy the contents instead
+            # (models/. includes dotfiles) and delete the source only on success.
+            if [ -n "$(ls -A /mnt/models 2>/dev/null)" ]; then
+              echo "Warning: /mnt/models already has content, merging..."
+            fi
+            cp -a models/. /mnt/models/ || { echo "Error: failed to copy models to /mnt/models" >&2; exit 1; }
+            rm -rf models
+          fi
+
+          # Create symlink from models/ to /mnt/models/ so existing code continues to work
+          if [ ! -e "models" ]; then
+            ln -s /mnt/models models
+            echo "Created symlink: models -> /mnt/models"
+          elif [ -L "models" ]; then
+            echo "Symlink already exists: models -> $(readlink models)"
+          else
+            echo "Warning: models exists but is not a symlink"
+          fi
+
+          echo "Disk space after setup:"
+          df -h / && df -h /mnt
+          echo "Models directory setup complete. Models will be stored in /mnt/models"
+
       - name: Cache Models
         uses: actions/cache@v4
         with:
diff --git a/e2e/pkg/cluster/kind.go b/e2e/pkg/cluster/kind.go
index c3a737e54..4cddf229b 100644
--- a/e2e/pkg/cluster/kind.go
+++ b/e2e/pkg/cluster/kind.go
@@ -38,8 +38,39 @@ func (k *KindCluster) Create(ctx context.Context) error {
 		return nil
 	}
 
-	// Create cluster
-	cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", k.Name)
+	// Mount /mnt from host into Kind node so storage provisioner can use it (more disk space)
+	configContent := fmt.Sprintf(`kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+name: %s
+nodes:
+  - role: control-plane
+    extraMounts:
+      - hostPath: /mnt
+        containerPath: /mnt
+  - role: worker
+    extraMounts:
+      - hostPath: /mnt
+        containerPath: /mnt
+`, k.Name)
+
+	configFile, err := os.CreateTemp("", "kind-config-*.yaml")
+	if err != nil {
+		return fmt.Errorf("failed to create temp config file: %w", err)
+	}
+	defer os.Remove(configFile.Name())
+
+	if _, err := configFile.WriteString(configContent); err != nil {
+		configFile.Close()
+		return fmt.Errorf("failed to write config file: %w", err)
+	}
+	if err := configFile.Close(); err != nil {
+		return fmt.Errorf("failed to close config file: %w", err)
+	}
+
+	k.log("Using Kind config with /mnt mount for storage")
+
+	// Create cluster with config file
+	cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", k.Name, "--config", configFile.Name())
 	if k.Verbose {
 		cmd.Stdout = os.Stdout
 		cmd.Stderr = os.Stderr
@@ -55,6 +86,31 @@ func (k *KindCluster) Create(ctx context.Context) error {
 		return fmt.Errorf("cluster failed to become ready: %w", err)
 	}
 
+	// Configure storage provisioner to use /mnt (75GB) instead of the node's default path
+	kubeConfig, err := k.GetKubeConfig(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get kubeconfig: %w", err)
+	}
+	defer os.Remove(kubeConfig)
+
+	// Update the provisioner ConfigMap, then restart the provisioner so it picks up
+	// the new path. Models downloaded in pods will be stored in /mnt via PVCs.
+	// Best-effort: on failure the cluster still works with the default path, so
+	// the error is logged (with kubectl's output) instead of being dropped.
+	patchCmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeConfig,
+		"patch", "configmap", "local-path-config", "-n", "local-path-storage",
+		"--type", "merge",
+		"-p", `{"data":{"config.json":"{\"nodePathMap\":[{\"node\":\"DEFAULT_PATH_FOR_NON_LISTED_NODES\",\"paths\":[\"/mnt/local-path-provisioner\"]}]}"}}`)
+	if out, err := patchCmd.CombinedOutput(); err != nil {
+		k.log("Warning: failed to patch local-path-config: %v: %s", err, out)
+	} else {
+		restartCmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeConfig,
+			"rollout", "restart", "deployment/local-path-provisioner", "-n", "local-path-storage")
+		if out, err := restartCmd.CombinedOutput(); err != nil {
+			k.log("Warning: failed to restart local-path-provisioner: %v: %s", err, out)
+		}
+	}
+
 	k.log("Cluster %s created successfully", k.Name)
 	return nil
 }
diff --git a/src/semantic-router/pkg/classification/model_discovery.go b/src/semantic-router/pkg/classification/model_discovery.go
index d0b5a4caf..0193aa1a1 100644
--- a/src/semantic-router/pkg/classification/model_discovery.go
+++ b/src/semantic-router/pkg/classification/model_discovery.go
@@ -62,6 +62,11 @@ func AutoDiscoverModels(modelsDir string) (*ModelPaths, error) {
 		modelsDir = "./models"
 	}
 
+	// Resolve symlinks: filepath.Walk does not follow a symlinked root (e.g., CI links models -> /mnt/models)
+	if resolved, evalErr := filepath.EvalSymlinks(modelsDir); evalErr == nil && resolved != "" {
+		modelsDir = resolved
+	}
+
 	// Check if models directory exists
 	if _, err := os.Stat(modelsDir); os.IsNotExist(err) {
 		return nil, fmt.Errorf("models directory does not exist: %s", modelsDir)