Conversation
There was a problem hiding this comment.
Pull request overview
This PR introduces major improvements to NyRAG's cloud deployment capabilities, including Vespa CLI integration for simplified authentication, token-based authentication support, and a redesigned UI that defaults to a project creation workflow. The changes enable seamless switching between local Docker and Vespa Cloud deployments with automatic credential detection from the Vespa CLI.
Changes:
- Added Vespa CLI integration for automatic credential detection and cloud authentication
- Implemented token-based authentication as the preferred method for Vespa Cloud (with mTLS fallback)
- Redesigned UI to start in "feed mode" with streamlined project creation workflow
- Enhanced deployment logic to capture and persist endpoint information from VespaCloud API
- Added comprehensive test coverage for new modules (vespa_cli, vespa_docker, process, logger, deploy, config)
Reviewed changes
Copilot reviewed 23 out of 23 changed files in this pull request and generated 24 comments.
Show a summary per file
| File | Description |
|---|---|
| src/nyrag/vespa_cli.py | New module for Vespa CLI integration including authentication, config parsing, and target management |
| src/nyrag/deploy.py | Added DeployResult dataclass, improved endpoint extraction, and disk_folder handling for VespaCloud |
| src/nyrag/config.py | Enhanced DeployConfig with Vespa CLI fallbacks and cloud_tenant field support |
| src/nyrag/utils.py | Added get_cloud_secret_token utility and updated mTLS path resolution to support tenant/app/instance |
| src/nyrag/process.py | Added config persistence with resolved endpoint information |
| src/nyrag/feed.py | Integrated token-based authentication with mTLS fallback |
| src/nyrag/api.py | Added cloud mode detection, deploy-mode endpoint, and dynamic client recreation |
| src/nyrag/cli.py | Added --cloud flag with automatic Vespa Cloud authentication flow |
| src/nyrag/static/chat.js | Redesigned UI flow to default to feed mode with project creation workflow |
| src/nyrag/static/chat.css | Added styles for deploy mode badge, feed panel, and loading states |
| src/nyrag/templates/chat.html | Updated template structure with deploy mode badge and inline feed panel |
| src/nyrag/tests/*.py | Added comprehensive test coverage for new and modified modules |
| src/nyrag/examples/*.yml | Updated example configs with cloud_tenant field |
| README.md | Simplified documentation focusing on UI-first workflow |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| def get_vespa_url(self) -> str: | ||
| """Get Vespa URL from env var or default.""" | ||
| return os.getenv("VESPA_URL", DEFAULT_VESPA_URL) | ||
| env_url = os.getenv("VESPA_URL") | ||
| if env_url: | ||
| return env_url | ||
| if self.deploy_mode == "cloud": | ||
| if self.cloud_tenant: | ||
| tenant = self.get_cloud_tenant() | ||
| application = self.get_cloud_application() | ||
| instance = self.get_cloud_instance() | ||
| if tenant and application and instance: | ||
| return f"https://{application}.{tenant}.{instance}.z.vespa-app.cloud" | ||
| cli = get_vespa_cli_cloud_config() | ||
| endpoint = cli.get("endpoint") | ||
| if endpoint: | ||
| parsed = urlparse(endpoint) | ||
| if parsed.scheme and parsed.hostname: | ||
| return f"{parsed.scheme}://{parsed.hostname}" | ||
| return endpoint.rstrip("/") | ||
| tenant = self.get_cloud_tenant() | ||
| application = self.get_cloud_application() | ||
| instance = self.get_cloud_instance() | ||
| if tenant and application and instance: | ||
| return f"https://{application}.{tenant}.{instance}.z.vespa-app.cloud" | ||
| return DEFAULT_VESPA_URL |
There was a problem hiding this comment.
The get_vespa_url method has complex nested logic with multiple fallback paths. The logic for constructing cloud URLs is duplicated (lines 99-102 and 111-114). This duplication makes the code harder to maintain and could lead to inconsistencies. Consider extracting the URL construction into a helper method.
| def _persist_config_with_endpoint( | ||
| config: Config, | ||
| output_dir: Path, | ||
| config_path: Optional[str], | ||
| deploy_result: Optional[DeployResult] = None, | ||
| ) -> None: | ||
| """Persist config to output conf.yml with resolved Vespa endpoint info. | ||
|
|
||
| Uses deploy_result if available (from VespaCloud.get_mtls_endpoint() etc), | ||
| otherwise falls back to config methods. | ||
| """ | ||
| import yaml | ||
|
|
||
| conf_path = output_dir / "conf.yml" | ||
| data: dict | ||
| if config_path and Path(config_path).exists(): | ||
| with open(config_path, "r", encoding="utf-8") as f: | ||
| data = yaml.safe_load(f) or {} | ||
| else: | ||
| data = config.model_dump(exclude_none=True) | ||
|
|
||
| # Prefer deploy_result endpoints (from VespaCloud API) over constructed URLs | ||
| if deploy_result and deploy_result.vespa_url: | ||
| data["vespa_url"] = deploy_result.vespa_url.rstrip("/") | ||
| data["vespa_port"] = deploy_result.vespa_port or 443 | ||
|
|
||
| # Store additional cloud endpoint info for reference | ||
| if deploy_result.mtls_endpoint: | ||
| data["vespa_mtls_endpoint"] = deploy_result.mtls_endpoint | ||
| if deploy_result.token_endpoint: | ||
| data["vespa_token_endpoint"] = deploy_result.token_endpoint | ||
| else: | ||
| # Fallback to config methods | ||
| vespa_url = config.get_vespa_url() | ||
| vespa_port = config.get_vespa_port() | ||
| data["vespa_url"] = vespa_url | ||
| data["vespa_port"] = vespa_port | ||
|
|
||
| if config.is_local_deploy_mode(): | ||
| if "VESPA_URL" not in os.environ and not config.vespa_url: | ||
| data["vespa_url"] = "http://localhost" | ||
| if "VESPA_PORT" not in os.environ and config.vespa_port is None: | ||
| data["vespa_port"] = DEFAULT_VESPA_LOCAL_PORT | ||
|
|
||
| with open(conf_path, "w", encoding="utf-8") as f: | ||
| yaml.safe_dump(data, f, sort_keys=False) | ||
|
|
||
| logger.info(f"Config saved to {conf_path}") |
There was a problem hiding this comment.
The function _persist_config_with_endpoint reads from the original config file if it exists, but then potentially overwrites user-provided values with deploy_result values. This could lead to unexpected behavior where manually set vespa_url in the config gets replaced. Consider preserving user-set values or documenting this override behavior clearly.
| def test_path_value_with_pem_extension(self): | ||
| """Test that value ending in .pem is classified as path.""" | ||
| path, key = _classify_api_key("my-key.pem") | ||
| assert path is not None # Will check if file exists or treat as path | ||
| assert key is None or key == "my-key.pem" # Depends on file existence |
There was a problem hiding this comment.
In the test test_path_value_with_pem_extension, the assertion checks for either path or key to be set, which makes the test non-deterministic. The behavior depends on whether a file named "my-key.pem" exists in the test environment. This test should either mock the file existence check or use a more deterministic test case.
| def test_path_value_with_pem_extension(self): | |
| """Test that value ending in .pem is classified as path.""" | |
| path, key = _classify_api_key("my-key.pem") | |
| assert path is not None # Will check if file exists or treat as path | |
| assert key is None or key == "my-key.pem" # Depends on file existence | |
| def test_path_value_with_pem_extension(self, monkeypatch): | |
| """Test that value ending in .pem is classified as path, deterministically.""" | |
| # Simulate that the file "my-key.pem" exists so it is classified as a path. | |
| monkeypatch.setattr(Path, "exists", lambda self: True) | |
| monkeypatch.setattr(Path, "is_file", lambda self: True) | |
| path, key = _classify_api_key("my-key.pem") | |
| assert path == "my-key.pem" | |
| assert key is None |
| async function initializeApp() { | ||
| // Always start in feed mode (create new project) | ||
| // Users must upload a config to chat with an existing project | ||
| activeProjectName = null; | ||
|
|
||
| // Fetch deploy mode explicitly | ||
| try { | ||
| const res = await fetch("/deploy-mode"); | ||
| if (res.ok) { | ||
| const data = await res.json(); | ||
| console.log("Deploy mode fetch:", data); | ||
| if (data.mode) { | ||
| updateDeployModeBadge(data.mode, "deploy-mode"); | ||
| } | ||
| } else { | ||
| console.warn("Deploy mode fetch failed:", res.status); | ||
| } | ||
| } catch (e) { | ||
| console.error("Failed to fetch deploy mode", e); | ||
| } | ||
|
|
||
| // Fetch stats (which might also have deploy_mode but we prefer the explicit one above) | ||
| try { | ||
| const res = await fetch("/stats"); | ||
| const data = await res.json(); | ||
| if (data.deploy_mode) { | ||
| updateDeployModeBadge(data.deploy_mode, "stats"); | ||
| } | ||
| } catch (e) { | ||
| console.error("Failed to fetch initial stats", e); | ||
| } | ||
|
|
||
| // Set indicator to "New Project" | ||
| const indicator = document.getElementById("active-project-indicator"); | ||
| if (indicator) { | ||
| indicator.textContent = "New Project"; | ||
| } | ||
|
|
||
| // Load the default/template config for feed mode | ||
| await loadSchema("web"); | ||
| currentConfig = JSON.parse(JSON.stringify(FALLBACK_CONFIG)); | ||
| renderConfigEditor(); | ||
| await loadExamples(); | ||
|
|
||
| // Set to feed mode AFTER everything is loaded | ||
| setMode("feed"); | ||
| } |
There was a problem hiding this comment.
The function initializeApp() always starts in feed mode (line 1162-1163) and sets activeProjectName to null, which means users must always upload a config or create a new project to use the UI. This behavior should be documented more clearly in the UI or provide a better initial state message explaining what users need to do.
| const rawName = parsed.name; | ||
| const cleanName = rawName.replace(/-/g, "").replace(/_/g, "").toLowerCase(); | ||
| const normalizedName = `nyrag${cleanName}`; | ||
|
|
There was a problem hiding this comment.
The normalized project name logic (lines 1050-1052) uses simple string replacement that could produce unexpected results. For example, a project named "my-app_test" would become "nyragmyapptest", which might not be the intended behavior. Consider using a more robust normalization function or documenting the expected input format.
| const rawName = parsed.name; | |
| const cleanName = rawName.replace(/-/g, "").replace(/_/g, "").toLowerCase(); | |
| const normalizedName = `nyrag${cleanName}`; | |
| function normalizeProjectName(rawName) { | |
| if (typeof rawName !== "string") { | |
| return "nyrag"; | |
| } | |
| // Lowercase, strip accents, and remove all non-alphanumeric characters | |
| const lower = rawName.toLowerCase(); | |
| const withoutAccents = lower | |
| .normalize("NFKD") | |
| .replace(/[\u0300-\u036f]/g, ""); | |
| const cleaned = withoutAccents.replace(/[^a-z0-9]/g, ""); | |
| if (!cleaned) { | |
| return "nyrag"; | |
| } | |
| return `nyrag${cleaned}`; | |
| } | |
| const normalizedName = normalizeProjectName(parsed.name); |
src/nyrag/deploy.py
Outdated
| if ensure_vespa_cli_target(tenant, application, instance): | ||
| logger.info(f"Vespa CLI target set to {tenant}.{application}.{instance}") | ||
| else: | ||
| logger.debug("Skipping Vespa CLI target set (CLI not found or command failed)") |
There was a problem hiding this comment.
The ensure_vespa_cli_target function is called during cloud deployment but failures are only logged as debug messages (line 304). If setting the CLI target is important for subsequent operations, this silent failure could lead to confusing errors later. Consider whether this should be a warning or error instead.
| logger.debug("Skipping Vespa CLI target set (CLI not found or command failed)") | |
| logger.warning( | |
| "Failed to set Vespa CLI target (CLI not found or command failed); " | |
| "subsequent Vespa CLI commands may not use the expected target." | |
| ) |
| try: | ||
| # Run vespa auth login interactively (it will open browser) | ||
| result = subprocess.run( | ||
| ["vespa", "auth", "login"], | ||
| check=False, | ||
| ) | ||
| clear_vespa_cli_cache() | ||
| return result.returncode == 0 | ||
| except Exception: | ||
| return False |
There was a problem hiding this comment.
The vespa_auth_login function runs subprocess.run without timeout, which could hang indefinitely if the Vespa CLI process becomes unresponsive. Consider adding a timeout parameter to prevent potential denial-of-service scenarios.
| def is_vespa_cloud_authenticated() -> bool: | ||
| """Check if user is authenticated with Vespa Cloud. | ||
|
|
||
| Returns: | ||
| True if valid authentication exists, False otherwise. | ||
| """ | ||
| # Check for auth.json file which is created after successful login | ||
| auth_file = Path.home() / ".vespa" / "auth.json" | ||
| if auth_file.exists(): | ||
| try: | ||
| with open(auth_file, "r") as f: | ||
| data = json.load(f) | ||
| # Check if there's a valid access token | ||
| if data.get("access_token"): | ||
| return True | ||
| except Exception: | ||
| pass | ||
|
|
||
| # Also check API key as fallback | ||
| config = get_vespa_cli_cloud_config() | ||
| if config.get("api_key") or config.get("api_key_path"): | ||
| return True | ||
|
|
||
| return False |
There was a problem hiding this comment.
The authentication check in is_vespa_cloud_authenticated reads the auth.json file and checks for an access_token, but doesn't validate if the token is expired. This could lead to false positives where the function returns True but authentication actually fails. Consider adding token expiration validation or documenting this limitation.
| # Pass disk_folder to prevent VespaCloud from creating a folder in cwd | ||
| if "disk_folder" in deploy_sig.parameters: | ||
| deploy_kwargs["disk_folder"] = str(application_root) | ||
| except Exception: |
There was a problem hiding this comment.
'except' clause does nothing but pass and there is no explanatory comment.
| # Check if there's a valid access token | ||
| if data.get("access_token"): | ||
| return True | ||
| except Exception: |
There was a problem hiding this comment.
'except' clause does nothing but pass and there is no explanatory comment.
| except Exception: | |
| except Exception: | |
| # If auth.json is missing, unreadable, or malformed, treat as not authenticated | |
| # and fall back to other authentication checks below. |
No description provided.