Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
35c4998
refactor: move token and instance-name to top-level command
mdesmet Jun 10, 2025
98e1232
feat: add config file support with env var substitution
mdesmet Jun 10, 2025
e9d3f71
build: add python-dotenv dependency
mdesmet Jun 10, 2025
ff3dca6
refactor: standardize string quotes and improve code consistency
mdesmet Jun 10, 2025
34288ef
feat: hide token input in CLI for security
mdesmet Jun 10, 2025
c4059c7
chore: update config file loading to use Path.open
mdesmet Jun 10, 2025
e5eaab4
fix: add 'dbt' prefix to project-health command in tests
mdesmet Jun 10, 2025
a23748f
refactor: simplify config file opening syntax
mdesmet Jun 10, 2025
2aeda41
feat: add knowledge CLI command group
mdesmet Jun 10, 2025
9de835b
feat: add knowledge serve command with HTTP server
mdesmet Jun 10, 2025
6236ac1
fix: correct knowledge cli import path in main.py
mdesmet Jun 10, 2025
521b381
feat: add knowledge CLI group and serve command
mdesmet Jun 10, 2025
2f3858c
feat: change default server port from 3000 to 4000
mdesmet Jun 10, 2025
f963721
style: reformat imports and strings in knowledge CLI
mdesmet Jun 10, 2025
c9f4229
fix: validate URL scheme to allow only HTTP/HTTPS in knowledge base h…
mdesmet Jun 10, 2025
54e3495
feat: Add timeout to urlopen() calls
mdesmet Jun 10, 2025
13b3d1b
fix: store HTTPError.read() result to avoid double read
mdesmet Jun 10, 2025
1ceafce
style: standardize string quotes in URL scheme check
mdesmet Jun 10, 2025
973bb0f
fix: suppress S310 warning for validated URL scheme
mdesmet Jun 10, 2025
9f439e9
fix: Shorten noqa comment for urlopen security warning
mdesmet Jun 10, 2025
8494366
fix: add security comment for urlopen usage
mdesmet Jun 10, 2025
5222d4a
fix: add noqa comment for bandit security check
mdesmet Jun 10, 2025
a02e6c2
feat: add --version option to CLI
mdesmet Jun 11, 2025
788c259
Merge branch 'main' into feat/kb-proxy
mdesmet Jun 11, 2025
7954b55
refactor: extract KnowledgeBaseHandler to separate file
mdesmet Jun 11, 2025
79efc66
style: reorder imports and clean up whitespace
mdesmet Jun 11, 2025
5be69f7
refactor: update knowledge base endpoint paths
mdesmet Jun 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def read(*names, **kwargs):
"sqlglot~=25.30.0",
"mcp~=1.9.0",
"pyperclip~=1.8.2",
"python-dotenv~=1.0.0",
],
extras_require={
# eg:
Expand Down
83 changes: 82 additions & 1 deletion src/datapilot/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,94 @@
import json
import os
import re
from pathlib import Path

import click
from dotenv import load_dotenv

from datapilot.core.knowledge.cli import cli as knowledge
from datapilot.core.mcp_utils.mcp import mcp
from datapilot.core.platforms.dbt.cli.cli import dbt


def load_config_from_file():
    """Load configuration from ~/.altimate/altimate.json if it exists.

    Returns:
        dict: The top-level JSON object from the file. An empty dict is returned
        when the file is missing, cannot be read or decoded, is not valid JSON,
        or does not contain a JSON object at the top level. Every failure other
        than a missing file prints a warning to stderr.
    """
    config_path = Path.home() / ".altimate" / "altimate.json"

    try:
        # JSON is UTF-8 by specification; do not depend on the platform locale.
        with config_path.open(encoding="utf-8") as f:
            config = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # A missing config file is the normal case, not an error.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        click.echo(f"Warning: Failed to load config from {config_path}: {e}", err=True)
        return {}

    if not isinstance(config, dict):
        # Callers iterate over key/value pairs, so anything but an object is unusable.
        click.echo(
            f"Warning: Failed to load config from {config_path}: expected a JSON object, got {type(config).__name__}",
            err=True,
        )
        return {}
    return config


def substitute_env_vars(value):
    """Expand ``${env:NAME}`` placeholders in a string from the process environment.

    Non-string values are handed back untouched. A placeholder whose variable
    is not set in the environment is left in the text exactly as written.
    """
    if isinstance(value, str):
        # Matches ${env:VARIABLE_NAME}; group 1 is the variable name.
        placeholder = re.compile(r"\$\{env:([^}]+)\}")
        return placeholder.sub(lambda m: os.environ.get(m.group(1), m.group(0)), value)
    return value


def process_config(config):
    """Return a copy of ``config`` with every value passed through ``substitute_env_vars``.

    Only the top-level values of the mapping are processed; keys are kept as-is.
    """
    return {name: substitute_env_vars(raw) for name, raw in config.items()}


@click.group()
@click.option("--token", required=False, help="Your API token for authentication.", hide_input=True)
@click.option("--instance-name", required=False, help="Your tenant ID.")
# No click-level default here: None means "not given on the command line", which
# lets an explicit --backend-url always win over the config file, even when it
# equals the built-in default URL.
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default=None)
@click.pass_context
def datapilot(ctx, token, instance_name, backend_url):
    """Altimate CLI for DBT project management."""
    # NOTE: the docstring above is shown by click as the --help text; keep it short.
    #
    # Resolution order for token / instance_name / backend_url, highest first:
    #   1. command-line options
    #   2. ~/.altimate/altimate.json (with ${env:VAR} placeholders expanded)
    #   3. built-in defaults
    # The resolved values are stored in ctx.obj for subcommands to read.
    default_backend_url = "https://api.myaltimate.com"

    # Load .env file from current directory if it exists, so that variables it
    # defines are available when config placeholders are expanded below.
    load_dotenv()

    # Load configuration from file
    file_config = process_config(load_config_from_file())

    # Map config file keys to CLI option names
    config_mapping = {"altimateApiKey": "token", "altimateInstanceName": "instance_name", "altimateUrl": "backend_url"}

    ctx.ensure_object(dict)

    # Apply file config first
    for file_key, cli_key in config_mapping.items():
        if file_key in file_config:
            ctx.obj[cli_key] = file_config[file_key]

    # Override with CLI arguments that were actually provided
    cli_values = {"token": token, "instance_name": instance_name, "backend_url": backend_url}
    for cli_key, cli_value in cli_values.items():
        if cli_value is not None:
            ctx.obj[cli_key] = cli_value

    # Set defaults if nothing was provided
    ctx.obj.setdefault("token", None)
    ctx.obj.setdefault("instance_name", None)
    ctx.obj.setdefault("backend_url", default_backend_url)


# Register the subcommand groups on the top-level `datapilot` group.
datapilot.add_command(dbt)
datapilot.add_command(mcp)
datapilot.add_command(knowledge)
Empty file.
113 changes: 113 additions & 0 deletions src/datapilot/core/knowledge/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import json
import re
from http.server import BaseHTTPRequestHandler
from http.server import HTTPServer
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.parse import urlparse
from urllib.request import Request
from urllib.request import urlopen

import click


# Command group exposed on the command line as `knowledge`; subcommands attach
# to it via `@cli.command()`. The docstring below doubles as the click help text.
@click.group(name="knowledge")
def cli():
    """knowledge specific commands."""


@cli.command()
@click.option("--port", default=4000, help="Port to run the server on")
@click.pass_context
def serve(ctx, port):
    """Serve knowledge bases via HTTP server."""
    # NOTE: the docstring above is shown by click as the --help text; keep it short.
    #
    # Runs a blocking HTTP server that exposes:
    #   GET /knowledge_bases/{uuid}  -> proxied to {backend_url}/knowledge_bases/public/{uuid}
    #   GET /health                  -> {"status": "ok"}
    # Exits with status 1 when the token or instance name is missing.

    # Get configuration from parent context
    token = ctx.parent.obj.get("token")
    instance_name = ctx.parent.obj.get("instance_name")
    backend_url = ctx.parent.obj.get("backend_url")

    if not token or not instance_name:
        click.echo(
            "Error: API token and instance name are required. Use --token and --instance-name options or set them in config.", err=True
        )
        ctx.exit(1)

    # A configured URL ending in "/" would otherwise produce "//knowledge_bases/...".
    base_url = str(backend_url).rstrip("/")

    # Match /knowledge_bases/{uuid} pattern; compiled once rather than per request.
    kb_path = re.compile(r"^/knowledge_bases/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$")

    class KnowledgeBaseHandler(BaseHTTPRequestHandler):
        def _send_json(self, status, payload):
            """Send a complete JSON response; ``payload`` must already be bytes."""
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.end_headers()
            self.wfile.write(payload)

        def do_GET(self):
            """Handle GET requests."""
            path = urlparse(self.path).path

            match = kb_path.match(path)
            if match:
                self.handle_knowledge_base(match.group(1))
            elif path == "/health":
                self.handle_health()
            else:
                self.send_error(404, "Not Found")

        def handle_knowledge_base(self, public_id):
            """Fetch and return knowledge base data."""
            url = f"{base_url}/knowledge_bases/public/{public_id}"

            # Validate URL scheme for security: urlopen also accepts file:, ftp:, etc.
            if urlparse(url).scheme not in ("http", "https"):
                error_msg = json.dumps({"error": "Invalid URL scheme. Only HTTP and HTTPS are allowed."})
                self._send_json(400, error_msg.encode("utf-8"))
                return

            headers = {"Authorization": f"Bearer {token}", "X-Tenant": instance_name, "Content-Type": "application/json"}
            req = Request(url, headers=headers)

            # Only the upstream fetch is inside the try block, so a failure while
            # writing to the client is never mistaken for a backend error.
            try:
                with urlopen(req, timeout=30) as response:
                    status, body = 200, response.read()
            except HTTPError as e:
                # Relay the backend's status and body as raw bytes (no decode/encode
                # round trip, which could fail on non-UTF-8 bodies).
                try:
                    error_body = e.read()
                except OSError:
                    error_body = b""
                finally:
                    e.close()
                status, body = e.code, error_body or b'{"error": "HTTP Error"}'
            except OSError as e:
                # URLError is an OSError; this also covers read timeouts and
                # connection resets raised while reading the response body.
                status, body = 500, json.dumps({"error": str(e)}).encode("utf-8")

            self._send_json(status, body)

        def handle_health(self):
            """Handle health check endpoint."""
            self._send_json(200, json.dumps({"status": "ok"}).encode("utf-8"))

        def log_message(self, format, *args):
            """Override to use click.echo for logging."""
            click.echo(f"{self.address_string()} - {format % args}")

    # NOTE(review): an empty host binds every network interface, so any host that
    # can reach this port can query the backend with the configured token.
    # Confirm this exposure is intended; "127.0.0.1" would restrict it to local use.
    server_address = ("", port)
    httpd = HTTPServer(server_address, KnowledgeBaseHandler)

    click.echo(f"Starting knowledge base server on port {port}...")
    click.echo(f"Backend URL: {backend_url}")
    click.echo(f"Instance: {instance_name}")
    click.echo(f"Server running at http://localhost:{port}")

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        click.echo("\nShutting down server...")
    finally:
        # serve_forever() has already returned here, so shutdown() (meant to be
        # called from another thread) is not needed; release the listening socket.
        httpd.server_close()
37 changes: 24 additions & 13 deletions src/datapilot/core/platforms/dbt/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@

# New dbt group
@click.group()
@click.pass_context
def dbt(ctx):
    """DBT specific commands."""
    # Ensure context object exists, so subcommands always find a dict on the
    # context even when this group is invoked without a parent that set one.
    ctx.ensure_object(dict)


@dbt.command("project-health")
@click.option("--token", required=False, help="Your API token for authentication.")
@click.option("--instance-name", required=False, help="Your tenant ID.")
@click.option(
"--manifest-path",
required=True,
Expand All @@ -57,21 +58,24 @@ def dbt():
default=None,
help="Selective model testing. Specify one or more models to run tests on.",
)
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
@click.pass_context
def project_health(
token,
instance_name,
ctx,
manifest_path,
catalog_path,
config_path=None,
config_name=None,
select=None,
backend_url="https://api.myaltimate.com",
):
"""
Validate the DBT project's configuration and structure.
:param manifest_path: Path to the DBT manifest file.
"""
# Get common options from parent context
token = ctx.parent.obj.get("token")
instance_name = ctx.parent.obj.get("instance_name")
backend_url = ctx.parent.obj.get("backend_url")

config = None
if config_path:
config = load_config(config_path)
Expand Down Expand Up @@ -131,25 +135,32 @@ def project_health(


@dbt.command("onboard")
@click.option("--token", prompt="API Token", help="Your API token for authentication.")
@click.option("--instance-name", prompt="Instance Name", help="Your tenant ID.")
@click.option("--dbt_core_integration_id", prompt="DBT Core Integration ID", help="DBT Core Integration ID")
@click.option(
"--dbt_core_integration_environment", default="PROD", prompt="DBT Core Integration Environment", help="DBT Core Integration Environment"
)
@click.option("--manifest-path", required=True, prompt="Manifest Path", help="Path to the manifest file.")
@click.option("--catalog-path", required=False, prompt=False, help="Path to the catalog file.")
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
@click.pass_context
def onboard(
token,
instance_name,
ctx,
dbt_core_integration_id,
dbt_core_integration_environment,
manifest_path,
catalog_path,
backend_url="https://api.myaltimate.com",
):
"""Onboard a manifest file to DBT."""
# Get common options from parent context
token = ctx.parent.obj.get("token")
instance_name = ctx.parent.obj.get("instance_name")
backend_url = ctx.parent.obj.get("backend_url")

# For onboard command, token and instance_name are required
if not token:
token = click.prompt("API Token")
if not instance_name:
instance_name = click.prompt("Instance Name")

check_token_and_instance(token, instance_name)

if not validate_credentials(token, backend_url, instance_name):
Expand Down
26 changes: 19 additions & 7 deletions tests/core/platform/dbt/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# test_app.py
from click.testing import CliRunner

from datapilot.core.platforms.dbt.cli.cli import project_health
from datapilot.cli.main import datapilot


def test_project_health_with_required_and_optional_args():
Expand All @@ -11,7 +11,9 @@ def test_project_health_with_required_and_optional_args():
config_path = "tests/data/config.yml"

# Simulate command invocation
result = runner.invoke(project_health, ["--manifest-path", manifest_path, "--catalog-path", catalog_path, "--config-path", config_path])
result = runner.invoke(
datapilot, ["dbt", "project-health", "--manifest-path", manifest_path, "--catalog-path", catalog_path, "--config-path", config_path]
)

assert result.exit_code == 0 # Ensure the command executed successfully
# Add more assertions here to validate the behavior of your command,
Expand All @@ -25,8 +27,10 @@ def test_project_health_with_only_required_arg():

# Simulate command invocation without optional arguments
result = runner.invoke(
project_health,
datapilot,
[
"dbt",
"project-health",
"--manifest-path",
manifest_path,
],
Expand All @@ -43,8 +47,10 @@ def test_project_health_with_only_required_arg_version1_6():

# Simulate command invocation without optional arguments
result = runner.invoke(
project_health,
datapilot,
[
"dbt",
"project-health",
"--manifest-path",
manifest_path,
],
Expand All @@ -61,8 +67,10 @@ def test_project_health_with_macro_args():

# Simulate command invocation without optional arguments
result = runner.invoke(
project_health,
datapilot,
[
"dbt",
"project-health",
"--manifest-path",
manifest_path,
],
Expand All @@ -76,8 +84,10 @@ def test_project_health_with_macro_args():

# Simulate command invocation without optional arguments
result = runner.invoke(
project_health,
datapilot,
[
"dbt",
"project-health",
"--manifest-path",
manifest_path,
],
Expand All @@ -95,7 +105,9 @@ def test_project_health_with_required_and_optional_args_v12():
config_path = "tests/data/config.yml"

# Simulate command invocation
result = runner.invoke(project_health, ["--manifest-path", manifest_path, "--catalog-path", catalog_path, "--config-path", config_path])
result = runner.invoke(
datapilot, ["dbt", "project-health", "--manifest-path", manifest_path, "--catalog-path", catalog_path, "--config-path", config_path]
)

assert result.exit_code == 0 # Ensure the command executed successfully
# Add more assertions here to validate the behavior of your command,
Expand Down
Loading