Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
35c4998
refactor: move token and instance-name to top-level command
mdesmet Jun 10, 2025
98e1232
feat: add config file support with env var substitution
mdesmet Jun 10, 2025
e9d3f71
build: add python-dotenv dependency
mdesmet Jun 10, 2025
ff3dca6
refactor: standardize string quotes and improve code consistency
mdesmet Jun 10, 2025
34288ef
feat: hide token input in CLI for security
mdesmet Jun 10, 2025
c4059c7
chore: update config file loading to use Path.open
mdesmet Jun 10, 2025
e5eaab4
fix: add 'dbt' prefix to project-health command in tests
mdesmet Jun 10, 2025
a23748f
refactor: simplify config file opening syntax
mdesmet Jun 10, 2025
2aeda41
feat: add knowledge CLI command group
mdesmet Jun 10, 2025
9de835b
feat: add knowledge serve command with HTTP server
mdesmet Jun 10, 2025
6236ac1
fix: correct knowledge cli import path in main.py
mdesmet Jun 10, 2025
521b381
feat: add knowledge CLI group and serve command
mdesmet Jun 10, 2025
2f3858c
feat: change default server port from 3000 to 4000
mdesmet Jun 10, 2025
f963721
style: reformat imports and strings in knowledge CLI
mdesmet Jun 10, 2025
c9f4229
fix: validate URL scheme to allow only HTTP/HTTPS in knowledge base h…
mdesmet Jun 10, 2025
54e3495
feat: Add timeout to urlopen() calls
mdesmet Jun 10, 2025
13b3d1b
fix: store HTTPError.read() result to avoid double read
mdesmet Jun 10, 2025
1ceafce
style: standardize string quotes in URL scheme check
mdesmet Jun 10, 2025
973bb0f
fix: suppress S310 warning for validated URL scheme
mdesmet Jun 10, 2025
9f439e9
fix: Shorten noqa comment for urlopen security warning
mdesmet Jun 10, 2025
8494366
fix: add security comment for urlopen usage
mdesmet Jun 10, 2025
5222d4a
fix: add noqa comment for bandit security check
mdesmet Jun 10, 2025
a02e6c2
feat: add --version option to CLI
mdesmet Jun 11, 2025
788c259
Merge branch 'main' into feat/kb-proxy
mdesmet Jun 11, 2025
7954b55
refactor: extract KnowledgeBaseHandler to separate file
mdesmet Jun 11, 2025
79efc66
style: reorder imports and clean up whitespace
mdesmet Jun 11, 2025
5be69f7
refactor: update knowledge base endpoint paths
mdesmet Jun 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/datapilot/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import click
from dotenv import load_dotenv

from datapilot import __version__
from datapilot.core.knowledge.cli import cli as knowledge
from datapilot.core.mcp_utils.mcp import mcp
from datapilot.core.platforms.dbt.cli.cli import dbt

Expand Down Expand Up @@ -50,6 +52,7 @@ def process_config(config):


@click.group()
@click.version_option(version=__version__, prog_name="datapilot")
@click.option("--token", required=False, help="Your API token for authentication.", hide_input=True)
@click.option("--instance-name", required=False, help="Your tenant ID.")
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
Expand Down Expand Up @@ -90,3 +93,4 @@ def datapilot(ctx, token, instance_name, backend_url):

# Register each imported command on the ``datapilot`` click group so it is
# reachable as ``datapilot dbt ...``, ``datapilot mcp ...`` and
# ``datapilot knowledge ...``.
datapilot.add_command(dbt)
datapilot.add_command(mcp)
datapilot.add_command(knowledge)
Empty file.
46 changes: 46 additions & 0 deletions src/datapilot/core/knowledge/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from http.server import HTTPServer

import click

from .server import KnowledgeBaseHandler


# Click group that namespaces the knowledge-base subcommands under "knowledge".
# The group body is intentionally empty: it only exists to hold subcommands.
# NOTE: click uses the docstring below as the group's help text, so it is
# user-facing output rather than a plain comment.
@click.group(name="knowledge")
def cli():
    """knowledge specific commands."""


@cli.command()
@click.option("--port", default=4000, help="Port to run the server on")
@click.pass_context
def serve(ctx, port):
    """Serve knowledge bases via HTTP server."""
    # NOTE: click shows the docstring above as this command's help text, so it
    # is kept short; the details live in these comments.
    #
    # Reads ``token``, ``instance_name`` and ``backend_url`` from the parent
    # context object, publishes them on ``KnowledgeBaseHandler`` as class
    # attributes (the HTTP server instantiates the handler itself, so there is
    # no constructor to pass them through), and then serves requests until the
    # process is interrupted with Ctrl+C.
    config = ctx.parent.obj
    token = config.get("token")
    instance_name = config.get("instance_name")
    backend_url = config.get("backend_url")

    if not token or not instance_name:
        click.echo(
            "Error: API token and instance name are required. Use --token and --instance-name options or set them in config.", err=True
        )
        # ctx.exit raises, so nothing below runs without credentials.
        ctx.exit(1)

    # Set context data for the handler
    KnowledgeBaseHandler.token = token
    KnowledgeBaseHandler.instance_name = instance_name
    KnowledgeBaseHandler.backend_url = backend_url

    # NOTE(review): an empty host binds to every interface, while the banner
    # below advertises localhost. The handler attaches the API token to every
    # upstream request, so any host that can reach this port can read knowledge
    # bases with it. Consider binding to 127.0.0.1 by default.
    server_address = ("", port)
    httpd = HTTPServer(server_address, KnowledgeBaseHandler)

    try:
        click.echo(f"Starting knowledge base server on port {port}...")
        click.echo(f"Backend URL: {backend_url}")
        click.echo(f"Instance: {instance_name}")
        click.echo(f"Server running at http://localhost:{port}")
        httpd.serve_forever()
    except KeyboardInterrupt:
        # serve_forever() has already returned at this point, so there is no
        # loop left to stop: shutdown() is only meant to be called from another
        # thread while the loop is running.
        click.echo("\nShutting down server...")
    finally:
        # Always release the listening socket, including on unexpected errors.
        httpd.server_close()
84 changes: 84 additions & 0 deletions src/datapilot/core/knowledge/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import json
import re
from http.server import BaseHTTPRequestHandler
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.parse import urlparse
from urllib.request import Request
from urllib.request import urlopen

import click


class KnowledgeBaseHandler(BaseHTTPRequestHandler):
    """HTTP request handler for serving knowledge bases and health checks.

    Routes:
        GET /kb/{uuid}  -- fetches the knowledge base from the backend and
                           relays the response body to the client.
        GET /health     -- returns ``{"status": "ok"}``.
        anything else   -- 404.

    Configuration is held in class attributes because the HTTP server creates
    one handler instance per request; whoever starts the server must assign
    them on the class beforehand.
    """

    # Bearer token sent to the backend in the Authorization header.
    token: str = ""
    # Tenant identifier sent to the backend in the X-Tenant header.
    instance_name: str = ""
    # Base URL of the backend; a trailing slash is tolerated.
    backend_url: str = ""

    # Matches /kb/{uuid} and captures the UUID. Compiled once for all requests.
    _KB_PATH_RE = re.compile(r"^/kb/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$")

    def do_GET(self):
        """Handle GET requests by dispatching on the request path."""
        # Only the path component is used for routing; the query string is ignored.
        path = urlparse(self.path).path

        match = self._KB_PATH_RE.match(path)
        if match:
            self.handle_knowledge_base(match.group(1))
        elif path == "/health":
            self.handle_health()
        else:
            self.send_error(404, "Not Found")

    def _knowledge_base_url(self, public_id):
        """Return the backend URL for the knowledge base ``public_id``."""
        # Strip trailing slashes so "https://host/" does not yield "https://host//knowledge_bases/...".
        return f"{self.backend_url.rstrip('/')}/knowledge_bases/private/{public_id}"

    def _send_json(self, status, body):
        """Send a complete response with ``status`` and the JSON payload ``body`` (bytes)."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(body)

    def handle_knowledge_base(self, public_id):
        """Fetch the knowledge base ``public_id`` from the backend and relay it.

        Responds with:
            200 and the backend body on success;
            the backend's status code and body when the backend returns an HTTP error;
            400 when ``backend_url`` is not an http(s) URL;
            500 with ``{"error": ...}`` when the backend cannot be reached or times out.
        """
        url = self._knowledge_base_url(public_id)

        # Validate URL scheme for security: urlopen also accepts file:// and
        # other schemes, which must never be reachable through this proxy.
        if urlparse(url).scheme not in ("http", "https"):
            error_msg = json.dumps({"error": "Invalid URL scheme. Only HTTP and HTTPS are allowed."})
            self._send_json(400, error_msg.encode("utf-8"))
            return

        headers = {"Authorization": f"Bearer {self.token}", "X-Tenant": self.instance_name, "Content-Type": "application/json"}

        req = Request(url, headers=headers)  # noqa: S310

        # Only the upstream call lives in the try block, so a failure while
        # writing to the client is never mistaken for a backend failure.
        try:
            # URL scheme validated above - only HTTP/HTTPS allowed
            with urlopen(req, timeout=30) as response:  # noqa: S310
                status, body = 200, response.read()
        except HTTPError as e:
            # Relay the backend's error body as raw bytes; decoding it first
            # would raise on a body that is not valid UTF-8.
            status, body = e.code, e.read() or b'{"error": "HTTP Error"}'
        except OSError as e:
            # URLError is an OSError; so are socket timeouts and connection
            # resets raised while reading the body, which URLError alone misses.
            status, body = 500, json.dumps({"error": str(e)}).encode("utf-8")

        self._send_json(status, body)

    def handle_health(self):
        """Handle health check endpoint."""
        self._send_json(200, json.dumps({"status": "ok"}).encode("utf-8"))

    def log_message(self, format, *args):
        """Override to use click.echo for logging."""
        # ``format`` shadows the builtin, but the name is kept to match the
        # signature of the base-class method being overridden.
        click.echo(f"{self.address_string()} - {format % args}")