feat(cli): download model command

Daniele Briggi · Daniele Briggi · commit 877d1396e6f3 · 2025-09-03T08:57:43.000Z
diff --git a/.github/workflows/pypi-package.yaml b/.github/workflows/pypi-package.yaml
@@ -43,8 +43,6 @@ jobs:
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 
       - name: Build
-        env:
-          PACKAGE_VERSION: ${{ steps.get_version.outputs.version }}
         run: |
           # Update version in pyproject.toml
           sed -i 's/^version = ".*"/version = "${{ steps.get_version.outputs.version }}"/' pyproject.toml
diff --git a/bandit.yaml b/bandit.yaml
@@ -1,3 +1,3 @@
 # https://bandit.readthedocs.io/en/latest/config.html
-skips: ['B101', 'B608']
+skips: ['B101', 'B615']
 exclude_dirs: ['tests']
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,14 +18,15 @@ classifiers = [
 dependencies = [
     "attrs",
     "typer",
-    "huggingface_hub",
+    "huggingface_hub[cli]",
     "markitdown[all]",
     "sqlite-ai",
     "sqliteai-vector"
 ]
 
 [project.optional-dependencies]
 dev = [
+    "toml",
     "pytest",
     "pytest-cov",
     "black",
diff --git a/src/sqlite_rag/cli.py b/src/sqlite_rag/cli.py
@@ -2,6 +2,7 @@
 import json
 import shlex
 import sys
+from pathlib import Path
 from typing import Optional
 
 import typer
@@ -344,6 +345,56 @@ def search(
             typer.echo(f"{idx:<3} {snippet:<60} {uri:<40}")
 
 
+@app.command("download-model")
+def download_model(
+    model_id: str = typer.Argument(
+        ..., help="Hugging Face model ID (e.g., Qwen/Qwen3-Embedding-0.6B-GGUF)"
+    ),
+    gguf_file: str = typer.Argument(
+        ..., help="GGUF filename to download (e.g., Qwen3-Embedding-0.6B-Q8_0.gguf)"
+    ),
+    local_dir: str = typer.Option(
+        "./models", "--local-dir", "-d", help="Local directory to download to"
+    ),
+    revision: str = typer.Option(
+        "main", "--revision", "-r", help="Model revision/branch to download from"
+    ),
+):
+    """Download a GGUF model file from Hugging Face"""
+    # Contract:
+    #   - the file is requested from repo `model_id` at `revision` and stored
+    #     under <local_dir>/<model_id>/ (the directory is created if missing);
+    #   - progress and result messages are written to stdout;
+    #   - error messages are written to stderr and the command exits with
+    #     status 1 (typer.Exit) on any failure.
+
+    # Imported lazily so a missing package is reported as a clean CLI error
+    # by this command instead of an ImportError traceback.
+    try:
+        from huggingface_hub import hf_hub_download
+    except ImportError:
+        typer.echo(
+            "Error: huggingface_hub not found. Install with: pip install huggingface_hub",
+            err=True,
+        )
+        raise typer.Exit(1)
+
+    # Target directory mirrors the repository id: <local_dir>/<model_id>
+    local_path = Path(local_dir) / model_id
+
+    try:
+        # Directory creation is inside the guarded section so filesystem
+        # errors (e.g. an unwritable --local-dir) get the same clean
+        # error/exit path as download failures.
+        local_path.mkdir(parents=True, exist_ok=True)
+
+        typer.echo(f"Downloading {gguf_file} from {model_id}...")
+
+        # Download the specific GGUF file
+        downloaded_path = hf_hub_download(
+            repo_id=model_id,
+            filename=gguf_file,
+            local_dir=str(local_path),
+            revision=revision,
+        )
+
+        final_path = Path(downloaded_path)
+        typer.echo(f"Downloaded to: {final_path}")
+
+        # Size is reported in MiB (bytes / 1024^2), only if the file is present
+        if final_path.exists():
+            typer.echo(f"File size: {final_path.stat().st_size / (1024*1024):.1f} MB")
+
+    except Exception as e:
+        # Top-level CLI boundary: report the failure on stderr and exit non-zero
+        typer.echo(f"Error downloading model: {e}", err=True)
+        raise typer.Exit(1)
+
+
 def repl_mode():
     """Interactive REPL mode"""
     typer.echo("Entering interactive mode. Type 'help' for commands or 'exit' to quit.")
diff --git a/src/sqlite_rag/settings.py b/src/sqlite_rag/settings.py
@@ -14,15 +14,15 @@ class Settings:
     model_path_or_name: str = (
         "./models/Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-f16.gguf"
     )
-    model_config: str = "n_ctx=12000,pooling_type=last,normalize_embedding=1"
+    model_config: str = "n_ctx=128,pooling_type=last,normalize_embedding=1"
 
     vector_type: str = "FLOAT16"
     embedding_dim: int = 1024
     other_vector_config: str = "distance=cosine"  # e.g. distance=metric,other=value,...
 
-    chunk_size: int = 12000
+    chunk_size: int = 128
     # Token overlap between chunks
-    chunk_overlap: int = 1200
+    chunk_overlap: int = 20
 
     #
     # Search settings