vespa-engine · kkraune · Nov 14, 2025 · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025
diff --git a/visual-retrieval-colpali/README.md b/visual-retrieval-colpali/README.md
@@ -128,15 +128,15 @@ See [Custom python spaces](https://huggingface.co/docs/hub/en/spaces-sdks-python
 
 #### Upload the files
 
-To deploy, run
+To deploy, run from the project root directory:
 
 (Replace `vespa-engine/colpali-vespa-visual-retrieval` with your own huggingface user/repo name, does not need to exist beforehand)
 
 ```bash
-huggingface-cli upload vespa-engine/colpali-vespa-visual-retrieval src . --repo-type=space
+hf upload vespa-engine/colpali-vespa-visual-retrieval src . --repo-type=space --delete "*" --commit-message "Deploy to HF Space"
 ```
 
-Note that we upload only the `src` directory.
+Note that we upload only the `src` directory. The `--delete "*"` flag ensures that any files that exist in the space but not in your local `src` directory will be removed, keeping everything in sync.
 
 ## Development
 

diff --git a/visual-retrieval-colpali/pyproject.toml b/visual-retrieval-colpali/pyproject.toml
@@ -6,20 +6,22 @@ readme = "README.md"
 requires-python = ">=3.10, <3.12"
 license = { text = "Apache-2.0" }
 dependencies = [
-    "python-fasthtml",
+    "python-fasthtml==0.12.33",
+    "uvicorn[standard]==0.38.0",
     "huggingface-hub",
     "pyvespa>=0.50.0",
     "vespacli",
     "torch",
     "vidore-benchmark[interpretability]>=4.0.0,<5.0.0",
-    "colpali-engine",
+    "colpali-engine==0.3.1",
     "einops",
     "pypdf",
     "setuptools",
     "python-dotenv",
     "shad4fast>=1.2.1",
     "google-generativeai>=0.7.2",
     "spacy",
+    "transformers==4.45",
     "pip",
 ]
 

diff --git a/visual-retrieval-colpali/src/README.md b/visual-retrieval-colpali/src/README.md
@@ -10,6 +10,7 @@ app_file: main.py
 pinned: false
 license: apache-2.0
 suggested_hardware: t4-small
+header: mini
 models:
   - vidore/colpaligemma-3b-pt-448-base
   - vidore/colpali-v1.2

diff --git a/visual-retrieval-colpali/src/frontend/app.py b/visual-retrieval-colpali/src/frontend/app.py
@@ -755,7 +755,7 @@ def ChatResult(query_id: str, query: str, doc_ids: Optional[list] = None):
         )
 
     return Div(
-        Div("AI-response (Gemini-2.0)", cls="text-xl font-semibold p-5"),
+        Div("AI-response (Gemini-2.5)", cls="text-xl font-semibold p-5"),
         Div(
             Div(
                 messages,

diff --git a/visual-retrieval-colpali/src/main.py b/visual-retrieval-colpali/src/main.py
@@ -24,7 +24,6 @@
     Script,
     StreamingResponse,
     fast_app,
-    serve,
 )
 from PIL import Image
 from shad4fast import ShadHead
@@ -43,6 +42,7 @@
     SimMapButtonReady,
 )
 from frontend.layout import Layout
+import uvicorn
 
 highlight_js_theme_link = Link(id="highlight-theme", rel="stylesheet", href="")
 highlight_js_theme = Script(src="/static/js/highlightjs-theme.js")
@@ -68,7 +68,11 @@
 awesomplete_js = Script(
     src="https://cdnjs.cloudflare.com/ajax/libs/awesomplete/1.1.7/awesomplete.min.js"
 )
-sselink = Script(src="https://unpkg.com/[email protected]/sse.js")
+sselink = Script(
+    src="https://cdn.jsdelivr.net/npm/htmx-ext-sse@2.2.4",
+    integrity="sha384-A986SAtodyH8eg8x8irJnYUk7i9inVQqYigD6qZ9evobksGNIXfeFvDwLSHcp31N",
+    crossorigin="anonymous",
+)
 
 # Get log level from environment variable, default to INFO
 LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
@@ -112,7 +116,7 @@
 Do NOT include backticks (`) in your response. Only simple HTML tags and text.
 """
 gemini_model = genai.GenerativeModel(
-    "gemini-2.0-flash", system_instruction=GEMINI_SYSTEM_PROMPT
+    "gemini-2.5-flash", system_instruction=GEMINI_SYSTEM_PROMPT
 )
 STATIC_DIR = Path("static")
 IMG_DIR = STATIC_DIR / "full_images"
@@ -381,7 +385,7 @@ async def message_generator(query_id: str, query: str, doc_ids: list):
     # yield message with number of images ready
     yield f"event: message\ndata: Generating response based on {len(images)} images...\n\n"
     if not images:
-        yield "event: message\ndata: Failed to send images to Gemini 2.0!\n\n"
+        yield "event: message\ndata: Failed to send images to Gemini 2.5!\n\n"
         yield "event: close\ndata: \n\n"
         return
 
@@ -417,4 +421,11 @@ def get():
 if __name__ == "__main__":
     HOT_RELOAD = os.getenv("HOT_RELOAD", "False").lower() == "true"
     logger.info(f"Starting app with hot reload: {HOT_RELOAD}")
-    serve(port=7860, reload=HOT_RELOAD)
+    # Pass the app as an import string: uvicorn requires that form for reload.
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=7860,
+        reload=HOT_RELOAD,
+        timeout_worker_healthcheck=30,
+    )
diff --git a/visual-retrieval-colpali/src/requirements.txt b/visual-retrieval-colpali/src/requirements.txt
@@ -2,14 +2,14 @@
 #    uv pip compile pyproject.toml -o src/requirements.txt
 accelerate==0.34.2
     # via peft
-aiohappyeyeballs==2.4.3
+aiohappyeyeballs==2.6.1
     # via aiohttp
 aiohttp==3.12.14
     # via
     #   datasets
     #   fsspec
     #   pyvespa
-aiosignal==1.3.1
+aiosignal==1.4.0
     # via aiohttp
 annotated-types==0.7.0
     # via pydantic
@@ -18,6 +18,10 @@ anyio==4.6.0
     #   httpx
     #   starlette
     #   watchfiles
+apsw==3.50.4.0
+    # via apswutils
+apswutils==0.1.0
+    # via fastlite
 async-timeout==4.0.3
     # via aiohttp
 attrs==24.2.0
@@ -85,13 +89,13 @@ eval-type-backport==0.2.0
     # via mteb
 exceptiongroup==1.2.2
     # via anyio
-fastcore==1.7.11
+fastcore==1.8.15
     # via
+    #   apswutils
     #   fastlite
     #   python-fasthtml
     #   pyvespa
-    #   sqlite-minutils
-fastlite==0.0.11
+fastlite==0.2.1
     # via python-fasthtml
 filelock==3.16.1
     # via
@@ -150,21 +154,23 @@ h11==0.16.0
     #   uvicorn
 h2==4.3.0
     # via httpx
-hpack==4.0.0
+hf-xet==1.2.0
+    # via huggingface-hub
+hpack==4.1.0
     # via h2
-httpcore==1.0.6
+httpcore==1.0.9
     # via httpx
 httplib2==0.22.0
     # via
     #   google-api-python-client
     #   google-auth-httplib2
-httptools==0.6.1
+httptools==0.7.1
     # via uvicorn
 httpx==0.27.2
     # via
     #   python-fasthtml
     #   pyvespa
-huggingface-hub==0.25.1
+huggingface-hub==0.36.0
     # via
     #   visual-retrieval-colpali (pyproject.toml)
     #   accelerate
@@ -173,7 +179,7 @@ huggingface-hub==0.25.1
     #   sentence-transformers
     #   tokenizers
     #   transformers
-hyperframe==6.0.1
+hyperframe==6.1.0
     # via h2
 idna==3.10
     # via
@@ -291,6 +297,10 @@ preshed==3.0.9
     # via
     #   spacy
     #   thinc
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
 proto-plus==1.24.0
     # via
     #   google-ai-generativelanguage
@@ -346,7 +356,7 @@ python-dotenv==1.0.1
     #   visual-retrieval-colpali (pyproject.toml)
     #   uvicorn
     #   vidore-benchmark
-python-fasthtml==0.6.9
+python-fasthtml==0.12.33
     # via
     #   visual-retrieval-colpali (pyproject.toml)
     #   lucide-fasthtml
@@ -439,8 +449,6 @@ spacy-legacy==3.0.12
     # via spacy
 spacy-loggers==1.0.5
     # via spacy
-sqlite-minutils==3.37.0.post3
-    # via fastlite
 srsly==2.4.8
     # via
     #   confection
@@ -457,7 +465,7 @@ thinc==8.2.5
     # via spacy
 threadpoolctl==3.5.0
     # via scikit-learn
-tokenizers==0.20.0
+tokenizers==0.20.3
     # via transformers
 torch==2.8.0
     # via
@@ -478,8 +486,9 @@ tqdm==4.66.5
     #   sentence-transformers
     #   spacy
     #   transformers
-transformers==4.53.0
+transformers==4.45.0
     # via
+    #   visual-retrieval-colpali (pyproject.toml)
     #   colpali-engine
     #   peft
     #   sentence-transformers
@@ -491,6 +500,7 @@ typer==0.12.5
     #   weasel
 typing-extensions==4.12.2
     # via
+    #   aiosignal
     #   anyio
     #   cloudpathlib
     #   google-generativeai
@@ -502,6 +512,7 @@ typing-extensions==4.12.2
     #   pypdf
     #   pyvespa
     #   rich
+    #   starlette
     #   torch
     #   typer
     #   uvicorn
@@ -513,8 +524,10 @@ urllib3==2.5.0
     # via
     #   docker
     #   requests
-uvicorn==0.31.0
-    # via python-fasthtml
+uvicorn==0.38.0
+    # via
+    #   visual-retrieval-colpali (pyproject.toml)
+    #   python-fasthtml
 uvloop==0.20.0
     # via uvicorn
 vespacli==8.391.23
@@ -536,5 +549,5 @@ wrapt==1.16.0
     # via smart-open
 xxhash==3.5.0
     # via datasets
-yarl==1.13.1
+yarl==1.22.0
     # via aiohttp