Skip to content

Commit 2ac5ef5

Browse files
committed
Split dependency group to reduce size
1 parent c2aabfd commit 2ac5ef5

File tree

5 files changed: +24 −17 lines changed

5 files changed: +24 −17 lines changed

smoosense-py/pyproject.toml

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,16 +29,12 @@ dependencies = [
2929
"click>=8.1.8",
3030
"duckdb>=1.2.1",
3131
"flask>=3.1.0",
32-
"hdbscan>=0.8.40",
3332
"lancedb>=0.25.2",
3433
"pyarrow>=20.0.0",
3534
"pydantic>=2.11.7",
3635
"pytz>=2025.2",
3736
"requests>=2.32.3",
3837
"rich>=14.0.0",
39-
"tqdm>=4.67.1",
40-
"transformers>=4.57.3",
41-
"umap-learn>=0.5.9.post2",
4238
]
4339

4440
[project.urls]
@@ -63,6 +59,12 @@ images = [
6359
"torch>=2.0.0",
6460
"pillow>=10.0.0",
6561
]
62+
emb = [
63+
"hdbscan>=0.8.40",
64+
"tqdm>=4.67.1",
65+
"transformers>=4.57.3",
66+
"umap-learn>=0.5.9.post2",
67+
]
6668

6769
[dependency-groups]
6870
dev = [

smoosense-py/smoosense/handlers/umap.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
from timeit import default_timer
33

44
import numpy as np
5-
import umap
65
from flask import Blueprint, Response, current_app, jsonify, request
76

87
from smoosense.handlers.auth import requires_auth_api
@@ -16,6 +15,7 @@
1615
# Maximum number of rows to compute UMAP on (random sample if exceeded)
1716
UMAP_MAX_ROWS = 1_000
1817

18+
1919
@umap_bp.post("/umap")
2020
@requires_auth_api
2121
@handle_api_errors
@@ -107,6 +107,9 @@ def compute_umap() -> Response:
107107
# Adjust n_neighbors if larger than dataset
108108
actual_n_neighbors = min(n_neighbors, len(embeddings) - 1)
109109

110+
# Import umap lazily: umap-learn is an optional dependency (the `emb` group), so it may not be installed
111+
import umap
112+
110113
# Compute UMAP with performance optimizations
111114
reducer = umap.UMAP(
112115
n_neighbors=actual_n_neighbors,

smoosense-py/smoosense/images/ingest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,9 @@ def compute_dinov2_embeddings_batch(
7373
device: str,
7474
) -> list[list[float]]:
7575
"""Compute DINOv2 embeddings for a batch of images (L2-normalized)."""
76-
inputs = processor(images=images, return_tensors="pt").to(device) # type: ignore[operator]
76+
inputs = processor(images=images, return_tensors="pt").to(device)
7777
with torch.no_grad():
78-
outputs = model(**inputs) # type: ignore[operator]
78+
outputs = model(**inputs)
7979
# Use CLS token from last hidden state
8080
embs = outputs.last_hidden_state[:, 0, :]
8181
# L2 normalize

smoosense-py/smoosense/lance/parquet_to_lance.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def is_float_or_double_list(col_type: pa.DataType) -> bool:
7272
if not pa.types.is_list(col_type):
7373
return False
7474
element_type = col_type.value_type
75-
return pa.types.is_floating(element_type)
75+
return bool(pa.types.is_floating(element_type))
7676

7777

7878
def get_embedding_columns(schema: pa.Schema, min_dim: int = 10) -> list[str]:

smoosense-py/uv.lock

Lines changed: 11 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)