Skip to content

Commit 5641a1c

Browse files
committed
Support s3 lance
1 parent 024c845 commit 5641a1c

File tree

5 files changed

+59
-48
lines changed

5 files changed

+59
-48
lines changed

smoosense-gui/src/components/layout/EmbeddingTabContent.tsx

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use client'
22

3-
import { useState, useCallback, useMemo, useEffect } from 'react'
3+
import { useState, useCallback, useMemo } from 'react'
44
import { useAppSelector, useAppDispatch } from '@/lib/hooks'
55
import { setActiveEmbTab } from '@/lib/features/ui/uiSlice'
66
import { Tabs, TabsList, TabsTrigger } from '@/components/ui/tabs'
@@ -10,21 +10,10 @@ import Umap2D, { type UmapResult, type UmapSelection } from '@/components/emb/Um
1010
import GalleryItem from '@/components/gallery/GalleryItem'
1111
import GalleryControls from '@/components/gallery/GalleryControls'
1212
import { useSingleColumnRenderType } from '@/lib/hooks/useRenderType'
13-
import TextPlaceHolder from '@/components/common/TextPlaceHolder'
1413

1514
const embTabs = ['Retrieve', 'UMAP', 'Cluster'] as const
1615

17-
function useIsLocal(): boolean {
18-
const [isLocal, setIsLocal] = useState(false)
19-
useEffect(() => {
20-
const url = window.location.href
21-
setIsLocal(url.startsWith('http://localhost') || url.startsWith('http://127.0.0.1'))
22-
}, [])
23-
return isLocal
24-
}
25-
2616
export default function EmbeddingTabContent() {
27-
const isLocal = useIsLocal()
2817
const dispatch = useAppDispatch()
2918
const activeEmbTab = useAppSelector((state) => state.ui.activeEmbTab)
3019
const visualColumn = useAppSelector((state) => state.ui.columnForGalleryVisual)
@@ -60,14 +49,6 @@ export default function EmbeddingTabContent() {
6049
}))
6150
}, [umapResult, umapSelection, visualColumn, captionColumn])
6251

63-
if (!isLocal) {
64-
return (
65-
<TextPlaceHolder>
66-
For better performance, please download data to your laptop and run SmooSense locally
67-
</TextPlaceHolder>
68-
)
69-
}
70-
7152
return (
7253
<ResizablePanels
7354
direction="horizontal"
@@ -110,7 +91,7 @@ export default function EmbeddingTabContent() {
11091
<div className="h-full flex items-center justify-center text-muted-foreground text-center">
11192
<div>
11293
<p className="text-lg font-medium">Retrieve</p>
113-
<p className="text-sm">Similarity search placeholder</p>
94+
<p className="text-sm">Coming soon</p>
11495
</div>
11596
</div>
11697
) : activeEmbTab === 'UMAP' ? (
@@ -122,7 +103,7 @@ export default function EmbeddingTabContent() {
122103
<div className="h-full flex items-center justify-center text-muted-foreground text-center">
123104
<div>
124105
<p className="text-lg font-medium">Cluster</p>
125-
<p className="text-sm">Clustering visualization placeholder</p>
106+
<p className="text-sm">Coming soon</p>
126107
</div>
127108
</div>
128109
) : null}

smoosense-gui/src/components/providers/TableUrlParamsProvider.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ function TableUrlParamsProviderInner({ children }: { children: React.ReactNode }
3030
}
3131

3232
// Handle tablePath specifically
33-
const urlTablePath = searchParams.get('tablePath')
33+
let urlTablePath = searchParams.get('tablePath')
34+
// Remove trailing slash if present
35+
if (urlTablePath?.endsWith('/')) {
36+
urlTablePath = urlTablePath.slice(0, -1)
37+
}
3438
if (urlTablePath !== currentTablePath) {
3539
dispatch(uiSliceActions.setTablePath(urlTablePath))
3640

smoosense-py/smoosense/handlers/umap.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ def compute_umap() -> Response:
151151
min_dist=min_dist,
152152
n_components=2,
153153
metric="cosine",
154-
random_state=42,
155154
low_memory=False, # Trade memory for speed
156155
n_jobs=-1, # Use all CPU cores
157156
)

smoosense-py/smoosense/lance/db_client.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,23 @@ def __init__(self, root_folder: str):
1616
Initialize the Lance database client.
1717
1818
Args:
19-
root_folder: Path to the Lance database directory
19+
root_folder: Path to the Lance database directory (local path or S3 URI)
2020
"""
2121
import lancedb # Imported lazily because the first import of lancedb can be slow
2222

23-
if root_folder.startswith("~"):
24-
root_folder = os.path.expanduser(root_folder)
23+
# Check if it's an S3 path
24+
is_s3_path = root_folder.startswith("s3://")
2525

26-
if not os.path.exists(root_folder):
27-
raise ValueError(f"Directory does not exist: {root_folder}")
26+
if not is_s3_path:
27+
# Local path handling
28+
if root_folder.startswith("~"):
29+
root_folder = os.path.expanduser(root_folder)
2830

29-
if not os.path.isdir(root_folder):
30-
raise ValueError(f"Path is not a directory: {root_folder}")
31+
if not os.path.exists(root_folder):
32+
raise ValueError(f"Directory does not exist: {root_folder}")
33+
34+
if not os.path.isdir(root_folder):
35+
raise ValueError(f"Path is not a directory: {root_folder}")
3136

3237
self.root_folder = root_folder
3338
self.db = lancedb.connect(root_folder)

smoosense-py/smoosense/lance/table_client.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,24 @@ def __init__(self, root_folder: str, table_name: str):
1919
Initialize the Lance table client.
2020
2121
Args:
22-
root_folder: Path to the Lance database directory
22+
root_folder: Path to the Lance database directory (local path or S3 URI)
2323
table_name: Name of the table
2424
"""
2525
import lancedb # Imported lazily because the first import of lancedb can be slow
2626

27-
if root_folder.startswith("~"):
28-
root_folder = os.path.expanduser(root_folder)
27+
# Check if it's an S3 path
28+
is_s3_path = root_folder.startswith("s3://")
2929

30-
if not os.path.exists(root_folder):
31-
raise ValueError(f"Directory does not exist: {root_folder}")
30+
if not is_s3_path:
31+
# Local path handling
32+
if root_folder.startswith("~"):
33+
root_folder = os.path.expanduser(root_folder)
3234

33-
if not os.path.isdir(root_folder):
34-
raise ValueError(f"Path is not a directory: {root_folder}")
35+
if not os.path.exists(root_folder):
36+
raise ValueError(f"Directory does not exist: {root_folder}")
37+
38+
if not os.path.isdir(root_folder):
39+
raise ValueError(f"Path is not a directory: {root_folder}")
3540

3641
self.root_folder = root_folder
3742
self.table_name = table_name
@@ -46,29 +51,42 @@ def from_table_path(table_path: str) -> "LanceTableClient":
4651
Create a LanceTableClient from a table path.
4752
4853
Args:
49-
table_path: Path to the Lance table (e.g., /path/to/db/table_name.lance)
54+
table_path: Path to the Lance table (e.g., /path/to/db/table_name.lance
55+
or s3://bucket/path/table_name.lance)
5056
5157
Returns:
5258
LanceTableClient instance
5359
5460
Raises:
5561
ValueError: If the table path is invalid or doesn't end with .lance
5662
"""
57-
# Expand ~ if present
58-
if table_path.startswith("~"):
59-
table_path = os.path.expanduser(table_path)
63+
# Check if it's an S3 path
64+
is_s3_path = table_path.startswith("s3://")
65+
66+
if not is_s3_path:
67+
# Local path handling
68+
# Expand ~ if present
69+
if table_path.startswith("~"):
70+
table_path = os.path.expanduser(table_path)
6071

61-
# Validate path exists
62-
if not os.path.exists(table_path):
63-
raise ValueError(f"Table path does not exist: {table_path}")
72+
# Validate path exists
73+
if not os.path.exists(table_path):
74+
raise ValueError(f"Table path does not exist: {table_path}")
6475

6576
# Validate path ends with .lance
6677
if not table_path.endswith(".lance"):
6778
raise ValueError(f"Table path must end with .lance: {table_path}")
6879

6980
# Extract root folder and table name
70-
root_folder = os.path.dirname(table_path)
71-
table_name = os.path.basename(table_path).replace(".lance", "")
81+
# Works for both local paths and S3 URIs
82+
if is_s3_path:
83+
# For S3: s3://bucket/path/table.lance -> s3://bucket/path and table
84+
last_slash = table_path.rfind("/")
85+
root_folder = table_path[:last_slash]
86+
table_name = table_path[last_slash + 1 :].replace(".lance", "")
87+
else:
88+
root_folder = os.path.dirname(table_path)
89+
table_name = os.path.basename(table_path).replace(".lance", "")
7290

7391
return LanceTableClient(root_folder, table_name)
7492

@@ -175,7 +193,11 @@ def _get_filtered_arrow_table(self) -> pa.Table:
175193
Raises:
176194
ValueError: If no compatible columns found
177195
"""
178-
table_path = os.path.join(self.root_folder, f"{self.table_name}.lance")
196+
# Construct table path - handle both local and S3 paths
197+
if self.root_folder.startswith("s3://"):
198+
table_path = f"{self.root_folder}/{self.table_name}.lance"
199+
else:
200+
table_path = os.path.join(self.root_folder, f"{self.table_name}.lance")
179201
filtered_arrow_table, _ = self._load_and_filter_arrow_table(table_path)
180202
return filtered_arrow_table
181203

0 commit comments

Comments
 (0)