Skip to content

Commit c25d07d

Browse files
committed
Add anonymous LanceDB public bucket connectivity test script
1 parent 9c479c0 commit c25d07d

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
'''
2+
Author: haoxingjun
3+
Date: 2025-12-11 23:31:05
4+
5+
LastEditors: haoxingjun
6+
LastEditTime: 2025-12-11 23:32:22
7+
Description: Connectivity check that opens a LanceDB table in a public TOS (S3-compatible) bucket without passing credentials
8+
Company: ByteDance
9+
'''
10+
import os
11+
import sys
12+
import pyarrow.fs as fs
13+
import lancedb
14+
15+
16+
def _split_db_and_table(uri: str):
17+
"""Split uri like s3://bucket/path/.../table_name into (bucket, db_root_uri, table_name)."""
18+
if not uri:
19+
return None, None, None
20+
scheme = ""
21+
rest = uri
22+
if rest.startswith("s3://"):
23+
scheme = "s3://"
24+
rest = rest[len("s3://"):]
25+
elif rest.startswith("tos://"):
26+
scheme = "tos://"
27+
rest = rest[len("tos://"):]
28+
parts = [p for p in rest.split("/") if p]
29+
if not parts:
30+
return None, None, None
31+
bucket = parts[0]
32+
table_name = parts[-1]
33+
db_root = "/".join(parts[:-1])
34+
db_root_uri = f"{scheme}{db_root}" if db_root else None
35+
return bucket, db_root_uri, table_name
36+
37+
38+
def _tos_storage_options(bucket, region):
    """Build the LanceDB storage options for a bucket-scoped TOS S3 endpoint."""
    # Virtual-hosted style: the bucket name is part of the endpoint host.
    # No credential keys are included on purpose.
    # NOTE(review): leaving credentials out may not by itself guarantee
    # unsigned requests if the storage backend discovers ambient credentials;
    # confirm against the LanceDB / object_store configuration docs.
    return {
        "aws_endpoint": f"https://{bucket}.tos-s3-{region}.volces.com",
        "virtual_hosted_style_request": "true",
    }


def _open_table(db_root_uri, table_name, storage_options):
    """Connect to the LanceDB root and open one table; errors propagate."""
    db = lancedb.connect(db_root_uri, storage_options=storage_options)
    return db.open_table(table_name)


def main():
    """Check that a LanceDB table can be opened without credentials.

    The table at ``LANCEDB_URI`` is tried first, then the one at
    ``LANCEDB_METADATA_URI``; both default to tables in the public demo
    bucket. The region used for the endpoint comes from ``TOS_REGION``
    (default ``cn-beijing``).

    This function always terminates the process via ``sys.exit``:
        0 - one of the two tables was opened,
        1 - ``LANCEDB_URI`` could not be split into bucket/root/table,
        2 - the fallback URI was invalid or both open attempts failed.
    """
    # Default to the public demo bucket if env not provided
    uri = os.getenv(
        "LANCEDB_URI",
        "s3://data-analysis-demo-data/lance_catalog/default/imdb_top_1000",
    )
    region = os.getenv("TOS_REGION", "cn-beijing")

    bucket, db_root_uri, table_name = _split_db_and_table(uri)
    if not (bucket and db_root_uri and table_name):
        print(f"Invalid LANCEDB_URI: {uri}")
        sys.exit(1)

    print(f"Connecting to LanceDB: root={db_root_uri}, table={table_name}")
    try:
        tbl = _open_table(
            db_root_uri, table_name, _tos_storage_options(bucket, region)
        )
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fall through
        # to the metadata-table attempt instead of aborting.
        print("Open default table failed:", e)
    else:
        # Lightweight validation: simply ensure we got a table object
        print("Connection successful. Table type:", type(tbl))
        sys.exit(0)

    # Fallback: try metadata table from env or default
    metadata_uri = os.getenv(
        "LANCEDB_METADATA_URI",
        "s3://data-analysis-demo-data/lance_catalog/default/metadata_table",
    )
    m_bucket, m_root, m_table = _split_db_and_table(metadata_uri)
    if not (m_bucket and m_root and m_table):
        print(f"Invalid LANCEDB_METADATA_URI: {metadata_uri}")
        sys.exit(2)

    print(f"Trying metadata table: root={m_root}, table={m_table}")
    try:
        mtbl = _open_table(
            m_root, m_table, _tos_storage_options(m_bucket, region)
        )
    except Exception as e2:
        print("Connection failed:", e2)
        sys.exit(2)

    print(f"Connection successful. Opened metadata table: {m_table}. Type: {type(mtbl)}")
    sys.exit(0)
89+
90+
91+
# Run the connectivity check only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)