-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsettings.toml
More file actions
31 lines (28 loc) · 1.25 KB
/
settings.toml
File metadata and controls
31 lines (28 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Filesystem locations: what gets indexed and where the index is stored.
[paths]
# Directory to index (relative to project root or absolute)
root_dir = "./examples"
# SQLite database file for passages + vectors
# NOTE(review): this is a relative path; the base directory it is resolved
# against is not stated here (presumably the project root, like root_dir) — confirm.
db_path = "db/index.db"
# Embedding model settings. `name` and `embed_dim` are coupled: change them together.
[model]
name = "all-MiniLM-L6-v2" # Hugging Face model slug loaded by SentenceTransformer
embed_dim = 384 # Dimension of the embedding vectors (must match the model named above)
# Chunking and retrieval/ranking settings.
[index]
# NOTE(review): the unit of chunk_len and chunk_overlap (characters vs. tokens)
# is not stated in this file — confirm against the chunking code.
chunk_len = 1000 # Passage chunk length
chunk_overlap = 40 # Overlap between chunks (presumably consecutive chunks, same unit as chunk_len — confirm)
top_k = 5 # Top-K search results returned to the user
# Number of candidates taken from each ranked list; set to 0 to effectively turn off BM25.
# NOTE(review): "each list" presumably refers to the BM25 and vector-search
# result lists that are merged afterwards — confirm.
pool_size = 20
# NOTE(review): presumably the k constant of reciprocal rank fusion — confirm.
rrf_k = 60 # larger K flattens the rank curve
line_by_line_ext = [ # extensions to index line-by-line (one entry per line of the file)
".csv", ".tsv"
]
# Rules for skipping files and directories.
[filters]
max_file_size_mb = 5 # Skip files larger than this many megabytes
# Explicitly ignore these file extensions (case-insensitive).
# Entries are written with the leading dot.
blacklist_ext = [
".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tif", ".tiff", # images
".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", # office
".zip", ".tar", ".gz", ".7z", ".rar", # archives
".exe", ".dll", ".so", ".dylib", # binaries
]
# Also skip directories with these names (regardless of depth)
blacklist_dirs = ["__pycache__", ".git"]