Skip to content

Commit e3b942f

Browse files
refactor(config): split threshold into search_threshold and ask_threshold
Separate the config fields so that search and ask can be tuned independently. Both default to 0.001. Bump version to v1.0.7.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6eb0fcb commit e3b942f

File tree

7 files changed

+21
-17
lines changed

7 files changed

+21
-17
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ sources = [
125125
# embed_dims = 1536
126126
# chat_model = "gpt-4o-mini"
127127
# max_chunk_chars = 2000
128-
# threshold = 0.001 # minimum cosine similarity to include a result (also --threshold flag)
128+
# search_threshold = 0.001 # min cosine similarity for `kb search` (also --threshold flag)
129+
# ask_threshold = 0.001 # min cosine similarity for `kb ask` (also --threshold flag)
129130
# rerank_fetch_k = 20
130131
# rerank_top_k = 5
131132
# index_code = false # set true to also index source code files

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "kb"
3-
version = "1.0.6"
3+
version = "1.0.7"
44
description = "CLI knowledge base: index markdown + PDFs, hybrid search, RAG answers. Powered by sqlite-vec."
55
readme = "README.md"
66
license = "MIT"

src/kb/cli.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
kb sources List configured sources
4343
kb index [DIR...] [--no-size-limit] Index sources (skip files > max_file_size_mb)
4444
kb allow <file> Whitelist a large file for indexing
45-
kb search "query" [k] Hybrid semantic + keyword search (default k=5)
45+
kb search "query" [k] [--threshold N] Hybrid semantic + keyword search (default k=5, threshold=0.001)
4646
kb ask "question" [k] [--threshold N] RAG: search + rerank + answer (default k=8, threshold=0.001)
4747
kb similar <file> [k] Find similar documents (no API call, default k=10)
4848
kb tag <file> tag1 [tag2...] Add tags to a document
@@ -219,7 +219,7 @@ def cmd_index(cfg: Config, args: list[str]):
219219

220220
def cmd_search(query: str, cfg: Config, top_k: int = 5, threshold: float | None = None):
221221
if threshold is not None:
222-
cfg.threshold = threshold
222+
cfg.search_threshold = threshold
223223
if not cfg.db_path.exists():
224224
print("No index found. Run 'kb index' first.")
225225
sys.exit(1)
@@ -240,7 +240,7 @@ def cmd_search(query: str, cfg: Config, top_k: int = 5, threshold: float | None
240240
query_emb = resp.data[0].embedding
241241
embed_ms = (time.time() - t0) * 1000
242242

243-
has_threshold = cfg.threshold > 0
243+
has_threshold = cfg.search_threshold > 0
244244
retrieve_k = (top_k * 5) if has_filters else (top_k * 3)
245245

246246
t0 = time.time()
@@ -282,7 +282,7 @@ def cmd_search(query: str, cfg: Config, top_k: int = 5, threshold: float | None
282282
results = [
283283
r
284284
for r in results
285-
if r["similarity"] is None or r["similarity"] >= cfg.threshold
285+
if r["similarity"] is None or r["similarity"] >= cfg.search_threshold
286286
]
287287

288288
print(f'Query: "{clean_query}"')
@@ -319,7 +319,7 @@ def cmd_search(query: str, cfg: Config, top_k: int = 5, threshold: float | None
319319
def cmd_ask(question: str, cfg: Config, top_k: int = 8, threshold: float | None = None):
320320
"""Full RAG: hybrid retrieve -> filter -> LLM rerank -> confidence filter -> answer."""
321321
if threshold is not None:
322-
cfg.threshold = threshold
322+
cfg.ask_threshold = threshold
323323
if not cfg.db_path.exists():
324324
print("No index found. Run 'kb index' first.")
325325
sys.exit(1)
@@ -380,7 +380,7 @@ def cmd_ask(question: str, cfg: Config, top_k: int = 8, threshold: float | None
380380
filtered = [
381381
r
382382
for r in results
383-
if r["similarity"] is None or r["similarity"] >= cfg.threshold
383+
if r["similarity"] is None or r["similarity"] >= cfg.ask_threshold
384384
]
385385

386386
if not filtered:

src/kb/config.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,11 @@
3939
# min_chunk_chars = 50
4040
4141
# Search
42-
# threshold = 0.001 # minimum cosine similarity to include a result (0.0–1.0)
43-
# rrf_k = 60.0 # RRF smoothing constant
44-
# rerank_fetch_k = 20 # candidates to fetch for LLM rerank
45-
# rerank_top_k = 5 # how many to keep after rerank
42+
# search_threshold = 0.001 # min cosine similarity for `kb search` (0.0–1.0)
43+
# ask_threshold = 0.001 # min cosine similarity for `kb ask` (0.0–1.0)
44+
# rrf_k = 60.0 # RRF smoothing constant
45+
# rerank_fetch_k = 20 # candidates to fetch for LLM rerank
46+
# rerank_top_k = 5 # how many to keep after rerank
4647
4748
# Format options
4849
# index_code = false # also index source code files (.py, .js, .ts, etc.)
@@ -91,7 +92,8 @@ class Config:
9192
chat_model: str = "gpt-4o-mini"
9293
max_chunk_chars: int = 2000
9394
min_chunk_chars: int = 50
94-
threshold: float = 0.001
95+
search_threshold: float = 0.001
96+
ask_threshold: float = 0.001
9597
rrf_k: float = 60.0
9698
rerank_fetch_k: int = 20
9799
rerank_top_k: int = 5

tests/test_cli_commands.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def test_search_top_k(self, populated_db, capsys):
186186

187187
def test_threshold_reduces_result_count(self, tmp_path, capsys):
188188
"""Threshold should remove low-similarity results, not backfill with FTS-only."""
189-
cfg = Config(embed_dims=4, threshold=0.99)
189+
cfg = Config(embed_dims=4, search_threshold=0.99)
190190
cfg.scope = "project"
191191
cfg.config_dir = tmp_path
192192
cfg.config_path = tmp_path / ".kb.toml"
@@ -300,7 +300,7 @@ def test_ask_calls_rerank_when_enough_results(self, populated_db, capsys):
300300
def test_ask_no_results_above_threshold(self, tmp_path, capsys):
301301
"""When all results have similarity below threshold, show 'no relevant documents'."""
302302
# Build a DB where vec results have high distance (low similarity)
303-
cfg = Config(embed_dims=4, threshold=0.99)
303+
cfg = Config(embed_dims=4, ask_threshold=0.99)
304304
cfg.scope = "project"
305305
cfg.config_dir = tmp_path
306306
cfg.config_path = tmp_path / ".kb.toml"

tests/test_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def test_defaults(self):
2222
assert cfg.chat_model == "gpt-4o-mini"
2323
assert cfg.max_chunk_chars == 2000
2424
assert cfg.min_chunk_chars == 50
25-
assert cfg.threshold == 0.001
25+
assert cfg.search_threshold == 0.001
26+
assert cfg.ask_threshold == 0.001
2627
assert cfg.rrf_k == 60.0
2728
assert cfg.rerank_fetch_k == 20
2829
assert cfg.rerank_top_k == 5

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)