Skip to content

Commit 20731f3

Browse files
author
Daniele Briggi
committed
refact(formatters): use sentences
1 parent e7b82f8 commit 20731f3

File tree

8 files changed

+132
-157
lines changed

8 files changed

+132
-157
lines changed

src/sqlite_rag/cli.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -439,17 +439,12 @@ def reset(
439439
def search(
440440
ctx: typer.Context,
441441
query: str,
442-
limit: int = typer.Option(10, help="Number of results to return"),
442+
limit: int = typer.Option(5, help="Number of results to return"),
443443
debug: bool = typer.Option(
444444
False,
445445
"-d",
446446
"--debug",
447-
help="Print extra debug information with modern formatting",
448-
),
449-
debug2: bool = typer.Option(
450-
False,
451-
"--debug2",
452-
help="Print debug format with sentence-level details and snippet context",
447+
help="Print extra debug information with sentence-level details",
453448
),
454449
peek: bool = typer.Option(
455450
False, "--peek", help="Print debug information using compact table format"
@@ -467,7 +462,7 @@ def search(
467462
results = results[:limit]
468463

469464
# Get the appropriate formatter and display results
470-
formatter = get_formatter(debug=debug, debug2=debug2, table_view=peek)
465+
formatter = get_formatter(debug=debug, table_view=peek)
471466
formatter.format_results(results, query)
472467

473468
typer.echo(f"{search_time:.3f} seconds")

src/sqlite_rag/database.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,6 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
9696
chunk_id INTEGER,
9797
content TEXT,
9898
embedding BLOB,
99-
sequence INTEGER,
10099
start_offset INTEGER,
101100
end_offset INTEGER
102101
)

src/sqlite_rag/engine.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,6 @@ def search_sentences(
245245
row_number() OVER (ORDER BY v.distance) AS rank_number,
246246
v.distance,
247247
sentences.content as sentence_content,
248-
sentences.sequence as sentence_sequence,
249248
sentences.start_offset as sentence_start_offset,
250249
sentences.end_offset as sentence_end_offset
251250
FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v
@@ -256,7 +255,6 @@ def search_sentences(
256255
SELECT
257256
sentence_id,
258257
sentence_content,
259-
sentence_sequence,
260258
sentence_start_offset,
261259
sentence_end_offset,
262260
rank_number,
@@ -278,7 +276,6 @@ def search_sentences(
278276
SentenceResult(
279277
id=row["sentence_id"],
280278
chunk_id=chunk_id,
281-
sequence=row["sentence_sequence"],
282279
rank=row["rank_number"],
283280
distance=row["distance"],
284281
start_offset=row["sentence_start_offset"],

src/sqlite_rag/formatters.py

Lines changed: 123 additions & 136 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,81 @@ def _format_uri_display(self, uri: str, icon: str, max_width: int = 75) -> str:
8282
uri_display = f"{icon} ...{uri[-available_width:]}"
8383
return uri_display
8484

85+
def _build_sentence_preview(
86+
self,
87+
chunk_content: str,
88+
sentences: List[SentenceResult],
89+
max_chars: int = 400,
90+
) -> str:
91+
"""Build preview from top 3 ranked sentences with [...] for gaps.
92+
93+
Args:
94+
chunk_content: The full chunk text
95+
sentences: List of SentenceResult objects (should already be sorted by rank)
96+
max_chars: Maximum total characters for preview
97+
98+
Returns:
99+
Preview string with top sentences and [...] separators.
100+
Falls back to truncated chunk_content if sentences have no offsets.
101+
"""
102+
103+
# Take top 3 sentences (they should already be sorted by rank/distance)
104+
top_sentences = sentences[:3] if sentences else []
105+
106+
if not top_sentences:
107+
# Fallback: no sentences, return truncated chunk content
108+
return chunk_content[:max_chars]
109+
110+
# Filter sentences that have offset information
111+
sentences_with_offsets = [
112+
s
113+
for s in top_sentences
114+
if s.start_offset is not None and s.end_offset is not None
115+
]
116+
117+
if not sentences_with_offsets:
118+
# Fallback: sentences exist but no offset information, return truncated chunk content
119+
return chunk_content[:max_chars]
120+
121+
# Sort by start_offset to maintain document order
122+
sentences_with_offsets.sort(
123+
key=lambda s: s.start_offset if s.start_offset is not None else -1
124+
)
125+
126+
preview_parts = []
127+
total_chars = 0
128+
prev_end_offset = None
129+
130+
for sentence in sentences_with_offsets:
131+
# Extract sentence text using offsets
132+
sentence_text = chunk_content[
133+
sentence.start_offset : sentence.end_offset
134+
].strip()
135+
136+
# Calculate remaining budget including potential separator
137+
separator_len = len(" [...] ") if preview_parts else 0
138+
remaining = max_chars - total_chars - separator_len
139+
140+
if remaining <= 0:
141+
break
142+
143+
# Truncate sentence if needed
144+
if len(sentence_text) > remaining:
145+
sentence_text = sentence_text[: remaining - 3] + "..."
146+
147+
# Check if there's a gap > 10 chars from previous sentence
148+
if prev_end_offset is not None and sentence.start_offset is not None:
149+
gap_size = sentence.start_offset - prev_end_offset
150+
if gap_size > 10:
151+
preview_parts.append("[...]")
152+
total_chars += len(" [...] ")
153+
154+
preview_parts.append(sentence_text)
155+
total_chars += len(sentence_text)
156+
prev_end_offset = sentence.end_offset
157+
158+
return " ".join(preview_parts)
159+
85160

86161
class BoxedFormatter(SearchResultFormatter):
87162
"""Base class for boxed result formatters."""
@@ -100,8 +175,15 @@ def format_results(self, results: List[DocumentResult], query: str) -> None:
100175
def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
101176
"""Format a single result with box layout."""
102177
icon = self._get_file_icon(doc.document.uri or "")
178+
179+
# Use sentence-based preview if sentences are available
180+
if doc.sentences:
181+
snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences)
182+
else:
183+
snippet_text = doc.snippet
184+
103185
snippet_lines = self._clean_and_wrap_snippet(
104-
doc.snippet, width=75, max_length=400
186+
snippet_text, width=75, max_length=400
105187
)
106188

107189
# Draw the result box header
@@ -164,33 +246,19 @@ def _get_debug_line(self, doc: DocumentResult) -> str:
164246
def _should_show_debug(self) -> bool:
165247
return True
166248

249+
def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
250+
"""Format a single result with box layout including sentence summary."""
251+
icon = self._get_file_icon(doc.document.uri or "")
167252

168-
class BoxedDebug2Formatter(BoxedFormatter):
169-
"""Debug formatter showing sentence-level details with snippet preview from top sentences."""
253+
# Use sentence-based preview if sentences are available
254+
if doc.sentences:
255+
snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences)
256+
else:
257+
snippet_text = doc.snippet
170258

171-
def _get_debug_line(self, doc: DocumentResult) -> str:
172-
"""Format debug metrics line."""
173-
combined = (
174-
f"{doc.combined_rank:.5f}" if doc.combined_rank is not None else "N/A"
175-
)
176-
vec_info = (
177-
f"#{doc.vec_rank} ({doc.vec_distance:.6f})"
178-
if doc.vec_rank is not None
179-
else "N/A"
180-
)
181-
fts_info = (
182-
f"#{doc.fts_rank} ({doc.fts_score:.6f})"
183-
if doc.fts_rank is not None
184-
else "N/A"
259+
snippet_lines = self._clean_and_wrap_snippet(
260+
snippet_text, width=75, max_length=400
185261
)
186-
return f"│ Combined: {combined} │ Vector: {vec_info} │ FTS: {fts_info}"
187-
188-
def _should_show_debug(self) -> bool:
189-
return True
190-
191-
def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
192-
"""Format a single result with box layout including sentence details."""
193-
icon = self._get_file_icon(doc.document.uri or "")
194262

195263
# Draw the result box header
196264
header = f"┌─ Result #{idx} " + "─" * (67 - len(str(idx)))
@@ -213,139 +281,52 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
213281
typer.echo(debug_line)
214282
typer.echo("├" + "─" * 77 + "┤")
215283

216-
# Display snippet preview from top sentences
217-
if doc.sentences:
218-
snippet_preview = self._build_sentence_preview(doc.snippet, doc.sentences)
219-
preview_lines = self._clean_and_wrap_snippet(
220-
snippet_preview, width=75, max_length=400
221-
)
222-
223-
typer.echo(
224-
"│ Preview (top 3 sentences): │"
225-
)
226-
for line in preview_lines:
227-
typer.echo(f"│ {line:<75} │")
284+
# Display snippet preview
285+
for line in snippet_lines:
286+
typer.echo(f"│ {line:<75} │")
228287

288+
# Display sentence details if available
289+
if doc.sentences:
229290
typer.echo("├" + "─" * 77 + "┤")
230291
typer.echo(
231292
"│ Sentences: │"
232293
)
233294

234-
# Display sentences with their distances
235-
for i, sentence in enumerate(doc.sentences, 1):
295+
for sentence in doc.sentences[:5]: # Show max 5 sentences
236296
distance_str = (
237297
f"{sentence.distance:.6f}"
238298
if sentence.distance is not None
239299
else "N/A"
240300
)
241301
rank_str = f"#{sentence.rank}" if sentence.rank is not None else "N/A"
242302

243-
# Format sentence header
244-
sentence_header = (
245-
f"│ {i}. [Rank: {rank_str}, Distance: {distance_str}]"
246-
)
247-
typer.echo(sentence_header.ljust(78) + " │")
248-
249-
# Extract sentence text using offsets from the chunk snippet
303+
# Extract sentence preview (first 50 chars)
250304
if (
251305
sentence.start_offset is not None
252306
and sentence.end_offset is not None
253307
):
254308
sentence_text = doc.snippet[
255309
sentence.start_offset : sentence.end_offset
256-
]
310+
].strip()
311+
# Truncate and clean for display
312+
sentence_preview = sentence_text.replace("\n", " ").replace(
313+
"\r", ""
314+
)
315+
if len(sentence_preview) > 50:
316+
sentence_preview = sentence_preview[:47] + "..."
257317
else:
258-
sentence_text = "[No offset information available]"
318+
sentence_preview = "[No offset info]"
259319

260-
# Wrap and display sentence content
261-
sentence_lines = self._clean_and_wrap_snippet(
262-
sentence_text, width=72, max_length=400
320+
# Format sentence line
321+
sentence_line = (
322+
f"│ {rank_str:>3} ({distance_str}) | {sentence_preview}"
263323
)
264-
for line in sentence_lines:
265-
typer.echo(f"│ {line:<72} │")
266-
else:
267-
# Fallback to regular snippet display if no sentences
268-
snippet_lines = self._clean_and_wrap_snippet(
269-
doc.snippet, width=75, max_length=400
270-
)
271-
for line in snippet_lines:
272-
typer.echo(f"│ {line:<75} │")
324+
# Pad to 78 chars and add closing border
325+
typer.echo(sentence_line.ljust(78) + " │")
273326

274327
typer.echo("└" + "─" * 77 + "┘")
275328
typer.echo()
276329

277-
def _build_sentence_preview(
278-
self,
279-
chunk_content: str,
280-
sentences: List[SentenceResult],
281-
max_chars: int = 400,
282-
) -> str:
283-
"""Build preview from top 3 ranked sentences with [...] for gaps.
284-
285-
Args:
286-
chunk_content: The full chunk text
287-
sentences: List of SentenceResult objects (should already be sorted by rank)
288-
max_chars: Maximum total characters for preview
289-
290-
Returns:
291-
Preview string with top sentences and [...] separators
292-
"""
293-
294-
# Take top 3 sentences (they should already be sorted by rank/distance)
295-
top_sentences = sentences[:3]
296-
297-
if not top_sentences:
298-
return chunk_content[:max_chars]
299-
300-
# Sort sentences by their position in the chunk (using start_offset)
301-
# so we can build a preview in the order they appear
302-
sentences_with_offsets = [
303-
s
304-
for s in top_sentences
305-
if s.start_offset is not None and s.end_offset is not None
306-
]
307-
308-
if not sentences_with_offsets:
309-
# Fallback: no offset information, return truncated chunk content
310-
return chunk_content[:max_chars]
311-
312-
# Sort by start_offset to maintain document order
313-
sentences_with_offsets.sort(key=lambda s: s.start_offset)
314-
315-
preview_parts = []
316-
total_chars = 0
317-
prev_end_offset = None
318-
319-
for sentence in sentences_with_offsets:
320-
# Extract sentence text using offsets
321-
sentence_text = chunk_content[
322-
sentence.start_offset : sentence.end_offset
323-
].strip()
324-
325-
# Calculate remaining budget including potential separator
326-
separator_len = len(" [...] ") if preview_parts else 0
327-
remaining = max_chars - total_chars - separator_len
328-
329-
if remaining <= 0:
330-
break
331-
332-
# Truncate sentence if needed
333-
if len(sentence_text) > remaining:
334-
sentence_text = sentence_text[: remaining - 3] + "..."
335-
336-
# Check if there's a gap > 10 chars from previous sentence
337-
if prev_end_offset is not None:
338-
gap_size = sentence.start_offset - prev_end_offset
339-
if gap_size > 10:
340-
preview_parts.append("[...]")
341-
total_chars += len(" [...] ")
342-
343-
preview_parts.append(sentence_text)
344-
total_chars += len(sentence_text)
345-
prev_end_offset = sentence.end_offset
346-
347-
return " ".join(preview_parts)
348-
349330

350331
class TableDebugFormatter(SearchResultFormatter):
351332
"""Table view debug formatter."""
@@ -383,8 +364,16 @@ def _print_table_header(self) -> None:
383364

384365
def _print_table_row(self, idx: int, doc: DocumentResult) -> None:
385366
"""Print a single table row."""
367+
# Use sentence-based preview if sentences are available
368+
if doc.sentences:
369+
snippet = self._build_sentence_preview(
370+
doc.snippet, doc.sentences, max_chars=52
371+
)
372+
else:
373+
snippet = doc.snippet
374+
386375
# Clean snippet display
387-
snippet = doc.snippet.replace("\n", " ").replace("\r", "")
376+
snippet = snippet.replace("\n", " ").replace("\r", "")
388377
snippet = snippet[:49] + "..." if len(snippet) > 52 else snippet
389378

390379
# Clean URI display
@@ -409,13 +398,11 @@ def _print_table_row(self, idx: int, doc: DocumentResult) -> None:
409398

410399

411400
def get_formatter(
412-
debug: bool = False, debug2: bool = False, table_view: bool = False
401+
debug: bool = False, table_view: bool = False
413402
) -> SearchResultFormatter:
414403
"""Factory function to get the appropriate formatter."""
415404
if table_view:
416405
return TableDebugFormatter()
417-
elif debug2:
418-
return BoxedDebug2Formatter()
419406
elif debug:
420407
return BoxedDebugFormatter()
421408
else:

0 commit comments

Comments
 (0)