Skip to content

Commit b0dbad4

Browse files
authored
fix: run checkpoint after DuckDB inserts (#145)
1 parent 68c5cb9 commit b0dbad4

File tree

3 files changed, with 11 additions and 2 deletions

3 files changed, with 11 additions and 2 deletions

src/raglite/_eval.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import numpy as np
77
from pydantic import BaseModel, ConfigDict, Field, field_validator
8+
from sqlalchemy import text
89
from sqlmodel import Session, func, select
910
from tqdm.auto import tqdm, trange
1011

@@ -18,7 +19,7 @@
1819
from raglite._search import hybrid_search, retrieve_chunk_spans, vector_search
1920

2021

21-
def insert_evals( # noqa: C901
22+
def insert_evals( # noqa: C901, PLR0912
2223
*, num_evals: int = 100, max_contexts_per_eval: int = 20, config: RAGLiteConfig | None = None
2324
) -> None:
2425
"""Generate and insert evals into the database."""
@@ -172,6 +173,8 @@ class AnswerResponse(BaseModel):
172173
)
173174
session.add(eval_)
174175
session.commit()
176+
if engine.dialect.name == "duckdb":
177+
session.execute(text("CHECKPOINT;"))
175178

176179

177180
def answer_evals(

src/raglite/_insert.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,11 +157,14 @@ def insert_document(
157157
session.commit()
158158
if engine.dialect.name == "duckdb":
159159
# DuckDB does not automatically update its keyword search index [1], so we do it
160-
# manually after insertion.
160+
# manually after insertion. Additionally, we synchronize data in the write-ahead log
161+
# (WAL) to the database data file with the CHECKPOINT statement [2].
161162
# [1] https://duckdb.org/docs/stable/extensions/full_text_search
163+
# [2] https://duckdb.org/docs/stable/sql/statements/checkpoint.html
162164
session.execute(
163165
text("PRAGMA create_fts_index('chunk', 'id', 'body', overwrite = 1);")
164166
)
165167
session.commit()
168+
session.execute(text("CHECKPOINT;"))
166169
pbar.update(1)
167170
pbar.close()

src/raglite/_query_adapter.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Compute and update an optimal query adapter."""
22

33
import numpy as np
4+
from sqlalchemy import text
45
from sqlalchemy.orm.attributes import flag_modified
56
from sqlmodel import Session, col, select
67
from tqdm.auto import tqdm
@@ -162,3 +163,5 @@ def update_query_adapter( # noqa: PLR0915, C901
162163
flag_modified(index_metadata, "metadata_")
163164
session.add(index_metadata)
164165
session.commit()
166+
if engine.dialect.name == "duckdb":
167+
session.execute(text("CHECKPOINT;"))

0 commit comments

Comments
 (0)