Skip to content

Commit d7fdee1

Browse files
authored
Merge pull request #2 from crd/codex/add-pytest-tests-and-update-documentation
Add pytest coverage and CI pipeline
2 parents b129ba0 + f83d2d8 commit d7fdee1

File tree

6 files changed

+179
-3
lines changed

6 files changed

+179
-3
lines changed

.github/workflows/tests.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: Tests
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
8+
jobs:
9+
pytest:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
- name: Set up uv
14+
uses: astral-sh/setup-uv@v3
15+
with:
16+
python-version: "3.13"
17+
- name: Install dependencies
18+
run: uv sync --extra dev
19+
- name: Run tests
20+
run: uv run --extra dev pytest

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Create/refresh a local .venv and install deps from pyproject/uv.lock
44
install:
5-
uv sync
5+
uv sync --extra dev
66

77
# Optional: create venv explicitly (uv sync will also create one if missing)
88
venv:
@@ -15,6 +15,10 @@ ingest:
1515
chat:
1616
uv run chat.py
1717

18+
# Run the automated test suite
19+
test:
20+
uv run --extra dev pytest
21+
1822
# Create/update a lockfile explicitly (optional; uv sync also updates it)
1923
lock:
2024
uv lock

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
A fully local RAG pipeline using LlamaIndex + Ollama + Chroma to query your Logseq notes.
44

55
## Prereqs
6-
- Python 3.10+
6+
- Python 3.13+
77
- Ollama running (https://ollama.com)
88
- Pull a chat and embedding model:
99
```bash
@@ -36,6 +36,11 @@ make ingest
3636
make chat
3737
```
3838

39+
## Tests
40+
```bash
41+
make test
42+
```
43+
3944
### Example questions
4045
- Summarize tasks tagged #home in October 2025.
4146
- Find notes referencing [[Team Topologies]] and list my pros/cons.

ingest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
PAGE_LINK = re.compile(r"\[\[([^\]]+)\]\]") # [[Page]]
3434
BLOCK_REF = re.compile(r"\(\(([a-zA-Z0-9_-]{6,})\)\)") # ((block-id))
3535
TAG_HASH = re.compile(r"(?<!\w)#([A-Za-z0-9/_-]+)") # #tag
36-
TAG_PROP = re.compile(r"^tags::\s*(.+)$", re.MULTILINE) # tags:: a, b
36+
TAG_PROP = re.compile(r"^\s*tags::\s*(.+)$", re.MULTILINE) # tags:: a, b
3737

3838
def normalize_logseq_links(text: str) -> str:
3939
"""Replace Logseq-specific link syntax with plain text.

pyproject.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,15 @@ dependencies = [
1212
"llama-index-vector-stores-chroma>=0.5.3",
1313
"pyyaml>=6.0.3",
1414
]
15+
16+
[project.optional-dependencies]
17+
dev = [
18+
"pytest>=8.3.3",
19+
]
20+
21+
[tool.pytest.ini_options]
22+
minversion = "8.0"
23+
addopts = "-ra"
24+
testpaths = [
25+
"tests",
26+
]

tests/test_ingest.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import importlib
2+
import sys
3+
from pathlib import Path
4+
import textwrap
5+
6+
import pytest
7+
8+
9+
@pytest.fixture(scope="session")
10+
def ingest_module():
# Session-scoped fixture: yields the imported `ingest` module to the tests.
# Setup writes a placeholder config.yaml into the parent of this file's
# directory when none exists there, and puts that directory at the front of
# sys.path when it is not already listed.
# Teardown undoes only what setup did (see the `created` / `added_to_path`
# flags), so a pre-existing config.yaml or sys.path entry is left alone.
# NOTE(review): the config is written before the import, presumably because
# ingest.py reads config.yaml at import time -- confirm against ingest.py.
11+
project_root = Path(__file__).resolve().parents[1]
12+
config_path = project_root / "config.yaml"
13+
# Records whether this fixture wrote config.yaml, so teardown never deletes
# a file that was already there.
created = False
14+
15+
if not config_path.exists():
16+
config_path.write_text(
17+
textwrap.dedent(
18+
"""
19+
logseq_root: /tmp
20+
include_dirs: []
21+
file_exts: []
22+
exclude_globs: []
23+
models:
24+
llm: llama3.1
25+
embedding: nomic-embed-text
26+
storage:
27+
chroma_path: /tmp/chroma
28+
retrieval:
29+
top_k: 5
30+
mmr: false
31+
chunk:
32+
chunk_size: 512
33+
chunk_overlap: 50
34+
"""
35+
# strip() removes the blank first/last lines left by the triple-quoted literal.
).strip()
36+
)
37+
created = True
38+
39+
# Records whether this fixture inserted project_root into sys.path.
added_to_path = False
40+
if str(project_root) not in sys.path:
41+
sys.path.insert(0, str(project_root))
42+
added_to_path = True
43+
44+
try:
45+
# Reuse an already-imported `ingest` rather than importing it a second time.
if "ingest" in sys.modules:
46+
module = sys.modules["ingest"]
47+
else:
48+
module = importlib.import_module("ingest")
49+
yield module
50+
finally:
# Teardown: remove only the sys.path entry and config file added above.
51+
if added_to_path and str(project_root) in sys.path:
52+
sys.path.remove(str(project_root))
53+
if created and config_path.exists():
54+
config_path.unlink()
55+
56+
57+
def test_normalize_logseq_links(ingest_module):
    """A ``[[Page]]`` link should lose its brackets and a ``((id))`` block
    reference should be rendered as ``[ref:id]``."""
    raw = "Follow [[Page Name]] then see ((abc123))."
    expected = "Follow Page Name then see [ref:abc123]."
    assert ingest_module.normalize_logseq_links(raw) == expected
61+
62+
63+
def test_parse_tags_combines_sources(ingest_module):
# Feeds parse_tags text that carries tags in two forms: inline #hashtags
# (with #alpha appearing twice) and a `tags::` property line whose commas have
# uneven spacing. "beta" occurs in both forms.
64+
text = """
65+
#alpha introduces the topic
66+
Another line with #beta and #alpha
67+
tags:: gamma, beta , delta
68+
"""
69+
result = ingest_module.parse_tags(text)
70+
# Expected: every tag exactly once, whitespace trimmed, in alphabetical order.
assert result == ["alpha", "beta", "delta", "gamma"]
71+
72+
73+
def test_page_title_from_path(ingest_module):
    """For ``project_notes.md`` the derived page title should be
    ``project-notes`` (no directory, no extension, hyphen for underscore)."""
    title = ingest_module.page_title_from_path("/tmp/logseq/pages/project_notes.md")
    assert title == "project-notes"
76+
77+
78+
def test_collect_files_respects_ext_and_excludes(tmp_path, ingest_module):
    """collect_files should return only the ``.md`` files from the included
    directories, skipping other extensions and anything under the excluded
    glob."""
    pages_dir = tmp_path / "pages"
    journals_dir = tmp_path / "journals"
    archive_dir = pages_dir / "archive"
    for directory in (pages_dir, journals_dir, archive_dir):
        directory.mkdir()

    wanted_page = pages_dir / "alpha.md"
    wanted_journal = journals_dir / "2025-01-01.md"
    file_bodies = {
        wanted_page: "alpha",
        wanted_journal: "journal",
        pages_dir / "ignore.txt": "nope",      # extension not in the allowed list
        archive_dir / "old.md": "archive",     # matches the exclude glob
    }
    for file_path, body in file_bodies.items():
        file_path.write_text(body)

    result = ingest_module.collect_files(
        str(tmp_path),
        ["pages", "journals"],
        [".md"],
        ["pages/archive/*"],
    )

    # Order is not part of the contract being checked, so compare as sets.
    expected = {str(wanted_page), str(wanted_journal)}
    assert set(result) == expected
104+
105+
106+
def test_load_documents_applies_metadata(monkeypatch, tmp_path, ingest_module):
# Writes one small page under tmp_path/pages, replaces ingest's `Document`
# with a stand-in that only stores its arguments, then checks the text and
# metadata that load_documents produces for that single file.
107+
docs_dir = tmp_path / "pages"
108+
docs_dir.mkdir()
109+
doc_path = docs_dir / "demo_page.md"
110+
doc_path.write_text(
111+
"""
112+
#alpha tag at the top
113+
tags:: beta, alpha
114+
Content referencing [[Other Page]] and ((xyz789)).
115+
"""
116+
)
117+
118+
# Minimal stand-in: keeps `text` and `metadata` so the assertions below can
# inspect exactly what load_documents passed to the constructor.
class DummyDocument:
119+
def __init__(self, text, metadata):
120+
self.text = text
121+
self.metadata = metadata
122+
123+
# monkeypatch restores the real `Document` attribute when the test ends.
monkeypatch.setattr(ingest_module, "Document", DummyDocument)
124+
125+
docs = ingest_module.load_documents([str(doc_path)])
126+
127+
assert len(docs) == 1
128+
doc = docs[0]
129+
assert doc.text.strip().startswith("#alpha tag at the top")
130+
# No Logseq link or block-reference delimiters may remain in the stored text.
assert "[[" not in doc.text and "((" not in doc.text
131+
assert doc.metadata["source"] == str(doc_path)
132+
# Title is expected hyphenated, while basename keeps the on-disk underscore.
assert doc.metadata["title"] == "demo-page"
133+
# Tags are expected as one comma-separated string, sorted, without duplicates.
assert doc.metadata["tags"] == "alpha, beta"
134+
assert doc.metadata["basename"] == "demo_page.md"
135+
assert doc.metadata["dir"] == "pages"

0 commit comments

Comments
 (0)