Skip to content

Commit 0423afa

Browse files
Axect and claude committed
Add comprehensive AI paper review with cached translation support
Introduce map-reduce paper review system that generates 12-section analysis (executive summary, methodology, math, experiments, etc.) with per-section caching. Reviews load instantly from cache and support translation with both original and translated files saved. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 53d04df commit 0423afa

File tree

12 files changed

+2336
-1
lines changed

12 files changed

+2336
-1
lines changed

src/arxiv_explorer/cli/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def main(
3939

4040

4141
# Import and register subcommands
42-
from . import config, daily, export, lists, notes, preferences, search # noqa: E402
42+
from . import config, daily, export, lists, notes, preferences, review, search # noqa: E402
4343

4444
app.add_typer(preferences.app, name="prefs", help="Preference management")
4545
app.add_typer(lists.app, name="list", help="Reading list management")
@@ -55,6 +55,7 @@ def main(
5555
app.command(name="dislike")(daily.dislike)
5656
app.command(name="show")(daily.show)
5757
app.command(name="translate")(daily.translate)
58+
app.command(name="review")(review.review)
5859

5960

6061
@app.command()

src/arxiv_explorer/cli/review.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
"""Review command -- generate comprehensive AI paper review."""
2+
3+
from pathlib import Path
4+
from typing import Optional
5+
6+
import typer
7+
from rich.progress import (
8+
BarColumn,
9+
Progress,
10+
SpinnerColumn,
11+
TaskProgressColumn,
12+
TextColumn,
13+
TimeElapsedColumn,
14+
)
15+
16+
from ..core.models import Language, ReviewSectionType
17+
from ..services.paper_service import PaperService
18+
from ..services.review_service import PaperReviewService
19+
from ..services.settings_service import SettingsService
20+
from ..utils.display import console, print_error, print_info, print_success
21+
22+
# Human-readable names for review sections
23+
# Display labels used by the progress bar and the --status listing.
# Lookups go through ``.get(section, section.value)``, so a section type
# missing from this table falls back to its raw enum value.
_SECTION_NAMES: dict[ReviewSectionType, str] = {
    ReviewSectionType.EXECUTIVE_SUMMARY: "Executive Summary",
    ReviewSectionType.KEY_CONTRIBUTIONS: "Key Contributions",
    ReviewSectionType.SECTION_SUMMARIES: "Section Summaries",
    ReviewSectionType.METHODOLOGY: "Methodology Analysis",
    ReviewSectionType.MATH_FORMULATIONS: "Math Formulations",
    ReviewSectionType.FIGURES: "Figure Descriptions",
    ReviewSectionType.TABLES: "Table Descriptions",
    ReviewSectionType.EXPERIMENTAL_RESULTS: "Experimental Results",
    ReviewSectionType.STRENGTHS_WEAKNESSES: "Strengths & Weaknesses",
    ReviewSectionType.RELATED_WORK: "Related Work",
    ReviewSectionType.GLOSSARY: "Glossary",
    ReviewSectionType.QUESTIONS: "Questions",
}
37+
38+
39+
def review(
40+
arxiv_id: str = typer.Argument(..., help="arXiv ID (e.g., 2401.00001)"),
41+
output: Optional[Path] = typer.Option(
42+
None, "--output", "-o", help="Save review to file (default: print to console)"
43+
),
44+
force: bool = typer.Option(
45+
False, "--force", "-f", help="Regenerate all sections (ignore cache)"
46+
),
47+
translate: bool = typer.Option(
48+
False, "--translate", "-t", help="Translate review to configured language"
49+
),
50+
language: Optional[str] = typer.Option(
51+
None, "--language", "-L", help="Target language code (e.g., 'ko')"
52+
),
53+
no_full_text: bool = typer.Option(
54+
False, "--no-full-text", help="Skip full text extraction, use abstract only"
55+
),
56+
status: bool = typer.Option(
57+
False, "--status", "-s", help="Show cached review status without generating"
58+
),
59+
delete: bool = typer.Option(
60+
False, "--delete", help="Delete cached review for this paper"
61+
),
62+
):
63+
"""Generate a comprehensive AI review of an arXiv paper.
64+
65+
Fetches the full paper text when possible (via arxiv-doc-builder),
66+
then analyzes each section with AI to produce a detailed Markdown review.
67+
Reviews are cached section-by-section -- interrupted reviews resume
68+
automatically.
69+
70+
Examples:
71+
axp review 2401.00001
72+
axp review 2401.00001 -o review.md
73+
axp review 2401.00001 --force --translate
74+
axp review 2401.00001 --status
75+
"""
76+
review_service = PaperReviewService()
77+
78+
# Handle --delete
79+
if delete:
80+
if review_service.delete_review(arxiv_id):
81+
print_success(f"Deleted cached review for {arxiv_id}")
82+
else:
83+
print_info(f"No cached review found for {arxiv_id}")
84+
return
85+
86+
# Handle --status
87+
if status:
88+
cached = review_service.get_cached_review(arxiv_id)
89+
if cached is None:
90+
print_info(f"No cached review for {arxiv_id}")
91+
else:
92+
total = len(ReviewSectionType)
93+
done = len(cached.sections)
94+
console.print(f"[bold]Review status for {arxiv_id}[/bold]")
95+
console.print(f"Sections: {done}/{total}")
96+
for st in ReviewSectionType:
97+
if st in cached.sections:
98+
icon = "[green]\u2714[/green]"
99+
else:
100+
icon = "[dim]\u2022[/dim]"
101+
console.print(
102+
f" {icon} {_SECTION_NAMES.get(st, st.value)}"
103+
)
104+
return
105+
106+
# Fetch paper metadata
107+
paper_service = PaperService()
108+
109+
with Progress(
110+
SpinnerColumn(),
111+
TextColumn("[progress.description]{task.description}"),
112+
console=console,
113+
) as progress:
114+
progress.add_task("Fetching paper metadata...", total=None)
115+
paper = paper_service.get_paper(arxiv_id)
116+
117+
if not paper:
118+
print_error(f"Paper not found: {arxiv_id}")
119+
raise typer.Exit(1)
120+
121+
console.print(f"\n[bold]{paper.title}[/bold]")
122+
console.print(f"[dim]{', '.join(paper.authors[:5])}[/dim]\n")
123+
124+
# If --no-full-text, skip extraction
125+
if no_full_text:
126+
review_service._extract_full_text = lambda _: None # type: ignore[assignment]
127+
128+
# Generate review with progress bar
129+
succeeded = 0
130+
failed = 0
131+
132+
with Progress(
133+
SpinnerColumn(),
134+
TextColumn("[progress.description]{task.description}"),
135+
BarColumn(),
136+
TaskProgressColumn(),
137+
TimeElapsedColumn(),
138+
console=console,
139+
) as progress:
140+
task = progress.add_task(
141+
"Generating review...", total=len(ReviewSectionType)
142+
)
143+
144+
def on_start(
145+
section_type: ReviewSectionType, idx: int, total: int
146+
) -> None:
147+
name = _SECTION_NAMES.get(section_type, section_type.value)
148+
progress.update(task, description=f"[cyan]{name}[/cyan]...")
149+
150+
def on_complete(section_type: ReviewSectionType, success: bool) -> None:
151+
nonlocal succeeded, failed
152+
if success:
153+
succeeded += 1
154+
else:
155+
failed += 1
156+
progress.advance(task)
157+
158+
paper_review = review_service.generate_review(
159+
paper=paper,
160+
force=force,
161+
on_section_start=on_start,
162+
on_section_complete=on_complete,
163+
)
164+
165+
if not paper_review:
166+
print_error("Review generation failed completely.")
167+
raise typer.Exit(1)
168+
169+
# Report results
170+
print_info(f"Sections: {succeeded} succeeded, {failed} failed")
171+
if paper_review.source_type == "abstract":
172+
print_info(
173+
"Note: Full text was not available."
174+
" Review is based on abstract only."
175+
)
176+
177+
# Resolve language
178+
target_lang = Language.EN
179+
if translate or language:
180+
if language:
181+
try:
182+
target_lang = Language(language)
183+
except ValueError:
184+
supported = ", ".join(lang.value for lang in Language)
185+
print_error(
186+
f"Unknown language: {language}. Supported: {supported}"
187+
)
188+
raise typer.Exit(1)
189+
else:
190+
target_lang = SettingsService().get_language()
191+
192+
# Render markdown
193+
markdown = review_service.render_markdown(paper_review, language=target_lang)
194+
195+
# Output
196+
if output:
197+
output.parent.mkdir(parents=True, exist_ok=True)
198+
output.write_text(markdown, encoding="utf-8")
199+
print_success(f"Review saved: {output}")
200+
else:
201+
console.print()
202+
from rich.markdown import Markdown
203+
204+
console.print(Markdown(markdown))

src/arxiv_explorer/core/database.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,17 @@
8989
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
9090
);
9191
92+
-- Paper review sections (incremental cache)
93+
CREATE TABLE IF NOT EXISTS paper_review_sections (
94+
id INTEGER PRIMARY KEY AUTOINCREMENT,
95+
arxiv_id TEXT NOT NULL,
96+
section_type TEXT NOT NULL,
97+
content_json TEXT NOT NULL,
98+
source_type TEXT NOT NULL DEFAULT 'abstract',
99+
generated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
100+
UNIQUE(arxiv_id, section_type)
101+
);
102+
92103
-- Paper cache
93104
CREATE TABLE IF NOT EXISTS papers (
94105
arxiv_id TEXT PRIMARY KEY NOT NULL,
@@ -109,6 +120,7 @@
109120
CREATE INDEX IF NOT EXISTS idx_list_papers_list ON reading_list_papers(list_id);
110121
CREATE INDEX IF NOT EXISTS idx_translations_arxiv ON paper_translations(arxiv_id);
111122
CREATE INDEX IF NOT EXISTS idx_papers_cached_at ON papers(cached_at);
123+
CREATE INDEX IF NOT EXISTS idx_review_sections_arxiv ON paper_review_sections(arxiv_id);
112124
"""
113125

114126

src/arxiv_explorer/core/models.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,21 @@ class Language(str, Enum):
3838
KO = "ko"
3939

4040

41+
class ReviewSectionType(str, Enum):
    """Identifiers for the individual sections of a paper review.

    Members are also ``str`` instances, so each compares equal to its raw
    string value. Definition order is iteration order.
    """

    EXECUTIVE_SUMMARY = "executive_summary"
    KEY_CONTRIBUTIONS = "key_contributions"
    SECTION_SUMMARIES = "section_summaries"
    METHODOLOGY = "methodology"
    MATH_FORMULATIONS = "math_formulations"
    FIGURES = "figures"
    TABLES = "tables"
    EXPERIMENTAL_RESULTS = "experimental_results"
    STRENGTHS_WEAKNESSES = "strengths_weaknesses"
    RELATED_WORK = "related_work"
    GLOSSARY = "glossary"
    QUESTIONS = "questions"
54+
55+
4156
@dataclass
4257
class Paper:
4358
"""Paper data model."""
@@ -143,6 +158,41 @@ class KeywordInterest:
143158
source: str = "explicit" # 'explicit' or 'inferred'
144159

145160

161+
@dataclass
class ReviewSection:
    """One section of a paper review, cached individually."""

    # Row identifier of the cached section.
    id: int
    # arXiv identifier of the paper this section belongs to.
    arxiv_id: str
    # Which part of the review this record holds.
    section_type: ReviewSectionType
    # Section payload as text -- presumably JSON-encoded; confirm against
    # the code that writes it.
    content_json: str
    # Defaults to the naive local time at which the instance is created.
    generated_at: datetime = field(default_factory=datetime.now)
170+
171+
172+
@dataclass
class PaperReview:
    """Assembled paper review."""

    # Paper metadata carried alongside the review.
    arxiv_id: str
    title: str
    authors: list[str]
    categories: list[str]
    published: datetime
    abstract: str
    # Generated content keyed by section type; empty until sections are added.
    sections: dict[ReviewSectionType, dict] = field(default_factory=dict)
    pdf_url: Optional[str] = None
    # Defaults to "abstract"; other values are not visible here.
    source_type: str = "abstract"
    # Defaults to the naive local time at which the instance is created.
    generated_at: datetime = field(default_factory=datetime.now)

    @property
    def is_complete(self) -> bool:
        """Return True when the section keys are exactly the set of all section types."""
        return set(self.sections) == set(ReviewSectionType)

    @property
    def missing_sections(self) -> list[ReviewSectionType]:
        """Return the section types not yet present, in enum definition order."""
        return [s for s in ReviewSectionType if s not in self.sections]
194+
195+
146196
@dataclass
147197
class RecommendedPaper:
148198
"""Recommended paper with score."""

0 commit comments

Comments
 (0)