Skip to content

Commit af941b2

Browse files
committed
feat(paper_review): add rebuttal generation and assessment support
Add two new capabilities to the paper review pipeline: - Rebuttal Generation: drafts point-by-point rebuttals with [TODO] placeholders for items requiring new experiments, proofs, or data the author must provide - Rebuttal Assessment: evaluates whether an author's rebuttal adequately addresses reviewer concerns and provides an updated recommendation score (1-6) New files: - graders/rebuttal_generation.py, graders/rebuttal_assessment.py - prompts/rebuttal_generation.py, prompts/rebuttal_assessment.py - examples/rebuttal_workflow.py Modified files: - schema.py: RebuttalResult, RebuttalAssessmentResult, new ReviewStage values - pipeline.py: generate_rebuttal(), assess_rebuttal(), review_and_report() extension - report.py: rebuttal draft and assessment report sections - __init__.py files: export new classes Made-with: Cursor
1 parent 33bafc8 commit af941b2

File tree

11 files changed

+1016
-1
lines changed

11 files changed

+1016
-1
lines changed

cookbooks/paper_review/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
CriticalityGrader,
2222
FormatGrader,
2323
JailbreakingGrader,
24+
RebuttalAssessmentGrader,
25+
RebuttalGenerationGrader,
2426
ReviewGrader,
2527
)
2628
from cookbooks.paper_review.pipeline import PaperReviewPipeline, PipelineConfig
@@ -32,6 +34,10 @@
3234
CriticalityResult,
3335
PaperReviewResult,
3436
ProgressCallback,
37+
RebuttalAssessmentResult,
38+
RebuttalConcern,
39+
RebuttalPointAssessment,
40+
RebuttalResult,
3541
ReviewProgress,
3642
ReviewResult,
3743
ReviewStage,
@@ -49,6 +55,8 @@
4955
"CriticalityGrader",
5056
"FormatGrader",
5157
"JailbreakingGrader",
58+
"RebuttalGenerationGrader",
59+
"RebuttalAssessmentGrader",
5260
# Processors
5361
"BibChecker",
5462
"TexPackageProcessor",
@@ -58,6 +66,10 @@
5866
"ReviewResult",
5967
"CriticalityResult",
6068
"BibVerificationSummary",
69+
"RebuttalResult",
70+
"RebuttalConcern",
71+
"RebuttalAssessmentResult",
72+
"RebuttalPointAssessment",
6173
# Progress
6274
"ReviewStage",
6375
"ReviewProgress",
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# -*- coding: utf-8 -*-
2+
"""Example: rebuttal generation and assessment workflows.
3+
4+
Usage:
5+
# Generate a rebuttal draft after reviewing a paper
6+
python -m cookbooks.paper_review.examples.rebuttal_workflow generate \
7+
--pdf_path paper.pdf --api_key YOUR_KEY
8+
9+
# Assess an existing rebuttal against a review
10+
python -m cookbooks.paper_review.examples.rebuttal_workflow assess \
11+
--pdf_path paper.pdf --rebuttal_path rebuttal.txt --api_key YOUR_KEY
12+
13+
# Full pipeline: review + generate rebuttal in one call
14+
python -m cookbooks.paper_review.examples.rebuttal_workflow full \
15+
--pdf_path paper.pdf --api_key YOUR_KEY
16+
"""
17+
18+
import asyncio
19+
from pathlib import Path
20+
21+
import fire
22+
23+
from cookbooks.paper_review.pipeline import PaperReviewPipeline, PipelineConfig
24+
from cookbooks.paper_review.report import generate_report
25+
26+
27+
async def _generate_rebuttal(
    pdf_path: str,
    model_name: str = "gpt-4o",
    api_key: str = "",
    base_url: str | None = None,
    discipline: str | None = None,
    venue: str | None = None,
    language: str | None = None,
    output_path: str | None = None,
):
    """Review a paper then generate a rebuttal draft with [TODO] placeholders.

    Runs the review stage first, then feeds the review result back into the
    pipeline to draft a point-by-point rebuttal. The rendered report is either
    written to ``output_path`` or printed to stdout, and any ``[TODO:`` markers
    left in the draft are counted and surfaced to the user.
    """
    pipeline = PaperReviewPipeline(
        PipelineConfig(
            model_name=model_name,
            api_key=api_key,
            base_url=base_url,
            discipline=discipline,
            venue=venue,
            language=language,
        )
    )

    print("Step 1: Reviewing paper...")
    outcome = await pipeline.review_paper(pdf_path)

    # Without a review there is nothing to rebut.
    if not outcome.review:
        print("Review failed or was disabled. Cannot generate rebuttal.")
        return

    print(f"Review score: {outcome.review.score}/6")
    print("\nStep 2: Generating rebuttal draft...")
    draft = await pipeline.generate_rebuttal(pdf_path, review_result=outcome)
    outcome.rebuttal = draft

    report = generate_report(outcome, Path(pdf_path).stem, output_path)
    if output_path:
        print(f"\nReport saved to: {output_path}")
    else:
        print("\n" + report)

    # Placeholders mark items that need new experiments/proofs/data from the author.
    todo_count = draft.rebuttal_text.count("[TODO:")
    if todo_count:
        print(f"\n>>> {todo_count} [TODO] placeholder(s) require your attention. <<<")
69+
70+
71+
async def _assess_rebuttal(
    pdf_path: str,
    rebuttal_path: str,
    review_text: str | None = None,
    model_name: str = "gpt-4o",
    api_key: str = "",
    base_url: str | None = None,
    discipline: str | None = None,
    language: str | None = None,
    output_path: str | None = None,
):
    """Review a paper then assess an existing rebuttal.

    Args:
        pdf_path: Path to the paper PDF under review.
        rebuttal_path: Path to a UTF-8 text file holding the author rebuttal.
        review_text: Optional pre-existing review text; if omitted, a fresh
            review is generated and its text is used.
        model_name: Chat model used for the review/assessment calls.
        api_key: API key for the model backend.
        base_url: Optional custom endpoint for the model backend.
        discipline: Optional discipline hint passed to the pipeline.
        language: Optional output language hint passed to the pipeline.
        output_path: If given, the rendered report is written here instead of
            being printed to stdout.
    """
    config = PipelineConfig(
        model_name=model_name,
        api_key=api_key,
        base_url=base_url,
        discipline=discipline,
        language=language,
    )
    pipeline = PaperReviewPipeline(config)

    rebuttal_text = Path(rebuttal_path).read_text(encoding="utf-8")

    if review_text:
        print("Using provided review text.")
        # NOTE(review): even with caller-supplied review text, a full review of
        # the paper is still run here — presumably only to obtain a result
        # object for assess_rebuttal/generate_report. Confirm this extra model
        # call is intended; it is costly and its review text is discarded.
        result = await pipeline.review_paper(pdf_path)
    else:
        print("Step 1: Reviewing paper...")
        result = await pipeline.review_paper(pdf_path)
        if result.review:
            review_text = result.review.review
        else:
            # No review text available from either source; cannot proceed.
            print("Review failed. Cannot assess rebuttal without review text.")
            return

    print("\nStep 2: Assessing rebuttal...")
    assessment = await pipeline.assess_rebuttal(
        pdf_path,
        rebuttal_text=rebuttal_text,
        review_text=review_text,
        review_result=result,
    )
    # Attach the assessment so generate_report can render its section.
    result.rebuttal_assessment = assessment

    report = generate_report(result, Path(pdf_path).stem, output_path)
    if output_path:
        print(f"\nReport saved to: {output_path}")
    else:
        print("\n" + report)

    print(f"\nScore: {assessment.original_score}/6 -> {assessment.updated_score}/6")
122+
123+
124+
async def _full_pipeline(
    pdf_path: str,
    model_name: str = "gpt-4o",
    api_key: str = "",
    base_url: str | None = None,
    discipline: str | None = None,
    venue: str | None = None,
    language: str | None = None,
    output_path: str | None = None,
):
    """Review + generate rebuttal in a single review_and_report call.

    Sets ``enable_rebuttal_generation=True`` on the pipeline config so the
    rebuttal draft is produced inside ``review_and_report`` itself.

    Args:
        pdf_path: Path to the paper PDF to review.
        model_name: Chat model used for review and rebuttal generation.
        api_key: API key for the model backend.
        base_url: Optional custom endpoint for the model backend.
        discipline: Optional discipline hint passed to the pipeline.
        venue: Optional venue hint passed to the pipeline.
        language: Optional output language hint passed to the pipeline.
        output_path: If given, the report is written here instead of printed.
    """
    config = PipelineConfig(
        model_name=model_name,
        api_key=api_key,
        base_url=base_url,
        discipline=discipline,
        venue=venue,
        language=language,
        enable_rebuttal_generation=True,
    )
    pipeline = PaperReviewPipeline(config)
    # Only the rendered report is needed here; discard the structured result.
    _, report = await pipeline.review_and_report(
        pdf_path,
        paper_name=Path(pdf_path).stem,
        output_path=output_path,
    )

    if output_path:
        print(f"Report saved to: {output_path}")
    else:
        print(report)
155+
156+
157+
class RebuttalCLI:
    """Fire command group exposing the rebuttal workflows.

    Each subcommand forwards its keyword arguments unchanged to the matching
    async workflow and drives it to completion with ``asyncio.run``.
    """

    def generate(self, **kwargs):
        """Generate a rebuttal draft."""
        coro = _generate_rebuttal(**kwargs)
        asyncio.run(coro)

    def assess(self, **kwargs):
        """Assess an existing rebuttal."""
        coro = _assess_rebuttal(**kwargs)
        asyncio.run(coro)

    def full(self, **kwargs):
        """Full pipeline: review + generate rebuttal."""
        coro = _full_pipeline(**kwargs)
        asyncio.run(coro)
171+
172+
173+
if __name__ == "__main__":
    # Dispatch the `generate` / `assess` / `full` subcommands via python-fire.
    fire.Fire(RebuttalCLI)

cookbooks/paper_review/graders/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from cookbooks.paper_review.graders.criticality import CriticalityGrader
66
from cookbooks.paper_review.graders.format import FormatGrader
77
from cookbooks.paper_review.graders.jailbreaking import JailbreakingGrader
8+
from cookbooks.paper_review.graders.rebuttal_assessment import RebuttalAssessmentGrader
9+
from cookbooks.paper_review.graders.rebuttal_generation import RebuttalGenerationGrader
810
from cookbooks.paper_review.graders.review import ReviewGrader
911

1012
__all__ = [
@@ -13,4 +15,6 @@
1315
"CriticalityGrader",
1416
"FormatGrader",
1517
"JailbreakingGrader",
18+
"RebuttalGenerationGrader",
19+
"RebuttalAssessmentGrader",
1620
]
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# -*- coding: utf-8 -*-
2+
"""Rebuttal assessment grader for academic papers."""
3+
4+
import json
5+
import re
6+
from typing import List, Optional
7+
8+
from cookbooks.paper_review.disciplines.base import DisciplineConfig
9+
from cookbooks.paper_review.prompts.rebuttal_assessment import (
10+
REBUTTAL_ASSESSMENT_USER_PROMPT,
11+
get_rebuttal_assessment_system_prompt,
12+
)
13+
from cookbooks.paper_review.utils import extract_response_content
14+
from openjudge.graders.base_grader import GraderError, GraderMode, GraderScore
15+
from openjudge.graders.llm_grader import LLMGrader
16+
from openjudge.models.base_chat_model import BaseChatModel
17+
18+
19+
def parse_rebuttal_assessment_response(text: str) -> dict:
    """Parse a JSON-formatted rebuttal assessment response.

    Extracts the first ``{...}`` span from *text*, decodes it, and normalizes
    it into the dict shape the pipeline expects. The ``updated_score`` is
    clamped to the documented 1-6 recommendation range so a malformed model
    response cannot leak an out-of-range score downstream.

    Args:
        text: Raw model output, possibly with prose surrounding the JSON.

    Returns:
        A dict with keys ``updated_score``, ``score_change_reasoning``,
        ``overall_assessment``, ``point_assessments``, ``unresolved_concerns``,
        and ``rebuttal_strengths``. If no valid JSON is found, a neutral
        fallback (score 3, the raw text as the overall assessment) is returned.
    """
    json_match = re.search(r"\{[\s\S]*\}", text)
    if json_match:
        try:
            data = json.loads(json_match.group())
            point_assessments = [
                {
                    "concern": p.get("concern", ""),
                    "author_response_summary": p.get("author_response_summary", ""),
                    "adequacy": p.get("adequacy", "not_addressed"),
                    "reasoning": p.get("reasoning", ""),
                }
                for p in data.get("point_assessments", [])
            ]
            # Clamp to the 1-6 scale documented for rebuttal assessment.
            updated_score = min(6, max(1, int(data.get("updated_score", 3))))
            return {
                "updated_score": updated_score,
                "score_change_reasoning": data.get("score_change_reasoning", ""),
                "overall_assessment": data.get("overall_assessment", ""),
                "point_assessments": point_assessments,
                "unresolved_concerns": data.get("unresolved_concerns", []),
                "rebuttal_strengths": data.get("rebuttal_strengths", []),
            }
        except (json.JSONDecodeError, ValueError, TypeError):
            # Fall through to the neutral fallback on any malformed payload.
            pass

    return {
        "updated_score": 3,
        "score_change_reasoning": "",
        "overall_assessment": text,
        "point_assessments": [],
        "unresolved_concerns": [],
        "rebuttal_strengths": [],
    }
}
54+
55+
56+
def build_rebuttal_assessment_messages(
    pdf_data: str,
    review_text: str,
    rebuttal_text: str,
    original_score: int,
    discipline: Optional[DisciplineConfig] = None,
    language: Optional[str] = None,
) -> List[dict]:
    """Build the chat messages for a rebuttal assessment request.

    Args:
        pdf_data: Base64 encoded PDF data URL attached as a file part.
        review_text: The original reviewer comments.
        rebuttal_text: The author's rebuttal text.
        original_score: The original recommendation score.
        discipline: Optional discipline config forwarded to the system prompt.
        language: Optional output language forwarded to the system prompt.

    Returns:
        A two-element message list: the system prompt followed by a
        multimodal user message (prompt text plus the PDF file part).
    """
    system_message = {
        "role": "system",
        "content": get_rebuttal_assessment_system_prompt(
            discipline=discipline,
            language=language,
        ),
    }
    user_text = REBUTTAL_ASSESSMENT_USER_PROMPT.format(
        original_score=original_score,
        review_text=review_text,
        rebuttal_text=rebuttal_text,
    )
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": user_text},
            {"type": "file", "file": {"file_data": pdf_data}},
        ],
    }
    return [system_message, user_message]
86+
87+
88+
class RebuttalAssessmentGrader(LLMGrader):
    """Grader that judges how well an author rebuttal answers a review.

    Produces an updated recommendation on the same 1-6 scale as the original
    review, with per-concern details carried in the score metadata.
    """

    def __init__(
        self,
        model: BaseChatModel | dict,
        discipline: Optional[DisciplineConfig] = None,
        language: Optional[str] = None,
    ):
        super().__init__(
            name="rebuttal_assessment",
            mode=GraderMode.POINTWISE,
            description="Assess rebuttal adequacy and update recommendation",
            model=model,
            template="",
        )
        # Forwarded to the prompt builder on every evaluation.
        self.discipline = discipline
        self.language = language

    async def aevaluate(
        self,
        pdf_data: str,
        review_text: str,
        rebuttal_text: str,
        original_score: int,
    ) -> GraderScore:
        """Assess a rebuttal against the original review.

        Args:
            pdf_data: Base64 encoded PDF data URL
            review_text: The original reviewer comments
            rebuttal_text: The author's rebuttal text
            original_score: The original recommendation score (1-6)

        Returns:
            GraderScore whose score is the updated recommendation and whose
            metadata carries the per-point assessment details. On any failure
            a GraderError is returned instead of raising.
        """
        try:
            messages = build_rebuttal_assessment_messages(
                pdf_data,
                review_text,
                rebuttal_text,
                original_score,
                discipline=self.discipline,
                language=self.language,
            )
            reply = await self.model.achat(messages=messages)
            raw_text = await extract_response_content(reply)
            parsed = parse_rebuttal_assessment_response(raw_text)

            details = {
                "original_score": original_score,
                "score_change_reasoning": parsed["score_change_reasoning"],
                "point_assessments": parsed["point_assessments"],
                "unresolved_concerns": parsed["unresolved_concerns"],
                "rebuttal_strengths": parsed["rebuttal_strengths"],
            }
            return GraderScore(
                name=self.name,
                score=parsed["updated_score"],
                reason=parsed["overall_assessment"],
                metadata=details,
            )
        except Exception as e:
            # Convert any failure into a GraderError so callers need not catch.
            return GraderError(name=self.name, error=str(e))

0 commit comments

Comments
 (0)