Skip to content

Commit 8051071

Browse files
committed
feat(paper_review): upgrade rebuttal prompts with discipline/venue/instructions support
Bring rebuttal prompts to the same professional academic level as review prompts:
- Deep integration with DisciplineConfig: evaluation_dimensions, correctness_categories, reviewer_context, and scoring_notes all flow into rebuttal generation and assessment
- Venue-aware: rebuttal generation adapts to venue conventions; assessment applies the venue-specific acceptance bar and contribution standards
- Author instructions support for rebuttal generation (same as the review prompt)
- Professional AC identity with high-standards calibration for assessment
- Structured assessment framework: Relevance, Evidence Strength, Completeness, Verifiability, Honesty — mirroring the rigor of the review evaluation dimensions
- Explicit score update rules: increase/decrease/maintain, each with concrete criteria
- Graders updated to pass venue and instructions parameters through to the prompts

Made-with: Cursor
1 parent af941b2 commit 8051071

File tree

6 files changed

+243
-57
lines changed

6 files changed

+243
-57
lines changed

cookbooks/paper_review/graders/rebuttal_assessment.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def build_rebuttal_assessment_messages(
5959
rebuttal_text: str,
6060
original_score: int,
6161
discipline: Optional[DisciplineConfig] = None,
62+
venue: Optional[str] = None,
6263
language: Optional[str] = None,
6364
) -> List[dict]:
6465
"""Build messages for rebuttal assessment."""
@@ -72,6 +73,7 @@ def build_rebuttal_assessment_messages(
7273
"role": "system",
7374
"content": get_rebuttal_assessment_system_prompt(
7475
discipline=discipline,
76+
venue=venue,
7577
language=language,
7678
),
7779
},
@@ -95,6 +97,7 @@ def __init__(
9597
self,
9698
model: BaseChatModel | dict,
9799
discipline: Optional[DisciplineConfig] = None,
100+
venue: Optional[str] = None,
98101
language: Optional[str] = None,
99102
):
100103
super().__init__(
@@ -105,6 +108,7 @@ def __init__(
105108
template="",
106109
)
107110
self.discipline = discipline
111+
self.venue = venue
108112
self.language = language
109113

110114
async def aevaluate(
@@ -132,6 +136,7 @@ async def aevaluate(
132136
rebuttal_text,
133137
original_score,
134138
discipline=self.discipline,
139+
venue=self.venue,
135140
language=self.language,
136141
)
137142
response = await self.model.achat(messages=messages)

cookbooks/paper_review/graders/rebuttal_generation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def build_rebuttal_generation_messages(
5252
review_text: str,
5353
discipline: Optional[DisciplineConfig] = None,
5454
venue: Optional[str] = None,
55+
instructions: Optional[str] = None,
5556
language: Optional[str] = None,
5657
) -> List[dict]:
5758
"""Build messages for rebuttal generation."""
@@ -62,6 +63,7 @@ def build_rebuttal_generation_messages(
6263
"content": get_rebuttal_generation_system_prompt(
6364
discipline=discipline,
6465
venue=venue,
66+
instructions=instructions,
6567
language=language,
6668
),
6769
},
@@ -87,6 +89,7 @@ def __init__(
8789
model: BaseChatModel | dict,
8890
discipline: Optional[DisciplineConfig] = None,
8991
venue: Optional[str] = None,
92+
instructions: Optional[str] = None,
9093
language: Optional[str] = None,
9194
):
9295
super().__init__(
@@ -98,6 +101,7 @@ def __init__(
98101
)
99102
self.discipline = discipline
100103
self.venue = venue
104+
self.instructions = instructions
101105
self.language = language
102106

103107
async def aevaluate(self, pdf_data: str, review_text: str) -> GraderScore:
@@ -116,6 +120,7 @@ async def aevaluate(self, pdf_data: str, review_text: str) -> GraderScore:
116120
review_text,
117121
discipline=self.discipline,
118122
venue=self.venue,
123+
instructions=self.instructions,
119124
language=self.language,
120125
)
121126
response = await self.model.achat(messages=messages)

cookbooks/paper_review/pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,13 @@ def _init_graders(self):
166166
self.model,
167167
discipline=self._discipline,
168168
venue=self._venue,
169+
instructions=self._instructions,
169170
language=self._language,
170171
)
171172
self.rebuttal_assessment_grader = RebuttalAssessmentGrader(
172173
self.model,
173174
discipline=self._discipline,
175+
venue=self._venue,
174176
language=self._language,
175177
)
176178

cookbooks/paper_review/prompts/rebuttal_assessment.py

Lines changed: 117 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,74 +10,160 @@
1010
def get_rebuttal_assessment_system_prompt(
1111
date: datetime | None = None,
1212
discipline: Optional[DisciplineConfig] = None,
13+
venue: Optional[str] = None,
1314
language: Optional[str] = None,
1415
) -> str:
1516
"""Get system prompt for rebuttal assessment.
1617
1718
Args:
1819
date: Date to use (defaults to today).
1920
discipline: Discipline-specific configuration.
21+
venue: Target conference/journal name.
2022
language: Output language ("en" default, "zh" for Chinese).
2123
"""
2224
current_date = (date or datetime.now()).strftime("%Y-%m-%d")
2325

26+
# ── AC / Meta-Reviewer identity ───────────────────────────────────────────
2427
if discipline:
25-
identity = (
26-
f"You are a senior Area Chair / Meta-Reviewer in {discipline.name}, "
27-
f"responsible for evaluating whether the authors' rebuttal adequately "
28-
f"addresses the reviewers' concerns."
28+
discipline_label = discipline.name
29+
reviewer_context = discipline.reviewer_context or f"You specialize in {discipline_label}."
30+
identity_block = (
31+
f"You are a senior Area Chair / Meta-Reviewer for a top venue in {discipline_label}. "
32+
f"You are the most experienced and fair-minded AC in the field.\n"
33+
f"{reviewer_context}\n"
34+
f"You have served on dozens of program committees and have deep understanding of "
35+
f"what constitutes a convincing rebuttal versus a superficial or evasive one."
2936
)
3037
else:
31-
identity = (
32-
"You are a senior Area Chair / Meta-Reviewer responsible for evaluating "
33-
"whether the authors' rebuttal adequately addresses the reviewers' concerns."
38+
identity_block = (
39+
"You are a senior Area Chair / Meta-Reviewer for a top academic venue. "
40+
"You are the most experienced and fair-minded AC in the field.\n"
41+
"You have served on dozens of program committees and have deep understanding of "
42+
"what constitutes a convincing rebuttal versus a superficial or evasive one."
3443
)
3544

45+
# ── Venue context ─────────────────────────────────────────────────────────
46+
if venue:
47+
venue_block = (
48+
f"\n**Target Venue: {venue}**\n"
49+
f"You are making a recommendation for **{venue}**. Apply {venue}'s specific "
50+
f"standards and contribution bar when judging whether the rebuttal resolves "
51+
f"the concerns sufficiently for this venue. Consider {venue}'s acceptance rate, "
52+
f"audience expectations, and the level of rigor required."
53+
)
54+
elif discipline and discipline.venues:
55+
venue_list = discipline.format_venues()
56+
venue_block = (
57+
f"\nYou typically serve as AC for top venues in {discipline.name}, "
58+
f"such as: {venue_list}. Apply corresponding standards."
59+
)
60+
else:
61+
venue_block = (
62+
"\nYou typically serve as AC for top venues such as " "NeurIPS, ICLR, ICML, Nature, Science, The Lancet."
63+
)
64+
65+
# ── Evaluation dimensions for assessment ──────────────────────────────────
66+
if discipline and discipline.evaluation_dimensions:
67+
dimensions_block = (
68+
"\nWhen evaluating whether reviewer concerns are adequately addressed, consider "
69+
"how the rebuttal impacts these key evaluation dimensions that reviewers in this "
70+
"field prioritize:\n\n" + discipline.format_evaluation_dimensions()
71+
)
72+
else:
73+
dimensions_block = (
74+
"\nWhen evaluating whether concerns are adequately addressed, consider how the "
75+
"rebuttal impacts these dimensions: Quality, Clarity, Significance, Originality, "
76+
"Reproducibility, Ethics & Limitations, and Citations & Related Work."
77+
)
78+
79+
# ── Discipline-specific error awareness ───────────────────────────────────
80+
if discipline and discipline.correctness_categories:
81+
error_awareness_block = (
82+
"\nReviewers in this field commonly flag these types of errors. When assessing "
83+
"whether the rebuttal resolves such concerns, apply the appropriate level of "
84+
"scrutiny for each category:\n\n" + discipline.format_correctness_categories()
85+
)
86+
else:
87+
error_awareness_block = ""
88+
89+
# ── Scoring ───────────────────────────────────────────────────────────────
90+
scoring_block = """Scoring (1-6):
91+
1: Strong Reject - Well-known results, technical flaws, or unaddressed ethical considerations
92+
2: Reject - Technical flaws, weak evaluation, inadequate reproducibility
93+
3: Borderline Reject - Technically solid but reasons to reject outweigh reasons to accept
94+
4: Borderline Accept - Technically solid where reasons to accept outweigh reasons to reject
95+
5: Accept - Technically solid with high impact, good evaluation
96+
6: Strong Accept - Technically flawless with groundbreaking impact"""
97+
98+
if discipline and discipline.scoring_notes:
99+
scoring_block += f"\n\nDiscipline-specific guidance: {discipline.scoring_notes}"
100+
101+
# ── Output language ───────────────────────────────────────────────────────
36102
if language == "zh":
37103
language_block = (
38104
"\n**Output Language: Chinese (Simplified)**\n"
39-
"You MUST write the entire assessment in Simplified Chinese (简体中文)."
105+
"You MUST write the entire assessment in Simplified Chinese (简体中文). "
106+
"Technical terms may remain in English where conventional."
40107
)
41108
else:
42109
language_block = ""
43110

44-
return f"""{identity}
111+
return f"""{identity_block}
45112
46113
**Current Date: {current_date}**
47-
{language_block}
114+
Note: References to papers from 2024, 2025, or 2026 are valid and should NOT be flagged as "future" papers.
115+
{venue_block}{language_block}
116+
117+
You keep incredibly high standards. A convincing rebuttal must meet ALL of the following criteria:
118+
- Addresses the **specific** concern raised, not a straw-man or adjacent issue
119+
- Provides **concrete evidence** (experimental results, theoretical arguments, citations) rather than vague promises
120+
- Maintains **intellectual honesty** — acknowledging genuine limitations rather than deflecting
121+
- Demonstrates **scholarly professionalism** in tone and substance
48122
49123
You are given:
50124
1. The original paper (PDF)
51125
2. The reviewer comments
52126
3. The authors' rebuttal
53127
4. The original recommendation score (1-6)
54-
55-
Your task:
56-
1. For each reviewer concern, determine whether the rebuttal addresses it:
57-
- "fully_addressed": The response is convincing with sufficient evidence or clarification.
58-
- "partially_addressed": The response acknowledges the issue but the resolution is incomplete.
59-
- "not_addressed": The concern is ignored or the response is inadequate.
60-
2. Evaluate the overall quality of the rebuttal (professionalism, evidence, honesty).
61-
3. Based on the rebuttal, decide an updated recommendation score (1-6). The score may go up, stay the same, or go down.
62-
63-
Scoring reminder (1-6):
64-
1: Strong Reject 2: Reject 3: Borderline Reject 4: Borderline Accept 5: Accept 6: Strong Accept
128+
{dimensions_block}
129+
{error_awareness_block}
130+
131+
ASSESSMENT FRAMEWORK — For each reviewer concern, evaluate:
132+
1. **Relevance**: Does the response address the actual concern or a different/adjacent issue?
133+
2. **Evidence strength**: Is the response backed by concrete evidence (data, proofs, citations) or just assertions?
134+
3. **Completeness**: Does the response fully resolve the concern, or are aspects left unaddressed?
135+
4. **Verifiability**: Can the claims in the rebuttal be verified against the paper content?
136+
5. **Honesty**: Does the author honestly acknowledge limitations, or are they deflecting/dismissing valid criticism?
137+
138+
ADEQUACY CLASSIFICATION:
139+
- "fully_addressed": The response directly addresses the concern with convincing evidence or clarification that can be verified against the paper. The concern no longer stands as a reason to reject.
140+
- "partially_addressed": The response acknowledges the concern and provides some evidence, but the resolution is incomplete — e.g., promised experiments not yet shown, partial clarification that leaves open questions, or evidence that only addresses part of the concern.
141+
- "not_addressed": The concern is ignored, the response is off-topic, the argument is circular, or the evidence provided does not actually resolve the issue.
142+
143+
SCORE UPDATE RULES:
144+
- The score may increase if major concerns are convincingly resolved with strong evidence.
145+
- The score stays the same if the rebuttal is adequate but does not change the fundamental assessment.
146+
- The score may DECREASE if the rebuttal reveals new weaknesses (contradictions, misunderstandings of own work, dishonest framing).
147+
- A single unresolved major concern is sufficient reason to maintain or lower the score.
148+
- Promises of future work ("we will add...") without concrete evidence carry minimal weight.
149+
150+
{scoring_block}
65151
66152
Return your assessment as JSON:
67153
{{
68154
"updated_score": <int 1-6>,
69-
"score_change_reasoning": "Why the score changed (or didn't)",
70-
"overall_assessment": "High-level summary of the rebuttal quality",
155+
"score_change_reasoning": "Why the score changed (or didn't), referencing specific concerns",
156+
"overall_assessment": "High-level summary of the rebuttal quality and its impact on the paper's standing",
71157
"point_assessments": [
72158
{{
73-
"concern": "The reviewer's original concern",
74-
"author_response_summary": "Brief summary of author's response",
159+
"concern": "The reviewer's original concern (verbatim or faithfully summarized)",
160+
"author_response_summary": "Brief summary of the author's response",
75161
"adequacy": "fully_addressed" or "partially_addressed" or "not_addressed",
76-
"reasoning": "Why you judged it this way"
162+
"reasoning": "Detailed reasoning for your judgment, referencing evidence from the paper and rebuttal"
77163
}}
78164
],
79-
"unresolved_concerns": ["List of concerns that remain unresolved"],
80-
"rebuttal_strengths": ["What the rebuttal did well"]
165+
"unresolved_concerns": ["Specific concerns that remain unresolved after the rebuttal"],
166+
"rebuttal_strengths": ["What the rebuttal did particularly well"]
81167
}}"""
82168

83169

@@ -89,4 +175,6 @@ def get_rebuttal_assessment_system_prompt(
89175
Author rebuttal:
90176
{rebuttal_text}
91177
92-
Please assess whether the rebuttal adequately addresses each concern and provide an updated recommendation score."""
178+
Carefully evaluate each point in the rebuttal against the original paper and reviewer comments. \
179+
Determine whether each concern is fully addressed, partially addressed, or not addressed. \
180+
Then provide an updated recommendation score with detailed justification."""

0 commit comments

Comments (0)