Skip to content

Commit d9294e4

Browse files
authored
Merge pull request #10 from delschlangen/claude/enhance-bluebook-citations-uO8FO
Fix citation formatting bugs
2 parents de25c69 + ba70654 commit d9294e4

File tree

3 files changed

+87
-30
lines changed

3 files changed

+87
-30
lines changed

backend/app/services/bluebook_rules.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,30 @@ def format_citation(self, citation: Citation, is_law_review: bool = True) -> str
3333
def format_case(self, citation: Citation, is_law_review: bool = True) -> str:
3434
"""
3535
Format case citation per Bluebook Rule 10.
36-
36+
3737
Law review format: Case Name, Vol. Reporter Page, Pincite (Court Year).
3838
Brief format: Case Name, Vol. Reporter Page, Pincite (Court Year)
3939
"""
4040
if not citation.parties or len(citation.parties) < 2:
4141
return citation.raw_text
42-
42+
43+
# Validate party names are reasonable (not paragraphs of text)
44+
plaintiff_raw = citation.parties[0]
45+
defendant_raw = citation.parties[1]
46+
47+
# Party names shouldn't be too long or contain invalid words
48+
invalid_indicators = ["ISSUE", "ANALYSIS", "See ", "The Court", "Whether"]
49+
for indicator in invalid_indicators:
50+
if indicator in plaintiff_raw or indicator in defendant_raw:
51+
return citation.raw_text
52+
53+
# Party names should be a reasonable length (typically under 50 chars; anything over 60 is rejected)
54+
if len(plaintiff_raw) > 60 or len(defendant_raw) > 60:
55+
return citation.raw_text
56+
4357
# Format case name (Rule 10.2)
44-
plaintiff = abbreviate_party_name(citation.parties[0])
45-
defendant = abbreviate_party_name(citation.parties[1])
58+
plaintiff = abbreviate_party_name(plaintiff_raw)
59+
defendant = abbreviate_party_name(defendant_raw)
4660
case_name = f"{plaintiff} v. {defendant}"
4761

4862
# Build reporter citation (Rule 10.3)

backend/app/services/lookup_service.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -561,13 +561,20 @@ async def smart_complete(self, citation: Citation) -> Dict[str, Any]:
561561
if citation.raw_text:
562562
raw_text = citation.raw_text.strip()
563563

564-
# Check if it looks like a case
565-
if " v. " in raw_text or " v " in raw_text:
566-
case_result = await self.search_by_text(raw_text, "case")
564+
# Check if it looks like a case name (contains a "Party v. Party" pattern; the period after "v" is optional)
565+
# This is the primary indicator - don't search for articles about cases
566+
case_name_pattern = re.search(r'([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Z][a-zA-Z\.\'\-]+)*)\s+v\.?\s+([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Z][a-zA-Z\.\'\-]+)*)', raw_text)
567+
if case_name_pattern:
568+
# Extract just the case name for search
569+
case_name = case_name_pattern.group(0)
570+
case_result = await self.search_by_text(case_name, "case")
567571
results["strategies_tried"].append("case_text_search")
568572
if case_result.get("found"):
569573
case_result["inferred_type"] = "case"
570574
return case_result
575+
# Don't fall through to article search for case names
576+
results["note"] = f"Looks like case citation: {case_name}"
577+
return results
571578

572579
# Check if it looks like a statute
573580
if re.search(r'\b(U\.?S\.?C|C\.?F\.?R|Code|§)', raw_text, re.IGNORECASE):
@@ -577,18 +584,21 @@ async def smart_complete(self, citation: Citation) -> Dict[str, Any]:
577584
statute_result["inferred_type"] = "statute"
578585
return statute_result
579586

580-
# Try as article
581-
article_result = await self.search_by_text(raw_text, "article")
582-
results["strategies_tried"].append("article_text_search")
583-
if article_result.get("found"):
584-
article_result["inferred_type"] = "law_review"
585-
return article_result
586-
587-
# Last resort: generic case search
588-
case_result = await self.search_by_text(raw_text, "case")
589-
results["strategies_tried"].append("fallback_case_search")
590-
if case_result.get("found"):
591-
return case_result
587+
# Only try article search if it doesn't look like a case
588+
# and has some indication it's an article (journal words, or a volume number followed by an abbreviated journal name)
589+
if re.search(r'(Law Review|L\. Rev\.|Journal|L\.J\.|\d+\s+[A-Z][a-z]+\.\s+L\.)', raw_text):
590+
article_result = await self.search_by_text(raw_text, "article")
591+
results["strategies_tried"].append("article_text_search")
592+
if article_result.get("found"):
593+
article_result["inferred_type"] = "law_review"
594+
return article_result
595+
596+
# Last resort: generic case search (but only if text is short enough to be a citation)
597+
if len(raw_text) < 200:
598+
case_result = await self.search_by_text(raw_text, "case")
599+
results["strategies_tried"].append("fallback_case_search")
600+
if case_result.get("found"):
601+
return case_result
592602

593603
return results
594604

backend/app/utils/bluebook_patterns.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,20 @@
99
# Citation detection patterns
1010
PATTERNS: Dict[str, Pattern] = {
1111
# Cases: Party v. Party, Volume Reporter Page (Court Year)
12+
# Fixed to not be greedy with whitespace - party names must be runs of capitalized words (note: the \s+ between words can still match a newline)
1213
"case_complete": re.compile(
13-
r"([A-Z][a-zA-Z\.\'\-\s]+)\s+v\.\s+([A-Z][a-zA-Z\.\'\-\s]+),\s*"
14-
r"(\d+)\s+([A-Z][a-zA-Z\.\s\d]+)\s+(\d+)"
14+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Z][a-zA-Z\.\'\-]+)*(?:,?\s+(?:LLC|Inc\.|Corp\.|Co\.|Ltd\.))?)\s+v\.\s+"
15+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Z][a-zA-Z\.\'\-]+)*(?:,?\s+(?:LLC|Inc\.|Corp\.|Co\.|Ltd\.))?),\s*"
16+
r"(\d+)\s+([A-Z][a-zA-Z\.\s\d]+?)\s+(\d+)"
1517
r"(?:,\s*(\d+(?:-\d+)?))?\s*"
1618
r"\(([^)]+)\)"
1719
),
18-
20+
1921
# Incomplete case: just Party v. Party (missing reporter info)
22+
# Fixed to not be greedy - requires capitalized words only
2023
"case_incomplete": re.compile(
21-
r"([A-Z][a-zA-Z\.\'\-\s]+)\s+v\.\s+([A-Z][a-zA-Z\.\'\-\s]+)"
24+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Z][a-zA-Z\.\'\-]+)*)\s+v\.\s+"
25+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Z][a-zA-Z\.\'\-]+)*)"
2226
r"(?!\s*,\s*\d+\s+[A-Z])"
2327
),
2428

@@ -48,10 +52,11 @@
4852
),
4953

5054
# Books: Author, Title (Edition Year)
55+
# Fixed to capture full edition like "6th" not just "6"
5156
"book": re.compile(
5257
r"([A-Z][a-zA-Z\.\s]+),\s+"
5358
r"([A-Z][^(]+)\s*"
54-
r"\((?:(\d+)(?:st|nd|rd|th)\s+ed\.\s+)?(\d{4})\)"
59+
r"\((?:(\d+(?:st|nd|rd|th))\s+ed\.\s+)?(\d{4})\)"
5560
),
5661

5762
# Short forms - Id.
@@ -452,18 +457,46 @@ def get_journal_abbreviation(journal: str) -> str:
452457
"""Get the Bluebook abbreviation for a journal."""
453458
return JOURNAL_ABBREVIATIONS.get(journal, journal)
454459

455-
def abbreviate_party_name(party: str) -> str:
456-
"""Abbreviate a party name per Bluebook Table 6."""
460+
def abbreviate_party_name(party: str, is_state_party: bool = False) -> str:
461+
"""
462+
Abbreviate a party name per Bluebook Table 6.
463+
464+
Args:
465+
party: The party name to abbreviate
466+
is_state_party: If True, skip abbreviation entirely and return the name unchanged apart from a removed leading "The"
467+
(per Rule 10.2.1 - states as parties keep full names)
468+
"""
457469
result = party
458-
470+
459471
# Remove "The" at beginning (Rule 10.2.1(e))
460472
if result.lower().startswith("the "):
461473
result = result[4:]
462-
463-
# Apply abbreviations
474+
475+
# Don't abbreviate if it's a state/geographic entity as a party
476+
# (e.g., "North Carolina" as defendant should stay "North Carolina")
477+
if is_state_party:
478+
return result.strip()
479+
480+
# Check if this is a pure state name - if so, don't abbreviate
481+
if result.strip() in STATE_ABBREVIATIONS:
482+
return result.strip()
483+
484+
# Words that should NOT be abbreviated in party names
485+
# (only abbreviate organizational/business terms)
486+
skip_abbreviations = {
487+
"North", "South", "East", "West", "Eastern", "Western",
488+
"Northern", "Southern", "Northeast", "Northwest",
489+
"Southeast", "Southwest", "Carolina", "Dakota", "Virginia",
490+
"Hampshire", "Jersey", "Mexico", "York"
491+
}
492+
493+
# Apply abbreviations only for business/organizational terms
464494
for full, abbrev in PARTY_ABBREVIATIONS.items():
495+
# Skip geographic terms in case names
496+
if full in skip_abbreviations:
497+
continue
465498
# Use word boundaries for replacement
466499
pattern = re.compile(r'\b' + re.escape(full) + r'\b', re.IGNORECASE)
467500
result = pattern.sub(abbrev, result)
468-
501+
469502
return result.strip()

0 commit comments

Comments
 (0)