Skip to content

Commit 668a917

Browse files
committed
Fix regex patterns - limit party names to max 5 words, fix edition capture
1 parent 9f81d7c commit 668a917

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

backend/app/utils/bluebook_patterns.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,20 @@
99
# Citation detection patterns
1010
PATTERNS: Dict[str, Pattern] = {
1111
# Cases: Party v. Party, Volume Reporter Page (Court Year)
12+
# Party names: a capitalized first word plus up to 5 more words (any case, e.g. "of", LLC/Inc/Corp); whitespace is matched only between words
1213
"case_complete": re.compile(
13-
r"([A-Z][a-zA-Z\.\'\-\s]+)\s+v\.\s+([A-Z][a-zA-Z\.\'\-\s]+),\s*"
14-
r"(\d+)\s+([A-Z][a-zA-Z\.\s\d]+)\s+(\d+)"
14+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Za-z\.\'\-]+){0,5})\s+v\.\s+"
15+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Za-z\.\'\-]+){0,5}),\s*"
16+
r"(\d+)\s+([A-Z][a-zA-Z\.\s\d]+?)\s+(\d+)"
1517
r"(?:,\s*(\d+(?:-\d+)?))?\s*"
1618
r"\(([^)]+)\)"
1719
),
18-
20+
1921
# Incomplete case: just Party v. Party (missing reporter info)
22+
# Limited to reasonable party name length (max 6 words per party: one capitalized word plus up to 5 more)
2023
"case_incomplete": re.compile(
21-
r"([A-Z][a-zA-Z\.\'\-\s]+)\s+v\.\s+([A-Z][a-zA-Z\.\'\-\s]+)"
24+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Za-z\.\'\-]+){0,5})\s+v\.\s+"
25+
r"([A-Z][a-zA-Z\.\'\-]+(?:\s+[A-Za-z\.\'\-]+){0,5})"
2226
r"(?!\s*,\s*\d+\s+[A-Z])"
2327
),
2428

@@ -47,11 +51,11 @@
4751
r"\((\d{4})\)"
4852
),
4953

50-
# Books: Author, Title (Edition Year)
54+
# Books: Author, Title (Edition Year) - captures full ordinal like "6th"
5155
"book": re.compile(
5256
r"([A-Z][a-zA-Z\.\s]+),\s+"
5357
r"([A-Z][^(]+)\s*"
54-
r"\((?:(\d+)(?:st|nd|rd|th)\s+ed\.\s+)?(\d{4})\)"
58+
r"\((?:(\d+(?:st|nd|rd|th))\s+ed\.\s+)?(\d{4})\)"
5559
),
5660

5761
# Short forms - Id.

0 commit comments

Comments
 (0)