Skip to content

Commit 27a17bc

Browse files
authored
Merge pull request #16 from delschlangen/claude/enhance-bluebook-citations-uO8FO
Fix HTML entities in titles and add comma after book authors
2 parents 2ff266d + ef79fb0 commit 27a17bc

File tree

1 file changed

+32
-15
lines changed

1 file changed

+32
-15
lines changed

backend/app/services/bluebook_rules.py

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Bluebook 21st Edition formatting rules engine.
33
"""
44

5+
import re
6+
import html
57
from typing import Optional, List, Tuple
68
from ..models.citation import Citation, CitationType, CitationContext
79
from ..utils.bluebook_patterns import (
@@ -11,6 +13,17 @@
1113
get_journal_abbreviation,
1214
)
1315

16+
17+
def clean_html(text: str) -> str:
    """Remove HTML tags and decode HTML entities from text.

    Entities are decoded first so that entity-encoded markup such as
    ``&lt;i&gt;Title&lt;/i&gt;`` is stripped the same way as literal
    ``<i>Title</i>``.

    Args:
        text: Raw string that may contain HTML tags and/or entities.

    Returns:
        The cleaned string with surrounding whitespace removed. Falsy input
        (``None`` or ``""``) is returned unchanged.
    """
    if not text:
        return text
    # Decode HTML entities like &lt; &gt; &amp;
    text = html.unescape(text)
    # Remove HTML tags like <i>, </i>, <b>, etc. A tag must begin with a
    # letter (optionally preceded by "/"); this keeps literal comparison
    # signs such as "a < b and c > d" from being mistaken for a tag and
    # silently deleting the text between them.
    text = re.sub(r'</?[A-Za-z][^>]*>', '', text)
    return text.strip()
26+
1427
class BluebookFormatter:
1528
"""Formats citations according to Bluebook 21st Edition rules."""
1629

@@ -112,12 +125,13 @@ def format_regulation(self, citation: Citation, is_law_review: bool = True) -> s
112125
def format_law_review(self, citation: Citation, is_law_review: bool = True) -> str:
113126
"""Format law review article citation per Bluebook Rule 16."""
114127
parts = []
115-
128+
116129
if citation.author:
117-
parts.append(citation.author)
118-
130+
parts.append(clean_html(citation.author))
131+
119132
if citation.title:
120-
parts.append(f"*{citation.title}*")
133+
title = clean_html(citation.title)
134+
parts.append(f"*{title}*")
121135

122136
if citation.volume and citation.journal and citation.page:
123137
journal = get_journal_abbreviation(citation.journal)
@@ -136,35 +150,38 @@ def format_law_review(self, citation: Citation, is_law_review: bool = True) -> s
136150
def format_book(self, citation: Citation, is_law_review: bool = True) -> str:
    """Format book citation per Bluebook Rule 15.

    Assembles, in order and separated by single spaces: the cleaned author
    followed by a comma, the cleaned title in upper case, and a
    parenthetical holding the edition and/or year. A period is appended to
    the result. Fields that are missing or falsy are omitted.

    Args:
        citation: Citation whose ``author``, ``title``, ``edition`` and
            ``year`` attributes are read.
        is_law_review: Accepted for signature parity with the other
            formatters; not consulted by this method.

    Returns:
        The formatted citation string.
    """
    pieces = []

    author = citation.author
    if author:
        # Author followed by comma
        pieces.append(clean_html(author) + ",")

    raw_title = citation.title
    if raw_title:
        # Upper-casing stands in for small caps on book titles
        pieces.append(clean_html(raw_title).upper())

    # Collect the parenthetical contents: edition first, then year
    edition_text = f"{citation.edition} ed." if citation.edition else ""
    year_text = str(citation.year) if citation.year else ""
    details = [chunk for chunk in (edition_text, year_text) if chunk]

    if details:
        pieces.append("(" + " ".join(details) + ")")

    return f"{' '.join(pieces)}."
158174

159175
def format_website(self, citation: Citation, is_law_review: bool = True) -> str:
160176
"""Format website citation per Bluebook Rule 18."""
161177
parts = []
162-
178+
163179
if citation.author:
164-
parts.append(citation.author)
165-
180+
parts.append(clean_html(citation.author))
181+
166182
if citation.title:
167-
parts.append(f"*{citation.title}*")
183+
title = clean_html(citation.title)
184+
parts.append(f"*{title}*")
168185

169186
if citation.url:
170187
parts.append(citation.url)

0 commit comments

Comments
 (0)