22Bluebook 21st Edition formatting rules engine.
33"""
44
5+ import re
6+ import html
57from typing import Optional , List , Tuple
68from ..models .citation import Citation , CitationType , CitationContext
79from ..utils .bluebook_patterns import (
1113 get_journal_abbreviation ,
1214)
1315
16+
def clean_html(text: str) -> str:
    """Remove HTML tags and decode HTML entities from text.

    Entities are decoded first so that entity-encoded markup such as
    ``&lt;i&gt;`` is stripped along with literal tags. Only spans shaped
    like markup are removed: an opening or closing tag (``<`` followed by
    an optional ``/`` and a letter) or a ``<!...>`` declaration/comment.
    Literal angle brackets in ordinary text (e.g. ``1 < 2 and 3 > 2``)
    are preserved.

    Args:
        text: Raw text that may contain HTML tags and/or entities.

    Returns:
        The cleaned text with surrounding whitespace stripped. Falsy
        input (``None`` or ``""``) is returned unchanged.
    """
    if not text:
        return text
    # Decode HTML entities like &lt; &gt; &amp;
    text = html.unescape(text)
    # Remove HTML tags like <i>, </i>, <b>, <br/>. Requiring a letter (or "!")
    # right after "<" keeps comparison operators in running text intact.
    text = re.sub(r'<(?:/?[A-Za-z]|!)[^>]*>', '', text)
    return text.strip()
26+
1427class BluebookFormatter :
1528 """Formats citations according to Bluebook 21st Edition rules."""
1629
@@ -112,12 +125,13 @@ def format_regulation(self, citation: Citation, is_law_review: bool = True) -> s
112125 def format_law_review (self , citation : Citation , is_law_review : bool = True ) -> str :
113126 """Format law review article citation per Bluebook Rule 16."""
114127 parts = []
115-
128+
116129 if citation .author :
117- parts .append (citation .author )
118-
130+ parts .append (clean_html ( citation .author ) )
131+
119132 if citation .title :
120- parts .append (f"*{ citation .title } *" )
133+ title = clean_html (citation .title )
134+ parts .append (f"*{ title } *" )
121135
122136 if citation .volume and citation .journal and citation .page :
123137 journal = get_journal_abbreviation (citation .journal )
@@ -136,35 +150,38 @@ def format_law_review(self, citation: Citation, is_law_review: bool = True) -> s
def format_book(self, citation: Citation, is_law_review: bool = True) -> str:
    """Format book citation per Bluebook Rule 15.

    Builds ``AUTHOR, TITLE (edition ed. year).`` from whichever fields
    are present on ``citation``. Author and title are passed through
    ``clean_html`` before use; a field that is empty after cleaning
    (for example, one that contained only markup) is omitted so that no
    stray comma or blank segment is emitted.

    Args:
        citation: Citation whose ``author``, ``title``, ``edition`` and
            ``year`` attributes are read.
        is_law_review: Accepted for signature parity with the other
            formatters; not consulted by this method.

    Returns:
        The formatted citation string, terminated with a period.
    """
    parts = []

    # Guard on the cleaned value, not the raw field: markup-only input
    # would otherwise produce a bare "," here.
    author = clean_html(citation.author) if citation.author else ""
    if author:
        # Author followed by comma
        parts.append(author + ",")

    title = clean_html(citation.title) if citation.title else ""
    if title:
        # Upper-case stands in for small caps in plain-text output.
        parts.append(title.upper())

    # Build parenthetical: "(<edition> ed. <year>)"
    paren_parts = []
    if citation.edition:
        paren_parts.append(f"{citation.edition} ed.")
    if citation.year:
        paren_parts.append(str(citation.year))

    if paren_parts:
        parts.append(f"({' '.join(paren_parts)})")

    return " ".join(parts) + "."
158174
159175 def format_website (self , citation : Citation , is_law_review : bool = True ) -> str :
160176 """Format website citation per Bluebook Rule 18."""
161177 parts = []
162-
178+
163179 if citation .author :
164- parts .append (citation .author )
165-
180+ parts .append (clean_html ( citation .author ) )
181+
166182 if citation .title :
167- parts .append (f"*{ citation .title } *" )
183+ title = clean_html (citation .title )
184+ parts .append (f"*{ title } *" )
168185
169186 if citation .url :
170187 parts .append (citation .url )
0 commit comments