Skip to content

Commit cee91f7

Browse files
committed
Initial commit: Bluebook citation generator
0 parents  commit cee91f7

36 files changed

+3687
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Builds the frontend and publishes frontend/dist to GitHub Pages.
name: Deploy to GitHub Pages

on:
  # Runs on every push to main, or when started manually.
  push:
    branches:
      - main
  workflow_dispatch:

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  # One "pages" run at a time; a run already in progress is not cancelled.
  group: pages
  cancel-in-progress: false

jobs:
  # Install dependencies, build the frontend, upload the build as an artifact.
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: frontend/package-lock.json

      - name: Install dependencies
        working-directory: frontend
        run: npm ci

      - name: Build
        working-directory: frontend
        env:
          # Backend URL handed to the build, taken from the API_URL variable.
          VITE_API_URL: ${{ vars.API_URL }}
        run: npm run build

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: frontend/dist

  # Publish the artifact produced by the build job.
  deploy:
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
venv/
8+
ENV/
9+
.env
10+
11+
# Node
12+
node_modules/
13+
dist/
14+
.DS_Store
15+
16+
# IDE
17+
.vscode/
18+
.idea/
19+
20+
# Logs
21+
*.log
22+
23+
# Local env
24+
.env.local
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Bluebook Citation Generator
2+
3+
Automated legal citation formatting, completion, and verification using Bluebook 21st Edition rules.
4+
5+
## Features
6+
7+
- **Citation Extraction**: Automatically detects cases, statutes, regulations, articles, and books
8+
- **Citation Completion**: Looks up incomplete citations using free legal databases (CourtListener, CrossRef, Open Library)
9+
- **Bluebook Formatting**: Formats citations per Bluebook 21st Edition rules
10+
- **Context-Aware Short Forms**: Suggests Id., supra, and short case forms based on document context
11+
- **Unsourced Claim Detection**: Identifies statements that may need citations
12+
13+
## Tech Stack
14+
15+
- **Backend**: Python/FastAPI
16+
- **Frontend**: React + Vite + TailwindCSS
17+
- **APIs**: CourtListener, CrossRef, Open Library, eCFR (all free, no keys required)
18+
19+
## Local Development
20+
21+
### Backend
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt lists are removed in the same layer so they are not shipped.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching: this layer is only rebuilt when
# requirements.txt changes, not on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app ./app

# Create non-root user and run the server as that user
RUN useradd -m appuser && chown -R appuser:appuser /app
USER appuser

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

bluebook-citation-generator/backend/app/__init__.py

Whitespace-only changes.
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
"""
2+
FastAPI application for Bluebook Citation Generator.
3+
"""
4+
5+
import os
import re
import uuid
from contextlib import asynccontextmanager
from typing import Optional

from fastapi import FastAPI, UploadFile, File, HTTPException, Body
from fastapi.middleware.cors import CORSMiddleware

from .services.parser import DocumentParser
from .services.extractor import CitationExtractor
from .services.bluebook_rules import BluebookFormatter, ShortFormManager
from .services.context_analyzer import DocumentContextAnalyzer
from .services.lookup_service import LegalLookupService, CitationCompleter
from .services.source_finder import ClaimDetector
from .models.citation import (
    Citation, DocumentAnalysis, UploadResponse,
    AnalysisResponse, AnalysisStats, CitationType
)
20+
21+
# Global services
parser = DocumentParser()
extractor = CitationExtractor()
formatter = BluebookFormatter()
# Created by `lifespan` at startup; stays None until then.
lookup_service: Optional[LegalLookupService] = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifespan.

    Creates the module-level ``lookup_service`` before the application starts
    serving and closes it when the application shuts down.
    """
    global lookup_service
    lookup_service = LegalLookupService()
    try:
        yield
    finally:
        # Close the service even when shutdown is driven by an exception or
        # cancellation thrown in at the yield point.
        await lookup_service.close()
34+
35+
app = FastAPI(
    title="Bluebook Citation Generator",
    description="Automated citation formatting per Bluebook 21st Edition",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS configuration
# The local development origins below are always allowed. A deployed frontend
# (for example a GitHub Pages build) can be allowed without a code change by
# setting CORS_ALLOWED_ORIGINS to a comma-separated list of origins.
_DEFAULT_CORS_ORIGINS = [
    "http://localhost:3000",
    "http://localhost:5173",
    "http://127.0.0.1:5173",
]
_extra_cors_origins = [
    # Browsers send the Origin header without a trailing slash, so drop one
    # if it was configured by mistake.
    origin.strip().rstrip("/")
    for origin in os.environ.get("CORS_ALLOWED_ORIGINS", "").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=[*_DEFAULT_CORS_ORIGINS, *_extra_cors_origins],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
54+
55+
56+
@app.get("/")
async def root():
    """Root endpoint.

    Returns:
        A dict with the API name under ``message`` and the application
        version under ``version``.
    """
    # Read the version from the app object so it cannot drift from the value
    # passed to FastAPI(...).
    return {"message": "Bluebook Citation Generator API", "version": app.version}
60+
61+
62+
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns:
        A dict with ``status`` set to ``"healthy"`` and the application
        version under ``version``.
    """
    # Read the version from the app object so it cannot drift from the value
    # passed to FastAPI(...).
    return {"status": "healthy", "version": app.version}
66+
67+
68+
@app.post("/api/upload", response_model=UploadResponse)
async def upload_document(file: UploadFile = File(...)):
    """Upload a legal document for citation analysis.

    The upload is parsed to text, its structure is analysed, and the full
    text is returned together with a short preview and a freshly generated
    document id.

    Args:
        file: The uploaded PDF, DOCX or plain-text file.

    Returns:
        An ``UploadResponse`` describing the parsed document.

    Raises:
        HTTPException: 400 when the media type is not supported or the
            document cannot be parsed.
    """
    allowed_types = {
        "application/pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "text/plain",
    }

    # A Content-Type header may carry parameters ("text/plain; charset=utf-8")
    # or differ in case; compare only the bare, lower-cased media type.
    content_type = (file.content_type or "").split(";", 1)[0].strip().lower()

    if content_type not in allowed_types:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file.content_type}. Supported: PDF, DOCX, TXT"
        )

    content = await file.read()

    try:
        document_text = parser.parse(content, content_type)
    except Exception as e:
        # Any parser failure is reported to the client as a bad upload; the
        # original exception is chained so it stays visible in tracebacks.
        raise HTTPException(
            status_code=400, detail=f"Failed to parse document: {str(e)}"
        ) from e

    doc_id = str(uuid.uuid4())

    analyzer = DocumentContextAnalyzer()
    structure = analyzer.analyze_document_structure(document_text)

    # The preview is the first 500 characters, with an ellipsis when truncated.
    preview_length = 500
    preview = document_text[:preview_length]
    if len(document_text) > preview_length:
        preview += "..."

    return UploadResponse(
        document_id=doc_id,
        filename=file.filename or "document",
        word_count=structure["estimated_word_count"],
        citation_style=structure["citation_style"],
        has_footnotes=structure["has_footnotes"],
        text_preview=preview,
        full_text=document_text,
    )
109+
110+
111+
@app.post("/api/analyze", response_model=AnalysisResponse)
async def analyze_citations(
    document_id: str = Body(...),
    text: str = Body(...),
    filename: str = Body(default="document"),
):
    """Extract and analyze all citations in the document.

    Citations are extracted from ``text``. Those whose status is
    ``incomplete`` or ``needs_verification`` are passed through the citation
    completer, and every citation receives a formatted suggestion. The result
    also carries short-form suggestions, detected unsourced claims and
    summary statistics.
    """
    extracted = extractor.extract_all(text)

    # Only citations in these states are sent to the completer.
    needs_lookup = ("incomplete", "needs_verification")
    completer = CitationCompleter(lookup_service)

    processed = []
    for item in extracted:
        if item.status.value in needs_lookup:
            item = await completer.complete_citation(item)
        item.suggested_correction = formatter.format_citation(item)
        processed.append(item)

    # Short-form suggestions come from the order of the processed citations.
    suggestions = DocumentContextAnalyzer().analyze_citation_sequence(processed)

    # Claim detection receives the text plus the span of every citation.
    spans = [(item.position_start, item.position_end) for item in processed]
    unsourced = ClaimDetector().detect_unsourced_claims(text, spans)

    # Tally the final status of each citation in a single pass.
    tally = {"complete": 0, "incomplete": 0, "needs_verification": 0}
    for item in processed:
        state = item.status.value
        if state in tally:
            tally[state] += 1

    stats = AnalysisStats(
        total_citations=len(processed),
        complete=tally["complete"],
        incomplete=tally["incomplete"],
        needs_verification=tally["needs_verification"],
        unsourced_claims=len(unsourced),
    )

    # Highest footnote number seen; a missing number counts as 0.
    highest_footnote = max(
        ((item.footnote_number or 0) for item in processed),
        default=0,
    )

    analysis = DocumentAnalysis(
        document_id=document_id,
        filename=filename,
        total_footnotes=highest_footnote,
        citations=processed,
        unsourced_claims=unsourced,
    )

    return AnalysisResponse(
        analysis=analysis,
        short_form_suggestions=suggestions,
        stats=stats,
    )
165+
166+
167+
@app.post("/api/format")
async def format_citation(citation_data: dict):
    """Format a single citation according to Bluebook rules.

    Builds a ``Citation`` from the request body and returns its raw text, the
    formatted citation and the citation type. Any failure while building or
    formatting is reported as HTTP 400 with the error message as detail.
    """
    try:
        parsed = Citation(**citation_data)
        formatted_text = formatter.format_citation(parsed)
        payload = {
            "original": parsed.raw_text,
            "formatted": formatted_text,
            "type": parsed.type.value,
        }
        return payload
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
181+
182+
183+
@app.post("/api/lookup")
async def lookup_citation(citation_data: dict):
    """Look up a citation in legal databases.

    Builds a ``Citation`` from the request body and returns whatever the
    lookup service reports for it. Any failure while building the citation or
    performing the lookup is reported as HTTP 400 with the error message as
    detail.
    """
    try:
        parsed = Citation(**citation_data)
        return await lookup_service.lookup_citation(parsed)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
192+
193+
194+
# Matches "<volume> <reporter> <page>" at the start of a citation string.
# The reporter may contain digits ("F.3d", "F. Supp. 2d"). The lazy reporter
# group plus the trailing word boundary make the page the first standalone
# number after the reporter, so the "2" in "2d" is never taken as the page.
_VOLUME_REPORTER_PAGE_RE = re.compile(
    r"(\d+)\s+([A-Za-z][A-Za-z0-9.\s]*?)\s+(\d+)\b"
)


@app.post("/api/lookup/case")
async def lookup_case(
    parties: Optional[str] = Body(default=None),
    citation: Optional[str] = Body(default=None),
):
    """Look up a case by parties or citation string.

    Args:
        parties: Party names, e.g. ``"Roe v. Wade"``. When the string contains
            `` v. `` or `` v `` it is split into individual party names.
        citation: A reporter citation such as ``"410 U.S. 113"`` or
            ``"123 F.3d 456"``. When it starts with volume, reporter and
            page, those parts are copied onto the search citation.

    Returns:
        Whatever the lookup service returns for the assembled citation.
    """
    search_citation = Citation(
        type=CitationType.CASE,
        status="incomplete",
        raw_text=parties or citation or "",
        position_start=0,
        position_end=0,
    )

    if parties:
        # Prefer the dotted separator; fall back to a bare " v ".
        for separator in (" v. ", " v "):
            if separator in parties:
                search_citation.parties = [
                    p.strip() for p in parties.split(separator)
                ]
                break

    if citation:
        match = _VOLUME_REPORTER_PAGE_RE.match(citation)
        if match:
            search_citation.volume = match.group(1)
            search_citation.reporter = match.group(2).strip()
            search_citation.page = match.group(3)

    results = await lookup_service.lookup_citation(search_citation)
    return results
225+
226+
227+
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 8000.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Models package
2+
from .citation import (
3+
Citation, CitationType, CitationStatus, CitationContext,
4+
UnsourcedClaim, DocumentAnalysis, UploadResponse,
5+
AnalysisStats, AnalysisResponse
6+
)

0 commit comments

Comments
 (0)