Skip to content

Commit 54d0ef3

Browse files
committed
add initial backend api
1 parent 4aa164c commit 54d0ef3

File tree

5 files changed

+438
-23
lines changed

5 files changed

+438
-23
lines changed

README.md

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -81,25 +81,25 @@ An alternate approach to get up and running is to use [Github Codespaces](https:
8181

8282
1. Clone the repository[^2]:
8383

84-
```bash
85-
git clone https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative.git
86-
cd ballot-initiative
87-
```
84+
```bash
85+
git clone https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative.git
86+
cd ballot-initiative
87+
```
8888

89-
[^2]: Optionally you may want to [fork this repository](https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative/fork)
89+
[^2]: Optionally you may want to [fork this repository](https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative/fork)
9090

9191
2. Create and activate a virtual environment:
9292

93-
```bash
94-
# Initialise project and install dependencies
95-
uv sync --all-extras --dev
93+
```bash
94+
# Initialise project and install dependencies
95+
uv sync --all-extras --dev
9696

97-
# Activate virtual environment
98-
# On Windows:
99-
venv\Scripts\activate
100-
# On macOS/Linux:
101-
source venv/bin/activate
102-
```
97+
# Activate virtual environment
98+
# On Windows:
99+
venv\Scripts\activate
100+
# On macOS/Linux:
101+
source venv/bin/activate
102+
```
103103

104104
3. Configure and save settings:
105105
- Make a copy of the `settings.example.toml` file and rename it to `settings.toml`.
@@ -110,9 +110,9 @@ source venv/bin/activate
110110

111111
1. Start the Streamlit app:
112112

113-
```bash
114-
uv run main.py
115-
```
113+
```bash
114+
uv run main.py
115+
```
116116

117117
2. Upload your files:
118118
- PDF of signed petitions
@@ -125,9 +125,19 @@ uv run main.py
125125
2. Activate the virtual environment
126126
3. Run the following command:
127127

128-
```bash
129-
uv run pytest
130-
```
128+
```bash
129+
uv run pytest
130+
```
131+
132+
### Running API
133+
134+
1. Navigate to the project root folder
135+
2. Activate the virtual environment
136+
3. Run the following command:
137+
138+
```bash
139+
uv run fastapi dev app/api.py
140+
```
131141

132142
## Project Documentation
133143

app/api.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import os
2+
from enum import Enum
3+
from io import BytesIO
4+
5+
import pandas as pd
6+
from fastapi import FastAPI, Response, UploadFile
7+
from fuzzy_match_helper import create_ocr_matched_df, create_select_voter_records
8+
from ocr_helper import create_ocr_df
9+
from settings.settings_repo import config
10+
from utils import logger
11+
12+
# Single FastAPI application instance; the route handlers below register on it.
app = FastAPI()
# Cache slot for the parsed voter-records DataFrame. ``None`` means no voter
# file has been uploaded yet (the /ocr handler checks for this).
app.state.voter_records_df = None
14+
15+
class UploadFileTypes(str, Enum):
    """Kinds of upload accepted as the ``filetype`` path parameter.

    The ``str`` mixin makes every member compare equal to its plain string
    value, so the raw path segment maps directly onto a member.
    """

    # Voter records (the upload handler expects a .csv file for this kind).
    voter_records = "voter_records"
    # Signed petition pages (the upload handler expects a .pdf file).
    petition_signatures = "petition_signatures"
18+
19+
@app.post("/upload/{filetype}")
20+
def upload_file(filetype: UploadFileTypes, file: UploadFile, response: Response):
21+
"""Uploads file to the server and saves it to a temporary directory.
22+
23+
Args:
24+
filetype (UploadFileTypes): can be voter_records or petition_signatures
25+
"""
26+
logger.info(f"Received file: {file.filename} of type: {filetype}")
27+
28+
# Validate file type extension
29+
match filetype:
30+
case UploadFileTypes.petition_signatures:
31+
if not file.filename.endswith(".pdf"):
32+
response.status_code = 400
33+
return {"error": "Invalid file type. Only pdf files are allowed."}
34+
with open(os.path.join('temp', 'ballot.pdf'), "wb") as buffer:
35+
buffer.write(file.file.read())
36+
logger.info("File saved to temporary directory: temp/ballot.pdf")
37+
case UploadFileTypes.voter_records:
38+
if not file.filename.endswith(".csv"):
39+
response.status_code = 400
40+
return {"error": "Invalid file type. Only .csv files are allowed."}
41+
contents = file.file.read()
42+
buffer = BytesIO(contents)
43+
df = pd.read_csv(buffer, dtype=str)
44+
45+
# Create necessary columns
46+
df['Full Name'] = df["First_Name"] + ' ' + df['Last_Name']
47+
df['Full Address'] = df["Street_Number"] + " " + df["Street_Name"] + " " + \
48+
df["Street_Type"] + " " + df["Street_Dir_Suffix"]
49+
50+
required_columns = ["First_Name", "Last_Name", "Street_Number",
51+
"Street_Name", "Street_Type", "Street_Dir_Suffix"]
52+
app.state.voter_records_df = df
53+
54+
# Verify required columns
55+
if not all(col in df.columns for col in required_columns):
56+
response.status_code = 400
57+
return {"error": "Missing required columns in voter records file."}
58+
59+
60+
return {"filename": file.filename}
61+
62+
@app.post("/ocr")
def ocr(response: Response):
    """
    Triggers the OCR process on the uploaded petition signatures PDF file.

    Calls ``create_ocr_df`` on ``temp/ballot.pdf``, builds the voter-record
    selection from the cached ``app.state.voter_records_df``, and matches the
    two with ``create_ocr_matched_df`` using ``config['BASE_THRESHOLD']``.

    Args:
        response (Response): injected response; its status code is set to 400
            when a prerequisite upload is missing.

    Returns:
        A ``text/csv`` attachment named ``ocr_matched.csv`` on success, or an
        ``{"error": <message>}`` dict (with status 400) when either the
        petition PDF or the voter records have not been uploaded.
    """
    if not os.path.exists('temp/ballot.pdf'):
        logger.error("No PDF file found for petition signatures")
        response.status_code = 400
        return {"error": "No PDF file found for petition signatures"}
    if app.state.voter_records_df is None:
        logger.error("No voter records file found")
        response.status_code = 400
        return {"error": "No voter records file found"}

    logger.info("Starting OCR processing...")
    logger.info("Converting PDF to images...")

    ocr_df = create_ocr_df(filedir='temp',
                           filename='ballot.pdf')

    logger.info("Compiling Voter Record Data...")

    select_voter_records = create_select_voter_records(app.state.voter_records_df)

    logger.info("Matching petition signatures to voter records...")

    ocr_matched_df = create_ocr_matched_df(
        ocr_df,
        select_voter_records,
        threshold=config['BASE_THRESHOLD']
    )

    # Return a Response directly. A bare ``str`` return value would be passed
    # through FastAPI's default JSON serialisation, producing a quoted and
    # escaped string body that contradicts the text/csv Content-Type.
    return Response(
        content=ocr_matched_df.to_csv(),
        media_type='text/csv',
        headers={'Content-Disposition': 'attachment; filename=ocr_matched.csv'},
    )
96+
97+
@app.delete("/clear")
def clear_all_files():
    """
    Reset server-side upload state.

    Drops the cached voter-records DataFrame and removes the stored petition
    PDF (``temp/ballot.pdf``) when it exists.

    Returns:
        dict: a fixed confirmation message, regardless of whether a PDF was
        actually present.
    """
    pdf_path = 'temp/ballot.pdf'

    # The in-memory voter records are always cleared.
    app.state.voter_records_df = None

    if not os.path.exists(pdf_path):
        logger.warning("No files to delete")
    else:
        os.remove(pdf_path)
        logger.info("Deleted all files")

    return {"message": "All files deleted"}
109+

app/settings/settings_repo.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
logger,
88
)
99

10+
config = {"BASE_THRESHOLD": 85, "TOP_CROP": 0.385, "BOTTOM_CROP": 0.725}
11+
1012

1113
@dataclass
1214
class OpenAiConfig:

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ maintainers = [
1414
license = "MIT"
1515
dependencies = [
1616
"dotenv>=0.9.9",
17+
"fastapi[standard]>=0.115.12",
1718
"httpx>=0.28.1",
1819
"ipywidgets>=8.1.5",
1920
"langchain-core>=0.3.51",
@@ -27,6 +28,7 @@ dependencies = [
2728
"pillow>=11.1.0",
2829
"pydantic>=2.11.2",
2930
"pymupdf>=1.25.5",
31+
"python-multipart>=0.0.20",
3032
"rapidfuzz>=3.13.0",
3133
"requests>=2.32.3",
3234
"scikit-learn>=1.6.1",
@@ -35,7 +37,6 @@ dependencies = [
3537
"structlog>=25.2.0",
3638
"tomli>=2.2.1",
3739
"tomli-w>=1.2.0",
38-
"tqdm>=4.67.1",
3940
]
4041

4142
[dependency-groups]

0 commit comments

Comments
 (0)