Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 30 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,25 +81,25 @@ An alternate approach to get up and running is to use [Github Codespaces](https:

1. Clone the repository[^2]:

```bash
git clone https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative.git
cd Ballot-Initiative
```
```bash
git clone https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative.git
cd Ballot-Initiative
```

[^2]: Optionally you may want to [fork this repository](https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative/fork)
[^2]: Optionally you may want to [fork this repository](https://github.com/Civic-Tech-Ballot-Inititiave/Ballot-Initiative/fork)

2. Create and activate a virtual environment:

```bash
# Initialise project and install dependencies
uv sync --all-extras --dev
```bash
# Initialise project and install dependencies
uv sync --all-extras --dev

# Activate virtual environment
# On Windows:
venv\Scripts\activate
# On macOS/Linux:
source venv/bin/activate
```
# Activate virtual environment
# On Windows:
venv\Scripts\activate
# On macOS/Linux:
source venv/bin/activate
```

3. Configure and save settings:
- Make a copy of the `settings.example.toml` file and rename it to `settings.toml`.
Expand All @@ -110,9 +110,9 @@ source venv/bin/activate

1. Start the Streamlit app:

```bash
uv run main.py
```
```bash
uv run main.py
```

2. Upload your files:
- PDF of signed petitions
Expand All @@ -125,9 +125,19 @@ uv run main.py
2. Activate the virtual environment
3. Run the following command:

```bash
uv run pytest
```
```bash
uv run pytest
```

### Running API

1. Navigate to the project root folder
2. Activate the virtual environment
3. Run the following command:

```bash
uv run fastapi dev app/api.py
```

## Project Documentation

Expand Down
109 changes: 109 additions & 0 deletions app/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os
from enum import Enum
from io import BytesIO

import pandas as pd
from fastapi import FastAPI, Response, UploadFile
from fuzzy_match_helper import create_ocr_matched_df, create_select_voter_records
from ocr_helper import create_ocr_df
from settings.settings_repo import config
from utils import logger

# FastAPI application instance; the route decorators in this module register against it.
app = FastAPI()
# In-memory slot for the uploaded voter-records DataFrame. It stays None until a
# voter-records CSV has been uploaded, and is reset to None by the /clear endpoint.
app.state.voter_records_df = None

class UploadFileTypes(str, Enum):
    """Kinds of upload accepted by the ``/upload/{filetype}`` route.

    The members are also ``str`` instances, so each member compares equal to
    its plain string value.
    """

    # CSV export of voter records.
    voter_records = "voter_records"
    # PDF containing the signed petition pages.
    petition_signatures = "petition_signatures"

@app.post("/upload/{filetype}")
def upload_file(filetype: UploadFileTypes, file: UploadFile, response: Response):
"""Uploads file to the server and saves it to a temporary directory.

Args:
filetype (UploadFileTypes): can be voter_records or petition_signatures
"""
logger.info(f"Received file: {file.filename} of type: {filetype}")

# Validate file type extension
match filetype:
case UploadFileTypes.petition_signatures:
if not file.filename.endswith(".pdf"):
response.status_code = 400
return {"error": "Invalid file type. Only pdf files are allowed."}
with open(os.path.join('temp', 'ballot.pdf'), "wb") as buffer:
buffer.write(file.file.read())
logger.info("File saved to temporary directory: temp/ballot.pdf")
case UploadFileTypes.voter_records:
if not file.filename.endswith(".csv"):
response.status_code = 400
return {"error": "Invalid file type. Only .csv files are allowed."}
contents = file.file.read()
buffer = BytesIO(contents)
df = pd.read_csv(buffer, dtype=str)

# Create necessary columns
df['Full Name'] = df["First_Name"] + ' ' + df['Last_Name']
df['Full Address'] = df["Street_Number"] + " " + df["Street_Name"] + " " + \
df["Street_Type"] + " " + df["Street_Dir_Suffix"]

required_columns = ["First_Name", "Last_Name", "Street_Number",
"Street_Name", "Street_Type", "Street_Dir_Suffix"]
app.state.voter_records_df = df

# Verify required columns
if not all(col in df.columns for col in required_columns):
response.status_code = 400
return {"error": "Missing required columns in voter records file."}


return {"filename": file.filename}

@app.post("/ocr")
def ocr(response: Response):
    """Run OCR on the uploaded petition PDF and match it to voter records.

    Requires that a petition PDF exists at ``temp/ballot.pdf`` and that voter
    records have been stored on ``app.state.voter_records_df``.

    Args:
        response (Response): used to set a 400 status when a prerequisite
            upload is missing.

    Returns:
        Response: the matched results as a ``text/csv`` attachment named
        ``ocr_matched.csv``; or a dict ``{"error": ...}`` with a 400 status
        code when the PDF or the voter records are missing.
    """
    if not os.path.exists('temp/ballot.pdf'):
        logger.error("No PDF file found for petition signatures")
        response.status_code = 400
        return {"error": "No PDF file found for petition signatures"}
    if app.state.voter_records_df is None:
        logger.error("No voter records file found")
        response.status_code = 400
        return {"error": "No voter records file found"}

    logger.info("Starting OCR processing...")
    logger.info("Converting PDF to images...")
    ocr_df = create_ocr_df(filedir='temp',
                           filename='ballot.pdf')

    logger.info("Compiling Voter Record Data...")
    select_voter_records = create_select_voter_records(app.state.voter_records_df)

    logger.info("Matching petition signatures to voter records...")
    ocr_matched_df = create_ocr_matched_df(
        ocr_df,
        select_voter_records,
        threshold=config['BASE_THRESHOLD']
    )

    # Return an explicit Response: returning the bare string would be
    # JSON-encoded by FastAPI (quoted, with escaped newlines) even though the
    # headers claim text/csv.
    return Response(
        content=ocr_matched_df.to_csv(),
        media_type='text/csv',
        headers={'Content-Disposition': 'attachment; filename=ocr_matched.csv'},
    )

@app.delete("/clear")
def clear_all_files():
    """Reset the stored voter records and remove the uploaded petition PDF.

    Returns:
        dict: a fixed confirmation message, whether or not a PDF was present.
    """
    pdf_path = 'temp/ballot.pdf'

    # Drop the in-memory voter records unconditionally.
    app.state.voter_records_df = None

    if not os.path.exists(pdf_path):
        logger.warning("No files to delete")
    else:
        os.remove(pdf_path)
        logger.info("Deleted all files")

    return {"message": "All files deleted"}

2 changes: 2 additions & 0 deletions app/settings/settings_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
logger,
)

# Module-level default settings. BASE_THRESHOLD is passed as the `threshold`
# argument to create_ocr_matched_df by the /ocr endpoint in app/api.py.
# NOTE(review): TOP_CROP / BOTTOM_CROP look like fractional crop bounds for the
# petition page image — confirm against the OCR helper that consumes them.
config = {"BASE_THRESHOLD": 85, "TOP_CROP": 0.385, "BOTTOM_CROP": 0.725}


@dataclass
class OpenAiConfig:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ maintainers = [
license = "MIT"
dependencies = [
"dotenv>=0.9.9",
"fastapi[standard]>=0.115.12",
"httpx>=0.28.1",
"ipywidgets>=8.1.5",
"langchain-core>=0.3.51",
Expand All @@ -27,6 +28,7 @@ dependencies = [
"pillow>=11.1.0",
"pydantic>=2.11.2",
"pymupdf>=1.25.5",
"python-multipart>=0.0.20",
"rapidfuzz>=3.13.0",
"requests>=2.32.3",
"scikit-learn>=1.6.1",
Expand All @@ -35,7 +37,6 @@ dependencies = [
"structlog>=25.2.0",
"tomli>=2.2.1",
"tomli-w>=1.2.0",
"tqdm>=4.67.1",
]

[dependency-groups]
Expand Down
Loading