1+ import os
2+ from enum import Enum
3+ from io import BytesIO
4+
5+ import pandas as pd
6+ from fastapi import FastAPI , Response , UploadFile
7+ from fuzzy_match_helper import create_ocr_matched_df , create_select_voter_records
8+ from ocr_helper import create_ocr_df
9+ from settings .settings_repo import config
10+ from utils import logger
11+
# Single FastAPI application instance; the route handlers below register on it.
app = FastAPI()

# Voter-records DataFrame shared between requests. It stays None until a
# voter_records CSV has been uploaded, and the /ocr handler checks for that.
app.state.voter_records_df = None
14+
class UploadFileTypes(str, Enum):
    """Kinds of upload accepted by the ``/upload/{filetype}`` route.

    The ``str`` mixin makes each member compare equal to its plain string
    value, so the member can be matched directly against the URL path segment.
    """

    # CSV export of the voter records.
    voter_records = "voter_records"
    # PDF containing the petition signatures.
    petition_signatures = "petition_signatures"
18+
19+ @app .post ("/upload/{filetype}" )
20+ def upload_file (filetype : UploadFileTypes , file : UploadFile , response : Response ):
21+ """Uploads file to the server and saves it to a temporary directory.
22+
23+ Args:
24+ filetype (UploadFileTypes): can be voter_records or petition_signatures
25+ """
26+ logger .info (f"Received file: { file .filename } of type: { filetype } " )
27+
28+ # Validate file type extension
29+ match filetype :
30+ case UploadFileTypes .petition_signatures :
31+ if not file .filename .endswith (".pdf" ):
32+ response .status_code = 400
33+ return {"error" : "Invalid file type. Only pdf files are allowed." }
34+ with open (os .path .join ('temp' , 'ballot.pdf' ), "wb" ) as buffer :
35+ buffer .write (file .file .read ())
36+ logger .info ("File saved to temporary directory: temp/ballot.pdf" )
37+ case UploadFileTypes .voter_records :
38+ if not file .filename .endswith (".csv" ):
39+ response .status_code = 400
40+ return {"error" : "Invalid file type. Only .csv files are allowed." }
41+ contents = file .file .read ()
42+ buffer = BytesIO (contents )
43+ df = pd .read_csv (buffer , dtype = str )
44+
45+ # Create necessary columns
46+ df ['Full Name' ] = df ["First_Name" ] + ' ' + df ['Last_Name' ]
47+ df ['Full Address' ] = df ["Street_Number" ] + " " + df ["Street_Name" ] + " " + \
48+ df ["Street_Type" ] + " " + df ["Street_Dir_Suffix" ]
49+
50+ required_columns = ["First_Name" , "Last_Name" , "Street_Number" ,
51+ "Street_Name" , "Street_Type" , "Street_Dir_Suffix" ]
52+ app .state .voter_records_df = df
53+
54+ # Verify required columns
55+ if not all (col in df .columns for col in required_columns ):
56+ response .status_code = 400
57+ return {"error" : "Missing required columns in voter records file." }
58+
59+
60+ return {"filename" : file .filename }
61+
@app.post("/ocr")
def ocr(response: Response):
    """
    Triggers the OCR process on the uploaded petition signatures PDF file.

    Requires that a petition PDF has been saved to ``temp/ballot.pdf`` and that
    voter records have been loaded into ``app.state.voter_records_df``.

    Args:
        response (Response): injected response, used to set HTTP 400 when a
            prerequisite upload is missing.

    Returns:
        On success, a ``text/csv`` response (served as the attachment
        ``ocr_matched.csv``) containing the matched results. If either input
        is missing, ``{"error": <message>}`` with status code 400.
    """
    if not os.path.exists('temp/ballot.pdf'):
        logger.error("No PDF file found for petition signatures")
        response.status_code = 400
        return {"error": "No PDF file found for petition signatures"}
    if app.state.voter_records_df is None:
        logger.error("No voter records file found")
        response.status_code = 400
        return {"error": "No voter records file found"}

    logger.info("Starting OCR processing...")
    logger.info("Converting PDF to images...")

    ocr_df = create_ocr_df(filedir='temp',
                           filename='ballot.pdf')

    logger.info("Compiling Voter Record Data...")

    select_voter_records = create_select_voter_records(app.state.voter_records_df)

    logger.info("Matching petition signatures to voter records...")

    ocr_matched_df = create_ocr_matched_df(
        ocr_df,
        select_voter_records,
        threshold=config['BASE_THRESHOLD']
    )

    # Return a Response directly: a plain str return value would be
    # JSON-encoded by FastAPI (quoted, with escaped newlines), which
    # contradicts the CSV headers this endpoint advertises.
    return Response(
        content=ocr_matched_df.to_csv(),
        media_type='text/csv',
        headers={'Content-Disposition': 'attachment; filename=ocr_matched.csv'},
    )
96+
@app.delete("/clear")
def clear_all_files():
    """
    Reset the server: forget the voter records and delete the petition PDF.

    Returns:
        dict: a fixed confirmation message, whether or not a PDF was present.
    """
    # The in-memory voter records are always discarded.
    app.state.voter_records_df = None

    ballot_pdf = 'temp/ballot.pdf'
    if not os.path.exists(ballot_pdf):
        logger.warning("No files to delete")
    else:
        os.remove(ballot_pdf)
        logger.info("Deleted all files")

    return {"message": "All files deleted"}
109+
0 commit comments