11import os
2- from enum import Enum
3- from io import BytesIO
42
5- import pandas as pd
6- from fastapi import FastAPI , Response , UploadFile
3+ from fastapi import FastAPI , Response
4+ from fastapi . middleware . cors import CORSMiddleware
75from fuzzy_match_helper import create_ocr_matched_df , create_select_voter_records
86from ocr_helper import create_ocr_df
7+ from routers import file
98from settings .settings_repo import config
109from utils import logger
1110
12- app = FastAPI ()
11+ app = FastAPI (root_path = "/api" )
1312app .state .voter_records_df = None
1413
15- class UploadFileTypes (str , Enum ):
16- voter_records = "voter_records"
17- petition_signatures = "petition_signatures"
18-
19- @app .post ("/upload/{filetype}" )
20- def upload_file (filetype : UploadFileTypes , file : UploadFile , response : Response ):
21- """Uploads file to the server and saves it to a temporary directory.
14+ origins = [
15+ "http://localhost" ,
16+ "http://localhost:5173" ,
17+ ]
2218
23- Args:
24- filetype (UploadFileTypes): can be voter_records or petition_signatures
25- """
26- logger .info (f"Received file: { file .filename } of type: { filetype } " )
27-
28- # Validate file type extension
29- match filetype :
30- case UploadFileTypes .petition_signatures :
31- if not file .filename .endswith (".pdf" ):
32- response .status_code = 400
33- return {"error" : "Invalid file type. Only pdf files are allowed." }
34- with open (os .path .join ('temp' , 'ballot.pdf' ), "wb" ) as buffer :
35- buffer .write (file .file .read ())
36- logger .info ("File saved to temporary directory: temp/ballot.pdf" )
37- case UploadFileTypes .voter_records :
38- if not file .filename .endswith (".csv" ):
39- response .status_code = 400
40- return {"error" : "Invalid file type. Only .csv files are allowed." }
41- contents = file .file .read ()
42- buffer = BytesIO (contents )
43- df = pd .read_csv (buffer , dtype = str )
44-
45- # Create necessary columns
46- df ['Full Name' ] = df ["First_Name" ] + ' ' + df ['Last_Name' ]
47- df ['Full Address' ] = df ["Street_Number" ] + " " + df ["Street_Name" ] + " " + \
48- df ["Street_Type" ] + " " + df ["Street_Dir_Suffix" ]
19+ app .add_middleware (
20+ CORSMiddleware ,
21+ allow_origins = origins ,
22+ allow_credentials = True ,
23+ allow_methods = ["*" ], # Allows all HTTP methods
24+ allow_headers = ["*" ], # Allows all headers
25+ )
4926
50- required_columns = ["First_Name" , "Last_Name" , "Street_Number" ,
51- "Street_Name" , "Street_Type" , "Street_Dir_Suffix" ]
52- app .state .voter_records_df = df
53-
54- # Verify required columns
55- if not all (col in df .columns for col in required_columns ):
56- response .status_code = 400
57- return {"error" : "Missing required columns in voter records file." }
27+ app .include_router (file .router )
5828
5929
60- return {"filename" : file .filename }
61-
62- @app .post ("/ocr" )
30+ @app .post ("/ocr" , tags = ["OCR" ])
6331def ocr (response : Response ):
6432 """
6533 Triggers the OCR process on the uploaded petition signatures PDF file.
6634 """
67- if not os .path .exists (' temp/ballot.pdf' ):
35+ if not os .path .exists (" temp/ballot.pdf" ):
6836 logger .error ("No PDF file found for petition signatures" )
6937 response .status_code = 400
7038 return {"error" : "No PDF file found for petition signatures" }
@@ -75,35 +43,17 @@ def ocr(response: Response):
7543 logger .info ("Starting OCR processing..." )
7644 # Process files if in processing state
7745 logger .info ("Converting PDF to images..." )
78-
79- ocr_df = create_ocr_df (filedir = 'temp' ,
80- filename = 'ballot.pdf' )
81-
46+
47+ ocr_df = create_ocr_df (filedir = "temp" , filename = "ballot.pdf" )
48+
8249 logger .info ("Compiling Voter Record Data..." )
8350
8451 select_voter_records = create_select_voter_records (app .state .voter_records_df )
85-
52+
8653 logger .info ("Matching petition signatures to voter records..." )
8754
8855 ocr_matched_df = create_ocr_matched_df (
89- ocr_df ,
90- select_voter_records ,
91- threshold = config ['BASE_THRESHOLD' ]
56+ ocr_df , select_voter_records , threshold = config ["BASE_THRESHOLD" ]
9257 )
93- response .headers ['Content-Disposition' ] = 'attachment; filename=ocr_matched.csv'
94- response .headers ['Content-Type' ] = 'text/csv'
95- return ocr_matched_df .to_csv ()
96-
97- @app .delete ("/clear" )
98- def clear_all_files ():
99- """
100- Delete all files
101- """
102- app .state .voter_records_df = None
103- if os .path .exists ('temp/ballot.pdf' ):
104- os .remove ('temp/ballot.pdf' )
105- logger .info ("Deleted all files" )
106- else :
107- logger .warning ("No files to delete" )
108- return {"message" : "All files deleted" }
109-
58+ response .headers ["Content-Type" ] = "application/json"
59+ return {"data" : ocr_matched_df .to_dict (orient = "records" ), "stats" : {}}
0 commit comments