11from fastapi import HTTPException
2- from sqlalchemy import select , func , cast
2+ from sqlalchemy import select , func
33from sqlalchemy .orm import Session
44from sqlalchemy .engine import Engine
55from sqlalchemy .dialects .postgresql import array_agg
66
7- from src .database .postgres .models import Attendance , StudentAttendance , StudentEmail
7+ from src .database .postgres .models import Attendance
88import gspread
99import pandas
10- import numpy as np
11- from typing import List , Dict
12- from datetime import date
1310from src .config import settings
1411
15-
1612def fetch_attendance (eng : Engine ):
1713 """
1814 Fetch roster from associated Accelerate tables, and return it as a pd dataframe
@@ -57,99 +53,4 @@ def fetch_attendance(eng: Engine):
5753 "End Date" : str ,
5854 "Processed On" : str }) # Date objects not allowed
5955 attendance_frame = attendance_frame .fillna ('' ) # Empty cells (na) not allowed, replaced with empty strings
60- return attendance_frame
61-
def fetch_group_attendance(eng: Engine, start_date: date, end_date: date, cti_ids: Dict[int, str]):
    """
    Fetch attendance records and create an attendance matrix of select cti_ids and a date range,
    given the associated Accelerate tables.

    The result has one row per requested cti_id (index named "cti_id"), an "email" column,
    and one boolean column per calendar day labelled "YYYY-MM-DD". A cell is True when the
    student has at least one attendance record on that day.

    @param eng: A SQLAlchemy Engine object that connects to the database
    @param start_date: First day (inclusive) of the matrix
    @param end_date: Last day (inclusive) of the matrix
    @param cti_ids: The CTI ids to report on. NOTE(review): annotated as a Dict, but the value
                    is only ever iterated as a collection of ids (a dict contributes its keys);
                    confirm what callers pass.
    @return: A pandas DataFrame as described above. When cti_ids is empty, an empty frame
             with plain "cti_id" and "email" columns is returned instead.
    """
    if not cti_ids:
        return pandas.DataFrame(columns=["cti_id", "email"])

    # Imported here so the block carries its own dependency: casting needs a SQLAlchemy
    # type, and datetime.date (what the name `date` refers to in this module) is not one.
    from sqlalchemy import Date

    ids = list(cti_ids)

    # 1) Build cti_id -> email mapping, defaulting to "NOT FOUND"
    id_to_email = fetch_cti_emails(eng, ids)

    # 2) Build base CTI/email frame
    index = pandas.Index(ids, name="cti_id")
    cti_data = pandas.DataFrame(
        {"email": [id_to_email.get(cid, "NOT FOUND") for cid in ids]},
        index=index,
    )

    # 3) Build date columns, all False until attendance is found
    dates = pandas.date_range(start_date, end_date)
    attended = pandas.DataFrame(False, index=index, columns=dates)

    # 4) Fetch attendance for those CTI IDs and date range.
    # The range filter is applied to the calendar day rather than to session_start itself,
    # so sessions that happen later in the day on end_date are not dropped if session_start
    # is a timestamp column (if it is already a DATE the cast changes nothing).
    session_date = cast(Attendance.session_start, Date)
    attendance_query = (
        select(
            StudentAttendance.cti_id,
            session_date.label("session_date"),
        )
        .join(Attendance, Attendance.session_id == StudentAttendance.session_id)
        .where(StudentAttendance.cti_id.in_(ids))
        .where(session_date.between(start_date, end_date))
        .distinct()  # several sessions on one day mark the same cell
    )

    attendance_frame = pandas.read_sql(attendance_query, eng)

    # 5) Mark True where there was attendance
    if not attendance_frame.empty:
        session_days = pandas.to_datetime(attendance_frame["session_date"])
        for cid, day in zip(attendance_frame["cti_id"], session_days):
            if cid in attended.index and day in attended.columns:
                attended.loc[cid, day] = True

    # Label the day columns as strings on every path, including when no attendance was found,
    # so callers always receive the same column format.
    attended.columns = attended.columns.strftime('%Y-%m-%d')

    return pandas.concat([cti_data, attended], axis=1)
112-
def fetch_cti_ids_from_sheet(spreadsheet_id: str, worksheet_name: str, gc: gspread.client.Client) -> List[int]:
    """
    Read the "cti_id" column of a Google Sheets worksheet and return its integer values.

    The header row (row 1) is matched case-insensitively and ignoring surrounding whitespace.
    Empty cells and cells that cannot be parsed as an integer are skipped.

    @param spreadsheet_id: Key of the spreadsheet to open
    @param worksheet_name: Name of the worksheet inside that spreadsheet
    @param gc: An authenticated gspread client
    @return: The CTI ids found, in sheet order; an empty list when the sheet has no
             "cti_id" header
    """
    worksheet = gc.open_by_key(spreadsheet_id).worksheet(worksheet_name)

    headers = [header.strip().lower() for header in worksheet.row_values(1)]

    try:
        # col_values is addressed with 1-based column numbers, list.index is 0-based
        column_index = headers.index("cti_id") + 1
    except ValueError:
        print("Column name not found")
        # Honour the declared List[int] return type instead of handing callers a bare None
        return []

    data = []

    # First entry is the header cell itself
    for value in worksheet.col_values(column_index)[1:]:
        if not value:
            continue
        try:
            data.append(int(value))
        except ValueError:
            # Not an integer id (e.g. free text); skip
            continue

    return data
139-
def fetch_cti_emails(eng: Engine, cti_ids: List[int]) -> Dict[int, str]:
    """
    Look up the email recorded for each of the given CTI ids.

    @param eng: A SQLAlchemy Engine object that connects to the database
    @param cti_ids: The CTI ids to look up
    @return: A dict with one entry per requested cti_id, mapping it to the email found in
             StudentEmail, or to "NOT FOUND" when no row matched. If several rows exist
             for one cti_id, the last one returned by the query wins.
    """
    if not cti_ids:
        # Nothing to look up; skip the database round trip
        return {}

    ids_to_email = dict.fromkeys(cti_ids, "NOT FOUND")

    email_query = (
        select(
            StudentEmail.cti_id,
            StudentEmail.email
        )
        .where(StudentEmail.cti_id.in_(list(ids_to_email)))
    )

    email_frame = pandas.read_sql(email_query, eng)

    # Pair the two columns directly: DataFrame.iterrows() yields (index, Series) tuples,
    # so attribute access such as row.cti_id on its items raises AttributeError.
    ids_to_email.update(zip(email_frame["cti_id"], email_frame["email"]))

    return ids_to_email
56+ return attendance_frame
0 commit comments