Skip to content

Commit 6032c80

Browse files
committed
Full implementation of gsheet/group-attendance endpoint with a test verifying dataframe shapes.
1 parent 00e6185 commit 6032c80

File tree

7 files changed

+228
-135
lines changed

7 files changed

+228
-135
lines changed

src/api.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from src.students.withdrawal_processing.router import router as student_withdrawal_router
1313

1414
from src.gsheet.refresh.router import router as gsheet_refresh_router
15+
from src.gsheet.group_attendance.router import router as gsheet_router
1516
from src.utils.authorization import verify_api_key
1617

1718
api_router = APIRouter(dependencies=[Depends(verify_api_key)])
@@ -86,6 +87,13 @@
8687
tags=["Students"],
8788
)
8889

90+
# /api/gsheet/...
91+
api_router.include_router(
92+
gsheet_router,
93+
prefix="/gsheet",
94+
tags=["GSheet"],
95+
)
96+
8997
# /api/gsheet/refresh/...
9098
api_router.include_router(
9199
gsheet_refresh_router,

src/database/postgres/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class Base(DeclarativeBase):
1313
# Default False (cloud environment) TODO: Move this elsewhere that's more universal
1414
env_required = False
1515
if env_required:
16-
load_dotenv(dotenv_path="./../../.env")
16+
load_dotenv(dotenv_path=".env")
1717

1818
# Engine & Session Configuration
1919
# Note that currently, sessions are the only way to interface with the database
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from typing import Any, Dict, Optional
2+
from fastapi import APIRouter, Depends, HTTPException, status
3+
from sqlalchemy.exc import SQLAlchemyError
4+
from sqlalchemy.orm import Session
5+
import gspread
6+
from datetime import date, timedelta
7+
8+
from src.config import settings
9+
from src.database.postgres.core import make_session
10+
from src.database.postgres.core import engine as CONN
11+
import src.gsheet.utils as utils
12+
import src.gsheet.group_attendance.service as service
13+
14+
router = APIRouter()
15+
16+
@router.post(
    "/group-attendance",
    description="Load in attendance information given a group of IDs and a date range in the associated Google Sheet",
    response_description="Updated group attendance roster",
    status_code=status.HTTP_201_CREATED,
)
def refresh_group_attendance(
    spreadsheet_id: str,
    spreadsheet_name: str,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    db: Session = Depends(make_session),
) -> Dict[str, Any]:
    """
    Read CTI IDs from a worksheet, build their attendance matrix for a date range,
    and hand the matrix to utils.write_to_gsheet.

    @param spreadsheet_id: Key of the spreadsheet the CTI IDs are read from
    @param spreadsheet_name: Worksheet name; used to read the IDs and passed on to write_to_gsheet
    @param start_date: First day of the range; defaults to end_date minus
                       settings.default_attendance_lookback_days
    @param end_date: Last day of the range; defaults to today
    @param db: Session used only to roll back when a database error occurs
    @return: Whatever utils.write_to_gsheet returns
    @raises HTTPException: 500 when a SQLAlchemyError is raised along the way
    """
    # Default each bound on its own so that a caller who supplies only one of
    # the two dates does not have it silently replaced.
    if end_date is None:
        end_date = date.today()
    if start_date is None:
        start_date = end_date - timedelta(days=settings.default_attendance_lookback_days)

    try:
        # Only the credentials and the output key differ between environments.
        if settings.app_env == "production":
            gc = utils.create_credentials()
            key = settings.roster_sheet_key
        else:
            gc = gspread.service_account(filename='gspread_credentials.json')
            key = settings.test_sheet_key

        # NOTE(review): IDs are read from `spreadsheet_id` while `key` is what is
        # passed to write_to_gsheet -- confirm both are meant to name the same spreadsheet.
        cti_ids = service.fetch_cti_ids_from_sheet(spreadsheet_id, spreadsheet_name, gc)

        # pass CTI IDs into the group attendance service
        attendance_data = service.fetch_group_attendance(CONN, start_date, end_date, cti_ids)

        return utils.write_to_gsheet(attendance_data, spreadsheet_name, gc, key)

    except SQLAlchemyError as e:
        db.rollback()
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
46+
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
from fastapi import HTTPException
2+
from sqlalchemy import select, func, cast, and_, Date
3+
from sqlalchemy.orm import Session
4+
from sqlalchemy.engine import Engine
5+
from sqlalchemy.dialects.postgresql import array_agg
6+
7+
from src.database.postgres.models import Attendance, StudentAttendance, StudentEmail
8+
import gspread
9+
import pandas
10+
import numpy as np
11+
from typing import List, Dict
12+
from datetime import date
13+
from src.config import settings
14+
15+
def fetch_group_attendance(eng: Engine, start_date: date, end_date: date, cti_ids: List[int]):
    """
    Fetch attendance records and create an attendance matrix of select cti_ids and a date range,
    given the associated Accelerate tables

    The result has one row per entry of cti_ids, with columns "cti_id", "email" and one
    "YYYY-MM-DD" column per day from start_date to end_date. Every cell is a string
    ("True"/"False" in the date columns) so the frame can be sent to a sheet as-is.

    @param eng: A SQLAlchemy Engine object that connects to the database
    @param start_date: First day (inclusive) of the matrix
    @param end_date: Last day (inclusive) of the matrix
    @param cti_ids: CTI IDs to include; an empty or None value yields an empty
                    frame holding only the "cti_id" and "email" columns
    @return: A pandas DataFrame as described above
    """
    if not cti_ids:
        return pandas.DataFrame(columns=["cti_id", "email"])

    # 1) Build cti_id -> email mapping, defaulting to "NOT FOUND"
    id_to_email = fetch_cti_emails(eng, cti_ids)

    # 2) Build base CTI/email frame
    cti_data = pandas.DataFrame(
        [{"cti_id": cid, "email": id_to_email.get(cid, "NOT FOUND")} for cid in cti_ids]
    ).set_index("cti_id")

    # 3) Build date columns, all False until an attendance row says otherwise
    dates = pandas.date_range(start_date, end_date)
    date_grid = np.zeros((len(cti_ids), len(dates)), dtype=bool)
    pandas_grid = pandas.DataFrame(date_grid, index=cti_data.index, columns=dates)

    result_grid = pandas.concat([cti_data, pandas_grid], axis=1)

    # 4) Fetch attendance for those CTI IDs and date range.
    # Casting to DATE before comparing keeps sessions that start at any
    # time of day on end_date inside the range.
    session_day = cast(Attendance.session_start, Date)
    attendance_query = (
        select(
            StudentAttendance.cti_id,
            session_day.label("session_date"),
        )
        .join(Attendance, Attendance.session_id == StudentAttendance.session_id)
        .where(
            and_(
                StudentAttendance.cti_id.in_(cti_ids),
                session_day.between(start_date, end_date),
            )
        )
    )

    # No debug printing here: the frame contains student records.
    attendance_frame = pandas.read_sql(attendance_query, eng)
    if not attendance_frame.empty:
        # Convert to Timestamps so the values match the date_range column labels
        attendance_frame["session_date"] = pandas.to_datetime(attendance_frame["session_date"])

        # 5) Mark True where there was attendance
        for row in attendance_frame.itertuples(index=False):
            if row.cti_id in result_grid.index and row.session_date in result_grid.columns:
                result_grid.loc[row.cti_id, row.session_date] = True

    # From here on, ALWAYS normalize before returning.
    # reset_index() turns cti_id back into a column and leaves a plain 0..n-1 index.
    final_df = result_grid.reset_index()

    # Normalize headers
    final_df.columns = [
        col.strftime("%Y-%m-%d") if hasattr(col, "strftime") else str(col)
        for col in final_df.columns
    ]

    # Everything as string so gspread/JSON is happy
    return final_df.astype(str)
80+
81+
def fetch_cti_ids_from_sheet(spreadsheet_id: str, worksheet_name: str, gc: gspread.client.Client) -> List[int]:
    """
    Read the integer values of the "cti_id" column from a worksheet, then clear the worksheet.

    The header row is matched case-insensitively after trimming whitespace.
    Blank cells and cells that int() cannot parse are skipped.

    WARNING: on success the whole worksheet is cleared (worksheet.clear()) before
    returning. When the "cti_id" header is missing, nothing is cleared.

    @param spreadsheet_id: Key of the spreadsheet to open
    @param worksheet_name: Name of the worksheet inside that spreadsheet
    @param gc: An authorized gspread client
    @return: The parsed IDs in sheet order; an empty list when there is no "cti_id" column
    """
    worksheet = gc.open_by_key(spreadsheet_id).worksheet(worksheet_name)

    headers = [header.strip().lower() for header in worksheet.row_values(1)]

    try:
        # +1 turns the 0-based list position into the column number passed to col_values
        column_index = headers.index("cti_id") + 1
    except ValueError:
        print("Column name not found")
        # Return an empty list rather than None so the declared return type holds
        return []

    data = []
    # [1:] skips the header cell itself
    for value in worksheet.col_values(column_index)[1:]:
        if not value:
            continue
        try:
            data.append(int(value))
        except ValueError:
            # Skip
            continue

    # NOTE(review): presumably clears the sheet so the attendance matrix can be
    # written in its place -- confirm this destructive step is intended here.
    worksheet.clear()

    return data
109+
110+
def fetch_cti_emails(eng: Engine, cti_ids: List[int]) -> Dict[int, str]:
    """
    Map each requested CTI ID to its primary email address.

    @param eng: A SQLAlchemy Engine object that connects to the database
    @param cti_ids: CTI IDs to look up
    @return: A dict with one key per requested ID; IDs with no StudentEmail row
             flagged is_primary keep the placeholder "NOT FOUND"
    """
    ids_to_email = dict.fromkeys(cti_ids, "NOT FOUND")
    if not ids_to_email:
        # Nothing to look up; skip the database round trip
        return ids_to_email

    email_query = (
        select(
            StudentEmail.cti_id,
            StudentEmail.email,
        )
        .where(
            and_(
                StudentEmail.cti_id.in_(cti_ids),
                StudentEmail.is_primary,
            )
        )
    )

    email_frame = pandas.read_sql(email_query, eng)
    # Walk the two columns in lockstep instead of building a Series per row
    for cti_id, email in zip(email_frame["cti_id"], email_frame["email"]):
        ids_to_email[cti_id] = email

    return ids_to_email

src/gsheet/refresh/attendance/router.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -35,35 +35,3 @@ def refresh_attendance(db: Session = Depends(make_session)) -> Dict[str, Any]:
3535
except SQLAlchemyError as e:
3636
db.rollback()
3737
raise HTTPException(status_code=500, detail=str(e))
38-
39-
@router.post(
    "/group-attendance",
    description="Load in attendance information given a group of IDs and a date range in the associated Google Sheet",
    response_description="Updated group attendance roster",
    status_code=status.HTTP_201_CREATED,
)
def refresh_group_attendance(
    spreadsheet_id: str,
    spreadsheet_name: str,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    db: Session = Depends(make_session),
) -> Dict[str, Any]:
    """
    Read CTI IDs from a worksheet, build their attendance matrix for a date range,
    and hand the matrix to utils.write_to_gsheet.

    @param spreadsheet_id: Key of the spreadsheet the CTI IDs are read from
    @param spreadsheet_name: Worksheet name; used to read the IDs and passed on to write_to_gsheet
    @param start_date: First day of the range; defaults to end_date minus
                       settings.default_attendance_lookback_days
    @param end_date: Last day of the range; defaults to today
    @param db: Session used only to roll back when a database error occurs
    @return: Whatever utils.write_to_gsheet returns
    @raises HTTPException: 500 when a SQLAlchemyError is raised along the way
    """
    # Default each bound on its own so that a caller who supplies only one of
    # the two dates does not have it silently replaced.
    if end_date is None:
        end_date = date.today()
    if start_date is None:
        start_date = end_date - timedelta(days=settings.default_attendance_lookback_days)

    try:
        # Only the credentials and the output key differ between environments.
        if settings.app_env == "production":
            gc = utils.create_credentials()
            key = settings.roster_sheet_key
        else:
            gc = gspread.service_account(filename='gspread_credentials.json')
            key = settings.test_sheet_key

        # NOTE(review): IDs are read from `spreadsheet_id` while `key` is what is
        # passed to write_to_gsheet -- confirm both are meant to name the same spreadsheet.
        cti_ids = service.fetch_cti_ids_from_sheet(spreadsheet_id, spreadsheet_name, gc)

        # pass CTI IDs into the group attendance service
        attendance_data = service.fetch_group_attendance(CONN, start_date, end_date, cti_ids)

        return utils.write_to_gsheet(attendance_data, spreadsheet_name, gc, key)

    except SQLAlchemyError as e:
        db.rollback()
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
69-
Lines changed: 3 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,14 @@
11
from fastapi import HTTPException
2-
from sqlalchemy import select, func, cast
2+
from sqlalchemy import select, func
33
from sqlalchemy.orm import Session
44
from sqlalchemy.engine import Engine
55
from sqlalchemy.dialects.postgresql import array_agg
66

7-
from src.database.postgres.models import Attendance, StudentAttendance, StudentEmail
7+
from src.database.postgres.models import Attendance
88
import gspread
99
import pandas
10-
import numpy as np
11-
from typing import List, Dict
12-
from datetime import date
1310
from src.config import settings
1411

15-
1612
def fetch_attendance(eng: Engine):
1713
"""
1814
Fetch roster from associated Accelerate tables, and return it as a pd dataframe
@@ -57,99 +53,4 @@ def fetch_attendance(eng: Engine):
5753
"End Date": str,
5854
"Processed On": str}) # Date objects not allowed
5955
attendance_frame = attendance_frame.fillna('') # Empty cells (na) not allowed, replaced with empty strings
60-
return attendance_frame
61-
62-
def fetch_group_attendance(eng: Engine, start_date: date, end_date: date, cti_ids: List[int]):
    """
    Fetch attendance records and create an attendance matrix of select cti_ids and a date range,
    given the associated Accelerate tables

    The result is indexed by cti_id and has an "email" column followed by one boolean
    "YYYY-MM-DD" column per day from start_date to end_date.

    @param eng: A SQLAlchemy Engine object that connects to the database
    @param start_date: First day (inclusive) of the matrix
    @param end_date: Last day (inclusive) of the matrix
    @param cti_ids: CTI IDs to include; an empty or None value yields an empty
                    frame holding only the "cti_id" and "email" columns
    @return: A pandas DataFrame as described above
    """
    # Imported here because this function needs the SQLAlchemy Date *type*;
    # the `date` name in scope is Python's datetime.date, which cast() cannot use.
    from sqlalchemy import Date

    if not cti_ids:
        return pandas.DataFrame(columns=["cti_id", "email"])

    # 1) Build cti_id -> email mapping, defaulting to "NOT FOUND"
    id_to_email = fetch_cti_emails(eng, cti_ids)

    # 2) Build base CTI/email frame
    cti_data = pandas.DataFrame(
        [{"cti_id": cid, "email": id_to_email.get(cid, "NOT FOUND")} for cid in cti_ids]
    ).set_index("cti_id")

    # 3) Build date columns, all False until an attendance row says otherwise
    dates = pandas.date_range(start_date, end_date)
    date_grid = np.zeros((len(cti_ids), len(dates)), dtype=bool)
    pandas_grid = pandas.DataFrame(date_grid, index=cti_data.index, columns=dates)

    result_grid = pandas.concat([cti_data, pandas_grid], axis=1)

    # 4) Fetch attendance for those CTI IDs and date range.
    # Filter on the DATE-cast value so sessions that start later in the day
    # on end_date are still included.
    session_day = cast(Attendance.session_start, Date)
    attendance_query = (
        select(
            StudentAttendance.cti_id,
            session_day.label("session_date"),
        )
        .join(Attendance, Attendance.session_id == StudentAttendance.session_id)
        .where(StudentAttendance.cti_id.in_(cti_ids))
        .where(session_day.between(start_date, end_date))
    )

    attendance_frame = pandas.read_sql(attendance_query, eng)
    if not attendance_frame.empty:
        # Convert to Timestamps so the values match the date_range column labels
        attendance_frame["session_date"] = pandas.to_datetime(attendance_frame["session_date"])

        # 5) Mark True where there was attendance
        for row in attendance_frame.itertuples(index=False):
            if row.cti_id in result_grid.index and row.session_date in result_grid.columns:
                result_grid.loc[row.cti_id, row.session_date] = True

    # Normalize the headers on every path, including when no attendance rows
    # came back, so callers always see the same column labels.
    result_grid.columns = result_grid.columns.map(
        lambda x: x.strftime('%Y-%m-%d') if hasattr(x, 'strftime') else str(x)
    )

    return result_grid
112-
113-
def fetch_cti_ids_from_sheet(spreadsheet_id: str, worksheet_name: str, gc: gspread.client.Client) -> List[int]:
    """
    Read the integer values of the "cti_id" column from a worksheet.

    The header row is matched case-insensitively after trimming whitespace.
    Blank cells and cells that int() cannot parse are skipped.

    @param spreadsheet_id: Key of the spreadsheet to open
    @param worksheet_name: Name of the worksheet inside that spreadsheet
    @param gc: An authorized gspread client
    @return: The parsed IDs in sheet order; an empty list when there is no "cti_id" column
    """
    worksheet = gc.open_by_key(spreadsheet_id).worksheet(worksheet_name)

    headers = [header.strip().lower() for header in worksheet.row_values(1)]

    try:
        # +1 turns the 0-based list position into the column number passed to col_values
        column_index = headers.index("cti_id") + 1
    except ValueError:
        print("Column name not found")
        # Return an empty list rather than None so the declared return type holds
        return []

    data = []
    # [1:] skips the header cell itself
    for value in worksheet.col_values(column_index)[1:]:
        if not value:
            continue
        try:
            data.append(int(value))
        except ValueError:
            # Skip
            continue

    return data
139-
140-
def fetch_cti_emails(eng: Engine, cti_ids: List[int]) -> Dict[int, str]:
    """
    Map each requested CTI ID to an email address found in StudentEmail.

    @param eng: A SQLAlchemy Engine object that connects to the database
    @param cti_ids: CTI IDs to look up
    @return: A dict with one key per requested ID; IDs with no StudentEmail row
             keep the placeholder "NOT FOUND". When several rows exist for one
             ID, the last one returned by the query wins.
    """
    ids_to_email = dict.fromkeys(cti_ids, "NOT FOUND")
    if not ids_to_email:
        # Nothing to look up; skip the database round trip
        return ids_to_email

    email_query = (
        select(
            StudentEmail.cti_id,
            StudentEmail.email,
        )
        .where(StudentEmail.cti_id.in_(cti_ids))
    )

    email_frame = pandas.read_sql(email_query, eng)
    # iterrows() yields (index, row) tuples, so attribute access on the tuple
    # fails; walk the two columns in lockstep instead.
    for cti_id, email in zip(email_frame["cti_id"], email_frame["email"]):
        ids_to_email[cti_id] = email

    return ids_to_email
56+
return attendance_frame

0 commit comments

Comments
 (0)