88
99import asyncio
1010import json
11- from typing import Dict , List , Optional , Any
11+ import re
12+ from datetime import datetime
13+ from typing import Dict , List , Optional , Any , Union
1214from urllib .parse import urljoin
1315
1416import aiohttp
17+ import pytz
1518from selectolax .parser import HTMLParser
1619
1720from .models .meeting import Meeting
1821
1922BASE_URL = "https://tulsa-ok.granicus.com/ViewPublisher.php?view_id=4"
23+ CENTRAL_TZ = pytz .timezone ("America/Chicago" )
2024
2125
2226async def fetch_page (url : str , session : aiohttp .ClientSession ) -> str :
@@ -36,7 +40,95 @@ async def fetch_page(url: str, session: aiohttp.ClientSession) -> str:
3640 return await response .text ()
3741
3842
39- async def parse_meetings (html : str ) -> List [Dict [str , str ]]:
def parse_date_string(date_str: str) -> Optional[datetime]:
    """
    Parse a meeting date string into a datetime localized to Central time.

    The expected shape is "March 12, 2025 - 5:00 PM". Full and abbreviated
    month names are accepted, the comma after the day is optional, and the
    AM/PM marker is matched case-insensitively.

    Args:
        date_str: The raw date string from HTML

    Returns:
        A datetime object localized with CENTRAL_TZ, or None if the string
        does not match the expected pattern or describes an impossible
        date/time (for example "February 30" or an hour of 25).
    """
    if not date_str:
        return None

    # For str patterns, \s already matches Unicode whitespace such as the
    # non-breaking space (U+00A0), so one substitution normalizes everything.
    normalized = re.sub(r"\s+", " ", date_str).strip()

    # Month, day, year, then (after any separator) hour:minute and AM/PM.
    # The meridiem is matched as exactly AM or PM in any letter case, so
    # strings like "MM" or "AA" are rejected instead of being read as AM.
    match = re.search(
        r"([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4}).*?(\d{1,2}):(\d{2})\s*([AaPp][Mm])",
        normalized,
    )
    if match is None:
        return None

    month_str, day_str, year_str, hour_str, minute_str, am_pm = match.groups()

    # Try the full month name first, then the abbreviated form.
    month_num = None
    for month_format in ("%B", "%b"):
        try:
            month_num = datetime.strptime(month_str, month_format).month
        except ValueError:
            continue
        break
    if month_num is None:
        return None

    # Convert the 12-hour clock reading to a 24-hour value.
    hour = int(hour_str)
    meridiem = am_pm.upper()
    if meridiem == "PM" and hour < 12:
        hour += 12
    elif meridiem == "AM" and hour == 12:
        hour = 0

    # datetime() validates ranges; out-of-range values count as a parse
    # failure rather than an exception escaping to the caller.
    try:
        naive_dt = datetime(
            int(year_str), month_num, int(day_str), hour, int(minute_str)
        )
    except ValueError:
        return None

    # Attach the Central timezone to the naive value.
    return CENTRAL_TZ.localize(naive_dt)
98+
99+
def clean_date_string(date_str: str) -> str:
    """
    Normalize a raw date string into a consistent human-readable form.

    Whitespace runs (including newlines and non-breaking spaces) are collapsed
    to single spaces. If the text contains a recognizable
    "Month Day, Year ... H:MM AM/PM" sequence, it is reformatted; otherwise
    the whitespace-normalized text is returned unchanged.

    Args:
        date_str: The raw date string from HTML

    Returns:
        A cleaned date string in the format "Month Day, Year - H:MM AM/PM"
        when the pattern matches, else the trimmed, whitespace-collapsed input
    """
    # For str patterns, \s already matches Unicode whitespace such as the
    # non-breaking space (U+00A0), so no separate replacement is needed.
    normalized = re.sub(r"\s+", " ", date_str).strip()

    # Month, day, year, then (after any separator) the clock time and AM/PM.
    # The meridiem is matched as exactly AM or PM in any letter case.
    match = re.search(
        r"([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4}).*?(\d{1,2}:\d{2})\s*([AaPp][Mm])",
        normalized,
    )
    if match is None:
        # Unrecognized layout: fall back to the basic cleanup only.
        return normalized

    month, day, year, clock, meridiem = match.groups()
    # Format consistently, with an upper-case meridiem and single spacing.
    return f"{month} {day}, {year} - {clock} {meridiem.upper()}"
129+
130+
131+ async def parse_meetings (html : str ) -> List [Dict [str , Any ]]:
40132 """
41133 Parse the meeting data from the HTML content.
42134
@@ -68,9 +160,17 @@ async def parse_meetings(html: str) -> List[Dict[str, str]]:
68160 if len (cells ) < 5 :
69161 continue
70162
163+ # Parse the date string into a datetime object
164+ date_text = cells [1 ].text ()
165+ date_obj = parse_date_string (date_text )
166+
167+ # Get a cleaned date string as a fallback
168+ date_str = clean_date_string (date_text )
169+
71170 meeting_data = {
72171 "meeting" : cells [0 ].text ().strip (),
73- "date" : cells [1 ].text ().strip (),
172+ "date" : date_obj .isoformat () if date_obj else date_str ,
173+ "date_display" : date_str , # Keep a human-readable version
74174 "duration" : cells [2 ].text ().strip (),
75175 "agenda" : None ,
76176 "video" : None ,
0 commit comments