@@ -77,42 +77,57 @@ def fetch_google_sheet_data(sheet_id: str, range_name: str = 'A:Z') -> List[Dict
7777def parse_workshop_data (raw_data : List [Dict ]) -> List [Dict ]:
7878 """
7979 Parse and transform raw Google Sheets data into workshop format
80- Expected columns: title, date, time, registration_url, instructor, location
80+ Expected columns from Salesforce export:
81+ - evsprk__Event_Title__c: Workshop title
82+ - evsprk__Start_Date__c: Start date (YYYY-MM-DD)
83+ - evsprk__Event_Homepage_Link__c: HTML link containing registration URL
8184 """
8285 workshops = []
8386
8487 for row in raw_data :
8588 try :
86- # Skip empty rows
87- if not row .get ('title' ) :
89+ # Skip empty rows or inactive workshops
90+ if not row .get ('evsprk__Event_Title__c' ) or row . get ( 'evsprk__Stage__c' ) != 'Active' :
8891 continue
8992
90- # Parse date and time
91- date_str = row .get ('date' , '' )
92- time_str = row .get ('time' , '' )
93+ # Get workshop title
94+ title = row .get ('evsprk__Event_Title__c' , '' ).strip ()
9395
94- # Combine date and time into ISO format
95- if date_str and time_str :
96- # Assuming date format is MM/DD/YYYY and time is HH:MM AM/PM
97- datetime_str = f"{ date_str } { time_str } "
98- workshop_datetime = datetime .strptime (datetime_str , "%m/%d/%Y %I:%M %p" )
99- workshop_datetime = workshop_datetime .replace (tzinfo = timezone .utc )
100- else :
101- continue # Skip if no date/time
96+ # Parse date (format: YYYY-MM-DD)
97+ date_str = row .get ('evsprk__Start_Date__c' , '' )
98+ if not date_str :
99+ continue
100+
101+ # Convert date to datetime (assuming workshops are during business hours)
102+ workshop_date = datetime .strptime (date_str , "%Y-%m-%d" )
103+ # Set a default time of 17:00 UTC (9 AM PST; this is 10 AM Pacific while daylight time is in effect)
104+ workshop_datetime = workshop_date .replace (hour = 17 , minute = 0 , tzinfo = timezone .utc )
105+
106+ # Extract registration URL from HTML link
107+ registration_url = ''
108+ homepage_link = row .get ('evsprk__Event_Homepage_Link__c' , '' )
109+ if homepage_link :
110+ # Extract URL from HTML anchor tag
111+ import re
112+ url_match = re .search (r'href="([^"]+)"' , homepage_link )
113+ if url_match :
114+ registration_url = url_match .group (1 )
102115
103116 workshop = {
104- 'title' : row . get ( ' title' , '' ) ,
117+ 'title' : title ,
105118 'datetime_iso' : workshop_datetime .isoformat (),
106- 'registration_url' : row .get ('registration_url' , '' ),
107- 'instructor' : row .get ('instructor' , '' ),
108- 'location' : row .get ('location' , 'Online' ),
109- 'description' : row .get ('description' , '' )
119+ 'registration_url' : registration_url ,
120+ 'date' : workshop_date .strftime ("%b %d, %Y" ),
121+ 'time' : 'See event page for details' ,
122+ 'location' : 'Online' ,
123+ 'instructor' : 'D-Lab Staff'
110124 }
111125
112126 workshops .append (workshop )
113127
114128 except Exception as e :
115129 print (f"Error parsing workshop row: { e } " )
130+ print (f"Row data: { row } " )
116131 continue
117132
118133 # Sort by date
0 commit comments