Skip to content

Commit 16880d9

Browse files
committed
Update script to parse Salesforce export columns
- Update column mappings to match Salesforce export format
- Parse evsprk__Event_Title__c as title
- Parse evsprk__Start_Date__c as date (YYYY-MM-DD format)
- Extract URL from evsprk__Event_Homepage_Link__c HTML
- Only include Active workshops
1 parent 4e99d0e commit 16880d9

File tree

1 file changed

+34
-19
lines changed

1 file changed

+34
-19
lines changed

scripts/fetch_google_sheets.py

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,42 +77,57 @@ def fetch_google_sheet_data(sheet_id: str, range_name: str = 'A:Z') -> List[Dict
7777
def parse_workshop_data(raw_data: List[Dict]) -> List[Dict]:
7878
"""
7979
Parse and transform raw Google Sheets data into workshop format
80-
Expected columns: title, date, time, registration_url, instructor, location
80+
Expected columns from Salesforce export:
81+
- evsprk__Event_Title__c: Workshop title
82+
- evsprk__Start_Date__c: Start date (YYYY-MM-DD)
83+
- evsprk__Event_Homepage_Link__c: HTML link containing registration URL
8184
"""
8285
workshops = []
8386

8487
for row in raw_data:
8588
try:
86-
# Skip empty rows
87-
if not row.get('title'):
89+
# Skip empty rows or inactive workshops
90+
if not row.get('evsprk__Event_Title__c') or row.get('evsprk__Stage__c') != 'Active':
8891
continue
8992

90-
# Parse date and time
91-
date_str = row.get('date', '')
92-
time_str = row.get('time', '')
93+
# Get workshop title
94+
title = row.get('evsprk__Event_Title__c', '').strip()
9395

94-
# Combine date and time into ISO format
95-
if date_str and time_str:
96-
# Assuming date format is MM/DD/YYYY and time is HH:MM AM/PM
97-
datetime_str = f"{date_str} {time_str}"
98-
workshop_datetime = datetime.strptime(datetime_str, "%m/%d/%Y %I:%M %p")
99-
workshop_datetime = workshop_datetime.replace(tzinfo=timezone.utc)
100-
else:
101-
continue # Skip if no date/time
96+
# Parse date (format: YYYY-MM-DD)
97+
date_str = row.get('evsprk__Start_Date__c', '')
98+
if not date_str:
99+
continue
100+
101+
# Convert date to datetime (assuming workshops are during business hours)
102+
workshop_date = datetime.strptime(date_str, "%Y-%m-%d")
103+
# Set a default time of 9 AM PST (5 PM UTC)
104+
workshop_datetime = workshop_date.replace(hour=17, minute=0, tzinfo=timezone.utc)
105+
106+
# Extract registration URL from HTML link
107+
registration_url = ''
108+
homepage_link = row.get('evsprk__Event_Homepage_Link__c', '')
109+
if homepage_link:
110+
# Extract URL from HTML anchor tag
111+
import re
112+
url_match = re.search(r'href="([^"]+)"', homepage_link)
113+
if url_match:
114+
registration_url = url_match.group(1)
102115

103116
workshop = {
104-
'title': row.get('title', ''),
117+
'title': title,
105118
'datetime_iso': workshop_datetime.isoformat(),
106-
'registration_url': row.get('registration_url', ''),
107-
'instructor': row.get('instructor', ''),
108-
'location': row.get('location', 'Online'),
109-
'description': row.get('description', '')
119+
'registration_url': registration_url,
120+
'date': workshop_date.strftime("%b %d, %Y"),
121+
'time': 'See event page for details',
122+
'location': 'Online',
123+
'instructor': 'D-Lab Staff'
110124
}
111125

112126
workshops.append(workshop)
113127

114128
except Exception as e:
115129
print(f"Error parsing workshop row: {e}")
130+
print(f"Row data: {row}")
116131
continue
117132

118133
# Sort by date

0 commit comments

Comments
 (0)