@@ -37,38 +37,55 @@ def parse_data(self, page: str, **kwargs) -> dict:
37
37
paragraphs = bin_row .find_all ("p" )
38
38
39
39
for p in paragraphs :
40
- if p .get_text () and "Next collection:" in p .get_text ():
41
- date_str = p .get_text ().replace ("Next collection:" , "" ).strip ()
42
- # Extract day number from date string (e.g. "2" from "Friday 2nd May")
43
- day_number = int ("" .join (filter (str .isdigit , date_str )))
44
- # Replace ordinal in date string with plain number
45
- date_str = date_str .replace (
46
- get_date_with_ordinal (day_number ), str (day_number )
40
+ # Check for both singular and plural "Next collection(s):"
41
+ if p .get_text () and (
42
+ "Next collection:" in p .get_text ()
43
+ or "Next collections:" in p .get_text ()
44
+ ):
45
+ # Extract collection dates
46
+ date_text = (
47
+ p .get_text ()
48
+ .replace ("Next collection:" , "" )
49
+ .replace ("Next collections:" , "" )
50
+ .strip ()
47
51
)
48
52
49
- try :
50
- # Parse date with full format
51
- bin_date = datetime .strptime (date_str , "%A %d %B" )
52
-
53
- # Add current year since it's not in the date string
54
- current_year = datetime .now ().year
55
- bin_date = bin_date .replace (year = current_year )
56
-
57
- # If the date is in the past, it's probably for next year
58
- if bin_date < datetime .now ():
59
- bin_date = bin_date .replace (year = current_year + 1 )
60
-
61
- collections .append ((bin_type , bin_date ))
62
- print (
63
- f"Successfully parsed date for { bin_type } : { bin_date } "
64
- )
65
- break
66
-
67
- except ValueError as e :
68
- print (
69
- f"Failed to parse date '{ date_str } ' for { bin_type } : { e } "
70
- )
71
- continue
53
+ # Split multiple dates if comma-separated
54
+ date_strings = [date .strip () for date in date_text .split ("," )]
55
+
56
+ for date_str in date_strings :
57
+ try :
58
+ # Extract day number from date string (e.g. "27" from "Tuesday 27th May")
59
+ day_number = int ("" .join (filter (str .isdigit , date_str )))
60
+ # Replace ordinal in date string with plain number
61
+ date_str = date_str .replace (
62
+ get_date_with_ordinal (day_number ), str (day_number )
63
+ )
64
+
65
+ # Parse date with full format
66
+ bin_date = datetime .strptime (date_str , "%A %d %B" )
67
+
68
+ # Add current year since it's not in the date string
69
+ current_year = datetime .now ().year
70
+ bin_date = bin_date .replace (year = current_year )
71
+
72
+ # If the date is in the past, it's probably for next year
73
+ if bin_date < datetime .now ():
74
+ bin_date = bin_date .replace (year = current_year + 1 )
75
+
76
+ collections .append ((bin_type , bin_date ))
77
+ print (
78
+ f"Successfully parsed date for { bin_type } : { bin_date } "
79
+ )
80
+
81
+ except ValueError as e :
82
+ print (
83
+ f"Failed to parse date '{ date_str } ' for { bin_type } : { e } "
84
+ )
85
+ continue
86
+
87
+ # Found and processed the collection dates, so break the loop
88
+ break
72
89
73
90
except Exception as e :
74
91
print (f"Error processing bin row: { e } " )
0 commit comments