1+ # Standard library
12import re
2- import sys
3- import urllib
43from datetime import datetime
54from datetime import timezone
65from pathlib import Path
6+ from urllib import error as urllib_error
77
8+ # Third-party
89import iso3166
910import pandas as pd
1011
11- sys .path .append ("." )
12- from tidy_conf import fuzzy_match
13- from tidy_conf import load_conferences
14- from tidy_conf import merge_conferences
15- from tidy_conf .deduplicate import deduplicate
16- from tidy_conf .schema import get_schema
17- from tidy_conf .utils import fill_missing_required
18- from tidy_conf .yaml import load_title_mappings
19- from tidy_conf .yaml import write_df_yaml
20-
21-
22- def load_remote (year ):
12+ # Local imports
13+ try :
14+ from tidy_conf import fuzzy_match
15+ from tidy_conf import load_conferences
16+ from tidy_conf import merge_conferences
17+ from tidy_conf .deduplicate import deduplicate
18+ from tidy_conf .schema import get_schema
19+ from tidy_conf .utils import fill_missing_required
20+ from tidy_conf .yaml import load_title_mappings
21+ from tidy_conf .yaml import write_df_yaml
22+ except ImportError :
23+ from .tidy_conf import fuzzy_match
24+ from .tidy_conf import load_conferences
25+ from .tidy_conf import merge_conferences
26+ from .tidy_conf .deduplicate import deduplicate
27+ from .tidy_conf .schema import get_schema
28+ from .tidy_conf .utils import fill_missing_required
29+ from .tidy_conf .yaml import load_title_mappings
30+ from .tidy_conf .yaml import write_df_yaml
31+
32+
33+ def load_remote (year : int ) -> pd .DataFrame :
34+ """Load conference data from GitHub CSV for a specific year.
35+
36+ Parameters
37+ ----------
38+ year : int
39+ The year to load conference data for
40+
41+ Returns
42+ -------
43+ pd.DataFrame
44+ DataFrame containing conference data from the CSV
45+ """
2346 url = f"https://raw.githubusercontent.com/python-organizers/conferences/main/{ year } .csv"
2447
2548 # Read data and rename columns
@@ -31,8 +54,21 @@ def load_remote(year):
3154 return df
3255
3356
34- def map_columns (df , reverse = False ):
35- """Map columns to the schema."""
57+ def map_columns (df : pd .DataFrame , reverse : bool = False ) -> pd .DataFrame :
58+ """Map columns between CSV format and conference schema.
59+
60+ Parameters
61+ ----------
62+ df : pd.DataFrame
63+ DataFrame with columns to map
64+ reverse : bool, optional
65+ If True, map from schema to CSV format. Default is False.
66+
67+ Returns
68+ -------
69+ pd.DataFrame
70+ DataFrame with mapped columns
71+ """
3672 cols = {
3773 "Subject" : "conference" ,
3874 "Start Date" : "start" ,
@@ -52,8 +88,18 @@ def map_columns(df, reverse=False):
5288 return df .rename (columns = cols )
5389
5490
55- def write_csv (df , year , csv_location ):
56- """Write the CSV files for the conferences."""
91+ def write_csv (df : pd .DataFrame , year : int , csv_location : str ) -> None :
92+ """Write the CSV files for the conferences.
93+
94+ Parameters
95+ ----------
96+ df : pd.DataFrame
97+ DataFrame containing conference data to write
98+ year : int
99+ The year for the CSV file
100+ csv_location : str
101+ Directory path where CSV files should be written
102+ """
57103 from logging_config import get_tqdm_logger
58104
59105 logger = get_tqdm_logger (__name__ )
@@ -155,8 +201,16 @@ def write_csv(df, year, csv_location):
155201 logger .info (f"Successfully wrote { Path (csv_location , f'{ y } .csv' )} " )
156202
157203
158- def main (year = None , base = "" ):
159- """Import Python conferences from a csv file Github."""
204+ def main (year : int | None = None , base : str = "" ) -> None :
205+ """Import Python conferences from a CSV file on GitHub.
206+
207+ Parameters
208+ ----------
209+ year : int | None, optional
210+ Starting year for import. If None, uses current year
211+ base : str, optional
212+ Base directory path for data files. Default is an empty string.
213+ """
160214 from logging_config import get_tqdm_logger
161215
162216 # Setup tqdm-compatible logging for this module
@@ -190,7 +244,7 @@ def main(year=None, base=""):
190244 try :
191245 df = deduplicate (load_remote (year = y ), "conference" )
192246 df ["year" ] = y
193- except urllib . error .HTTPError :
247+ except urllib_error .HTTPError :
194248 break
195249 df_csv_raw = pd .concat ([df_csv_raw , df ], ignore_index = True )
196250
0 commit comments