1616from pyhelpers .store import load_data , save_data
1717
1818from .parser import get_catalogue , get_introduction , get_last_updated_date
19- from .utils import cd_data , collect_in_fetch_verbose , format_confirmation_prompt , home_page_url , \
20- print_collect_msg , print_conn_err , print_inst_conn_err , print_void_msg
19+ from .utils import cd_data , format_confirmation_prompt , get_collect_verbosity_for_fetch , \
20+ homepage_url , print_collection_message , print_connection_warning , \
21+ print_instance_connection_error , print_void_collection_message
2122
2223
2324class _Base :
@@ -30,7 +31,7 @@ class _Base:
3031 #: The key for accessing the data.
3132 KEY : str = ''
3233 #: The URL of the main web page for the data.
33- URL : str = home_page_url ()
34+ URL : str = homepage_url ()
3435 #: The key used to reference the last updated date in the data.
3536 KEY_TO_LAST_UPDATED_DATE : str = 'Last updated date'
3637
@@ -72,7 +73,7 @@ def __init__(self, data_dir=None, content_type=None, data_category="", data_clus
7273 'http://www.railwaycodes.org.uk/'
7374 """
7475
75- print_conn_err (verbose = verbose )
76+ print_connection_warning (verbose = verbose )
7677
7778 self .catalogue , self .introduction = None , None
7879
@@ -174,6 +175,7 @@ def _cdd(self, *sub_dir, mkdir=True, **kwargs):
174175
175176 @staticmethod
176177 def _format_confirmation_message (data_name , confirmation_prompt = None , initial = None , ** kwargs ):
178+ # noinspection PyShadowingNames
177179 """
178180 Generates a confirmation prompt message.
179181
@@ -185,6 +187,25 @@ def _format_confirmation_message(data_name, confirmation_prompt=None, initial=No
185187 :type data_name: str
186188 :return: The generated confirmation prompt as a string.
187189 :rtype: str
190+
191+ **Examples**::
192+
193+ >>> from pyrcs._base import _Base
194+ >>> from pyrcs.utils import format_confirmation_prompt
195+ >>> _b = _Base()
196+ >>> data_name = '"test_data_name"'
197+ >>> prompt = _b._format_confirmation_message(data_name)
198+ >>> prompt
199+ 'To collect data of "test_data_name"\n ?'
200+ >>> prompt = _b._format_confirmation_message(data_name, format_confirmation_prompt)
201+ >>> prompt
202+ 'To collect data of "test_data_name"\n ?'
203+ >>> prompt = _b._format_confirmation_message(data_name, "test message")
204+ >>> prompt
205+ 'test message'
206+ >>> prompt = _b._format_confirmation_message(data_name, format_confirmation_prompt, 'a')
207+ >>> prompt
208+ 'To collect data of "test_data_name" beginning with "a"\n ?'
188209 """
189210
190211 if confirmation_prompt :
@@ -224,80 +245,121 @@ def _collect_data_from_source(self, data_name, method, url=None, initial=None,
224245 confirmation_prompt = None , verbose = False , raise_error = False ,
225246 ** kwargs ):
226247 """
227- Collects data from the specified source webpage(s) .
248+ Collects and parses data from a specified source webpage.
228249
229- :param data_name: Name of the data to be collected.
230- :type data_name: str
231- :param url: URL of the webpage from which the data will be collected.
232- :type url: str
233- :param method: A callable function or method used to parse and extract data from the
234- webpage. This function should accept the following parameters:
250+ :param data_name: The descriptive name of the data being collected
251+ (used for lookups and messages).
252+ :type data_name: str | None
253+ :param method: The parsing function to execute upon successful data retrieval.
254+ The function **must** accept:
255+
256+ - ``source`` (*requests.Response*): The HTTP response object.
257+ - ``verbose`` (*bool | int*): The verbosity flag.
235258
236- - ``source``: The response object from the HTTP request.
237- - ``verbose``: Whether to print additional information during extraction.
259+ The function **may** optionally accept:
260+
261+ - ``data_name`` (*str*): Injected automatically if present in the function signature.
262+ - ``initial`` (*str*): Injected automatically if present in the function signature.
263+ - Any other arguments passed via ``**kwargs``.
238264
239265 :type method: typing.Callable
240- :param initial: The initial letter of the desired code or data; defaults to ``None``.
266+ :param url: The target URL. If ``None``, the method attempts to retrieve the URL from
267+ ``self.catalogue`` using ``initial`` or ``data_name`` as the key.
268+ :type url: str | None
269+ :param initial: The initial letter/code used to categorize the data
270+ (e.g. 'A' for stations starting with A); if ``url`` is not provided, it is used
271+ (in preference to ``data_name``) as the key for URL lookup in ``self.catalogue``.
241272 :type initial: str | None
242- :param additional_fields: Extra key-value pairs to be included in the returned dictionary
243- if data collection fails or is canceled; defaults to ``None``.
273+ :param additional_fields: Key-value pairs to include in the fallback dictionary
274+ if data collection fails; it is useful for ensuring consistent data structure
275+ (e.g. returning ``None`` for specific columns).
244276 :type additional_fields: dict | str | None
245277 :param confirmation_required: Whether user confirmation is required;
246278 if ``confirmation_required=True`` (default), prompts the user for confirmation
247279 before proceeding with data collection.
248280 :type confirmation_required: bool
249- :param confirmation_prompt:
250- :type confirmation_prompt:
251- :param verbose: Whether to print relevant information in the console; defaults to ``False``.
281+ :param confirmation_prompt: A custom message or a callable that generates a message
282+ for the confirmation prompt.
283+ :type confirmation_prompt: str | typing.Callable | None
284+ :param verbose: Whether to print status messages and errors to the console;
285+ defaults to ``False``.
252286 :type verbose: bool | int
253- :param raise_error: Whether to raise the provided exception ;
254- if ``raise_error= False`` (default), the error will be suppressed .
287+ :param raise_error: If ``True``, exceptions (network or parsing) are raised to the caller ;
288+ if ``False`` (default), exceptions are caught and fallback data is returned .
255289 :type raise_error: bool
256- :return: The collected data.
290+ :param kwargs: [Optional] Additional keyword arguments passed directly to the ``method``.
291+ :return: The data returned by ``method``; a dictionary containing fallback values
292+ (e.g. ``{key: None}``) on failure; or ``None`` if confirmation is declined.
257293 :rtype: pandas.DataFrame | dict | None
294+ :raises ValueError: If the URL cannot be resolved and ``raise_error=True``.
295+ :raises requests.RequestException: If a network error occurs and ``raise_error=True``.
296+
297+ **Examples**::
298+
299+ >>> from pyrcs._base import _Base
300+ >>> _b = _Base()
301+ >>> _b.catalogue = {'A': 'https://github.com/mikeqfu/pyrcs'}
302+ >>> _b._collect_data_from_source("test_data_name", method=_b._fallback_data)
258303 """
259304
305+ # Confirmation step
260306 prompt = self ._format_confirmation_message (
261307 data_name = data_name , confirmation_prompt = confirmation_prompt , initial = initial )
262308
263309 if not confirmed (prompt = prompt , confirmation_required = confirmation_required ):
264310 return None
265311
266- print_collect_msg (
312+ print_collection_message (
267313 data_name = data_name , initial = initial , verbose = verbose ,
268314 confirmation_required = confirmation_required )
269315
316+ # Prepare fallback data
270317 fallback_data = self ._fallback_data (key = initial , additional_fields = additional_fields )
271318
272- url_ = copy .copy (url or self .catalogue .get (initial or data_name ))
273- if not url_ :
274- if initial and verbose :
275- print (f'No data is available for codes beginning with "{ initial } ".' )
276- elif data_name and not url :
277- print ('Key not found in `.catalogue`. '
278- 'Check `.catalogue` for valid keys, and verify `initial` or `data_name`.' )
319+ # Resolve URL
320+ target_url = url or self .catalogue .get (initial or data_name )
321+
322+ if not target_url :
323+ if initial :
324+ err_msg = f'No data is available for codes beginning with "{ initial } ".'
325+ else : # defaults to data_name context
326+ err_msg = \
327+ f'"{ data_name } " not found in `.catalogue`. Check `.catalogue` for valid keys.'
328+
329+ if raise_error :
330+ raise ValueError (err_msg )
331+ elif verbose :
332+ print (err_msg )
279333 return fallback_data
280334
335+ # Fetch and process
281336 try :
282- source = requests .get (url = url_ , headers = fake_requests_headers ())
283- source .raise_for_status () # Raises HTTPError for bad responses (4xx, 5xx)
284- except Exception as e :
285- print_inst_conn_err (verbose = verbose , e = e )
286- return fallback_data
337+ # Network request
338+ source = requests .get (url = target_url , headers = fake_requests_headers (), timeout = 30 )
339+ source .raise_for_status () # Raises HTTPError for bad responses
287340
288- # Build kwargs dynamically based on method signature
289- kwargs .update ({'source' : source , 'verbose' : verbose })
341+ # Dynamic argument injection
342+ collector_kwargs = kwargs .copy ()
343+ collector_kwargs .update ({'source' : source , 'verbose' : verbose })
290344
291- for param in ('data_name' , 'initial' ):
292- if param in inspect .signature (method ).parameters :
293- kwargs .update ({param : locals ()[param ]})
345+ # Inspect the collector method to see if it requires extra arguments
346+ params = inspect .signature (method ).parameters
347+ if 'data_name' in params :
348+ collector_kwargs ['data_name' ] = data_name
349+ if 'initial' in params :
350+ collector_kwargs ['initial' ] = initial
294351
295- # Attempt method execution
296- try :
297- return method (** kwargs )
298- except Exception as e :
299- _print_failure_message (
300- e , prefix = "Failed. Error:" , verbose = verbose , raise_error = raise_error )
352+ # Execute Parsing Method
353+ data = method (** collector_kwargs )
354+
355+ return data
356+
357+ except requests .RequestException as e : # Handle network/HTTP errors
358+ print_instance_connection_error (verbose = verbose , e = e , raise_error = raise_error )
359+ return fallback_data
360+
361+ except Exception as e : # Handle parsing/method errors
362+ _print_failure_message (e , "Failed. Error:" , verbose = verbose , raise_error = raise_error )
301363 return fallback_data
302364
303365 def _make_file_pathname (self , data_name , ext = ".pkl" , data_dir = None , sub_dir = None , ** kwargs ):
@@ -410,7 +472,7 @@ def _save_data_to_file(self, data, data_name, ext=".pkl", dump_dir=None, sub_dir
410472 save_data (data = data , path_to_file = path_to_file , verbose = (verbose == 2 ), ** kwargs )
411473
412474 else :
413- print_void_msg (data_name = data_name , verbose = verbose )
475+ print_void_collection_message (data_name = data_name , verbose = verbose )
414476
415477 def _fetch_data_from_file (self , data_name , method , ext = ".pkl" , update = False , dump_dir = None ,
416478 verbose = False , raise_error = False , data_dir = None , sub_dir = None ,
@@ -485,7 +547,7 @@ def _fetch_data_from_file(self, data_name, method, ext=".pkl", update=False, dum
485547 data = load_data (path_to_file , verbose = (verbose == 2 ))
486548
487549 else :
488- verbose_ = collect_in_fetch_verbose (data_dir = dump_dir , verbose = verbose )
550+ verbose_ = get_collect_verbosity_for_fetch (data_dir = dump_dir , verbose = verbose )
489551
490552 kwargs .update ({'confirmation_required' : False , 'verbose' : verbose_ })
491553
0 commit comments