Skip to content

Commit 2ce0359

Browse files
authored
1.0.4.dev2 (Refactoring and test enhancements) (#73)
1 parent ac4354a commit 2ce0359

31 files changed

+1296
-534
lines changed

MANIFEST.in

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
# Include package data
22
include LICENSE
3+
include README.md
4+
include CHANGELOG.md
5+
include CITATION.cff
6+
7+
# Include package data in pyrcs/data
38
recursive-include pyrcs/data *
49

5-
# Exclude tests
10+
# Exclude development and build directories
611
prune tests
12+
prune docs
713
prune tutorials
14+
prune .venv
815
prune venv
916
prune dist
17+
prune *.egg-info
18+
19+
# Global exclusions (patterns applied anywhere in the source tree)
20+
global-exclude __pycache__

docs/source/utils.rst

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ Validate inputs
1616
:toctree: _generated/
1717
:template: function.rst
1818

19-
is_home_connectable
19+
is_homepage_connectable
2020
is_str_float
2121
validate_initial
2222
validate_page_name
23-
collect_in_fetch_verbose
24-
fetch_all_verbose
23+
get_collect_verbosity_for_fetch
24+
get_batch_fetch_verbosity
2525

2626
Print messages
2727
~~~~~~~~~~~~~~
@@ -31,10 +31,10 @@ Print messages
3131
:template: function.rst
3232

3333
format_confirmation_prompt
34-
print_collect_msg
35-
print_conn_err
36-
print_inst_conn_err
37-
print_void_msg
34+
print_collection_message
35+
print_connection_warning
36+
print_instance_connection_error
37+
print_void_collection_message
3838

3939
Save and retrieve pre-packed data
4040
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ keywords = [
4444
]
4545
requires-python = ">=3.12"
4646
dependencies = [
47-
"pyhelpers >= 2.3.1",
47+
"pyhelpers >= 2.3.2",
4848
"beautifulsoup4"
4949
]
5050
classifiers = [

pyrcs/_base.py

Lines changed: 110 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616
from pyhelpers.store import load_data, save_data
1717

1818
from .parser import get_catalogue, get_introduction, get_last_updated_date
19-
from .utils import cd_data, collect_in_fetch_verbose, format_confirmation_prompt, home_page_url, \
20-
print_collect_msg, print_conn_err, print_inst_conn_err, print_void_msg
19+
from .utils import cd_data, format_confirmation_prompt, get_collect_verbosity_for_fetch, \
20+
homepage_url, print_collection_message, print_connection_warning, \
21+
print_instance_connection_error, print_void_collection_message
2122

2223

2324
class _Base:
@@ -30,7 +31,7 @@ class _Base:
3031
#: The key for accessing the data.
3132
KEY: str = ''
3233
#: The URL of the main web page for the data.
33-
URL: str = home_page_url()
34+
URL: str = homepage_url()
3435
#: The key used to reference the last updated date in the data.
3536
KEY_TO_LAST_UPDATED_DATE: str = 'Last updated date'
3637

@@ -72,7 +73,7 @@ def __init__(self, data_dir=None, content_type=None, data_category="", data_clus
7273
'http://www.railwaycodes.org.uk/'
7374
"""
7475

75-
print_conn_err(verbose=verbose)
76+
print_connection_warning(verbose=verbose)
7677

7778
self.catalogue, self.introduction = None, None
7879

@@ -174,6 +175,7 @@ def _cdd(self, *sub_dir, mkdir=True, **kwargs):
174175

175176
@staticmethod
176177
def _format_confirmation_message(data_name, confirmation_prompt=None, initial=None, **kwargs):
178+
# noinspection PyShadowingNames
177179
"""
178180
Generates a confirmation prompt message.
179181
@@ -185,6 +187,25 @@ def _format_confirmation_message(data_name, confirmation_prompt=None, initial=No
185187
:type data_name: str
186188
:return: The generated confirmation prompt as a string.
187189
:rtype: str
190+
191+
**Examples**::
192+
193+
>>> from pyrcs._base import _Base
194+
>>> from pyrcs.utils import format_confirmation_prompt
195+
>>> _b = _Base()
196+
>>> data_name = '"test_data_name"'
197+
>>> prompt = _b._format_confirmation_message(data_name)
198+
>>> prompt
199+
'To collect data of "test_data_name"\n?'
200+
>>> prompt = _b._format_confirmation_message(data_name, format_confirmation_prompt)
201+
>>> prompt
202+
'To collect data of "test_data_name"\n?'
203+
>>> prompt = _b._format_confirmation_message(data_name, "test message")
204+
>>> prompt
205+
'test message'
206+
>>> prompt = _b._format_confirmation_message(data_name, format_confirmation_prompt, 'a')
207+
>>> prompt
208+
'To collect data of "test_data_name" beginning with "a"\n?'
188209
"""
189210

190211
if confirmation_prompt:
@@ -224,80 +245,121 @@ def _collect_data_from_source(self, data_name, method, url=None, initial=None,
224245
confirmation_prompt=None, verbose=False, raise_error=False,
225246
**kwargs):
226247
"""
227-
Collects data from the specified source webpage(s).
248+
Collects and parses data from a specified source webpage.
228249
229-
:param data_name: Name of the data to be collected.
230-
:type data_name: str
231-
:param url: URL of the webpage from which the data will be collected.
232-
:type url: str
233-
:param method: A callable function or method used to parse and extract data from the
234-
webpage. This function should accept the following parameters:
250+
:param data_name: The descriptive name of the data being collected
251+
(used for lookups and messages).
252+
:type data_name: str | None
253+
:param method: The parsing function to execute upon successful data retrieval.
254+
The function **must** accept:
255+
256+
- ``source`` (*requests.Response*): The HTTP response object.
257+
- ``verbose`` (*bool | int*): The verbosity flag.
235258
236-
- ``source``: The response object from the HTTP request.
237-
- ``verbose``: Whether to print additional information during extraction.
259+
The function **may** optionally accept:
260+
261+
- ``data_name`` (*str*): Injected automatically if present in the function signature.
262+
- ``initial`` (*str*): Injected automatically if present in the function signature.
263+
- Any other arguments passed via ``**kwargs``.
238264
239265
:type method: typing.Callable
240-
:param initial: The initial letter of the desired code or data; defaults to ``None``.
266+
:param url: The target URL. If ``None``, the method attempts to retrieve the URL from
267+
``self.catalogue`` using ``initial`` or ``data_name`` as the key.
268+
:type url: str | None
269+
:param initial: The initial letter/code used to categorize the data
270+
(e.g. 'A' for stations starting with A); it is the preferred catalogue key (ahead of ``data_name``) for URL lookup
271+
if ``url`` is not provided.
241272
:type initial: str | None
242-
:param additional_fields: Extra key-value pairs to be included in the returned dictionary
243-
if data collection fails or is canceled; defaults to ``None``.
273+
:param additional_fields: Key-value pairs to include in the fallback dictionary
274+
if data collection fails; it is useful for ensuring consistent data structure
275+
(e.g. returning ``None`` for specific columns).
244276
:type additional_fields: dict | str | None
245277
:param confirmation_required: Whether user confirmation is required;
246278
if ``confirmation_required=True`` (default), prompts the user for confirmation
247279
before proceeding with data collection.
248280
:type confirmation_required: bool
249-
:param confirmation_prompt:
250-
:type confirmation_prompt:
251-
:param verbose: Whether to print relevant information in the console; defaults to ``False``.
281+
:param confirmation_prompt: A custom message or a callable that generates a message
282+
for the confirmation prompt.
283+
:type confirmation_prompt: str | typing.Callable | None
284+
:param verbose: Whether to print status messages and errors to the console;
285+
defaults to ``False``.
252286
:type verbose: bool | int
253-
:param raise_error: Whether to raise the provided exception;
254-
if ``raise_error=False`` (default), the error will be suppressed.
287+
:param raise_error: If ``True``, exceptions (network or parsing) are raised to the caller;
288+
if ``False`` (default), exceptions are caught and fallback data is returned.
255289
:type raise_error: bool
256-
:return: The collected data.
290+
:param kwargs: [Optional] Additional keyword arguments passed directly to the ``method``.
291+
:return: The data returned by ``method``, or a dictionary containing fallback values
292+
(e.g. ``{key: None}``) on failure; ``None`` if the collection is not confirmed.
257293
:rtype: pandas.DataFrame | dict | None
294+
:raises ValueError: If the URL cannot be resolved and ``raise_error=True``.
295+
:raises requests.RequestException: If a network error occurs and ``raise_error=True``.
296+
297+
**Examples**::
298+
299+
>>> from pyrcs._base import _Base
300+
>>> _b = _Base()
301+
>>> _b.catalogue = {'A': 'https://github.com/mikeqfu/pyrcs'}
302+
>>> _b._collect_data_from_source("test_data_name", method=_b._fallback_data)
258303
"""
259304

305+
# Confirmation step
260306
prompt = self._format_confirmation_message(
261307
data_name=data_name, confirmation_prompt=confirmation_prompt, initial=initial)
262308

263309
if not confirmed(prompt=prompt, confirmation_required=confirmation_required):
264310
return None
265311

266-
print_collect_msg(
312+
print_collection_message(
267313
data_name=data_name, initial=initial, verbose=verbose,
268314
confirmation_required=confirmation_required)
269315

316+
# Prepare fallback data
270317
fallback_data = self._fallback_data(key=initial, additional_fields=additional_fields)
271318

272-
url_ = copy.copy(url or self.catalogue.get(initial or data_name))
273-
if not url_:
274-
if initial and verbose:
275-
print(f'No data is available for codes beginning with "{initial}".')
276-
elif data_name and not url:
277-
print('Key not found in `.catalogue`. '
278-
'Check `.catalogue` for valid keys, and verify `initial` or `data_name`.')
319+
# Resolve URL
320+
target_url = url or self.catalogue.get(initial or data_name)
321+
322+
if not target_url:
323+
if initial:
324+
err_msg = f'No data is available for codes beginning with "{initial}".'
325+
else: # defaults to data_name context
326+
err_msg = \
327+
f'"{data_name}" not found in `.catalogue`. Check `.catalogue` for valid keys.'
328+
329+
if raise_error:
330+
raise ValueError(err_msg)
331+
elif verbose:
332+
print(err_msg)
279333
return fallback_data
280334

335+
# Fetch and process
281336
try:
282-
source = requests.get(url=url_, headers=fake_requests_headers())
283-
source.raise_for_status() # Raises HTTPError for bad responses (4xx, 5xx)
284-
except Exception as e:
285-
print_inst_conn_err(verbose=verbose, e=e)
286-
return fallback_data
337+
# Network request
338+
source = requests.get(url=target_url, headers=fake_requests_headers(), timeout=30)
339+
source.raise_for_status() # Raises HTTPError for bad responses
287340

288-
# Build kwargs dynamically based on method signature
289-
kwargs.update({'source': source, 'verbose': verbose})
341+
# Dynamic argument injection
342+
collector_kwargs = kwargs.copy()
343+
collector_kwargs.update({'source': source, 'verbose': verbose})
290344

291-
for param in ('data_name', 'initial'):
292-
if param in inspect.signature(method).parameters:
293-
kwargs.update({param: locals()[param]})
345+
# Inspect the collector method to see if it requires extra arguments
346+
params = inspect.signature(method).parameters
347+
if 'data_name' in params:
348+
collector_kwargs['data_name'] = data_name
349+
if 'initial' in params:
350+
collector_kwargs['initial'] = initial
294351

295-
# Attempt method execution
296-
try:
297-
return method(**kwargs)
298-
except Exception as e:
299-
_print_failure_message(
300-
e, prefix="Failed. Error:", verbose=verbose, raise_error=raise_error)
352+
# Execute Parsing Method
353+
data = method(**collector_kwargs)
354+
355+
return data
356+
357+
except requests.RequestException as e: # Handle network/HTTP errors
358+
print_instance_connection_error(verbose=verbose, e=e, raise_error=raise_error)
359+
return fallback_data
360+
361+
except Exception as e: # Handle parsing/method errors
362+
_print_failure_message(e, "Failed. Error:", verbose=verbose, raise_error=raise_error)
301363
return fallback_data
302364

303365
def _make_file_pathname(self, data_name, ext=".pkl", data_dir=None, sub_dir=None, **kwargs):
@@ -410,7 +472,7 @@ def _save_data_to_file(self, data, data_name, ext=".pkl", dump_dir=None, sub_dir
410472
save_data(data=data, path_to_file=path_to_file, verbose=(verbose == 2), **kwargs)
411473

412474
else:
413-
print_void_msg(data_name=data_name, verbose=verbose)
475+
print_void_collection_message(data_name=data_name, verbose=verbose)
414476

415477
def _fetch_data_from_file(self, data_name, method, ext=".pkl", update=False, dump_dir=None,
416478
verbose=False, raise_error=False, data_dir=None, sub_dir=None,
@@ -485,7 +547,7 @@ def _fetch_data_from_file(self, data_name, method, ext=".pkl", update=False, dum
485547
data = load_data(path_to_file, verbose=(verbose == 2))
486548

487549
else:
488-
verbose_ = collect_in_fetch_verbose(data_dir=dump_dir, verbose=verbose)
550+
verbose_ = get_collect_verbosity_for_fetch(data_dir=dump_dir, verbose=verbose)
489551

490552
kwargs.update({'confirmation_required': False, 'verbose': verbose_})
491553

pyrcs/_updater.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66

77
from .collector import LineData, OtherAssets
88
from .parser import get_site_map
9-
from .utils import is_home_connectable, print_conn_err
9+
from .utils import is_homepage_connectable, print_connection_warning
1010

1111

1212
def _update_prepacked_data(verbose=False, interval=5, **kwargs):
13+
# noinspection PyUnresolvedReferences
1314
"""
1415
Updates pre-packed data.
1516
@@ -24,8 +25,8 @@ def _update_prepacked_data(verbose=False, interval=5, **kwargs):
2425
>>> _update_prepacked_data(verbose=True)
2526
"""
2627

27-
if not is_home_connectable():
28-
print_conn_err(verbose=verbose)
28+
if not is_homepage_connectable():
29+
print_connection_warning(verbose=verbose)
2930
print("Unable to update the data.")
3031

3132
else:

pyrcs/collector.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@
1717
from .other_assets import Buzzer, Depots, Features, HabdWild, SignalBoxes, Stations, Telegraph, \
1818
Tunnels, Viaducts, WaterTroughs
1919
from .parser import get_category_menu
20-
from .utils import is_home_connectable, print_conn_err, print_inst_conn_err
20+
from .utils import is_homepage_connectable, print_connection_warning, print_instance_connection_error
2121

2222

2323
class _Base:
2424
#: The name of the data.
2525
NAME: str = 'Railway Codes and other data'
2626

2727
def __init__(self, update=False, verbose=True, raise_error=False):
28-
if not is_home_connectable():
28+
if not is_homepage_connectable():
2929
self.connected = False
30-
print_conn_err(verbose=verbose)
30+
print_connection_warning(verbose=verbose)
3131

3232
else:
3333
self.connected = True
@@ -150,7 +150,7 @@ def update(self, confirmation_required=True, verbose=False, interval=5, init_upd
150150
"""
151151

152152
if not self.connected:
153-
print_inst_conn_err(verbose=verbose)
153+
print_instance_connection_error(verbose=verbose)
154154

155155
else:
156156
if confirmed("To update line data\n?", confirmation_required=confirmation_required):
@@ -320,7 +320,7 @@ def update(self, confirmation_required=True, verbose=False, interval=5, init_upd
320320
"""
321321

322322
if not self.connected:
323-
print_inst_conn_err(verbose=verbose)
323+
print_instance_connection_error(verbose=verbose)
324324

325325
else:
326326
if confirmed("To update data of other assets\n?", confirmation_required):

pyrcs/data/.metadata

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"Author": "Qian Fu",
66
"Affiliation": "University of Birmingham",
77
"Email": "q.fu@bham.ac.uk",
8-
"Version": "1.0.4.dev1",
8+
"Version": "1.0.4.dev2",
99
"License": "MIT",
1010
"First release": "August 2019"
1111
}

pyrcs/line_data/bridge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from .._base import _Base
1313
from ..parser import _get_last_updated_date
14-
from ..utils import home_page_url
14+
from ..utils import homepage_url
1515

1616

1717
class Bridges(_Base):
@@ -25,7 +25,7 @@ class Bridges(_Base):
2525
#: The key for accessing the data.
2626
KEY: str = 'Bridges'
2727
#: The URL of the main webpage for the data.
28-
URL: str = urllib.parse.urljoin(home_page_url(), '/bridges/bridges0.shtm')
28+
URL: str = urllib.parse.urljoin(homepage_url(), '/bridges/bridges0.shtm')
2929
#: The key used to reference the last updated date in the data.
3030
KEY_TO_LAST_UPDATED_DATE: str = 'Last updated date'
3131

0 commit comments

Comments
 (0)