Skip to content

Commit a6e315c

Browse files
authored
1.0.4.dev3 (Testing and logic improvements) (#75)
1 parent 2ce0359 commit a6e315c

File tree

16 files changed

+568
-247
lines changed

16 files changed

+568
-247
lines changed

pyrcs/_base.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,7 @@ def _make_file_pathname(self, data_name, ext=".pkl", data_dir=None, sub_dir=None
376376
:param ext: The file extension (including the leading dot), defaults to ``".pkl"``.
377377
:type ext: str
378378
:param data_dir: The directory where the file will be saved; defaults to ``None``.
379-
:type data_dir: str | None
379+
:type data_dir: str | os.PathLike | None
380380
:param sub_dir: A subdirectory name or a list of subdirectory names; defaults to ``None``.
381381
:type sub_dir: str | list | None
382382
:return: The pathname for saving the data file.
@@ -403,7 +403,7 @@ def _make_file_pathname(self, data_name, ext=".pkl", data_dir=None, sub_dir=None
403403
sub_dir_ = []
404404

405405
if data_dir:
406-
self.current_data_dir = validate_dir(path_to_dir=data_dir)
406+
self.current_data_dir = validate_dir(path_to_dir=None if data_dir is True else data_dir)
407407
file_pathname = os.path.join(self.current_data_dir, *sub_dir_, filename)
408408

409409
else: # data_dir is None or data_dir == ""
@@ -431,7 +431,7 @@ def _save_data_to_file(self, data, data_name, ext=".pkl", dump_dir=None, sub_dir
431431
:type ext: str | bool
432432
:param dump_dir: The directory where the file should be saved;
433433
if ``None`` (default) a default directory within the class is used.
434-
:type dump_dir: str | None
434+
:type dump_dir: str | os.PathLike | None
435435
:param sub_dir: A subdirectory name or a list of subdirectory names; defaults to ``None``.
436436
:type sub_dir: str | list | None
437437
:param verbose: Whether to print detailed information to the console; defaults to ``False``.

pyrcs/collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ def update(self, confirmation_required=True, verbose=False, interval=5, init_upd
354354

355355
# Stations
356356
print(f"\n{self.Stations.NAME}:")
357+
_ = self.Stations.fetch_catalogue(**update_args)
357358
_ = self.Stations.fetch_locations(**update_args)
358359

359360
time.sleep(interval)

pyrcs/data/.metadata

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"Author": "Qian Fu",
66
"Affiliation": "University of Birmingham",
77
"Email": "q.fu@bham.ac.uk",
8-
"Version": "1.0.4.dev2",
8+
"Version": "1.0.4.dev3",
99
"License": "MIT",
1010
"First release": "August 2019"
1111
}

pyrcs/line_data/elr_mileage.py

Lines changed: 70 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
`Engineer's Line References (ELRs) <http://www.railwaycodes.org.uk/elrs/elr0.shtm>`_.
44
"""
55

6-
import copy
76
import functools
87
import itertools
98
import os
@@ -473,46 +472,57 @@ def fetch_elr(self, initial=None, update=False, dump_dir=None, verbose=False, **
473472

474473
data = {self.KEY: data_, self.KEY_TO_LAST_UPDATED_DATE: latest_update_date}
475474

476-
if dump_dir is not None:
475+
if dump_dir:
477476
self._save_data_to_file(
478477
data=data, data_name=self.NAME, dump_dir=dump_dir, verbose=verbose)
479478

480479
return data
481480

482-
def _mileage_file_dump_names(self, elr):
483-
data_name = remove_punctuation(elr).lower()
481+
def _dump_mileage_file(self, mileage_file, dump_dir=None, verbose=False):
482+
"""
483+
Dump the collected mileage file data.
484484
485-
if data_name == "prn":
486-
data_name += "_"
485+
:param mileage_file: Data of the mileage file.
486+
:type mileage_file: dict
487+
:param dump_dir: The path to a directory where the mileage file data is saved;
488+
defaults to ``None``, in which case a default "mileage-files" subdirectory is used; if ``False``, the data is not dumped.
489+
:type dump_dir: str | os.PathLike | bool | None
490+
:param verbose: Whether to print relevant information to the console; defaults to ``False``.
491+
:type verbose: bool | int
492+
"""
487493

488-
dump_dir = self._cdd("mileage-files", data_name[0])
494+
if dump_dir is False:
495+
return None
489496

490-
return data_name, dump_dir
497+
data_name = mileage_file['ELR'].lower()
498+
data_name += ("_" if data_name == "prn" else "")
491499

492-
def _dump_mileage_file(self, elr, mileage_file, dump_it, verbose):
493-
if dump_it:
494-
data_name, dump_dir = self._mileage_file_dump_names(elr)
500+
if dump_dir is None:
501+
sub_dir = data_name[0]
502+
target_dir = self._cdd("mileage-files", sub_dir)
503+
else:
504+
target_dir = dump_dir
495505

496-
self._save_data_to_file(
497-
data=mileage_file, data_name=data_name, ext=".pkl", dump_dir=dump_dir,
498-
verbose=verbose)
506+
self._save_data_to_file(
507+
data=mileage_file, data_name=data_name, ext=".pkl", dump_dir=target_dir,
508+
verbose=verbose)
499509

500-
def _handle_err404(self, elr, notes_dat, parsed, dump_it, verbose):
510+
def _handle_err404(self, elr, notes_dat, parsed, dump_dir=False, verbose=False):
501511
elr_alt = re.search(r'(?<= )[A-Z]{3}(\d)?', notes_dat).group(0)
502512
mileage_file_alt = self.collect_mileage_file(
503-
elr=elr_alt, parsed=parsed, confirmation_required=False, dump_it=False,
513+
elr=elr_alt, parsed=parsed, confirmation_required=False, dump_dir=False,
504514
verbose=verbose)
505515

506516
if notes_dat.startswith('Now'):
507-
mileage_file_former = copy.copy(mileage_file_alt)
517+
mileage_file_former = mileage_file_alt.copy()
508518

509519
mileage_file_alt.update({'Formerly': elr})
510520
self._dump_mileage_file(
511-
elr_alt, mileage_file=mileage_file_alt, dump_it=dump_it, verbose=verbose)
521+
mileage_file=mileage_file_alt, dump_dir=dump_dir, verbose=verbose)
512522

513-
mileage_file_former.update(({'Now': elr_alt}))
523+
mileage_file_former.update(({'ELR': elr, 'Now': elr_alt}))
514524
self._dump_mileage_file(
515-
elr, mileage_file=mileage_file_former, dump_it=dump_it, verbose=verbose)
525+
mileage_file=mileage_file_former, dump_dir=dump_dir, verbose=verbose)
516526

517527
return mileage_file_alt
518528

@@ -753,16 +763,13 @@ def _parse_mileage_and_notes(self, content):
753763

754764
return mileage_data, notes_data
755765

756-
def _collect_mileage_file(self, source, elr, parsed=True, dump_it=False, verbose=False):
766+
def _collect_mileage_file(self, source, elr, parsed=True, dump_dir=False, verbose=False):
757767
soup = bs4.BeautifulSoup(markup=source.content, features='html.parser')
758768

759-
line_name = soup.find(name='h3').text
769+
line_name = soup.find(name='h3').get_text(strip=True)
760770

761771
sub_line_name_ = soup.find(name='h4')
762-
if sub_line_name_ is not None:
763-
sub_line_name = sub_line_name_.get_text()
764-
else:
765-
sub_line_name = ''
772+
sub_line_name = sub_line_name_.get_text().strip() if sub_line_name_ is not None else ''
766773

767774
err404 = {'"404" error: page not found', '404 error: page not found'}
768775
if any(x in err404 for x in {line_name, sub_line_name}):
@@ -771,7 +778,9 @@ def _collect_mileage_file(self, source, elr, parsed=True, dump_it=False, verbose
771778

772779
notes_dat = elr_data['Notes'].iloc[0]
773780
if re.match(r'(Now( part of)? |= |See )[A-Z]{3}(\d)?$', notes_dat):
774-
return self._handle_err404(elr, notes_dat, parsed, dump_it, verbose)
781+
mileage_file_alt = self._handle_err404(
782+
elr=elr, notes_dat=notes_dat, parsed=parsed, dump_dir=dump_dir, verbose=verbose)
783+
return mileage_file_alt
775784

776785
else:
777786
line_name, content = self._get_parsed_contents(elr_data, notes_dat)
@@ -809,12 +818,11 @@ def _collect_mileage_file(self, source, elr, parsed=True, dump_it=False, verbose
809818
if verbose in {True, 1}:
810819
print("Done.")
811820

812-
self._dump_mileage_file(
813-
elr=elr, mileage_file=mileage_file, dump_it=dump_it, verbose=verbose)
821+
self._dump_mileage_file(mileage_file=mileage_file, dump_dir=dump_dir, verbose=verbose)
814822

815823
return mileage_file
816824

817-
def collect_mileage_file(self, elr, parsed=True, confirmation_required=True, dump_it=False,
825+
def collect_mileage_file(self, elr, parsed=True, confirmation_required=True, dump_dir=False,
818826
verbose=False, raise_error=False):
819827
"""
820828
Collects the mileage file for a specific ELR from the source web page.
@@ -828,8 +836,9 @@ def collect_mileage_file(self, elr, parsed=True, confirmation_required=True, dum
828836
if ``confirmation_required=True`` (default), prompts the user for confirmation
829837
before proceeding with data collection.
830838
:type confirmation_required: bool
831-
:param dump_it: Whether to save the collected data as a pickle file; defaults to ``False``.
832-
:type dump_it: bool
839+
:param dump_dir: The path to a directory where the mileage file data is saved;
840+
if ``False`` (default), the data will not be dumped.
841+
:type dump_dir: str | os.PathLike | bool | None
833842
:param verbose: Whether to print relevant information to the console; defaults to ``False``.
834843
:type verbose: bool | int
835844
:param raise_error: Whether to raise the provided exception;
@@ -843,7 +852,7 @@ def collect_mileage_file(self, elr, parsed=True, confirmation_required=True, dum
843852
- In some cases, mileages may be unknown and thus left blank
844853
(e.g. ``'ANI2, Orton Junction with ROB (~3.05)'``).
845854
- Mileages in parentheses are not on that ELR but are included for reference
846-
(e.g., ``'ANL, (8.67) NORTHOLT [London Underground]'``).
855+
(e.g. ``'ANL, (8.67) NORTHOLT [London Underground]'``).
847856
- As with the main ELR list, mileages preceded by a tilde (~) are approximate.
848857
849858
**Examples**::
@@ -940,32 +949,32 @@ def collect_mileage_file(self, elr, parsed=True, confirmation_required=True, dum
940949
[4 rows x 8 columns]
941950
"""
942951

943-
elr_ = remove_punctuation(elr).upper()
952+
target_elr = remove_punctuation(elr).upper()
944953

945-
if elr_:
946-
if confirmed(f"To collect mileage file of \"{elr_}\"\n?", confirmation_required):
954+
if target_elr:
955+
if confirmed(f'To collect mileage file of "{target_elr}"\n?', confirmation_required):
947956
if verbose in {True, 1}:
948957
message_ = "Collecting the mileage file"
949958
if not confirmation_required:
950-
message_ += f' of "{elr_}"'
959+
message_ += f' of "{target_elr}"'
951960
print(message_, end=" ... ")
952961

953962
try:
954-
url = homepage_url() + f'/elrs/_mileages/{elr_[0]}/{elr_}.shtm'.lower()
963+
url = urllib.parse.urljoin(
964+
homepage_url(),
965+
f'/elrs/_mileages/{target_elr[0]}/{target_elr}.shtm'.lower())
955966
source = requests.get(url=url, headers=fake_requests_headers())
956-
967+
source.raise_for_status()
957968
except Exception as e:
958969
print_instance_connection_error(verbose=verbose, e=e)
959970
return None
960971

961972
try:
962973
return self._collect_mileage_file(
963-
source=source, elr=elr_, parsed=parsed, dump_it=dump_it,
974+
source=source, elr=target_elr, parsed=parsed, dump_dir=dump_dir,
964975
verbose=verbose)
965-
966976
except Exception as e:
967-
_print_failure_message(
968-
e=e, prefix="Errors:", verbose=verbose, raise_error=raise_error)
977+
_print_failure_message(e, "Errors:", verbose=verbose, raise_error=raise_error)
969978

970979
def fetch_mileage_file(self, elr, update=False, dump_dir=None, verbose=False,
971980
raise_error=False):
@@ -979,7 +988,7 @@ def fetch_mileage_file(self, elr, update=False, dump_dir=None, verbose=False,
979988
:type update: bool
980989
:param dump_dir: Path to the directory where the data file will be saved;
981990
defaults to ``None``.
982-
:type dump_dir: str | None
991+
:type dump_dir: str | os.PathLike | None
983992
:param verbose: Whether to print relevant information to the console; defaults to ``False``.
984993
:type verbose: bool | int
985994
:param raise_error: Whether to raise the provided exception;
@@ -992,9 +1001,12 @@ def fetch_mileage_file(self, elr, update=False, dump_dir=None, verbose=False,
9921001
**Examples**::
9931002
9941003
>>> from pyrcs.line_data import ELRMileages # from pyrcs import ELRMileages
1004+
>>> import tempfile
1005+
>>> import pathlib
1006+
>>> tmp_path = pathlib.Path(tempfile.TemporaryDirectory().name)
9951007
>>> em = ELRMileages()
9961008
>>> # Get the mileage file of 'AAL' (Now 'NAJ3')
997-
>>> aal_mileage_file = em.fetch_mileage_file(elr='AAL')
1009+
>>> aal_mileage_file = em.fetch_mileage_file(elr='AAL', dump_dir=tmp_path)
9981010
>>> type(aal_mileage_file)
9991011
dict
10001012
>>> list(aal_mileage_file.keys())
@@ -1020,7 +1032,7 @@ def fetch_mileage_file(self, elr, update=False, dump_dir=None, verbose=False,
10201032
12 18.0638 ... DCL 81.12
10211033
[13 rows x 8 columns]
10221034
>>> # Get the mileage file of 'MLA'
1023-
>>> mla_mileage_file = em.fetch_mileage_file(elr='MLA')
1035+
>>> mla_mileage_file = em.fetch_mileage_file(elr='MLA', dump_dir=tmp_path)
10241036
>>> type(mla_mileage_file)
10251037
dict
10261038
>>> list(mla_mileage_file.keys())
@@ -1044,25 +1056,28 @@ def fetch_mileage_file(self, elr, update=False, dump_dir=None, verbose=False,
10441056
3 0.1606 0.73 ... None
10451057
[4 rows x 8 columns]
10461058
>>> # Get the mileage file of 'LCG'
1047-
>>> mla_mileage_file = em.fetch_mileage_file(elr='LCG')
1059+
>>> mla_mileage_file = em.fetch_mileage_file(elr='LCG', dump_dir=tmp_path)
10481060
"""
10491061

10501062
try:
1051-
elr_ = remove_punctuation(elr)
1052-
data_name, _ = self._mileage_file_dump_names(elr_)
1053-
ext = ".pkl"
1054-
path_to_pickle = self._cdd("mileage-files", data_name[0], data_name + ext, mkdir=False)
1063+
target_elr = remove_punctuation(elr)
1064+
1065+
data_name = target_elr.lower()
1066+
data_name += ("_" if data_name == "prn" else "")
1067+
sub_dir, ext = data_name[0], ".pkl"
1068+
1069+
path_to_file = self._cdd("mileage-files", sub_dir, f"{data_name}{ext}", mkdir=False)
10551070

1056-
if os.path.isfile(path_to_pickle) and not update:
1057-
mileage_file = load_data(path_to_pickle)
1071+
if os.path.isfile(path_to_file) and not update:
1072+
mileage_file = load_data(path_to_file)
10581073

10591074
else:
10601075
verbose_ = get_collect_verbosity_for_fetch(data_dir=dump_dir, verbose=verbose)
10611076
mileage_file = self.collect_mileage_file(
1062-
elr=elr_, parsed=True, confirmation_required=False, dump_it=True,
1077+
elr=target_elr, parsed=True, confirmation_required=False, dump_dir=None,
10631078
verbose=verbose_)
10641079

1065-
if dump_dir is not None:
1080+
if dump_dir not in {False, None}:
10661081
self._save_data_to_file(
10671082
data=mileage_file, data_name=data_name, ext=ext, dump_dir=dump_dir,
10681083
verbose=verbose)

pyrcs/other_assets/tunnel.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,40 +90,41 @@ def _parse_length(x):
9090
"""
9191

9292
if '✖' in x:
93-
x, note_ = x.split('✖')
94-
note_ = ' (' + note_ + ')'
93+
x, note_suffix = x.split('✖')
94+
note_suffix = f' ({note_suffix.strip()})'
9595
else:
96-
note_ = ''
96+
note_suffix = ''
9797

98-
if re.match(r'[Uu]nknown', x):
99-
length = np.nan
100-
note = 'Unknown'
98+
if x == '':
99+
length, note = np.nan, 'Unavailable'
101100

102-
elif x == '':
103-
length = np.nan
104-
note = 'Unavailable'
101+
elif re.match(r'[Uu]nknown', x):
102+
length, note = np.nan, 'Unknown'
105103

106104
elif re.match(r'\d+m \d+yd?(- | to )?.*\d+m \d+yd?.*', x):
107105
miles_a, yards_a, miles_b, yards_b = re.findall(r'\d+', x)
108106
length_a = float(miles_a) * 1609.344 + float(yards_a) * 0.9144
109-
# measurement.measures.Distance(mi=miles_a).m + measurement.measures.Distance(yd=yards_a).m
107+
# from measurement.measures import Distance
108+
# Distance(mi=miles_a).m + Distance(yd=yards_a).m
110109
length_b = float(miles_b) * 1609.344 + float(yards_b) * 0.9144
111-
# measurement.measures.Distance(mi=miles_b).m + measurement.measures.Distance(yd=yards_b).m
110+
# Distance(mi=miles_b).m + Distance(yd=yards_b).m
112111
length = (length_a + length_b) / 2
113112
note = '-'.join([str(round(length_a, 2)), str(round(length_b, 2))]) + ' metres'
114113

115114
else:
116-
if re.match(r'(formerly )?c?≈?\d+m ?\d+yd?|ch', x):
115+
if re.match(r'(formerly )?c?≈?\d+m ?\d+(?:yd?|ch)', x):
117116
miles, yards = re.findall(r'\d+', x)
118117
if re.match(r'.*\d+ch$', x): # "yards" is "chains"
119-
yards = yards * 22 # measurement.measures.Distance(chain=yards).yd
118+
yards = float(yards) * 22 # measurement.measures.Distance(chain=yards).yd
120119

121120
if re.match(r'^c.*|^≈', x):
122121
note = 'Approximate'
123122
elif re.match(r'\d+y$', x):
124123
note = re.search(r'(?<=\dy).*$', x).group(0)
125124
elif re.match(r'^(formerly).*', x):
126125
note = 'Formerly'
126+
elif re.match(r'.*(?:yd?|ch)\s*(.+)$', x):
127+
note = re.search(r'(?:yd?|ch)\s*(.+)$', x).group(1)
127128
else:
128129
note = ''
129130

@@ -139,7 +140,7 @@ def _parse_length(x):
139140
# measurement.measures.Distance(mi=miles).m + measurement.measures.Distance(yd=yards).m
140141

141142
if note != '':
142-
note = note + note_
143+
note = note.strip() + note_suffix
143144

144145
return length, note
145146

0 commit comments

Comments
 (0)