Skip to content

Commit f657aae

Browse files
authored
Release 1.0.3 (#64)
1 parent 02f94ca commit f657aae

File tree

12 files changed

+617
-605
lines changed

12 files changed

+617
-605
lines changed

.github/workflows/github-pages.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
- name: Set up Python
2525
uses: actions/setup-python@v5
2626
with:
27-
python-version: '3.11'
27+
python-version: '3.12'
2828
- name: Install dependencies
2929
run: |
3030
python -m pip install --upgrade pip

.github/workflows/test-coverage.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
- name: Set up Python
2121
uses: actions/setup-python@v5
2222
with:
23-
python-version: '3.10'
23+
python-version: '3.12'
2424

2525
- name: Install dependencies
2626
run: |
@@ -29,8 +29,7 @@ jobs:
2929
3030
- name: Run tests
3131
run: |
32-
python -m pytest -v --cov=pyrcs --cov-branch \
33-
--cov-report=term --cov-report=xml:coverage.xml tests/
32+
python -m pytest -v --cov=pyrcs --cov-branch --cov-report=term --cov-report=xml:coverage.xml tests/
3433
3534
- name: Debug coverage file
3635
run: ls -lah

.readthedocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ version: 2
99
build:
1010
os: ubuntu-22.04
1111
tools:
12-
python: "3.10"
12+
python: "3.12"
1313

1414
# Build documentation in the docs/ directory with Sphinx (this is the default documentation type)
1515
sphinx:

docs/source/requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
furo==2024.8.6
2-
pyhelpers==2.3.0
1+
furo==2025.9.25
2+
pyhelpers==2.3.1
33
sphinx-copybutton==0.5.2
4-
sphinx-new-tab-link==0.8.0
4+
sphinx-new-tab-link==0.8.1
55
sphinx-toggleprompt==0.6.0

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ keywords = [
4242
"Depots",
4343
"Tracks"
4444
]
45-
requires-python = ">=3.10"
45+
requires-python = ">=3.12"
4646
dependencies = [
47-
"pyhelpers >= 2.3.0",
47+
"pyhelpers >= 2.3.1",
4848
"beautifulsoup4"
4949
]
5050
classifiers = [

pyrcs/data/.metadata

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"Author": "Qian Fu",
66
"Affiliation": "School of Engineering, University of Birmingham",
77
"Email": "q.fu@bham.ac.uk",
8-
"Version": "1.0.2",
8+
"Version": "1.0.3",
99
"License": "MIT",
1010
"First release": "August 2019"
1111
}

pyrcs/data/site-map.json

Lines changed: 535 additions & 528 deletions
Large diffs are not rendered by default.

pyrcs/other_assets/depot.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -482,15 +482,18 @@ def _collect_gwr_codes(self, source, verbose=False):
482482

483483
span_tags = soup.find_all(name='span', attrs={'class': 'tab2'})
484484
num_codes_dict = dict([
485-
(int(span_tag.text), str(span_tag.next_sibling).replace(' = ', '').strip())
485+
(int(span_tag.text), str(span_tag.next_sibling).replace('=', '').strip())
486486
for span_tag in span_tags])
487487

488-
numerical_codes.rename(columns={'sort by division': 'Division'}, inplace=True)
489-
numerical_codes.Division = numerical_codes.Code.map(
490-
lambda x: num_codes_dict[int(str(x)[-1])])
488+
temp = numerical_codes.iloc[:, 0].str.split(' ', expand=True)
489+
temp.columns = ['Code', 'Division']
490+
temp.loc[:, 'Division'] = temp['Division'].map(lambda x: num_codes_dict[int(str(x)[-1])])
491+
numerical_codes = pd.concat(
492+
[temp, numerical_codes.drop(columns=numerical_codes.columns[0])], axis=1)
491493

492494
h3_titles = [h3.text for h3 in soup.find_all('h3')]
493495
gwr_depot_codes_data = dict(zip(h3_titles, [alphabetical_codes, numerical_codes]))
496+
gwr_depot_codes_data.update({'Keys to numerical codes': num_codes_dict})
494497

495498
gwr_depot_codes = {
496499
self.KEY_TO_GWR: gwr_depot_codes_data,

pyrcs/other_assets/station.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ def _check_row_spans(dat):
8484
dat0[['ELR', 'Mileage']] = dat0[['ELR', 'Mileage']].map(lambda x: x.split(' &&& '))
8585
dat0 = dat0.explode(['ELR', 'Mileage'], ignore_index=True)
8686

87-
dat0.sort_values(['Station'], ignore_index=True, inplace=True)
88-
8987
return dat0
9088

9189

@@ -122,22 +120,28 @@ def _parse_station_column(dat):
122120
x = 'Heathrow Junction [sometimes referred to as Heathrow Interchange]\t\t / [no CRS?]'
123121
"""
124122

125-
temp1 = dat['Station'].str.split('\t\t', n=1, expand=True)
126-
temp1.columns = ['Station', 'CRS']
127-
dat['Station'] = temp1['Station'].str.rstrip(' / ').str.strip()
123+
stn_col_name = [col for col in dat.columns if 'Station' in col][0]
124+
temp1 = dat[stn_col_name].str.split('\t\t', n=1, expand=True)
125+
126+
if stn_col_name != 'Station':
127+
dat.rename(columns={stn_col_name: 'Station'}, inplace=True)
128+
stn_col_name = 'Station'
129+
130+
temp1.columns = [stn_col_name, 'CRS']
131+
dat[stn_col_name] = temp1[stn_col_name].str.rstrip(' / ').str.strip()
128132

129133
# Get notes for stations
130134
stn_note_ = pd.Series('', index=dat.index)
131-
for i, x in enumerate(temp1['Station']):
135+
for i, x in enumerate(temp1[stn_col_name]):
132136
if '[' in x and ']' in x:
133137
y = re.search(r' \[(.*)](✖.*)?', x).group(0) # Station Note
134-
dat.loc[i, 'Station'] = x.replace(y, '').strip()
138+
dat.loc[i, stn_col_name] = str(x).replace(y, '').strip()
135139
if '✖' in y:
136140
stn_note_[i] = '; '.join([y_.strip(' []') for y_ in y.split('✖')])
137141
else:
138142
stn_note_[i] = y.strip(' []')
139143

140-
dat.insert(loc=dat.columns.get_loc('Station') + 1, column='Station Note', value=stn_note_)
144+
dat.insert(loc=dat.columns.get_loc(stn_col_name) + 1, column='Station Note', value=stn_note_)
141145

142146
temp2 = temp1['CRS'].str.replace(' / /', ' &&& ').str.split(
143147
r' | / ', regex=True, expand=True).fillna('')
@@ -154,7 +158,7 @@ def _parse_station_column(dat):
154158
lambda z: ' and '.join(['{} [{}]'.format(*z_.split('✖')) for z_ in z.split(' &&& ')])
155159
if ' &&& ' in z else z).str.strip()
156160

157-
dat = pd.concat([dat, temp2], axis=1)
161+
dat = pd.concat([dat, temp2], axis=1).sort_values(stn_col_name)
158162

159163
return dat
160164

pyrcs/parser.py

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def _prep_records(trs, ths, sep=' / '):
7373

7474
for td_no, td in enumerate(tds):
7575
if td.find('td'):
76-
text_ = td.find('a').contents + ["\t\t / "]
76+
text_ = [''] if td.find('a') is None else td.find('a').contents + ["\t\t / "]
7777
else:
7878
text_ = [_parse_other_tags_in_td_contents(x) for x in td.contents]
7979
# _move_element_to_end(text_, char='\t\t')
@@ -315,6 +315,10 @@ def parse_date(str_date, as_date_type=False):
315315
# == Extract information ===========================================================================
316316

317317

318+
# Normalise a piece of site-map text so it can be used as a dictionary key:
#   1. replace every en dash ("–") with a plain hyphen ("-");
#   2. strip any run of "(" / ")" characters from both ends of the string;
#   3. drop a trailing ".shtml", then a trailing ".shtm", if present
#      (e.g. "index.shtml" becomes "index").
# NOTE(review): str.strip("()") removes parenthesis characters from each end
# independently, so text such as "Foo (bar)" would lose only its closing ")".
# Confirm that no site-map key has that shape.
def _clean_key(k_text):
319+
return k_text.replace("–", "-").strip("()").removesuffix(".shtml").removesuffix(".shtm")
320+
321+
318322
def _parse_dd_or_dt(dd_or_dt):
319323
"""
320324
Extracts text and href attributes from dt or dd elements.
@@ -339,50 +343,37 @@ def _parse_dd_or_dt(dd_or_dt):
339343
# text = f'{text[1].upper()}{text[2:-1]}'
340344
href = a_href.find('a').get('href')
341345

342-
return text.replace("–", "-"), href
346+
return _clean_key(text), href
343347

344348

345349
def _get_site_map_h3_dl_dt_dds(h3_dl_dt, next_dd=None):
346350
if next_dd is None:
347351
next_dd = h3_dl_dt.find_next('dd')
348352

349-
prev_dt = next_dd.find_previous(name='dt')
353+
prev_dt = next_dd.find_previous('dt')
350354

351355
h3_dl_dt_dds = {}
352356
while prev_dt == h3_dl_dt:
353357
next_dd_sub_dl_ = next_dd.find('dl')
354358

355-
if next_dd_sub_dl_ is None:
356-
next_dd_contents = [x for x in next_dd.contents if x != '\n']
357-
358-
if len(next_dd_contents) == 1:
359-
next_dd_content = next_dd_contents[0]
360-
text = next_dd_content.get_text(strip=True)
361-
href = next_dd_content.get(key='href')
362-
363-
else: # len(next_dd_contents) == 2:
364-
a_href, text = next_dd_contents
365-
if not isinstance(text, str):
366-
text, a_href = next_dd_contents
367-
368-
href = a_href.find(name='a').get(key='href')
369-
370-
h3_dl_dt_dds.update(
371-
{text.replace("–", "-"): urllib.parse.urljoin(home_page_url(), href)})
372-
373-
else:
374-
sub_dts = next_dd_sub_dl_.find_all(name='dt')
359+
if next_dd_sub_dl_:
360+
sub_dts = next_dd_sub_dl_.find_all('dt')
375361

376362
for sub_dt in sub_dts:
377363
sub_dt_text, _ = _parse_dd_or_dt(sub_dt)
378-
sub_dt_dds = sub_dt.find_next_siblings(name='dd')
364+
sub_dt_dds = sub_dt.find_next_siblings('dd')
379365
sub_dt_dds_dict = _get_site_map_sub_dl(h3_dl_dts=sub_dt_dds)
380366

381-
h3_dl_dt_dds.update({sub_dt_text.replace("–", "-"): sub_dt_dds_dict})
367+
h3_dl_dt_dds.update({_clean_key(sub_dt_text): sub_dt_dds_dict})
368+
369+
else:
370+
a = next_dd.find('a')
371+
text, href = _clean_key(a.get_text(strip=True)), a.get(key='href')
372+
h3_dl_dt_dds.update({text: urllib.parse.urljoin(home_page_url(), href)})
382373

383374
try:
384-
next_dd = next_dd.find_next_sibling(name='dd')
385-
prev_dt = next_dd.find_previous_sibling(name='dt')
375+
next_dd = next_dd.find_next_sibling('dd')
376+
prev_dt = next_dd.find_previous_sibling('dt')
386377
except AttributeError:
387378
break
388379

@@ -397,19 +388,20 @@ def _get_site_map_sub_dl(h3_dl_dts):
397388
h3_dl_dt_dd_dict = {}
398389

399390
for h3_dl_dt in h3_dl_dts:
400-
dt_text, dt_href = _parse_dd_or_dt(dd_or_dt=h3_dl_dt)
391+
dt_text_, dt_href = _parse_dd_or_dt(dd_or_dt=h3_dl_dt)
392+
dt_text = _clean_key(dt_text_)
401393

402394
if dt_href:
403395
h3_dl_dt_dd_dict.update({dt_text: urllib.parse.urljoin(home_page_url(), dt_href)})
404396

405397
else:
406398
next_dd = h3_dl_dt.find_next('dd')
407-
next_dd_sub_dl = next_dd.find(name='dd')
399+
next_dd_sub_dl = next_dd.find('dl')
408400

409401
if next_dd_sub_dl:
410402
# next_dd_sub_dl_dts = next_dd_sub_dl.find_all(name='dt')
411403
next_dd_sub_dl_dts = [
412-
dt for dt in next_dd.find_all(name='dt') if dt.has_attr('class')]
404+
dt for dt in next_dd.find_all('dt') if dt.has_attr('class')]
413405
h3_dl_dt_dd_dict.update({dt_text: _get_site_map_sub_dl(next_dd_sub_dl_dts)})
414406

415407
else:
@@ -427,11 +419,11 @@ def _get_site_map(source, parser='html.parser'):
427419
soup = bs4.BeautifulSoup(markup=source.content, features=parser)
428420
site_map = {}
429421

430-
h3s = soup.find_all(name='h3', attrs={"class": "site"})
422+
h3s = soup.find_all('h3', attrs={"class": "site"})
431423

432424
for h3 in h3s:
433425
h3_title = h3.get_text(strip=True)
434-
h3_dl_dts = h3.find_next(name='dl').find_all(name='dt') # h3 > dl > dt
426+
h3_dl_dts = h3.find_next('dl').find_all('dt') # h3 > dl > dt
435427

436428
if len(h3_dl_dts) == 1:
437429
dd_dict = {} # h3 > dl > dt > dd
@@ -442,12 +434,12 @@ def _get_site_map(source, parser='html.parser'):
442434
if h3_dl_dt_text == '':
443435
for dd in h3_dl_dt.find_next_siblings('dd'):
444436
text, href = _parse_dd_or_dt(dd)
445-
dd_dict.update({text: urllib.parse.urljoin(home_page_url(), href)})
437+
dd_dict.update({_clean_key(text): urllib.parse.urljoin(home_page_url(), href)})
446438

447439
else:
448440
dd_dict = _get_site_map_sub_dl(h3_dl_dts=h3_dl_dts)
449441

450-
site_map.update({h3_title: dd_dict})
442+
site_map.update({_clean_key(h3_title): dd_dict})
451443

452444
# noinspection SpellCheckingInspection
453445
site_map = update_dict_keys(
@@ -471,23 +463,23 @@ def get_site_map(update=False, confirmation_required=True, verbose=False, raise_
471463
:param raise_error: Whether to raise the provided exception;
472464
if ``raise_error=False``, the error will be suppressed; defaults to ``True``.
473465
:type raise_error: bool
474-
:return: An ordered dictionary containing the data of site map.
475-
:rtype: collections.OrderedDict | None
466+
:return: A dictionary containing the data of site map.
467+
:rtype: dict | None
476468
477469
**Examples**::
478470
479471
>>> from pyrcs.parser import get_site_map
480472
>>> site_map = get_site_map()
481473
>>> type(site_map)
482-
collections.OrderedDict
474+
dict
483475
>>> list(site_map.keys())
484476
['Home',
485477
'Line data',
486478
'Other assets',
487479
'"Legal/financial" lists',
488480
'Miscellaneous']
489481
>>> site_map['Home']
490-
{'index.shtml': 'http://www.railwaycodes.org.uk/index.shtml'}
482+
{'index': 'http://www.railwaycodes.org.uk/index.shtml'}
491483
"""
492484

493485
path_to_file = cd_data("site-map.json", mkdir=True)
@@ -691,9 +683,12 @@ def get_introduction(url, delimiter='\n', update=False, verbose=False, raise_err
691683

692684
try:
693685
source = requests.get(url=url, headers=fake_requests_headers())
694-
except requests.exceptions.ConnectionError:
695-
print_inst_conn_err(update=update, verbose=True if update else verbose)
696-
return None
686+
except requests.exceptions.ConnectionError as e:
687+
if raise_error:
688+
raise e # Raise the original connection error
689+
else:
690+
print_inst_conn_err(update=update, verbose=True if update else verbose, e=e)
691+
return None
697692

698693
try:
699694
introduction = _parse_introduction(source=source, delimiter=delimiter)

0 commit comments

Comments
 (0)