Release 1.0.3 (#64)

mikeqfu · web-flow · commit f657aaebb594 · 2025-11-28T19:13:33.000Z
diff --git a/.github/workflows/github-pages.yml b/.github/workflows/github-pages.yml
@@ -24,7 +24,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
+          python-version: '3.12'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml
@@ -20,7 +20,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.12'
 
       - name: Install dependencies
         run: |
@@ -29,8 +29,7 @@ jobs:
 
       - name: Run tests
         run: |
-          python -m pytest -v --cov=pyrcs --cov-branch \
-          --cov-report=term --cov-report=xml:coverage.xml tests/
+          python -m pytest -v --cov=pyrcs --cov-branch --cov-report=term --cov-report=xml:coverage.xml tests/
 
       - name: Debug coverage file
         run: ls -lah
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -9,7 +9,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.10"
+    python: "3.12"
 
 # Build documentation in the docs/ directory with Sphinx (this is the default documentation type)
 sphinx:
diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt
@@ -1,5 +1,5 @@
-furo==2024.8.6
-pyhelpers==2.3.0
+furo==2025.9.25
+pyhelpers==2.3.1
 sphinx-copybutton==0.5.2
-sphinx-new-tab-link==0.8.0
+sphinx-new-tab-link==0.8.1
 sphinx-toggleprompt==0.6.0
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,9 +42,9 @@ keywords = [
     "Depots",
     "Tracks"
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = [
-    "pyhelpers >= 2.3.0",
+    "pyhelpers >= 2.3.1",
     "beautifulsoup4"
 ]
 classifiers = [
diff --git a/pyrcs/data/.metadata b/pyrcs/data/.metadata
@@ -5,7 +5,7 @@
     "Author": "Qian Fu",
     "Affiliation": "School of Engineering, University of Birmingham",
     "Email": "q.fu@bham.ac.uk",
-    "Version": "1.0.2",
+    "Version": "1.0.3",
     "License": "MIT",
     "First release": "August 2019"
 }
diff --git a/pyrcs/data/site-map.json b/pyrcs/data/site-map.json
diff --git a/pyrcs/other_assets/depot.py b/pyrcs/other_assets/depot.py
@@ -482,15 +482,18 @@ def _collect_gwr_codes(self, source, verbose=False):
 
         span_tags = soup.find_all(name='span', attrs={'class': 'tab2'})
         num_codes_dict = dict([
-            (int(span_tag.text), str(span_tag.next_sibling).replace(' = ', '').strip())
+            (int(span_tag.text), str(span_tag.next_sibling).replace('=', '').strip())
             for span_tag in span_tags])
 
-        numerical_codes.rename(columns={'sort by division': 'Division'}, inplace=True)
-        numerical_codes.Division = numerical_codes.Code.map(
-            lambda x: num_codes_dict[int(str(x)[-1])])
+        temp = numerical_codes.iloc[:, 0].str.split(' ', expand=True)
+        temp.columns = ['Code', 'Division']
+        temp.loc[:, 'Division'] = temp['Division'].map(lambda x: num_codes_dict[int(str(x)[-1])])
+        numerical_codes = pd.concat(
+            [temp, numerical_codes.drop(columns=numerical_codes.columns[0])], axis=1)
 
         h3_titles = [h3.text for h3 in soup.find_all('h3')]
         gwr_depot_codes_data = dict(zip(h3_titles, [alphabetical_codes, numerical_codes]))
+        gwr_depot_codes_data.update({'Keys to numerical codes': num_codes_dict})
 
         gwr_depot_codes = {
             self.KEY_TO_GWR: gwr_depot_codes_data,
diff --git a/pyrcs/other_assets/station.py b/pyrcs/other_assets/station.py
@@ -84,8 +84,6 @@ def _check_row_spans(dat):
     dat0[['ELR', 'Mileage']] = dat0[['ELR', 'Mileage']].map(lambda x: x.split(' &&& '))
     dat0 = dat0.explode(['ELR', 'Mileage'], ignore_index=True)
 
-    dat0.sort_values(['Station'], ignore_index=True, inplace=True)
-
     return dat0
 
 
@@ -122,22 +120,28 @@ def _parse_station_column(dat):
         x = 'Heathrow Junction [sometimes referred to as Heathrow Interchange]\t\t / [no CRS?]'
     """
 
-    temp1 = dat['Station'].str.split('\t\t', n=1, expand=True)
-    temp1.columns = ['Station', 'CRS']
-    dat['Station'] = temp1['Station'].str.rstrip(' / ').str.strip()
+    stn_col_name = [col for col in dat.columns if 'Station' in col][0]
+    temp1 = dat[stn_col_name].str.split('\t\t', n=1, expand=True)
+
+    if stn_col_name != 'Station':
+        dat.rename(columns={stn_col_name: 'Station'}, inplace=True)
+        stn_col_name = 'Station'
+
+    temp1.columns = [stn_col_name, 'CRS']
+    dat[stn_col_name] = temp1[stn_col_name].str.rstrip(' / ').str.strip()
 
     # Get notes for stations
     stn_note_ = pd.Series('', index=dat.index)
-    for i, x in enumerate(temp1['Station']):
+    for i, x in enumerate(temp1[stn_col_name]):
         if '[' in x and ']' in x:
             y = re.search(r' \[(.*)](✖.*)?', x).group(0)  # Station Note
-            dat.loc[i, 'Station'] = x.replace(y, '').strip()
+            dat.loc[i, stn_col_name] = str(x).replace(y, '').strip()
             if '✖' in y:
                 stn_note_[i] = '; '.join([y_.strip(' []') for y_ in y.split('✖')])
             else:
                 stn_note_[i] = y.strip(' []')
 
-    dat.insert(loc=dat.columns.get_loc('Station') + 1, column='Station Note', value=stn_note_)
+    dat.insert(loc=dat.columns.get_loc(stn_col_name) + 1, column='Station Note', value=stn_note_)
 
     temp2 = temp1['CRS'].str.replace(' / /', ' &&& ').str.split(
         r'  | / ', regex=True, expand=True).fillna('')
@@ -154,7 +158,7 @@ def _parse_station_column(dat):
         lambda z: ' and '.join(['{} [{}]'.format(*z_.split('✖')) for z_ in z.split(' &&& ')])
         if ' &&& ' in z else z).str.strip()
 
-    dat = pd.concat([dat, temp2], axis=1)
+    dat = pd.concat([dat, temp2], axis=1).sort_values(stn_col_name)
 
     return dat
 
diff --git a/pyrcs/parser.py b/pyrcs/parser.py
@@ -73,7 +73,7 @@ def _prep_records(trs, ths, sep=' / '):
 
         for td_no, td in enumerate(tds):
             if td.find('td'):
-                text_ = td.find('a').contents + ["\t\t / "]
+                text_ = [''] if td.find('a') is None else td.find('a').contents + ["\t\t / "]
             else:
                 text_ = [_parse_other_tags_in_td_contents(x) for x in td.contents]
             # _move_element_to_end(text_, char='\t\t')
@@ -315,6 +315,10 @@ def parse_date(str_date, as_date_type=False):
 # == Extract information ===========================================================================
 
 
+def _clean_key(k_text):
+    return k_text.replace("–", "-").strip("()").removesuffix(".shtml").removesuffix(".shtm")
+
+
 def _parse_dd_or_dt(dd_or_dt):
     """
     Extracts text and href attributes from dt or dd elements.
@@ -339,50 +343,37 @@ def _parse_dd_or_dt(dd_or_dt):
         #     text = f'{text[1].upper()}{text[2:-1]}'
         href = a_href.find('a').get('href')
 
-    return text.replace("–", "-"), href
+    return _clean_key(text), href
 
 
 def _get_site_map_h3_dl_dt_dds(h3_dl_dt, next_dd=None):
     if next_dd is None:
         next_dd = h3_dl_dt.find_next('dd')
 
-    prev_dt = next_dd.find_previous(name='dt')
+    prev_dt = next_dd.find_previous('dt')
 
     h3_dl_dt_dds = {}
     while prev_dt == h3_dl_dt:
         next_dd_sub_dl_ = next_dd.find('dl')
 
-        if next_dd_sub_dl_ is None:
-            next_dd_contents = [x for x in next_dd.contents if x != '\n']
-
-            if len(next_dd_contents) == 1:
-                next_dd_content = next_dd_contents[0]
-                text = next_dd_content.get_text(strip=True)
-                href = next_dd_content.get(key='href')
-
-            else:  # len(next_dd_contents) == 2:
-                a_href, text = next_dd_contents
-                if not isinstance(text, str):
-                    text, a_href = next_dd_contents
-
-                href = a_href.find(name='a').get(key='href')
-
-            h3_dl_dt_dds.update(
-                {text.replace("–", "-"): urllib.parse.urljoin(home_page_url(), href)})
-
-        else:
-            sub_dts = next_dd_sub_dl_.find_all(name='dt')
+        if next_dd_sub_dl_:
+            sub_dts = next_dd_sub_dl_.find_all('dt')
 
             for sub_dt in sub_dts:
                 sub_dt_text, _ = _parse_dd_or_dt(sub_dt)
-                sub_dt_dds = sub_dt.find_next_siblings(name='dd')
+                sub_dt_dds = sub_dt.find_next_siblings('dd')
                 sub_dt_dds_dict = _get_site_map_sub_dl(h3_dl_dts=sub_dt_dds)
 
-                h3_dl_dt_dds.update({sub_dt_text.replace("–", "-"): sub_dt_dds_dict})
+                h3_dl_dt_dds.update({_clean_key(sub_dt_text): sub_dt_dds_dict})
+
+        else:
+            a = next_dd.find('a')
+            text, href = _clean_key(a.get_text(strip=True)), a.get(key='href')
+            h3_dl_dt_dds.update({text: urllib.parse.urljoin(home_page_url(), href)})
 
         try:
-            next_dd = next_dd.find_next_sibling(name='dd')
-            prev_dt = next_dd.find_previous_sibling(name='dt')
+            next_dd = next_dd.find_next_sibling('dd')
+            prev_dt = next_dd.find_previous_sibling('dt')
         except AttributeError:
             break
 
@@ -397,19 +388,20 @@ def _get_site_map_sub_dl(h3_dl_dts):
     h3_dl_dt_dd_dict = {}
 
     for h3_dl_dt in h3_dl_dts:
-        dt_text, dt_href = _parse_dd_or_dt(dd_or_dt=h3_dl_dt)
+        dt_text_, dt_href = _parse_dd_or_dt(dd_or_dt=h3_dl_dt)
+        dt_text = _clean_key(dt_text_)
 
         if dt_href:
             h3_dl_dt_dd_dict.update({dt_text: urllib.parse.urljoin(home_page_url(), dt_href)})
 
         else:
             next_dd = h3_dl_dt.find_next('dd')
-            next_dd_sub_dl = next_dd.find(name='dd')
+            next_dd_sub_dl = next_dd.find('dl')
 
             if next_dd_sub_dl:
                 # next_dd_sub_dl_dts = next_dd_sub_dl.find_all(name='dt')
                 next_dd_sub_dl_dts = [
-                    dt for dt in next_dd.find_all(name='dt') if dt.has_attr('class')]
+                    dt for dt in next_dd.find_all('dt') if dt.has_attr('class')]
                 h3_dl_dt_dd_dict.update({dt_text: _get_site_map_sub_dl(next_dd_sub_dl_dts)})
 
             else:
@@ -427,11 +419,11 @@ def _get_site_map(source, parser='html.parser'):
     soup = bs4.BeautifulSoup(markup=source.content, features=parser)
     site_map = {}
 
-    h3s = soup.find_all(name='h3', attrs={"class": "site"})
+    h3s = soup.find_all('h3', attrs={"class": "site"})
 
     for h3 in h3s:
         h3_title = h3.get_text(strip=True)
-        h3_dl_dts = h3.find_next(name='dl').find_all(name='dt')  # h3 > dl > dt
+        h3_dl_dts = h3.find_next('dl').find_all('dt')  # h3 > dl > dt
 
         if len(h3_dl_dts) == 1:
             dd_dict = {}  # h3 > dl > dt > dd
@@ -442,12 +434,12 @@ def _get_site_map(source, parser='html.parser'):
             if h3_dl_dt_text == '':
                 for dd in h3_dl_dt.find_next_siblings('dd'):
                     text, href = _parse_dd_or_dt(dd)
-                    dd_dict.update({text: urllib.parse.urljoin(home_page_url(), href)})
+                    dd_dict.update({_clean_key(text): urllib.parse.urljoin(home_page_url(), href)})
 
         else:
             dd_dict = _get_site_map_sub_dl(h3_dl_dts=h3_dl_dts)
 
-        site_map.update({h3_title: dd_dict})
+        site_map.update({_clean_key(h3_title): dd_dict})
 
     # noinspection SpellCheckingInspection
     site_map = update_dict_keys(
@@ -471,23 +463,23 @@ def get_site_map(update=False, confirmation_required=True, verbose=False, raise_
     :param raise_error: Whether to raise the provided exception;
         if ``raise_error=False``, the error will be suppressed; defaults to ``True``.
     :type raise_error: bool
-    :return: An ordered dictionary containing the data of site map.
-    :rtype: collections.OrderedDict | None
+    :return: A dictionary containing the data of site map.
+    :rtype: dict | None
 
     **Examples**::
 
         >>> from pyrcs.parser import get_site_map
         >>> site_map = get_site_map()
         >>> type(site_map)
-        collections.OrderedDict
+        dict
         >>> list(site_map.keys())
         ['Home',
          'Line data',
          'Other assets',
          '"Legal/financial" lists',
          'Miscellaneous']
         >>> site_map['Home']
-        {'index.shtml': 'http://www.railwaycodes.org.uk/index.shtml'}
+        {'index': 'http://www.railwaycodes.org.uk/index.shtml'}
     """
 
     path_to_file = cd_data("site-map.json", mkdir=True)
@@ -691,9 +683,12 @@ def get_introduction(url, delimiter='\n', update=False, verbose=False, raise_err
 
     try:
         source = requests.get(url=url, headers=fake_requests_headers())
-    except requests.exceptions.ConnectionError:
-        print_inst_conn_err(update=update, verbose=True if update else verbose)
-        return None
+    except requests.exceptions.ConnectionError as e:
+        if raise_error:
+            raise e  # Raise the original connection error
+        else:
+            print_inst_conn_err(update=update, verbose=True if update else verbose, e=e)
+            return None
 
     try:
         introduction = _parse_introduction(source=source, delimiter=delimiter)
diff --git a/requirements.txt b/requirements.txt
@@ -1,20 +1,24 @@
-build==1.2.2.post1
+backports.tarfile==1.2.0
+build==1.3.0
 fqdn==1.5.1
-furo==2024.8.6
+furo==2025.9.25
+importlib-metadata==8.0.0
 isoduration==20.11.0
+jaraco-functools==4.3.0
 jaraco.collections==5.1.0
 jsonpointer==3.0.0
-notebook==7.4.4
+notebook==7.5.0
+pandas-stubs==2.3.2.250926
 pip-chill==1.0.3
-pkginfo==1.12.1.2
-pyhelpers==2.3.0
-pytest-cov==6.2.1
+pyhelpers==2.3.1
+pytest-cov==7.0.0
 pytest-xdist==3.8.0
+rfc3987-syntax==1.1.0
 sphinx-copybutton==0.5.2
-sphinx-new-tab-link==0.8.0
+sphinx-new-tab-link==0.8.1
 sphinx-toggleprompt==0.6.0
 tinycss2==1.4.0
 tomli==2.0.1
-twine==6.1.0
+twine==6.2.0
 uri-template==1.3.0
-webcolors==24.11.1
+webcolors==25.10.0
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -57,7 +57,7 @@ def test_get_site_map(monkeypatch, capfd):
     from pyrcs.parser import get_site_map
 
     main_keys = ['Home', 'Line data', 'Other assets', '"Legal/financial" lists', 'Miscellaneous']
-    home_value = {'index.shtml': 'http://www.railwaycodes.org.uk/index.shtml'}
+    home_value = {'index': 'http://www.railwaycodes.org.uk/index.shtml'}
 
     monkeypatch.setattr('builtins.input', lambda _: "Yes")
     site_map_dat = get_site_map(update=True, verbose=True)
@@ -116,7 +116,7 @@ def test_get_introduction(update, raise_error, capfd):
 
     url_ = url.replace('railwaycodes', '123')
     if raise_error:
-        with pytest.raises(IndexError):
+        with pytest.raises(requests.exceptions.ConnectionError):
             get_introduction(url=url_, update=True, raise_error=raise_error)
 
     else:
@@ -126,7 +126,7 @@ def test_get_introduction(update, raise_error, capfd):
         intro_text = get_introduction(url=url_, update=True, verbose=True, raise_error=raise_error)
         out, _ = capfd.readouterr()
         assert intro_text is None
-        assert "Failed." in out
+        assert "Failed" in out
 
 
 def test_get_catalogue():

Rendered diff for `pyproject.toml`:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -42,9 +42,9 @@ keywords = [` |
| `42` | `42` | `"Depots",` |
| `43` | `43` | `"Tracks"` |
| `44` | `44` | `]` |
| `45` | | `-requires-python = ">=3.10"` |
| | `45` | `+requires-python = ">=3.12"` |
| `46` | `46` | `dependencies = [` |
| `47` | | `- "pyhelpers >= 2.3.0",` |
| | `47` | `+ "pyhelpers >= 2.3.1",` |
| `48` | `48` | `"beautifulsoup4"` |
| `49` | `49` | `]` |
| `50` | `50` | `classifiers = [` |

Rendered diff for `pyrcs/data/.metadata`:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -5,7 +5,7 @@` |
| `5` | `5` | `"Author": "Qian Fu",` |
| `6` | `6` | `"Affiliation": "School of Engineering, University of Birmingham",` |
| `7` | `7` | `"Email": "q.fu@bham.ac.uk",` |
| `8` | | `- "Version": "1.0.2",` |
| | `8` | `+ "Version": "1.0.3",` |
| `9` | `9` | `"License": "MIT",` |
| `10` | `10` | `"First release": "August 2019"` |
| `11` | `11` | `}` |