openml
diff --git a/‎.github/workflows/dist.yaml‎
Lines changed: 18 additions & 1 deletion b/‎.github/workflows/dist.yaml‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎.github/workflows/docs.yaml‎
Lines changed: 18 additions & 1 deletion b/‎.github/workflows/docs.yaml‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎.github/workflows/pre-commit.yaml‎
Lines changed: 18 additions & 1 deletion b/‎.github/workflows/pre-commit.yaml‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎.github/workflows/release_docker.yaml‎
Lines changed: 5 additions & 0 deletions b/‎.github/workflows/release_docker.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 35 additions & 13 deletions b/‎.github/workflows/test.yml‎
Lines changed: 35 additions & 13 deletions
diff --git a/‎doc/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎doc/conf.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/contributing.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/contributing.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎openml/_api_calls.py‎
Lines changed: 14 additions & 11 deletions b/‎openml/_api_calls.py‎
Lines changed: 14 additions & 11 deletions
diff --git a/‎openml/config.py‎
Lines changed: 17 additions & 14 deletions b/‎openml/config.py‎
Lines changed: 17 additions & 14 deletions
diff --git a/‎openml/datasets/dataset.py‎
Lines changed: 4 additions & 4 deletions b/‎openml/datasets/dataset.py‎
Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,23 @@
 name: dist-check
 
-on: [push, pull_request]
+on:
+  workflow_dispatch:
+
+  push:
+    branches:
+      - main
+      - develop
+    tags:
+      - "v*.*.*"
+
+  pull_request:
+    branches:
+      - main
+      - develop
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
 
 jobs:
   dist:
 
@@ -1,5 +1,22 @@
 name: Docs
-on: [pull_request, push]
+on:
+  workflow_dispatch:
+
+  push:
+    branches:
+      - main
+      - develop
+    tags:
+      - "v*.*.*"
+
+  pull_request:
+    branches:
+      - main
+      - develop
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
 
 jobs:
   build-and-deploy:
 
@@ -1,6 +1,23 @@
 name: pre-commit
 
-on: [push]
+on:
+  workflow_dispatch:
+
+  push:
+    branches:
+      - main
+      - develop
+    tags:
+      - "v*.*.*"
+
+  pull_request:
+    branches:
+      - main
+      - develop
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
 
 jobs:
   run-all-files:
 
@@ -1,6 +1,7 @@
 name: release-docker
 
 on:
+  workflow_dispatch:
   push:
     branches:
       - 'develop'
@@ -11,6 +12,10 @@ on:
     branches:
       - 'develop'
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
 
   docker:
 
@@ -1,6 +1,19 @@
 name: Tests
 
-on: [push, pull_request]
+on:
+  workflow_dispatch:
+
+  push:
+    branches:
+      - main
+      - develop
+    tags:
+      - "v*.*.*"
+
+  pull_request:
+    branches:
+      - main
+      - develop
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -12,25 +25,34 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ["3.8", "3.9"]
-        scikit-learn: ["0.21.2", "0.22.2", "0.23.1", "0.24"]
+        python-version: ["3.8"]
+        # TODO(eddiebergman): We should consider testing against newer versions.
+        # We should probably just have a `"1"` version to always test against the latest release.
+        scikit-learn: ["0.23.1", "0.24"]
         os: [ubuntu-latest]
-        sklearn-only: ['true']
-        exclude:  # no scikit-learn 0.21.2 release for Python 3.8
-          - python-version: 3.8
-            scikit-learn: 0.21.2
+        sklearn-only: ["true"]
+        exclude:  # no scikit-learn 0.23 release for Python 3.9
+          - python-version: "3.9"
+            scikit-learn: "0.23.1"
         include:
-          - python-version: 3.8
+          - os: ubuntu-latest
+            python-version: "3.9"
+            scikit-learn: "0.24"
+            scipy: "1.10.0"
+            sklearn-only: "true"
+          # Include a code cov version
+          - code-cov: true
+            os: ubuntu-latest
+            python-version: "3.8"
             scikit-learn: 0.23.1
-            code-cov: true
             sklearn-only: 'false'
-            os: ubuntu-latest
+          # Include a Windows test (for some reason on a later version of scikit-learn)
           - os: windows-latest
-            sklearn-only: 'false'
+            python-version: "3.8"
             scikit-learn: 0.24.*
-            scipy: 1.10.0
+            scipy: "1.10.0"  # not sure why the explicit scipy version?
+            sklearn-only: 'false'
       fail-fast:  false
-      max-parallel: 4
 
     steps:
     - uses: actions/checkout@v4
 
@@ -119,7 +119,7 @@
 #
 # currently disabled because without intersphinx we cannot link to numpy.ndarray
 # nitpicky = True
-
+linkcheck_ignore = [r"https://test.openml.org/t/.*"]  # FIXME: ignore test-server links so that test server bugs do not break the docs build
 # -- Options for HTML output ----------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 
@@ -19,7 +19,7 @@ In particular, a few ways to contribute to openml-python are:
    For more information, see the :ref:`extensions` below.
 
  * Bug reports. If something doesn't work for you or is cumbersome, please open a new issue to let
-   us know about the problem. See `this section <https://github.com/openml/openml-python/blob/main/CONTRIBUTING.md#user-content-reporting-bugs>`_.
+   us know about the problem. See `this section <https://github.com/openml/openml-python/blob/main/CONTRIBUTING.md>`_.
 
  * `Cite OpenML <https://www.openml.org/cite>`_ if you use it in a scientific publication.
 
 
@@ -341,6 +341,9 @@ def _send_request(  # noqa: C901
     response: requests.Response | None = None
     delay_method = _human_delay if config.retry_policy == "human" else _robot_delay
 
+    # Error to raise in case of retrying too often. Will be set to the last observed exception.
+    retry_raise_e: Exception | None = None
+
     with requests.Session() as session:
         # Start at one to have a non-zero multiplier for the sleep
         for retry_counter in range(1, n_retries + 1):
@@ -384,10 +387,7 @@ def _send_request(  # noqa: C901
                 # which means trying again might resolve the issue.
                 if e.code != DATABASE_CONNECTION_ERRCODE:
                     raise e
-
-                delay = delay_method(retry_counter)
-                time.sleep(delay)
-
+                retry_raise_e = e
             except xml.parsers.expat.ExpatError as e:
                 if request_method != "get" or retry_counter >= n_retries:
                     if response is not None:
@@ -399,18 +399,21 @@ def _send_request(  # noqa: C901
                         f"Unexpected server error when calling {url}. Please contact the "
                         f"developers!\n{extra}"
                     ) from e
-
-                delay = delay_method(retry_counter)
-                time.sleep(delay)
-
+                retry_raise_e = e
             except (
                 requests.exceptions.ChunkedEncodingError,
                 requests.exceptions.ConnectionError,
                 requests.exceptions.SSLError,
                 OpenMLHashException,
-            ):
-                delay = delay_method(retry_counter)
-                time.sleep(delay)
+            ) as e:
+                retry_raise_e = e
+
+            # We can only be here if there was an exception
+            assert retry_raise_e is not None
+            if retry_counter >= n_retries:
+                raise retry_raise_e
+            delay = delay_method(retry_counter)
+            time.sleep(delay)
 
     assert response is not None
     return response
 
@@ -243,14 +243,11 @@ def _setup(config: _Config | None = None) -> None:
     config_dir = config_file.parent
 
     # read config file, create directory for config file
-    if not config_dir.exists():
-        try:
+    try:
+        if not config_dir.exists():
             config_dir.mkdir(exist_ok=True, parents=True)
-            cache_exists = True
-        except PermissionError:
-            cache_exists = False
-    else:
-        cache_exists = True
+    except PermissionError:
+        pass
 
     if config is None:
         config = _parse_config(config_file)
@@ -264,15 +261,21 @@ def _setup(config: _Config | None = None) -> None:
     set_retry_policy(config["retry_policy"], n_retries)
 
     _root_cache_directory = short_cache_dir.expanduser().resolve()
+
+    try:
+        cache_exists = _root_cache_directory.exists()
+    except PermissionError:
+        cache_exists = False
+
     # create the cache subdirectory
-    if not _root_cache_directory.exists():
-        try:
+    try:
+        if not _root_cache_directory.exists():
             _root_cache_directory.mkdir(exist_ok=True, parents=True)
-        except PermissionError:
-            openml_logger.warning(
-                "No permission to create openml cache directory at %s! This can result in "
-                "OpenML-Python not working properly." % _root_cache_directory,
-            )
+    except PermissionError:
+        openml_logger.warning(
+            "No permission to create openml cache directory at %s! This can result in "
+            "OpenML-Python not working properly." % _root_cache_directory,
+        )
 
     if cache_exists:
         _create_log_handlers()
 
@@ -589,7 +589,6 @@ def _load_data(self) -> tuple[pd.DataFrame | scipy.sparse.csr_matrix, list[bool]
         fpath = self.data_feather_file if self.cache_format == "feather" else self.data_pickle_file
         logger.info(f"{self.cache_format} load data {self.name}")
         try:
-            assert self.data_pickle_file is not None
             if self.cache_format == "feather":
                 assert self.data_feather_file is not None
                 assert self.feather_attribute_file is not None
@@ -599,6 +598,7 @@ def _load_data(self) -> tuple[pd.DataFrame | scipy.sparse.csr_matrix, list[bool]
                 with open(self.feather_attribute_file, "rb") as fh:  # noqa: PTH123
                     categorical, attribute_names = pickle.load(fh)  # noqa: S301
             else:
+                assert self.data_pickle_file is not None
                 with open(self.data_pickle_file, "rb") as fh:  # noqa: PTH123
                     data, categorical, attribute_names = pickle.load(fh)  # noqa: S301
         except FileNotFoundError as e:
@@ -681,14 +681,13 @@ def _convert_array_format(
         if array_format == "array" and not isinstance(data, scipy.sparse.spmatrix):
             # We encode the categories such that they are integer to be able
             # to make a conversion to numeric for backward compatibility
-            def _encode_if_category(column: pd.Series) -> pd.Series:
+            def _encode_if_category(column: pd.Series | np.ndarray) -> pd.Series | np.ndarray:
                 if column.dtype.name == "category":
                     column = column.cat.codes.astype(np.float32)
                     mask_nan = column == -1
                     column[mask_nan] = np.nan
                 return column
 
-            assert isinstance(data, (pd.DataFrame, pd.Series))
             if isinstance(data, pd.DataFrame):
                 columns = {
                     column_name: _encode_if_category(data.loc[:, column_name])
@@ -1090,7 +1089,8 @@ def _get_qualities_pickle_file(qualities_file: str) -> str:
     return qualities_file + ".pkl"
 
 
-def _read_qualities(qualities_file: Path) -> dict[str, float]:
+def _read_qualities(qualities_file: str | Path) -> dict[str, float]:
+    qualities_file = Path(qualities_file)
     qualities_pickle_file = Path(_get_qualities_pickle_file(str(qualities_file)))
     try:
         with qualities_pickle_file.open("rb") as fh_binary:
Original file line number	Diff line number	Diff line change
`@@ -119,7 +119,7 @@`
`119`	`119`	`#`
`120`	`120`	`# currently disabled because without intersphinx we cannot link to numpy.ndarray`
`121`	`121`	`# nitpicky = True`
`122`		`-`
	`122`	`+linkcheck_ignore = [r"https://test.openml.org/t/.*"] # FIXME: ignore test-server links so that test server bugs do not break the docs build`
`123`	`123`	`# -- Options for HTML output ----------------------------------------------`
`124`	`124`
`125`	`125`	`# The theme to use for HTML and HTML Help pages. See the documentation for`