Merge branch 'develop' into feature/filter-converged

mdbenito · mdbenito · commit 26ef93f2b695 · 2023-10-07T12:33:30.000+02:00
# Conflicts:
#	CHANGELOG.md
diff --git a/.github/workflows/run-tests-workflow.yaml b/.github/workflows/run-tests-workflow.yaml
@@ -31,8 +31,10 @@ jobs:
       with:
         key: tox-${{ github.ref }}-${{ runner.os }}-${{ hashFiles('tox.ini', 'requirements.txt') }}
         path: .tox
+    - name: Set up memcached
+      uses: niden/actions-memcached@v7
     - name: Test Base Modules
-      run: tox
+      run: tox -e base
       if: inputs.tests_to_run == 'base'
     - name: Test Modules that rely on PyTorch
       run: tox -e torch
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+- Tests no longer use Docker to start a memcached server
+  [PR #444](https://github.com/aai-institute/pyDVL/pull/444)
 - Faster semi-value computation with per-index check of stopping criteria (optional)
   [PR #437](https://github.com/aai-institute/pyDVL/pull/437)
 - Improvements and fixes to notebooks
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -95,11 +95,14 @@ run only certain tests using patterns (`-k`) or marker (`-m`).
 tox -e base -- <optional arguments>
 ```
 
-One important argument is `--do-not-start-memcache`. This prevents the test
-fixture from starting a new memcache server for testing and instead expects an
-already running local server listening on port 11211 (memcached's default port).
-If you run single tests within PyCharm, you will want to add this option to the
-run configurations.
+Two important arguments are `--memcached-service`, which lets you change the
+default of `localhost:11211` (memcached's default) to a different address, and
+`-n`, which sets the number of parallel workers for pytest-xdist. There are two
+layers of parallelization in the tests: an inner one within the tests
+themselves, i.e. the parallelism in the algorithms, and an outer one by
+pytest-xdist. The latter is controlled by the `-n` argument. If you experience
+segmentation faults with the tests, try running them with `-n 0` to disable
+the outer (pytest-xdist) parallelization.
 
 To test modules that rely on PyTorch, use:
 
@@ -326,7 +329,22 @@ Refer to its official
 [readme](https://github.com/nektos/act#installation-through-package-managers)
 for more installation options.
 
-#### Cheatsheat
+#### act cheatsheet
+
+By default, `act` will run **all workflows** in `.github/workflows`. You can
+use the `-W` flag to specify a specific workflow file to run, or you can rely
+on the job id to be unique (but then you'll see warnings for the workflows
+without that job id).
+
+```shell
+# Run only the main tests for python 3.8 after a push event (implicit) 
+act -W .github/workflows/run-tests-workflow.yaml \
+    -j run-tests \
+    --input tests_to_run=base \
+    --input python_version=3.8
+```
+
+Other common flags are: 
 
 ```shell
 # List all actions for all events:
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -15,7 +15,6 @@ nbconvert>=7.2.9
 nbstripout == 0.6.1
 pytest==7.2.2
 pytest-cov
-pytest-docker==2.0.0
 pytest-mock
 pytest-timeout
 pytest-lazy-fixture
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 import functools
+import logging
 import os
 from collections import defaultdict
 from dataclasses import asdict
@@ -31,10 +34,10 @@ def is_memcache_responsive(hostname, port):
 
 def pytest_addoption(parser):
     parser.addoption(
-        "--do-not-start-memcache",
+        "--memcached-service",
         action="store_true",
-        help="When this flag is used, memcache won't be started by a fixture"
-        " and is instead expected to be already running",
+        default="localhost:11211",
+        help="Address of memcached server to use for tests.",
     )
     group = parser.getgroup("tolerate")
     group.addoption(
@@ -82,64 +85,17 @@ def pytorch_seed(seed):
 
 
 @pytest.fixture(scope="session")
-def do_not_start_memcache(request):
-    return request.config.getoption("--do-not-start-memcache")
-
-
-@pytest.fixture(scope="session")
-def docker_services(
-    docker_compose_command,
-    docker_compose_file,
-    docker_compose_project_name,
-    docker_setup,
-    docker_cleanup,
-    do_not_start_memcache,
-):
-    """Start all services from a docker compose file (`docker-compose up`).
-    After test are finished, shutdown all services (`docker-compose down`)."""
-    from pytest_docker.plugin import get_docker_services
-
-    if do_not_start_memcache:
-        yield
-    else:
-        with get_docker_services(
-            docker_compose_command,
-            docker_compose_file,
-            docker_compose_project_name,
-            docker_setup,
-            docker_cleanup,
-        ) as docker_service:
-            yield docker_service
-
-
-@pytest.fixture(scope="session")
-def memcached_service(docker_ip, docker_services, do_not_start_memcache):
-    """Ensure that memcached service is up and responsive.
-
-    If `do_not_start_memcache` is True then we just return the default values
-    'localhost', 11211
-    """
-    if do_not_start_memcache:
-        return "localhost", 11211
-    else:
-        # `port_for` takes a container port and returns the corresponding host port
-        port = docker_services.port_for("memcached", 11211)
-        hostname, port = docker_ip, port
-        docker_services.wait_until_responsive(
-            timeout=30.0,
-            pause=0.5,
-            check=lambda: is_memcache_responsive(hostname, port),
-        )
-        return hostname, port
+def memcached_service(request) -> tuple[str, int]:
+    opt = request.config.getoption("--memcached-service", default="localhost:11211")
+    host, port = opt.split(":")
+    return host, int(port)
 
 
 @pytest.fixture(scope="function")
 def memcache_client_config(memcached_service) -> MemcachedClientConfig:
-    client_config = MemcachedClientConfig(
+    return MemcachedClientConfig(
         server=memcached_service, connect_timeout=1.0, timeout=1, no_delay=True
     )
-    Client(**asdict(client_config)).flush_all()
-    return client_config
 
 
 @pytest.fixture(scope="function")
@@ -151,11 +107,9 @@ def memcached_client(memcache_client_config) -> Tuple[Client, MemcachedClientCon
         c.flush_all()
         return c, memcache_client_config
     except Exception as e:
-        print(
-            f"Could not connect to memcached server "
-            f'{memcache_client_config["server"]}: {e}'
-        )
-        raise e
+        raise ConnectionError(
+            f"Could not connect to memcached at {memcache_client_config.server}"
+        ) from e
 
 
 @pytest.fixture(scope="function")
@@ -400,6 +354,14 @@ def pytest_configure(config):
     )
     config._tolerate_session = TolerateErrorsSession(config)
 
+    worker_id = os.environ.get("PYTEST_XDIST_WORKER")
+    if worker_id is not None:
+        logging.basicConfig(
+            format="%(asctime)s %(levelname)s %(message)s",
+            filename=f"tests_{worker_id}.log",
+            level=logging.DEBUG,
+        )
+
 
 def pytest_runtest_setup(item: pytest.Item):
     marker = item.get_closest_marker("tolerate")
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
diff --git a/tests/utils/test_caching.py b/tests/utils/test_caching.py
@@ -70,68 +70,58 @@ def foo(indices: NDArray[np.int_], *args, **kwargs) -> float:
     assert hits_after - hits_before >= n_runs - 2
 
 
-def test_memcached_repeated_training(memcached_client):
+def test_memcached_repeated_training(memcached_client, worker_id: str):
     _, config = memcached_client
 
     @memcached(
         client_config=config,
         time_threshold=0,  # Always cache results
         allow_repeated_evaluations=True,
         rtol_stderr=0.01,
-        # Note that we typically do NOT want to ignore run_id
-        ignore_args=["job_id", "run_id"],
     )
-    def foo(indices: NDArray[np.int_]) -> float:
-        # from pydvl.utils.logging import logger
-        # logger.info(f"run_id: {run_id}, running...")
+    def foo(indices: NDArray[np.int_], uid: str) -> float:
         return float(np.sum(indices)) + np.random.normal(scale=10)
 
     n = 7
-    foo(np.arange(n))
+    foo(np.arange(n), worker_id)
     for _ in range(10_000):
-        result = foo(np.arange(n))
+        result = foo(np.arange(n), worker_id)
 
     assert (result - np.sum(np.arange(n))) < 1
     assert foo.stats.sets < foo.stats.hits
 
 
-def test_memcached_faster_with_repeated_training(memcached_client):
+def test_memcached_faster_with_repeated_training(memcached_client, worker_id: str):
     _, config = memcached_client
 
     @memcached(
         client_config=config,
         time_threshold=0,  # Always cache results
         allow_repeated_evaluations=True,
         rtol_stderr=0.1,
-        # Note that we typically do NOT want to ignore run_id
-        ignore_args=["job_id", "run_id"],
     )
-    def foo_cache(indices: NDArray[np.int_]) -> float:
-        # from pydvl.utils.logging import logger
-        # logger.info(f"run_id: {run_id}, running...")
+    def foo_cache(indices: NDArray[np.int_], uid: str) -> float:
         sleep(0.01)
         return float(np.sum(indices)) + np.random.normal(scale=1)
 
-    def foo_no_cache(indices: NDArray[np.int_]) -> float:
-        # from pydvl.utils.logging import logger
-        # logger.info(f"run_id: {run_id}, running...")
+    def foo_no_cache(indices: NDArray[np.int_], uid: str) -> float:
         sleep(0.01)
         return float(np.sum(indices)) + np.random.normal(scale=1)
 
     n = 3
-    foo_cache(np.arange(n))
-    foo_no_cache(np.arange(n))
+    foo_cache(np.arange(n), worker_id)
+    foo_no_cache(np.arange(n), worker_id)
 
     start = time()
     for _ in range(300):
-        result_fast = foo_cache(np.arange(n))
+        result_fast = foo_cache(np.arange(n), worker_id)
     end = time()
     fast_time = end - start
 
     start = time()
     results_slow = []
     for _ in range(300):
-        result = foo_no_cache(np.arange(n))
+        result = foo_no_cache(np.arange(n), worker_id)
         results_slow.append(result)
     end = time()
     slow_time = end - start
diff --git a/tox.ini b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = base, report, docs
+envlist = base
 wheel = true
 
 [testenv]
@@ -12,12 +12,12 @@ setenv =
 [testenv:base]
 description = Tests base modules
 commands =
-    pytest -n auto --cov "{envsitepackagesdir}/pydvl" -m "not torch" {posargs}
+    pytest -n auto --dist worksteal --cov "{envsitepackagesdir}/pydvl" -m "not torch" {posargs}
 
 [testenv:torch]
 description = Tests modules that rely on pytorch
 commands =
-    pytest -n auto --cov "{envsitepackagesdir}/pydvl" -m torch {posargs}
+    pytest -n auto --dist worksteal --cov "{envsitepackagesdir}/pydvl" -m torch {posargs}
 extras =
     influence
 
@@ -26,7 +26,7 @@ description = Tests notebooks
 setenv =
     PYTHONPATH={toxinidir}/notebooks
 commands =
-    pytest -n auto notebooks/ --cov "{envsitepackagesdir}/pydvl" {posargs}
+    pytest -n auto --dist worksteal notebooks/ --cov "{envsitepackagesdir}/pydvl" {posargs}
 deps =
     {[testenv]deps}
     jupyter==1.0.0