Skip to content

Commit 701fa2a

Browse files
Merge pull request #222 from NeurodataWithoutBorders/negative_job_slicing
Add negative job slicing
2 parents 1f4010c + e5f0ef5 commit 701fa2a

File tree

4 files changed

+61
-3
lines changed

4 files changed

+61
-3
lines changed

nwbinspector/nwbinspector.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
)
3030
from .register_checks import InspectorMessage, Importance
3131
from .tools import get_s3_urls_and_dandi_paths
32-
from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read
32+
from .utils import FilePathType, PathType, OptionalListOfStrings, robust_s3_read, calculate_number_of_cpu
3333

3434
INTERNAL_CONFIGS = dict(dandi=Path(__file__).parent / "internal_configs" / "dandi.inspector_config.yaml")
3535

@@ -318,6 +318,8 @@ def inspect_all(
318318
The default is the lowest level, BEST_PRACTICE_SUGGESTION.
319319
n_jobs : int
320320
Number of jobs to use in parallel. Set to -1 to use all available resources.
321+
This may also be a negative integer x from -2 to -(number_of_cpus - 1), which acts like negative slicing by using
322+
all available CPUs minus abs(x).
321323
Set to 1 (also the default) to disable.
322324
skip_validate : bool, optional
323325
Skip the PyNWB validation step. This may be desired for older NWBFiles (< schema version v2.10).
@@ -341,6 +343,7 @@ def inspect_all(
341343
Importance[importance_threshold] if isinstance(importance_threshold, str) else importance_threshold
342344
)
343345
modules = modules or []
346+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
344347
if progress_bar_options is None:
345348
progress_bar_options = dict(position=0, leave=False)
346349
if stream:

nwbinspector/tools.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pynwb import NWBFile
99

10-
from .utils import is_module_installed
10+
from .utils import is_module_installed, calculate_number_of_cpu
1111

1212

1313
def make_minimal_nwbfile():
@@ -43,6 +43,7 @@ def get_s3_urls_and_dandi_paths(dandiset_id: str, version_id: Optional[str] = No
4343
), "The specified 'path' is not a proper DANDISet ID. It should be a six-digit numeric identifier."
4444

4545
s3_urls_to_dandi_paths = dict()
46+
n_jobs = calculate_number_of_cpu(requested_cpu=n_jobs)
4647
if n_jobs != 1:
4748
with DandiAPIClient() as client:
4849
dandiset = client.get_dandiset(dandiset_id=dandiset_id, version_id=version_id)

nwbinspector/utils.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Commonly reused logic for evaluating conditions; must not have external dependencies."""
2+
import os
23
import re
34
import json
45
import numpy as np
@@ -130,3 +131,25 @@ def robust_s3_read(
130131
except Exception as exc:
131132
raise exc
132133
raise TimeoutError(f"Unable to complete the command ({command.__name__}) after {max_retries} attempts!")
134+
135+
136+
def calculate_number_of_cpu(requested_cpu: int = 1) -> int:
    """
    Calculate the number of CPUs to use with respect to negative slicing and check against available resources.

    Parameters
    ----------
    requested_cpu : int, optional
        The desired number of CPUs to use.
        Positive values are returned unchanged.
        Zero and negative values are added to the total number of CPUs, so 0 resolves to the total,
        -1 to the total minus one, and so on.

        The default is 1.

    Returns
    -------
    int
        The resolved number of CPUs; always between 1 and the total number of CPUs.

    Raises
    ------
    AssertionError
        If `requested_cpu` is greater than the total number of CPUs, or less than -(total - 1).
    """
    # os.cpu_count() returns None when the count cannot be determined; fall back to a single CPU
    # instead of failing with a TypeError in the comparisons below.
    total_cpu = os.cpu_count() or 1

    # Raised explicitly (rather than via `assert` statements) so the validation is not stripped when
    # Python runs with -O. The exception type and messages are kept for existing callers.
    if requested_cpu > total_cpu:
        raise AssertionError(f"Requested more CPUs ({requested_cpu}) than are available ({total_cpu})!")
    if requested_cpu < -(total_cpu - 1):
        raise AssertionError(f"Requested fewer CPUs ({requested_cpu}) than are available ({total_cpu})!")

    if requested_cpu > 0:
        return requested_cpu
    # Non-positive requests count backwards from the total (negative slicing).
    return total_cpu + requested_cpu

tests/test_utils.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1+
import os
12
from packaging import version
23

34
from hdmf.testing import TestCase
45

56
from nwbinspector import Importance
6-
from nwbinspector.utils import format_byte_size, check_regular_series, is_dict_in_string, get_package_version
7+
from nwbinspector.utils import (
8+
format_byte_size,
9+
check_regular_series,
10+
is_dict_in_string,
11+
get_package_version,
12+
calculate_number_of_cpu,
13+
)
714

815

916
def test_format_byte_size():
@@ -104,3 +111,27 @@ def test_get_package_version_type():
104111

105112
def test_get_package_version_value():
106113
assert get_package_version("hdmf") >= version.parse("3.1.1") # minimum supported PyNWB version
114+
115+
116+
class TestCalulcateNumberOfCPU(TestCase):
    """Check bounds validation and negative-value resolution of `calculate_number_of_cpu`."""

    # Evaluated once at class creation; the expected messages and values below are derived from it.
    total_cpu = os.cpu_count()

    def test_request_more_than_available_assert(self):
        """A request far above the machine's CPU count is rejected with the 'more CPUs' message."""
        requested_cpu = 2500
        expected_message = f"Requested more CPUs ({requested_cpu}) than are available ({self.total_cpu})!"
        with self.assertRaisesWith(exc_type=AssertionError, exc_msg=expected_message):
            calculate_number_of_cpu(requested_cpu=requested_cpu)

    def test_request_fewer_than_available_assert(self):
        """A request far below the negative bound is rejected with the 'fewer CPUs' message."""
        requested_cpu = -2500
        expected_message = f"Requested fewer CPUs ({requested_cpu}) than are available ({self.total_cpu})!"
        with self.assertRaisesWith(exc_type=AssertionError, exc_msg=expected_message):
            calculate_number_of_cpu(requested_cpu=requested_cpu)

    def test_calculate_number_of_cpu_negative_value(self):
        """A request of -1 resolves to the same value as -1 modulo the total CPU count."""
        requested_cpu = -1  # kept at -1 because CI only has 2 jobs available
        expected_cpu = requested_cpu % self.total_cpu
        resolved_cpu = calculate_number_of_cpu(requested_cpu=requested_cpu)
        assert resolved_cpu == expected_cpu

0 commit comments

Comments
 (0)