Skip to content

Commit ab69eeb

Browse files
authored
Error out gracefully on missing optional module (#600)
* Do not load scipy unconditionally. SciPy is considered an optional dependency, so the sssom.cli module should not unconditionally attempt to load it -- doing so makes scipy a de facto mandatory dependency. SciPy is only needed for the `correlations` subcommand, so when that command is used we try to load the scipy module, and error out cleanly with a message indicating that this particular subcommand requires SciPy.
* Error out gracefully when an optional module is missing. Apply to the optional modules `networkx` and `pansql` the same principle as for the `scipy` module: when they are not available, the commands that depend on them should fail gracefully with an explicit error message, rather than spit out an uncaught ModuleNotFoundError.
* Apply black-mandated style.
1 parent af65fad commit ab69eeb

File tree

1 file changed

+34
-5
lines changed

1 file changed

+34
-5
lines changed

src/sssom/cli.py

Lines changed: 34 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
import yaml
2525
from curies import Converter
2626
from rdflib import Graph
27-
from scipy.stats import chi2_contingency
2827

2928
from sssom.constants import (
3029
DEFAULT_VALIDATION_TYPES,
@@ -308,7 +307,13 @@ def dosql(query: str, inputs: List[str], output: TextIO):
308307
FROM file1 INNER JOIN file2 WHERE file1.object_id = file2.subject_id" FROM file1.sssom.tsv file2.sssom.tsv`
309308
""" # noqa: DAR101
310309
# should start with from_tsv and MOST should return write_sssom
311-
run_sql_query(query=query, inputs=inputs, output=output)
310+
try:
311+
run_sql_query(query=query, inputs=inputs, output=output)
312+
except ModuleNotFoundError as e:
313+
if e.name == "pansql":
314+
raise click.ClickException("The dosql command requires the optional pansql module.")
315+
raise
316+
312317
# n = 1
313318
# new_msdf = MappingSetDataFrame()
314319
# while len(inputs) >= n:
@@ -414,7 +419,14 @@ def partition(inputs: List[str], output_directory: str):
414419
doc = docs.pop()
415420
"""for d2 in docs:
416421
doc.mapping_set.mappings += d2.mapping_set.mappings"""
417-
cliquedocs = split_into_cliques(doc)
422+
try:
423+
cliquedocs = split_into_cliques(doc)
424+
except ModuleNotFoundError as e:
425+
if e.name == "networkx":
426+
raise click.ClickException(
427+
"The partition command requires the optional networkx module."
428+
)
429+
raise
418430
for n, cdoc in enumerate(cliquedocs, start=1):
419431
ofn = f"{output_directory}/clique_{n}.sssom.tsv"
420432
# logging.info(f'Writing to {ofn}. Size={len(cdoc.mapping_set.mappings)}')
@@ -438,7 +450,14 @@ def cliquesummary(input: str, output: TextIO, metadata: str, statsfile: str):
438450
else:
439451
meta_obj = yaml.safe_load(open(metadata))
440452
doc = parse_sssom_table(input, meta=meta_obj)
441-
df = summarize_cliques(doc)
453+
try:
454+
df = summarize_cliques(doc)
455+
except ModuleNotFoundError as e:
456+
if e.name == "networkx":
457+
raise click.ClickException(
458+
"The cliquesummary command requires the optional networkx module."
459+
)
460+
raise
442461
df.to_csv(output, sep="\t")
443462
if statsfile is None:
444463
logging.info(df.describe)
@@ -469,6 +488,11 @@ def crosstab(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str
469488
@input_argument
470489
def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str]):
471490
"""Calculate correlations."""
491+
try:
492+
from scipy.stats import chi2_contingency
493+
except ModuleNotFoundError:
494+
raise click.ClickException("The correlations command requires the optional scipy module.")
495+
472496
msdf = parse_sssom_table(input)
473497
df = remove_unmatched(msdf.df)
474498
if len(df) == 0:
@@ -669,7 +693,12 @@ def filter(input: str, output: TextIO, **kwargs):
669693
:param output: Output location.
670694
:param kwargs: Filter options provided by user which generate queries (e.g.: --subject_id x:%).
671695
"""
672-
filter_file(input=input, output=output, **kwargs)
696+
try:
697+
filter_file(input=input, output=output, **kwargs)
698+
except ModuleNotFoundError as e:
699+
if e.name == "pansql":
700+
raise click.ClickException("The filter command requires the pansql optional module.")
701+
raise
673702

674703

675704
@main.command()

0 commit comments

Comments
 (0)