Skip to content

Commit 589da9a

Browse files
authored
Make stricter type checking (#634)
1 parent 427f6e8 commit 589da9a

File tree

8 files changed

30 additions and 23 deletions

src/sssom/cli.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -533,8 +533,8 @@ def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple[str,
533533
for j, v in row.items():
534534
logging.info(f"{i} x {j} = {v}")
535535
rows.append((v, i, j))
536-
for row in sorted(rows, key=itemgetter(0)):
537-
print(*row, sep="\t")
536+
for rrow in sorted(rows, key=itemgetter(0)):
537+
print(*rrow, sep="\t")
538538

539539

540540
@main.command()

src/sssom/cliques.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,12 @@
3030
import networkx
3131

3232

33-
def to_digraph(msdf: MappingSetDataFrame) -> "networkx.DiGraph":
33+
def to_digraph(msdf: MappingSetDataFrame) -> "networkx.DiGraph[str]":
3434
"""Convert to a graph where the nodes are entities' CURIEs and edges are their mappings."""
3535
import networkx as nx
3636

3737
doc = to_mapping_set_document(msdf)
38-
g = nx.DiGraph()
38+
g: "networkx.DiGraph[str]" = nx.DiGraph()
3939
if doc.mapping_set.mappings is not None:
4040
for mapping in doc.mapping_set.mappings:
4141
if not isinstance(mapping, Mapping):

src/sssom/parsers.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -538,9 +538,9 @@ def _get_mapping_dict(
538538
# only if the value exists, is not NaN, and the key is in the schema's mapping slots.
539539
# The value could be a string or a list and is handled accordingly via _address_multivalued_slot().
540540

541-
mdict = {
541+
mdict: dict[str, str | list[str]] = {
542542
k: _address_multivalued_slot(k, v)
543-
for k, v in row.items()
543+
for k, v in row.to_dict().items()
544544
if v and pd.notna(v) and k in mapping_slots
545545
}
546546

@@ -1002,7 +1002,7 @@ def _get_mapping_set_from_df(df: pd.DataFrame, meta: Optional[MetadataType] = No
10021002

10031003
mapping_slots = set(_get_sssom_schema_object().mapping_slots)
10041004

1005-
df.apply(
1005+
df.apply( # type:ignore
10061006
lambda row: _add_valid_mapping_to_list(
10071007
_get_mapping_dict(row, bad_attrs, mapping_slots), mapping_set.mappings
10081008
),

src/sssom/util.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -426,7 +426,7 @@ def infer_cardinality(self, scope: Optional[List[str]] = None) -> None:
426426
# Helper function to transform a row into a string that represents
427427
# a subject (or object) in a given scope; `side` is either `subject`
428428
# or `object`.
429-
def _to_string(row: dict[str, Any], side: str) -> str:
429+
def _to_string(row: pd.Series, side: str) -> str:
430430
# We prepend a one-letter code (`L` or `E`) to the actual subject
431431
# or object so that literal and non-literal mapping records are
432432
# always distinguishable and can be counted separately.
@@ -718,7 +718,6 @@ def filter_redundant_rows(df: pd.DataFrame, ignore_predicate: bool = False) -> p
718718
key = [SUBJECT_ID, OBJECT_ID]
719719
else:
720720
key = [SUBJECT_ID, OBJECT_ID, PREDICATE_ID]
721-
dfmax: pd.DataFrame
722721
if not df.empty:
723722
dfmax = df.groupby(key, as_index=False)[CONFIDENCE].apply(max).drop_duplicates()
724723
max_conf: Dict[Tuple[str, ...], float] = {}
@@ -1197,7 +1196,9 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
11971196

11981197
# GroupBy and SELECT ONLY maximum confidence
11991198
max_confidence_df: pd.DataFrame
1200-
max_confidence_df = combined_normalized_subset.groupby(TRIPLES_IDS, as_index=False)[
1199+
max_confidence_df = combined_normalized_subset.groupby(
1200+
TRIPLES_IDS, as_index=False
1201+
)[ # type:ignore
12011202
CONFIDENCE
12021203
].max()
12031204

@@ -1267,14 +1268,14 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
12671268
# This needs to happen because the columns in df
12681269
# not in reconciled_df_subset will be NaN otherwise
12691270
# which is incorrect.
1270-
reconciled_df = df.merge(
1271+
reconciled_df: pd.DataFrame = df.merge(
12711272
reconciled_df_subset, how="right", on=list(reconciled_df_subset.columns)
12721273
).fillna(df)
12731274

12741275
if nan_df.empty:
12751276
return_df = reconciled_df
12761277
else:
1277-
return_df = reconciled_df.append(nan_df).drop_duplicates()
1278+
return_df = reconciled_df.append(nan_df).drop_duplicates() # type:ignore
12781279

12791280
if not confidence_in_original:
12801281
return_df = return_df.drop(columns=[CONFIDENCE], axis=1)

src/sssom/validators.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ def check_strict_curie_format(
208208

209209
for column in entity_reference_slots:
210210
if column in msdf.df.columns:
211-
for index, value in msdf.df[column].items():
211+
for index, value in msdf.df[column].to_dict().items():
212212
if pd.notna(value) and "|" in str(value):
213213
message = f"{value} contains a pipe ('|') character (row {index + 1}, column '{column}')."
214214
validation_results.append(

tests/test_parsers.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -326,19 +326,20 @@ def test_read_sssom_table(self) -> None:
326326
"mapping_justification",
327327
]
328328
for idx, row in msdf.df.iterrows():
329-
for k, v in row.items():
329+
for k, v in row.to_dict().items():
330+
xxx = imported_df.iloc[idx][k] # type:ignore
330331
if v == np.nan:
331-
self.assertTrue(math.isnan(imported_df.iloc[idx][k]))
332+
self.assertTrue(math.isnan(xxx))
332333
else:
333334
if k not in list_cols:
334335
if v is np.nan:
335-
self.assertTrue(imported_df.iloc[idx][k] is v)
336+
self.assertTrue(xxx is v)
336337
else:
337-
self.assertEqual(imported_df.iloc[idx][k], v)
338+
self.assertEqual(xxx, v)
338339
elif k == "mapping_justification":
339-
self.assertEqual(imported_df.iloc[idx][k], v)
340+
self.assertEqual(xxx, v)
340341
else:
341-
self.assertEqual(imported_df.iloc[idx][k], v)
342+
self.assertEqual(xxx, v)
342343

343344
def test_parse_obographs_merged(self) -> None:
344345
"""Test parsing OBO Graph JSON using custom prefix_map."""

tests/test_utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -208,9 +208,9 @@ def test_invert_asymmetric_nodes(self) -> None:
208208
msdf = parse_sssom_table(f"{data_dir}/asymmetric.tsv")
209209
inverted_df = invert_mappings(msdf.df, merge_inverted=False)
210210
self.assertEqual(len(inverted_df), len(msdf.df))
211-
original_subject_labels = msdf.df["subject_label"].values
212-
inverted_object_labels = inverted_df["object_label"].values
213-
self.assertNotIn(False, original_subject_labels == inverted_object_labels)
211+
original_subject_labels = msdf.df["subject_label"]
212+
inverted_object_labels = inverted_df["object_label"]
213+
self.assertTrue((original_subject_labels == inverted_object_labels).all())
214214

215215

216216
class TestUtils(unittest.TestCase):

tox.ini

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,9 +104,14 @@ deps =
104104
types-requests
105105
click-types
106106
linkml-runtime
107+
pandas-stubs
108+
scipy-stubs
109+
types-jsonschema
110+
types-networkx
107111
extras =
108112
rdflib-endpoint
109-
commands = mypy --install-types --non-interactive --ignore-missing-imports --strict src/sssom tests/
113+
commands =
114+
mypy --ignore-missing-imports --strict src/ tests/
110115
description = Run the mypy tool to check static typing on the project.
111116

112117
[testenv:docstr-coverage]

0 commit comments

Comments
 (0)