Skip to content

Commit edc2058

Browse files
committed
fix: only check resource existence for index columns
1 parent fc20f40 commit edc2058

File tree

2 files changed

+58 / -1 lines changed

2 files changed

+58 / -1 lines changed

src/mavedb/lib/validation/dataframe/calibration.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,11 +76,15 @@ def validate_and_standardize_calibration_classes_dataframe(
7676
for c in column_mapping:
7777
if c in {calibration_variant_column_name, hgvs_nt_column, hgvs_pro_column}:
7878
validate_variant_column(standardized_classes_df[c], column_mapping[c] == index_column)
79-
validate_index_existence_in_score_set(db, score_set, standardized_classes_df[c], index_column)
8079
elif c == calibration_class_column_name:
8180
validate_data_column(standardized_classes_df[c], force_numeric=False)
8281
validate_calibration_classes(calibration, standardized_classes_df[c])
8382

83+
if c == index_column:
84+
validate_index_existence_in_score_set(
85+
db, score_set, standardized_classes_df[column_mapping[c]], column_mapping[c]
86+
)
87+
8488
return standardized_classes_df, index_column
8589

8690

@@ -146,6 +150,9 @@ def validate_index_existence_in_score_set(
146150
Returns:
147151
None: Function returns nothing if validation passes.
148152
"""
153+
print(index_column.tolist())
154+
print(index_column_name)
155+
149156
if index_column_name.lower() == calibration_variant_column_name:
150157
existing_resources = set(
151158
db.scalars(

tests/validation/dataframe/test_calibration.py

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,12 +36,16 @@ def mock_dependencies(self):
3636
patch("mavedb.lib.validation.dataframe.calibration.validate_no_null_rows") as mock_validate_no_null,
3737
patch("mavedb.lib.validation.dataframe.calibration.validate_variant_column") as mock_validate_variant,
3838
patch("mavedb.lib.validation.dataframe.calibration.validate_data_column") as mock_validate_data,
39+
patch(
40+
"mavedb.lib.validation.dataframe.calibration.validate_index_existence_in_score_set"
41+
) as mock_validate_index_existence,
3942
):
4043
yield {
4144
"standardize_dataframe": mock_standardize,
4245
"validate_no_null_rows": mock_validate_no_null,
4346
"validate_variant_column": mock_validate_variant,
4447
"validate_data_column": mock_validate_data,
48+
"validate_index_existence_in_score_set": mock_validate_index_existence,
4549
}
4650

4751
def test_validate_and_standardize_calibration_classes_dataframe_success(self, mock_dependencies):
@@ -351,6 +355,52 @@ def test_validate_and_standardize_calibration_classes_dataframe_empty_dataframe(
351355
with pytest.raises(ValidationError, match=f"missing required column: '{calibration_class_column_name}'"):
352356
validate_and_standardize_calibration_classes_dataframe(mock_db, mock_score_set, mock_calibration, input_df)
353357

358+
def test_validate_and_standardize_calibration_classes_dataframe_multiple_candidate_index_columns(
359+
self, mock_dependencies
360+
):
361+
"""Test successful validation when multiple candidate index columns are present."""
362+
mock_db = Mock()
363+
mock_score_set = Mock()
364+
mock_score_set.id = 123
365+
mock_calibration = Mock()
366+
mock_calibration.class_based = True
367+
368+
input_df = pd.DataFrame(
369+
{
370+
calibration_variant_column_name: ["var1", "var2"],
371+
hgvs_nt_column: ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"],
372+
calibration_class_column_name: ["A", "B"],
373+
}
374+
)
375+
standardized_df = pd.DataFrame(
376+
{
377+
calibration_variant_column_name: ["var1", "var2"],
378+
hgvs_nt_column: ["NM_000546.5:c.215C>G", "NM_000546.5:c.743G>A"],
379+
calibration_class_column_name: ["A", "B"],
380+
}
381+
)
382+
383+
mock_dependencies["standardize_dataframe"].return_value = standardized_df
384+
mock_dependencies["validate_index_existence_in_score_set"].return_value = None
385+
386+
mock_scalars = Mock()
387+
mock_scalars.all.return_value = ["var1", "var2"]
388+
mock_db.scalars.return_value = mock_scalars
389+
390+
mock_classification1 = Mock()
391+
mock_classification1.class_ = "A"
392+
mock_classification2 = Mock()
393+
mock_classification2.class_ = "B"
394+
mock_calibration.functional_classifications = [mock_classification1, mock_classification2]
395+
396+
result, index_column = validate_and_standardize_calibration_classes_dataframe(
397+
mock_db, mock_score_set, mock_calibration, input_df
398+
)
399+
400+
assert result.equals(standardized_df)
401+
assert index_column == calibration_variant_column_name
402+
mock_dependencies["validate_index_existence_in_score_set"].assert_called_once()
403+
354404

355405
class TestValidateCalibrationDfColumnNames:
356406
"""Test suite for validate_calibration_df_column_names function."""

0 commit comments

Comments
 (0)