
Commit 8391f67

Cleanup
1 parent: b57c11f

4 files changed (+12, -90 lines)


src/mavedb/lib/validation/dataframe/dataframe.py

Lines changed: 11 additions & 18 deletions
@@ -29,6 +29,15 @@
 STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column, guide_sequence_column)
 
 
+def clean_col_name(col: str) -> str:
+    col = col.strip()
+    # Only remove quotes if the column name is fully quoted
+    if (col.startswith('"') and col.endswith('"')) or (col.startswith("'") and col.endswith("'")):
+        col = col[1:-1]
+
+    return col.strip()
+
+
 def validate_and_standardize_dataframe_pair(
     scores_df: pd.DataFrame,
     counts_df: Optional[pd.DataFrame],
@@ -212,15 +221,7 @@ def standardize_dict_keys(d: dict[str, Any]) -> dict[str, Any]:
         The standardized dictionary
     """
 
-    def clean_key(key: str) -> str:
-        key = key.strip()
-        # Only remove quotes if the key is fully quoted
-        if (key.startswith('"') and key.endswith('"')) or (key.startswith("'") and key.endswith("'")):
-            key = key[1:-1]
-
-        return key.strip()
-
-    return {clean_key(k): v for k, v in d.items()}
+    return {clean_col_name(k): v for k, v in d.items()}
 
 
 def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
@@ -246,15 +247,7 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
         The standardized dataframe
     """
 
-    def clean_column(col: str) -> str:
-        col = col.strip()
-        # Only remove quotes if the column name is fully quoted
-        if (col.startswith('"') and col.endswith('"')) or (col.startswith("'") and col.endswith("'")):
-            col = col[1:-1]
-
-        return col.strip()
-
-    cleaned_columns = {c: clean_column(c) for c in df.columns}
+    cleaned_columns = {c: clean_col_name(c) for c in df.columns}
     df.rename(columns=cleaned_columns, inplace=True)
 
     column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS}
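
As a quick illustration, here is a minimal standalone sketch of how the consolidated clean_col_name helper behaves on padded or quoted headers, mirroring its use in standardize_dataframe above (the sample column names are hypothetical):

import pandas as pd

def clean_col_name(col: str) -> str:
    col = col.strip()
    # Only remove quotes if the column name is fully quoted
    if (col.startswith('"') and col.endswith('"')) or (col.startswith("'") and col.endswith("'")):
        col = col[1:-1]
    return col.strip()

# Hypothetical headers as they might arrive from a hand-edited CSV
df = pd.DataFrame(columns=[' "hgvs_nt" ', "'score'", '  hgvs_pro'])
df.rename(columns={c: clean_col_name(c) for c in df.columns}, inplace=True)
print(list(df.columns))  # ['hgvs_nt', 'score', 'hgvs_pro']

Note that a name quoted on only one side keeps its quote character, since the helper unwraps fully quoted names only.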

src/mavedb/view_models/score_set.py

Lines changed: 0 additions & 8 deletions
@@ -2,7 +2,6 @@
 from __future__ import annotations
 
 import json
-import logging
 from copy import deepcopy
 from datetime import date
 from typing import Any, Callable, Collection, Optional, Sequence, Type, TypeVar, Union
@@ -44,8 +43,6 @@
 )
 from mavedb.view_models.user import SavedUser, User
 
-logger = logging.getLogger(__name__)
-
 UnboundedRange = tuple[Union[float, None], Union[float, None]]
 
 Model = TypeVar("Model", bound=BaseModel)
@@ -110,7 +107,6 @@ class ScoreSetModifyBase(ScoreSetBase):
     doi_identifiers: Optional[list[DoiIdentifierCreate]] = None
     target_genes: list[TargetGeneCreate]
     score_ranges: Optional[ScoreSetRangesCreate] = None
-    # dataset_columns: Optional[DatasetColumnsCreate] = {}
 
 
 class ScoreSetModify(ScoreSetModifyBase):
@@ -459,10 +455,6 @@ def publication_identifiers_validator(cls, value: Any) -> list[PublicationIdenti
         assert isinstance(value, Collection), "Publication identifier lists must be a collection"
         return list(value)  # Re-cast into proper list-like type
 
-    # @field_validator("dataset_columns")
-    # def camelize_dataset_columns_keys(cls, value: dict) -> dict:
-    #     return camelize(value)
-
     # These 'synthetic' fields are generated from other model properties. Transform data from other properties as needed, setting
     # the appropriate field on the model itself. Then, proceed with Pydantic ingestion once fields are created.
     @model_validator(mode="before")

src/mavedb/view_models/target_gene.py

Lines changed: 1 addition & 3 deletions
@@ -1,7 +1,7 @@
 from datetime import date
 from typing import Any, Optional, Sequence
 
-from pydantic import ConfigDict, Field, model_validator
+from pydantic import Field, model_validator
 from typing_extensions import Self
 
 from mavedb.lib.validation.exceptions import ValidationError
@@ -25,8 +25,6 @@ class TargetGeneBase(BaseModel):
     category: TargetCategory
     external_identifiers: Sequence[external_gene_identifier_offset.ExternalGeneIdentifierOffsetBase]
 
-    model_config = ConfigDict(from_attributes=True)
-
 
 class TargetGeneModify(TargetGeneBase):
     pass
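
For context on the dropped model_config line: in Pydantic v2, ConfigDict(from_attributes=True) lets model_validate read attributes off arbitrary objects (such as SQLAlchemy models) rather than requiring dicts. Below is a minimal sketch of that behavior with hypothetical GeneRecord/GeneView classes, assuming the shared mavedb BaseModel (not shown in this diff) already supplies an equivalent configuration, which would make the per-class override redundant:

from pydantic import BaseModel, ConfigDict

class GeneRecord:
    # A plain object standing in for a SQLAlchemy model instance.
    def __init__(self, name: str, category: str):
        self.name = name
        self.category = category

class GeneView(BaseModel):
    # from_attributes lets model_validate() read plain-object attributes,
    # not just dict keys -- the behavior TargetGeneBase previously opted into.
    model_config = ConfigDict(from_attributes=True)

    name: str
    category: str

view = GeneView.model_validate(GeneRecord("BRCA1", "protein_coding"))
print(view.name)  # BRCA1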

tests/view_models/test_score_set.py

Lines changed: 0 additions & 61 deletions
@@ -407,67 +407,6 @@ def test_score_set_update_all_optional(attribute, updated_data):
     ScoreSetUpdateAllOptional(**{attribute: updated_data})
 
 
-# def test_saved_score_set_data_set_columns_are_camelized():
-#     score_set = TEST_MINIMAL_SEQ_SCORESET_RESPONSE.copy()
-#     score_set["urn"] = "urn:score-set-xxx"
-
-#     # Remove pre-set synthetic properties
-#     score_set.pop("metaAnalyzesScoreSetUrns")
-#     score_set.pop("metaAnalyzedByScoreSetUrns")
-#     score_set.pop("primaryPublicationIdentifiers")
-#     score_set.pop("secondaryPublicationIdentifiers")
-#     score_set.pop("datasetColumns")
-
-#     # Convert fields expecting an object to attributed objects
-#     external_identifiers = {"refseq_offset": None, "ensembl_offset": None, "uniprot_offset": None}
-#     target_genes = [
-#         dummy_attributed_object_from_dict({**target, **external_identifiers}) for target in score_set["targetGenes"]
-#     ]
-#     score_set["targetGenes"] = [SavedTargetGene.model_validate(target) for target in target_genes]
-
-#     # Set synthetic properties with dummy attributed objects to mock SQLAlchemy model objects.
-#     score_set["meta_analyzes_score_sets"] = [
-#         dummy_attributed_object_from_dict({"urn": "urn:meta-analyzes-xxx", "superseding_score_set": None})
-#     ]
-#     score_set["meta_analyzed_by_score_sets"] = [
-#         dummy_attributed_object_from_dict({"urn": "urn:meta-analyzed-xxx", "superseding_score_set": None})
-#     ]
-#     score_set["publication_identifier_associations"] = [
-#         dummy_attributed_object_from_dict(
-#             {
-#                 "publication": PublicationIdentifier(**SAVED_PUBMED_PUBLICATION),
-#                 "primary": True,
-#             }
-#         ),
-#         dummy_attributed_object_from_dict(
-#             {
-#                 "publication": PublicationIdentifier(
-#                     **{**SAVED_PUBMED_PUBLICATION, **{"identifier": TEST_BIORXIV_IDENTIFIER}}
-#                 ),
-#                 "primary": False,
-#             }
-#         ),
-#         dummy_attributed_object_from_dict(
-#             {
-#                 "publication": PublicationIdentifier(
-#                     **{**SAVED_PUBMED_PUBLICATION, **{"identifier": TEST_BIORXIV_IDENTIFIER}}
-#                 ),
-#                 "primary": False,
-#             }
-#         ),
-#     ]
-
-#     # The camelized dataset columns we are testing
-#     score_set["dataset_columns"] = {"camelize_me": "test", "noNeed": "test"}
-
-#     score_set_attributed_object = dummy_attributed_object_from_dict(score_set)
-#     saved_score_set = SavedScoreSet.model_validate(score_set_attributed_object)
-
-#     assert sorted(list(saved_score_set.dataset_columns.keys())) == sorted(
-#         [camelize(k) for k in score_set["dataset_columns"].keys()]
-#     )
-
-
 @pytest.mark.parametrize(
     "exclude",
     ["publication_identifier_associations", "meta_analyzes_score_sets", "meta_analyzed_by_score_sets"],
