
Commit 8391f67

Cleanup
1 parent: b57c11f

4 files changed (+12, -90 lines)


src/mavedb/lib/validation/dataframe/dataframe.py

Lines changed: 11 additions & 18 deletions
@@ -29,6 +29,15 @@
 STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column, guide_sequence_column)
 
 
+def clean_col_name(col: str) -> str:
+    col = col.strip()
+    # Only remove quotes if the column name is fully quoted
+    if (col.startswith('"') and col.endswith('"')) or (col.startswith("'") and col.endswith("'")):
+        col = col[1:-1]
+
+    return col.strip()
+
+
 def validate_and_standardize_dataframe_pair(
     scores_df: pd.DataFrame,
     counts_df: Optional[pd.DataFrame],
@@ -212,15 +221,7 @@ def standardize_dict_keys(d: dict[str, Any]) -> dict[str, Any]:
         The standardized dictionary
     """
 
-    def clean_key(key: str) -> str:
-        key = key.strip()
-        # Only remove quotes if the key is fully quoted
-        if (key.startswith('"') and key.endswith('"')) or (key.startswith("'") and key.endswith("'")):
-            key = key[1:-1]
-
-        return key.strip()
-
-    return {clean_key(k): v for k, v in d.items()}
+    return {clean_col_name(k): v for k, v in d.items()}
 
 
 def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
@@ -246,15 +247,7 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
         The standardized dataframe
     """
 
-    def clean_column(col: str) -> str:
-        col = col.strip()
-        # Only remove quotes if the column name is fully quoted
-        if (col.startswith('"') and col.endswith('"')) or (col.startswith("'") and col.endswith("'")):
-            col = col[1:-1]
-
-        return col.strip()
-
-    cleaned_columns = {c: clean_column(c) for c in df.columns}
+    cleaned_columns = {c: clean_col_name(c) for c in df.columns}
     df.rename(columns=cleaned_columns, inplace=True)
 
     column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS}
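
As a quick illustration, here is a minimal standalone sketch of how the consolidated clean_col_name helper behaves on padded or quoted headers, mirroring its use in standardize_dataframe above (the sample column names are hypothetical):

import pandas as pd

def clean_col_name(col: str) -> str:
    col = col.strip()
    # Only remove quotes if the column name is fully quoted
    if (col.startswith('"') and col.endswith('"')) or (col.startswith("'") and col.endswith("'")):
        col = col[1:-1]
    return col.strip()

# Hypothetical headers as they might arrive from a hand-edited CSV
df = pd.DataFrame(columns=[' "hgvs_nt" ', "'score'", '  hgvs_pro'])
df.rename(columns={c: clean_col_name(c) for c in df.columns}, inplace=True)
print(list(df.columns))  # ['hgvs_nt', 'score', 'hgvs_pro']

Note that a name quoted on only one side keeps its quote character, since the helper unwraps fully quoted names only.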

src/mavedb/view_models/score_set.py

Lines changed: 0 additions & 8 deletions
@@ -2,7 +2,6 @@
 from __future__ import annotations
 
 import json
-import logging
 from copy import deepcopy
 from datetime import date
 from typing import Any, Callable, Collection, Optional, Sequence, Type, TypeVar, Union
@@ -44,8 +43,6 @@
 )
 from mavedb.view_models.user import SavedUser, User
 
-logger = logging.getLogger(__name__)
-
 UnboundedRange = tuple[Union[float, None], Union[float, None]]
 
 Model = TypeVar("Model", bound=BaseModel)
@@ -110,7 +107,6 @@ class ScoreSetModifyBase(ScoreSetBase):
     doi_identifiers: Optional[list[DoiIdentifierCreate]] = None
     target_genes: list[TargetGeneCreate]
     score_ranges: Optional[ScoreSetRangesCreate] = None
-    # dataset_columns: Optional[DatasetColumnsCreate] = {}
 
 
 class ScoreSetModify(ScoreSetModifyBase):
@@ -459,10 +455,6 @@ def publication_identifiers_validator(cls, value: Any) -> list[PublicationIdenti
         assert isinstance(value, Collection), "Publication identifier lists must be a collection"
         return list(value)  # Re-cast into proper list-like type
 
-    # @field_validator("dataset_columns")
-    # def camelize_dataset_columns_keys(cls, value: dict) -> dict:
-    #     return camelize(value)
-
     # These 'synthetic' fields are generated from other model properties. Transform data from other properties as needed, setting
     # the appropriate field on the model itself. Then, proceed with Pydantic ingestion once fields are created.
     @model_validator(mode="before")

src/mavedb/view_models/target_gene.py

Lines changed: 1 addition & 3 deletions
@@ -1,7 +1,7 @@
 from datetime import date
 from typing import Any, Optional, Sequence
 
-from pydantic import ConfigDict, Field, model_validator
+from pydantic import Field, model_validator
 from typing_extensions import Self
 
 from mavedb.lib.validation.exceptions import ValidationError
@@ -25,8 +25,6 @@ class TargetGeneBase(BaseModel):
     category: TargetCategory
     external_identifiers: Sequence[external_gene_identifier_offset.ExternalGeneIdentifierOffsetBase]
 
-    model_config = ConfigDict(from_attributes=True)
-
 
 class TargetGeneModify(TargetGeneBase):
     pass
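
For context on the dropped model_config line: in Pydantic v2, ConfigDict(from_attributes=True) lets model_validate read attributes off arbitrary objects (such as SQLAlchemy models) rather than requiring dicts. Below is a minimal sketch of that behavior with hypothetical GeneRecord/GeneView classes, assuming the shared mavedb BaseModel (not shown in this diff) already supplies an equivalent configuration, which would make the per-class override redundant:

from pydantic import BaseModel, ConfigDict

class GeneRecord:
    # A plain object standing in for a SQLAlchemy model instance.
    def __init__(self, name: str, category: str):
        self.name = name
        self.category = category

class GeneView(BaseModel):
    # from_attributes lets model_validate() read plain-object attributes,
    # not just dict keys -- the behavior TargetGeneBase previously opted into.
    model_config = ConfigDict(from_attributes=True)

    name: str
    category: str

view = GeneView.model_validate(GeneRecord("BRCA1", "protein_coding"))
print(view.name)  # BRCA1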

tests/view_models/test_score_set.py

Lines changed: 0 additions & 61 deletions
@@ -407,67 +407,6 @@ def test_score_set_update_all_optional(attribute, updated_data):
     ScoreSetUpdateAllOptional(**{attribute: updated_data})
 
 
-# def test_saved_score_set_data_set_columns_are_camelized():
-#     score_set = TEST_MINIMAL_SEQ_SCORESET_RESPONSE.copy()
-#     score_set["urn"] = "urn:score-set-xxx"
-
-#     # Remove pre-set synthetic properties
-#     score_set.pop("metaAnalyzesScoreSetUrns")
-#     score_set.pop("metaAnalyzedByScoreSetUrns")
-#     score_set.pop("primaryPublicationIdentifiers")
-#     score_set.pop("secondaryPublicationIdentifiers")
-#     score_set.pop("datasetColumns")
-
-#     # Convert fields expecting an object to attributed objects
-#     external_identifiers = {"refseq_offset": None, "ensembl_offset": None, "uniprot_offset": None}
-#     target_genes = [
-#         dummy_attributed_object_from_dict({**target, **external_identifiers}) for target in score_set["targetGenes"]
-#     ]
-#     score_set["targetGenes"] = [SavedTargetGene.model_validate(target) for target in target_genes]
-
-#     # Set synthetic properties with dummy attributed objects to mock SQLAlchemy model objects.
-#     score_set["meta_analyzes_score_sets"] = [
-#         dummy_attributed_object_from_dict({"urn": "urn:meta-analyzes-xxx", "superseding_score_set": None})
-#     ]
-#     score_set["meta_analyzed_by_score_sets"] = [
-#         dummy_attributed_object_from_dict({"urn": "urn:meta-analyzed-xxx", "superseding_score_set": None})
-#     ]
-#     score_set["publication_identifier_associations"] = [
-#         dummy_attributed_object_from_dict(
-#             {
-#                 "publication": PublicationIdentifier(**SAVED_PUBMED_PUBLICATION),
-#                 "primary": True,
-#             }
-#         ),
-#         dummy_attributed_object_from_dict(
-#             {
-#                 "publication": PublicationIdentifier(
-#                     **{**SAVED_PUBMED_PUBLICATION, **{"identifier": TEST_BIORXIV_IDENTIFIER}}
-#                 ),
-#                 "primary": False,
-#             }
-#         ),
-#         dummy_attributed_object_from_dict(
-#             {
-#                 "publication": PublicationIdentifier(
-#                     **{**SAVED_PUBMED_PUBLICATION, **{"identifier": TEST_BIORXIV_IDENTIFIER}}
-#                 ),
-#                 "primary": False,
-#             }
-#         ),
-#     ]
-
-#     # The camelized dataset columns we are testing
-#     score_set["dataset_columns"] = {"camelize_me": "test", "noNeed": "test"}
-
-#     score_set_attributed_object = dummy_attributed_object_from_dict(score_set)
-#     saved_score_set = SavedScoreSet.model_validate(score_set_attributed_object)
-
-#     assert sorted(list(saved_score_set.dataset_columns.keys())) == sorted(
-#         [camelize(k) for k in score_set["dataset_columns"].keys()]
-#     )
-
-
 @pytest.mark.parametrize(
     "exclude",
     ["publication_identifier_associations", "meta_analyzes_score_sets", "meta_analyzed_by_score_sets"],
