Skip to content

Commit 00ad422

Browse files
committed
WIP: Move DatasetColumns pydantic models to new module
1 parent 7686c17 commit 00ad422

File tree

5 files changed: +70 additions, -46 deletions (lines changed)

src/mavedb/lib/validation/dataframe/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Any, Optional, Tuple, TYPE_CHECKING
22

3-
from mavedb.view_models.score_set import DatasetColumnMetadata
3+
from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata
44
import numpy as np
55
import pandas as pd
66

src/mavedb/view_models/score_set.py

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from copy import deepcopy
99

1010
from humps import camelize
11+
from mavedb.view_models.score_set_dataset_columns import DatasetColumnsCreate, SavedDatasetColumns, DatasetColumns
1112
from pydantic import field_validator, model_validator, create_model
1213
from pydantic.fields import FieldInfo
1314
from fastapi import Form
@@ -119,6 +120,7 @@ class ScoreSetModifyBase(ScoreSetBase):
119120
doi_identifiers: Optional[list[DoiIdentifierCreate]] = None
120121
target_genes: list[TargetGeneCreate]
121122
score_ranges: Optional[ScoreSetRangesCreate] = None
123+
# dataset_columns: Optional[DatasetColumnsCreate] = {}
122124

123125
class ScoreSetModify(ScoreSetModifyBase):
124126
"""View model that adds custom validators to ScoreSetModifyBase."""
@@ -366,43 +368,6 @@ def as_form(
366368
license_id=license_id,
367369
)
368370

369-
class DatasetColumnMetadata(BaseModel):
370-
"""Metadata for individual dataset columns."""
371-
372-
description: str
373-
details: Optional[str] = None
374-
375-
class DatasetColumns(BaseModel):
376-
"""Dataset columns view model representing the dataset columns property of a score set."""
377-
378-
score_columns: Optional[list[str]] = None
379-
count_columns: Optional[list[str]] = None
380-
score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None
381-
count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None
382-
383-
@field_validator("score_columns_metadata", "count_columns_metadata")
384-
def validate_dataset_columns_metadata(cls, v: Optional[dict[str, DatasetColumnMetadata]]) -> Optional[dict[str, DatasetColumnMetadata]]:
385-
if not v:
386-
return None
387-
DatasetColumnMetadata.model_validate(v)
388-
return v
389-
390-
@model_validator(mode="after")
391-
def validate_dataset_columns_metadata_keys(self) -> Self:
392-
if self.score_columns_metadata is not None and self.score_columns is None:
393-
raise ValidationError("Score columns metadata cannot be provided without score columns.")
394-
elif self.score_columns_metadata is not None and self.score_columns is not None:
395-
for key in self.score_columns_metadata.keys():
396-
if key not in self.score_columns:
397-
raise ValidationError(f"Score column metadata key '{key}' does not exist in score_columns list.")
398-
399-
if self.count_columns_metadata is not None and self.count_columns is None:
400-
raise ValidationError("Count columns metadata cannot be provided without count columns.")
401-
elif self.count_columns_metadata is not None and self.count_columns is not None:
402-
for key in self.count_columns_metadata.keys():
403-
if key not in self.count_columns:
404-
raise ValidationError(f"Count column metadata key '{key}' does not exist in count_columns list.")
405-
return self
406371

407372
class ShortScoreSet(BaseModel):
408373
"""
@@ -486,7 +451,7 @@ class SavedScoreSet(ScoreSetBase):
486451
created_by: Optional[SavedUser] = None
487452
modified_by: Optional[SavedUser] = None
488453
target_genes: Sequence[SavedTargetGene]
489-
dataset_columns: DatasetColumns
454+
dataset_columns: Optional[SavedDatasetColumns] = None
490455
external_links: dict[str, ExternalLink]
491456
contributors: Sequence[Contributor]
492457
score_ranges: Optional[SavedScoreSetRanges] = None
@@ -504,9 +469,9 @@ def publication_identifiers_validator(cls, value: Any) -> list[PublicationIdenti
504469
assert isinstance(value, Collection), "Publication identifier lists must be a collection"
505470
return list(value) # Re-cast into proper list-like type
506471

507-
@field_validator("dataset_columns")
508-
def camelize_dataset_columns_keys(cls, value: dict) -> dict:
509-
return camelize(value)
472+
# @field_validator("dataset_columns")
473+
# def camelize_dataset_columns_keys(cls, value: dict) -> dict:
474+
# return camelize(value)
510475

511476
# These 'synthetic' fields are generated from other model properties. Transform data from other properties as needed, setting
512477
# the appropriate field on the model itself. Then, proceed with Pydantic ingestion once fields are created.
@@ -570,6 +535,7 @@ class ScoreSet(SavedScoreSet):
570535
mapping_state: Optional[MappingState] = None
571536
mapping_errors: Optional[dict] = None
572537
score_ranges: Optional[ScoreSetRanges] = None # type: ignore[assignment]
538+
dataset_columns: Optional[DatasetColumns] = None # type: ignore[assignment]
573539

574540

575541
class ScoreSetWithVariants(ScoreSet):
src/mavedb/view_models/score_set_dataset_columns.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
2+
from mavedb.view_models import record_type_validator, set_record_type
3+
from mavedb.view_models.base.base import BaseModel
4+
from typing import Optional
5+
from typing_extensions import Self
6+
7+
from pydantic import field_validator, model_validator
8+
from mavedb.lib.validation.exceptions import ValidationError
9+
10+
class DatasetColumnMetadata(BaseModel):
    """Descriptive metadata attached to a single dataset column."""

    # Required text describing the column.
    description: str
    # Optional supplementary text; left as None when not supplied.
    details: Optional[str] = None
15+
16+
class DatasetColumnsBase(BaseModel):
    """Dataset columns view model representing the dataset columns property of a score set.

    ``score_columns`` and ``count_columns`` list column names. The matching
    ``*_columns_metadata`` mappings associate a ``DatasetColumnMetadata`` entry
    with individual column names; every metadata key must name a column that
    is present in the corresponding column list.
    """

    score_columns: Optional[list[str]] = None
    count_columns: Optional[list[str]] = None
    score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None
    count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None

    @field_validator("score_columns_metadata", "count_columns_metadata")
    def validate_dataset_columns_metadata(
        cls, v: Optional[dict[str, DatasetColumnMetadata]]
    ) -> Optional[dict[str, DatasetColumnMetadata]]:
        """Normalize empty metadata to ``None`` and validate each metadata entry.

        Returns:
            ``None`` when ``v`` is ``None`` or an empty mapping, otherwise ``v``.
        """
        if not v:
            return None
        # Validate the entries, not the mapping itself: the mapping's keys are
        # column names, so passing the whole dict to ``model_validate`` would
        # treat those names as model fields and fail on the missing
        # ``description``.
        for column_metadata in v.values():
            DatasetColumnMetadata.model_validate(column_metadata)
        return v

    @model_validator(mode="after")
    def validate_dataset_columns_metadata_keys(self) -> Self:
        """Ensure metadata is only supplied for columns that are actually listed.

        Raises:
            ValidationError: if metadata is given without the corresponding
                column list, or if a metadata key is not in that list.
        """
        # Empty metadata mappings were already normalized to None by the field
        # validator, so only non-empty mappings reach these checks.
        if self.score_columns_metadata is not None:
            if self.score_columns is None:
                raise ValidationError("Score columns metadata cannot be provided without score columns.")
            for key in self.score_columns_metadata:
                if key not in self.score_columns:
                    raise ValidationError(f"Score column metadata key '{key}' does not exist in score_columns list.")

        if self.count_columns_metadata is not None:
            if self.count_columns is None:
                raise ValidationError("Count columns metadata cannot be provided without count columns.")
            for key in self.count_columns_metadata:
                if key not in self.count_columns:
                    raise ValidationError(f"Count column metadata key '{key}' does not exist in count_columns list.")
        return self
47+
48+
class SavedDatasetColumns(DatasetColumnsBase):
    """Dataset columns model used for the ``dataset_columns`` field of ``SavedScoreSet``.

    Adds no fields or validators beyond ``DatasetColumnsBase``.
    """
50+
51+
class DatasetColumns(SavedDatasetColumns):
    """Dataset columns model used for the ``dataset_columns`` field of ``ScoreSet``.

    Adds no fields or validators beyond ``SavedDatasetColumns``.
    """
53+
54+
class DatasetColumnsCreate(DatasetColumnsBase):
    """Dataset columns model for building a new ``dataset_columns`` value.

    Adds no fields or validators beyond ``DatasetColumnsBase``.
    """
56+
57+
class DatasetColumnsModify(DatasetColumnsBase):
    """Dataset columns model variant named for modification requests.

    Adds no fields or validators beyond ``DatasetColumnsBase``.
    """
    # NOTE(review): no usage of this class is visible in this commit; the
    # "modification requests" purpose is inferred from the name only. Confirm.

src/mavedb/worker/jobs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from arq import ArqRedis
1010
from arq.jobs import Job, JobStatus
1111
from cdot.hgvs.dataproviders import RESTDataProvider
12-
from mavedb.view_models.score_set import DatasetColumnMetadata
12+
from mavedb.view_models.score_set_dataset_columns import DatasetColumnMetadata
1313
from sqlalchemy import cast, delete, null, select
1414
from sqlalchemy.dialects.postgresql import JSONB
1515
from sqlalchemy.orm import Session

tests/helpers/util/variant.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from arq import ArqRedis
55
from cdot.hgvs.dataproviders import RESTDataProvider
66
from fastapi.testclient import TestClient
7-
from mavedb.view_models.score_set import DatasetColumns
7+
from mavedb.view_models.score_set_dataset_columns import DatasetColumnsCreate
88
from sqlalchemy.orm import Session
99
from sqlalchemy import select
1010
from unittest.mock import patch
@@ -101,12 +101,12 @@ def mock_worker_variant_insertion(
101101
assert num_variants == 3
102102

103103
item.processing_state = ProcessingState.success
104-
item.dataset_columns = DatasetColumns(
104+
item.dataset_columns = DatasetColumnsCreate(
105105
score_columns=columns_for_dataset(scores),
106106
count_columns=columns_for_dataset(counts),
107107
score_columns_metadata=scores_column_metadata if scores_column_metadata is not None else {},
108108
count_columns_metadata=counts_column_metadata if counts_column_metadata is not None else {},
109-
)
109+
).model_dump()
110110

111111
db.add(item)
112112
db.commit()

0 commit comments

Comments
 (0)