Skip to content

Commit 6acc240

Browse files
authored
Issue 260 classification content (#261)
* Add content to vcv and rcv classification. Fix bug where 'classifications' were being included in the disassembled rcv_accession as well as in their own table.
* Add content column to vcv/rcv classification bq schemas.
1 parent 6d32923 commit 6acc240

File tree

4 files changed

+21
-5
lines changed

4 files changed

+21
-5
lines changed

clinvar_ingest/cloud/bigquery/bq_json_schemas/rcv_accession_classification.bq.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,9 @@
3434
{
3535
"name": "clinical_impact_clinical_significance",
3636
"type": "STRING"
37+
},
38+
{
39+
"name": "content",
40+
"type": "STRING"
3741
}
3842
]

clinvar_ingest/cloud/bigquery/bq_json_schemas/variation_archive_classification.bq.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,9 @@
4242
{
4343
"name": "clinical_impact_clinical_significance",
4444
"type": "STRING"
45+
},
46+
{
47+
"name": "content",
48+
"type": "STRING"
4549
}
4650
]

clinvar_ingest/cloud/bigquery/processing_history.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def write_started( # noqa: PLR0913
252252
f"release_tag={release_tag}, bucket_dir={bucket_dir}"
253253
)
254254
_logger.warning(
255-
f"Expected 0 rows to exist for the started event, but found {row.c}."
255+
f"Expected 0 rows to exist for the started event, but found {row.c}. "
256256
f"file_type={file_type}, release_date={release_date}, "
257257
f"release_tag={release_tag}, bucket_dir={bucket_dir}"
258258
)

clinvar_ingest/model/variation_archive.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -787,9 +787,11 @@ class RcvAccessionClassification(Model):
787787
clinical_impact_assertion_type: str
788788
clinical_impact_clinical_significance: str
789789

790+
content: dict
791+
790792
@staticmethod
791793
def jsonifiable_fields() -> list[str]:
792-
return []
794+
return ["content"]
793795

794796
def __post_init__(self):
795797
self.entity_type = "rcv_accession_classification"
@@ -802,7 +804,9 @@ def from_xml_single(inp: dict, statement_type: StatementType, rcv_id: str):
802804
or OncogenicityClassification entry. The statement_type is the key
803805
from the original `Classifications` XML/dict, indicating the type.
804806
"""
805-
raw_description = extract(inp, "Description")
807+
# TODO is there a chance they add fields to Description? Maybe don't extract.
808+
# raw_description = extract(inp, "Description")
809+
raw_description = get(inp, "Description") or {}
806810
return RcvAccessionClassification(
807811
rcv_id=rcv_id,
808812
statement_type=statement_type,
@@ -819,6 +823,7 @@ def from_xml_single(inp: dict, statement_type: StatementType, rcv_id: str):
819823
raw_description,
820824
"@ClinicalImpactClinicalSignificance",
821825
),
826+
content=inp,
822827
)
823828

824829
@staticmethod
@@ -945,7 +950,7 @@ def disassemble(self):
945950
yield from c.disassemble()
946951
del self_copy.classifications
947952

948-
yield self
953+
yield self_copy
949954

950955

951956
@dataclasses.dataclass
@@ -965,9 +970,11 @@ class VariationArchiveClassification(Model):
965970
clinical_impact_assertion_type: str
966971
clinical_impact_clinical_significance: str
967972

973+
content: dict
974+
968975
@staticmethod
969976
def jsonifiable_fields() -> list[str]:
970-
return []
977+
return ["content"]
971978

972979
def __post_init__(self):
973980
self.entity_type = "variation_archive_classification"
@@ -998,6 +1005,7 @@ def from_xml_single(inp: dict, statement_type: StatementType, vcv_id: str):
9981005
interp_description,
9991006
"@ClinicalImpactClinicalSignificance",
10001007
),
1008+
content=inp,
10011009
)
10021010

10031011
@staticmethod

0 commit comments

Comments
 (0)