Skip to content

Commit d220490

Browse files
authored
feat!: Update nomenclature to allow for gene symbols without identifiers (#265)
1 parent f9acd56 commit d220490

File tree

3 files changed

+19
-214
lines changed

3 files changed

+19
-214
lines changed

notebooks/evidence_matching/fusion_evidence_matching.ipynb

Lines changed: 1 addition & 198 deletions
Original file line numberDiff line numberDiff line change
@@ -75,224 +75,27 @@
7575
"Unable to get MANE Transcript data for gene: RN7SKP118\n",
7676
"Could not find a transcript for RN7SKP118 on NC_000016.10\n",
7777
"Gene does not exist in UTA: AC021660.2\n",
78-
"1 validation error for AssayedFusion\n",
79-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...e, anchoredReads=None)]}, input_type=dict]\n",
80-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
81-
"Traceback (most recent call last):\n",
82-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
83-
" translated_fusion = await translator_method(\n",
84-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
85-
" fusion, self.coordinate_type, self.assembly\n",
86-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
87-
" )\n",
88-
" ^\n",
89-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
90-
" return self._format_fusion(\n",
91-
" ~~~~~~~~~~~~~~~~~~~^\n",
92-
" AssayedFusion,\n",
93-
" ^^^^^^^^^^^^^^\n",
94-
" ...<9 lines>...\n",
95-
" reads=read_data,\n",
96-
" ^^^^^^^^^^^^^^^^\n",
97-
" )\n",
98-
" ^\n",
99-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
100-
" fusion = fusion_type(**params)\n",
101-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
102-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
103-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
104-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...e, anchoredReads=None)]}, input_type=dict]\n",
105-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
10678
"Unable to get MANE Transcript data for gene: EEF1A1P13\n",
10779
"Could not find a transcript for EEF1A1P13 on NC_000005.10\n",
10880
"Gene does not exist in UTA: AC098590.1\n",
10981
"Gene does not exist in UTA: AC099789.1\n",
110-
"1 validation error for AssayedFusion\n",
111-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': Causat...=None, mappings=None))]}, input_type=dict]\n",
112-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
113-
"Traceback (most recent call last):\n",
114-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
115-
" translated_fusion = await translator_method(\n",
116-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
117-
" fusion, self.coordinate_type, self.assembly\n",
118-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
119-
" )\n",
120-
" ^\n",
121-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
122-
" return self._format_fusion(\n",
123-
" ~~~~~~~~~~~~~~~~~~~^\n",
124-
" AssayedFusion,\n",
125-
" ^^^^^^^^^^^^^^\n",
126-
" ...<9 lines>...\n",
127-
" reads=read_data,\n",
128-
" ^^^^^^^^^^^^^^^^\n",
129-
" )\n",
130-
" ^\n",
131-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
132-
" fusion = fusion_type(**params)\n",
133-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
134-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
135-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
136-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': Causat...=None, mappings=None))]}, input_type=dict]\n",
137-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
13882
"Unable to get MANE Transcript data for gene: USP27X-DT\n",
13983
"38584945 on NC_000021.9 occurs more than 150 bp outside the exon boundaries of the NM_182918.4 transcript, indicating this may not be a chimeric transcript junction and is unlikely to represent a contiguous coding sequence. Confirm that the genomic position 38584945 is being used to represent transcript junction and not DNA breakpoint.\n",
14084
"Unable to get MANE Transcript data for gene: LINC00158\n",
14185
"Gene does not exist in UTA: AP001341.1\n",
142-
"1 validation error for AssayedFusion\n",
143-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...=None, mappings=None))]}, input_type=dict]\n",
144-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
145-
"Traceback (most recent call last):\n",
146-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
147-
" translated_fusion = await translator_method(\n",
148-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
149-
" fusion, self.coordinate_type, self.assembly\n",
150-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
151-
" )\n",
152-
" ^\n",
153-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
154-
" return self._format_fusion(\n",
155-
" ~~~~~~~~~~~~~~~~~~~^\n",
156-
" AssayedFusion,\n",
157-
" ^^^^^^^^^^^^^^\n",
158-
" ...<9 lines>...\n",
159-
" reads=read_data,\n",
160-
" ^^^^^^^^^^^^^^^^\n",
161-
" )\n",
162-
" ^\n",
163-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
164-
" fusion = fusion_type(**params)\n",
165-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
166-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
167-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
168-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...=None, mappings=None))]}, input_type=dict]\n",
169-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
17086
"Gene does not exist in UTA: AC021660.2\n",
171-
"1 validation error for AssayedFusion\n",
172-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...e, anchoredReads=None)]}, input_type=dict]\n",
173-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
174-
"Traceback (most recent call last):\n",
175-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
176-
" translated_fusion = await translator_method(\n",
177-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
178-
" fusion, self.coordinate_type, self.assembly\n",
179-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
180-
" )\n",
181-
" ^\n",
182-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
183-
" return self._format_fusion(\n",
184-
" ~~~~~~~~~~~~~~~~~~~^\n",
185-
" AssayedFusion,\n",
186-
" ^^^^^^^^^^^^^^\n",
187-
" ...<9 lines>...\n",
188-
" reads=read_data,\n",
189-
" ^^^^^^^^^^^^^^^^\n",
190-
" )\n",
191-
" ^\n",
192-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
193-
" fusion = fusion_type(**params)\n",
194-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
195-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
196-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
197-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...e, anchoredReads=None)]}, input_type=dict]\n",
198-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
19987
"Gene does not exist in UTA: AC021660.2\n",
200-
"1 validation error for AssayedFusion\n",
201-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...e, anchoredReads=None)]}, input_type=dict]\n",
202-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
203-
"Traceback (most recent call last):\n",
204-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
205-
" translated_fusion = await translator_method(\n",
206-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
207-
" fusion, self.coordinate_type, self.assembly\n",
208-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
209-
" )\n",
210-
" ^\n",
211-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
212-
" return self._format_fusion(\n",
213-
" ~~~~~~~~~~~~~~~~~~~^\n",
214-
" AssayedFusion,\n",
215-
" ^^^^^^^^^^^^^^\n",
216-
" ...<9 lines>...\n",
217-
" reads=read_data,\n",
218-
" ^^^^^^^^^^^^^^^^\n",
219-
" )\n",
220-
" ^\n",
221-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
222-
" fusion = fusion_type(**params)\n",
223-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
224-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
225-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
226-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...e, anchoredReads=None)]}, input_type=dict]\n",
227-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
22888
"35720747 on NC_000001.11 occurs more than 150 bp outside the exon boundaries of the NM_022111.4 transcript, indicating this may not be a chimeric transcript junction and is unlikely to represent a contiguous coding sequence. Confirm that the genomic position 35720747 is being used to represent transcript junction and not DNA breakpoint.\n",
22989
"Gene does not exist in UTA: C1orf216\n",
23090
"Gene does not exist in UTA: RF00100\n",
23191
"Unable to get MANE Transcript data for gene: RN7SKP76\n",
23292
"Could not find a transcript for RN7SKP76 on NC_000016.10\n",
233-
"1 validation error for AssayedFusion\n",
234-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': Causat...None), mappings=None))]}, input_type=dict]\n",
235-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
236-
"Traceback (most recent call last):\n",
237-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
238-
" translated_fusion = await translator_method(\n",
239-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
240-
" fusion, self.coordinate_type, self.assembly\n",
241-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
242-
" )\n",
243-
" ^\n",
244-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
245-
" return self._format_fusion(\n",
246-
" ~~~~~~~~~~~~~~~~~~~^\n",
247-
" AssayedFusion,\n",
248-
" ^^^^^^^^^^^^^^\n",
249-
" ...<9 lines>...\n",
250-
" reads=read_data,\n",
251-
" ^^^^^^^^^^^^^^^^\n",
252-
" )\n",
253-
" ^\n",
254-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
255-
" fusion = fusion_type(**params)\n",
256-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
257-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
258-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
259-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': Causat...None), mappings=None))]}, input_type=dict]\n",
260-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
26193
"Unable to get MANE Transcript data for gene: RN7SK\n",
26294
"Unable to get MANE Transcript data for gene: RN7SKP76\n",
26395
"Could not find a transcript for RN7SKP76 on NC_000016.10\n",
26496
"143950183 on NC_000008.11 occurs more than 150 bp outside the exon boundaries of the NM_201384.3 transcript, indicating this may not be a chimeric transcript junction and is unlikely to represent a contiguous coding sequence. Confirm that the genomic position 143950183 is being used to represent transcript junction and not DNA breakpoint.\n",
26597
"Unable to get MANE Transcript data for gene: XXYLT1-AS2\n",
266-
"Gene does not exist in UTA: AC091132.5\n",
267-
"1 validation error for AssayedFusion\n",
268-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...=None, mappings=None))]}, input_type=dict]\n",
269-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
270-
"Traceback (most recent call last):\n",
271-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/harvester.py\", line 101, in load_records\n",
272-
" translated_fusion = await translator_method(\n",
273-
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
274-
" fusion, self.coordinate_type, self.assembly\n",
275-
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
276-
" )\n",
277-
" ^\n",
278-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 380, in from_star_fusion\n",
279-
" return self._format_fusion(\n",
280-
" ~~~~~~~~~~~~~~~~~~~^\n",
281-
" AssayedFusion,\n",
282-
" ^^^^^^^^^^^^^^\n",
283-
" ...<9 lines>...\n",
284-
" reads=read_data,\n",
285-
" ^^^^^^^^^^^^^^^^\n",
286-
" )\n",
287-
" ^\n",
288-
" File \"/Users/rsjxa001/fusion_project/fusor/src/fusor/translator.py\", line 121, in _format_fusion\n",
289-
" fusion = fusion_type(**params)\n",
290-
" File \"/Users/rsjxa001/fusion_project/fusor/venv/lib/python3.13/site-packages/pydantic/main.py\", line 253, in __init__\n",
291-
" validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)\n",
292-
"pydantic_core._pydantic_core.ValidationError: 1 validation error for AssayedFusion\n",
293-
" Value error, Unrecognized type, should only pass entities with properties [type=value_error, input_value={'causativeEvent': None, ...=None, mappings=None))]}, input_type=dict]\n",
294-
" For further information visit https://errors.pydantic.dev/2.11/v/value_error\n",
295-
"7 fusion(s) were dropped during translation\n"
98+
"Gene does not exist in UTA: AC091132.5\n"
29699
]
297100
}
298101
],

src/fusor/models.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -584,26 +584,30 @@ def _access_object_attr(
584584
raise ValueError(msg)
585585

586586
@classmethod
587-
def _fetch_gene_id(
587+
def _fetch_gene_id_or_name(
588588
cls,
589589
obj: dict | BaseModel,
590590
alt_field: str | None = None,
591591
) -> str | None:
592-
"""Get gene ID if element includes a gene annotation.
592+
"""Get gene ID or name if element includes a gene annotation.
593593
594594
:param obj: element to fetch gene from. Might not contain a gene (e.g. it's a
595595
TemplatedSequenceElement) so we have to use safe checks to fetch.
596596
:param alt_field: the field to fetch the gene from, if it is not called "gene" (ex: associatedGene instead)
597-
:return: gene ID if gene is defined
597+
:return: gene ID or name if gene is defined
598598
"""
599599
gene_info = cls._access_object_attr(obj, alt_field if alt_field else "gene")
600600
if gene_info:
601601
gene_id = cls._access_object_attr(gene_info, "primaryCoding")
602-
if isinstance(gene_id, str):
603-
return gene_id
604-
gene_id = cls._access_object_attr(gene_id, "id")
605602
if gene_id:
606-
return gene_id
603+
if isinstance(gene_id, str):
604+
return gene_id
605+
gene_id = cls._access_object_attr(gene_id, "id")
606+
if gene_id:
607+
return gene_id
608+
gene_name = cls._access_object_attr(gene_info, "name")
609+
if gene_name:
610+
return gene_name
607611
return None
608612

609613
@model_validator(mode="before")
@@ -638,12 +642,14 @@ def enforce_element_quantities(cls, values):
638642
uq_gene_msg = "Fusions must form a chimeric transcript from two or more genes, or a novel interaction between a rearranged regulatory element with the expressed product of a partner gene."
639643
gene_ids = []
640644
if reg_element:
641-
gene_id = cls._fetch_gene_id(obj=reg_element, alt_field="associatedGene")
645+
gene_id = cls._fetch_gene_id_or_name(
646+
obj=reg_element, alt_field="associatedGene"
647+
)
642648
if gene_id:
643649
gene_ids.append(gene_id)
644650

645651
for element in structure:
646-
gene_id = cls._fetch_gene_id(obj=element)
652+
gene_id = cls._fetch_gene_id_or_name(obj=element)
647653
if gene_id:
648654
gene_ids.append(gene_id)
649655

src/fusor/nomenclature.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,8 @@ def gene_nomenclature(element: GeneElement) -> str:
126126
127127
:param element: a gene element object
128128
:return: element nomenclature representation
129-
:raises ValueError: if unable to retrieve gene ID
130129
"""
131-
if element.gene.primaryCoding:
132-
gene_id = element.gene.primaryCoding.id
133-
else:
134-
raise ValueError
130+
gene_id = element.gene.primaryCoding.id if element.gene.primaryCoding else "unknown"
135131
return f"{element.gene.name}({gene_id})"
136132

137133

@@ -164,14 +160,14 @@ def generate_nomenclature(fusion: Fusion, sr: SeqRepo) -> str:
164160
parts.append(element.linkerSequence.sequence.root)
165161
elif isinstance(element, TranscriptSegmentElement):
166162
if not any(
167-
[gene == element.gene.label for gene in element_genes] # noqa: C419
163+
[gene == element.gene.name for gene in element_genes] # noqa: C419
168164
):
169165
parts.append(tx_segment_nomenclature(element))
170166
elif isinstance(element, TemplatedSequenceElement):
171167
parts.append(templated_seq_nomenclature(element, sr))
172168
elif isinstance(element, GeneElement):
173169
if not any(
174-
[gene == element.gene.label for gene in element_genes] # noqa: C419
170+
[gene == element.gene.name for gene in element_genes] # noqa: C419
175171
):
176172
parts.append(gene_nomenclature(element))
177173
else:

0 commit comments

Comments
 (0)