Skip to content

Commit 14d8ae2

Browse files
authored
feat: add to_grc output (#70)
1 parent 783d16e commit 14d8ae2

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

src/agct/seqref_registry.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,28 @@
1515
class Assembly(StrEnum):
1616
"""Constrain reference genome assembly values.
1717
18+
Uses UCSC-style names because we're fetching UCSC chainfiles.
19+
1820
We could conceivably support every UCSC chainfile offering, but for now, we'll
1921
stick with internal use cases only.
2022
"""
2123

2224
HG38 = "hg38"
2325
HG19 = "hg19"
2426

27+
@property
28+
def as_grc(self) -> str:
29+
"""Return official Genome Reference Consortium assembly names
30+
31+
:return: `"GRCh38"` or `"GRCh37"`
32+
:raise ValueError: if unrecognized enum option
33+
"""
34+
if self.value == "hg38":
35+
return "GRCh38"
36+
if self.value == "hg19":
37+
return "GRCh37"
38+
raise ValueError
39+
2540

2641
class Chromosome(StrEnum):
2742
"""Constrain chromosome values to UCSC-style names.

tests/test_seqref_registry.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,18 @@
88
)
99

1010

11+
def test_assembly_enum():
    """Check that each assembly member round-trips with its UCSC-style name."""
    members_by_name = {"hg38": Assembly.HG38, "hg19": Assembly.HG19}
    for ucsc_name, member in members_by_name.items():
        # Lookup by value must yield the member, and the member must compare
        # equal to its plain string value.
        assert Assembly(ucsc_name) == member
        assert member == ucsc_name
16+
17+
18+
def test_assembly_enum_to_ncbi():
    """Check that each assembly reports the expected GRC name via ``as_grc``."""
    expected_grc_names = (
        (Assembly.HG19, "GRCh37"),
        (Assembly.HG38, "GRCh38"),
    )
    for assembly, grc_name in expected_grc_names:
        assert assembly.as_grc == grc_name
21+
22+
1123
def test_assembly_fetcher():
1224
assert get_seqinfo_from_refget_id("SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB") == (
1325
Assembly.HG38,

0 commit comments

Comments
 (0)