Skip to content

Commit bfcf59a

Browse files
refactor chromosome name validation to be more streamlined
1 parent 713f702 commit bfcf59a

File tree

1 file changed

+28
-21
lines changed

1 file changed

+28
-21
lines changed

src/anyvlm/utils/types.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,36 +34,43 @@ class UscsAssemblyBuild(StrEnum):
3434
]
3535

3636

37+
def is_valid_chromosome_name(chromosome_name: str) -> bool:
38+
"""Checks whether or not a provided chromosome name is valid.
39+
40+
:param chromosome_name: The chromosome name to validate.
41+
:return: `True` if the chromosome name is an integer from 1 to 22 (inclusive), or is "X" or "Y"; otherwise `False`.
42+
"""
43+
min_chromosome_number = 1
44+
max_chromosome_number = 22
45+
try:
46+
return (
47+
chromosome_name in {"X", "Y"}
48+
or min_chromosome_number <= int(chromosome_name) <= max_chromosome_number
49+
)
50+
except ValueError:
51+
return False
52+
53+
3754
def _normalize_chromosome_name(chromosome_name: str) -> str:
38-
"""Normalize a chromosome name. Input must be a string consisting of either a number between 1-22, or 'X' or 'Y';
39-
optionally prefixed with 'chr'.
55+
"""Normalize a chromosome name. Input must be a string consisting of either a number between 1-22,
56+
or 'X' or 'Y'; optionally prefixed with 'chr'.
4057
4158
:param chromosome_name: The name of the chromosome to normalize, following the rules stated above.
4259
:return: The chromosome name, stripped of its 'chr' prefix if it was added
4360
"""
44-
error_message = (
45-
"Invalid chromosome. Must be 1-22, 'X,' or 'Y,' with optional 'chr' prefix."
46-
)
47-
4861
# strip the 'chr' prefix if it was included
4962
chromosome_name = (
50-
chromosome_name[3:].upper()
63+
chromosome_name[3:]
5164
if chromosome_name.lower().startswith("chr")
52-
else chromosome_name.upper()
53-
)
54-
55-
# chromosome name must be either an int, or "X" or "Y"
56-
try:
57-
int(chromosome_name)
58-
except ValueError:
59-
if chromosome_name not in ["X", "Y"]:
60-
raise ValueError(error_message) from None
65+
else chromosome_name
66+
).upper()
6167

62-
# if chromosome name is an int, it must be between 1-22
63-
if chromosome_name not in range(1, 23): # stop is exclusive so we need to add 1
64-
raise ValueError(error_message)
65-
66-
return chromosome_name
68+
if is_valid_chromosome_name(chromosome_name):
69+
return chromosome_name
70+
error_message = (
71+
"Invalid chromosome. Must be 1-22, 'X,' or 'Y,' with optional 'chr' prefix."
72+
)
73+
raise ValueError(error_message)
6774

6875

6976
ChromosomeName = Annotated[str, BeforeValidator(_normalize_chromosome_name)]

0 commit comments

Comments
 (0)