Skip to content

Commit 937469c

Browse files
committed
Smart Sort Fix and cleanup
- Fix 4th field comparison bug
- Remove unused function `smart_sort_alleles`
- cleanup
- upped version to `0.0.21`
1 parent 6e75def commit 937469c

File tree

3 files changed

+111
-53
lines changed

3 files changed

+111
-53
lines changed

pyard/smart_sort.py

Lines changed: 21 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33

44
expr_regex = re.compile('[NQLSGg]')
55

6+
67
@functools.lru_cache(maxsize=None)
78
def smart_sort_comparator(a1, a2):
89
"""
910
Natural sort 2 given alleles.
1011
11-
Python sorts strings lexographically but HLA alleles need
12+
Python sorts strings lexicographically but HLA alleles need
1213
to be sorted by numerical values in each field of the HLA nomenclature.
1314
1415
:param a1: first allele
@@ -19,85 +20,53 @@ def smart_sort_comparator(a1, a2):
1920
if a1 == a2:
2021
return 0
2122

22-
2323
# remove any non-numerics
2424
a1 = re.sub(expr_regex, '', a1)
2525
a2 = re.sub(expr_regex, '', a2)
26+
27+
# Check to see if they are still the same alleles
28+
if a1 == a2:
29+
return 0
30+
2631
# Extract and Compare first fields first
27-
a1_f1 = int(a1[a1.find('*')+1:a1.find(':')])
28-
a2_f1 = int(a2[a2.find('*')+1:a2.find(':')])
32+
a1_f1 = int(a1[a1.find('*') + 1:a1.find(':')])
33+
a2_f1 = int(a2[a2.find('*') + 1:a2.find(':')])
2934

3035
if a1_f1 < a2_f1:
3136
return -1
3237
if a1_f1 > a2_f1:
3338
return 1
3439

35-
# If the first fields are equal, try the 2nd fields
40+
a1_fields = a1.split(':')
41+
a2_fields = a2.split(':')
3642

37-
a1_f2 = int(a1.split(':')[1])
38-
a2_f2 = int(a2.split(':')[1])
43+
# If the first fields are equal, try the 2nd fields
44+
a1_f2 = int(a1_fields[1])
45+
a2_f2 = int(a2_fields[1])
3946

4047
if a1_f2 < a2_f2:
4148
return -1
4249
if a1_f2 > a2_f2:
4350
return 1
4451

45-
# If the two fields are equal, try the 3rd fields
46-
47-
a1_f3 = int(a1.split(':')[2])
48-
a2_f3 = int(a2.split(':')[2])
52+
# If the second fields are equal, try the 3rd fields
53+
a1_f3 = int(a1_fields[2])
54+
a2_f3 = int(a2_fields[2])
4955

5056
if a1_f3 < a2_f3:
5157
return -1
5258
if a1_f3 > a2_f3:
5359
return 1
5460

55-
# If the two fields are equal, try the 4th fields
56-
57-
a1_f4 = int(a1.split(':')[3])
58-
a2_f3 = int(a2.split(':')[3])
61+
# If the third fields are equal, try the 4th fields
62+
a1_f4 = int(a1_fields[3])
63+
a2_f4 = int(a2_fields[3])
5964

6065
if a1_f4 < a2_f4:
6166
return -1
6267
if a1_f4 > a2_f4:
6368
return 1
6469

65-
66-
67-
# All fields are equal
70+
# All fields are considered equal after 4th field
6871
return 0
6972

70-
def smart_sort_alleles(a1, a2):
71-
"""
72-
Natural sort 2 given alleles.
73-
74-
Python sorts strings lexographically but HLA alleles need
75-
to be sorted by numerical values in each field of the HLA nomenclature.
76-
77-
:param a1: first allele
78-
:param a2: second allele
79-
"""
80-
# Check to see if they are the same alleles
81-
if a1 == a2:
82-
return [a1, a2]
83-
84-
# Extract and Compare first fields first
85-
a1_f1 = int(a1[a1.find('*')+1:a1.find(':')])
86-
a2_f1 = int(a2[a2.find('*')+1:a2.find(':')])
87-
88-
if a1_f1 < a2_f1:
89-
return [a1, a2]
90-
if a1_f1 > a2_f1:
91-
return [a2, a1]
92-
93-
# If the first fields are equal, try the 2nd fields
94-
a1_f2 = int(a1[a1.find(':')+1:])
95-
a2_f2 = int(a2[a2.find(':')+1:])
96-
97-
if a1_f2 < a2_f2:
98-
return [a1, a2]
99-
if a1_f2 > a2_f2:
100-
return [a2, a1]
101-
102-
# All fields are equal
103-
return [a1, a2]

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242

4343
setup(
4444
name='py-ard',
45-
version='0.0.20',
45+
version='0.0.21',
4646
description="ARD reduction for HLA with python",
4747
long_description=readme + '\n\n' + history,
4848
author="CIBMTR",

tests/test_smart_sort.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import unittest
2+
3+
from pyard.smart_sort import smart_sort_comparator
4+
5+
6+
class TestSmartSort(unittest.TestCase):
7+
8+
def setUp(self) -> None:
9+
super().setUp()
10+
11+
def test_same_comparator(self):
12+
allele = "HLA-A*01:01"
13+
self.assertEqual(smart_sort_comparator(allele, allele), 0)
14+
15+
def test_equal_comparator(self):
16+
allele1 = "HLA-A*01:01"
17+
allele2 = "HLA-A*01:01"
18+
self.assertEqual(smart_sort_comparator(allele1, allele2), 0)
19+
20+
def test_equal_comparator_G(self):
21+
# Should compare without G
22+
allele1 = "HLA-A*01:01G"
23+
allele2 = "HLA-A*01:01"
24+
self.assertEqual(smart_sort_comparator(allele1, allele2), 0)
25+
26+
def test_equal_comparator_NG(self):
27+
# Should compare without N and G
28+
allele1 = "HLA-A*01:01G"
29+
allele2 = "HLA-A*01:01N"
30+
self.assertEqual(smart_sort_comparator(allele1, allele2), 0)
31+
32+
def test_first_field_comparator_le(self):
33+
allele1 = "HLA-A*01:01"
34+
allele2 = "HLA-A*02:01"
35+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
36+
37+
def test_first_field_comparator_ge(self):
38+
allele1 = "HLA-A*02:01"
39+
allele2 = "HLA-A*01:01"
40+
self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
41+
42+
def test_second_field_comparator_le(self):
43+
allele1 = "HLA-A*01:01"
44+
allele2 = "HLA-A*01:02"
45+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
46+
47+
def test_second_field_comparator_le_smart(self):
48+
allele1 = "HLA-A*01:29"
49+
allele2 = "HLA-A*01:100"
50+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
51+
52+
def test_second_field_comparator_ge(self):
53+
allele1 = "HLA-A*01:02"
54+
allele2 = "HLA-A*01:01"
55+
self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
56+
57+
def test_third_field_comparator_le(self):
58+
allele1 = "HLA-A*01:01:01"
59+
allele2 = "HLA-A*01:01:20"
60+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
61+
62+
def test_third_field_comparator_le_smart(self):
63+
allele1 = "HLA-A*01:01:29"
64+
allele2 = "HLA-A*01:01:100"
65+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
66+
67+
def test_third_field_comparator_ge(self):
68+
allele1 = "HLA-A*01:01:02"
69+
allele2 = "HLA-A*01:01:01"
70+
self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
71+
72+
def test_fourth_field_comparator_le(self):
73+
allele1 = "HLA-A*01:01:01:01"
74+
allele2 = "HLA-A*01:01:01:20"
75+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
76+
77+
def test_fourth_field_comparator_le_smart(self):
78+
allele1 = "HLA-A*01:01:01:39"
79+
allele2 = "HLA-A*01:01:01:200"
80+
self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
81+
82+
def test_fourth_field_comparator_ge(self):
83+
allele1 = "HLA-A*01:01:01:30"
84+
allele2 = "HLA-A*01:01:01:09"
85+
self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
86+
87+
88+
if __name__ == '__main__':
89+
unittest.main()

0 commit comments

Comments
 (0)