Skip to content

Commit 5f91e1b

Browse files
authored
DATAOPS-779: Fix undetermined percentage handler (#117)
* Add NaN PhiX values support to %Undetermined handler
* Add .vscode to gitignore and bump version
* Add new MiSeq testdata with index reads
* Use MiSeq dataset with index in interop parser tests
* Remove MiSeqDemo
* Restructure mean PhiX calculation to avoid numpy warning
* Add pull requests as GHA trigger
* Make interop dict for Receiver in interop test
1 parent dfba84e commit 5f91e1b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+30230
-151
lines changed

.github/workflows/unit_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: Run Unit Tests
22

3-
on: [push]
3+
on: [push, pull_request]
44

55
jobs:
66
build:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ dist/
88
.python-env/
99
.coverage
1010
.cache
11+
.vscode/

checkQC/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11

2-
__version__ = "4.0.3"
2+
__version__ = "4.0.5-rc1"

checkQC/handlers/undetermined_percentage_handler.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
from collections import defaultdict
3+
import numpy as np
34

45
from checkQC.handlers.qc_handler import QCHandler, QCErrorFatal, QCErrorWarning
56
from checkQC.parsers.stats_json_parser import StatsJsonParser
@@ -33,13 +34,12 @@ def collect(self, signal):
3334
self.phix_aligned[value["lane"]][value["read"]] = value["percent_phix"]
3435

3536
def _compute_mean_percentage_phix_aligned_for_lanes(self):
36-
lane_and_mean_percentage_phix_aliged = {}
37+
lane_and_mean_percentage_phix_aligned = {}
3738
for lane, reads in self.phix_aligned.items():
38-
mean = 0
39-
for read, value in reads.items():
40-
mean += value / len(reads)
41-
lane_and_mean_percentage_phix_aliged[lane] = mean
42-
return lane_and_mean_percentage_phix_aliged
39+
reads_list = list(reads.values())
40+
mean_phix = 0 if all(np.isnan(reads_list)) else np.nanmean(reads_list)
41+
lane_and_mean_percentage_phix_aligned[lane] = mean_phix
42+
return lane_and_mean_percentage_phix_aligned
4343

4444
def check_qc(self):
4545

@@ -74,13 +74,13 @@ def create_data_dict(value):
7474

7575
if self.error() != self.UNKNOWN and percentage_undetermined > compute_threshold(self.error()):
7676
yield QCErrorFatal("The percentage of undetermined indexes was"
77-
" to high on lane {}, it was: {:.2f}%".format(lane_nbr,
77+
" too high on lane {}, it was: {:.2f}%".format(lane_nbr,
7878
percentage_undetermined),
7979
ordering=lane_nbr,
8080
data=create_data_dict(self.error()))
8181
elif self.warning() != self.UNKNOWN and percentage_undetermined > compute_threshold(self.warning()):
8282
yield QCErrorWarning("The percentage of undetermined indexes was "
83-
"to high on lane {}, it was: {:.2f}%".format(lane_nbr,
83+
"too high on lane {}, it was: {:.2f}%".format(lane_nbr,
8484
percentage_undetermined),
8585
ordering=lane_nbr,
8686
data=create_data_dict(self.warning()))

checkQC/parsers/interop_parser.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,6 @@ def run(self):
148148
lanes = summary.lane_count()
149149

150150
for lane in range(lanes):
151-
# The interop library uses zero based indexing,
152-
#however most people uses read 1/2
153-
# to denote the different reads,
154-
#this enumeration is used to transform from
155-
# zero based indexing to this form. /JD 2017-10-27
156151
for read_nbr in range(summary.size()):
157152
read = summary.at(read_nbr).at(lane)
158153
error_rate = read.error_rate().mean()

tests/handlers/test_undetermined_percentage_handler.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import unittest
2+
import numpy as np
23

34
from checkQC.handlers.undetermined_percentage_handler import UndeterminedPercentageHandler
45

@@ -17,9 +18,9 @@ def setUp(self):
1718

1819
percentage_phix_key = "percent_phix"
1920
percentage_phix_value_lane_1_read_1 = {"lane": 1, "read": 1, "percent_phix": 1}
20-
percentage_phix_value_lane_1_read_2 = {"lane": 1, "read": 2, "percent_phix": 1}
21-
percentage_phix_value_lane_2_read_1 = {"lane": 2, "read": 1, "percent_phix": 1}
22-
percentage_phix_value_lane_2_read_2 = {"lane": 2, "read": 2, "percent_phix": 1}
21+
percentage_phix_value_lane_1_read_2 = {"lane": 1, "read": 2, "percent_phix": np.nan}
22+
percentage_phix_value_lane_2_read_1 = {"lane": 2, "read": 1, "percent_phix": np.nan}
23+
percentage_phix_value_lane_2_read_2 = {"lane": 2, "read": 2, "percent_phix": np.nan}
2324
undetermined_handler.collect((percentage_phix_key, percentage_phix_value_lane_1_read_1))
2425
undetermined_handler.collect((percentage_phix_key, percentage_phix_value_lane_1_read_2))
2526
undetermined_handler.collect((percentage_phix_key, percentage_phix_value_lane_2_read_1))
@@ -37,7 +38,7 @@ def test_all_is_fine(self):
3738
self.assertEqual(errors_and_warnings, [])
3839

3940
def test_warning(self):
40-
qc_config = {'name': 'UndeterminedPercentageHandler', 'error': 2, 'warning': 1}
41+
qc_config = {'name': 'UndeterminedPercentageHandler', 'error': 3, 'warning': 1}
4142
self.set_qc_config(qc_config)
4243
errors_and_warnings = list(self.undetermined_handler.check_qc())
4344
self.assertEqual(len(errors_and_warnings), 2)

tests/parsers/test_interop_parser.py

Lines changed: 59 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -15,93 +15,105 @@ class TestInteropParser(unittest.TestCase):
1515

1616
class Receiver(object):
1717
def __init__(self):
18-
self.error_rate_values = []
19-
self.percent_q30_values = []
20-
self.percent_q30_per_cycle = []
18+
self.metrics = {'error_rate': [],
19+
'percent_q30': [],
20+
'percent_q30_per_cycle': [],
21+
'percent_phix': [],
22+
}
2123
self.subscriber = self.subscribe()
2224
next(self.subscriber)
2325

2426
def subscribe(self):
2527
while True:
2628
interop_stat = yield
2729
key = list(interop_stat)[0]
28-
if key == "error_rate":
29-
self.error_rate_values.append(interop_stat)
30-
if key == "percent_q30":
31-
self.percent_q30_values.append(interop_stat)
32-
if key == "percent_q30_per_cycle":
33-
self.percent_q30_per_cycle.append(interop_stat)
30+
self.metrics[key].append(interop_stat)
3431

3532
def send(self, value):
3633
self.subscriber.send(value)
3734

38-
runfolder = os.path.join(os.path.dirname(__file__), "..",
35+
runfolder = os.path.join(os.path.dirname(__file__), "..",
3936
"resources",
40-
"MiSeqDemo")
41-
interop_parser = InteropParser(runfolder=runfolder,
37+
"230825_M04034_0043_000000000-L6NVV")
38+
interop_parser = InteropParser(runfolder=runfolder,
4239
parser_configurations=None)
4340
subscriber = Receiver()
4441
interop_parser.add_subscribers(subscriber)
4542
interop_parser.run()
4643

4744
def test_read_error_rate(self):
48-
self.assertListEqual(self.subscriber.error_rate_values,
49-
[('error_rate',
50-
{'lane': 1,
51-
'read': 1,
52-
'error_rate': 1.5317546129226685}),
53-
('error_rate',
54-
{'lane': 1,
55-
'read': 2,
56-
'error_rate': 1.9201501607894897})])
57-
45+
error_rates = [x[1]['error_rate'] for x in self.subscriber.metrics['error_rate']]
46+
self.assertEqual(error_rates[0], 0.587182343006134)
47+
self.assertTrue(np.isnan(error_rates[1]))
48+
self.assertTrue(np.isnan(error_rates[2]))
49+
self.assertEqual(error_rates[3], 0.8676796555519104)
50+
51+
def test_percent_phix(self):
52+
phix = [x[1]['percent_phix'] for x in self.subscriber.metrics['percent_phix']]
53+
self.assertEqual(phix[0], 15.352058410644531)
54+
self.assertTrue(np.isnan(phix[1]))
55+
self.assertTrue(np.isnan(phix[2]))
56+
self.assertEqual(phix[3], 14.5081205368042)
5857

5958
def test_percent_q30(self):
60-
self.assertListEqual(self.subscriber.percent_q30_values,
61-
[('percent_q30',
62-
{'lane': 1,
63-
'read': 1,
64-
'percent_q30': 93.42070007324219,
59+
self.assertListEqual(self.subscriber.metrics['percent_q30'],
60+
[('percent_q30',
61+
{'lane': 1,
62+
'read': 1,
63+
'percent_q30': 95.3010025024414,
6564
'is_index_read': False}),
66-
('percent_q30',
67-
{'lane': 1,
68-
'read': 2,
69-
'percent_q30': 84.4270248413086,
65+
('percent_q30',
66+
{'lane': 1,
67+
'read': 2,
68+
'percent_q30': 82.97042846679688,
69+
'is_index_read': True}),
70+
('percent_q30',
71+
{'lane': 1,
72+
'read': 3,
73+
'percent_q30': 97.44789123535156,
74+
'is_index_read': True}),
75+
('percent_q30',
76+
{'lane': 1,
77+
'read': 4,
78+
'percent_q30': 90.55824279785156,
7079
'is_index_read': False})])
71-
72-
def test_percent_q30_per_cycle(self):
73-
percent_q30_per_cycle = self.subscriber.percent_q30_per_cycle
80+
81+
def test_percent_q30_per_cycle_subscriber_output(self):
82+
percent_q30_per_cycle = self.subscriber.metrics['percent_q30_per_cycle']
7483
self.assertEqual(percent_q30_per_cycle[0][1]['read'], 1)
7584
self.assertAlmostEqual(
7685
percent_q30_per_cycle[0][1]['percent_q30_per_cycle'][10],
77-
98.41526794433594
86+
96.68322,
87+
places=5,
7888
)
7989

8090
self.assertEqual(percent_q30_per_cycle[1][1]['read'], 2)
91+
self.assertTrue(percent_q30_per_cycle[1][1]['is_index_read'])
8192
self.assertAlmostEqual(
82-
percent_q30_per_cycle[1][1]['percent_q30_per_cycle'][10],
83-
95.20341491699219
93+
percent_q30_per_cycle[1][1]['percent_q30_per_cycle'][1],
94+
80.69179,
95+
places=5,
8496
)
85-
97+
8698
def test_get_percent_q30_per_cycle(self):
8799
q_metrics = imaging(self.runfolder,
88100
valid_to_load=['Q'])
89-
101+
90102
percent_q30_per_cycle = InteropParser.get_percent_q30_per_cycle(
91103
q_metrics=q_metrics,
92-
lane_nr=0,
104+
lane_nr=0,
93105
read_nr=0,
94106
is_index_read=False,
95107
)
96108

97109
expected_out = {
98-
6: 98.76343,
99-
48: 97.841576,
100-
90: 96.81421,
101-
132: 95.90264,
102-
174: 94.69448,
103-
216: 91.90525,
104-
258: 87.162094,
110+
6: 97.17214,
111+
18: 97.1332,
112+
25: 97.38965,
113+
50: 96.62786,
114+
75: 96.30572,
115+
100: 94.63465,
116+
136: 92.64536,
105117
}
106118

107119
#Select cycles from the expected_out-dict.

0 commit comments

Comments
 (0)