88import tempfile
99import os
1010from src .read_groups import (
11- FileNameGrouper ,
12- ReadTableGrouper ,
13- BarcodeSpotGrouper ,
1411 AlignmentTagReadGrouper ,
1512 ReadIdSplitReadGrouper ,
16- parse_grouping_spec ,
17- get_grouping_strategy_names
13+ ReadTableGrouper ,
1814)
1915
2016
@@ -33,38 +29,12 @@ def has_tag(self, tag_name):
3329 return tag_name in self ._tags
3430
3531
36- class TestFileNameGrouper :
37- """Test FileNameGrouper class."""
38-
39- def test_init (self ):
40- """Test FileNameGrouper initialization."""
41- file_dict = {"file1" : "label1" , "file2" : "label2" }
42- grouper = FileNameGrouper (file_dict )
43- assert grouper .file_dict == file_dict
44-
45- def test_get_group (self ):
46- """Test getting group from filename."""
47- file_dict = {"file1.bam" : "sample1" , "file2.bam" : "sample2" }
48- grouper = FileNameGrouper (file_dict )
49-
50- assert grouper .get_group ("read1" , "file1.bam" ) == "sample1"
51- assert grouper .get_group ("read2" , "file2.bam" ) == "sample2"
52-
53- def test_get_group_missing_file (self ):
54- """Test getting group from missing file."""
55- file_dict = {"file1.bam" : "sample1" }
56- grouper = FileNameGrouper (file_dict )
57-
58- # Should return filename if not in dict
59- assert grouper .get_group ("read1" , "file3.bam" ) == "file3.bam"
60-
61-
6232class TestAlignmentTagReadGrouper :
6333 """Test AlignmentTagReadGrouper class."""
6434
6535 def test_init_default_tag (self ):
6636 """Test AlignmentTagReadGrouper with default RG tag."""
67- grouper = AlignmentTagReadGrouper (None )
37+ grouper = AlignmentTagReadGrouper ()
6838 assert grouper .tag == "RG"
6939
7040 def test_init_custom_tag (self ):
@@ -76,15 +46,15 @@ def test_get_group_with_tag(self):
7646 """Test getting group when read has tag."""
7747 grouper = AlignmentTagReadGrouper ("CB" )
7848 alignment = MockAlignment (tags = {"CB" : "ACTGACTG" })
79- assert grouper .get_group_id (alignment , None , ) == "ACTGACTG"
49+ assert grouper .get_group_id (alignment , None ) == "ACTGACTG"
8050
8151 def test_get_group_missing_tag (self ):
8252 """Test getting group when read lacks tag."""
8353 grouper = AlignmentTagReadGrouper ("CB" )
8454 alignment = MockAlignment ()
85- result = grouper .get_group_id (alignment , None , )
86- # Should return None or some default value
87- assert result is None or result == ""
55+ result = grouper .get_group_id (alignment , None )
56+ # Should return None
57+ assert result is None
8858
8959
9060class TestReadIdSplitReadGrouper :
@@ -99,28 +69,34 @@ def test_get_group_with_delimiter(self):
9969 """Test getting group from read ID with delimiter."""
10070 grouper = ReadIdSplitReadGrouper ("_" )
10171
102- assert grouper .get_group_id (MockAlignment (read_id = "read_001_groupA" )) == "groupA"
103- assert grouper .get_group_id (MockAlignment (read_id = "read_002_groupB" )) == "groupB"
72+ alignment1 = MockAlignment (read_id = "read_001_groupA" )
73+ alignment2 = MockAlignment (read_id = "read_002_groupB" )
74+
75+ assert grouper .get_group_id (alignment1 ) == "groupA"
76+ assert grouper .get_group_id (alignment2 ) == "groupB"
10477
10578 def test_get_group_no_delimiter (self ):
10679 """Test getting group from read ID without delimiter."""
10780 grouper = ReadIdSplitReadGrouper ("_" )
81+ alignment = MockAlignment (read_id = "read001" )
10882
109- # Should return read ID if no delimiter found
110- assert grouper .get_group_id (MockAlignment (read_id = "read001" )) == "read001"
83+ # Returns empty string if no delimiter found
84+ result = grouper .get_group_id (alignment )
85+ assert result == ""
11186
11287 def test_get_group_multiple_delimiters (self ):
11388 """Test getting group with multiple delimiters."""
11489 grouper = ReadIdSplitReadGrouper ("_" )
90+ alignment = MockAlignment (read_id = "prefix_middle_suffix" )
11591
11692 # Should return last part after delimiter
117- assert grouper .get_group_id (MockAlignment ( read_id = "prefix_middle_suffix" ) ) == "suffix"
93+ assert grouper .get_group_id (alignment ) == "suffix"
11894
11995
12096class TestReadTableGrouper :
12197 """Test ReadTableGrouper class."""
12298
123- def test_init_and_load (self ):
99+ def test_init_and_get_group (self ):
124100 """Test ReadTableGrouper initialization and file loading."""
125101 # Create temporary file with read-group mapping
126102 with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
@@ -135,9 +111,13 @@ def test_init_and_load(self):
135111 group_id_column_index = 1 ,
136112 delim = '\t ' )
137113
138- assert grouper .get_group_id ("read_001" ) == "groupA"
139- assert grouper .get_group_id ("read_002" ) == "groupB"
140- assert grouper .get_group_id ("read_003" ) == "groupA"
114+ alignment1 = MockAlignment (read_id = "read_001" )
115+ alignment2 = MockAlignment (read_id = "read_002" )
116+ alignment3 = MockAlignment (read_id = "read_003" )
117+
118+ assert grouper .get_group_id (alignment1 ) == "groupA"
119+ assert grouper .get_group_id (alignment2 ) == "groupB"
120+ assert grouper .get_group_id (alignment3 ) == "groupA"
141121 finally :
142122 os .unlink (temp_file )
143123
@@ -148,150 +128,18 @@ def test_missing_read(self):
148128 temp_file = f .name
149129
150130 try :
151- grouper = ReadTableGrouper ([temp_file ], read_col = 0 , group_col = 1 , delimiter = '\t ' )
152- grouper .load ()
131+ grouper = ReadTableGrouper (temp_file ,
132+ read_id_column_index = 0 ,
133+ group_id_column_index = 1 ,
134+ delim = '\t ' )
153135
154- # Should return None or read ID for missing reads
155- result = grouper .get_group ("read_999" )
156- assert result is None or result == ""
136+ alignment = MockAlignment (read_id = "read_999" )
137+ # Should return "NA" for missing reads
138+ result = grouper .get_group_id (alignment )
139+ assert result == "NA"
157140 finally :
158141 os .unlink (temp_file )
159142
160143
161- class TestBarcodeSpotGrouper :
162- """Test BarcodeSpotGrouper class."""
163-
164- def test_init (self ):
165- """Test BarcodeSpotGrouper initialization."""
166- # Create temp files
167- with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
168- f .write ("read_001\t ACTG\t GGGG\n " )
169- barcode_file = f .name
170-
171- with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
172- f .write ("ACTG\t cellTypeA\n " )
173- spot_file = f .name
174-
175- try :
176- grouper = BarcodeSpotGrouper ([barcode_file ], [spot_file ])
177- assert grouper .barcode_files == [barcode_file ]
178- assert grouper .spot_files == [spot_file ]
179- finally :
180- os .unlink (barcode_file )
181- os .unlink (spot_file )
182-
183- def test_load_and_get_group (self ):
184- """Test loading barcode-spot mappings."""
185- # Create temp barcode file
186- with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
187- f .write ("read_001\t ACTG\t GGGG\n " )
188- f .write ("read_002\t TGCA\t CCCC\n " )
189- barcode_file = f .name
190-
191- # Create temp spot mapping file
192- with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
193- f .write ("ACTG\t cellTypeA\n " )
194- f .write ("TGCA\t cellTypeB\n " )
195- spot_file = f .name
196-
197- try :
198- grouper = BarcodeSpotGrouper ([barcode_file ], [spot_file ])
199- grouper .load ()
200-
201- # Should map read -> barcode -> cell type
202- assert grouper .get_group ("read_001" ) == "cellTypeA"
203- assert grouper .get_group ("read_002" ) == "cellTypeB"
204- finally :
205- os .unlink (barcode_file )
206- os .unlink (spot_file )
207-
208- def test_missing_barcode_in_spot_map (self ):
209- """Test read with barcode not in spot mapping."""
210- with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
211- f .write ("read_001\t ACTG\t GGGG\n " )
212- barcode_file = f .name
213-
214- with tempfile .NamedTemporaryFile (mode = 'w' , delete = False , suffix = '.tsv' ) as f :
215- f .write ("TGCA\t cellTypeB\n " ) # Different barcode
216- spot_file = f .name
217-
218- try :
219- grouper = BarcodeSpotGrouper ([barcode_file ], [spot_file ])
220- grouper .load ()
221-
222- # Should return None or barcode for unmapped barcodes
223- result = grouper .get_group ("read_001" )
224- assert result is None or result == "ACTG"
225- finally :
226- os .unlink (barcode_file )
227- os .unlink (spot_file )
228-
229-
230- class TestParseGroupingSpec :
231- """Test grouping specification parsing."""
232-
233- def test_parse_file_name (self ):
234- """Test parsing file_name grouping."""
235- file_dict = {"file1" : "label1" }
236- groupers = parse_grouping_spec (["file_name" ], file_dict )
237-
238- assert len (groupers ) == 1
239- assert isinstance (groupers [0 ], FileNameGrouper )
240-
241- def test_parse_tag (self ):
242- """Test parsing tag grouping."""
243- groupers = parse_grouping_spec (["tag:CB" ], {})
244-
245- assert len (groupers ) == 1
246- assert isinstance (groupers [0 ], AlignmentTagReadGrouper )
247- assert groupers [0 ].tag_name == "CB"
248-
249- def test_parse_read_id (self ):
250- """Test parsing read_id grouping."""
251- groupers = parse_grouping_spec (["read_id:_" ], {})
252-
253- assert len (groupers ) == 1
254- assert isinstance (groupers [0 ], ReadIdSplitReadGrouper )
255- assert groupers [0 ].delimiter == "_"
256-
257- def test_parse_multiple_strategies (self ):
258- """Test parsing multiple grouping strategies."""
259- file_dict = {"file1" : "label1" }
260- groupers = parse_grouping_spec (["file_name" , "tag:CB" ], file_dict )
261-
262- assert len (groupers ) == 2
263- assert isinstance (groupers [0 ], FileNameGrouper )
264- assert isinstance (groupers [1 ], AlignmentTagReadGrouper )
265-
266-
267- class TestGetGroupingStrategyNames :
268- """Test getting grouping strategy names."""
269-
270- def test_file_name_strategy (self ):
271- """Test file_name strategy name."""
272- file_dict = {"file1" : "label1" }
273- groupers = parse_grouping_spec (["file_name" ], file_dict )
274- names = get_grouping_strategy_names (groupers )
275-
276- assert names == ["file_name" ]
277-
278- def test_tag_strategy (self ):
279- """Test tag strategy name."""
280- groupers = parse_grouping_spec (["tag:CB" ], {})
281- names = get_grouping_strategy_names (groupers )
282-
283- assert names == ["tag" ]
284-
285- def test_multiple_strategies (self ):
286- """Test multiple strategy names."""
287- file_dict = {"file1" : "label1" }
288- groupers = parse_grouping_spec (["file_name" , "tag:RG" ], file_dict )
289- names = get_grouping_strategy_names (groupers )
290-
291- assert len (names ) == 2
292- assert "file_name" in names
293- assert "tag" in names
294-
295-
296144if __name__ == '__main__' :
297145 pytest .main ([__file__ , '-v' ])
0 commit comments