Skip to content

Commit 08454e3

Browse files
committed
2 parents 2215fce + 2b22916 commit 08454e3

File tree

2 files changed

+73
-25
lines changed

2 files changed

+73
-25
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
[![dockerhub](https://img.shields.io/docker/v/aghozlane/meteor?label=aghozlane/meteor&logo=docker)](https://hub.docker.com/r/aghozlane/meteor/)
88
![Github Actions](https://github.com/metagenopolis/meteor/actions/workflows/main.yml/badge.svg)
99
[![codecov](https://codecov.io/gh/metagenopolis/meteor/graph/badge.svg?token=AXAEIUY7DX)](https://codecov.io/gh/metagenopolis/meteor)
10-
[![DOI](https://zenodo.org/badge/722959292.svg)](https://zenodo.org/doi/10.5281/zenodo.10912587)
10+
1111

1212
## Introduction
1313

meteor/tests/test_phylogeny.py

Lines changed: 72 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -125,51 +125,99 @@ def test_remove_edge_labels(phylogeny_builder: Phylogeny):
125125
# assert len(lines) == 4
126126

127127
def test_generate_pairwise_comparison_table(phylogeny_builder: Phylogeny, tmp_path: Path):
128-
"""Test _generate_pairwise_comparison_table method"""
129-
# Create mock sequences
128+
"""Test _generate_pairwise_comparison_table method with current implementation"""
129+
# Create test sequences with clear, verifiable patterns
130130
sequences = {
131-
"sample1": "ATCGATCGATCG",
132-
"sample2": "ATCGATCGATCG",
133-
"sample3": "ATCGNTCGATCG"
131+
"sample1": "ATCGATCGATCG", # All ACGT (12 maximal info positions)
132+
"sample2": "ATCGATCGATCG", # Identical to sample1
133+
"sample3": "ATRYNNNHBVVT", # Mixed: ACGT, 2-base IUPAC (R,Y), 3-base IUPAC (H,B,V), and N's
134+
"sample4": "ATNYNNNHBVNT", # Similar to sample3 but with more Ns
134135
}
135136

136-
# Create mock distance matrix
137+
# Mock distance matrix with proper get_distance method
137138
class MockDistanceMatrix:
138139
def __init__(self):
139-
self.names = ['sample1', 'sample2', 'sample3']
140+
self.names = ['sample1', 'sample2', 'sample3', 'sample4']
141+
self.distances = {
142+
'sample1-sample2': 0.00005, # same_strain
143+
'sample1-sample3': 0.008, # same_species
144+
'sample1-sample4': 0.012, # same_subspecies
145+
'sample2-sample3': 0.008, # same_species
146+
'sample2-sample4': 0.012, # same_subspecies
147+
'sample3-sample4': 0.025, # divergent
148+
}
140149

141150
def get_distance(self, name1, name2):
142-
# Simple distance calculation for testing
143151
if name1 == name2:
144152
return 0.0
145-
return 0.1
153+
key1 = f"{name1}-{name2}"
154+
key2 = f"{name2}-{name1}"
155+
return self.distances.get(key1, self.distances.get(key2, 0.1))
146156

147157
dists = MockDistanceMatrix()
148-
149-
# Define output file
150158
output_file = tmp_path / "test_comparison.tsv"
151159

152160
# Call the method
153161
phylogeny_builder._generate_pairwise_comparison_table(sequences, dists, output_file)
154162

155-
# Verify file exists
163+
# Verify file exists and can be read
156164
assert output_file.exists()
157-
158-
# Read and verify content
159165
df = pd.read_csv(output_file, sep='\t')
160166

161-
# Check that we have the expected columns
162-
expected_columns = ['sample1', 'sample2', 'total_length', 'coverage_overlap_nq',
163-
'coverage_overlap_all', 'compared_bases_count', 'coverage_sample1_pct',
164-
'coverage_sample2_pct', 'mean_depth_sample1', 'mean_depth_sample2', 'distance']
165-
assert all(col in df.columns for col in expected_columns)
167+
# Check columns match current implementation exactly
168+
expected_columns = [
169+
'sample1', 'sample2', 'total_length', 'overlap_noN_info_count',
170+
'overlap_noIUPAC_info_count', 'overlap_noN_info_pc', 'overlap_noIUPAC_info_pc',
171+
'noN_info_pc_sample1', 'noN_info_pc_sample2', 'noIUPAC_info_pc_sample1',
172+
'noIUPAC_info_pc_sample2', 'distance', 'distance_category'
173+
]
174+
assert list(df.columns) == expected_columns
175+
176+
# Check 6 rows for 4 samples (C(4,2) = 6)
177+
assert len(df) == 6
178+
179+
# Test sample1 vs sample2 (identical, all ACGT, distance 0.00005 -> same_strain)
180+
row_12 = df[(df['sample1'] == 'sample1') & (df['sample2'] == 'sample2')].iloc[0]
181+
assert row_12['total_length'] == 12
182+
assert row_12['overlap_noN_info_count'] == 12
183+
assert row_12['overlap_noIUPAC_info_count'] == 12
184+
assert row_12['distance_category'] == 'same_strain'
185+
186+
# Test sample1 vs sample3 (distance 0.008 -> same_species)
187+
row_13 = df[(df['sample1'] == 'sample1') & (df['sample2'] == 'sample3')].iloc[0]
188+
assert row_13['total_length'] == 12
189+
190+
# Both samples have minimal info (any base code except N) at positions 0,1,2,3,7,8,9,10,11 = 9 positions
191+
# (positions 4,5,6 have N in sample3 which is NOT in MINIMAL_INFO)
192+
assert row_13['overlap_noN_info_count'] == 9
193+
194+
# Both have maximal info (ACGT) at positions: 0,1,11 = 3 positions
195+
assert row_13['overlap_noIUPAC_info_count'] == 3
196+
assert row_13['distance_category'] == 'same_species'
197+
198+
# Test sample3 vs sample4 (distance 0.025 -> divergent)
199+
row_34 = df[(df['sample1'] == 'sample3') & (df['sample2'] == 'sample4')].iloc[0]
200+
assert row_34['distance_category'] == 'divergent'
201+
202+
# Verify data integrity
203+
assert all(df['total_length'] == 12)
204+
assert all(df['overlap_noN_info_count'] >= df['overlap_noIUPAC_info_count'])
205+
assert all(df['overlap_noN_info_pc'] >= df['overlap_noIUPAC_info_pc'])
206+
assert all(df['distance'] >= 0)
207+
assert all(df['distance_category'].isin(['same_strain', 'same_species', 'same_subspecies', 'divergent']))
166208

167-
# Check that we have the expected number of rows (3 samples, so 3 pairs: 1-2, 1-3, 2-3)
168-
# Actually, with 3 samples we should have 3 pairs: (0,1), (0,2), (1,2)
169-
assert len(df) == 3
209+
# Check each category has correct distance ranges
210+
strain_rows = df[df['distance_category'] == 'same_strain']
211+
assert all(strain_rows['distance'] <= 0.0001)
170212

171-
# Check that distances are properly recorded
172-
assert all(df['distance'] >= 0)
213+
species_rows = df[df['distance_category'] == 'same_species']
214+
assert all((species_rows['distance'] > 0.0001) & (species_rows['distance'] <= 0.01))
215+
216+
subspecies_rows = df[df['distance_category'] == 'same_subspecies']
217+
assert all((subspecies_rows['distance'] > 0.01) & (subspecies_rows['distance'] <= 0.015))
218+
219+
divergent_rows = df[df['distance_category'] == 'divergent']
220+
assert all(divergent_rows['distance'] > 0.015)
173221

174222

175223
def test_execute(phylogeny_builder: Phylogeny):

0 commit comments

Comments
 (0)