Skip to content

Commit 657e74d

Browse files
authored
Merge pull request #85 from pvanheus/rewrite_count_constant_sites
Rewrite how count_constant_sites is implemented, add test
2 parents 52e9b68 + 5108681 commit 657e74d

File tree

8 files changed

+63
-23
lines changed

8 files changed

+63
-23
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
2.5.0
1+
2.5.1

src/alignment-file.c

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -105,15 +105,18 @@ void get_bases_for_each_snp(char filename[], char ** bases_for_snps)
105105
gzclose(fp);
106106
}
107107

108-
void detect_snps(char filename[], int pure_mode, int output_monomorphic, int output_constant_site_counts)
108+
void detect_snps(char filename[], int pure_mode, int output_monomorphic) {
109+
detect_snps_count_constant_sites(filename, pure_mode, output_monomorphic, NULL);
110+
}
111+
112+
void detect_snps_count_constant_sites(char filename[], int pure_mode, int output_monomorphic, int* constant_site_counts)
109113
{
110114
int i;
111115
int l;
112116
number_of_snps = 0;
113117
number_of_samples = 0;
114118
length_of_genome = 0;
115119
char * first_sequence;
116-
int base_counts[] = {0, 0, 0, 0};
117120
/* array below allows quick mapping of A, C, T and G characters to indices in base_counts array */
118121
const int char_to_base_count_index[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3};
119122

@@ -195,15 +198,12 @@ void detect_snps(char filename[], int pure_mode, int output_monomorphic, int out
195198
{
196199
snp_locations[current_snp_index] = i;
197200
current_snp_index++;
198-
} else if (is_pure(first_sequence[i])) {
199-
base_counts[char_to_base_count_index[(int) toupper(first_sequence[i])]]++;
201+
} else if (constant_site_counts != NULL && is_pure(first_sequence[i])) {
202+
constant_site_counts[char_to_base_count_index[(int) toupper(first_sequence[i])]]++;
200203
}
201204

202205
}
203206

204-
if (output_constant_site_counts)
205-
printf("%d,%d,%d,%d\n", base_counts[0], base_counts[1], base_counts[2], base_counts[3]);
206-
207207
free(first_sequence);
208208
kseq_destroy(seq);
209209
gzclose(fp);

src/alignment-file.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@
2222

2323
#include "kseq.h"
2424

25-
void detect_snps( char filename[], int pure_mode, int output_monomorphic, int output_constant_site_counts);
25+
void detect_snps( char filename[], int pure_mode, int output_monomorphic);
26+
void detect_snps_count_constant_sites(char filename[], int pure_mode, int output_monomorphic, int *constant_site_counts);
2627
void get_bases_for_each_snp(char filename[], char ** bases_for_snps);
2728
int is_unknown(char base);
2829
int get_length_of_genome();

src/main.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
#include <ctype.h>
2525
#include <unistd.h>
2626
#include <getopt.h>
27-
#include "alignment-file.h"
2827
#include "snp-sites.h"
2928
#include "config.h"
3029

@@ -126,7 +125,7 @@ int main (int argc, char **argv) {
126125
strncpy(multi_fasta_filename, argv[optind], FILENAME_MAX);
127126

128127
if (output_constant_site_counts) {
129-
detect_snps(multi_fasta_filename, pure_mode, output_monomorphic, output_constant_site_counts);
128+
count_constant_sites(multi_fasta_filename, output_filename);
130129
} else if( pure_mode || output_monomorphic)
131130
{
132131
generate_snp_sites_with_ref_pure_mono(multi_fasta_filename,

src/snp-sites.c

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ static int generate_snp_sites_generic(char filename[],
3838
int output_monomorphic)
3939
{
4040
int i;
41-
detect_snps(filename, pure_mode, output_monomorphic, 0);
41+
detect_snps(filename, pure_mode, output_monomorphic);
4242

4343
bases_for_snps = calloc(get_number_of_snps()+1, sizeof(char*));
4444

@@ -141,7 +141,32 @@ int generate_snp_sites_with_ref_pure_mono(char filename[],
141141
output_filename, output_reference, pure_mode, output_monomorphic);
142142
}
143143

144+
void count_constant_sites(char multi_fasta_filename[], char output_filename[]) {
145+
char cwd[100];
146+
FILE *input_file;
147+
FILE *output_file;
148+
int *constant_site_counts = NULL;
144149

150+
output_file = (FILE *) fopen(output_filename, "w");
151+
if (!output_file) {
152+
fprintf(stderr, "ERROR: cannot open %s for writing: %s\n", output_filename, strerror(errno));
153+
exit(EXIT_FAILURE);
154+
}
155+
156+
constant_site_counts = (int *) calloc(4, sizeof(int));
157+
if (constant_site_counts == NULL) {
158+
fprintf(stderr, "ERROR: cannot allocated memory for constant_site_counts");
159+
exit(EXIT_FAILURE);
160+
}
161+
162+
detect_snps_count_constant_sites(multi_fasta_filename, 0, 0, constant_site_counts);
163+
164+
165+
fprintf(output_file, "%d,%d,%d,%d\n", constant_site_counts[0], constant_site_counts[1],
166+
constant_site_counts[2], constant_site_counts[3]);
167+
fclose(output_file);
168+
free(constant_site_counts);
169+
}
145170
// Inefficient
146171
void strip_directory_from_filename(char * input_filename, char * output_filename)
147172
{

src/snp-sites.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
#define _SNP_SITES_H_
2323

2424
#include <stdio.h>
25+
#include <stdlib.h>
26+
#include <string.h>
27+
#include <errno.h>
2528

2629
int generate_snp_sites(char filename[],
2730
int output_multi_fasta_file,
@@ -42,7 +45,9 @@ int generate_snp_sites_with_ref_pure_mono(char filename[],
4245
int output_reference,
4346
int pure_mode,
4447
int output_monomorphic);
45-
48+
49+
void count_constant_sites(char multi_fasta_filename[], char filename[]);
50+
4651
void strip_directory_from_filename(char *input_filename,
4752
char *output_filename);
4853

tests/check-snp-sites.c

Lines changed: 19 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -183,64 +183,64 @@ END_TEST
183183

184184
START_TEST (valid_genome_length)
185185
{
186-
detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0,0);
186+
detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0);
187187
fail_unless( get_length_of_genome() == 2000 );
188188
}
189189
END_TEST
190190

191191
START_TEST (valid_genome_length_with_multiple_lines_per_sequence)
192192
{
193-
detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0,0);
193+
detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0);
194194
fail_unless( get_length_of_genome() == 2000 );
195195
}
196196
END_TEST
197197

198198
START_TEST (valid_number_of_sequences_in_file)
199199
{
200-
detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0,0);
200+
detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0);
201201
fail_unless( get_number_of_samples() == 109 );
202202
}
203203
END_TEST
204204

205205
START_TEST (valid_number_of_sequences_in_file_with_multiple_lines_per_sequence)
206206
{
207-
detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0,0);
207+
detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0);
208208
fail_unless( get_number_of_samples() == 109 );
209209
}
210210
END_TEST
211211

212212
START_TEST (number_of_snps_detected)
213213
{
214-
detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0,0);
214+
detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0);
215215
fail_unless( get_number_of_snps() == 5);
216216
}
217217
END_TEST
218218

219219
START_TEST (number_of_snps_detected_small)
220220
{
221-
detect_snps("../tests/data/small_alignment.aln",0,0,0);
221+
detect_snps("../tests/data/small_alignment.aln",0,0);
222222
fail_unless( get_number_of_snps() == 1);
223223
}
224224
END_TEST
225225

226226
START_TEST (detect_snps_pure_mode)
227227
{
228-
detect_snps("../tests/data/pure_mode_alignment.aln",1,0,0);
228+
detect_snps("../tests/data/pure_mode_alignment.aln",1,0);
229229
fail_unless( get_number_of_snps() == 2);
230230
}
231231
END_TEST
232232

233233

234234
START_TEST (detect_snps_pure_mode_monomorphic)
235235
{
236-
detect_snps("../tests/data/pure_mode_monomorphic_alignment.aln",1,1,0);
236+
detect_snps("../tests/data/pure_mode_monomorphic_alignment.aln",1,1);
237237
fail_unless( get_number_of_snps() == 3);
238238
}
239239
END_TEST
240240

241241
START_TEST (sample_names_from_alignment_file)
242242
{
243-
detect_snps("../tests/data/small_alignment.aln",0,0,0);
243+
detect_snps("../tests/data/small_alignment.aln",0,0);
244244
char ** current_sequence_names = get_sequence_names();
245245

246246
fail_unless(strcmp(current_sequence_names[0],"reference_sequence") == 0);
@@ -269,6 +269,14 @@ START_TEST (check_strip_directory_from_filename_with_directory)
269269
}
270270
END_TEST
271271

272+
START_TEST (check_count_constant_sites)
273+
{
274+
count_constant_sites("../tests/data/small_alignment.aln", "small_alignment.constant_site_counts.txt");
275+
fail_unless(compare_files("../tests/data/small_alignment.constant_site_counts.txt", "small_alignment.constant_site_counts.txt"));
276+
remove("small_alignment.constant_site_counts.txt");
277+
}
278+
END_TEST
279+
272280
Suite * snp_sites_suite (void)
273281
{
274282
Suite *s = suite_create ("Creating_SNP_Sites");
@@ -302,7 +310,8 @@ Suite * snp_sites_suite (void)
302310
tcase_add_test (tc_snp_sites, valid_phylip_plus_reference);
303311
tcase_add_test (tc_snp_sites, valid_alignment_with_pure_mode);
304312
tcase_add_test (tc_snp_sites, valid_alignment_with_monomorphic_sites);
305-
313+
tcase_add_test (tc_snp_sites, check_count_constant_sites);
314+
306315
tcase_add_exit_test(tc_snp_sites, invalid_with_uneven_file_lengths,EXIT_FAILURE);
307316
remove("uneven_alignment.aln.snp_sites.aln");
308317
suite_add_tcase (s, tc_snp_sites);

tests/data/small_alignment.constant_site_counts.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
2,2,1,2

0 commit comments

Comments
 (0)