Skip to content

Commit 497b6e0

Browse files
committed
Simplify VCF code with calloc
Use calloc and strlen to simplify the VCF-related code.
1 parent d78487d commit 497b6e0

File tree

4 files changed

+35
-32
lines changed

4 files changed

+35
-32
lines changed

src/vcf.c

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "vcf.h"
2727
#include "alignment-file.h"
2828
#include "snp-sites.h"
29+
#include <assert.h>
2930

3031
void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples)
3132
{
@@ -67,7 +68,6 @@ void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int num
6768
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples)
6869
{
6970
char reference_base = bases_for_snp[0];
70-
char alt_bases[MAXIMUM_NUMBER_OF_ALT_BASES];
7171
if(reference_base == '\0')
7272
{
7373
return;
@@ -88,7 +88,7 @@ void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_locat
8888
// ALT
8989
// Need to look through list and find unique characters
9090

91-
alternative_bases(reference_base, bases_for_snp, alt_bases, number_of_samples);
91+
char * alt_bases = alternative_bases(reference_base, bases_for_snp, number_of_samples);
9292
char * alternative_bases_string = format_alternative_bases(alt_bases);
9393
fprintf( vcf_file_pointer, "%s\t", alternative_bases_string );
9494
free(alternative_bases_string);
@@ -107,22 +107,24 @@ void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_locat
107107

108108
// Bases for each sample
109109
output_vcf_row_samples_bases(vcf_file_pointer, reference_base, alt_bases, bases_for_snp, number_of_samples );
110+
free(alt_bases);
110111

111112
fprintf( vcf_file_pointer, "\n");
112113
}
113114

114115

115-
void alternative_bases(char reference_base, char * bases_for_snp, char alt_bases[], int number_of_samples)
116+
char * alternative_bases(char reference_base, char * bases_for_snp, int number_of_samples)
116117
{
117118
int i;
118119
int num_alt_bases = 0;
120+
char * alt_bases = calloc(MAXIMUM_NUMBER_OF_ALT_BASES+1, sizeof(char));
119121
for(i=0; i< number_of_samples; i++ )
120122
{
121123
if((bases_for_snp[i] != reference_base) && (bases_for_snp[i] != '-') && (toupper(bases_for_snp[i]) != 'N') )
122124
{
123125
if(check_if_char_in_string(alt_bases, bases_for_snp[i], num_alt_bases) == 0)
124126
{
125-
if (num_alt_bases > MAXIMUM_NUMBER_OF_ALT_BASES - 2)
127+
if (num_alt_bases >= MAXIMUM_NUMBER_OF_ALT_BASES)
126128
{
127129
fprintf(stderr, "Unexpectedly large number of alternative bases found between sequences. Please check input file is not corrupted\n\n");
128130
fflush(stderr);
@@ -133,13 +135,14 @@ void alternative_bases(char reference_base, char * bases_for_snp, char alt_bases
133135
}
134136
}
135137
}
136-
alt_bases[num_alt_bases] = '\0';
138+
return alt_bases;
137139
}
138140

139141
char * format_allele_index(char base, char reference_base, char * alt_bases)
140142
{
141-
int maximum_format_length = (int) log10((double) MAXIMUM_NUMBER_OF_ALT_BASES) + 1;
142-
char * result = malloc((maximum_format_length + 1)*sizeof(char));
143+
int length_of_alt_bases = strlen(alt_bases);
144+
assert(length_of_alt_bases < 100);
145+
char * result = calloc(3, sizeof(char));
143146
int index;
144147
if (reference_base == base || toupper(base) == 'N' || base == '-')
145148
{
@@ -148,45 +151,29 @@ char * format_allele_index(char base, char reference_base, char * alt_bases)
148151
else
149152
{
150153
sprintf(result, ".");
151-
for (index = 1; index<MAXIMUM_NUMBER_OF_ALT_BASES; index++)
154+
for (index = 1; index <= length_of_alt_bases; index++)
152155
{
153156
if (alt_bases[index-1] == base)
154157
{
155158
sprintf(result, "%i", index);
156159
break;
157160
}
158-
if (alt_bases[index-1] == '\0')
159-
{
160-
break;
161-
}
162161
}
163162
}
164163
return result;
165164
}
166165

167166
char * format_alternative_bases(char * alt_bases)
168167
{
169-
char * formatted_alt_bases = malloc(MAXIMUM_NUMBER_OF_ALT_BASES*2*sizeof(char));
168+
int number_of_alt_bases = strlen(alt_bases);
169+
assert( number_of_alt_bases < MAXIMUM_NUMBER_OF_ALT_BASES );
170+
char * formatted_alt_bases = calloc(number_of_alt_bases*2 + 1, sizeof(char));
170171
int i;
171-
for (i = 0; i < MAXIMUM_NUMBER_OF_ALT_BASES; i++)
172+
formatted_alt_bases[0] = alt_bases[0];
173+
for (i = 1; i < number_of_alt_bases; i++)
172174
{
173-
if (alt_bases[i] == '\0')
174-
{
175-
if (i == 0)
176-
{
177-
formatted_alt_bases[0] = '\0';
178-
}
179-
else
180-
{
181-
formatted_alt_bases[i*2 - 1] = '\0';
182-
}
183-
break;
184-
}
185-
else
186-
{
187-
formatted_alt_bases[i*2] = alt_bases[i];
188-
formatted_alt_bases[i*2 + 1] = ',';
189-
}
175+
formatted_alt_bases[i*2 - 1] = ',';
176+
formatted_alt_bases[i*2] = alt_bases[i];
190177
}
191178
return formatted_alt_bases;
192179
}

src/vcf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, c
2626
void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples);
2727
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples);
2828
void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base, char * alt_bases, char * bases_for_snp, int number_of_samples);
29-
void alternative_bases(char reference_base, char * bases_for_snp, char alt_bases[], int number_of_samples);
29+
char * alternative_bases(char reference_base, char * bases_for_snp, int number_of_samples);
3030
char * format_alternative_bases(char *);
3131
char * format_allele_index(char, char, char *);
3232
int check_if_char_in_string(char search_string[], char target_char, int search_string_length);

tests/check-vcf.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,20 @@
2828
#include "check-vcf.h"
2929
#include "vcf.h"
3030

31+
void check_alternative_bases(char reference_base, char * bases_for_snp, int number_of_samples, char * expected_result)
32+
{
33+
char * result;
34+
result = alternative_bases(reference_base, bases_for_snp, number_of_samples);
35+
ck_assert_str_eq(result, expected_result);
36+
free(result);
37+
}
38+
39+
START_TEST (alternative_bases_test)
40+
{
41+
check_alternative_bases('A', "AGCT-nN", 6, "GCT");
42+
}
43+
END_TEST
44+
3145
void check_format_alternative_bases(char * test_case, char * expected_result)
3246
{
3347
char * result;
@@ -78,6 +92,7 @@ Suite * vcf_suite (void)
7892
Suite *s = suite_create ("Creating_VCF_file");
7993

8094
TCase *tc_vcf_file = tcase_create ("vcf_file");
95+
tcase_add_test (tc_vcf_file, alternative_bases_test);
8196
tcase_add_test (tc_vcf_file, format_alternative_bases_test);
8297
tcase_add_test (tc_vcf_file, format_allele_index_test);
8398
suite_add_tcase (s, tc_vcf_file);

tests/check-vcf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#ifndef _CHECK_VCF_H_
2121
#define _CHECK_VCF_H_
2222

23+
void check_alternative_bases(char, char *, int, char *);
2324
void check_format_alternative_bases(char *, char *);
2425
void check_format_allele_index(char, char, char *, char *);
2526
Suite * vcf_suite (void);

0 commit comments

Comments
 (0)