2626#include "vcf.h"
2727#include "alignment-file.h"
2828#include "snp-sites.h"
29+ #include <assert.h>
2930
3031void create_vcf_file (char filename [], int snp_locations [],int number_of_snps , char * * bases_for_snps , char * * sequence_names , int number_of_samples )
3132{
@@ -67,7 +68,6 @@ void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int num
6768void output_vcf_row (FILE * vcf_file_pointer , char * bases_for_snp , int snp_location , int number_of_samples )
6869{
6970 char reference_base = bases_for_snp [0 ];
70- char alt_bases [MAXIMUM_NUMBER_OF_ALT_BASES ];
7171 if (reference_base == '\0' )
7272 {
7373 return ;
@@ -88,7 +88,7 @@ void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_locat
8888 // ALT
8989 // Need to look through list and find unique characters
9090
91- alternative_bases (reference_base , bases_for_snp , alt_bases , number_of_samples );
91+ char * alt_bases = alternative_bases (reference_base , bases_for_snp , number_of_samples );
9292 char * alternative_bases_string = format_alternative_bases (alt_bases );
9393 fprintf ( vcf_file_pointer , "%s\t" , alternative_bases_string );
9494 free (alternative_bases_string );
@@ -107,22 +107,24 @@ void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_locat
107107
108108 // Bases for each sample
109109 output_vcf_row_samples_bases (vcf_file_pointer , reference_base , alt_bases , bases_for_snp , number_of_samples );
110+ free (alt_bases );
110111
111112 fprintf ( vcf_file_pointer , "\n" );
112113}
113114
114115
115- void alternative_bases (char reference_base , char * bases_for_snp , char alt_bases [] , int number_of_samples )
116+ char * alternative_bases (char reference_base , char * bases_for_snp , int number_of_samples )
116117{
117118 int i ;
118119 int num_alt_bases = 0 ;
120+ char * alt_bases = calloc (MAXIMUM_NUMBER_OF_ALT_BASES + 1 , sizeof (char ));
119121 for (i = 0 ; i < number_of_samples ; i ++ )
120122 {
121123 if ((bases_for_snp [i ] != reference_base ) && (bases_for_snp [i ] != '-' ) && (toupper (bases_for_snp [i ]) != 'N' ) )
122124 {
123125 if (check_if_char_in_string (alt_bases , bases_for_snp [i ], num_alt_bases ) == 0 )
124126 {
125- if (num_alt_bases > MAXIMUM_NUMBER_OF_ALT_BASES - 2 )
127+ if (num_alt_bases >= MAXIMUM_NUMBER_OF_ALT_BASES )
126128 {
127129 fprintf (stderr , "Unexpectedly large number of alternative bases found between sequences. Please check input file is not corrupted\n\n" );
128130 fflush (stderr );
@@ -133,13 +135,14 @@ void alternative_bases(char reference_base, char * bases_for_snp, char alt_bases
133135 }
134136 }
135137 }
136- alt_bases [ num_alt_bases ] = '\0' ;
138+ return alt_bases ;
137139}
138140
139141char * format_allele_index (char base , char reference_base , char * alt_bases )
140142{
141- int maximum_format_length = (int ) log10 ((double ) MAXIMUM_NUMBER_OF_ALT_BASES ) + 1 ;
142- char * result = malloc ((maximum_format_length + 1 )* sizeof (char ));
143+ int length_of_alt_bases = strlen (alt_bases );
144+ assert (length_of_alt_bases < 100 );
145+ char * result = calloc (3 , sizeof (char ));
143146 int index ;
144147 if (reference_base == base || toupper (base ) == 'N' || base == '-' )
145148 {
@@ -148,45 +151,29 @@ char * format_allele_index(char base, char reference_base, char * alt_bases)
148151 else
149152 {
150153 sprintf (result , "." );
151- for (index = 1 ; index < MAXIMUM_NUMBER_OF_ALT_BASES ; index ++ )
154+ for (index = 1 ; index <= length_of_alt_bases ; index ++ )
152155 {
153156 if (alt_bases [index - 1 ] == base )
154157 {
155158 sprintf (result , "%i" , index );
156159 break ;
157160 }
158- if (alt_bases [index - 1 ] == '\0' )
159- {
160- break ;
161- }
162161 }
163162 }
164163 return result ;
165164}
166165
167166char * format_alternative_bases (char * alt_bases )
168167{
169- char * formatted_alt_bases = malloc (MAXIMUM_NUMBER_OF_ALT_BASES * 2 * sizeof (char ));
168+ int number_of_alt_bases = strlen (alt_bases );
169+ assert ( number_of_alt_bases < MAXIMUM_NUMBER_OF_ALT_BASES );
170+ char * formatted_alt_bases = calloc (number_of_alt_bases * 2 + 1 , sizeof (char ));
170171 int i ;
171- for (i = 0 ; i < MAXIMUM_NUMBER_OF_ALT_BASES ; i ++ )
172+ formatted_alt_bases [0 ] = alt_bases [0 ];
173+ for (i = 1 ; i < number_of_alt_bases ; i ++ )
172174 {
173- if (alt_bases [i ] == '\0' )
174- {
175- if (i == 0 )
176- {
177- formatted_alt_bases [0 ] = '\0' ;
178- }
179- else
180- {
181- formatted_alt_bases [i * 2 - 1 ] = '\0' ;
182- }
183- break ;
184- }
185- else
186- {
187- formatted_alt_bases [i * 2 ] = alt_bases [i ];
188- formatted_alt_bases [i * 2 + 1 ] = ',' ;
189- }
175+ formatted_alt_bases [i * 2 - 1 ] = ',' ;
176+ formatted_alt_bases [i * 2 ] = alt_bases [i ];
190177 }
191178 return formatted_alt_bases ;
192179}
0 commit comments