Skip to content

Commit bd2428e

Browse files
committed
Merge pull request #20 from sanger-pathogens/BT_add_vcf_sequence_length
Add reference sequence length to VCF output
2 parents 3451d0a + b4497b1 commit bd2428e

File tree

6 files changed

+10
-7
lines changed

6 files changed

+10
-7
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
2.0.0
1+
2.0.1

src/snp-sites.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_v
108108
concat_strings_created_with_malloc(vcf_output_filename,extension);
109109
}
110110

111-
create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
111+
create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples, length_of_genome);
112112
free(vcf_output_filename);
113113
}
114114

src/vcf.c

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,15 +28,15 @@
2828
#include "snp-sites.h"
2929
#include <assert.h>
3030

31-
void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples)
31+
void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples, size_t length_of_genome)
3232
{
3333
FILE *vcf_file_pointer;
3434
char * base_filename;
3535
base_filename = (char *) malloc(MAX_FILENAME_SIZE*sizeof(char));
3636
strcpy(base_filename, filename);
3737

3838
vcf_file_pointer=fopen(base_filename, "w");
39-
output_vcf_header(vcf_file_pointer,sequence_names, number_of_samples);
39+
output_vcf_header(vcf_file_pointer,sequence_names, number_of_samples, length_of_genome);
4040
output_vcf_snps(vcf_file_pointer, bases_for_snps, snp_locations, number_of_snps, number_of_samples);
4141
fclose(vcf_file_pointer);
4242
free(base_filename);
@@ -51,10 +51,11 @@ void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_
5151
}
5252
}
5353

54-
void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples)
54+
void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples, size_t length_of_genome)
5555
{
5656
int i;
5757
fprintf( vcf_file_pointer, "##fileformat=VCFv4.1\n" );
58+
fprintf( vcf_file_pointer, "##contig=<ID=1,length=%zu>\n", length_of_genome );
5859
fprintf( vcf_file_pointer, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n" );
5960
fprintf( vcf_file_pointer, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" );
6061

src/vcf.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121
#ifndef _VCF_H_
2222
#define _VCF_H_
2323

24-
void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples);
25-
void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples);
24+
void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples, size_t length_of_genome);
25+
void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples, size_t length_of_genome);
2626
void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples);
2727
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples);
2828
void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base, char * alt_bases, char * bases_for_snp, int number_of_samples);

tests/data/alignment_file_one_line_per_sequence.aln.vcf

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
##fileformat=VCFv4.1
2+
##contig=<ID=1,length=2000>
23
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
34
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 2956_6_1 2956_6_2 2956_6_3 2956_6_4 2956_6_5 2956_6_6 3002_8_1 3002_8_2 3002_8_3 3002_8_4 3002_8_5 3002_8_6 3002_8_7 4056_2_10 4056_2_11 4056_2_1 4056_2_12 4056_2_2 4056_2_3 4056_2_4 4056_2_5 4056_2_6 4056_2_7 4056_2_9 4056_6_10 4056_6_11 4056_6_12 4056_6_2 4056_6_3 4056_6_4 4056_6_5 4056_6_6 4056_6_7 4056_6_9 4056_7_10 4056_7_11 4056_7_1 4056_7_12 4056_7_7 4056_7_8 4056_7_9 4056_8_10 4056_8_1 4056_8_12 4056_8_2 4056_8_3 4056_8_4 4056_8_6 4056_8_8 4056_8_9 4075_3_11 4075_3_12 4075_3_2 4075_3_3 4075_3_5 4075_3_6 4075_3_7 4075_3_8 4075_3_9 4370_2_11 4370_2_12 4370_2_2 4370_2_3 4370_2_4 4370_2_7 4370_2_8 4370_2_9 4370_3_11 4370_3_1 4370_3_6 4370_3_7 4370_3_8 5174_5_1 5174_5_2 5174_5_3 5174_5_4 5174_5_5 5174_5_6 5174_5_7 5174_5_9 5174_6_10 5174_6_1 5174_6_2 5174_6_3 5174_6_4 5174_6_5 5174_6_6 5174_6_7 5174_6_8 5174_6_9 5174_7_10 5174_7_1 5174_7_2 5174_7_3 5174_7_4 5174_7_5 5174_7_6 5174_7_7 5174_7_8 5174_7_9 5174_8_1 5174_8_2 5174_8_3 5174_8_5 5174_8_6 5174_8_8 5174_8_9 Vibrio_parahaemolyticus Vibrio_vulnificus
45
1 825 . A G . . . GT 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0

tests/data/alignment_file_with_n.aln.vcf

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
##fileformat=VCFv4.1
2+
##contig=<ID=1,length=2000>
23
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
34
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 2956_6_1 2956_6_2 2956_6_3 2956_6_4 2956_6_5 2956_6_6 3002_8_1 3002_8_2 3002_8_3 3002_8_4 3002_8_5 3002_8_6 3002_8_7 4056_2_10 4056_2_11 4056_2_1 4056_2_12 4056_2_2 4056_2_3 4056_2_4 4056_2_5 4056_2_6 4056_2_7 4056_2_9 4056_6_10 4056_6_11 4056_6_12 4056_6_2 4056_6_3 4056_6_4 4056_6_5 4056_6_6 4056_6_7 4056_6_9 4056_7_10 4056_7_11 4056_7_1 4056_7_12 4056_7_7 4056_7_8 4056_7_9 4056_8_10 4056_8_1 4056_8_12 4056_8_2 4056_8_3 4056_8_4 4056_8_6 4056_8_8 4056_8_9 4075_3_11 4075_3_12 4075_3_2 4075_3_3 4075_3_5 4075_3_6 4075_3_7 4075_3_8 4075_3_9 4370_2_11 4370_2_12 4370_2_2 4370_2_3 4370_2_4 4370_2_7 4370_2_8 4370_2_9 4370_3_11 4370_3_1 4370_3_6 4370_3_7 4370_3_8 5174_5_1 5174_5_2 5174_5_3 5174_5_4 5174_5_5 5174_5_6 5174_5_7 5174_5_9 5174_6_10 5174_6_1 5174_6_2 5174_6_3 5174_6_4 5174_6_5 5174_6_6 5174_6_7 5174_6_8 5174_6_9 5174_7_10 5174_7_1 5174_7_2 5174_7_3 5174_7_4 5174_7_5 5174_7_6 5174_7_7 5174_7_8 5174_7_9 5174_8_1 5174_8_2 5174_8_3 5174_8_5 5174_8_6 5174_8_8 5174_8_9 Vibrio_parahaemolyticus Vibrio_vulnificus
45
1 825 . A G . . . GT 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0

0 commit comments

Comments
 (0)