1
+ use crate :: store:: OUTPUT ;
1
2
use crate :: store:: VCF ;
2
- use crate :: store:: { GenCode , OUTPUT } ;
3
+ use crate :: store:: { GenCodeExon , GenCodeGene , GenCodeTranscript } ;
3
4
use async_std:: task;
4
5
use std:: error:: Error ;
5
6
use std:: fs:: File ;
@@ -16,7 +17,7 @@ use std::process::Command;
16
17
*/
17
18
18
19
pub async fn varaltanno ( pathfile : & str , variant : & str ) -> Result < String , Box < dyn Error > > {
19
- let _ = Command :: new ( "wegt " ) .
20
+ let _ = Command :: new ( "wget " ) .
20
21
arg ( "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_48/gencode.v48.chr_patch_hapl_scaff.annotation.gtf.gz" )
21
22
. output ( )
22
23
. expect ( "command failed" ) ;
@@ -26,82 +27,240 @@ pub async fn varaltanno(pathfile: &str, variant: &str) -> Result<String, Box<dyn
26
27
. expect ( "command failed" ) ;
27
28
let fileopen = File :: open ( pathfile) . expect ( "file not present" ) ;
28
29
let fileread = BufReader :: new ( fileopen) ;
29
- let gtfresults: Vec < GenCode > =
30
- task:: block_on ( gtfread ( "gencode.v48.chr_patch_hapl_scaff.annotation.gtf" ) ) . unwrap ( ) ;
30
+ let gtfresults_gene: Vec < GenCodeGene > = task:: block_on ( gtfread_gene_annotation (
31
+ "gencode.v48.chr_patch_hapl_scaff.annotation.gtf" ,
32
+ ) )
33
+ . unwrap ( ) ;
34
+ let gtfresults_exon: Vec < GenCodeExon > = task:: block_on ( gtfread_exon_annotation (
35
+ "gencode.v48.chr_patch_hapl_scaff.annotation.gtf" ,
36
+ ) )
37
+ . unwrap ( ) ;
38
+ let gtfresults_transcript: Vec < GenCodeTranscript > = task:: block_on (
39
+ gtfread_transcript_annotation ( "gencode.v48.chr_patch_hapl_scaff.annotation.gtf" ) ,
40
+ )
41
+ . unwrap ( ) ;
31
42
let mut vcstring_file: Vec < VCF > = Vec :: new ( ) ;
32
43
for i in fileread. lines ( ) {
33
44
let linevcf = i. expect ( "file not present" ) ;
34
- let linevec: Vec < String > = linevcf
35
- . split ( "\t " )
36
- . map ( |x| x. to_string ( ) )
37
- . collect :: < Vec < _ > > ( ) ;
38
- vcstring_file. push ( VCF {
39
- chrom : linevec[ 0 ] . to_string ( ) ,
40
- pos : linevec[ 1 ] . parse :: < usize > ( ) . unwrap ( ) ,
41
- id : linevec[ 2 ] . to_string ( ) ,
42
- refnuc : linevec[ 3 ] . to_string ( ) ,
43
- altnuc : linevec[ 4 ] . to_string ( ) ,
44
- qual : linevec[ 5 ] . to_string ( ) ,
45
- } ) ;
45
+ if !linevcf. starts_with ( "#" ) {
46
+ let linevec: Vec < String > = linevcf
47
+ . split ( "\t " )
48
+ . map ( |x| x. to_string ( ) )
49
+ . collect :: < Vec < _ > > ( ) ;
50
+ vcstring_file. push ( VCF {
51
+ chrom : linevec[ 0 ] . to_string ( ) ,
52
+ pos : linevec[ 1 ] . parse :: < usize > ( ) . unwrap ( ) ,
53
+ id : linevec[ 2 ] . to_string ( ) ,
54
+ refnuc : linevec[ 3 ] . to_string ( ) ,
55
+ altnuc : linevec[ 4 ] . to_string ( ) ,
56
+ qual : linevec[ 5 ] . to_string ( ) ,
57
+ } ) ;
58
+ }
46
59
}
47
60
48
- let mut output: Vec < OUTPUT > = Vec :: new ( ) ;
61
+ let mut output_gene: Vec < OUTPUT > = Vec :: new ( ) ;
62
+ let mut output_exon: Vec < OUTPUT > = Vec :: new ( ) ;
63
+ let mut output_transcript: Vec < OUTPUT > = Vec :: new ( ) ;
64
+
49
65
for i in vcstring_file. iter ( ) {
50
- for j in gtfresults . iter ( ) {
51
- if i. pos > j. start && i. pos <= j. stop && j . typeannotate == variant {
52
- output . push ( OUTPUT {
66
+ for j in gtfresults_gene . iter ( ) {
67
+ if i. pos > j. start && i. pos <= j. stop && i . altnuc == variant {
68
+ output_gene . push ( OUTPUT {
53
69
chrom : i. chrom . clone ( ) ,
54
70
pos : i. pos . clone ( ) . to_string ( ) ,
55
71
id : i. id . clone ( ) ,
56
- refnuc : i. id . clone ( ) ,
72
+ refnuc : i. refnuc . clone ( ) ,
57
73
altnuc : i. altnuc . clone ( ) ,
58
74
typeannotate : j. typeannotate . clone ( ) ,
75
+ geneid : j. geneid . clone ( ) ,
59
76
genename : j. genename . clone ( ) ,
60
77
} ) ;
61
78
}
62
79
}
63
80
}
64
81
65
- let mut mutwrite = File :: create ( "annotationfile.txt" ) . expect ( "file not present" ) ;
66
- writeln ! (
67
- mutwrite,
68
- "{}\t {}\t {}\t {}\t {}\t {}\t {}" ,
69
- "chrom" , "pos" , "id" , "refnuc" , "altnuc" , "typeannotate" , "genename"
70
- )
71
- . expect ( "file not found" ) ;
72
- for i in output. iter ( ) {
82
+ for i in vcstring_file. iter ( ) {
83
+ for j in gtfresults_exon. iter ( ) {
84
+ if i. pos > j. start && i. pos <= j. stop && i. altnuc == variant {
85
+ output_exon. push ( OUTPUT {
86
+ chrom : i. chrom . clone ( ) ,
87
+ pos : i. pos . clone ( ) . to_string ( ) ,
88
+ id : i. id . clone ( ) ,
89
+ refnuc : i. refnuc . clone ( ) ,
90
+ altnuc : i. altnuc . clone ( ) ,
91
+ typeannotate : j. typeannotate . clone ( ) ,
92
+ geneid : j. geneid . clone ( ) ,
93
+ genename : j. genename . clone ( ) ,
94
+ } ) ;
95
+ }
96
+ }
97
+ }
98
+
99
+ for i in vcstring_file. iter ( ) {
100
+ for j in gtfresults_transcript. iter ( ) {
101
+ if i. pos > j. start && i. pos <= j. stop && i. altnuc == variant {
102
+ output_transcript. push ( OUTPUT {
103
+ chrom : i. chrom . clone ( ) ,
104
+ pos : i. pos . clone ( ) . to_string ( ) ,
105
+ id : i. id . clone ( ) ,
106
+ refnuc : i. refnuc . clone ( ) ,
107
+ altnuc : i. altnuc . clone ( ) ,
108
+ typeannotate : j. typeannotate . clone ( ) ,
109
+ geneid : j. geneid . clone ( ) ,
110
+ genename : j. genename . clone ( ) ,
111
+ } ) ;
112
+ }
113
+ }
114
+ }
115
+
116
+ let mut mutwrite_gene = File :: create ( "annotationfile-gene.txt" ) . expect ( "file not present" ) ;
117
+ let mut mutwrite_exon = File :: create ( "annotationfile-exon.txt" ) . expect ( "file not present" ) ;
118
+ let mut mutwrite_transcript =
119
+ File :: create ( "annotationfile-transcript.txt" ) . expect ( "file not present" ) ;
120
+
121
+ for i in output_gene. iter ( ) {
73
122
writeln ! (
74
- mutwrite ,
75
- "{}\t {}\t {}\t {}\t {}\t {}\t {}" ,
76
- i. chrom, i. pos, i. id, i. refnuc, i. altnuc, i. typeannotate, i. genename
123
+ mutwrite_gene ,
124
+ "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {} " ,
125
+ i. chrom, i. pos, i. id, i. refnuc, i. altnuc, i. typeannotate, i. geneid , i . genename
77
126
)
78
127
. expect ( "line not found" ) ;
79
128
}
129
+
130
+ for i in output_exon. iter ( ) {
131
+ writeln ! (
132
+ mutwrite_exon,
133
+ "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {}" ,
134
+ i. chrom, i. pos, i. id, i. refnuc, i. altnuc, i. typeannotate, i. geneid, i. genename
135
+ )
136
+ . expect ( "line not found" ) ;
137
+ }
138
+
139
+ for i in output_transcript. iter ( ) {
140
+ writeln ! (
141
+ mutwrite_transcript,
142
+ "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {}" ,
143
+ i. chrom, i. pos, i. id, i. refnuc, i. altnuc, i. typeannotate, i. geneid, i. genename
144
+ )
145
+ . expect ( "line not found" ) ;
146
+ }
147
+
80
148
Ok ( "The regions have been annotated" . to_string ( ) )
81
149
}
82
150
83
- pub async fn gtfread ( gtffile : & str ) -> Result < Vec < GenCode > , Box < dyn Error > > {
151
+ pub async fn gtfread_gene_annotation ( gtffile : & str ) -> Result < Vec < GenCodeGene > , Box < dyn Error > > {
84
152
let fileopen = File :: open ( gtffile) . expect ( "file not found" ) ;
85
153
let fileread = BufReader :: new ( fileopen) ;
86
- let mut gtf_vector: Vec < GenCode > = Vec :: new ( ) ;
154
+ let mut gtf_vector: Vec < GenCodeGene > = Vec :: new ( ) ;
87
155
for i in fileread. lines ( ) {
88
156
let linegtf = i. expect ( "line not found" ) ;
89
- let linevec: Vec < String > = linegtf
90
- . split ( "\t " )
91
- . map ( |x| x. to_string ( ) )
92
- . collect :: < Vec < String > > ( ) ;
93
- let linecollect: String = linevec[ 9 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 2 ]
94
- . replace ( " " , "-" )
95
- . split ( "-" )
96
- . collect :: < Vec < _ > > ( ) [ 1 ]
97
- . to_string ( ) ;
98
- gtf_vector. push ( GenCode {
99
- chrom : linevec[ 0 ] . clone ( ) ,
100
- typeannotate : linevec[ 2 ] . clone ( ) ,
101
- start : linevec[ 3 ] . parse :: < usize > ( ) . unwrap ( ) ,
102
- stop : linevec[ 4 ] . parse :: < usize > ( ) . unwrap ( ) ,
103
- genename : linecollect,
104
- } )
157
+ if !linegtf. starts_with ( "#" ) {
158
+ let linevec: Vec < String > = linegtf
159
+ . split ( "\t " )
160
+ . map ( |x| x. to_string ( ) )
161
+ . collect :: < Vec < String > > ( ) ;
162
+ if linevec[ 2 ] == "gene" {
163
+ let linecollect: String = linevec[ 8 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 2 ]
164
+ . replace ( " " , "-" )
165
+ . split ( "-" )
166
+ . collect :: < Vec < _ > > ( ) [ 2 ]
167
+ . to_string ( )
168
+ . replace ( "\" " , "" ) ;
169
+ let genecollect: String = linevec[ 8 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 0 ]
170
+ . replace ( " " , "-" )
171
+ . split ( "-" )
172
+ . collect :: < Vec < _ > > ( ) [ 1 ]
173
+ . to_string ( )
174
+ . replace ( "\" " , "" ) ;
175
+ gtf_vector. push ( GenCodeGene {
176
+ chrom : linevec[ 0 ] . clone ( ) ,
177
+ typeannotate : linevec[ 2 ] . clone ( ) ,
178
+ start : linevec[ 3 ] . parse :: < usize > ( ) . unwrap ( ) ,
179
+ stop : linevec[ 4 ] . parse :: < usize > ( ) . unwrap ( ) ,
180
+ geneid : genecollect,
181
+ genename : linecollect,
182
+ } )
183
+ }
184
+ }
185
+ }
186
+ Ok ( gtf_vector)
187
+ }
188
+
189
+ pub async fn gtfread_exon_annotation ( gtffile : & str ) -> Result < Vec < GenCodeExon > , Box < dyn Error > > {
190
+ let fileopen = File :: open ( gtffile) . expect ( "file not found" ) ;
191
+ let fileread = BufReader :: new ( fileopen) ;
192
+ let mut gtf_vector: Vec < GenCodeExon > = Vec :: new ( ) ;
193
+ for i in fileread. lines ( ) {
194
+ let linegtf = i. expect ( "line not found" ) ;
195
+ if !linegtf. starts_with ( "#" ) {
196
+ let linevec: Vec < String > = linegtf
197
+ . split ( "\t " )
198
+ . map ( |x| x. to_string ( ) )
199
+ . collect :: < Vec < String > > ( ) ;
200
+ if linevec[ 2 ] == "exon" {
201
+ let linecollect: String = linevec[ 8 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 3 ]
202
+ . replace ( " " , "-" )
203
+ . split ( "-" )
204
+ . collect :: < Vec < _ > > ( ) [ 2 ]
205
+ . to_string ( )
206
+ . replace ( "\" " , "" ) ;
207
+ let genecollect: String = linevec[ 8 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 0 ]
208
+ . replace ( " " , "-" )
209
+ . split ( "-" )
210
+ . collect :: < Vec < _ > > ( ) [ 1 ]
211
+ . to_string ( )
212
+ . replace ( "\" " , "" ) ;
213
+
214
+ gtf_vector. push ( GenCodeExon {
215
+ chrom : linevec[ 0 ] . clone ( ) ,
216
+ typeannotate : linevec[ 2 ] . clone ( ) ,
217
+ start : linevec[ 3 ] . parse :: < usize > ( ) . unwrap ( ) ,
218
+ stop : linevec[ 4 ] . parse :: < usize > ( ) . unwrap ( ) ,
219
+ geneid : genecollect,
220
+ genename : linecollect,
221
+ } )
222
+ }
223
+ }
224
+ }
225
+ Ok ( gtf_vector)
226
+ }
227
+
228
+ pub async fn gtfread_transcript_annotation (
229
+ gtffile : & str ,
230
+ ) -> Result < Vec < GenCodeTranscript > , Box < dyn Error > > {
231
+ let fileopen = File :: open ( gtffile) . expect ( "file not found" ) ;
232
+ let fileread = BufReader :: new ( fileopen) ;
233
+ let mut gtf_vector: Vec < GenCodeTranscript > = Vec :: new ( ) ;
234
+ for i in fileread. lines ( ) {
235
+ let linegtf = i. expect ( "line not found" ) ;
236
+ if !linegtf. starts_with ( "#" ) {
237
+ let linevec: Vec < String > = linegtf
238
+ . split ( "\t " )
239
+ . map ( |x| x. to_string ( ) )
240
+ . collect :: < Vec < String > > ( ) ;
241
+ if linevec[ 2 ] == "transcript" {
242
+ let linecollect: String = linevec[ 8 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 3 ]
243
+ . replace ( " " , "-" )
244
+ . split ( "-" )
245
+ . collect :: < Vec < _ > > ( ) [ 2 ]
246
+ . to_string ( )
247
+ . replace ( "\" " , "" ) ;
248
+ let genecollect: String = linevec[ 8 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 0 ]
249
+ . replace ( " " , "-" )
250
+ . split ( "-" )
251
+ . collect :: < Vec < _ > > ( ) [ 1 ]
252
+ . to_string ( )
253
+ . replace ( "\" " , "" ) ;
254
+ gtf_vector. push ( GenCodeTranscript {
255
+ chrom : linevec[ 0 ] . clone ( ) ,
256
+ typeannotate : linevec[ 2 ] . clone ( ) ,
257
+ start : linevec[ 3 ] . parse :: < usize > ( ) . unwrap ( ) ,
258
+ stop : linevec[ 4 ] . parse :: < usize > ( ) . unwrap ( ) ,
259
+ geneid : genecollect,
260
+ genename : linecollect,
261
+ } )
262
+ }
263
+ }
105
264
}
106
265
Ok ( gtf_vector)
107
266
}
0 commit comments