1
- use std :: fs :: File ;
2
- use std :: io :: { BufReader , BufRead } ;
1
+ use crate :: store :: VCF ;
2
+ use crate :: store :: { GENCODE , OUTPUT } ;
3
3
use std:: error:: Error ;
4
+ use std:: fs:: File ;
5
+ use std:: io:: Write ;
6
+ use std:: io:: { BufRead , BufReader } ;
4
7
use std:: process:: Command ;
5
- use crate :: store:: VCF ;
6
-
7
8
8
9
/*
9
10
Author Gaurav Sablok
@@ -13,40 +14,93 @@ use crate::store::VCF;
13
14
Date: 2025-6-9
14
15
*/
15
16
16
- pub fn varaltanno ( pathfile : & str , variant : & str ) -> Result < String , Box < dyn Error > > {
17
-
17
+ pub async fn varaltanno ( pathfile : & str , variant : & str ) -> Result < String , Box < dyn Error > > {
18
18
let _ = Command :: new ( "wegt" ) .
19
19
arg ( "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_48/gencode.v48.chr_patch_hapl_scaff.annotation.gtf.gz" )
20
20
. output ( )
21
21
. expect ( "command failed" ) ;
22
- let _ = Command :: new ( "gunzip" ) . arg ( "gencode.v48.chr_patch_hapl_scaff.annotation.gtf.gz" )
22
+ let _ = Command :: new ( "gunzip" )
23
+ . arg ( "gencode.v48.chr_patch_hapl_scaff.annotation.gtf.gz" )
23
24
. output ( )
24
25
. expect ( "command failed" ) ;
25
26
let fileopen = File :: open ( pathfile) . expect ( "file not present" ) ;
26
27
let fileread = BufReader :: new ( fileopen) ;
27
- let gtfopen = File :: open ( "gencode.v48.chr_patch_hapl_scaff.annotation.gtf" )
28
- . expect ( "file not present" ) ;
29
- let gtfread = BufReader :: new ( gtfopen) ;
30
- let exonvector: Vec < String > = Vec :: new ( ) ;
31
- let gene: Vec < String > = Vec :: new ( ) ;
28
+ let gtfresults: Vec < GENCODE > =
29
+ gtfread ( "gencode.v48.chr_patch_hapl_scaff.annotation.gtf" ) . unwrap ( ) ;
32
30
let mut vcstring_file: Vec < VCF > = Vec :: new ( ) ;
33
- for i in fileread. lines ( ) {
31
+ for i in fileread. lines ( ) {
34
32
let linevcf = i. expect ( "file not present" ) ;
35
- let linevec: Vec < String > = linevcf. split ( "\t " ) .
36
- map ( |x|x. to_string ( ) )
33
+ let linevec: Vec < String > = linevcf
34
+ . split ( "\t " )
35
+ . map ( |x| x. to_string ( ) )
37
36
. collect :: < Vec < _ > > ( ) ;
38
- vcstring_file. push ( VCF {
39
- chrom : linevec[ 0 ] . to_string ( ) ,
40
- pos : linevec[ 1 ] . parse :: < usize > ( ) . unwrap ( ) ,
41
- id : linevec[ 2 ] . to_string ( ) ,
42
- refnuc : linevec[ 3 ] . to_string ( ) ,
43
- altnuc : linevec[ 4 ] . to_string ( ) ,
44
- qual : linevec[ 5 ] . to_string ( ) ,
37
+ vcstring_file. push ( VCF {
38
+ chrom : linevec[ 0 ] . to_string ( ) ,
39
+ pos : linevec[ 1 ] . parse :: < usize > ( ) . unwrap ( ) ,
40
+ id : linevec[ 2 ] . to_string ( ) ,
41
+ refnuc : linevec[ 3 ] . to_string ( ) ,
42
+ altnuc : linevec[ 4 ] . to_string ( ) ,
43
+ qual : linevec[ 5 ] . to_string ( ) ,
45
44
} ) ;
46
45
}
47
46
47
+ let mut output: Vec < OUTPUT > = Vec :: new ( ) ;
48
+ for i in vcstring_file. iter ( ) {
49
+ for j in gtfresults. iter ( ) {
50
+ if i. pos > j. start && i. pos <= j. stop && j. typeannotate == variant {
51
+ output. push ( OUTPUT {
52
+ chrom : i. chrom . clone ( ) ,
53
+ pos : i. pos . clone ( ) . to_string ( ) ,
54
+ id : i. id . clone ( ) ,
55
+ refnuc : i. id . clone ( ) ,
56
+ altnuc : i. altnuc . clone ( ) ,
57
+ typeannotate : j. typeannotate . clone ( ) ,
58
+ genename : j. genename . clone ( ) ,
59
+ } ) ;
60
+ }
61
+ }
62
+ }
48
63
49
-
50
-
64
+ let mut mutwrite = File :: create ( "annotationfile.txt" ) . expect ( "file not present" ) ;
65
+ writeln ! (
66
+ mutwrite,
67
+ "{}\t {}\t {}\t {}\t {}\t {}\t {}" ,
68
+ "chrom" , "pos" , "id" , "refnuc" , "altnuc" , "typeannotate" , "genename"
69
+ )
70
+ . expect ( "file not found" ) ;
71
+ for i in output. iter ( ) {
72
+ writeln ! (
73
+ mutwrite,
74
+ "{}\t {}\t {}\t {}\t {}\t {}\t {}" ,
75
+ i. chrom, i. pos, i. id, i. refnuc, i. altnuc, i. typeannotate, i. genename
76
+ )
77
+ . expect ( "line not found" ) ;
78
+ }
51
79
Ok ( "The regions have been annotated" . to_string ( ) )
52
80
}
81
+
82
+ pub fn gtfread ( gtffile : & str ) -> Result < Vec < GENCODE > , Box < dyn Error > > {
83
+ let fileopen = File :: open ( gtffile) . expect ( "file not found" ) ;
84
+ let fileread = BufReader :: new ( fileopen) ;
85
+ let mut gtf_vector: Vec < GENCODE > = Vec :: new ( ) ;
86
+ for i in fileread. lines ( ) {
87
+ let linegtf = i. expect ( "line not found" ) ;
88
+ let linevec: Vec < String > = linegtf
89
+ . split ( "\t " )
90
+ . map ( |x| x. to_string ( ) )
91
+ . collect :: < Vec < String > > ( ) ;
92
+ let linecollect: String = linevec[ 9 ] . split ( ";" ) . collect :: < Vec < _ > > ( ) [ 2 ]
93
+ . replace ( " " , "-" )
94
+ . split ( "-" )
95
+ . collect :: < Vec < _ > > ( ) [ 1 ]
96
+ . to_string ( ) ;
97
+ gtf_vector. push ( GENCODE {
98
+ chrom : linevec[ 0 ] . clone ( ) ,
99
+ typeannotate : linevec[ 2 ] . clone ( ) ,
100
+ start : linevec[ 3 ] . parse :: < usize > ( ) . unwrap ( ) ,
101
+ stop : linevec[ 4 ] . parse :: < usize > ( ) . unwrap ( ) ,
102
+ genename : linecollect,
103
+ } )
104
+ }
105
+ Ok ( gtf_vector)
106
+ }
0 commit comments