Skip to content

Commit a505da1

Browse files
authored
Merge branch 'LCrossman:main' into readme-update
2 parents 12d4d91 + c9a69f0 commit a505da1

File tree

7 files changed

+201
-38
lines changed

7 files changed

+201
-38
lines changed

microBioRust/K12_ribo.gbk

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
LOCUS NC_000913 913 bp DNA linear CON 01-Sep-2025
2+
DEFINITION Escherichia coli
3+
ACCESSION NC_000913
4+
KEYWORDS .
5+
SOURCE Escherichia coli str. K-12 substr. MG1655
6+
ORGANISM Escherichia coli str. K-12 substr. MG1655
7+
Bacteria; Pseudomonadati; Pseudomonadota; Gammaproteobacteria;
8+
Enterobacterales; Enterobacteriaceae; Escherichia.
9+
FEATURES Location/Qualifiers
10+
source <1..>913
11+
/id="source_1"
12+
/organism="Escherichia coli str. K-12 substr. MG1655"
13+
/mol_type="genomic DNA"
14+
/strain="K-12"
15+
/sub_strain="MG1655"
16+
/db_xref="taxon:511145"
17+
source complement(1..913)
18+
gene complement(10..363)
19+
/gene="rplR"
20+
/locus_tag="b3304"
21+
/gene_synonym="ECK3291"
22+
/db_xref="ASAP:ABE-0010825"
23+
/db_xref="ECOCYC:EG10879"
24+
/db_xref="GeneID:947804"
25+
CDS complement(10..363)
26+
/gene="rplR"
27+
/locus_tag="b3304"
28+
/gene_synonym="ECK3291"
29+
/codon_start=1
30+
/transl_table=11
31+
/product="50S ribosomal subunit protein L18"
32+
/protein_id="NP_417763.1"
33+
/db_xref="UniProtKB/Swiss-Prot:P0C018"
34+
/db_xref="ASAP:ABE-0010825"
35+
/db_xref="ECOCYC:EG10879"
36+
/db_xref="GeneID:947804"
37+
/translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNG
38+
SEVLVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGR
39+
VQALADAAREAGLQF"
40+
gene complement(373..906)
41+
/gene="rplF"
42+
/locus_tag="b3305"
43+
/gene_synonym="ECK3292"
44+
/db_xref="ASAP:ABE-0010827"
45+
/db_xref="ECOCYC:EG10869"
46+
/db_xref="GeneID:947803"
47+
CDS complement(373..906)
48+
/gene="rplF"
49+
/locus_tag="b3305"
50+
/gene_synonym="ECK3292"
51+
/codon_start=1
52+
/transl_table=11
53+
/product="50S ribosomal subunit protein L6"
54+
/protein_id="NP_417764.1"
55+
/db_xref="UniProtKB/Swiss-Prot:P0AG55"
56+
/db_xref="ASAP:ABE-0010827"
57+
/db_xref="ECOCYC:EG10869"
58+
/db_xref="GeneID:947803"
59+
/translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVK
60+
HADNTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNV
61+
INLSLGFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGK
62+
GVRYADEVVRTKEAKKK"
63+
BASE COUNT 214 a 256 c 223 g 220 t
64+
ORIGIN
65+
1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg
66+
61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc
67+
121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag
68+
181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg
69+
241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc
70+
301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc
71+
361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg
72+
421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg
73+
481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc
74+
541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat
75+
601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa
76+
661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc
77+
721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac
78+
781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac
79+
841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg
80+
901 agacattttt tcc
81+
//

microBioRust/src/embl.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,3 +1394,78 @@ impl Config {
13941394
Ok(Config { filename })
13951395
}
13961396
}
1397+
1398+
#[cfg(test)]
1399+
mod tests {
1400+
use super::*;
1401+
#[test]
1402+
#[allow(unused_mut)]
1403+
#[allow(unused_variables)]
1404+
#[allow(dead_code)]
1405+
#[allow(unused_assignments)]
1406+
#[allow(unused_imports)]
1407+
fn test_read_file() {
1408+
let content = std::fs::read_to_string("example.embl").expect("error reading file");
1409+
assert!(content.contains("ID"));
1410+
assert!(content.len() > 0);
1411+
}
1412+
#[test]
1413+
#[allow(unused_mut)]
1414+
#[allow(unused_variables)]
1415+
#[allow(dead_code)]
1416+
#[allow(unused_assignments)]
1417+
#[allow(unused_imports)]
1418+
fn test_parse_embl() {
1419+
let file_embl = "example.embl";
1420+
let records = embl!(&file_embl);
1421+
assert!(records.len() > 0);
1422+
}
1423+
#[test]
1424+
#[allow(unused_mut)]
1425+
#[allow(unused_variables)]
1426+
#[allow(dead_code)]
1427+
#[allow(unused_assignments)]
1428+
#[allow(unused_imports)]
1429+
fn test_parse_source_attributes() {
1430+
let file_embl = "example.embl";
1431+
let records = embl!(&file_embl);
1432+
if let Some(record) = records.first() {
1433+
if let Some((key, val)) = record.source_map.source_attributes.first_key_value() {
1434+
assert_eq!(key, &"source_AM236082_1".to_string());
1435+
}
1436+
}
1437+
}
1438+
#[test]
1439+
#[allow(unused_mut)]
1440+
#[allow(unused_variables)]
1441+
#[allow(dead_code)]
1442+
#[allow(unused_assignments)]
1443+
#[allow(unused_imports)]
1444+
fn test_parse_cds_attributes() {
1445+
let file_embl = "example.embl";
1446+
let records = embl!(&file_embl);
1447+
if let Some(record) = records.first() {
1448+
if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() {
1449+
assert_eq!(locus_tag, &"pRL80001".to_string());
1450+
assert_eq!(record.cds.get_gene(&locus_tag).as_deref(), Some(&"repAp8".to_string()));
1451+
}
1452+
}
1453+
}
1454+
#[test]
1455+
#[allow(unused_mut)]
1456+
#[allow(unused_variables)]
1457+
#[allow(dead_code)]
1458+
#[allow(unused_assignments)]
1459+
#[allow(unused_imports)]
1460+
fn test_parse_sequence_attributes() {
1461+
let file_embl = "example.embl";
1462+
let records = embl!(&file_embl);
1463+
if let Some(record) = records.first() {
1464+
if let Some((key, vals)) = record.cds.attributes.first_key_value() {
1465+
assert_eq!(key, &"pRL80001".to_string());
1466+
assert_eq!(record.seq_features.get_sequence_faa(&key), Some(&"VENPAQLQKAIHKLIAAHARDLSGALHEHRVKLYPPEARKTLRSFSSIEAAKLIGVNDGYLRHLSLEGKGPQPEIGNNNRRSYSVETIQALREYLDENGKGDRRYSPRRSGREHLQVITAVNFKGGSGKTTTAAHLAQYLALNGYRVLAIDLDPQASMSALHGFQPEFDVGDNETLYGAVRYDEERRPLKDIIKKTYFANLDLVPGNLELMEFEHDTAKVLGSNDRKNIFFTRMDDAIASVADDYDVVVVDCPPQLGFLTISALCAATAVLVTVHPQMLDVMSMCQFLLMTSELLSVVADAGGSMNYDWMRYLVTRYEPGDGPQNQMVSFMRTMFGDHVLNHPMLKSTAISDAGITKQTLYEVSRDQFTRATYDRAMESLDNVNSEIEQLIQSSWGRK".to_string()));
1467+
}
1468+
}
1469+
}
1470+
}
1471+

microBioRust/src/gbk.rs

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@ where
752752
//collects the DNA sequence and translations on the correct strand
753753
if stra == -1 {
754754
if cod > 1 {
755-
println!("reverse strand coding start more than one {:?}", &iterablecount);
755+
//println!("reverse strand coding start more than one {:?}", &iterablecount);
756756
if sto + 1 <= record.sequence.len() {
757757
sliced_sequence = &record.sequence[sta+cod..sto+1];
758758
}
@@ -770,7 +770,7 @@ where
770770
else {
771771
sliced_sequence = &record.sequence[sta..sto];
772772
}
773-
//println!("iterable count after is {:?}", &iterablecount);
773+
println!("iterable count after is {:?}", &iterablecount);
774774
}
775775
let cds_char = sliced_sequence;
776776
let prot_seq = translate(&revcomp(cds_char.as_bytes()));
@@ -789,7 +789,7 @@ where
789789
sliced_sequence = &record.sequence[sta+cod-1..sto];
790790
}
791791
else {
792-
//println!("forward strand codon value one cnt {:?}", &iterablecount);
792+
println!("forward strand codon value one cnt {:?}", &iterablecount);
793793
sliced_sequence = &record.sequence[sta-1..sto];
794794
}
795795
let cds_char = sliced_sequence;
@@ -1575,8 +1575,7 @@ mod tests {
15751575
#[allow(unused_assignments)]
15761576
#[allow(unused_imports)]
15771577
fn test_read_file() {
1578-
println!("in testing of read file");
1579-
let content = std::fs::read_to_string("rhizexample.gbk").expect("error reading file");
1578+
let content = std::fs::read_to_string("K12_ribo.gbk").expect("error reading file");
15801579
assert!(content.contains("LOCUS"));
15811580
assert!(content.len() > 0);
15821581
}
@@ -1587,10 +1586,9 @@ mod tests {
15871586
#[allow(unused_assignments)]
15881587
#[allow(unused_imports)]
15891588
fn test_parse_gbk() {
1590-
let file_gbk = "rhizexample.gbk";
1589+
let file_gbk = "K12_ribo.gbk";
15911590
let records = genbank!(&file_gbk);
15921591
assert!(records.len() > 0);
1593-
println!("records len is {:?}", &records.len());
15941592
}
15951593
#[test]
15961594
#[allow(unused_mut)]
@@ -1599,10 +1597,13 @@ mod tests {
15991597
#[allow(unused_assignments)]
16001598
#[allow(unused_imports)]
16011599
fn test_parse_source_attributes() {
1602-
let file_gbk = "rhizexample.gbk";
1600+
let file_gbk = "K12_ribo.gbk";
16031601
let records = genbank!(&file_gbk);
1604-
let record = records[0].clone();
1605-
assert_eq!(record.id, "AM236082");
1602+
if let Some(record) = records.first() {
1603+
if let Some((key, val)) = record.source_map.source_attributes.first_key_value() {
1604+
assert_eq!(key, &"source_NC_000913_1".to_string());
1605+
}
1606+
}
16061607
}
16071608
#[test]
16081609
#[allow(unused_mut)]
@@ -1611,11 +1612,14 @@ mod tests {
16111612
#[allow(unused_assignments)]
16121613
#[allow(unused_imports)]
16131614
fn test_parse_cds_attributes() {
1614-
let file_gbk = "rhizexample.gbk";
1615+
let file_gbk = "K12_ribo.gbk";
16151616
let records = genbank!(&file_gbk);
1616-
let record = records[0].clone();
1617-
assert_eq!(record.cds.locus_tag.clone().unwrap(), "pRL80142".to_string());
1618-
//assert_eq!(record.cds.get_gene(&record.id).as_deref(), Some(&"trbBp8".to_string()));
1617+
if let Some(record) = records.first() {
1618+
if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() {
1619+
assert_eq!(locus_tag, &"b3304".to_string());
1620+
assert_eq!(record.cds.get_gene(&locus_tag).as_deref(), Some(&"rplR".to_string()));
1621+
}
1622+
}
16191623
}
16201624
#[test]
16211625
#[allow(unused_mut)]
@@ -1624,11 +1628,14 @@ mod tests {
16241628
#[allow(unused_assignments)]
16251629
#[allow(unused_imports)]
16261630
fn test_parse_sequence_attributes() {
1627-
let file_gbk = "rhizexample.gbk";
1631+
let file_gbk = "K12_ribo.gbk";
16281632
let records = genbank!(&file_gbk);
1629-
let record = records[0].clone();
1630-
let loc_tag = &record.cds.locus_tag.clone().unwrap();
1631-
assert_eq!(record.seq_features.get_sequence_faa(&loc_tag), Some(&"MLQSHSRLVRKLQDALGEHLCIALEDPTVVEIMLNPDGKLFIERLGHGVAPAGEMQATAAETVIGSVAHALQSEADGERPIISGELPIGGHRFEGLLPPVVNSPTFTIRRRASRLIPLDDYVTAKIMTEAQASIIRSAITNRLNIVIAGGTGSGKTTLANAVIAEIVSSAPEDRMVILEDTSEIQCAAENAVCLHTSDAVDMARLLKSTMRLRPDRIIVGEVRDGAALTLLKAWNTGHPGGVTTIHSNSAMSALRRLEQLTSEASQQPMQAVIGEAVDLVISIERAGRGRRVREVLHVEGFNGSRYQTEHYPQIDEDSHAA".to_string()));
1633+
if let Some(record) = records.first() {
1634+
if let Some((key, vals)) = record.cds.attributes.first_key_value() {
1635+
assert_eq!(key, &"b3304".to_string());
1636+
assert_eq!(record.seq_features.get_sequence_faa(&key), Some(&"MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSEVLVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALADAAREAGLQF".to_string()));
1637+
}
1638+
}
16321639
}
16331640
}
16341641

microBioRust/tests/create_new_record.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ fn create_new_record() -> Result<(), anyhow::Error> {
3232
.set_mol_type("DNA".to_string())
3333
.set_strain("K-12 substr. MG1655".to_string())
3434
// culture_collection.clone()
35-
.set_type_material("type strain of Escherichia coli K12".to_string())
35+
//.set_type_material("type strain of Escherichia coli K12".to_string())
3636
.set_db_xref("PRJNA57779".to_string());
3737
record
3838
.cds
@@ -98,22 +98,22 @@ ITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADEVVRTKEAKKK"
9898
)
9999
.set_codon_start(1)
100100
.set_strand(-1);
101-
record.sequence = "TTAGAACTGAAGGCCAGCTTCACGGGCAGCATCTGCCAGTGCCTGGACACGACCATGATA
102-
TTGGAACCCGGAACGGTCAAAGGATACATCTTTGATGCCTTTTTCCAGAGCGCGTTCAGC
103-
GACAGCTTTACCCACAGCTGCAGCCGCGTCTTTGTTACCGGTGTACTTCAGTTGTTCAGC
104-
GATAGCTTTTTCTACAGTAGAAGCAGCTACCAGAACTTCAGAACCGTTCGGTGCAATTAC
105-
CTGTGCGTAAATGTGACGCGGGGTACGATGTACCACCAGGCGAGTTGCGCCCAGCTCCTG
106-
GAGCTTGCGGCGTGCGCGGGTCGCACGACGGATACGAGCAGATTTCTTATCCATAGTGTT
107-
ACCTTACTTCTTCTTAGCCTCTTTGGTACGCACGACTTCGTCGGCGTAACGAACACCCTT
108-
GCCTTTATAAGGCTCAGGACGACGGTAGGCGCGCAGATCCGCTGCAACCTGGCCGATCAC
109-
CTGCTTATCAGCGCCTTTCAGCACGATTTCAGTCTGAGTCGGACATTCAGCAGTGATACC
110-
CGCAGGCAGCTGATGGTCAACAGGATGAGAGAAACCCAGAGACAGGTTAATCACATTGCC
111-
TTTAACCGCTGCACGGTAACCTACACCAACCAGCTGCAGCTTCTTAGTGAAGCCTTCGGT
112-
AACACCGATAACCATTGAGTTCAGCAGGGCACGCGCGGTACCAGCCTGTGCCCAACCGTC
113-
TGCGTAACCATCACGCGGACCGAAGGTCAGGGTATTATCTGCATGTTTAACTTCAACAGC
114-
ATCGTTGAGAGTACGAGTCAGCTCGCCGTTTTTACCTTTGATCGTAATAACCTGACCGTT
115-
GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACA"
116-
.to_string();
101+
record.sequence = "acctctaccttagaactgaaggccagcttcacgggcagcatctgccagtgcctggacacg
102+
accatgatattggaacccggaacggtcaaaggatacatctttgatgcctttttccagagc
103+
gcgttcagcgacagctttacccacagctgcagccgcgtctttgttaccggtgtacttcag
104+
ttgttcagcgatagctttttctacagtagaagcagctaccagaacttcagaaccgttcgg
105+
tgcaattacctgtgcgtaaatgtgacgcggggtacgatgtaccaccaggcgagttgcgcc
106+
cagctcctggagcttgcggcgtgcgcgggtcgcacgacggatacgagcagatttcttatc
107+
catagtgttaccttacttcttcttagcctctttggtacgcacgacttcgtcggcgtaacg
108+
aacacccttgcctttataaggctcaggacgacggtaggcgcgcagatccgctgcaacctg
109+
gccgatcacctgcttatcagcgcctttcagcacgatttcagtctgagtcggacattcagc
110+
agtgatacccgcaggcagctgatggtcaacaggatgagagaaacccagagacaggttaat
111+
cacattgcctttaaccgctgcacggtaacctacaccaaccagctgcagcttcttagtgaa
112+
gccttcggtaacaccgataaccattgagttcagcagggcacgcgcggtaccagcctgtgc
113+
ccaaccgtctgcgtaaccatcacgcggaccgaaggtcagggtattatctgcatgtttaac
114+
ttcaacagcatcgttgagagtacgagtcagctcgccgtttttacctttgatcgtaataac
115+
ctgaccgttgatttttacgtcaacgccggcaggaacaacgaccggtgctttagcaacacg
116+
agacattttttcc".to_string();
117117
gff_write(
118118
seq_region.clone(),
119119
vec![record.clone()],

microBioRust/tests/genbank_to_faa.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use microBioRust::gbk::Reader;
22
use std::fs;
33
#[test]
44
pub fn genbank_to_faa() -> Result<(), anyhow::Error> {
5-
let file_gbk = fs::File::open("test_output.gbk")?;
5+
let file_gbk = fs::File::open("K12_ribo.gbk")?;
66
let reader = Reader::new(file_gbk);
77
let mut records = reader.records();
88
let mut read_counter: u32 = 0;

microBioRust/tests/genbank_to_ffn.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use microBioRust::gbk::Reader;
22
use std::fs;
33
#[test]
44
pub fn genbank_to_ffn() -> Result<(), anyhow::Error> {
5-
let file_gbk = fs::File::open("test_output.gbk")?;
5+
let file_gbk = fs::File::open("K12_ribo.gbk")?;
66
let reader = Reader::new(file_gbk);
77
let mut records = reader.records();
88
let mut read_counter: u32 = 0;

microBioRust/tests/genbank_to_gff.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::fs;
44
use std::io;
55
#[test]
66
pub fn genbank_to_gff() -> io::Result<()> {
7-
let file_gbk = fs::File::open("test_output.gbk")?;
7+
let file_gbk = fs::File::open("K12_ribo.gbk")?;
88
let _prev_start: u32 = 0;
99
let mut prev_end: u32 = 0;
1010
let reader = Reader::new(file_gbk);

0 commit comments

Comments
 (0)