Skip to content

Commit 1298c60

Browse files
authored
Merge pull request #126 from y1zhou/feat/read-options
Add ReadOptions implementation
2 parents a21b17c + e82ec7f commit 1298c60

29 files changed

+2533
-309
lines changed

benches/benchmark.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
use pdbtbx::*;
21
use std::fs::File;
32
use std::io::prelude::*;
43
use std::io::BufWriter;
54
use std::time::{Duration, Instant};
65

6+
use pdbtbx::*;
7+
78
fn main() {
89
// Setup the data needed
910
let pdb_names = vec![
@@ -19,8 +20,11 @@ fn main() {
1920
("big", "example-pdbs/pTLS-6484.cif"),
2021
];
2122
let mut models = Vec::with_capacity(pdb_names.len());
23+
let parser = ReadOptions::default()
24+
.set_level(crate::StrictnessLevel::Loose)
25+
.set_format(Format::Pdb);
2226
for (name, path) in &pdb_names {
23-
models.push((*name, open_pdb(path, StrictnessLevel::Loose).unwrap().0))
27+
models.push((*name, parser.read(path).unwrap().0))
2428
}
2529
let mut results = Vec::new();
2630

@@ -56,7 +60,10 @@ fn main() {
5660
}
5761

5862
fn bench_open(filename: &str) {
59-
let (_pdb, _errors) = open(filename, StrictnessLevel::Loose).unwrap();
63+
let (_pdb, _errors) = ReadOptions::default()
64+
.set_level(crate::StrictnessLevel::Loose)
65+
.read(filename)
66+
.unwrap();
6067
}
6168

6269
fn bench_transformation(mut pdb: PDB) {

example-pdbs/rosetta_model.cif

Lines changed: 1960 additions & 0 deletions
Large diffs are not rendered by default.

examples/selection.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
use pdbtbx::*;
22

33
fn main() {
4-
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
4+
let (pdb, _errors) = ReadOptions::default()
5+
.set_level(StrictnessLevel::Loose)
6+
.set_format(Format::Pdb)
7+
.read("example-pdbs/1ubq.pdb")
8+
.unwrap();
59

610
// Two ways of selecting the following atom in the PDB file; the first search can be somewhat faster
711
// because it can discard other chains which the second search has to test.

examples/sphere.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
1-
use pdbtbx::*;
21
use rayon::iter::ParallelIterator;
32

3+
use pdbtbx::*;
4+
45
fn main() {
5-
atom_sphere();
6-
residue_sphere();
7-
find_clashes();
6+
let (pdb, _errors) = ReadOptions::new()
7+
.set_level(StrictnessLevel::Loose)
8+
.set_format(Format::Pdb)
9+
.read("example-pdbs/1ubq.pdb")
10+
.unwrap();
11+
12+
atom_sphere(&pdb);
13+
residue_sphere(&pdb);
14+
find_clashes(&pdb);
815
}
916

1017
/// Find all Atoms in a sphere around a single origin Atom with a user-defined radius
1118
/// This is using the features `rstar` and `rayon`.
12-
fn atom_sphere() {
13-
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
19+
fn atom_sphere(pdb: &PDB) {
1420
let (origin_id, radius): (usize, f64) = (12, 3.5);
1521

1622
// Leverage parallel searching
@@ -31,8 +37,7 @@ fn atom_sphere() {
3137
/// Find all Atoms belonging to a Residue that has at least one Atom within a sphere of
3238
/// user-defined origin and radius.
3339
/// This is using the features `rstar` and `rayon`.
34-
fn residue_sphere() {
35-
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
40+
fn residue_sphere(pdb: &PDB) {
3641
let (origin_id, radius): (usize, f64) = (12, 3.5);
3742

3843
let sphere_origin = pdb
@@ -72,8 +77,7 @@ fn residue_sphere() {
7277
/// Results for Atoms within the same Residue are excluded as well as those from the C and N Atoms
7378
/// constituting the peptide bond of neighbouring amino acids.
7479
/// Also, Atoms are not counted twice.
75-
fn find_clashes() {
76-
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
80+
fn find_clashes(pdb: &PDB) {
7781
let tree = pdb.create_hierarchy_rtree();
7882

7983
let mut clashing_atoms = Vec::new();

examples/waterbox.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
use pdbtbx::*;
21
use std::env;
32
use std::path::Path;
43
use std::time::Instant;
54

5+
use pdbtbx::*;
6+
67
fn main() {
78
let filename = env::current_dir()
89
.unwrap()
@@ -23,8 +24,11 @@ fn main() {
2324
fn create_waterbox(size: (f64, f64, f64)) -> PDB {
2425
let now = Instant::now();
2526

26-
let (mut liquid, _errors) =
27-
open_pdb("example-pdbs/liquid.pdb", StrictnessLevel::Loose).unwrap();
27+
let (mut liquid, _errors) = ReadOptions::new()
28+
.set_level(StrictnessLevel::Loose)
29+
.set_format(Format::Pdb)
30+
.read("example-pdbs/liquid.pdb")
31+
.unwrap();
2832

2933
let time = now.elapsed();
3034

src/error/context.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ impl Context {
110110
} else {
111111
Context::Line {
112112
linenumber: pos.line,
113-
line: pos.text.lines().into_iter().next().unwrap().to_string(),
113+
line: pos.text.lines().next().unwrap().to_string(),
114114
offset: 0,
115115
length: 3,
116116
}
@@ -132,7 +132,6 @@ impl Context {
132132
lines: start
133133
.text
134134
.lines()
135-
.into_iter()
136135
.take(end.line - start.line)
137136
.map(ToString::to_string)
138137
.collect::<Vec<String>>(),

src/lib.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,7 @@
2424
//!
2525
//! ```rust
2626
//! use pdbtbx::*;
27-
//! let (mut pdb, _errors) = pdbtbx::open(
28-
//! "example-pdbs/1ubq.pdb",
29-
//! StrictnessLevel::Medium
30-
//! ).unwrap();
27+
//! let (mut pdb, _errors) = pdbtbx::open("example-pdbs/1ubq.pdb").unwrap();
3128
//!
3229
//! pdb.remove_atoms_by(|atom| atom.element() == Some(&Element::H)); // Remove all H atoms
3330
//!
@@ -65,15 +62,15 @@
6562
doc = r##"
6663
```rust
6764
use pdbtbx::*;
68-
let (mut pdb, _errors) = pdbtbx::open("example-pdbs/1ubq.pdb", pdbtbx::StrictnessLevel::Medium).unwrap();
65+
let (mut pdb, _errors) = pdbtbx::open("example-pdbs/1ubq.pdb").unwrap();
6966
// You can loop over all atoms within 3.5 Å of a specific atom
7067
// Note: The `locate_within_distance` method takes a squared distance
7168
let tree = pdb.create_atom_rtree();
7269
for atom in tree.locate_within_distance(pdb.atom(42).unwrap().pos(), 3.5 * 3.5) {
7370
println!("{}", atom);
7471
}
7572
76-
// You can even get information about the hierarchy of these atoms
73+
// You can even get information about the hierarchy of these atoms
7774
// (the chain, residue and conformer that contain this atom)
7875
let tree = pdb.create_hierarchy_rtree();
7976
let mut total = 0;

src/read/general.rs

Lines changed: 30 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,29 @@
1-
use std::io::{BufRead, Read, Seek};
2-
3-
use super::*;
41
use crate::error::*;
52
use crate::structs::PDB;
63
use crate::StrictnessLevel;
74

8-
#[cfg(feature = "compression")]
9-
use super::mmcif::open_mmcif_bufread;
10-
#[cfg(feature = "compression")]
11-
use flate2::read::GzDecoder;
12-
#[cfg(feature = "compression")]
13-
use std::fs;
5+
use super::*;
146

157
/// Standard return type for reading a file.
16-
pub type ReadResult = std::result::Result<(PDB, Vec<PDBError>), Vec<PDBError>>;
8+
pub type ReadResult = Result<(PDB, Vec<PDBError>), Vec<PDBError>>;
179

18-
/// Open an atomic data file, either PDB or mmCIF/PDBx. The correct type will be
19-
/// determined based on the file extension. This function is equivalent to
20-
/// [`ReadOptions::read()`] with default options, apart from the `level` which
21-
/// can be set by the `level` parameter.
10+
/// Open an atomic data file, either PDB or mmCIF/PDBx.
11+
///
12+
/// This function is equivalent to [`ReadOptions::read()`] with default options.
13+
/// The correct type will be determined based on the file extension.
14+
/// Gzipped files can also be opened directly if file extensions are
15+
/// `.pdb.gz`, `.pdb1.gz`, `.mmcif.gz`, or `.cif.gz`.
2216
///
2317
/// # Errors
2418
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
2519
///
2620
/// # Related
27-
/// If you want to open a file from memory see [`open_raw`]. There are also function to open a specified file type directly
28-
/// see [`crate::open_pdb`] and [`crate::open_mmcif`] respectively.
29-
pub fn open(filename: impl AsRef<str>, level: StrictnessLevel) -> ReadResult {
30-
open_with_options(filename, &ReadOptions::new().set_level(level))
31-
}
32-
33-
/// Opens a files based on the given options.
34-
pub(in crate::read) fn open_with_options(
35-
filename: impl AsRef<str>,
36-
options: &ReadOptions,
37-
) -> ReadResult {
38-
if check_extension(&filename, "pdb") {
39-
open_pdb(filename, options.level)
40-
} else if check_extension(&filename, "cif") {
41-
open_mmcif(filename, options.level)
42-
} else {
43-
Err(vec![PDBError::new(
44-
ErrorLevel::BreakingError,
45-
"Incorrect extension",
46-
"Could not determine the type of the given file, make it .pdb or .cif",
47-
Context::show(filename.as_ref()),
48-
)])
49-
}
21+
/// If you want to open a file from memory see [`ReadOptions::read_raw`].
22+
/// The file type can be set explicitly with [`ReadOptions::set_format`].
23+
/// These functions are useful if you are using a non-standard compression algorithm or way of
24+
/// storing the data.
25+
pub fn open(filename: impl AsRef<str>) -> ReadResult {
26+
ReadOptions::default().read(filename)
5027
}
5128

5229
/// Open a compressed atomic data file, either PDB or mmCIF/PDBx. The correct type will be
@@ -56,104 +33,20 @@ pub(in crate::read) fn open_with_options(
5633
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
5734
///
5835
/// # Related
59-
/// If you want to open a file from memory see [`open_raw`], [`crate::open_pdb_raw`] and [`crate::open_mmcif_bufread`].
36+
/// If you want to open a file from memory see [`ReadOptions::read_raw`].
37+
/// The file type can be set explicitly with [`ReadOptions::set_format`].
6038
/// These functions are useful if you are using a non-standard compression algorithm or way of
6139
/// storing the data.
6240
#[cfg(feature = "compression")]
41+
#[deprecated(
42+
since = "0.12.0",
43+
note = "Please use `ReadOptions::default().set_decompress(true).read(filename)` instead"
44+
)]
6345
pub fn open_gz(filename: impl AsRef<str>, level: StrictnessLevel) -> ReadResult {
64-
let filename = filename.as_ref();
65-
66-
if check_extension(filename, "gz") {
67-
// open a decompression stream
68-
let file = fs::File::open(filename).map_err(|_| {
69-
vec![PDBError::new(
70-
ErrorLevel::BreakingError,
71-
"Could not open file",
72-
"Could not open the given file, make sure it exists and you have the correct permissions",
73-
Context::show(filename),
74-
)]
75-
})?;
76-
77-
let decompressor = GzDecoder::new(file);
78-
79-
let reader = std::io::BufReader::new(decompressor);
80-
81-
if check_extension(&filename[..filename.len() - 3], "pdb") {
82-
open_pdb_raw(reader, Context::show(filename), level)
83-
} else if check_extension(&filename[..filename.len() - 3], "cif") {
84-
open_mmcif_bufread(reader, level)
85-
} else {
86-
Err(vec![PDBError::new(
87-
ErrorLevel::BreakingError,
88-
"Incorrect extension",
89-
"Could not determine the type of the given file, make it .pdb.gz or .cif.gz",
90-
Context::show(filename),
91-
)])
92-
}
93-
} else {
94-
Err(vec![PDBError::new(
95-
ErrorLevel::BreakingError,
96-
"Incorrect extension",
97-
"Could not determine the type of the given file, make it .pdb.gz or .cif.gz",
98-
Context::show(filename),
99-
)])
100-
}
101-
}
102-
103-
/// Open a stream with either PDB or mmCIF data. The distinction is made on the start of the first line.
104-
/// If it starts with `HEADER` it is a PDB file, if it starts with `data_` it is a mmCIF file.
105-
///
106-
/// # Errors
107-
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
108-
/// It returns a breaking error if the buffer could not be read, the file type could not be determined form the first line, or there was a breaking error in the file itself.
109-
/// See the `PDBError` for more details.
110-
///
111-
/// # Related
112-
/// If you want to open a file see [`open`]. There are also function to open a specified file type directly
113-
/// see [`crate::open_pdb_raw`] and [`crate::open_mmcif_raw`] respectively.
114-
pub fn open_raw<T: std::io::Read + std::io::Seek>(
115-
mut input: std::io::BufReader<T>,
116-
level: StrictnessLevel,
117-
) -> ReadResult {
118-
let mut first_line = String::new();
119-
if input.read_line(&mut first_line).is_err() {
120-
return Err(vec![PDBError::new(
121-
ErrorLevel::BreakingError,
122-
"Buffer could not be read",
123-
"The buffer provided to `open_raw` could not be read.",
124-
Context::None,
125-
)]);
126-
}
127-
if input.rewind().is_err() {
128-
return Err(vec![PDBError::new(
129-
ErrorLevel::BreakingError,
130-
"Buffer could not be read",
131-
"The buffer provided to `open_raw` could not be rewound to the start.",
132-
Context::None,
133-
)]);
134-
}
135-
if first_line.starts_with("HEADER") {
136-
open_pdb_raw(input, Context::None, level)
137-
} else if first_line.starts_with("data_") {
138-
let mut contents = String::new();
139-
if input.read_to_string(&mut contents).is_ok() {
140-
open_mmcif_raw(&contents, level)
141-
} else {
142-
Err(vec![PDBError::new(
143-
ErrorLevel::BreakingError,
144-
"Buffer could not be read",
145-
"The buffer provided to `open_raw` could not be read to end.",
146-
Context::show(&first_line),
147-
)])
148-
}
149-
} else {
150-
Err(vec![PDBError::new(
151-
ErrorLevel::BreakingError,
152-
"Could not determine file type",
153-
"Could not determine the type of the given file, make it .pdb or .cif",
154-
Context::show(&first_line),
155-
)])
156-
}
46+
ReadOptions::default()
47+
.set_level(level)
48+
.guess_format(filename.as_ref())
49+
.read(filename)
15750
}
15851

15952
#[cfg(test)]
@@ -162,19 +55,17 @@ mod tests {
16255

16356
#[test]
16457
fn open_invalid() {
165-
assert!(open("file.png", StrictnessLevel::Medium).is_err());
166-
assert!(open("file.mmcif", StrictnessLevel::Medium).is_err());
167-
assert!(open("file.pdbml", StrictnessLevel::Medium).is_err());
168-
assert!(open("file.pd", StrictnessLevel::Medium).is_err());
58+
assert!(open("file.png").is_err());
59+
assert!(open("file.mmcif").is_err());
60+
assert!(open("file.pdbml").is_err());
61+
assert!(open("file.pd").is_err());
16962
}
17063

17164
#[test]
17265
fn open_not_existing() {
173-
let pdb =
174-
open("file.pdb", StrictnessLevel::Medium).expect_err("This file should not exist.");
66+
let pdb = open("file.pdb").expect_err("This file should not exist.");
17567
assert_eq!(pdb[0].short_description(), "Could not open file");
176-
let cif =
177-
open("file.cif", StrictnessLevel::Medium).expect_err("This file should not exist.");
68+
let cif = open("file.cif").expect_err("This file should not exist.");
17869
assert_eq!(cif[0].short_description(), "Could not open file");
17970
}
18071
}

0 commit comments

Comments
 (0)