Skip to content

Commit 3e7adde

Browse files
committed
Merge branch 'master' of https://github.com/douweschulte/pdbtbx
2 parents f37570b + c3dedf0 commit 3e7adde

File tree

7 files changed

+129
-88
lines changed

7 files changed

+129
-88
lines changed

src/general_docs/mod.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//! Here you can find high level documentation for this crate.
2+
//! * About saving: [`mod@save`]
3+
//! * About the PDB hierarchy used: [`structs`]
4+
//!
5+
//! Good references for more in depth information:
6+
//! * PDB: [spec](https://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html)
7+
//! * mmCIF: [spec](https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Index/) [docs](https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Groups/index.html)
8+
//! * PDB to mmCIF conversion: [wwpdb](https://mmcif.wwpdb.org/docs/pdb_to_pdbx_correspondences.html)
9+
10+
use crate::*;
11+
12+
#[doc = include_str!("../save/general.md")]
13+
pub mod save {}
14+
15+
#[doc = include_str!("../structs/general.md")]
16+
pub mod structs {}

src/lib.rs

Lines changed: 5 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -41,70 +41,8 @@
4141
//! pdbtbx::save(&pdb, "dump/1ubq_no_hydrogens.pdb", pdbtbx::StrictnessLevel::Loose);
4242
//! ```
4343
//!
44-
//! ## PDB Hierarchy
45-
//! As explained in depth in the [documentation of CCTBX](https://cci.lbl.gov/cctbx_docs/iotbx/iotbx.pdb.html#iotbx-pdb-hierarchy)
46-
//! it can be quite hard to properly define a hierarchy for PDB files which works for all files.
47-
//! This library follows the hierarchy presented by CCTBX [`Grosse-Kunstleve, R. W. et al`], but renames the `residue_group` and
48-
//! `atom_group` constructs. This gives the following hierarchy, with the main identifying characteristics annotated per level.
49-
//!
50-
//! * [PDB]
51-
//! * [Model] \
52-
//! Serial number
53-
//! * [Chain] \
54-
//! Id
55-
//! * [Residue] (analogous to `residue_group` in CCTBX) \
56-
//! Serial number \
57-
//! Insertion code
58-
//! * [Conformer] (analogous to `atom_group` in CCTBX) \
59-
//! Name \
60-
//! Alternative location
61-
//! * [Atom] \
62-
//! Serial number \
63-
//! Name
64-
//!
65-
//! ## Iterating over the PDB Hierarchy
66-
//!
67-
//! ```rust
68-
//! use pdbtbx::*;
69-
//! let (mut pdb, _errors) = pdbtbx::open(
70-
//! "example-pdbs/1ubq.pdb",
71-
//! pdbtbx::StrictnessLevel::Medium
72-
//! ).unwrap();
73-
//!
74-
//! // Iterating over all levels
75-
//! for model in pdb.models() {
76-
//! for chain in model.chains() {
77-
//! for residue in chain.residues() {
78-
//! for conformer in residue.conformers() {
79-
//! for atom in conformer.atoms() {
80-
//! // Do the calculations
81-
//! }
82-
//! }
83-
//! }
84-
//! }
85-
//! }
86-
//! // Or only over a couple of levels (just like in the example above)
87-
//! for residue in pdb.residues() {
88-
//! for atom in residue.atoms() {
89-
//! // Do the calculations
90-
//! }
91-
//! }
92-
//! // Or with access to the information with a single line
93-
//! for hierarchy in pdb.atoms_with_hierarchy() {
94-
//! println!("Atom {} in Conformer {} in Residue {} in Chain {} in Model {}",
95-
//! hierarchy.atom().serial_number(),
96-
//! hierarchy.conformer().name(),
97-
//! hierarchy.residue().serial_number(),
98-
//! hierarchy.chain().id(),
99-
//! hierarchy.model().serial_number()
100-
//! );
101-
//! }
102-
//! // Or with mutable access to the members of the hierarchy
103-
//! for mut hierarchy in pdb.atoms_with_hierarchy_mut() {
104-
//! let new_x = hierarchy.atom().x() * 1.5;
105-
//! hierarchy.atom_mut().set_x(new_x);
106-
//! }
107-
//! ```
44+
//! ## High level documentation
45+
//! [general_docs]
10846
//!
10947
//! ## Parallelization
11048
//! [Rayon](https://crates.io/crates/rayon) is used to create parallel iterators for all logical candidates. Use
@@ -150,7 +88,6 @@ println!("There are {} backbone atoms within 3.5Aͦ of the atom at index 42", to
15088
"##
15189
)]
15290
#![doc = "## References"]
153-
#![doc = "1. [`Grosse-Kunstleve, R. W. et al`] Grosse-Kunstleve, R. W., Sauter, N. K., Moriarty, N. W., & Adams, P. D. (2002). TheComputational Crystallography Toolbox: crystallographic algorithms in a reusable software framework. Journal of Applied Crystallography, 35(1), 126–136. [https://doi.org/10.1107/s0021889801017824](https://doi.org/10.1107/s0021889801017824)"]
15491
#![doc = "1. [`Perkel, J. M.`] Perkel, J. M. (2020). Why scientists are turning to Rust. Nature, 588(7836), 185–186. [https://doi.org/10.1038/d41586-020-03382-2](https://doi.org/10.1038/d41586-020-03382-2)"]
15592
// Set linting behaviour
15693
#![deny(
@@ -202,6 +139,9 @@ mod transformation;
202139
/// To validate certain invariants of PDB files
203140
mod validate;
204141

142+
#[cfg(doc)]
143+
pub mod general_docs;
144+
205145
pub use error::*;
206146
pub use read::*;
207147
pub use save::*;

src/read/mmcif/parser.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,11 @@ fn parse_atoms(input: &Loop, pdb: &mut PDB) -> Option<Vec<PDBError>> {
330330
18, ATOM_NAME, "atom_site.label_atom_id", Required;
331331
19, ATOM_OCCUPANCY, "atom_site.occupancy", Optional;
332332
20, ATOM_SEQ_ID, "atom_site.label_seq_id", Required;
333-
21, ATOM_TYPE, "atom_site.type_symbol", Required;
334-
22, ATOM_X, "atom_site.Cartn_x", Required;
335-
23, ATOM_Y, "atom_site.Cartn_y", Required;
336-
24, ATOM_Z, "atom_site.Cartn_z", Required;
333+
21, ATOM_AUTH_SEQ_ID, "atom_site.auth_seq_id", Optional;
334+
22, ATOM_TYPE, "atom_site.type_symbol", Required;
335+
23, ATOM_X, "atom_site.Cartn_x", Required;
336+
24, ATOM_Y, "atom_site.Cartn_y", Required;
337+
25, ATOM_Z, "atom_site.Cartn_z", Required;
337338
);
338339

339340
let positions_: Vec<Result<Option<usize>, PDBError>> = COLUMNS
@@ -393,8 +394,10 @@ fn parse_atoms(input: &Loop, pdb: &mut PDB) -> Option<Vec<PDBError>> {
393394
let residue_name =
394395
parse_column!(get_text, ATOM_COMP_ID).expect("Residue name should be provided");
395396
#[allow(clippy::cast_possible_wrap)]
396-
let residue_number = parse_column!(get_isize, ATOM_SEQ_ID)
397-
.unwrap_or_else(|| pdb.total_residue_count() as isize);
397+
let residue_number = parse_column!(get_isize, ATOM_AUTH_SEQ_ID).unwrap_or_else(|| {
398+
parse_column!(get_isize, ATOM_SEQ_ID)
399+
.unwrap_or_else(|| pdb.total_residue_count() as isize)
400+
});
398401
let chain_name =
399402
parse_column!(get_text, ATOM_ASYM_ID).expect("Chain name should be provided");
400403
let pos_x = parse_column!(get_f64, ATOM_X).expect("Atom X position should be provided");

src/save/general.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Saving
2+
3+
Once you have your [`PDB`] struct you can save it in a couple of ways. The output can be made in two different file formats: PDB and mmCIF. The saving functions represent this choice: [`save_pdb()`] and [`save_mmcif()`] are explicit about the output format, while [`save()`] chooses the format based on the extension of the given path: if the extension is `pdb` it will generate a PDB file, if the extension is `cif` it will generate an mmCIF file.
4+
5+
The other extra option is choosing the `*_raw` functions. These do not validate the [`PDB`] structs before saving and output directly to a [`std::io::BufWriter`]. The validation uses the [`validate_pdb()`] or [`validate()`] functions internally.
6+
7+
## All functions
8+
| Format | Normal | Without validation |
9+
| --- | --- | --- |
10+
| Based on input | [`save()`] | ... |
11+
| PDB | [`save_pdb()`] | [`save_pdb_raw()`] |
12+
| mmCIF | [`save_mmcif()`] | [`save_mmcif_raw()`] |

src/save/mmcif.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,6 @@ pub fn save_mmcif(
4444
/// Save the given PDB struct to the given BufWriter.
4545
/// It does not validate or renumber the PDB, so if that is needed that needs to be done in preparation.
4646
/// It does change the output format based on the StrictnessLevel given.
47-
///
48-
/// ## Warning
49-
/// This function is unstable and unfinished!
5047
#[allow(clippy::unwrap_used)]
5148
pub fn save_mmcif_raw<T: Write>(pdb: &PDB, mut sink: BufWriter<T>) {
5249
/// Write a piece of text to the file, has the same structure as format!
@@ -236,6 +233,7 @@ _atom_site.label_comp_id
236233
_atom_site.label_asym_id
237234
_atom_site.label_entity_id
238235
_atom_site.label_seq_id
236+
_atom_site.auth_seq_id
239237
_atom_site.pdbx_PDB_ins_code
240238
_atom_site.Cartn_x
241239
_atom_site.Cartn_y
@@ -266,7 +264,7 @@ _atom_site.aniso_U[3][3]"
266264
let mut chain_index = 0;
267265
for chain in model.chains() {
268266
chain_index += 1;
269-
for residue in chain.residues() {
267+
for (residue_index, residue) in chain.residues().enumerate() {
270268
for conformer in residue.conformers() {
271269
for atom in conformer.atoms() {
272270
let mut data = vec![
@@ -280,14 +278,15 @@ _atom_site.aniso_U[3][3]"
280278
conformer.name().to_string(), // Residue name
281279
chain.id().to_string(), // Chain name
282280
chain_index.to_string(), // Entity ID, using chain serial number
281+
(residue_index + 1).to_string(), // `label_seq_id` defined to be [1-N] where N is the index
283282
residue.serial_number().to_string(), // Residue serial number
284283
residue.insertion_code().unwrap_or(".").to_string(), // Insertion code
285-
print_float(atom.x()), // X
286-
print_float(atom.y()), // Y
287-
print_float(atom.z()), // Z
288-
print_float(atom.occupancy()), // OCC/Q
289-
print_float(atom.b_factor()), // B
290-
atom.charge().to_string(), // Charge
284+
print_float(atom.x()), // X
285+
print_float(atom.y()), // Y
286+
print_float(atom.z()), // Z
287+
print_float(atom.occupancy()), // OCC/Q
288+
print_float(atom.b_factor()), // B
289+
atom.charge().to_string(), // Charge
291290
model.serial_number().to_string(), // Model serial number
292291
];
293292
if anisou {

src/structs/general.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# PDB Hierarchy
2+
As explained in depth in the [documentation of CCTBX](https://cci.lbl.gov/cctbx_docs/iotbx/iotbx.pdb.html#iotbx-pdb-hierarchy)
3+
it can be quite hard to properly define a hierarchy for PDB files which works for all files.
4+
This library follows the hierarchy presented by CCTBX [`Grosse-Kunstleve, R. W. et al`], but renames the `residue_group` and
5+
`atom_group` constructs. This gives the following hierarchy, with the main identifying characteristics annotated per level.
6+
* [PDB]
7+
* [Model] \
8+
Serial number
9+
* [Chain] \
10+
Id
11+
* [Residue] (analogous to `residue_group` in CCTBX) \
12+
Serial number \
13+
Insertion code
14+
* [Conformer] (analogous to `atom_group` in CCTBX) \
15+
Name \
16+
Alternative location
17+
* [Atom] \
18+
Serial number \
19+
Name
20+
21+
# Iterating over the PDB Hierarchy
22+
```rust
23+
use pdbtbx::*;
24+
let (mut pdb, _errors) = pdbtbx::open(
25+
"example-pdbs/1ubq.pdb",
26+
pdbtbx::StrictnessLevel::Medium
27+
).unwrap();
28+
// Iterating over all levels
29+
for model in pdb.models() {
30+
for chain in model.chains() {
31+
for residue in chain.residues() {
32+
for conformer in residue.conformers() {
33+
for atom in conformer.atoms() {
34+
// Do the calculations
35+
}
36+
}
37+
}
38+
}
39+
}
40+
// Or only over a couple of levels (just like in the example above)
41+
for residue in pdb.residues() {
42+
for atom in residue.atoms() {
43+
// Do the calculations
44+
}
45+
}
46+
// Or with access to the information with a single line
47+
for hierarchy in pdb.atoms_with_hierarchy() {
48+
println!("Atom {} in Conformer {} in Residue {} in Chain {} in Model {}",
49+
hierarchy.atom().serial_number(),
50+
hierarchy.conformer().name(),
51+
hierarchy.residue().serial_number(),
52+
hierarchy.chain().id(),
53+
hierarchy.model().serial_number()
54+
);
55+
}
56+
// Or with mutable access to the members of the hierarchy
57+
for mut hierarchy in pdb.atoms_with_hierarchy_mut() {
58+
let new_x = hierarchy.atom().x() * 1.5;
59+
hierarchy.atom_mut().set_x(new_x);
60+
}
61+
```
62+
63+
# References
64+
1. [`Grosse-Kunstleve, R. W. et al`] Grosse-Kunstleve, R. W., Sauter, N. K., Moriarty, N. W., & Adams, P. D. (2002). The Computational Crystallography Toolbox: crystallographic algorithms in a reusable software framework. Journal of Applied Crystallography, 35(1), 126–136. [https://doi.org/10.1107/s0021889801017824](https://doi.org/10.1107/s0021889801017824)

src/validate.rs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::structs::*;
1010
/// ## Invariants Not Tested
1111
/// * Numbering of all structs, serial numbers should be unique. To enforce this the `renumber()` function should be called on the PDB struct.
1212
pub fn validate(pdb: &PDB) -> Vec<PDBError> {
13-
// Print warnings/errors and return a bool for success
1413
let mut errors = Vec::new();
1514
if pdb.model_count() > 1 {
1615
errors.append(&mut validate_models(pdb));
@@ -19,17 +18,25 @@ pub fn validate(pdb: &PDB) -> Vec<PDBError> {
1918
if pdb.atoms().next().is_none() {
2019
errors.push(PDBError::new(
2120
ErrorLevel::BreakingError,
22-
"No Atoms parsed",
23-
"No Atoms could be parsed from the given file. Please make sure it is a valid PDB/mmCIF file.",
24-
Context::None)
25-
)
21+
"No Atoms",
22+
"No Atoms in the given PDB struct while validating.",
23+
Context::None,
24+
))
2625
}
2726
errors
2827
}
2928

30-
/// Validates this models specifically for the PDB format
29+
/// Validates the models specifically for the PDB format.
30+
/// It returns PDBErrors with the warning messages.
31+
/// It extends the validation specified in the [`validate`] function with PDB specific validations.
32+
///
33+
/// ## Invariants Tested
34+
/// * Values fitting in the range of the PDB format columns, both numbers and textual values.
35+
///
36+
/// ## Invariants Not Tested
37+
/// * Numbering of all structs, serial numbers should be unique. To enforce this the `renumber()` function should be called on the PDB struct.
3138
pub fn validate_pdb(pdb: &PDB) -> Vec<PDBError> {
32-
let mut errors = Vec::new();
39+
let mut errors = validate(pdb);
3340
for model in pdb.models() {
3441
if model.serial_number() > 9999 {
3542
errors.push(PDBError::new(

0 commit comments

Comments
 (0)