Skip to content

Commit c9fb078

Browse files
authored
MRG: panic when FSStorage::load_sig encounters more than one Signature in a JSON record (#3333)
This PR was originally about debugging sourmash-bio/sourmash_plugin_branchwater#445, but that's going to require more work to fix properly. For now, I would like to nominate it for merge because sourmash fails silently in this situation, and that's Bad. In brief, the main thing this PR does is panic with an `unimplemented!` when `FSStorage::load_sig` (and likewise the default `Storage::load_sig` and `ZipStorage::load_sig`) encounters more than one `Signature` in a JSON record. This PR also adds a bit of documentation to `InnerStorage`, per the bottom of [this comment](sourmash-bio/sourmash_plugin_branchwater#445 (comment)). --- The problem at hand: when loading a `SigStore`/`Signature` from a `Storage`, sourmash only loads the first one and ignores any others. https://github.com/sourmash-bio/sourmash/blob/26b50f3e3566006fd6356a4f8b4d47c5e381aeec/src/core/src/storage/mod.rs#L34-L38 This results from the concept of a `Signature` as containing one or more sketches; the history of this is described [here](#616 (comment)), and it leads to some interesting silliness [in the Python layer](https://github.com/sourmash-bio/sourmash/blob/d63c464e825529fa54bb7e8b81faa53b858b09de/src/sourmash/save_load.py#L297). The flip side is that, in Rust, a single `Signature` can include multiple sketches, e.g. with different ksizes. So this works fine for the wort case where we have a single `.sig` file with k=21, k=31, k=51. Note that the Python layer (and hence the entire sourmash CLI) fully supports multiple `Signature`s in JSON: this is well-tested and well-covered behavior. The branchwater plugin runs into it because it is using the Rust layer and the API is not fully fleshed out there. ---
1 parent f707db4 commit c9fb078

File tree

3 files changed

+43
-9
lines changed

3 files changed

+43
-9
lines changed

src/core/src/collection.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,23 @@ mod test {
416416
}
417417
}
418418

419+
#[test]
420+
#[should_panic] // for now...
421+
fn sigstore_sig_from_record_2() {
422+
let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
423+
filename.push("../../tests/test-data/short.sig.gz");
424+
let v = [filename];
425+
let collection = Collection::from_paths(&v).expect("no sigs!?");
426+
427+
// pull off first record
428+
let v: Vec<_> = collection.iter().collect();
429+
let (_idx, rec) = v.first().expect("no records in collection?!");
430+
431+
// this will panic with "unimplemented" because there are two
432+
// Signatures in the JSON record (not sketches) and that is not supported.
433+
let _first_sig = collection.sig_from_record(rec).expect("no sig!?");
434+
}
435+
419436
#[test]
420437
fn sigstore_selection_moltype_zip() {
421438
// load test sigs

src/core/src/storage/mod.rs

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ pub trait Storage {
3434
/// Load signature from internal path
3535
fn load_sig(&self, path: &str) -> Result<SigStore> {
3636
let raw = self.load(path)?;
37-
let sig = Signature::from_reader(&mut &raw[..])?
38-
// TODO: select the right sig?
39-
.swap_remove(0);
37+
let mut vs = Signature::from_reader(&mut &raw[..])?;
38+
if vs.len() > 1 {
39+
unimplemented!("only one Signature currently allowed");
40+
}
41+
let sig = vs.swap_remove(0);
4042

4143
Ok(sig.into())
4244
}
@@ -70,6 +72,16 @@ pub enum StorageError {
7072
MissingFeature(String, String),
7173
}
7274

75+
/// InnerStorage: a catch-all type that allows using any Storage in
76+
/// parallel contexts.
77+
///
78+
/// Arc allows ref counting to share it between threads;
79+
/// RwLock allows either a single writer or many concurrent readers;
80+
/// dyn Storage so we can init with anything that implements the Storage trait.
81+
82+
// Send + Sync + 'static is kind of a cheat to avoid lifetime issues: we
83+
// should get rid of that 'static if possible... -- Luiz.
84+
7385
#[derive(Clone)]
7486
pub struct InnerStorage(Arc<RwLock<dyn Storage + Send + Sync + 'static>>);
7587

@@ -299,9 +311,12 @@ impl Storage for FSStorage {
299311

300312
fn load_sig(&self, path: &str) -> Result<SigStore> {
301313
let raw = self.load(path)?;
302-
let sig = Signature::from_reader(&mut &raw[..])?
303-
// TODO: select the right sig?
304-
.swap_remove(0);
314+
315+
let mut vs = Signature::from_reader(&mut &raw[..])?;
316+
if vs.len() > 1 {
317+
unimplemented!("only one Signature currently allowed when using 'load_sig'");
318+
}
319+
let sig = vs.swap_remove(0);
305320

306321
Ok(sig.into())
307322
}
@@ -369,9 +384,11 @@ impl Storage for ZipStorage {
369384

370385
fn load_sig(&self, path: &str) -> Result<SigStore> {
371386
let raw = self.load(path)?;
372-
let sig = Signature::from_reader(&mut &raw[..])?
373-
// TODO: select the right sig?
374-
.swap_remove(0);
387+
let mut vs = Signature::from_reader(&mut &raw[..])?;
388+
if vs.len() > 1 {
389+
unimplemented!("only one Signature currently allowed");
390+
}
391+
let sig = vs.swap_remove(0);
375392

376393
Ok(sig.into())
377394
}

tests/test-data/short.sig.gz

10.7 KB
Binary file not shown.

0 commit comments

Comments
 (0)