Skip to content

Commit 21fecf6

Browse files
committed
implemented substores
Ref: annotation/stam#29
1 parent dba86c4 commit 21fecf6

File tree

4 files changed

+230
-1
lines changed

4 files changed

+230
-1
lines changed

src/annotationstore.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::error::PyStamError;
1212
use crate::query::*;
1313
use crate::resources::PyTextResource;
1414
use crate::selector::PySelector;
15+
use crate::substore::PyAnnotationSubStore;
1516
use crate::textselection::PyTextSelection;
1617
use stam::*;
1718
use stamtools::split::{split, SplitMode};
@@ -255,6 +256,30 @@ impl PyAnnotationStore {
255256
})
256257
}
257258

259+
/// Load an existing annotation store as a dependency to this one
260+
fn add_substore(&mut self, filename: &str) -> PyResult<PyAnnotationSubStore> {
261+
let store_clone = self.store.clone();
262+
self.map_mut(|store| {
263+
let handle = store.add_substore(filename)?;
264+
Ok(PyAnnotationSubStore {
265+
handle,
266+
store: store_clone,
267+
})
268+
})
269+
}
270+
271+
/// Create a new annotation store as a dependency of this one
272+
fn add_new_substore(&mut self, id: &str, filename: &str) -> PyResult<PyAnnotationSubStore> {
273+
let store_clone = self.store.clone();
274+
self.map_mut(|store| {
275+
let handle = store.add_new_substore(id, filename)?;
276+
Ok(PyAnnotationSubStore {
277+
handle,
278+
store: store_clone,
279+
})
280+
})
281+
}
282+
258283
/// Adds an annotation. Returns an :obj:`Annotation` instance pointing to the added annotation.
259284
///
260285
/// Args:
@@ -336,6 +361,14 @@ impl PyAnnotationStore {
336361
})
337362
}
338363

364+
/// Returns a generator over all substores in this store
365+
fn substores(&self) -> PyResult<PySubStoreIter> {
366+
Ok(PySubStoreIter {
367+
store: self.store.clone(),
368+
index: 0,
369+
})
370+
}
371+
339372
/// Returns the number of annotations in the store (not subtracting deletions)
340373
fn annotations_len(&self) -> PyResult<usize> {
341374
self.map(|store| Ok(store.annotations_len()))
@@ -351,6 +384,11 @@ impl PyAnnotationStore {
351384
self.map(|store| Ok(store.datasets_len()))
352385
}
353386

387+
/// Returns the number of substores in the store (not subtracting deletions)
388+
fn substores_len(&self) -> PyResult<usize> {
389+
self.map(|store| Ok(store.substores_len()))
390+
}
391+
354392
fn shrink_to_fit(&mut self) -> PyResult<()> {
355393
self.map_mut(|store| Ok(store.shrink_to_fit(true)))
356394
}
@@ -804,3 +842,55 @@ impl PyResourceIter {
804842
}
805843
}
806844
}
845+
846+
#[pyclass(name = "SubStoreIter")]
847+
struct PySubStoreIter {
848+
pub(crate) store: Arc<RwLock<AnnotationStore>>,
849+
pub(crate) index: usize,
850+
}
851+
852+
#[pymethods]
853+
impl PySubStoreIter {
854+
fn __iter__(pyself: PyRef<'_, Self>) -> PyRef<'_, Self> {
855+
pyself
856+
}
857+
858+
fn __next__(mut pyself: PyRefMut<'_, Self>) -> Option<PyAnnotationSubStore> {
859+
pyself.index += 1; //increment first (prevent exclusive mutability issues)
860+
let result = pyself.map(|store| {
861+
let handle: AnnotationSubStoreHandle = AnnotationSubStoreHandle::new(pyself.index - 1);
862+
if let Ok(substore) = store.get(handle) {
863+
//index is one ahead, prevents exclusive lock issues
864+
let handle = substore.handle().expect("annotation must have an ID");
865+
Some(PyAnnotationSubStore {
866+
handle,
867+
store: pyself.store.clone(),
868+
})
869+
} else {
870+
None
871+
}
872+
});
873+
if result.is_some() {
874+
result
875+
} else {
876+
if pyself.index >= pyself.map(|store| Some(store.annotations_len())).unwrap() {
877+
None
878+
} else {
879+
Self::__next__(pyself)
880+
}
881+
}
882+
}
883+
}
884+
885+
impl PySubStoreIter {
886+
fn map<T, F>(&self, f: F) -> Option<T>
887+
where
888+
F: FnOnce(&AnnotationStore) -> Option<T>,
889+
{
890+
if let Ok(store) = self.store.read() {
891+
f(&store)
892+
} else {
893+
None //should never happen here
894+
}
895+
}
896+
}

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ mod error;
1010
mod query;
1111
mod resources;
1212
mod selector;
13+
mod substore;
1314
mod textselection;
1415

1516
use crate::annotation::{PyAnnotation, PyAnnotations};
@@ -19,6 +20,7 @@ use crate::annotationstore::PyAnnotationStore;
1920
use crate::error::PyStamError;
2021
use crate::resources::{PyCursor, PyOffset, PyTextResource};
2122
use crate::selector::{PySelector, PySelectorKind};
23+
use crate::substore::PyAnnotationSubStore;
2224
use crate::textselection::{PyTextSelection, PyTextSelectionOperator, PyTextSelections};
2325

2426
const VERSION: &'static str = env!("CARGO_PKG_VERSION");
@@ -43,5 +45,6 @@ fn stam(py: Python<'_>, m: &PyModule) -> PyResult<()> {
4345
m.add_class::<PyAnnotations>()?;
4446
m.add_class::<PyData>()?;
4547
m.add_class::<PyTextSelections>()?;
48+
m.add_class::<PyAnnotationSubStore>()?;
4649
Ok(())
4750
}

src/substore.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
use pyo3::exceptions::{PyRuntimeError, PyValueError};
2+
use pyo3::prelude::*;
3+
use pyo3::pyclass::CompareOp;
4+
use pyo3::types::*;
5+
use std::borrow::Cow;
6+
use std::ops::FnOnce;
7+
use std::sync::{Arc, RwLock};
8+
9+
use crate::annotation::PyAnnotations;
10+
use crate::error::PyStamError;
11+
use stam::*;
12+
13+
#[pyclass(dict, module = "stam", name = "AnnotationSubStore")]
14+
/// This holds an annotation store that is included as a dependency into another one
15+
///
16+
/// The text *SHOULD* be in
17+
/// [Unicode Normalization Form C (NFC)](https://www.unicode.org/reports/tr15/) but
18+
/// *MAY* be in another Unicode normalization form.
19+
pub(crate) struct PyAnnotationSubStore {
20+
pub(crate) handle: AnnotationSubStoreHandle,
21+
pub(crate) store: Arc<RwLock<AnnotationStore>>,
22+
}
23+
24+
impl PyAnnotationSubStore {
25+
pub(crate) fn new(
26+
handle: AnnotationSubStoreHandle,
27+
store: Arc<RwLock<AnnotationStore>>,
28+
) -> Self {
29+
Self { handle, store }
30+
}
31+
32+
pub(crate) fn new_py<'py>(
33+
handle: AnnotationSubStoreHandle,
34+
store: Arc<RwLock<AnnotationStore>>,
35+
py: Python<'py>,
36+
) -> &'py PyAny {
37+
Self::new(handle, store).into_py(py).into_ref(py)
38+
}
39+
}
40+
41+
#[pymethods]
42+
impl PyAnnotationSubStore {
43+
/// Returns the public ID (by value, aka a copy)
44+
/// Don't use this for ID comparisons, use has_id() instead
45+
fn id(&self) -> PyResult<Option<String>> {
46+
self.map(|substore| Ok(substore.id().map(|x| x.to_owned())))
47+
}
48+
49+
fn filename(&self) -> PyResult<Option<String>> {
50+
self.map(|s| {
51+
Ok(s.as_ref()
52+
.filename()
53+
.map(|s| s.to_string_lossy().into_owned()))
54+
})
55+
}
56+
57+
fn has_id(&self, other: &str) -> PyResult<bool> {
58+
self.map(|substore| Ok(substore.id() == Some(other)))
59+
}
60+
61+
fn has_filename(&self, filename: &str) -> PyResult<bool> {
62+
self.map(|substore| {
63+
Ok(substore.as_ref().filename().map(|s| s.to_string_lossy())
64+
== Some(Cow::Borrowed(filename)))
65+
})
66+
}
67+
68+
fn __richcmp__(&self, other: PyRef<Self>, op: CompareOp) -> bool {
69+
match op {
70+
CompareOp::Eq => self.handle == other.handle,
71+
CompareOp::Ne => self.handle != other.handle,
72+
CompareOp::Lt => self.handle < other.handle,
73+
CompareOp::Gt => self.handle > other.handle,
74+
CompareOp::Le => self.handle <= other.handle,
75+
CompareOp::Ge => self.handle >= other.handle,
76+
}
77+
}
78+
79+
fn __hash__(&self) -> usize {
80+
self.handle.as_usize()
81+
}
82+
}
83+
84+
impl PyAnnotationSubStore {
85+
/// Map function to act on the actual underlying store, helps reduce boilerplate
86+
pub(crate) fn map<T, F>(&self, f: F) -> Result<T, PyErr>
87+
where
88+
F: FnOnce(ResultItem<AnnotationSubStore>) -> Result<T, StamError>,
89+
{
90+
if let Ok(store) = self.store.read() {
91+
let substore = store
92+
.substore(self.handle)
93+
.ok_or_else(|| PyRuntimeError::new_err("Failed to resolve substore"))?;
94+
f(substore).map_err(|err| PyStamError::new_err(format!("{}", err)))
95+
} else {
96+
Err(PyRuntimeError::new_err(
97+
"Unable to obtain store (should never happen)",
98+
))
99+
}
100+
}
101+
}

stam.pyi

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,19 @@ class AnnotationStore:
133133
If you want to store the dataset as a stand-off JSON file, you can specify a filename. The dataset will be loaded from file if it exists. Make sure to set `use_include = True` in the Annotation Store's configuration then.
134134
"""
135135

136+
def add_substore(self, filename: str) -> AnnotationSubStore:
137+
"""
138+
Load an existing annotation store as a dependency to this one. It will be stored in a stand-off JSON file and included using the @include mechanism.
139+
Returns the added substore.
140+
"""
141+
142+
def add_new_substore(self, id: str, filename: str) -> AnnotationSubStore:
143+
"""
144+
Add a new empty annotation store as a dependency to this one.
145+
It will be stored in a stand-off JSON file and included using the @include mechanism.
146+
Returns the added substore.
147+
"""
148+
136149
def set_filename(self, filename: str) -> None:
137150
"""Set the filename for the annotationstore, the format is derived from the extension, can be `.json` or `.csv`"""
138151

@@ -214,6 +227,9 @@ class AnnotationStore:
214227
def resources(self) -> Iterator[TextResource]:
215228
"""Returns an iterator over all text resources (:class:`TextResource`) in this store"""
216229

230+
def substores(self) -> Iterator[AnnotationSubStore]:
231+
"""Returns an iterator over all substores (:class:`AnnotationSubStore`) in this store, i.e. stores that are included by this one as dependencies"""
232+
217233
def annotations_len(self) -> int:
218234
"""Returns the number of annotations in the store (not subtracting deletions)"""
219235

@@ -223,6 +239,9 @@ class AnnotationStore:
223239
def resources_len(self) -> int:
224240
"""Returns the number of text resources in the store (not subtracting deletions)"""
225241

242+
def substores_len(self) -> int:
243+
"""Returns the number of substores in the store"""
244+
226245
def shrink_to_fit(self):
227246
"""Reallocates internal data structures to tight fits to conserve memory space (if necessary). You can use this after having added lots of annotations to possibly reduce the memory consumption."""
228247

@@ -1604,7 +1623,7 @@ class TextResource:
16041623
def filename(self, filename: str) -> Optional[str]:
16051624
"""Returns the filename for the stand-off file specified using @include (if any). This allocates a copy, use has_filename() for checking."""
16061625

1607-
def has_filename(self, filename: str) -> Optional[str]:
1626+
def has_filename(self, filename: str) -> bool:
16081627
"""Tests the filename for the stand-off file specified using @include (if any)."""
16091628

16101629
def id(self) -> Optional[str]:
@@ -2157,5 +2176,21 @@ class TextSelectionOperator:
21572176
Inverses the operator (turns it into a negation).
21582177
"""
21592178

2179+
class AnnotationSubStore:
    """
    A substore is a sub-collection of annotations that is serialised as an independent AnnotationStore.
    In STAM JSON it is included using the @include mechanism.
    """

    def id(self) -> Optional[str]:
        """Returns the public identifier (by value, aka a copy). Don't use this for ID comparisons, use has_id() instead."""

    def has_id(self, other: str) -> bool:
        """Tests whether the substore has the specified public identifier."""

    def filename(self) -> Optional[str]:
        """Returns the filename for the stand-off annotation store (if any). This allocates a copy, use has_filename() for checking."""

    def has_filename(self, filename: str) -> bool:
        """Tests the filename for the stand-off file specified using @include (if any)."""
2193+
2194+
21602195
class StamError(Exception):
21612196
"""STAM Error"""

0 commit comments

Comments
 (0)