Skip to content

Commit 04dff3d

Browse files
committed
WIP: Make Image-reading objects only instantiate format when necessary
This should mean that any need for check_format will go away, because the file will not be touched unless you try to access the data. Heavily WIP; specific known things still to be done: (1) Pickle support, as these objects will probably be sent across process boundaries; (2) Exhaustive testing.
1 parent 043c4c2 commit 04dff3d

File tree

4 files changed

+97
-38
lines changed

4 files changed

+97
-38
lines changed

src/dxtbx/boost_python/imageset_ext.cc

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,30 @@ namespace dxtbx { namespace boost_python {
4040
}
4141
} // namespace detail
4242

43-
ImageSetData::masker_ptr make_masker_pointer(boost::python::object masker) {
44-
if (masker == boost::python::object()) {
45-
return ImageSetData::masker_ptr();
46-
}
47-
return boost::python::extract<ImageSetData::masker_ptr>(masker)();
48-
}
49-
5043
/**
5144
* A constructor for the imageset data class
5245
*/
5346
std::shared_ptr<ImageSetData> make_imageset_data1(boost::python::object reader,
5447
boost::python::object masker) {
55-
// Create the pointer
56-
std::shared_ptr<ImageSetData> self(
57-
new ImageSetData(reader, make_masker_pointer(masker)));
58-
59-
// Return the imageset data
60-
return self;
48+
// Distinguish between the three masker states:
49+
// - A direct GoniometerShadowMasker instance
50+
// - A callable () -> GoniometerShadowMasker (we want to defer render)
51+
// - None
52+
boost::python::extract<ImageSetData::masker_ptr> get_masker_ptr(masker);
53+
ImageSetData *self = nullptr;
54+
if (get_masker_ptr.check()) {
55+
self = new ImageSetData(reader, get_masker_ptr());
56+
} else if (masker == boost::python::object()) {
57+
self = new ImageSetData(reader, nullptr);
58+
} else if (PyCallable_Check(masker.ptr())) {
59+
self = new ImageSetData(reader, masker);
60+
} else {
61+
PyErr_SetString(PyExc_TypeError,
62+
"Masker object must be: GoniometerShadowMasker | Callable[[], "
63+
"GoniometerShadow] | None");
64+
throw boost::python::error_already_set();
65+
}
66+
return std::shared_ptr<ImageSetData>(self);
6167
}
6268

6369
/**
@@ -70,8 +76,7 @@ namespace dxtbx { namespace boost_python {
7076
boost::python::dict params,
7177
boost::python::object format) {
7278
// Create the pointer
73-
std::shared_ptr<ImageSetData> self(
74-
new ImageSetData(reader, make_masker_pointer(masker)));
79+
auto self = make_imageset_data1(reader, masker);
7580

7681
// Set some stuff
7782
self->set_template(filename_template);
@@ -473,7 +478,8 @@ namespace dxtbx { namespace boost_python {
473478
&ExternalLookupItem<T>::get_filename,
474479
&ExternalLookupItem<T>::set_filename)
475480
.add_property(
476-
"data", &ExternalLookupItem<T>::get_data, &ExternalLookupItem<T>::set_data);
481+
"data", &ExternalLookupItem<T>::get_data, &ExternalLookupItem<T>::set_data)
482+
.def("set_data_generator", &ExternalLookupItem<T>::set_data_generator);
477483
}
478484

479485
/**

src/dxtbx/dxtbx_imageset_ext.pyi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,14 @@ class ExternalLookup:
3434
class ExternalLookupItemBool:
3535
data: ImageBool
3636
filename: str
37+
def set_data_generator(self, generator: Callable[[], ImageBool | None]) -> None: ...
3738

3839
class ExternalLookupItemDouble:
3940
data: ImageDouble
4041
filename: str
42+
def set_data_generator(
43+
self, generator: Callable[[], ImageDouble | None]
44+
) -> None: ...
4145

4246
class ImageGrid(ImageSet):
4347
def __init__(self, *args, **kwargs) -> None: ...

src/dxtbx/format/FormatMultiImage.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,26 +39,30 @@ def __init__(
3939
self.format_class = format_class
4040
assert len(filenames) == 1
4141
self._filename = filenames[0]
42-
if num_images is None:
43-
format_instance = self.format_class.get_instance(
44-
self._filename, **self.kwargs
45-
)
46-
self._num_images = format_instance.get_num_images()
47-
else:
48-
self._num_images = num_images
42+
self._num_images = num_images
43+
self._format_instance = None
44+
45+
def get_format_instance(self) -> FormatMultiImage:
46+
print("Instantiating format class from Reader")
47+
# if self._format_instance is None:
48+
# self._format_instance =
49+
# return self._format_instance
50+
return self.format_class.get_instance(self._filename, **self.kwargs)
4951

5052
def nullify_format_instance(self):
5153
self.format_class._current_instance_ = None
5254
self.format_class._current_filename_ = None
55+
self._format_instance = None
5356

5457
def read(self, index):
55-
format_instance = self.format_class.get_instance(self._filename, **self.kwargs)
56-
return format_instance.get_raw_data(index)
58+
return self.get_format_instance().get_raw_data(index)
5759

5860
def paths(self):
5961
return [self._filename]
6062

6163
def __len__(self) -> int:
64+
if self._num_images is None:
65+
self._num_images = self.get_format_instance().get_num_images()
6266
return self._num_images
6367

6468
def copy(self, filenames: Sequence[str], num_images: int | None = None):

src/dxtbx/imageset.h

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ class ExternalLookupItem {
8686
* Get the data
8787
*/
8888
Image<T> get_data() const {
89+
if (generator_ != boost::python::object()) {
90+
auto generated = generator_();
91+
// Only replace data if the generator returned something other than None
92+
if (generated != boost::python::object()) {
93+
data_ = boost::python::extract<Image<T>>(generated)();
94+
}
95+
// Discard the generator, no matter what
96+
generator_ = boost::python::object();
97+
}
8998
return data_;
9099
}
91100

@@ -97,9 +106,18 @@ class ExternalLookupItem {
97106
data_ = data;
98107
}
99108

109+
/// Set a generator, to only load the external item data on first use
110+
///
111+
/// This is a Python Callable[[], Image<T> | None] function. The function will
112+
/// be discarded after first use.
113+
void set_data_generator(boost::python::object generator) {
114+
generator_ = generator;
115+
}
116+
100117
protected:
101118
std::string filename_;
102-
Image<T> data_;
119+
mutable Image<T> data_;
120+
mutable boost::python::object generator_;
103121
};
104122

105123
/**
@@ -171,12 +189,22 @@ class ImageSetData {
171189
ImageSetData(boost::python::object reader, masker_ptr masker)
172190
: reader_(reader),
173191
masker_(masker),
192+
masker_obj_(),
174193
beams_(boost::python::len(reader)),
175194
detectors_(boost::python::len(reader)),
176195
goniometers_(boost::python::len(reader)),
177196
scans_(boost::python::len(reader)),
178197
reject_(boost::python::len(reader)) {}
179198

199+
ImageSetData(boost::python::object reader, boost::python::object masker)
200+
: reader_(reader),
201+
masker_(),
202+
masker_obj_(masker),
203+
beams_(boost::python::len(reader)),
204+
detectors_(boost::python::len(reader)),
205+
goniometers_(boost::python::len(reader)),
206+
scans_(boost::python::len(reader)),
207+
reject_(boost::python::len(reader)) {}
180208
/**
181209
* @returns The reader object
182210
*/
@@ -188,14 +216,19 @@ class ImageSetData {
188216
* @returns The masker object
189217
*/
190218
masker_ptr masker() {
219+
if (masker_ == nullptr && masker_obj_ != boost::python::object()) {
220+
masker_ = boost::python::extract<ImageSetData::masker_ptr>(masker_obj_())();
221+
masker_obj_ = boost::python::object();
222+
}
223+
191224
return masker_;
192225
}
193226

194227
/**
195228
* @returns Does the imageset have a dynamic mask.
196229
*/
197230
bool has_dynamic_mask() const {
198-
return masker_ != NULL;
231+
return masker_ != nullptr || masker_obj_ != boost::python::object();
199232
}
200233

201234
/**
@@ -440,7 +473,12 @@ class ImageSetData {
440473
std::size_t first,
441474
std::size_t last) const {
442475
DXTBX_ASSERT(last > first);
443-
ImageSetData partial = ImageSetData(reader, masker_);
476+
ImageSetData partial;
477+
if (masker_ == nullptr && masker_obj_ != boost::python::object()) {
478+
partial = ImageSetData(reader, masker_obj_);
479+
} else {
480+
partial = ImageSetData(reader, masker_);
481+
}
444482
for (size_t i = 0; i < last - first; i++) {
445483
partial.beams_[i] = beams_[i + first];
446484
partial.detectors_[i] = detectors_[i + first];
@@ -513,11 +551,18 @@ class ImageSetData {
513551
flex_type a = boost::python::extract<flex_type>(obj)();
514552

515553
// Return the image tile
516-
return ImageTile<T>(scitbx::af::versa<T, scitbx::af::c_grid<2> >(
554+
return ImageTile<T>(scitbx::af::versa<T, scitbx::af::c_grid<2>>(
517555
a.handle(), scitbx::af::c_grid<2>(a.accessor())));
518556
}
519557

520558
boost::python::object reader_;
559+
/// Hold an object that can be called to get the masker.
560+
///
561+
/// This won't be called until the masker is actually required, under
562+
/// the assumption that accessing the masker requires accessing the raw
563+
/// data file.
564+
boost::python::object masker_obj_;
565+
/// The Goniometer Masker object, if loaded (or present)
521566
std::shared_ptr<GoniometerShadowMasker> masker_;
522567
scitbx::af::shared<beam_ptr> beams_;
523568
scitbx::af::shared<detector_ptr> detectors_;
@@ -660,8 +705,8 @@ class ImageSet {
660705
* @returns The corrected data array
661706
*/
662707
Image<double> get_corrected_data(std::size_t index) {
663-
typedef scitbx::af::versa<double, scitbx::af::c_grid<2> > array_type;
664-
typedef scitbx::af::const_ref<double, scitbx::af::c_grid<2> > const_ref_type;
708+
typedef scitbx::af::versa<double, scitbx::af::c_grid<2>> array_type;
709+
typedef scitbx::af::const_ref<double, scitbx::af::c_grid<2>> const_ref_type;
665710

666711
// Get the multi-tile data, gain and pedestal
667712
DXTBX_ASSERT(index < indices_.size());
@@ -758,7 +803,7 @@ class ImageSet {
758803
std::size_t xsize = detector[i].get_image_size()[0];
759804
std::size_t ysize = detector[i].get_image_size()[1];
760805
scitbx::af::c_grid<2> grid(ysize, xsize);
761-
scitbx::af::versa<double, scitbx::af::c_grid<2> > data(grid, gain[i]);
806+
scitbx::af::versa<double, scitbx::af::c_grid<2>> data(grid, gain[i]);
762807
result.push_back(ImageTile<double>(data));
763808
}
764809
return result;
@@ -797,7 +842,7 @@ class ImageSet {
797842
std::size_t xsize = detector[i].get_image_size()[0];
798843
std::size_t ysize = detector[i].get_image_size()[1];
799844
scitbx::af::c_grid<2> grid(ysize, xsize);
800-
scitbx::af::versa<double, scitbx::af::c_grid<2> > data(grid, pedestal[i]);
845+
scitbx::af::versa<double, scitbx::af::c_grid<2>> data(grid, pedestal[i]);
801846
result.push_back(ImageTile<double>(data));
802847
}
803848
return result;
@@ -824,7 +869,7 @@ class ImageSet {
824869
for (std::size_t i = 0; i < detector.size(); ++i) {
825870
std::size_t xsize = detector[i].get_image_size()[0];
826871
std::size_t ysize = detector[i].get_image_size()[1];
827-
mask.push_back(ImageTile<bool>(scitbx::af::versa<bool, scitbx::af::c_grid<2> >(
872+
mask.push_back(ImageTile<bool>(scitbx::af::versa<bool, scitbx::af::c_grid<2>>(
828873
scitbx::af::c_grid<2>(ysize, xsize), true)));
829874
}
830875
return mask;
@@ -854,8 +899,8 @@ class ImageSet {
854899
if (!external_mask.empty()) {
855900
DXTBX_ASSERT(external_mask.n_tiles() == mask.n_tiles());
856901
for (std::size_t i = 0; i < mask.n_tiles(); ++i) {
857-
scitbx::af::ref<bool, scitbx::af::c_grid<2> > m1 = mask.tile(i).data().ref();
858-
scitbx::af::const_ref<bool, scitbx::af::c_grid<2> > m2 =
902+
scitbx::af::ref<bool, scitbx::af::c_grid<2>> m1 = mask.tile(i).data().ref();
903+
scitbx::af::const_ref<bool, scitbx::af::c_grid<2>> m2 =
859904
external_mask.tile(i).data().const_ref();
860905
DXTBX_ASSERT(m1.accessor().all_eq(m2.accessor()));
861906
for (std::size_t j = 0; j < m1.size(); ++j) {
@@ -1102,14 +1147,14 @@ class ImageSet {
11021147
*/
11031148
void clear_cache() {
11041149
data_cache_ = DataCache<ImageBuffer>();
1105-
double_raw_data_cache_ = DataCache<Image<double> >();
1150+
double_raw_data_cache_ = DataCache<Image<double>>();
11061151
}
11071152

11081153
protected:
11091154
ImageSetData data_;
11101155
scitbx::af::shared<std::size_t> indices_;
11111156
DataCache<ImageBuffer> data_cache_;
1112-
DataCache<Image<double> > double_raw_data_cache_;
1157+
DataCache<Image<double>> double_raw_data_cache_;
11131158

11141159
Image<double> get_raw_data_as_double(std::size_t index) {
11151160
DXTBX_ASSERT(index < indices_.size());

0 commit comments

Comments
 (0)