diff --git a/.github/workflows/wheel_build.yml b/.github/workflows/wheel_build.yml index 7df4a36..93993f7 100644 --- a/.github/workflows/wheel_build.yml +++ b/.github/workflows/wheel_build.yml @@ -14,7 +14,7 @@ jobs: matrix: os: [ubuntu-20.04, macos-13, windows-latest] cibw_archs: ["auto64"] - cibw_build: ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"] + cibw_build: ["cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: actions/checkout@v3 @@ -34,7 +34,7 @@ jobs: - uses: actions/setup-python@v4 name: Install Python with: - python-version: '3.9' + python-version: '3.10' - name: Install cibuildwheel run: | @@ -44,7 +44,7 @@ jobs: run: | python -m cibuildwheel --output-dir dist env: - CIBW_BUILD: ${{ matrix.cibw_build }} + CIBW_BUILD: ${{ matrix.cibw_build }}-* CIBW_BUILD_VERBOSITY: 3 CIBW_SKIP: "*musllinux*" CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 @@ -78,9 +78,9 @@ jobs: CIBW_TEST_COMMAND: python -W default -m unittest discover -s {project}/tests -v - name: Upload Artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: argolid-wheels + name: argolid-wheels-${{ matrix.os }}-${{ matrix.cibw_archs }}-${{ matrix.cibw_build }} path: dist/*.whl retention-days: 1 @@ -93,7 +93,7 @@ jobs: matrix: os: [macos-13-xlarge] cibw_archs: ["arm64"] - cibw_build: ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"] + cibw_build: ["cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: actions/checkout@v3 @@ -112,7 +112,7 @@ jobs: run: | python -m cibuildwheel --output-dir dist env: - CIBW_BUILD: ${{ matrix.cibw_build }} + CIBW_BUILD: ${{ matrix.cibw_build }}-* CIBW_BUILD_VERBOSITY: 3 CIBW_ARCHS_MACOS: arm64 CIBW_BEFORE_ALL_MACOS: brew install nasm && @@ -130,8 +130,8 @@ jobs: CIBW_TEST_COMMAND: python -W default -m unittest discover -s {project}/tests -v - name: Upload Artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: argolid-wheels-apple-arm64 + name: argolid-wheels-apple-arm64-${{ 
matrix.os }}-${{ matrix.cibw_archs }}-${{ matrix.cibw_build }} path: dist/*.whl retention-days: 1 \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index e8ee5c3..ccc1201 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ set(SOURCE src/cpp/core/chunked_pyramid_assembler.cpp src/cpp/core/chunked_base_to_pyr_gen.cpp src/cpp/core/ome_tiff_to_chunked_pyramid.cpp + src/cpp/core/pyramid_compositor.cpp src/cpp/core/pyramid_view.cpp src/cpp/utilities/utilities.cpp ) diff --git a/src/cpp/core/pyramid_compositor.cpp b/src/cpp/core/pyramid_compositor.cpp new file mode 100644 index 0000000..95b2f5b --- /dev/null +++ b/src/cpp/core/pyramid_compositor.cpp @@ -0,0 +1,427 @@ +#include "pyramid_compositor.h" + +#include +#include +#include +#include + +#include "tensorstore/context.h" +#include "tensorstore/array.h" +#include "tensorstore/driver/zarr/dtype.h" +#include "tensorstore/index_space/dim_expression.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/open.h" + +#include + +using ::tensorstore::internal_zarr::ChooseBaseDType; +using json = nlohmann::json; + +namespace fs = std::filesystem; + +namespace argolid { +PyramidCompositor::PyramidCompositor(const std::string& input_pyramids_loc, const std::string& out_dir, const std::string& output_pyramid_name): + _input_pyramids_loc(input_pyramids_loc), + _output_pyramid_name(std::filesystem::path(out_dir) / output_pyramid_name), + _ome_metadata_file(out_dir + "/" + output_pyramid_name + "/METADATA.ome.xml"), + _pyramid_levels(-1), + _num_channels(-1) {} + + +void PyramidCompositor::create_xml() { + CreateXML( + this->_output_pyramid_name.u8string(), + this->_ome_metadata_file, + this->_plate_image_shapes[0], + this->_image_dtype + ); +} + +void PyramidCompositor::create_zattr_file() { + WriteTSZattrFilePlateImage( + this->_output_pyramid_name.u8string(), + this->_output_pyramid_name.u8string() + "/data.zarr/0", + this->_plate_image_shapes + ); +} + +void 
PyramidCompositor::create_zgroup_file() { + WriteVivZgroupFiles(this->_output_pyramid_name.u8string()); +} + +void PyramidCompositor::create_auxiliary_files() { + create_xml(); + create_zattr_file(); + create_zgroup_file(); +} + +void PyramidCompositor::write_zarr_chunk(int level, int channel, int y_index, int x_index) { + + std::tuple chunk = std::make_tuple(level, channel, y_index, x_index); + + if (_chunk_cache.find(chunk) != _chunk_cache.end()) { + return; + } + + if (_composition_map.empty()) { + std::cerr << "No composition map is set. Unable to generate pyramid" << std::endl; + return; + } + + if (_unit_image_shapes.find(level) == _unit_image_shapes.end()) { + std::cerr << "Requested level (" + std::to_string(level) + ") does not exist" << std::endl; + return; + } + + if (channel >= _num_channels) { + std::cerr << "Requested channel (" + std::to_string(channel) + ") does not exist" << std::endl; + return; + } + + auto plate_shape_it = _plate_image_shapes.find(level); + if (plate_shape_it == _plate_image_shapes.end()) { + std::cerr << "No plate image shapes found for level (" + std::to_string(level) + ")" << std::endl; + return; + } + + const auto& plate_shape = plate_shape_it->second; + if (plate_shape.size() < 4 || y_index > (plate_shape[3] / CHUNK_SIZE)) { + std::cerr << "Requested y index (" + std::to_string(y_index) + ") does not exist" << std::endl; + return; + } + + if (plate_shape.size() < 5 || x_index > (plate_shape[4] / CHUNK_SIZE)) { + std::cerr << "Requested x index (" + std::to_string(x_index) + ") does not exist" << std::endl; + return; + } + + // get datatype by opening + std::string input_file_name = _composition_map[{x_index, y_index, channel}]; + std::filesystem::path zarrArrayLoc = _output_pyramid_name / input_file_name / std::filesystem::path("data.zarr/0/") / std::to_string(level); + + auto read_spec = GetZarrSpecToRead(zarrArrayLoc.u8string()); + + tensorstore::TensorStore source; + + TENSORSTORE_CHECK_OK_AND_ASSIGN(source, 
tensorstore::Open( + read_spec, + tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read).result()); + + auto data_type = GetDataTypeCode(source.dtype().name()); + + switch(data_type){ + case 1: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 2: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 4: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 8: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 16: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 32: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 64: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 128: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 256: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + case 512: + _write_zarr_chunk(level, channel, y_index, x_index); + break; + default: + break; + } + + _chunk_cache.insert(chunk); +} + +template +void PyramidCompositor::_write_zarr_chunk(int level, int channel, int y_index, int x_index) { + + // Compute ranges in global coordinates + auto image_shape = _zarr_arrays[level].domain().shape(); + int y_start = y_index * CHUNK_SIZE; + int y_end = std::min((y_index + 1) * CHUNK_SIZE, (int)image_shape[3]); + int x_start = x_index * CHUNK_SIZE; + int x_end = std::min((x_index + 1) * CHUNK_SIZE, (int)image_shape[4]); + + int assembled_width = x_end - x_start; + int assembled_height = y_end - y_start; + + std::vector assembled_image_vec(assembled_width*assembled_height); + + // Determine required input images + int unit_image_height = _unit_image_shapes[level].first; + int unit_image_width = _unit_image_shapes[level].second; + + int row_start_pos = y_start; + + //int row, local_y_start, tile_y_start, tile_y_dim, tile_y_end, col_start_pos; + //int col, local_x_start, tile_x_start, tile_x_dim, tile_x_end; + while (row_start_pos < y_end) { + + int row = row_start_pos / 
unit_image_height; + int local_y_start = row_start_pos - y_start; + int tile_y_start = row_start_pos - row * unit_image_height; + int tile_y_dim = std::min((row + 1) * unit_image_height - row_start_pos, y_end - row_start_pos); + int tile_y_end = tile_y_start + tile_y_dim; + + int col_start_pos = x_start; + + while (col_start_pos < x_end) { + + int col = col_start_pos / unit_image_width; + int local_x_start = col_start_pos - x_start; + int tile_x_start = col_start_pos - col * unit_image_width; + int tile_x_dim = std::min((col + 1) * unit_image_width - col_start_pos, x_end - col_start_pos); + int tile_x_end = tile_x_start + tile_x_dim; + + _th_pool.detach_task([ + row, + col, + channel, + level, + x_start, + x_end, + local_x_start, + tile_x_start, + tile_x_dim, + tile_x_end, + local_y_start, + tile_y_start, + tile_y_dim, + tile_y_end, + y_start, + y_end, + &assembled_image_vec, + assembled_width, + this + ]() { + // Fetch input file path from composition map + std::string input_file_name = _composition_map[{col, row, channel}]; + std::filesystem::path zarrArrayLoc = std::filesystem::path(input_file_name) / "data.zarr/0/" / std::to_string(level); + + auto read_spec = GetZarrSpecToRead(zarrArrayLoc.u8string()); + + tensorstore::TensorStore source; + + if (_zarr_readers.find(zarrArrayLoc.u8string()) != _zarr_readers.end()) { + source = _zarr_readers[zarrArrayLoc.u8string()]; + } else { + TENSORSTORE_CHECK_OK_AND_ASSIGN(source, tensorstore::Open( + read_spec, + tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read).result()); + + _zarr_readers[zarrArrayLoc.u8string()] = source; + } + + std::vector read_buffer((tile_x_end-tile_x_start)*(tile_y_end-tile_y_start)); + auto array = tensorstore::Array(read_buffer.data(), {tile_y_end-tile_y_start, tile_x_end-tile_x_start}, tensorstore::c_order); + + tensorstore::IndexTransform<> read_transform = tensorstore::IdentityTransform(source.domain()); + + Seq rows = Seq(y_start, y_end); + Seq cols = Seq(x_start, x_end); + Seq 
tsteps = Seq(0, 0); + Seq channels = Seq(channel-1, channel); + Seq layers = Seq(0, 0); + + read_transform = (std::move(read_transform) | tensorstore::Dims(_y_index).ClosedInterval(rows.Start(), rows.Stop()-1) | + tensorstore::Dims(_x_index).ClosedInterval(cols.Start(), cols.Stop()-1)).value(); + + tensorstore::Read(source | read_transform, tensorstore::UnownedToShared(array)).value(); + + for (int i = 0; i < tile_y_end - tile_y_start; ++i) { + std::memcpy( + &assembled_image_vec[(local_y_start + i) * assembled_width + local_x_start], // Destination + &read_buffer[i * (tile_x_end - tile_x_start)], // Source + (tile_x_end - tile_x_start) * sizeof(assembled_image_vec[0]) // Number of bytes + ); + } + }); + + col_start_pos += tile_x_end - tile_x_start; + } + row_start_pos += tile_y_end - tile_y_start; + } + + // wait for threads to finish assembling vector before writing + _th_pool.wait(); + + WriteImageData( + _zarr_arrays[level], + assembled_image_vec, // Access the data from the std::shared_ptr + Seq(y_start, y_end), + Seq(x_start, x_end), + Seq(0, 0), + Seq(channel, channel), + Seq(0, 0) + ); +} + +void PyramidCompositor::set_composition(const std::unordered_map, std::string, TupleHash>& comp_map) { + _composition_map = comp_map; + + for (const auto& coord : comp_map) { + std::string file_path = coord.second; + std::filesystem::path attr_file_loc = std::filesystem::path(file_path) / "data.zarr/0/.zattrs"; + + if (std::filesystem::exists(attr_file_loc)) { + std::ifstream attr_file(attr_file_loc); + json attrs; + attr_file >> attrs; + + const auto& multiscale_metadata = attrs["multiscales"][0]["datasets"]; + _pyramid_levels = multiscale_metadata.size(); + for (const auto& dic : multiscale_metadata) { + + _th_pool.detach_task([ + dic, + file_path, + this + ](){ + std::string res_key = dic["path"]; + + std::filesystem::path zarr_array_loc = std::filesystem::path(file_path) / "data.zarr/0/" / res_key; + + auto read_spec = GetZarrSpecToRead(zarr_array_loc.u8string()); + 
+ tensorstore::TensorStore source; + + TENSORSTORE_CHECK_OK_AND_ASSIGN(source, tensorstore::Open( + read_spec, + tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read).result()); + + // store reader to use later + _zarr_readers[zarr_array_loc.u8string()] = source; + + auto image_shape = source.domain().shape(); + _image_ts_dtype = source.dtype(); + _image_dtype = source.dtype().name(); + _image_dtype_code = GetDataTypeCode(_image_dtype); + + _unit_image_shapes[std::stoi(res_key)] = { + image_shape[image_shape.size()-2], + image_shape[image_shape.size()-1] + }; + }); + } + break; + } + } + + _th_pool.wait(); + + int num_rows = 0, num_cols = 0; + _num_channels = 0; + for (const auto& coord : comp_map) { + num_rows = std::max(num_rows, std::get<1>(coord.first)); + num_cols = std::max(num_cols, std::get<0>(coord.first)); + _num_channels = std::max(_num_channels, std::get<2>(coord.first)); + } + + num_cols += 1; + num_rows += 1; + _num_channels += 1; + + _plate_image_shapes.clear(); + _zarr_arrays.clear(); + _chunk_cache.clear(); + + for (auto& [level, shape]: _unit_image_shapes) { + + _plate_image_shapes[level] = std::vector { + 1, + _num_channels, + 1, + num_rows * shape.first, + num_cols * shape.second + }; + } + + for (const auto& [level, shape] : _unit_image_shapes) { + std::string path = _output_pyramid_name.u8string() + "/data.zarr/0/" + std::to_string(level); + + _th_pool.detach_task([ + path, + level=level, + this + ](){ + tensorstore::TensorStore source; + + TENSORSTORE_CHECK_OK_AND_ASSIGN(source, tensorstore::Open( + GetZarrSpecToWrite( + path, + _plate_image_shapes[level], + {1,1,1, CHUNK_SIZE, CHUNK_SIZE}, + ChooseBaseDType(_image_ts_dtype).value().encoded_dtype + ), + tensorstore::OpenMode::create | + tensorstore::OpenMode::delete_existing, + tensorstore::ReadWriteMode::write).result()); + + _zarr_arrays[level] = source; + }); + } + + _th_pool.wait(); + + create_auxiliary_files(); +} + +void PyramidCompositor::reset_composition() { + 
fs::remove_all(_output_pyramid_name); + _composition_map.clear(); + _plate_image_shapes.clear(); + _chunk_cache.clear(); + _plate_image_shapes.clear(); + _zarr_arrays.clear(); +} + +template +void PyramidCompositor::WriteImageData( + tensorstore::TensorStore& source, + std::vector& image, + const Seq& rows, + const Seq& cols, + const std::optional& layers, + const std::optional& channels, + const std::optional& tsteps) { + + std::vector shape; + + auto result_array = tensorstore::Array(image.data(), {rows.Stop()-rows.Start(), cols.Stop()-cols.Start()}, tensorstore::c_order); + + auto output_transform = tensorstore::IdentityTransform(source.domain()); + + output_transform = (std::move(output_transform) | tensorstore::Dims(_c_index).ClosedInterval(channels.value().Start(), channels.value().Stop())).value(); + + output_transform = (std::move(output_transform) | tensorstore::Dims(_y_index).ClosedInterval(rows.Start(), rows.Stop()-1) | + tensorstore::Dims(_x_index).ClosedInterval(cols.Start(), cols.Stop()-1)).value(); + + shape.emplace_back(rows.Stop() - rows.Start()+1); + shape.emplace_back(cols.Stop() - cols.Start()+1); + + // Write data array to TensorStore + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(result_array), source | output_transform).result(); + + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } +} +} // ns argolid \ No newline at end of file diff --git a/src/cpp/core/pyramid_compositor.h b/src/cpp/core/pyramid_compositor.h new file mode 100644 index 0000000..df37f96 --- /dev/null +++ b/src/cpp/core/pyramid_compositor.h @@ -0,0 +1,89 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include "BS_thread_pool.hpp" + +#include "tensorstore/tensorstore.h" +#include "tensorstore/spec.h" + +#include "../utilities/utilities.h" + +namespace fs = std::filesystem; +namespace argolid { +static constexpr int CHUNK_SIZE = 1024; + +class Seq +{ + private: + long 
start_index_, stop_index_, step_; + public: + inline Seq(const long start, const long stop, const long step=1):start_index_(start), stop_index_(stop), step_(step){} + inline long Start() const {return start_index_;} + inline long Stop() const {return stop_index_;} + inline long Step() const {return step_;} +}; + +class PyramidCompositor { +public: + + PyramidCompositor(const std::string& well_pyramid_loc, const std::string& out_dir, const std::string& pyramid_file_name); + + void reset_composition(); + + void create_xml(); + void create_zattr_file(); + void create_zgroup_file(); + void create_auxiliary_files(); + + void write_zarr_chunk(int level, int channel, int y_index, int x_index); + + void set_composition(const std::unordered_map, std::string, TupleHash>& comp_map); + +private: + + template + void _write_zarr_chunk(int level, int channel, int y_index, int x_index); + + template + void WriteImageData( + tensorstore::TensorStore& source, + std::vector& image, + const Seq& rows, + const Seq& cols, + const std::optional& layers, + const std::optional& channels, + const std::optional& tsteps); + + std::string _input_pyramids_loc; + std::filesystem::path _output_pyramid_name; + std::string _ome_metadata_file; + + std::unordered_map> _plate_image_shapes; + + std::unordered_map> _zarr_arrays; + + std::unordered_map> _zarr_readers; + + std::unordered_map> _unit_image_shapes; + + std::unordered_map, std::string, TupleHash> _composition_map; + + std::set> _chunk_cache; + int _pyramid_levels; + int _num_channels; + + tensorstore::DataType _image_ts_dtype; + std::string _image_dtype; + std::uint16_t _image_dtype_code; + + int _x_index = 4; + int _y_index = 3; + int _c_index = 1; + + BS::thread_pool _th_pool; +}; +} // ns argolid diff --git a/src/cpp/interface/pyramid_python_interface.cpp b/src/cpp/interface/pyramid_python_interface.cpp index adefd7f..29c81d3 100644 --- a/src/cpp/interface/pyramid_python_interface.cpp +++ b/src/cpp/interface/pyramid_python_interface.cpp @@ 
-1,5 +1,6 @@ #include "../core/ome_tiff_to_chunked_pyramid.h" #include "../core/pyramid_view.h" +#include "../core/pyramid_compositor.h" #include "../utilities/utilities.h" #include #include @@ -17,6 +18,17 @@ PYBIND11_MODULE(libargolid, m) { .def("GeneratePyramid", &argolid::PyramidView::GeneratePyramid) \ .def("AssembleBaseLevel", &argolid::PyramidView::AssembleBaseLevel) ; + + py::class_(m, "PyramidCompositorCPP") \ + .def(py::init()) \ + .def("reset_composition", &argolid::PyramidCompositor::reset_composition) \ + .def("create_xml", &argolid::PyramidCompositor::create_xml) \ + .def("create_zattr_file", &argolid::PyramidCompositor::create_zattr_file) \ + .def("create_zgroup_file", &argolid::PyramidCompositor::create_zgroup_file) \ + .def("create_auxiliary_files", &argolid::PyramidCompositor::create_auxiliary_files) \ + .def("write_zarr_chunk", &argolid::PyramidCompositor::write_zarr_chunk) \ + .def("set_composition", &argolid::PyramidCompositor::set_composition); + py::enum_(m, "VisType") .value("NG_Zarr", argolid::VisType::NG_Zarr) .value("PCNG", argolid::VisType::PCNG) diff --git a/src/cpp/utilities/utilities.cpp b/src/cpp/utilities/utilities.cpp index 45dd9cd..69db45a 100644 --- a/src/cpp/utilities/utilities.cpp +++ b/src/cpp/utilities/utilities.cpp @@ -202,6 +202,51 @@ void WriteTSZattrFile(const std::string& tiff_file_name, const std::string& zarr } } +void WriteTSZattrFilePlateImage( + const std::string& tiff_file_name, + const std::string& zarr_root_dir, + const std::unordered_map>& plate_image_shape){ + + json zarr_multiscale_axes; + zarr_multiscale_axes = json::parse(R"([ + {"name": "c", "type": "channel"}, + {"name": "z", "type": "space", "unit": "micrometer"}, + {"name": "y", "type": "space", "unit": "micrometer"}, + {"name": "x", "type": "space", "unit": "micrometer"} + ])"); + + + float level = 1.0; + json scale_metadata_list = json::array(); + for(const auto& key_val: plate_image_shape){ + json scale_metadata; + scale_metadata["path"] = std::to_string(std::get<0>(key_val)); + scale_metadata["coordinateTransformations"] = 
{{{"type", "scale"}, {"scale", {1.0, 1.0, level, level}}}}; + scale_metadata_list.push_back(scale_metadata); + level = level*2; + } + + json combined_metadata; + combined_metadata["datasets"] = scale_metadata_list; + combined_metadata["version"] = "0.4"; + combined_metadata["axes"] = zarr_multiscale_axes; + combined_metadata["name"] = tiff_file_name; + combined_metadata["metadata"] = {{"method", "mean"}}; + json final_formated_metadata; +#if defined(__clang__) || defined(_MSC_VER) +// more details here: https://github.com/nlohmann/json/issues/2311 + final_formated_metadata["multiscales"][0] = {combined_metadata}; +#else + final_formated_metadata["multiscales"] = {combined_metadata}; +#endif + std::ofstream f(zarr_root_dir + "/.zattrs",std::ios_base::trunc |std::ios_base::out); + if (f.is_open()){ + f << final_formated_metadata; + } else { + PLOG_INFO <<"Unable to write .zattr file at "<< zarr_root_dir << "."; + } +} + void WriteVivZattrFile(const std::string& tiff_file_name, const std::string& zattr_file_loc, int min_level, int max_level){ json scale_metadata_list = json::array(); @@ -318,6 +363,60 @@ void GenerateOmeXML(const std::string& image_name, const std::string& output_fil doc.save_file(output_file.c_str()); } +void CreateXML( + const std::string& image_name, + const std::string& output_file, + const std::vector& image_shape, + const std::string& image_dtype) { + + + if (image_shape.size() != 5) { + throw std::invalid_argument("image_shape must have length 5."); + } + + pugi::xml_document doc; + + // Create the root element + pugi::xml_node omeNode = doc.append_child("OME"); + + // Add the namespaces and attributes to the root element + omeNode.append_attribute("xmlns") = "http://www.openmicroscopy.org/Schemas/OME/2016-06"; + omeNode.append_attribute("xmlns:xsi") = "http://www.w3.org/2001/XMLSchema-instance"; + auto creator = std::string{"Argolid "} + std::string{VERSION_INFO}; + omeNode.append_attribute("Creator") = creator.c_str(); + 
omeNode.append_attribute("UUID") = "urn:uuid:ce3367ae-0512-4e87-a045-20d87db14001"; + omeNode.append_attribute("xsi:schemaLocation") = "http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd"; + + // Create the element + pugi::xml_node imageNode = omeNode.append_child("Image"); + imageNode.append_attribute("ID") = "Image:0"; + imageNode.append_attribute("Name") = image_name.c_str(); + + // Create the element + pugi::xml_node pixelsNode = imageNode.append_child("Pixels"); + pixelsNode.append_attribute("BigEndian") = "false"; + pixelsNode.append_attribute("DimensionOrder") = "XYZCT"; + pixelsNode.append_attribute("ID") = "Pixels:0"; + pixelsNode.append_attribute("Interleaved") = "false"; + pixelsNode.append_attribute("SizeC") = std::to_string(image_shape[1]).c_str(); + pixelsNode.append_attribute("SizeZ") = std::to_string(image_shape[2]).c_str(); + pixelsNode.append_attribute("SizeT") = std::to_string(image_shape[0]).c_str(); + pixelsNode.append_attribute("SizeX") = std::to_string(image_shape[4]).c_str(); + pixelsNode.append_attribute("SizeY") = std::to_string(image_shape[3]).c_str(); + pixelsNode.append_attribute("Type") = image_dtype.c_str(); + + // Create the elements + for(std::int64_t i=0; i elements + channelNode.append_child("LightPath"); + } + + doc.save_file(output_file.c_str()); +} + void WriteMultiscaleMetadataForSingleFile( const std::string& input_file , const std::string& output_dir, int min_level, int max_level, VisType v) { @@ -347,4 +446,29 @@ std::optional> GetTiffDims (const std:: } +std::tuple, std::optional, std::optional>ParseMultiscaleMetadata(const std::string& axes_list, int len){ + + std::optional t_index{std::nullopt}, c_index{std::nullopt}, z_index{std::nullopt}; + + assert(axes_list.length() <= 5); + + if (axes_list.length() == len){ + // no speculation + for (int i=0; i GetZarrParams(VisType v){ } std::optional> GetTiffDims (const std::string filename); + +void CreateXML( + const 
std::string& image_name, + const std::string& output_file, + const std::vector& image_shape, + const std::string& image_dtype); + +void WriteTSZattrFilePlateImage( + const std::string& tiff_file_name, + const std::string& zarr_root_dir, + const std::unordered_map>& plate_image_shape); + +std::tuple, std::optional, std::optional>ParseMultiscaleMetadata(const std::string& axes_list, int len); + +// custom hashing for using std::tuple as key +struct TupleHash { + template + std::size_t operator()(const std::tuple& t) const { + return std::apply([](const auto&... args) { + return (std::hash>{}(args) ^ ...); + }, t); + } +}; } // ns argolid \ No newline at end of file diff --git a/src/python/argolid/pyramid_compositor.py b/src/python/argolid/pyramid_compositor.py index 8d0c15d..df8eaa3 100644 --- a/src/python/argolid/pyramid_compositor.py +++ b/src/python/argolid/pyramid_compositor.py @@ -8,89 +8,7 @@ import ome_types import tensorstore as ts -CHUNK_SIZE: int = 1024 - -OME_DTYPE = { - "uint8": ome_types.model.PixelType.UINT8, - "int8": ome_types.model.PixelType.INT8, - "uint16": ome_types.model.PixelType.UINT16, - "int16": ome_types.model.PixelType.INT16, - "uint32": ome_types.model.PixelType.UINT32, - "int32": ome_types.model.PixelType.INT32, - "float": ome_types.model.PixelType.FLOAT, - "double": ome_types.model.PixelType.DOUBLE, -} - - -def get_zarr_read_spec(file_path: str) -> dict: - """ - Returns a dictionary containing the specification for reading a Zarr file. - - Args: - file_path (str): The path to the Zarr file. - - Returns: - dict: A dictionary containing the specification for reading the Zarr file. 
- """ - return { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": file_path, - }, - "open": True, - "context": { - "cache_pool": {}, - "data_copy_concurrency": {"limit": os.cpu_count() // 2}, - "file_io_concurrency": {"limit": os.cpu_count() // 2}, - "file_io_sync": False, - }, - } - - -def get_zarr_write_spec( - file_path: str, chunk_size: int, base_shape: tuple, dtype: str -) -> dict: - """ - Returns a dictionary containing the specification for writing a Zarr file. - - Args: - file_path (str): The path to the Zarr file. - chunk_size (int): The size of the chunks in the Zarr file. - base_shape (tuple): The base shape of the Zarr file. - dtype (str): The data type of the Zarr file. - - Returns: - dict: A dictionary containing the specification for writing the Zarr file. - """ - return { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": file_path, - }, - "create": True, - "delete_existing": False, - "open": True, - "metadata": { - "shape": base_shape, - "chunks": [1, 1, 1, chunk_size, chunk_size], - "dtype": np.dtype(dtype).str, - "compressor": { - "id": "blosc", - "cname": "zstd", - "clevel": 1, - "shuffle": 1, - "blocksize": 0, - }, - }, - "context": { - "cache_pool": {}, - "data_copy_concurrency": {"limit": os.cpu_count() // 2}, - "file_io_concurrency": {"limit": os.cpu_count() // 2}, - "file_io_sync": False, - }, - } +from .libargolid import PyramidCompositorCPP class PyramidCompositor: @@ -109,87 +27,33 @@ def __init__( out_dir (str): The output directory for the composed zarr pyramid file. output_pyramid_name (str): The name of the zarr pyramid file. 
""" - self._input_pyramids_loc: str = input_pyramids_loc - self._chunk_cache: set = set() - self._output_pyramid_name: str = f"{out_dir}/{output_pyramid_name}" - self._ome_metadata_file: str = f"{out_dir}/{output_pyramid_name}/METADATA.ome.xml" - self._composition_map: dict = None - self._plate_image_shapes: dict = {} - self._zarr_arrays: dict = {} - self._unit_image_shapes: dict = {} - self._pyramid_levels: int = None - self._image_dtype: np.dtype = None - self._num_channels: int = None + self._pyramid_compositor_cpp = PyramidCompositorCPP(input_pyramids_loc, out_dir, output_pyramid_name) def _create_xml(self) -> None: """ Writes an OME-XML metadata file for the pyramid. """ - ome_metadata = ome_types.model.OME() - ome_metadata.images.append( - ome_types.model.Image( - id="Image:0", - pixels=ome_types.model.Pixels( - id="Pixels:0", - dimension_order="XYZCT", - big_endian=False, - size_c=self._plate_image_shapes[0][1], - size_z=self._plate_image_shapes[0][2], - size_t=self._plate_image_shapes[0][0], - size_x=self._plate_image_shapes[0][4], - size_y=self._plate_image_shapes[0][3], - channels=[ - ome_types.model.Channel( - id=f"Channel:{i}", - samples_per_pixel=1, - ) - for i in range(self._plate_image_shapes[0][1]) - ], - type=OME_DTYPE[str(self._image_dtype)], - ), - ) - ) - - with open(self._ome_metadata_file, "w") as fw: - fw.write(str(ome_metadata.to_xml())) + self._pyramid_compositor_cpp.create_xml() + def _create_zattr_file(self) -> None: """ Creates a .zattrs file for the zarr pyramid. 
""" - attr_dict: dict = {} - multiscale_metadata: list = [] - for key in self._plate_image_shapes: - multiscale_metadata.append({"path": str(key)}) - attr_dict["datasets"] = multiscale_metadata - attr_dict["version"] = "0.1" - attr_dict["name"] = self._output_pyramid_name - attr_dict["metadata"] = {"method": "mean"} - - final_attr_dict = {"multiscales": [attr_dict]} - - with open(f"{self._output_pyramid_name}/data.zarr/0/.zattrs", "w") as json_file: - json.dump(final_attr_dict, json_file) + self._pyramid_compositor_cpp.create_zattr_file() def _create_zgroup_file(self) -> None: """ Creates .zgroup files for the zarr pyramid. """ - zgroup_dict = {"zarr_format": 2} - - with open(f"{self._output_pyramid_name}/data.zarr/0/.zgroup", "w") as json_file: - json.dump(zgroup_dict, json_file) - - with open(f"{self._output_pyramid_name}/data.zarr/.zgroup", "w") as json_file: - json.dump(zgroup_dict, json_file) + self._pyramid_compositor_cpp.create_zgroup_file() def _create_auxilary_files(self) -> None: """ Creates auxiliary files (OME-XML, .zattrs, .zgroup) for the pyramid. """ - self._create_xml() - self._create_zattr_file() - self._create_zgroup_file() + self._pyramid_compositor_cpp.create_auxilary_files() + def _write_zarr_chunk( self, level: int, channel: int, y_index: int, x_index: int @@ -203,68 +67,7 @@ def _write_zarr_chunk( y_index (int): The y-index of the tile. x_index (int): The x-index of the tile. 
""" - - # x_range and y_range are in global coordinates at the corresponding level - y_range = [ - y_index * CHUNK_SIZE, - min((y_index + 1) * CHUNK_SIZE, self._zarr_arrays[level].shape[3]), - ] - x_range = [ - x_index * CHUNK_SIZE, - min((x_index + 1) * CHUNK_SIZE, self._zarr_arrays[level].shape[4]), - ] - - assembled_width = x_range[1] - x_range[0] - assembled_height = y_range[1] - y_range[0] - assembled_image = np.zeros( - (assembled_height, assembled_width), dtype=self._image_dtype - ) - - # find what input images are needed - - # are we at the begining of the input image? - unit_image_height = self._unit_image_shapes[level][0] - unit_image_width = self._unit_image_shapes[level][1] - - row_start_pos = y_range[0] - while row_start_pos < y_range[1]: - # row and col are unit map coordinates - row = row_start_pos // unit_image_height - # local_y* and local_x* are coordintes in the chunk - local_y_start = row_start_pos - y_range[0] - tile_y_start = row_start_pos - row * unit_image_height - tile_y_dim = min((row + 1) * unit_image_height - row_start_pos, y_range[1] - row_start_pos) - tile_y_end = tile_y_start + tile_y_dim - col_start_pos = x_range[0] - while col_start_pos < x_range[1]: - col = col_start_pos // unit_image_width - local_x_start = col_start_pos - x_range[0] - tile_x_start = col_start_pos - col * unit_image_width - tile_x_dim = min((col + 1) * unit_image_width - col_start_pos, x_range[1] - col_start_pos) - tile_x_end = tile_x_start + tile_x_dim - # read input zarr file - input_file_name = self._composition_map.get((col, row, channel)) - zarr_file_loc = Path(input_file_name) / "data.zarr/0/" - zarr_array_loc = zarr_file_loc / str(level) - zarr_file = ts.open(get_zarr_read_spec(str(zarr_array_loc))).result() - - # copy data - assembled_image[ - local_y_start : local_y_start + tile_y_end - tile_y_start, - local_x_start : local_x_start + tile_x_end - tile_x_start, - ] = ( - zarr_file[0, 0, 0, tile_y_start:tile_y_end, tile_x_start:tile_x_end] - .read() - 
.result() - ) - col_start_pos += tile_x_end - tile_x_start # update col index - - row_start_pos += tile_y_end - tile_y_start - - zarr_array = self._zarr_arrays[level] - zarr_array[ - 0, channel, 0, y_range[0] : y_range[1], x_range[0] : x_range[1] - ].write(assembled_image).result() + self._pyramid_compositor_cpp.write_zarr_chunk(level, channel, y_index, x_index) def set_composition(self, composition_map: dict) -> None: """ @@ -273,90 +76,13 @@ def set_composition(self, composition_map: dict) -> None: Args: composition_map (dict): A dictionary mapping composition images to file paths. """ - self._composition_map = composition_map - self._unit_image_shapes = {} - for coord in composition_map: - file = composition_map[coord] - zarr_file_loc = Path(file) / "data.zarr/0/" - attr_file_loc = Path(file) / "data.zarr/0/.zattrs" - if attr_file_loc.exists(): - with open(str(attr_file_loc), "r") as f: - attrs = json.load(f) - mutliscale_metadata = attrs["multiscales"][0]["datasets"] - self._pyramid_levels = len(mutliscale_metadata) - for dic in mutliscale_metadata: - res_key = dic["path"] - zarr_array_loc = zarr_file_loc / res_key - zarr_file = ts.open( - get_zarr_read_spec(str(zarr_array_loc)) - ).result() - self._unit_image_shapes[int(res_key)] = ( - zarr_file.shape[-2], - zarr_file.shape[-1], - ) - if res_key == "0": - self._image_dtype = zarr_file.dtype.numpy_dtype - - break - - num_rows = 0 - num_cols = 0 - num_channels = 0 - - for coord in composition_map: - num_rows = max(num_rows, coord[1]) - num_cols = max(num_cols, coord[0]) - num_channels = max(num_channels, coord[2]) - - num_cols += 1 - num_rows += 1 - num_channels += 1 - - self._num_channels = num_channels - - self._plate_image_shapes = {} - self._zarr_arrays = {} - self._chunk_cache = set() - for l in self._unit_image_shapes: - level = int(l) - self._plate_image_shapes[level] = ( - 1, - num_channels, - 1, - num_rows * self._unit_image_shapes[level][0], - num_cols * self._unit_image_shapes[level][1], - ) - 
num_row_tiles = math.ceil( - 1.0 * num_rows * self._unit_image_shapes[level][0] / CHUNK_SIZE - ) - num_col_tiles = math.ceil( - 1.0 * num_cols * self._unit_image_shapes[level][1] / CHUNK_SIZE - ) - if num_row_tiles == 0: - num_row_tiles = 1 - if num_col_tiles == 0: - num_col_tiles == 1 - self._zarr_arrays[level] = ts.open( - get_zarr_write_spec( - f"{self._output_pyramid_name}/data.zarr/0/{level}", - CHUNK_SIZE, - self._plate_image_shapes[level], - np.dtype(self._image_dtype).str, - ) - ).result() - - self._create_auxilary_files() + self._pyramid_compositor_cpp.set_composition(composition_map) def reset_composition(self) -> None: """ Resets the pyramid composition by removing the pyramid file and clearing internal data structures. """ - shutil.rmtree(self._output_pyramid_name) - self._composition_map = None - self._plate_image_shapes = None - self._chunk_cache = None - self._plate_image_shapes = {} - self._zarr_arrays = {} + self._pyramid_compositor_cpp.reset_composition() def get_zarr_chunk( self, level: int, channel: int, y_index: int, x_index: int @@ -378,24 +104,6 @@ def get_zarr_chunk( the requested channel does not exist, or the requested y_index or x_index is out of bounds. """ - if self._composition_map is None: - raise ValueError("No composition map is set. 
Unable to generate pyramid") - - if level not in self._unit_image_shapes: - raise ValueError(f"Requested level ({level}) does not exist") - - if channel >= self._num_channels: - raise ValueError(f"Requested channel ({channel}) does not exist") - - if y_index > (self._plate_image_shapes[level][3] // CHUNK_SIZE): - raise ValueError(f"Requested y index ({y_index}) does not exist") - if x_index > (self._plate_image_shapes[level][4] // CHUNK_SIZE): - raise ValueError(f"Requested y index ({x_index}) does not exist") + self._pyramid_compositor_cpp.get_zarr_chunk(level, channel, y_index, x_index) - if (level, channel, y_index, x_index) in self._chunk_cache: - return - else: - self._write_zarr_chunk(level, channel, y_index, x_index) - self._chunk_cache.add((level, channel, y_index, x_index)) - return