diff --git a/CHANGELOG b/CHANGELOG index 4cb7afbc..ef15edeb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,5 @@ 2.22.1 + - feat: notify user when loaded data have different pipeline hashes (#217) - fix: prevent accidental polygon filter creation (#148) - fix: disable QuickView controls when there is no data to display - fix: IndexError when trying to show a deleted slot in QuickView (#214) diff --git a/shapeout2/gui/pipeline_plot.py b/shapeout2/gui/pipeline_plot.py index eaf0a72b..24e13011 100644 --- a/shapeout2/gui/pipeline_plot.py +++ b/shapeout2/gui/pipeline_plot.py @@ -118,6 +118,16 @@ def update_content_plot(self, plot_state, slot_states, dslist): lay = plot_state["layout"] sca = plot_state["scatter"] + # create a hash set for the dcnum hashes + hash_set = set() + for ds in dslist: + pipe_config = ds.config.get("pipeline", {}) + dcnum_hash = pipe_config.get("dcnum hash", None) + if dcnum_hash is not None: + hash_set.add(dcnum_hash) + else: + hash_set.add(None) + # auto range (overrides stored ranges) if gen["auto range"]: # default range is limits + 5% margin @@ -168,6 +178,9 @@ def update_content_plot(self, plot_state, slot_states, dslist): elif lay["division"] == "each": colcount = 0 for ds, sl in zip(dslist, slot_states): + # get the hash flag + hash_flag = get_hash_flag(hash_set, ds) + pp = PipelinePlotItem(parent=linner) self.plot_items.append(pp) linner.addItem(item=pp, @@ -175,7 +188,7 @@ def update_content_plot(self, plot_state, slot_states, dslist): col=None, rowspan=1, colspan=1) - pp.redraw([ds], [sl], plot_state) + pp.redraw([ds], [sl], plot_state, hash_flag) colcount += 1 if colcount % lay["column count"] == 0: linner.nextRow() @@ -185,6 +198,9 @@ def update_content_plot(self, plot_state, slot_states, dslist): plot_state_scatter = copy.deepcopy(plot_state) plot_state_scatter["contour"]["enabled"] = False for ds, sl in zip(dslist, slot_states): + # get the hash flag + hash_flag = get_hash_flag(hash_set, ds) + pp = PipelinePlotItem(parent=linner) 
def get_hash_flag(hash_set, rtdc_ds, min_length=4):
    """Return a short pipeline-hash label for one dataset, or None

    A label is only produced when the datasets shown together do not
    all share the same dcnum pipeline hash. The hash is abbreviated to
    the shortest prefix (at least `min_length` characters) that still
    keeps every entry of `hash_set` distinct.

    Parameters
    ----------
    hash_set: set
        The "dcnum hash" values of all datasets that are compared;
        `None` represents a dataset without a pipeline hash.
    rtdc_ds:
        Dataset to label. Its hash is read via
        ``rtdc_ds.config.get("pipeline", {}).get("dcnum hash", None)``.
    min_length: int
        Minimum number of hash characters to show (default 4).

    Returns
    -------
    flag: str or None
        ``"Pipeline: <short hash>"``, or `None` if `hash_set` holds
        fewer than two entries or `rtdc_ds` has no pipeline hash.
    """
    if len(hash_set) <= 1:
        # Nothing to compare against (empty set) or all datasets share
        # the same hash: there is no need to show a label.
        return None

    # Length of the longest real hash; None and empty entries are skipped.
    longest = max((len(h) for h in hash_set if h), default=0)

    # Grow the prefix until truncation no longer merges distinct entries.
    req_hash_len = min_length
    while req_hash_len < longest:
        shortened = {
            h[:req_hash_len] if h is not None else None for h in hash_set
        }
        if len(shortened) == len(hash_set):
            break
        req_hash_len += 1

    # Look up the pipeline hash of the dataset itself.
    pipe_config = rtdc_ds.config.get("pipeline", {})
    dcnum_hash = pipe_config.get("dcnum hash", None)
    if not dcnum_hash:
        return None

    short_hash = dcnum_hash[:req_hash_len]
    return f"Pipeline: {short_hash}" if short_hash else None
def test_get_hash_flag_datasets_without_hash():
    """No label is returned when none of the datasets has a hash"""
    rtdc_paths = [
        datapath / "artificial_with_image_bg.rtdc",  # No hash
        datapath / "blood_rbc_leukocytes.rtdc",  # No hash
        datapath / "blood_rbc_qpi_data.rtdc",  # No hash
    ]

    rtdc_ds_list = [dclab.new_dataset(path) for path in rtdc_paths]
    # collect the pipeline hashes the same way the plotting code does
    hash_set = {
        ds.config.get("pipeline", {}).get("dcnum hash", None)
        for ds in rtdc_ds_list
    }

    assert len(hash_set) == 1
    for ds in rtdc_ds_list:
        assert pipeline_plot.get_hash_flag(hash_set, ds) is None


def test_get_hash_flag_dataset_with_hash():
    """Only the dataset that carries a hash gets a label"""
    rtdc_paths = [
        datapath / "artificial_with_image_bg.rtdc",  # No hash
        datapath / "blood_rbc_leukocytes.rtdc",  # No hash
        datapath / "naiad-capture_blood_pipeline.rtdc",  # with hash
    ]

    rtdc_ds_list = [dclab.new_dataset(path) for path in rtdc_paths]
    # collect the pipeline hashes the same way the plotting code does
    hash_set = {
        ds.config.get("pipeline", {}).get("dcnum hash", None)
        for ds in rtdc_ds_list
    }

    assert len(hash_set) == 2
    results = [
        pipeline_plot.get_hash_flag(hash_set, ds) for ds in rtdc_ds_list
    ]

    assert len(results) == 3, "results length should be length of rtdc_paths"

    assert results[0] is None
    assert results[1] is None
    assert results[2] == "Pipeline: 1d01"