From 633657836504c70bc6958bcc969e2d455401201a Mon Sep 17 00:00:00 2001 From: rvasan Date: Thu, 2 Oct 2025 11:30:17 -0700 Subject: [PATCH 1/4] add 5th and 95th percentile --- viscy/utils/meta_utils.py | 7 ++++++- viscy/utils/mp_utils.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/viscy/utils/meta_utils.py b/viscy/utils/meta_utils.py index f948fb849..c640149ae 100644 --- a/viscy/utils/meta_utils.py +++ b/viscy/utils/meta_utils.py @@ -31,7 +31,12 @@ def write_meta_field(position: ngff.Position, metadata, field_name, subfield_nam """ if field_name in position.zattrs: if subfield_name in position.zattrs[field_name]: - position.zattrs[field_name][subfield_name].update(metadata) + # Need to create a new dict and reassign to trigger zarr write + updated_subfield = {**position.zattrs[field_name][subfield_name], **metadata} + position.zattrs[field_name] = { + **position.zattrs[field_name], + subfield_name: updated_subfield + } else: D1 = position.zattrs[field_name] field_metadata = { diff --git a/viscy/utils/mp_utils.py b/viscy/utils/mp_utils.py index 4db77e4de..796e0b56c 100644 --- a/viscy/utils/mp_utils.py +++ b/viscy/utils/mp_utils.py @@ -260,11 +260,16 @@ def get_val_stats(sample_values): indices :return dict meta_row: Dict with intensity data for image """ + p5 = float(np.nanpercentile(sample_values, 5)) + p95 = float(np.nanpercentile(sample_values, 95)) meta_row = { "mean": float(np.nanmean(sample_values)), "std": float(np.nanstd(sample_values)), "median": float(np.nanmedian(sample_values)), "iqr": float(scipy.stats.iqr(sample_values)), + "p5": p5, + "p95": p95, + "p95_p5": p95 - p5, # Range for normalization } return meta_row From ed936f218f5144f746a1733748e3c06f098ad8fe Mon Sep 17 00:00:00 2001 From: rvasan Date: Thu, 2 Oct 2025 11:34:26 -0700 Subject: [PATCH 2/4] ruff format --- viscy/utils/meta_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/viscy/utils/meta_utils.py b/viscy/utils/meta_utils.py index c640149ae..2bd79081d 100644 --- a/viscy/utils/meta_utils.py +++ b/viscy/utils/meta_utils.py @@ -32,10 +32,13 @@ def write_meta_field(position: ngff.Position, metadata, field_name, subfield_nam if field_name in position.zattrs: if subfield_name in position.zattrs[field_name]: # Need to create a new dict and reassign to trigger zarr write - updated_subfield = {**position.zattrs[field_name][subfield_name], **metadata} + updated_subfield = { + **position.zattrs[field_name][subfield_name], + **metadata, + } position.zattrs[field_name] = { **position.zattrs[field_name], - subfield_name: updated_subfield + subfield_name: updated_subfield, } else: D1 = position.zattrs[field_name] From 6ef18296b92510e520063de089f6c133015db44a Mon Sep 17 00:00:00 2001 From: Ziwen Liu Date: Fri, 17 Oct 2025 13:13:38 -0700 Subject: [PATCH 3/4] optimize sorting and test --- tests/preprocessing/test_norm_stats.py | 20 ++++++++++++++++++++ viscy/utils/mp_utils.py | 21 +++++++++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 tests/preprocessing/test_norm_stats.py diff --git a/tests/preprocessing/test_norm_stats.py b/tests/preprocessing/test_norm_stats.py new file mode 100644 index 000000000..fe51bf59a --- /dev/null +++ b/tests/preprocessing/test_norm_stats.py @@ -0,0 +1,20 @@ +import numpy as np +from viscy.utils.mp_utils import get_val_stats + + +def test_get_val_stats(): + sample_values = np.arange(0, 101) + stats = get_val_stats(sample_values) + expected_stats = { + "mean": 50.0, + "std": np.std(sample_values), + "median": 50.0, + "iqr": 50.0, + "p1": 1.0, + "p5": 5.0, + "p95": 95.0, + "p99": 99.0, + "p99_p1": 98.0, + "p95_p5": 90.0, + } + assert stats == expected_stats diff --git a/viscy/utils/mp_utils.py b/viscy/utils/mp_utils.py index 796e0b56c..0010ebedf 100644 --- a/viscy/utils/mp_utils.py +++ b/viscy/utils/mp_utils.py @@ -260,16 +260,21 @@ def get_val_stats(sample_values): indices :return dict meta_row: Dict with intensity data for image """ - p5 = float(np.nanpercentile(sample_values, 5)) - p95 = float(np.nanpercentile(sample_values, 95)) - + percentiles = [1, 5, 25, 50, 75, 95, 99] + percentile_values = { + k: float(v) + for k, v in zip(percentiles, np.nanpercentile(sample_values, percentiles)) + } meta_row = { "mean": float(np.nanmean(sample_values)), "std": float(np.nanstd(sample_values)), - "median": float(np.nanmedian(sample_values)), - "iqr": float(scipy.stats.iqr(sample_values)), - "p5": p5, - "p95": p95, - "p95_p5": p95 - p5, # Range for normalization + "median": percentile_values[50], + "iqr": percentile_values[75] - percentile_values[25], + "p5": percentile_values[5], + "p95": percentile_values[95], + "p95_p5": percentile_values[95] - percentile_values[5], + "p1": percentile_values[1], + "p99": percentile_values[99], + "p99_p1": percentile_values[99] - percentile_values[1], } return meta_row From 5acd8bdd7afb276819db0b8a07e5d703d7329f11 Mon Sep 17 00:00:00 2001 From: Ziwen Liu Date: Fri, 17 Oct 2025 13:14:42 -0700 Subject: [PATCH 4/4] remove scipy --- viscy/utils/mp_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/viscy/utils/mp_utils.py b/viscy/utils/mp_utils.py index 0010ebedf..6b39795f8 100644 --- a/viscy/utils/mp_utils.py +++ b/viscy/utils/mp_utils.py @@ -2,7 +2,6 @@ import iohub.ngff as ngff import numpy as np -import scipy.stats import viscy.utils.image_utils as image_utils import viscy.utils.masks as mask_utils