Skip to content

Commit 4e3a334

Browse files
authored
Fix include_stats order (#616)
1 parent 64673b5 commit 4e3a334

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

src/hats/io/parquet_metadata.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -363,7 +363,7 @@ def aggregate_column_statistics(
363363
return frame
364364

365365

366-
# pylint: disable=too-many-positional-arguments
366+
# pylint: disable=too-many-positional-arguments,too-many-statements
367367
def per_pixel_statistics(
368368
metadata_file: str | Path | UPath,
369369
exclude_hats_columns: bool = True,
@@ -451,6 +451,7 @@ def per_pixel_statistics(
451451
for stat in include_stats:
452452
if stat not in all_stats:
453453
raise ValueError(f"include_stats must be from list {all_stats} (found {stat})")
454+
include_stats = [stat for stat in all_stats if stat in include_stats]
454455
int_stats = [stat for stat in int_stats if stat in include_stats]
455456

456457
stat_mask = np.array([ind for ind, stat in enumerate(all_stats) if stat in include_stats])

tests/hats/io/test_parquet_metadata.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -357,9 +357,13 @@ def test_per_pixel_statistics_multi_index(small_sky_order1_dir):
357357
def test_per_pixel_statistics_include_stats(small_sky_order1_dir):
358358
partition_info_file = paths.get_parquet_metadata_pointer(small_sky_order1_dir)
359359

360-
result_frame = per_pixel_statistics(partition_info_file, include_stats=["row_count"])
361-
# 5 = 5 columns * 1 stat per column
362-
assert result_frame.shape == (4, 5)
360+
result_frame = per_pixel_statistics(partition_info_file, include_stats=["disk_bytes", "memory_bytes"])
361+
# 10 = 5 columns * 2 stats per column
362+
assert result_frame.shape == (4, 10)
363+
364+
# The order of the stats should not matter
365+
result_frame_2 = per_pixel_statistics(partition_info_file, include_stats=["memory_bytes", "disk_bytes"])
366+
pd.testing.assert_frame_equal(result_frame, result_frame_2)
363367

364368
result_frame = per_pixel_statistics(
365369
partition_info_file, include_stats=["row_count"], include_columns=["id"]

0 commit comments

Comments
 (0)