Skip to content

Commit f2d5daf

Browse files
authored
Add configuration for a healpix column name. (#601)
* Add data/test for non-order-29 indexed dataset.
* Add more testing and allow margin cache with no healpix 29.
* Set the properties on new catalogs.
* Refresh unit test data.
1 parent f6156ca commit f2d5daf

File tree

81 files changed

+313
-40
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

81 files changed

+313
-40
lines changed

src/hats_import/catalog/arguments.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from hats.io.paths import DATASET_DIR, PARTITION_ORDER
1313
from hats.io.validation import is_valid_catalog
1414
from hats.pixel_math import spatial_index
15+
from hats.pixel_math.spatial_index import SPATIAL_INDEX_COLUMN, SPATIAL_INDEX_ORDER
1516
from typing_extensions import Self
1617
from upath import UPath
1718

@@ -178,6 +179,13 @@ def to_table_properties(
178179
"hats_skymap_alt_orders": self.skymap_alt_orders,
179180
}
180181
)
182+
if self.add_healpix_29:
183+
info.update(
184+
{
185+
"hats_col_healpix": SPATIAL_INDEX_COLUMN,
186+
"hats_col_healpix_order": SPATIAL_INDEX_ORDER,
187+
}
188+
)
181189
properties = TableProperties(**info)
182190

183191
if properties.default_columns and column_names:

src/hats_import/margin_cache/margin_cache.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ def generate_margin_cache(args, client):
3232
margin_pair_file=resume_plan.margin_pair_file,
3333
output_path=args.tmp_path,
3434
margin_order=args.margin_order,
35+
healpix_column=args.catalog.catalog_info.healpix_column,
36+
healpix_order=args.catalog.catalog_info.healpix_order,
3537
)
3638
)
3739
resume_plan.wait_for_mapping(futures)

src/hats_import/margin_cache/margin_cache_arguments.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def _check_arguments(self):
5151
self.catalog = self.catalog.filter_from_pixel_list(self.debug_filter_pixel_list)
5252
if len(self.catalog.get_healpix_pixels()) == 0:
5353
raise ValueError("debug_filter_pixel_list has created empty catalog")
54+
if not self.catalog.has_healpix_column():
55+
raise ValueError("Only catalogs with some healpix column (e.g. _healpix_29) can have a margin")
5456

5557
if self.fine_filtering:
5658
raise NotImplementedError("Fine filtering temporarily removed.")
@@ -86,5 +88,7 @@ def to_table_properties(
8688
"margin_threshold": self.margin_threshold,
8789
"hats_order": highest_order,
8890
"moc_sky_fraction": f"{moc_sky_fraction:0.5f}",
91+
"hats_col_healpix": self.catalog.catalog_info.healpix_column,
92+
"hats_col_healpix_order": self.catalog.catalog_info.healpix_order,
8993
} | self.extra_property_dict()
9094
return TableProperties(**info)

src/hats_import/margin_cache/margin_cache_map_reduce.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pyarrow.parquet as pq
55
from hats.io import file_io, paths
66
from hats.pixel_math.healpix_pixel import HealpixPixel
7-
from hats.pixel_math.spatial_index import SPATIAL_INDEX_COLUMN, spatial_index_to_healpix
7+
from hats.pixel_math.spatial_index import spatial_index_to_healpix
88

99
from hats_import.margin_cache.margin_cache_resume_plan import MarginCachePlan
1010
from hats_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure
@@ -19,6 +19,8 @@ def map_pixel_shards(
1919
margin_pair_file,
2020
output_path,
2121
margin_order,
22+
healpix_column,
23+
healpix_order,
2224
):
2325
"""Creates margin cache shards from a source partition file."""
2426
try:
@@ -39,8 +41,9 @@ def map_pixel_shards(
3941
)
4042

4143
margin_pixel_list = spatial_index_to_healpix(
42-
data[SPATIAL_INDEX_COLUMN].to_numpy(),
44+
data[healpix_column].to_numpy(),
4345
target_order=margin_order,
46+
spatial_index_order=healpix_order,
4447
)
4548
margin_pixel_filter = pd.DataFrame(
4649
{"margin_pixel": margin_pixel_list, "filter_value": np.arange(0, len(margin_pixel_list))}
@@ -60,6 +63,7 @@ def map_pixel_shards(
6063
pixel=pixel,
6164
output_path=output_path,
6265
source_pixel=source_pixel,
66+
healpix_column=healpix_column,
6367
)
6468

6569
MarginCachePlan.mapping_key_done(output_path, mapping_key, num_rows)
@@ -74,6 +78,7 @@ def _to_pixel_shard(
7478
pixel,
7579
output_path,
7680
source_pixel,
81+
healpix_column,
7782
):
7883
"""Do boundary checking for the cached partition and then output remaining data."""
7984
margin_data = filtered_data
@@ -88,7 +93,7 @@ def _to_pixel_shard(
8893

8994
shard_path = paths.pixel_catalog_file(partition_dir, source_pixel)
9095

91-
margin_data = margin_data.sort_by(SPATIAL_INDEX_COLUMN)
96+
margin_data = margin_data.sort_by(healpix_column)
9297

9398
pq.write_table(margin_data, shard_path.path, filesystem=shard_path.fs)
9499
return num_rows
Binary file not shown.
Binary file not shown.
0 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)