11import xarray as xr
22import numpy as np
33
4+ import polars as pl
5+
46from uxarray .grid .connectivity import _replace_fill_values
57from uxarray .constants import INT_DTYPE , INT_FILL_VALUE
68
@@ -11,43 +13,45 @@ def _to_ugrid(in_ds, out_ds):
1113 """If input dataset (``in_ds``) file is an unstructured SCRIP file,
1214 function will reassign SCRIP variables to UGRID conventions in output file
1315 (``out_ds``).
14-
15- Parameters
16- ----------
17- in_ds : xarray.Dataset
18- Original scrip dataset of interest being used
19-
20- out_ds : xarray.Variable
21- file to be returned by ``_populate_scrip_data``, used as an empty placeholder file
22- to store reassigned SCRIP variables in UGRID conventions
2316 """
2417
2518 source_dims_dict = {}
2619
2720 if in_ds ["grid_area" ].all ():
2821 # Create node_lon & node_lat variables from grid_corner_lat/lon
29- # Turn latitude scrip array into 1D instead of 2D
22+ # Turn latitude and longitude scrip arrays into 1D
3023 corner_lat = in_ds ["grid_corner_lat" ].values .ravel ()
31-
32- # Repeat above steps with longitude data instead
3324 corner_lon = in_ds ["grid_corner_lon" ].values .ravel ()
3425
35- # Combine flat lat and lon arrays
36- corner_lon_lat = np .vstack ((corner_lon , corner_lat )).T
26+ # Use Polars to find unique coordinate pairs
27+ df = pl .DataFrame ({"lon" : corner_lon , "lat" : corner_lat }).with_row_count (
28+ "original_index"
29+ )
30+
31+ # Get unique rows, keeping the first occurrence of each (lon, lat) pair. Output row order is not guaranteed unless maintain_order=True is passed.
32+ unique_df = df .unique (subset = ["lon" , "lat" ], keep = "first" )
33+
34+ # unq_ind: The indices of the unique rows in the original array
35+ unq_ind = unique_df ["original_index" ].to_numpy ().astype (INT_DTYPE )
36+
37+ # To get the inverse index (unq_inv): map each original row back to its unique row index.
38+ # Add a unique_id to the unique_df which will serve as the "inverse" mapping.
39+ unique_df = unique_df .with_row_count ("unique_id" )
3740
38- # Run numpy unique to determine which rows/values are actually unique
39- _ , unq_ind , unq_inv = np . unique (
40- corner_lon_lat , return_index = True , return_inverse = True , axis = 0
41+ # Join original df with unique_df to find out which unique_id corresponds to each row
42+ df_joined = df . join (
43+ unique_df . drop ( "original_index" ), on = [ "lon" , "lat" ], how = "left"
4144 )
45+ unq_inv = df_joined ["unique_id" ].to_numpy ().astype (INT_DTYPE )
4246
43- # Now, calculate unique lon and lat values to account for 'node_lon' and 'node_lat'
44- unq_lon = corner_lon_lat [unq_ind , :][:, 0 ]
45- unq_lat = corner_lon_lat [unq_ind , :][:, 1 ]
47+ # Extract unique lon and lat values using unq_ind
48+ unq_lon = corner_lon [unq_ind ]
49+ unq_lat = corner_lat [unq_ind ]
4650
4751 # Reshape face nodes array into original shape for use in 'face_node_connectivity'
4852 unq_inv = np .reshape (unq_inv , (len (in_ds .grid_size ), len (in_ds .grid_corners )))
4953
50- # Create node_lon & node_lat from unsorted, unique grid_corner_lat/lon
54+ # Create node_lon & node_lat
5155 out_ds [ugrid .NODE_COORDINATES [0 ]] = xr .DataArray (
5256 unq_lon , dims = [ugrid .NODE_DIM ], attrs = ugrid .NODE_LON_ATTRS
5357 )
0 commit comments