@@ -97,3 +97,53 @@ def canonicalize_D4(grid: Array) -> Array:
9797 # This should not occur because D4 contains identity, but guard anyway.
9898 return grid .copy ()
9999 return best
100+
101+
def _relabel_by_frequency(first: Array, second: Array) -> Tuple[Array, Array]:
    """Relabel the colours of two grids using one shared frequency ranking.

    Colours are ranked by their combined number of occurrences in both grids
    (most frequent first, ties broken by the smaller original value) and each
    cell is replaced by the rank of its colour.

    Parameters
    ----------
    first, second:
        Integer arrays to relabel together.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        ``int16`` arrays with the same shapes as ``first`` and ``second``.
    """
    # ``np.unique`` returns the distinct colours in ascending order, which is
    # what makes the ``np.searchsorted`` lookup below exact.
    colours, counts = np.unique(
        np.concatenate([first.ravel(), second.ravel()]), return_counts=True
    )
    # ``np.lexsort`` treats its LAST key as the primary one: sort by descending
    # count first, then by ascending colour value.
    by_rank = np.lexsort((colours, -counts))
    labels = np.empty(colours.size, dtype=np.int16)
    labels[by_rank] = np.arange(colours.size)
    # Every cell value is present in ``colours``, so ``searchsorted`` yields its
    # exact index.  Unlike ``np.vectorize`` without ``otypes``, this also works
    # for grids that contain no cells at all.
    return (
        labels[np.searchsorted(colours, first)],
        labels[np.searchsorted(colours, second)],
    )


def canonicalize_pair(input_grid: Array, output_grid: Array) -> Tuple[Array, Array]:
    """Canonicalise a pair of grids under shared D4 symmetries and colours.

    The same D4 transform and colour relabelling are applied to both ``input_grid``
    and ``output_grid`` so that puzzle examples remain aligned.  For every
    transform in ``D4`` the pair is transformed, its colours are relabelled by
    combined frequency, and the candidate whose
    ``(input shape, input bytes, output shape, output bytes)`` key compares
    smallest is returned.

    [S:ALG v2] pair-D4 canonicalisation pass

    Parameters
    ----------
    input_grid, output_grid:
        Arrays representing an ARC training pair.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        Canonicalised input and output grids (``int16`` colour labels).

    Raises
    ------
    TypeError
        If either grid is not a ``numpy.ndarray`` of integer dtype.
    """
    if not isinstance(input_grid, np.ndarray) or not isinstance(output_grid, np.ndarray):
        raise TypeError("grids must be numpy arrays")
    if not np.issubdtype(input_grid.dtype, np.integer) or not np.issubdtype(
        output_grid.dtype, np.integer
    ):
        raise TypeError("grid dtype must be integer")

    best_in: Array | None = None
    best_out: Array | None = None
    best_key: Tuple[Tuple[int, int], bytes, Tuple[int, int], bytes] | None = None
    for transform in D4:
        inp_c, out_c = _relabel_by_frequency(
            transform(input_grid), transform(output_grid)
        )
        # Shapes are part of the key so that candidates of different
        # orientation (e.g. 2x3 vs 3x2) are ordered before their contents.
        key = (inp_c.shape, inp_c.tobytes(), out_c.shape, out_c.tobytes())
        if best_key is None or key < best_key:
            best_in, best_out, best_key = inp_c, out_c, key
    if best_in is None or best_out is None:
        # This should not occur because D4 contains identity, but guard anyway.
        return input_grid.copy(), output_grid.copy()
    return best_in, best_out
0 commit comments