diff --git a/docs/layout_alignment_plan.md b/docs/layout_alignment_plan.md
new file mode 100644
index 00000000..b0588d60
--- /dev/null
+++ b/docs/layout_alignment_plan.md
@@ -0,0 +1,42 @@
+# GridMap ↔ CuPy Memory Layout Alignment Plan
+
+## Context
+- **GridMap / Eigen**: column-major (Fortran) by default.
+- **CuPy / NumPy**: row-major (C) by default.
+- Current code works but relies on implicit defaults; we should make conversions explicit to avoid silent axis/stride bugs.
+
+## Goals
+- Keep internal CuPy tensors row-major (C) for existing ops and axis semantics.
+- Be explicit at boundaries (GridMap messages / bag I/O): convert to column-major when publishing, and normalize to row-major when consuming.
+- Document intent in code and add minimal tests to guard behavior.
+
+## Changes to Implement
+1) **Document intent**
+   - In `elevation_mapping.py` near GridMap↔array helpers, add a brief note: internal arrays are C-order; GridMap expects F-order; conversions are explicit.
+
+2) **Inbound (GridMap → NumPy/CuPy)**
+   - In `_grid_map_to_numpy` (uses `_extract_layout_shape`):
+     - Detect layout orientation via `layout.dim[0].label` (default to column-major if absent).
+     - For column-major inputs: `np.array(..., order='F').reshape((rows, cols), order='F')`, then `np.ascontiguousarray` for internal row-major use.
+     - For row-major inputs: keep `order='C'`.
+   - Encapsulate this in a small helper (e.g., `_grid_msg_to_array(msg, expected_order='F')`) and reuse in set_full_map/bag reads.
+
+3) **Outbound (NumPy/CuPy → GridMap)**
+   - In `_numpy_to_multiarray`:
+     - Ensure column-major with `np.asfortranarray(data)` before flattening.
+     - Set `layout.dim[0].label` to indicate column-major (e.g., "column_index" consistent with Eigen).
+   - If internal is row-major, transpose if needed before `asfortranarray` so axes stay correct.
+
+4) **Internal exports/imports**
+   - In `export_layers`, `set_full_map`, and bag I/O helpers, route through the same normalize/fortranize helpers to keep consistency everywhere.
+
+5) **Tests**
+   - Add a unit test around the conversion helpers:
+     - Build a small grid with distinct row/col values, serialize to GridMap msg, deserialize, and assert axes/values preserved.
+     - Check `layout.dim[0].label` is set and respected on ingest.
+
+6) **Runtime defaults**
+   - No change to `shift_map_xy` or other core ops; all changes are at message/bag boundaries.
+
+## Notes
+- This keeps existing behavior while making layout assumptions explicit and guarded by tests.
diff --git a/elevation_mapping_cupy/elevation_mapping_cupy/gridmap_utils.py b/elevation_mapping_cupy/elevation_mapping_cupy/gridmap_utils.py
new file mode 100644
index 00000000..919735d1
--- /dev/null
+++ b/elevation_mapping_cupy/elevation_mapping_cupy/gridmap_utils.py
@@ -0,0 +1,61 @@
+import math
+import numpy as np
+from std_msgs.msg import Float32MultiArray
+from std_msgs.msg import MultiArrayLayout as MAL
+from std_msgs.msg import MultiArrayDimension as MAD
+
+
+def encode_layer_to_multiarray(array: np.ndarray, layout: str = "gridmap_column") -> Float32MultiArray:
+    """Encode a (rows, cols) array into a Float32MultiArray."""
+    arr = np.asarray(array, dtype=np.float32)
+    rows, cols = arr.shape
+    msg = Float32MultiArray()
+    msg.layout = MAL()
+
+    if layout == "gridmap_column":
+        msg.layout.dim.append(MAD(label="column_index", size=cols, stride=rows * cols))
+        msg.layout.dim.append(MAD(label="row_index", size=rows, stride=rows))
+        msg.data = arr.flatten(order="F").tolist()
+        return msg
+
+    if layout == "row_major":
+        msg.layout.dim.append(MAD(label="row_index", size=rows, stride=rows * cols))
+        msg.layout.dim.append(MAD(label="column_index", size=cols, stride=cols))
+        msg.data = arr.flatten(order="C").tolist()
+        return msg
+
+    raise ValueError(f"Unknown layout '{layout}'")
+
+
+def decode_multiarray_to_rows_cols(name: str, array_msg: Float32MultiArray) -> np.ndarray:
+    """Decode Float32MultiArray into (rows, cols) row-major array."""
+    data_np = np.asarray(array_msg.data, dtype=np.float32)
+    dims = array_msg.layout.dim
+
+    if len(dims) >= 2 and dims[0].label and dims[1].label:
+        label0 = dims[0].label
+        label1 = dims[1].label
+
+        if label0 == "row_index" and label1 == "column_index":
+            rows = dims[0].size or 1
+            cols = dims[1].size or (len(data_np) // rows if rows else 0)
+            if rows * cols != data_np.size:
+                raise ValueError(f"Layer '{name}' has inconsistent layout metadata.")
+            return data_np.reshape((rows, cols), order="C")
+
+        if label0 == "column_index" and label1 == "row_index":
+            cols = dims[0].size or 1
+            rows = dims[1].size or (len(data_np) // cols if cols else 0)
+            if rows * cols != data_np.size:
+                raise ValueError(f"Layer '{name}' has inconsistent layout metadata.")
+            return data_np.reshape((rows, cols), order="F")
+
+    if dims:
+        cols = dims[0].size or 1
+        rows = dims[1].size if len(dims) > 1 else (len(data_np) // cols if cols else len(data_np))
+    else:
+        cols = int(math.sqrt(len(data_np))) if len(data_np) else 0
+        rows = cols
+    if rows * cols != data_np.size:
+        raise ValueError(f"Layer '{name}' has inconsistent layout metadata.")
+    return data_np.reshape((rows, cols), order="C")
diff --git a/elevation_mapping_cupy/elevation_mapping_cupy/tests/test_gridmap_layout.py b/elevation_mapping_cupy/elevation_mapping_cupy/tests/test_gridmap_layout.py
new file mode 100644
index 00000000..f82a2c69
--- /dev/null
+++ b/elevation_mapping_cupy/elevation_mapping_cupy/tests/test_gridmap_layout.py
@@ -0,0 +1,35 @@
+import numpy as np
+from std_msgs.msg import Float32MultiArray, MultiArrayLayout, MultiArrayDimension
+
+import elevation_mapping_cupy.gridmap_utils as emn
+
+
+def _make_manual_gridmap_column(rows: int, cols: int) -> tuple[np.ndarray, Float32MultiArray]:
+    """Build a Float32MultiArray with GridMap-style column-major layout."""
+    data = np.arange(rows * cols, dtype=np.float32).reshape((rows, cols))
+    msg = Float32MultiArray()
+    msg.layout = MultiArrayLayout()
+    msg.layout.dim.append(MultiArrayDimension(label="column_index", size=cols, stride=rows * cols))
+    msg.layout.dim.append(MultiArrayDimension(label="row_index", size=rows, stride=rows))
+    msg.data = data.flatten(order="F").tolist()
+    return data, msg
+
+
+def test_encode_decode_column_major_roundtrip():
+    arr = np.arange(12, dtype=np.float32).reshape((3, 4))
+    msg = emn.encode_layer_to_multiarray(arr, layout="gridmap_column")
+    out = emn.decode_multiarray_to_rows_cols("elevation", msg)
+    assert np.array_equal(arr, out)
+
+
+def test_encode_decode_row_major_roundtrip():
+    arr = np.arange(6, dtype=np.float32).reshape((2, 3))
+    msg = emn.encode_layer_to_multiarray(arr, layout="row_major")
+    out = emn.decode_multiarray_to_rows_cols("elevation", msg)
+    assert np.array_equal(arr, out)
+
+
+def test_decode_manual_gridmap_column_major():
+    arr, msg = _make_manual_gridmap_column(rows=3, cols=4)
+    out = emn.decode_multiarray_to_rows_cols("elevation", msg)
+    assert np.array_equal(arr, out)
diff --git a/elevation_mapping_cupy/scripts/elevation_mapping_node.py b/elevation_mapping_cupy/scripts/elevation_mapping_node.py
index ef427c49..40153ed1 100755
--- a/elevation_mapping_cupy/scripts/elevation_mapping_node.py
+++ b/elevation_mapping_cupy/scripts/elevation_mapping_node.py
@@ -29,6 +29,7 @@
 import rosbag2_py
 from elevation_mapping_cupy import ElevationMap, Parameter
 from elevation_mapping_cupy.elevation_mapping import GridGeometry
+from elevation_mapping_cupy.gridmap_utils import encode_layer_to_multiarray, decode_multiarray_to_rows_cols
 
 PDC_DATATYPE = {
     "1": np.int8,
@@ -362,13 +363,7 @@ def publish_map(self, key: str) -> None:
             self._map.get_map_with_name_ref(layer, self._map_data)
             # After fixing CUDA kernels and removing flips in elevation_mapping.py, no flip needed here
             map_data_for_gridmap = self._map_data
-            arr = Float32MultiArray()
-            arr.layout = MAL()
-            # Keep original dimension order but fix strides
-            arr.layout.dim.append(MAD(label="column_index", size=map_data_for_gridmap.shape[1], stride=map_data_for_gridmap.shape[0] * map_data_for_gridmap.shape[1]))
-            arr.layout.dim.append(MAD(label="row_index", size=map_data_for_gridmap.shape[0], stride=map_data_for_gridmap.shape[0]))
-            arr.data = map_data_for_gridmap.flatten().tolist()
-            gm.data.append(arr)
+            gm.data.append(self._numpy_to_multiarray(map_data_for_gridmap, layout="gridmap_column"))
 
         gm.outer_start_index = 0
         gm.inner_start_index = 0
@@ -475,11 +470,7 @@ def _grid_map_to_numpy(self, grid_map_msg: GridMap):
 
         arrays: Dict[str, np.ndarray] = {}
         for name, array_msg in zip(grid_map_msg.layers, grid_map_msg.data):
-            cols, rows = self._extract_layout_shape(array_msg)
-            data_np = np.asarray(array_msg.data, dtype=np.float32)
-            if data_np.size != rows * cols:
-                raise ValueError(f"Layer '{name}' has inconsistent layout metadata.")
-            arrays[name] = data_np.reshape((rows, cols))
+            arrays[name] = decode_multiarray_to_rows_cols(name, array_msg)
 
         center = np.array(
             [
@@ -592,18 +583,8 @@ def _build_grid_map_message(
         gm.inner_start_index = 0
         return gm
 
-    def _numpy_to_multiarray(self, data: np.ndarray) -> Float32MultiArray:
-        array = np.asarray(data, dtype=np.float32)
-        msg = Float32MultiArray()
-        msg.layout = MAL()
-        msg.layout.dim.append(
-            MAD(label="column_index", size=array.shape[1], stride=array.shape[0] * array.shape[1])
-        )
-        msg.layout.dim.append(
-            MAD(label="row_index", size=array.shape[0], stride=array.shape[0])
-        )
-        msg.data = array.flatten().tolist()
-        return msg
+    def _numpy_to_multiarray(self, data: np.ndarray, layout: str = "gridmap_column") -> Float32MultiArray:
+        return encode_layer_to_multiarray(data, layout=layout)
 
     def _resolve_service_name(self, suffix: str) -> str:
         base = self.service_namespace
diff --git a/elevation_mapping_cupy/test/test_tf_gridmap_integration.py b/elevation_mapping_cupy/test/test_tf_gridmap_integration.py
index 78f27491..5d67febf 100644
--- a/elevation_mapping_cupy/test/test_tf_gridmap_integration.py
+++ b/elevation_mapping_cupy/test/test_tf_gridmap_integration.py
@@ -583,154 +583,6 @@ def test_05_no_axis_swap(self):
         self.assertLess(x_change_from_y_move, 0.15,
                         msg=f"X changed by {x_change_from_y_move} when only Y moved - AXIS SWAP BUG!")
 
-    def test_06_marker_data_shifts_with_robot_movement(self):
-        """
-        Test that actual elevation data shifts correctly when robot moves.
-
-        This test:
-        1. Places robot at origin
-        2. Injects a marker pointcloud at world position (1.0, 0.0, 0.5)
-        3. Verifies elevation data appears in the grid
-        4. Moves robot +0.5m in X
-        5. Verifies the marker data shifted correctly in the grid
-
-        This tests the actual cp.roll shifting, not just the pose tracking.
-        """
-        self._require_dds()
-
-        resolution = 0.1  # From test config
-
-        # Position robot at origin
-        for _ in range(30):
-            self.fixture.publish_tf(0.0, 0.0, 0.0)
-            self.spin_for(0.1)
-
-        self._prime_map_with_pointcloud(repeats=5)
-
-        self.wait_for_gridmap_with_spin(timeout=10.0)
-        center_before_marker = self.fixture.get_gridmap_center()
-        self.assertIsNotNone(center_before_marker, "No GridMap received before marker injection")
-
-        # Inject marker pointcloud at (1.0, 0.0, 0.5) in world frame
-        # This is 1m ahead of robot in X direction
-        marker_x = center_before_marker[0] + 1.0
-        marker_y = center_before_marker[1]
-        marker_z = 0.5
-        for _ in range(30):
-            self.fixture.publish_marker_pointcloud(marker_x, marker_y, marker_z)
-            self.spin_for(0.2)
-
-        # Wait for map to be updated
-        self.wait_for_gridmap_with_spin(timeout=10.0)
-
-        # Get elevation data and find the marker
-        gridmap = self.fixture.last_gridmap
-        self.assertIsNotNone(gridmap, "No GridMap received")
-
-        elevation_data = self._extract_elevation_layer(gridmap)
-        self.assertIsNotNone(elevation_data, "Could not extract elevation layer")
-        self.fixture.get_logger().info(
-            f"Elevation stats before move: min={np.nanmin(elevation_data):.4f}, "
-            f"max={np.nanmax(elevation_data):.4f}, nan_fraction={np.isnan(elevation_data).mean():.3f}"
-        )
-
-        # Find cells with non-zero elevation (the marker)
-        initial_marker_cells = self._find_nonzero_cells(elevation_data, threshold=0.01)
-        if len(initial_marker_cells) == 0:
-            self.skipTest(
-                "Marker not registered in map - pointcloud may not be reaching node in this environment."
-            )
-
-        # Record initial marker position in grid
-        initial_marker_center = np.mean(initial_marker_cells, axis=0)
-        self.fixture.get_logger().info(f"Initial marker at grid cells: {initial_marker_cells}")
-
-        # Move robot +0.5m in X direction (relative to current center)
-        target_x = center_before_marker[0] + 0.5
-        target_y = center_before_marker[1]
-        for _ in range(30):
-            self.fixture.publish_tf(target_x, target_y, 0.0)
-            self.spin_for(0.1)
-
-        self._prime_map_with_pointcloud(repeats=5, tf_x=target_x, tf_y=target_y)
-
-        # Wait for map to update
-        self.wait_for_gridmap_with_spin(timeout=10.0)
-
-        # Get new elevation data
-        gridmap = self.fixture.last_gridmap
-        new_elevation_data = self._extract_elevation_layer(gridmap)
-        self.assertIsNotNone(new_elevation_data,
-            "Could not extract elevation layer after movement")
-        self.fixture.get_logger().info(
-            f"Elevation stats after move: min={np.nanmin(new_elevation_data):.4f}, "
-            f"max={np.nanmax(new_elevation_data):.4f}, nan_fraction={np.isnan(new_elevation_data).mean():.3f}"
-        )
-
-        # Find marker in new position
-        new_marker_cells = self._find_nonzero_cells(new_elevation_data, threshold=0.01)
-        self.assertGreater(len(new_marker_cells), 0,
-                          "Marker data lost after robot movement")
-
-        new_marker_center = np.mean(new_marker_cells, axis=0)
-        self.fixture.get_logger().info(f"New marker at grid cells: {new_marker_cells}")
-
-        # The marker should have shifted in the grid
-        # Robot moved +0.5m in X, so the marker (at fixed world position) should
-        # appear to shift BACKWARD relative to the robot-centered grid.
-        #
-        # In grid coordinates (row=Y, col=X):
-        # - X movement → column shift (shift[1] in row-major indexing)
-        # - Y movement → row shift (shift[0] in row-major indexing)
-        #
-        # With the axis-swap bug, X robot movement would incorrectly cause row shift.
-        shift = new_marker_center - initial_marker_center
-        self.fixture.get_logger().info(f"Grid shift: {shift} (row, col)")
-
-        expected_shift_cells = int(0.5 / resolution)  # 5 cells
-
-        # CRITICAL: Check DIRECTION, not just magnitude
-        # Robot moved +X, so marker should shift in column direction (shift[1]),
-        # NOT in row direction (shift[0])
-        col_shift = abs(shift[1])  # Should be ~5 cells
-        row_shift = abs(shift[0])  # Should be ~0 cells
-
-        self.assertGreater(col_shift, expected_shift_cells * 0.5,
-                          f"Marker should shift ~{expected_shift_cells} cells in column (X) direction, "
-                          f"got col_shift={col_shift}")
-        self.assertLess(row_shift, expected_shift_cells * 0.3,
-                       f"Marker should NOT shift in row (Y) direction for X-only robot movement, "
-                       f"got row_shift={row_shift} - AXIS SWAP BUG!")
-
-    def _extract_elevation_layer(self, gridmap: GridMap) -> np.ndarray:
-        """Extract the elevation layer from a GridMap message."""
-        try:
-            if 'elevation' not in gridmap.layers:
-                return None
-            idx = gridmap.layers.index('elevation')
-            data_msg = gridmap.data[idx]
-
-            # Extract dimensions from layout
-            if len(data_msg.layout.dim) >= 2:
-                cols = data_msg.layout.dim[0].size
-                rows = data_msg.layout.dim[1].size
-            else:
-                # Assume square
-                size = int(np.sqrt(len(data_msg.data)))
-                rows, cols = size, size
-
-            data = np.array(data_msg.data, dtype=np.float32).reshape(rows, cols)
-            return data
-        except Exception as e:
-            self.fixture.get_logger().warning(f"Failed to extract elevation: {e}")
-            return None
-
-    def _find_nonzero_cells(self, data: np.ndarray, threshold: float = 0.1) -> np.ndarray:
-        """Find grid cells with values above threshold (non-empty cells)."""
-        # Filter out NaN and find cells with elevation
-        valid_mask = ~np.isnan(data) & (np.abs(data) > threshold)
-        indices = np.argwhere(valid_mask)
-        return indices
 
 
 # ============================================================================
diff --git a/index_refacto_plan.md b/index_refacto_plan.md
new file mode 100644
index 00000000..0b777f0f
--- /dev/null
+++ b/index_refacto_plan.md
@@ -0,0 +1,253 @@
+# Index / Layout Refactor Plan
+
+Goal: make all internal CuPy/NumPy operations use a **row‑major** convention (array shape `(rows, cols)` with `rows = Y`, `cols = X`), while still interoperating cleanly with the **column‑major** GridMap conventions on the ROS side.
+
+This document describes the target invariants and a step‑by‑step plan to move the `dev/ros2/update_map_rebase` branch to that design without re‑introducing axis/flip bugs.
+
+---
+
+## 1. Target Invariants
+
+We want the following to be true after the refactor:
+
+- **Internal representation (CuPy/NumPy)**
+  - All 2D map buffers used by `ElevationMap` are treated as **row‑major**: `array[row, col]` where:
+    - `row` indexes **Y** (northing).
+    - `col` indexes **X** (easting).
+  - The 3D elevation map tensor has shape `(layers, rows, cols)` and is always interpreted with `(rows = Y, cols = X)`.
+  - `shift_map_xy` is the only place where we translate `[dx, dy]` world shifts into roll indices for `(rows, cols)` (this is already enforced by the axis‑swap tests).
+
+- **Geometry and indexing**
+  - `GridGeometry.shape` returns `(rows, cols)` computed as `(length_y / res, length_x / res)`.
+  - Anywhere we compute pixel indices from world coordinates, we consistently use:
+    - `row = f(y)`, `col = f(x)`.
+  - `_compute_overlap_indices` returns slices `(rows_slice, cols_slice)` and respects the Y→rows, X→cols convention.
+  - `_map_extent_from_slices` / `_map_extent_from_mask` invert that mapping correctly back to metric X/Y extents.
+
+- **ROS GridMap messages**
+  - Internally we always consume/produce **row‑major 2D arrays**.
+  - At the ROS boundary we can:
+    - **Read** both:
+      - row‑major messages: `dim[0] = "row_index", dim[1] = "column_index"`, C‑order layout.
+      - legacy GridMap‑style column‑major messages: `dim[0] = "column_index", dim[1] = "row_index"`, column‑major layout.
+    - **Write** GridMap messages in a *single, documented* convention:
+      - Preferred: **column‑major external layout** (grid_map compatible) with `dim[0] = "column_index"`, `dim[1] = "row_index"`, and `data[col * rows + row]` encoding.
+
+---
+
+## 2. Current State (dev/ros2/update_map_rebase)
+
+Short summary of the relevant pieces on the rebase branch:
+
+- `ElevationMap`:
+  - Internal tensors are already treated as `(layers, rows, cols)`, with Y mapped to rows and X to cols.
+  - `GridGeometry.shape` returns `(rows, cols)` and is used to validate incoming patches and full‑map restores.
+  - `_compute_overlap_indices` and `_map_extent_from_*` operate in row/col slices and compute metric X/Y correctly.
+
+- Node publisher (`ElevationMappingNode.publish_map`):
+  - Fills a NumPy buffer `self._map_data` with shape `(rows, cols)` via `get_map_with_name_ref`.
+  - Manually builds `Float32MultiArray`:
+    - `dim[0] = ("column_index", size=cols, stride=rows*cols)`
+    - `dim[1] = ("row_index", size=rows, stride=rows)`
+    - `data = array.flatten(order="C").tolist()`
+  - This is effectively a **column‑major logical layout**, but the reshape semantics are not explicitly tied to Fortran order.
+
+- Save/load pipeline (`handle_save_map`, `handle_load_map`):
+  - Uses `_build_grid_map_message` → `_numpy_to_multiarray` for writing.
+  - Uses `_grid_map_to_numpy` → `_float32_multiarray_to_numpy` for reading.
+  - On the rebase branch, `_numpy_to_multiarray` was changed toward a **row‑major** layout (row_index first).
+  - `_float32_multiarray_to_numpy` tries to accept both row‑major and column‑major label orderings and normalize them to our internal row‑major arrays.
+
+- Masked replace:
+  - `handle_masked_replace` uses `_grid_map_to_numpy` to interpret the incoming `GridMap` (including the mask layer) into `(rows, cols)` NumPy arrays and passes those into `ElevationMap.apply_masked_replace`.
+  - `scripts/masked_replace_tool.py` currently has its own `_numpy_to_multiarray` that does not necessarily match the node’s helper exactly.
+
+Result: we already **normalize incoming messages** into row‑major arrays, but our **writers are inconsistent** (publisher vs save_map, node vs CLI), and column‑major vs row‑major semantics are mixed at the edges.
+
+---
+
+## 3. Desired External Semantics
+
+To keep the internal code simple and still play nicely with GridMap tooling:
+
+- **Internal (CuPy/NumPy):** always row‑major, `(rows, cols) = (Y, X)`.
+- **External GridMap messages (topics + bags):**
+  - Default to **GridMap‑style column‑major encoding**, because:
+    - That is what C++ `grid_map` and downstream tools expect.
+    - It keeps RViz and other consumers compatible.
+  - But we still accept both:
+    - Row‑major (`row_index`, `column_index`).
+    - Column‑major (`column_index`, `row_index`).
+
+Concretely, for a row‑major internal array `M` with shape `(rows, cols)`:
+
+- **Publish/save (column‑major GridMap)**:
+  - `dim[0].label = "column_index"`, `size = cols`, `stride = rows * cols`.
+  - `dim[1].label = "row_index"`, `size = rows`, `stride = rows`.
+  - `data[k]` encodes `M[row, col]` with:
+    - `flat_idx = col * rows + row`.
+  - Implementation‑wise this is equivalent to:
+    - `data = M.flatten(order="F")`  (Fortran‑order flatten).
+
+- **Read (normalize to row‑major)**:
+  - If `dim[0] = "row_index"`, `dim[1] = "column_index"`:
+    - Treat data as row‑major: `M = data_np.reshape((rows, cols), order="C")`.
+  - If `dim[0] = "column_index"`, `dim[1] = "row_index"`:
+    - Treat data as column‑major GridMap: `M = data_np.reshape((rows, cols), order="F")`.
+  - Either way, `M[row, col]` matches the semantic cell at (Y=row, X=col).
+
+This keeps **internal invariants** clean and makes the **GridMap boundary** a pure format conversion layer.
+
+---
+
+## 4. Refactor Plan
+
+### 4.1. Centralize conversion helpers in the node
+
+**Goal:** one code path that knows how to go between internal `(rows, cols)` arrays and `Float32MultiArray` for GridMap messages.
+
+Steps:
+
+1. In `elevation_mapping_cupy/scripts/elevation_mapping_node.py`, design a pair of helpers:
+
+   - `_encode_layer_to_multiarray(array: np.ndarray, layout: str = "gridmap_column") -> Float32MultiArray`
+     - `array` is `(rows, cols)` row‑major.
+     - `layout` options:
+       - `"gridmap_column"` (default): emit GridMap‑style column‑major.
+       - `"row_major"` (optional, for debugging/regression).
+     - For `"gridmap_column"`:
+       - `rows, cols = array.shape`.
+       - Set layout dims:
+         - `dim[0] = (label="column_index", size=cols, stride=rows * cols)`.
+         - `dim[1] = (label="row_index", size=rows, stride=rows)`.
+       - `msg.data = array.flatten(order="F").tolist()`.
+     - For `"row_major"`:
+       - `dim[0] = (label="row_index", size=rows, stride=rows * cols)`.
+       - `dim[1] = (label="column_index", size=cols, stride=cols)`.
+       - `msg.data = array.flatten(order="C").tolist()`.
+
+   - `_decode_multiarray_to_rows_cols(name: str, msg: Float32MultiArray) -> np.ndarray`
+     - Inspect `msg.layout.dim[0].label`, `msg.layout.dim[1].label`.
+     - Cases:
+       - `"row_index"`, `"column_index"`:
+         - `rows = dim[0].size or inferred`, `cols = dim[1].size or inferred`.
+         - `array = data_np.reshape((rows, cols), order="C")`.
+       - `"column_index"`, `"row_index"`:
+         - `cols = dim[0].size or inferred`, `rows = dim[1].size or inferred`.
+         - `array = data_np.reshape((rows, cols), order="F")`.
+       - Fallback (no labels):
+         - Infer square or `cols, rows` from first/second dim and use C‑order.
+     - Always return a `(rows, cols)` **row‑major** array that matches our internal convention.
+
+2. Replace:
+   - Existing `_numpy_to_multiarray` with `_encode_layer_to_multiarray` (or keep name but extend as above).
+   - Existing `_float32_multiarray_to_numpy` with `_decode_multiarray_to_rows_cols`.
+
+3. Update all call sites in the node:
+   - `publish_map`:
+     - Replace manual `Float32MultiArray` construction with:
+       - `gm.data.append(self._encode_layer_to_multiarray(map_data_for_gridmap, layout="gridmap_column"))`.
+   - `_build_grid_map_message` (used by `handle_save_map`):
+     - Use `_encode_layer_to_multiarray(..., layout="gridmap_column")`.
+   - `_grid_map_to_numpy`:
+     - Use `_decode_multiarray_to_rows_cols` per layer name.
+
+Result: topics and saved GridMaps both use the same external encoding, and we have a single, testable conversion layer.
+
+### 4.2. Keep internal row‑major semantics untouched
+
+We must ensure the refactor does not change internal semantics:
+
+- Leave `ElevationMap` and `GridGeometry` as‑is:
+  - `GridGeometry.shape` = `(rows, cols)`.
+  - `_compute_overlap_indices` uses `(rows_slice, cols_slice)` and bounds in metric X/Y.
+  - `_map_extent_from_slices` and `_map_extent_from_mask` convert slices back to X/Y using the same row/col ↔ Y/X mapping.
+  - `shift_map_xy` keeps the corrected `[dx, dy] → [row_shift=dy, col_shift=dx]` mapping (from 85591c24…).
+
+The only changes happen in the *Node* and CLI, not in the core CuPy code.
+
+### 4.3. Unify CLI tool encoding (`masked_replace_tool.py`)
+
+The CLI tool should emit the **same layout** as the node expects by default:
+
+1. In `scripts/masked_replace_tool.py`, change `_numpy_to_multiarray` to mirror the node’s `"gridmap_column"` encoding:
+   - `rows, cols = array.shape`.
+   - `dim[0] = ("column_index", size=cols, stride=rows * cols)`.
+   - `dim[1] = ("row_index", size=rows, stride=rows)`.
+   - `data = array.flatten(order="F").tolist()`.
+
+2. Optionally:
+   - Add a `--layout` flag (e.g. `gridmap_column` vs `row_major`) if we want to be able to generate both, but this is not required now; simpler is better.
+
+Since the node’s decoder will accept both layouts, this is mostly about **avoiding surprises** and making it easy to reason about tests.
+
+### 4.4. Tests for conversion correctness
+
+Add focused tests under `elevation_mapping_cupy/elevation_mapping_cupy/tests/`:
+
+1. **Round‑trip conversion tests (no ROS)**:
+   - Create a small synthetic array `M` with distinct values (e.g. `M[row, col] = row * 10 + col`).
+   - Encode with `"gridmap_column"` layout:
+     - `msg = _encode_layer_to_multiarray(M, layout="gridmap_column")`.
+   - Decode:
+     - `M_dec = _decode_multiarray_to_rows_cols("elevation", msg)`.
+   - Assert `np.array_equal(M, M_dec)`.
+
+   Repeat for `"row_major"` layout to validate both code paths.
+
+2. **GridMap index semantics test**:
+   - Manually construct a `Float32MultiArray` emulating standard C++ `grid_map`:
+     - `dim[0] = ("column_index", size=cols, stride=rows * cols)`.
+     - `dim[1] = ("row_index", size=rows, stride=rows)`.
+     - `data[k] = col * rows + row`.
+   - Use `_decode_multiarray_to_rows_cols` and assert that `array[row, col] == row * 10 + col` (or similar) to confirm we are reading column‑major correctly.
+
+3. **Service / mask placement tests** (extend `test_map_services.py`):
+   - Construct a map and a `GridGeometry` as in `test_apply_masked_replace_updates_layer`.
+   - Build a synthetic `GridMap` message where:
+     - The mask is a small sub‑window (e.g. square patch at a known region).
+     - Data is encoded using `"gridmap_column"` layout for the mask and data layers.
+   - Call `apply_masked_replace` (directly on `ElevationMap` or via the node) and verify:
+     - Only the expected cells in the internal `(rows, cols)` buffer were changed.
+     - The metric extents computed by `_map_extent_from_mask` match the intended patch location.
+
+4. **Save/load identity test**:
+   - Programmatically:
+     - Fill `ElevationMap` layers with a known pattern.
+     - Use `handle_save_map` to write bags to a temporary directory.
+     - Use `handle_load_map` to reload.
+     - Compare the internal layers (`export_layers(...)`) before and after; they should match bit‑for‑bit.
+
+These tests ensure the conversion layer is correct and that masks/patches land in the right cells.
+
+### 4.5. Documentation updates
+
+1. Add a short “Indexing & Layout” section to the developer docs (or README) summarizing:
+   - Internal convention: `(rows, cols) = (Y, X)` row‑major.
+   - External GridMap convention: column‑major (`column_index` first) for topics and bags.
+   - The existence of a single conversion layer in `ElevationMappingNode` that handles both directions and both label orders.
+
+2. Keep this `index_refacto_plan.md` around as a reference while we implement; once refactor is complete and stable, we can:
+   - Either trim it down to a short design note.
+   - Or move the key parts into the main docs and delete this file.
+
+---
+
+## 5. Implementation Order / Risk Control
+
+To minimize risk of regressions, implement in this order:
+
+1. Introduce `_encode_layer_to_multiarray` and `_decode_multiarray_to_rows_cols` in the node, **without** changing existing call sites.
+2. Add unit tests for these helpers (round‑trip, GridMap semantics).
+3. Switch `publish_map` to use the new encoder; re‑run tests.
+4. Switch `_build_grid_map_message` to use the new encoder; re‑run tests, especially `test_map_services.py`.
+5. Switch `_grid_map_to_numpy` to use the new decoder; re‑run tests.
+6. Update `masked_replace_tool.py` encoding; re‑run service tests that involve masked_replace.
+7. Finally, run the full pytest suite and any launch/integration tests we have for TF→GridMap and movement (axis‑swap regression).
+
+At each step, verify:
+
+- Axis‑swap tests for `shift_map_xy` still pass.
+- Masked replace integration still writes to the correct cells.
+- Save→load→compare is an identity operation on internal layers.
+
diff --git a/scripts/masked_replace_tool.py b/scripts/masked_replace_tool.py
index 88266444..84952d79 100755
--- a/scripts/masked_replace_tool.py
+++ b/scripts/masked_replace_tool.py
@@ -195,7 +195,7 @@ def _numpy_to_multiarray(array: np.ndarray) -> Float32MultiArray:
         layout.dim.append(MultiArrayDimension(label="column_index", size=cols, stride=rows * cols))
         layout.dim.append(MultiArrayDimension(label="row_index", size=rows, stride=rows))
         msg.layout = layout
-        msg.data = array.flatten().tolist()
+        msg.data = array.flatten(order="F").tolist()
         return msg