stefmolin · stefmolin · Feb 9, 2025 · Feb 9, 2025
diff --git a/docs/tutorials/shape-creation.rst b/docs/tutorials/shape-creation.rst
@@ -53,7 +53,7 @@ various bits of information from the dataset, such as the automatically-calculat
 bounds (*e.g.*, :attr:`.Dataset.data_bounds`, which form the bounding box of the
 starting data, and :attr:`.Dataset.morph_bounds`, which define the limits of where
 the algorithm can move the points) or percentiles using the data itself (see
-:attr:`.Dataset.df`). For example, the :class:`.XLines` shape inherits from
+:attr:`.Dataset.data`). For example, the :class:`.XLines` shape inherits from
 :class:`.LineCollection` and uses the morph bounds (:attr:`.Dataset.morph_bounds`)
 to calculate its position and scale:
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -101,6 +101,8 @@ lint.select = [
   "FA",  # flake8-future-annotations
   "I",   # isort
   "N",   # pep8-naming
+  "NPY", # numpy
+  "PD",  # pandas-vet
   "PTH", # flake8-use-pathlib
   "RUF", # ruff-specific rules
   "SIM", # flake8-simplify

diff --git a/src/data_morph/data/dataset.py b/src/data_morph/data/dataset.py
@@ -32,7 +32,7 @@
     ----------
     name : str
         The name to use for the dataset.
-    df : pandas.DataFrame
+    data : pandas.DataFrame
         DataFrame containing columns x and y.
     scale : numbers.Number, optional
         The factor to scale the data by (can be used to speed up morphing).
@@ -49,10 +49,12 @@
     def __init__(
         self,
         name: str,
-        df: pd.DataFrame,
+        data: pd.DataFrame,
         scale: Number | None = None,
     ) -> None:
-        self.df: pd.DataFrame = self._validate_data(df).pipe(self._scale_data, scale)
+        self.data: pd.DataFrame = self._validate_data(data).pipe(
+            self._scale_data, scale
+        )
         """pandas.DataFrame: DataFrame containing columns x and y."""
 
         self.name: str = name
@@ -81,7 +83,7 @@
         """
         return BoundingBox(
             *[
-                Interval([self.df[dim].min(), self.df[dim].max()], inclusive=False)
+                Interval([self.data[dim].min(), self.data[dim].max()], inclusive=False)
                 for dim in self._REQUIRED_COLUMNS
             ]
         )
@@ -122,13 +124,13 @@
         plot_bounds.align_aspect_ratio()
         return plot_bounds
 
-    def _scale_data(self, df: pd.DataFrame, scale: Number) -> pd.DataFrame:
+    def _scale_data(self, data: pd.DataFrame, scale: Number) -> pd.DataFrame:
         """
         Apply scaling to the data.
 
         Parameters
         ----------
-        df : pandas.DataFrame
+        data : pandas.DataFrame
             The data to scale.
         scale : numbers.Number, optional
             The factor to scale the data by (can be used to speed up morphing).
@@ -141,17 +143,17 @@
         """
         if scale is None:
             self._scaled = False
-            return df
+            return data
 
         if isinstance(scale, bool) or not isinstance(scale, Number):
             raise TypeError('scale must be a numeric value.')
 
         if not scale:
             raise ValueError('scale must be non-zero.')
 
-        scaled_df = df.assign(x=df.x.div(scale), y=df.y.div(scale))
+        scaled_data = data.assign(x=data.x.div(scale), y=data.y.div(scale))
         self._scaled = True
-        return scaled_df
+        return scaled_data
 
     def _validate_data(self, data: pd.DataFrame) -> pd.DataFrame:
         """
@@ -211,7 +213,7 @@
             fig.get_layout_engine().set(w_pad=0.2, h_pad=0.2)
 
         ax.axis('equal')
-        ax.scatter(self.df.x, self.df.y, s=2, color='black')
+        ax.scatter(self.data.x, self.data.y, s=2, color='black')
         ax.set(xlabel='', ylabel='', title=self if title == 'default' else title)
 
         if show_bounds:
@@ -236,8 +238,8 @@
                 )
             )
             ax.text(
-                (self.df.x.max() + self.df.x.min()) / 2,
-                self.df.y.max() + self.data_bounds.y_bounds.range / scale_base,
+                (self.data.x.max() + self.data.x.min()) / 2,
+                self.data.y.max() + self.data_bounds.y_bounds.range / scale_base,
                 'DATA BOUNDS',
                 color='blue',
                 va='bottom',

diff --git a/src/data_morph/data/loader.py b/src/data_morph/data/loader.py
@@ -106,18 +106,18 @@ class directly.
                 Path(cls._DATA_PATH) / cls._DATASETS[dataset]
             )
             name = dataset
-            df = pd.read_csv(filepath)
+            data = pd.read_csv(filepath)
         except KeyError:
             try:
                 name = Path(dataset).stem
-                df = pd.read_csv(dataset)
+                data = pd.read_csv(dataset)
             except FileNotFoundError as err:
                 raise ValueError(
                     f'Unknown dataset "{dataset}". '
                     'Provide a valid path to a CSV dataset or use one of '
                     f'the included datasets: {", ".join(cls.AVAILABLE_DATASETS)}.'
                 ) from err
-        return Dataset(name=name, df=df, scale=scale)
+        return Dataset(name=name, data=data, scale=scale)
 
     @classmethod
     @plot_with_custom_style
@@ -166,7 +166,7 @@ def plot_available_datasets(cls) -> Axes:
                 elif dataset == 'SDS':
                     dataset += ' logo'
 
-                ax.scatter(points.df.x, points.df.y, s=4, color='black')
+                ax.scatter(points.data.x, points.data.y, s=4, color='black')
 
                 # tight plot bounds for the grid of datasets in the docs
                 bounds = points.data_bounds.clone()
@@ -175,7 +175,7 @@ def plot_available_datasets(cls) -> Axes:
                 bounds.align_aspect_ratio()
 
                 ax.set(
-                    title=f'{dataset} ({points.df.shape[0]:,d} points)',
+                    title=f'{dataset} ({points.data.shape[0]:,d} points)',
                     xlim=bounds.x_bounds,
                     ylim=bounds.y_bounds,
                     xlabel='',

diff --git a/src/data_morph/data/stats.py b/src/data_morph/data/stats.py
@@ -12,13 +12,13 @@
 )
 
 
-def get_values(df: pd.DataFrame) -> SummaryStatistics:
+def get_values(data: pd.DataFrame) -> SummaryStatistics:
     """
     Calculate the summary statistics for the given set of points.
 
     Parameters
     ----------
-    df : pandas.DataFrame
+    data : pandas.DataFrame
         A dataset with columns x and y.
 
     Returns
@@ -28,9 +28,9 @@ def get_values(df: pd.DataFrame) -> SummaryStatistics:
         along with the Pearson correlation coefficient between the two.
     """
     return SummaryStatistics(
-        df.x.mean(),
-        df.y.mean(),
-        df.x.std(),
-        df.y.std(),
-        df.corr().x.y,
+        data.x.mean(),
+        data.y.mean(),
+        data.x.std(),
+        data.y.std(),
+        data.corr().x.y,
     )
diff --git a/src/data_morph/morpher.py b/src/data_morph/morpher.py
@@ -274,7 +274,7 @@ def _is_close_enough(self, df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
 
     def _perturb(
         self,
-        df: pd.DataFrame,
+        data: pd.DataFrame,
         target_shape: Shape,
         *,
         shake: Number,
@@ -287,7 +287,7 @@ def _perturb(
 
         Parameters
         ----------
-        df : pandas.DataFrame
+        data : pandas.DataFrame
             The data to perturb.
         target_shape : Shape
             The shape to morph the data into.
@@ -308,9 +308,8 @@ def _perturb(
         pandas.DataFrame
             The input dataset with one point perturbed.
         """
-        row = self._rng.integers(0, len(df))
-        initial_x = df.at[row, 'x']
-        initial_y = df.at[row, 'y']
+        row = self._rng.integers(0, len(data))
+        initial_x, initial_y = data.to_numpy()[row]
 
         # this is the simulated annealing step, if "do_bad", then we are willing to
         # accept a new state which is worse than the current one
@@ -329,10 +328,10 @@ def _perturb(
             within_bounds = [new_x, new_y] in bounds
             done = close_enough and within_bounds
 
-        df.loc[row, 'x'] = new_x
-        df.loc[row, 'y'] = new_y
+        data.loc[row, 'x'] = new_x
+        data.loc[row, 'y'] = new_y
 
-        return df
+        return data
 
     def morph(
         self,
@@ -434,7 +433,7 @@ def morph(
         ):
             raise ValueError('allowed_dist must be a non-negative numeric value.')
 
-        morphed_data = start_shape.df.copy()
+        morphed_data = start_shape.data.copy()
 
         # iteration numbers that we will end up writing to file as frames
         frame_numbers = self._select_frames(
@@ -487,7 +486,7 @@ def _tweening(
                 bounds=start_shape.morph_bounds,
             )
 
-            if self._is_close_enough(start_shape.df, perturbed_data):
+            if self._is_close_enough(start_shape.data, perturbed_data):
                 morphed_data = perturbed_data
 
             frame_number = record_frames(

diff --git a/src/data_morph/plotting/static.py b/src/data_morph/plotting/static.py
@@ -23,7 +23,7 @@
 
 @plot_with_custom_style
 def plot(
-    df: pd.DataFrame,
+    data: pd.DataFrame,
     x_bounds: Iterable[Number],
     y_bounds: Iterable[Number],
     save_to: str | Path,
@@ -35,7 +35,7 @@ def plot(
 
     Parameters
     ----------
-    df : pandas.DataFrame
+    data : pandas.DataFrame
         The dataset to plot.
     x_bounds, y_bounds : Iterable[numbers.Number]
         The plotting limits.
@@ -57,14 +57,14 @@ def plot(
     )
     fig.get_layout_engine().set(w_pad=1.4, h_pad=0.2, wspace=0)
 
-    ax.scatter(df.x, df.y, s=1, alpha=0.7, color='black')
+    ax.scatter(data.x, data.y, s=1, alpha=0.7, color='black')
     ax.set(xlim=x_bounds, ylim=y_bounds)
 
     tick_formatter = EngFormatter()
     ax.xaxis.set_major_formatter(tick_formatter)
     ax.yaxis.set_major_formatter(tick_formatter)
 
-    res = get_values(df)
+    res = get_values(data)
 
     labels = ('X Mean', 'Y Mean', 'X SD', 'Y SD', 'Corr.')
     locs = np.linspace(0.8, 0.2, num=len(labels))

diff --git a/src/data_morph/shapes/circles.py b/src/data_morph/shapes/circles.py
@@ -41,10 +41,10 @@ class Circle(Shape):
     """
 
     def __init__(self, dataset: Dataset, radius: Number | None = None) -> None:
-        self.center: np.ndarray = dataset.df[['x', 'y']].mean().to_numpy()
+        self.center: np.ndarray = dataset.data[['x', 'y']].mean().to_numpy()
         """numpy.ndarray: The (x, y) coordinates of the circle's center."""
 
-        self.radius: Number = radius or dataset.df[['x', 'y']].std().mean() * 1.5
+        self.radius: Number = radius or dataset.data[['x', 'y']].std().mean() * 1.5
         """numbers.Number: The radius of the circle."""
 
     def __repr__(self) -> str:
@@ -125,7 +125,7 @@ def __init__(self, dataset: Dataset, num_rings: int = 4) -> None:
         if num_rings <= 1:
             raise ValueError('num_rings must be greater than 1')
 
-        stdev = dataset.df.std().mean()
+        stdev = dataset.data.std().mean()
         self.circles: list[Circle] = [
             Circle(dataset, r)
             for r in np.linspace(stdev / num_rings * 2, stdev * 2, num_rings)

diff --git a/src/data_morph/shapes/lines/diamond.py b/src/data_morph/shapes/lines/diamond.py
@@ -26,8 +26,8 @@ class Diamond(LineCollection):
     """
 
     def __init__(self, dataset: Dataset) -> None:
-        xmin, xmax = dataset.df.x.quantile([0.05, 0.95])
-        ymin, ymax = dataset.df.y.quantile([0.05, 0.95])
+        xmin, xmax = dataset.data.x.quantile([0.05, 0.95])
+        ymin, ymax = dataset.data.y.quantile([0.05, 0.95])
 
         xmid = (xmax + xmin) / 2
         ymid = (ymax + ymin) / 2

diff --git a/src/data_morph/shapes/lines/rectangle.py b/src/data_morph/shapes/lines/rectangle.py
@@ -26,8 +26,8 @@ class Rectangle(LineCollection):
     """
 
     def __init__(self, dataset: Dataset) -> None:
-        xmin, xmax = dataset.df.x.quantile([0.1, 0.9])
-        ymin, ymax = dataset.df.y.quantile([0.1, 0.9])
+        xmin, xmax = dataset.data.x.quantile([0.1, 0.9])
+        ymin, ymax = dataset.data.y.quantile([0.1, 0.9])
 
         super().__init__(
             [[xmin, ymin], [xmin, ymax]],

diff --git a/src/data_morph/shapes/points/dots_grid.py b/src/data_morph/shapes/points/dots_grid.py
@@ -29,8 +29,8 @@ class DotsGrid(PointCollection):
     name = 'dots'
 
     def __init__(self, dataset: Dataset) -> None:
-        xlow, xhigh = dataset.df.x.quantile([0.05, 0.95]).tolist()
-        ylow, yhigh = dataset.df.y.quantile([0.05, 0.95]).tolist()
+        xlow, xhigh = dataset.data.x.quantile([0.05, 0.95]).tolist()
+        ylow, yhigh = dataset.data.y.quantile([0.05, 0.95]).tolist()
 
         xmid = (xhigh + xlow) / 2
         ymid = (yhigh + ylow) / 2

diff --git a/src/data_morph/shapes/points/scatter.py b/src/data_morph/shapes/points/scatter.py
@@ -30,9 +30,9 @@ class Scatter(PointCollection):
 
     def __init__(self, dataset: Dataset) -> None:
         rng = np.random.default_rng(1)
-        center = (dataset.df.x.mean(), dataset.df.y.mean())
+        center = (dataset.data.x.mean(), dataset.data.y.mean())
         points = [center]
-        max_radius = max(dataset.df.x.std(), dataset.df.y.std())
+        max_radius = max(dataset.data.x.std(), dataset.data.y.std())
         for radius in np.linspace(max_radius // 5, max_radius, num=5):
             for angle in np.linspace(0, 360, num=50, endpoint=False):
                 points.append(

diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py
@@ -18,17 +18,17 @@ class TestDataset:
     def test_scale_data(self, scale, starter_shapes_dir):
         """Confirm that data scaling is working by checking min and max."""
 
-        original_df = pd.read_csv(starter_shapes_dir / 'dino.csv')
-        original_min = original_df.min()
-        original_max = original_df.max()
+        original_data = pd.read_csv(starter_shapes_dir / 'dino.csv')
+        original_min = original_data.min()
+        original_max = original_data.max()
 
         dataset = DataLoader.load_dataset('dino', scale=scale)
 
         if scale:
-            assert_equal(dataset.df.min().to_numpy(), original_min / scale)
-            assert_equal(dataset.df.max().to_numpy(), original_max / scale)
+            assert_equal(dataset.data.min().to_numpy(), original_min / scale)
+            assert_equal(dataset.data.max().to_numpy(), original_max / scale)
         else:
-            assert_frame_equal(dataset.df, original_df)
+            assert_frame_equal(dataset.data, original_data)
 
     @pytest.mark.input_validation
     @pytest.mark.parametrize(
@@ -52,17 +52,17 @@ def test_scale_data_invalid_scale(self, scale):
     def test_validate_data_missing_columns(self, starter_shapes_dir):
         """Confirm that creation of a Dataset validates the DataFrame columns."""
 
-        df = pd.read_csv(starter_shapes_dir / 'dino.csv').rename(columns={'x': 'a'})
+        data = pd.read_csv(starter_shapes_dir / 'dino.csv').rename(columns={'x': 'a'})
 
         with pytest.raises(ValueError, match='Columns "x" and "y" are required.'):
-            _ = Dataset('dino', df)
+            _ = Dataset('dino', data)
 
     def test_validate_data_fix_column_casing(self, starter_shapes_dir):
         """Confirm that creating a Dataset with correct names but in wrong casing works."""
 
-        df = pd.read_csv(starter_shapes_dir / 'dino.csv').rename(columns={'x': 'X'})
-        dataset = Dataset('dino', df)
-        assert not dataset.df[list(dataset._REQUIRED_COLUMNS)].empty
+        data = pd.read_csv(starter_shapes_dir / 'dino.csv').rename(columns={'x': 'X'})
+        dataset = Dataset('dino', data)
+        assert not dataset.data[list(dataset._REQUIRED_COLUMNS)].empty
 
     @pytest.mark.bounds
     @pytest.mark.parametrize(

diff --git a/tests/data/test_loader.py b/tests/data/test_loader.py
@@ -27,7 +27,7 @@ def test_load_dataset(self, name, file, starter_shapes_dir):
         assert isinstance(dataset_from_pkg, Dataset)
         assert isinstance(dataset_from_file, Dataset)
         assert dataset_from_pkg.name == dataset_from_file.name
-        assert_frame_equal(dataset_from_pkg.df, dataset_from_file.df)
+        assert_frame_equal(dataset_from_pkg.data, dataset_from_file.data)
 
     @pytest.mark.input_validation
     @pytest.mark.parametrize('dataset', ['does_not_exist', 'does_not_exist.csv'])