Skip to content

Commit 972a0e6

Browse files
authored
Address numpy random changes and rename df to data (#259)
1 parent 9a2a3f9 commit 972a0e6

File tree

18 files changed: +73 additions, -69 deletions

docs/tutorials/shape-creation.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ various bits of information from the dataset, such as the automatically-calculat
5353
bounds (*e.g.*, :attr:`.Dataset.data_bounds`, which form the bounding box of the
5454
starting data, and :attr:`.Dataset.morph_bounds`, which define the limits of where
5555
the algorithm can move the points) or percentiles using the data itself (see
56-
:attr:`.Dataset.df`). For example, the :class:`.XLines` shape inherits from
56+
:attr:`.Dataset.data`). For example, the :class:`.XLines` shape inherits from
5757
:class:`.LineCollection` and uses the morph bounds (:attr:`.Dataset.morph_bounds`)
5858
to calculate its position and scale:
5959

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,8 @@ lint.select = [
101101
"FA", # flake8-future-annotations
102102
"I", # isort
103103
"N", # pep8-naming
104+
"NPY", # numpy
105+
"PD", # pandas-vet
104106
"PTH", # flake8-use-pathlib
105107
"RUF", # ruff-specific rules
106108
"SIM", # flake8-simplify

src/data_morph/data/dataset.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ class Dataset:
3232
----------
3333
name : str
3434
The name to use for the dataset.
35-
df : pandas.DataFrame
35+
data : pandas.DataFrame
3636
DataFrame containing columns x and y.
3737
scale : numbers.Number, optional
3838
The factor to scale the data by (can be used to speed up morphing).
@@ -49,10 +49,12 @@ class Dataset:
4949
def __init__(
5050
self,
5151
name: str,
52-
df: pd.DataFrame,
52+
data: pd.DataFrame,
5353
scale: Number | None = None,
5454
) -> None:
55-
self.df: pd.DataFrame = self._validate_data(df).pipe(self._scale_data, scale)
55+
self.data: pd.DataFrame = self._validate_data(data).pipe(
56+
self._scale_data, scale
57+
)
5658
"""pandas.DataFrame: DataFrame containing columns x and y."""
5759

5860
self.name: str = name
@@ -81,7 +83,7 @@ def _derive_data_bounds(self) -> BoundingBox:
8183
"""
8284
return BoundingBox(
8385
*[
84-
Interval([self.df[dim].min(), self.df[dim].max()], inclusive=False)
86+
Interval([self.data[dim].min(), self.data[dim].max()], inclusive=False)
8587
for dim in self._REQUIRED_COLUMNS
8688
]
8789
)
@@ -122,13 +124,13 @@ def _derive_plotting_bounds(self) -> BoundingBox:
122124
plot_bounds.align_aspect_ratio()
123125
return plot_bounds
124126

125-
def _scale_data(self, df: pd.DataFrame, scale: Number) -> pd.DataFrame:
127+
def _scale_data(self, data: pd.DataFrame, scale: Number) -> pd.DataFrame:
126128
"""
127129
Apply scaling to the data.
128130
129131
Parameters
130132
----------
131-
df : pandas.DataFrame
133+
data : pandas.DataFrame
132134
The data to scale.
133135
scale : numbers.Number, optional
134136
The factor to scale the data by (can be used to speed up morphing).
@@ -141,17 +143,17 @@ def _scale_data(self, df: pd.DataFrame, scale: Number) -> pd.DataFrame:
141143
"""
142144
if scale is None:
143145
self._scaled = False
144-
return df
146+
return data
145147

146148
if isinstance(scale, bool) or not isinstance(scale, Number):
147149
raise TypeError('scale must be a numeric value.')
148150

149151
if not scale:
150152
raise ValueError('scale must be non-zero.')
151153

152-
scaled_df = df.assign(x=df.x.div(scale), y=df.y.div(scale))
154+
scaled_data = data.assign(x=data.x.div(scale), y=data.y.div(scale))
153155
self._scaled = True
154-
return scaled_df
156+
return scaled_data
155157

156158
def _validate_data(self, data: pd.DataFrame) -> pd.DataFrame:
157159
"""
@@ -211,7 +213,7 @@ def plot(
211213
fig.get_layout_engine().set(w_pad=0.2, h_pad=0.2)
212214

213215
ax.axis('equal')
214-
ax.scatter(self.df.x, self.df.y, s=2, color='black')
216+
ax.scatter(self.data.x, self.data.y, s=2, color='black')
215217
ax.set(xlabel='', ylabel='', title=self if title == 'default' else title)
216218

217219
if show_bounds:
@@ -236,8 +238,8 @@ def plot(
236238
)
237239
)
238240
ax.text(
239-
(self.df.x.max() + self.df.x.min()) / 2,
240-
self.df.y.max() + self.data_bounds.y_bounds.range / scale_base,
241+
(self.data.x.max() + self.data.x.min()) / 2,
242+
self.data.y.max() + self.data_bounds.y_bounds.range / scale_base,
241243
'DATA BOUNDS',
242244
color='blue',
243245
va='bottom',

src/data_morph/data/loader.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -106,18 +106,18 @@ class directly.
106106
Path(cls._DATA_PATH) / cls._DATASETS[dataset]
107107
)
108108
name = dataset
109-
df = pd.read_csv(filepath)
109+
data = pd.read_csv(filepath)
110110
except KeyError:
111111
try:
112112
name = Path(dataset).stem
113-
df = pd.read_csv(dataset)
113+
data = pd.read_csv(dataset)
114114
except FileNotFoundError as err:
115115
raise ValueError(
116116
f'Unknown dataset "{dataset}". '
117117
'Provide a valid path to a CSV dataset or use one of '
118118
f'the included datasets: {", ".join(cls.AVAILABLE_DATASETS)}.'
119119
) from err
120-
return Dataset(name=name, df=df, scale=scale)
120+
return Dataset(name=name, data=data, scale=scale)
121121

122122
@classmethod
123123
@plot_with_custom_style
@@ -166,7 +166,7 @@ def plot_available_datasets(cls) -> Axes:
166166
elif dataset == 'SDS':
167167
dataset += ' logo'
168168

169-
ax.scatter(points.df.x, points.df.y, s=4, color='black')
169+
ax.scatter(points.data.x, points.data.y, s=4, color='black')
170170

171171
# tight plot bounds for the grid of datasets in the docs
172172
bounds = points.data_bounds.clone()
@@ -175,7 +175,7 @@ def plot_available_datasets(cls) -> Axes:
175175
bounds.align_aspect_ratio()
176176

177177
ax.set(
178-
title=f'{dataset} ({points.df.shape[0]:,d} points)',
178+
title=f'{dataset} ({points.data.shape[0]:,d} points)',
179179
xlim=bounds.x_bounds,
180180
ylim=bounds.y_bounds,
181181
xlabel='',

src/data_morph/data/stats.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,13 @@
1212
)
1313

1414

15-
def get_values(df: pd.DataFrame) -> SummaryStatistics:
15+
def get_values(data: pd.DataFrame) -> SummaryStatistics:
1616
"""
1717
Calculate the summary statistics for the given set of points.
1818
1919
Parameters
2020
----------
21-
df : pandas.DataFrame
21+
data : pandas.DataFrame
2222
A dataset with columns x and y.
2323
2424
Returns
@@ -28,9 +28,9 @@ def get_values(df: pd.DataFrame) -> SummaryStatistics:
2828
along with the Pearson correlation coefficient between the two.
2929
"""
3030
return SummaryStatistics(
31-
df.x.mean(),
32-
df.y.mean(),
33-
df.x.std(),
34-
df.y.std(),
35-
df.corr().x.y,
31+
data.x.mean(),
32+
data.y.mean(),
33+
data.x.std(),
34+
data.y.std(),
35+
data.corr().x.y,
3636
)

src/data_morph/morpher.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -274,7 +274,7 @@ def _is_close_enough(self, df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
274274

275275
def _perturb(
276276
self,
277-
df: pd.DataFrame,
277+
data: pd.DataFrame,
278278
target_shape: Shape,
279279
*,
280280
shake: Number,
@@ -287,7 +287,7 @@ def _perturb(
287287
288288
Parameters
289289
----------
290-
df : pandas.DataFrame
290+
data : pandas.DataFrame
291291
The data to perturb.
292292
target_shape : Shape
293293
The shape to morph the data into.
@@ -308,9 +308,8 @@ def _perturb(
308308
pandas.DataFrame
309309
The input dataset with one point perturbed.
310310
"""
311-
row = self._rng.integers(0, len(df))
312-
initial_x = df.at[row, 'x']
313-
initial_y = df.at[row, 'y']
311+
row = self._rng.integers(0, len(data))
312+
initial_x, initial_y = data.to_numpy()[row]
314313

315314
# this is the simulated annealing step, if "do_bad", then we are willing to
316315
# accept a new state which is worse than the current one
@@ -329,10 +328,10 @@ def _perturb(
329328
within_bounds = [new_x, new_y] in bounds
330329
done = close_enough and within_bounds
331330

332-
df.loc[row, 'x'] = new_x
333-
df.loc[row, 'y'] = new_y
331+
data.loc[row, 'x'] = new_x
332+
data.loc[row, 'y'] = new_y
334333

335-
return df
334+
return data
336335

337336
def morph(
338337
self,
@@ -434,7 +433,7 @@ def morph(
434433
):
435434
raise ValueError('allowed_dist must be a non-negative numeric value.')
436435

437-
morphed_data = start_shape.df.copy()
436+
morphed_data = start_shape.data.copy()
438437

439438
# iteration numbers that we will end up writing to file as frames
440439
frame_numbers = self._select_frames(
@@ -487,7 +486,7 @@ def _tweening(
487486
bounds=start_shape.morph_bounds,
488487
)
489488

490-
if self._is_close_enough(start_shape.df, perturbed_data):
489+
if self._is_close_enough(start_shape.data, perturbed_data):
491490
morphed_data = perturbed_data
492491

493492
frame_number = record_frames(

src/data_morph/plotting/static.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323

2424
@plot_with_custom_style
2525
def plot(
26-
df: pd.DataFrame,
26+
data: pd.DataFrame,
2727
x_bounds: Iterable[Number],
2828
y_bounds: Iterable[Number],
2929
save_to: str | Path,
@@ -35,7 +35,7 @@ def plot(
3535
3636
Parameters
3737
----------
38-
df : pandas.DataFrame
38+
data : pandas.DataFrame
3939
The dataset to plot.
4040
x_bounds, y_bounds : Iterable[numbers.Number]
4141
The plotting limits.
@@ -57,14 +57,14 @@ def plot(
5757
)
5858
fig.get_layout_engine().set(w_pad=1.4, h_pad=0.2, wspace=0)
5959

60-
ax.scatter(df.x, df.y, s=1, alpha=0.7, color='black')
60+
ax.scatter(data.x, data.y, s=1, alpha=0.7, color='black')
6161
ax.set(xlim=x_bounds, ylim=y_bounds)
6262

6363
tick_formatter = EngFormatter()
6464
ax.xaxis.set_major_formatter(tick_formatter)
6565
ax.yaxis.set_major_formatter(tick_formatter)
6666

67-
res = get_values(df)
67+
res = get_values(data)
6868

6969
labels = ('X Mean', 'Y Mean', 'X SD', 'Y SD', 'Corr.')
7070
locs = np.linspace(0.8, 0.2, num=len(labels))

src/data_morph/shapes/circles.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,10 @@ class Circle(Shape):
4141
"""
4242

4343
def __init__(self, dataset: Dataset, radius: Number | None = None) -> None:
44-
self.center: np.ndarray = dataset.df[['x', 'y']].mean().to_numpy()
44+
self.center: np.ndarray = dataset.data[['x', 'y']].mean().to_numpy()
4545
"""numpy.ndarray: The (x, y) coordinates of the circle's center."""
4646

47-
self.radius: Number = radius or dataset.df[['x', 'y']].std().mean() * 1.5
47+
self.radius: Number = radius or dataset.data[['x', 'y']].std().mean() * 1.5
4848
"""numbers.Number: The radius of the circle."""
4949

5050
def __repr__(self) -> str:
@@ -125,7 +125,7 @@ def __init__(self, dataset: Dataset, num_rings: int = 4) -> None:
125125
if num_rings <= 1:
126126
raise ValueError('num_rings must be greater than 1')
127127

128-
stdev = dataset.df.std().mean()
128+
stdev = dataset.data.std().mean()
129129
self.circles: list[Circle] = [
130130
Circle(dataset, r)
131131
for r in np.linspace(stdev / num_rings * 2, stdev * 2, num_rings)

src/data_morph/shapes/lines/diamond.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,8 @@ class Diamond(LineCollection):
2626
"""
2727

2828
def __init__(self, dataset: Dataset) -> None:
29-
xmin, xmax = dataset.df.x.quantile([0.05, 0.95])
30-
ymin, ymax = dataset.df.y.quantile([0.05, 0.95])
29+
xmin, xmax = dataset.data.x.quantile([0.05, 0.95])
30+
ymin, ymax = dataset.data.y.quantile([0.05, 0.95])
3131

3232
xmid = (xmax + xmin) / 2
3333
ymid = (ymax + ymin) / 2

src/data_morph/shapes/lines/rectangle.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ class Rectangle(LineCollection):
2626
"""
2727

2828
def __init__(self, dataset: Dataset) -> None:
29-
xmin, xmax = dataset.df.x.quantile([0.1, 0.9])
30-
ymin, ymax = dataset.df.y.quantile([0.1, 0.9])
29+
xmin, xmax = dataset.data.x.quantile([0.1, 0.9])
30+
ymin, ymax = dataset.data.y.quantile([0.1, 0.9])
3131

3232
super().__init__(
3333
[[xmin, ymin], [xmin, ymax]],

0 commit comments

Comments
 (0)