Skip to content

Commit bbecd03

Browse files
authored
Merge pull request #208 from siapy/fix
2 parents e7111a3 + 16f53af commit bbecd03

File tree

20 files changed

+986
-505
lines changed

20 files changed

+986
-505
lines changed

docs/api/models/metrics.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

docs/api/optimizers/metrics.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: siapy.optimizers.metrics

mkdocs.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,10 @@ nav:
9696
- Features: api/features/features.md
9797
- Helpers: api/features/helpers.md
9898
- Spectral Indices: api/features/spectral_indices.md
99-
- Models:
100-
- Metrics: api/models/metrics.md
10199
- Optimizers:
102100
- Configs: api/optimizers/configs.md
103101
- Evaluators: api/optimizers/evaluators.md
102+
- Metrics: api/optimizers/metrics.md
104103
- Optimizers: api/optimizers/optimizers.md
105104
- Parameters: api/optimizers/parameters.md
106105
- Scorers: api/optimizers/scorers.md

pdm.lock

Lines changed: 190 additions & 230 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

siapy/datasets/helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,5 @@ def generate_regression_target(
4949

5050

5151
def merge_signals_from_multiple_cameras(data: "TabularDatasetData") -> None:
52-
data.signals.copy()
52+
# TODO: Implement the function to merge signals from multiple cameras
53+
pass

siapy/datasets/schemas.py

Lines changed: 81 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Iterable
2+
from dataclasses import dataclass
3+
from typing import Any, Iterable, Optional
34

45
import pandas as pd
56
from pydantic import BaseModel, ConfigDict
67

78
from siapy.core.exceptions import InvalidInputError
9+
from siapy.entities import Signatures
810

911
from .helpers import generate_classification_target, generate_regression_target
1012

@@ -88,7 +90,7 @@ def reset_index(self) -> "ClassificationTarget":
8890

8991
class RegressionTarget(Target):
9092
value: pd.Series
91-
name: str
93+
name: str = "value"
9294

9395
def __getitem__(self, indices: Any) -> "RegressionTarget":
9496
value = self.value.iloc[indices]
@@ -105,7 +107,7 @@ def from_iterable(cls, data: Iterable[Any]) -> "RegressionTarget":
105107
@classmethod
106108
def from_dict(cls, data: dict[str, Any]) -> "RegressionTarget":
107109
value = pd.Series(data["value"], name="value")
108-
name = data["name"]
110+
name = data["name"] if "name" in data else "value"
109111
return cls(value=value, name=name)
110112

111113
def to_dict(self) -> dict[str, Any]:
@@ -121,42 +123,38 @@ def reset_index(self) -> "RegressionTarget":
121123
return RegressionTarget(value=self.value.reset_index(drop=True), name=self.name)
122124

123125

124-
class TabularDatasetData(BaseModel):
125-
model_config = ConfigDict(arbitrary_types_allowed=True)
126-
pixels: pd.DataFrame
127-
signals: pd.DataFrame
126+
@dataclass
127+
class TabularDatasetData:
128+
signatures: Signatures
128129
metadata: pd.DataFrame
129130
target: Target | None = None
130131

131-
def __init__(self, *args: Any, **kwargs: Any):
132-
super().__init__(*args, **kwargs)
133-
self._validate_lengths()
132+
def __len__(self) -> int:
133+
return len(self.signatures)
134134

135-
def __setattr__(self, name: str, value: Any) -> None:
136-
super().__setattr__(name, value)
137-
if name in self.model_fields.keys():
138-
self._validate_lengths()
135+
def __repr__(self) -> str:
136+
return f"TabularDatasetData(signatures={self.signatures}, metadata={self.metadata}, target={self.target})"
139137

140138
def __getitem__(self, indices: Any) -> "TabularDatasetData":
141-
pixels = self.pixels.iloc[indices]
142-
signals = self.signals.iloc[indices]
139+
signatures = self.signatures[indices]
143140
metadata = self.metadata.iloc[indices]
141+
if isinstance(metadata, pd.Series):
142+
metadata = pd.DataFrame(metadata).T
144143
target = None if self.target is None else self.target.__getitem__(indices)
145-
return TabularDatasetData(pixels=pixels, signals=signals, metadata=metadata, target=target)
144+
return TabularDatasetData(signatures=signatures, metadata=metadata, target=target)
146145

147-
def __len__(self) -> int:
148-
return len(self.pixels)
146+
def __post_init__(self) -> None:
147+
self._validate_lengths()
149148

150149
@classmethod
151150
def from_dict(cls, data: dict[str, Any]) -> "TabularDatasetData":
152-
pixels = pd.DataFrame(data["pixels"])
153-
signals = pd.DataFrame(data["signals"])
151+
signatures = Signatures.from_dict({"pixels": data["pixels"], "signals": data["signals"]})
154152
metadata = pd.DataFrame(data["metadata"])
155153
target = TabularDatasetData.target_from_dict(data.get("target", None))
156-
return cls(pixels=pixels, signals=signals, metadata=metadata, target=target)
154+
return cls(signatures=signatures, metadata=metadata, target=target)
157155

158156
@staticmethod
159-
def target_from_dict(data: dict[str, Any] | None) -> Target | None:
157+
def target_from_dict(data: dict[str, Any] | None = None) -> Optional[Target]:
160158
if data is None:
161159
return None
162160

@@ -172,14 +170,13 @@ def target_from_dict(data: dict[str, Any] | None) -> Target | None:
172170
raise InvalidInputError(data, "Invalid target dict.")
173171

174172
def _validate_lengths(self) -> None:
175-
if not (len(self.pixels) == len(self.signals) == len(self.metadata)):
173+
if len(self.signatures) != len(self.metadata):
176174
raise InvalidInputError(
177175
{
178-
"pixels_length": len(self.pixels),
179-
"signals_length": len(self.signals),
176+
"signatures_length": len(self.signatures),
180177
"metadata_length": len(self.metadata),
181178
},
182-
"Lengths of pixels, signals, and metadata must be equal",
179+
"Lengths of signatures and metadata must be equal",
183180
)
184181
if self.target is not None and len(self.target) != len(self):
185182
raise InvalidInputError(
@@ -190,25 +187,77 @@ def _validate_lengths(self) -> None:
190187
"Target length must be equal to the length of the dataset.",
191188
)
192189

190+
def set_attributes(
191+
self,
192+
*,
193+
signatures: Signatures | None = None,
194+
metadata: pd.DataFrame | None = None,
195+
target: Target | None = None,
196+
) -> "TabularDatasetData":
197+
current_data = self.copy()
198+
signatures = signatures if signatures is not None else current_data.signatures
199+
metadata = metadata if metadata is not None else current_data.metadata
200+
target = target if target is not None else current_data.target
201+
return TabularDatasetData(signatures=signatures, metadata=metadata, target=target)
202+
193203
def to_dict(self) -> dict[str, Any]:
204+
signatures_dict = self.signatures.to_dict()
194205
return {
195-
"pixels": self.pixels.to_dict(),
196-
"signals": self.signals.to_dict(),
206+
"pixels": signatures_dict["pixels"],
207+
"signals": signatures_dict["signals"],
197208
"metadata": self.metadata.to_dict(),
198209
"target": self.target.to_dict() if self.target is not None else None,
199210
}
200211

201212
def to_dataframe(self) -> pd.DataFrame:
202-
combined_df = pd.concat([self.pixels, self.signals, self.metadata], axis=1)
213+
combined_df = pd.concat([self.signatures.to_dataframe(), self.metadata], axis=1)
203214
if self.target is not None:
204215
target_series = self.target.to_dataframe()
205216
combined_df = pd.concat([combined_df, target_series], axis=1)
206217
return combined_df
207218

219+
def to_dataframe_multiindex(self) -> pd.DataFrame:
220+
signatures_df = self.signatures.to_dataframe_multiindex()
221+
222+
metadata_columns = pd.MultiIndex.from_tuples(
223+
[("metadata", col) for col in self.metadata.columns], names=["category", "field"]
224+
)
225+
metadata_df = pd.DataFrame(self.metadata.values, columns=metadata_columns)
226+
227+
combined_df = pd.concat([signatures_df, metadata_df], axis=1)
228+
229+
if self.target is not None:
230+
target_df = self.target.to_dataframe()
231+
if isinstance(self.target, ClassificationTarget):
232+
target_columns = pd.MultiIndex.from_tuples(
233+
[("target", col) for col in target_df.columns],
234+
names=["category", "field"],
235+
)
236+
elif isinstance(self.target, RegressionTarget):
237+
target_columns = pd.MultiIndex.from_tuples(
238+
[("target", self.target.name)],
239+
names=["category", "field"],
240+
)
241+
else:
242+
raise InvalidInputError(
243+
self.target,
244+
"Invalid target type. Expected ClassificationTarget or RegressionTarget.",
245+
)
246+
target_df = pd.DataFrame(target_df.values, columns=target_columns)
247+
combined_df = pd.concat([combined_df, target_df], axis=1)
248+
249+
return combined_df
250+
208251
def reset_index(self) -> "TabularDatasetData":
209252
return TabularDatasetData(
210-
pixels=self.pixels.reset_index(drop=True),
211-
signals=self.signals.reset_index(drop=True),
253+
signatures=self.signatures.reset_index(),
212254
metadata=self.metadata.reset_index(drop=True),
213255
target=self.target.reset_index() if self.target is not None else None,
214256
)
257+
258+
def copy(self) -> "TabularDatasetData":
259+
return TabularDatasetData(
260+
signatures=self.signatures.copy(),
261+
metadata=self.metadata.copy(),
262+
target=self.target.model_copy() if self.target is not None else None,
263+
)

siapy/datasets/tabular.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ def process_image_data(self) -> None:
7979

8080
def generate_dataset_data(self, mean_signatures: bool = True) -> TabularDatasetData:
8181
self._check_data_entities()
82-
pixels_dfs = []
83-
signals_dfs = []
82+
signatures_dfs = []
8483
metadata_dfs = []
8584
for entity in self.data_entities:
8685
signatures_df = entity.signatures.to_dataframe().dropna()
@@ -104,17 +103,13 @@ def generate_dataset_data(self, mean_signatures: bool = True) -> TabularDatasetD
104103
"Sanity check failed! The columns in metadata_df do not match MetaDataEntity fields."
105104
)
106105

107-
signatures = Signatures.from_dataframe(signatures_df)
108-
109-
pixels_dfs.append(signatures.pixels.df)
110-
signals_dfs.append(signatures.signals.df)
106+
signatures_dfs.append(signatures_df)
111107
metadata_dfs.append(metadata_df)
112108

113-
return TabularDatasetData(
114-
pixels=pd.concat(pixels_dfs, ignore_index=True),
115-
signals=pd.concat(signals_dfs, ignore_index=True),
116-
metadata=pd.concat(metadata_dfs, ignore_index=True),
117-
)
109+
signatures_concat = pd.concat(signatures_dfs, ignore_index=True)
110+
metadata_concat = pd.concat(metadata_dfs, ignore_index=True)
111+
signatures = Signatures.from_dataframe(signatures_concat)
112+
return TabularDatasetData(signatures=signatures, metadata=metadata_concat)
118113

119114
def _check_data_entities(self) -> None:
120115
if not self.data_entities:

siapy/entities/pixels.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pandas as pd
99
from numpy.typing import NDArray
1010

11-
from siapy.core.exceptions import InvalidInputError
11+
from siapy.core.exceptions import InvalidInputError, InvalidTypeError
1212

1313
__all__ = [
1414
"Pixels",
@@ -42,9 +42,11 @@ def __len__(self) -> int:
4242
def __repr__(self) -> str:
4343
return f"Pixels(\n{self.df}\n)"
4444

45-
def __getitem__(self, idx: int) -> PixelCoordinate:
46-
row = self.df.iloc[idx]
47-
return PixelCoordinate(x=row[self.coords.X], y=row[self.coords.Y])
45+
def __getitem__(self, indices: Any) -> "Pixels":
46+
df_slice = self.df.iloc[indices]
47+
if isinstance(df_slice, pd.Series):
48+
df_slice = df_slice.to_frame().T
49+
return Pixels(df_slice)
4850

4951
def __eq__(self, other: Any) -> bool:
5052
if not isinstance(other, Pixels):
@@ -76,6 +78,7 @@ def df_homogenious(self) -> pd.DataFrame:
7678
return df_homo
7779

7880
def u(self) -> "pd.Series[float]":
81+
# TODO: rename u() to x() to match the (x, y) coordinate naming
7982
return self.df[self.coords.X]
8083

8184
def v(self) -> "pd.Series[float]":
@@ -96,13 +99,31 @@ def as_type(self, dtype: type) -> "Pixels":
9699
converted_df[self.coords.Y] = converted_df[self.coords.Y].astype(dtype)
97100
return Pixels(converted_df)
98101

102+
def get_coordinate(self, idx: int) -> PixelCoordinate:
103+
row = self.df.iloc[idx]
104+
return PixelCoordinate(x=row[self.coords.X], y=row[self.coords.Y])
105+
106+
107+
def validate_pixel_input_dimensions(df: pd.DataFrame | pd.Series) -> None:
108+
if isinstance(df, pd.Series):
109+
raise InvalidTypeError(
110+
input_value=df,
111+
allowed_types=pd.DataFrame,
112+
message="Expected a DataFrame, but got a Series.",
113+
)
114+
115+
if df.empty:
116+
raise InvalidInputError(
117+
message="Input DataFrame is empty.",
118+
input_value=df,
119+
)
99120

100-
def validate_pixel_input_dimensions(df: pd.DataFrame) -> None:
101121
if df.shape[1] != 2:
102122
raise InvalidInputError(
103-
message="Invalid input dimensions: expected 2 columns (u, v), got",
123+
message="Invalid input dimensions: expected 2 columns (x, y), got",
104124
input_value=df.shape[1],
105125
)
126+
106127
if sorted(df.columns) != sorted([HomogeneousCoordinate.X, HomogeneousCoordinate.Y]):
107128
raise InvalidInputError(
108129
message=f"Invalid column names: expected ['{HomogeneousCoordinate.X}', '{HomogeneousCoordinate.Y}'], got",

0 commit comments

Comments
 (0)