Skip to content

Commit f876d49

Browse files
authored
Merge pull request #210 from siapy/develop
2 parents e7111a3 + dccee33 commit f876d49

34 files changed

+1452
-698
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ generate-docs:
5858

5959
.PHONY: serve-docs ## Serve the docs
6060
serve-docs:
61+
pdm run mkdocs serve
62+
63+
.PHONY: serve-docs-mike ## Serve the docs using mike
64+
serve-docs-mike:
6165
pdm run mike serve
6266

6367
.PHONY: version ## Check project version

docs/api/entities/helpers.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: siapy.entities.helpers

docs/api/entities/images/mock.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: siapy.entities.images.mock

docs/api/models/metrics.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

docs/api/optimizers/metrics.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: siapy.optimizers.metrics

mkdocs.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,21 @@ nav:
8787
- Interfaces: api/entities/images/interfaces.md
8888
- Rasterio Library: api/entities/images/rasterio_lib.md
8989
- Spectral Library: api/entities/images/spectral_lib.md
90+
- Mock Image: api/entities/images/mock.md
9091
- Spectral Images: api/entities/images/spimage.md
9192
- Shape: api/entities/shapes/shape.md
9293
- Image Sets: api/entities/imagesets.md
9394
- Pixels: api/entities/pixels.md
9495
- Signatures: api/entities/signatures.md
96+
- Helpers: api/entities/helpers.md
9597
- Features:
9698
- Features: api/features/features.md
9799
- Helpers: api/features/helpers.md
98100
- Spectral Indices: api/features/spectral_indices.md
99-
- Models:
100-
- Metrics: api/models/metrics.md
101101
- Optimizers:
102102
- Configs: api/optimizers/configs.md
103103
- Evaluators: api/optimizers/evaluators.md
104+
- Metrics: api/optimizers/metrics.md
104105
- Optimizers: api/optimizers/optimizers.md
105106
- Parameters: api/optimizers/parameters.md
106107
- Scorers: api/optimizers/scorers.md

pdm.lock

Lines changed: 190 additions & 230 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

siapy/datasets/helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,5 @@ def generate_regression_target(
4949

5050

5151
def merge_signals_from_multiple_cameras(data: "TabularDatasetData") -> None:
52-
data.signals.copy()
52+
# TODO: Implement the function to merge signals from multiple cameras
53+
pass

siapy/datasets/schemas.py

Lines changed: 81 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Iterable
2+
from dataclasses import dataclass
3+
from typing import Any, Iterable, Optional
34

45
import pandas as pd
56
from pydantic import BaseModel, ConfigDict
67

78
from siapy.core.exceptions import InvalidInputError
9+
from siapy.entities import Signatures
810

911
from .helpers import generate_classification_target, generate_regression_target
1012

@@ -88,7 +90,7 @@ def reset_index(self) -> "ClassificationTarget":
8890

8991
class RegressionTarget(Target):
9092
value: pd.Series
91-
name: str
93+
name: str = "value"
9294

9395
def __getitem__(self, indices: Any) -> "RegressionTarget":
9496
value = self.value.iloc[indices]
@@ -105,7 +107,7 @@ def from_iterable(cls, data: Iterable[Any]) -> "RegressionTarget":
105107
@classmethod
106108
def from_dict(cls, data: dict[str, Any]) -> "RegressionTarget":
107109
value = pd.Series(data["value"], name="value")
108-
name = data["name"]
110+
name = data["name"] if "name" in data else "value"
109111
return cls(value=value, name=name)
110112

111113
def to_dict(self) -> dict[str, Any]:
@@ -121,42 +123,38 @@ def reset_index(self) -> "RegressionTarget":
121123
return RegressionTarget(value=self.value.reset_index(drop=True), name=self.name)
122124

123125

124-
class TabularDatasetData(BaseModel):
125-
model_config = ConfigDict(arbitrary_types_allowed=True)
126-
pixels: pd.DataFrame
127-
signals: pd.DataFrame
126+
@dataclass
127+
class TabularDatasetData:
128+
signatures: Signatures
128129
metadata: pd.DataFrame
129130
target: Target | None = None
130131

131-
def __init__(self, *args: Any, **kwargs: Any):
132-
super().__init__(*args, **kwargs)
133-
self._validate_lengths()
132+
def __len__(self) -> int:
133+
return len(self.signatures)
134134

135-
def __setattr__(self, name: str, value: Any) -> None:
136-
super().__setattr__(name, value)
137-
if name in self.model_fields.keys():
138-
self._validate_lengths()
135+
def __repr__(self) -> str:
136+
return f"TabularDatasetData(signatures={self.signatures}, metadata={self.metadata}, target={self.target})"
139137

140138
def __getitem__(self, indices: Any) -> "TabularDatasetData":
141-
pixels = self.pixels.iloc[indices]
142-
signals = self.signals.iloc[indices]
139+
signatures = self.signatures[indices]
143140
metadata = self.metadata.iloc[indices]
141+
if isinstance(metadata, pd.Series):
142+
metadata = pd.DataFrame(metadata).T
144143
target = None if self.target is None else self.target.__getitem__(indices)
145-
return TabularDatasetData(pixels=pixels, signals=signals, metadata=metadata, target=target)
144+
return TabularDatasetData(signatures=signatures, metadata=metadata, target=target)
146145

147-
def __len__(self) -> int:
148-
return len(self.pixels)
146+
def __post_init__(self) -> None:
147+
self._validate_lengths()
149148

150149
@classmethod
151150
def from_dict(cls, data: dict[str, Any]) -> "TabularDatasetData":
152-
pixels = pd.DataFrame(data["pixels"])
153-
signals = pd.DataFrame(data["signals"])
151+
signatures = Signatures.from_dict({"pixels": data["pixels"], "signals": data["signals"]})
154152
metadata = pd.DataFrame(data["metadata"])
155153
target = TabularDatasetData.target_from_dict(data.get("target", None))
156-
return cls(pixels=pixels, signals=signals, metadata=metadata, target=target)
154+
return cls(signatures=signatures, metadata=metadata, target=target)
157155

158156
@staticmethod
159-
def target_from_dict(data: dict[str, Any] | None) -> Target | None:
157+
def target_from_dict(data: dict[str, Any] | None = None) -> Optional[Target]:
160158
if data is None:
161159
return None
162160

@@ -172,14 +170,13 @@ def target_from_dict(data: dict[str, Any] | None) -> Target | None:
172170
raise InvalidInputError(data, "Invalid target dict.")
173171

174172
def _validate_lengths(self) -> None:
175-
if not (len(self.pixels) == len(self.signals) == len(self.metadata)):
173+
if len(self.signatures) != len(self.metadata):
176174
raise InvalidInputError(
177175
{
178-
"pixels_length": len(self.pixels),
179-
"signals_length": len(self.signals),
176+
"signatures_length": len(self.signatures),
180177
"metadata_length": len(self.metadata),
181178
},
182-
"Lengths of pixels, signals, and metadata must be equal",
179+
"Lengths of signatures and metadata must be equal",
183180
)
184181
if self.target is not None and len(self.target) != len(self):
185182
raise InvalidInputError(
@@ -190,25 +187,77 @@ def _validate_lengths(self) -> None:
190187
"Target length must be equal to the length of the dataset.",
191188
)
192189

190+
def set_attributes(
191+
self,
192+
*,
193+
signatures: Signatures | None = None,
194+
metadata: pd.DataFrame | None = None,
195+
target: Target | None = None,
196+
) -> "TabularDatasetData":
197+
current_data = self.copy()
198+
signatures = signatures if signatures is not None else current_data.signatures
199+
metadata = metadata if metadata is not None else current_data.metadata
200+
target = target if target is not None else current_data.target
201+
return TabularDatasetData(signatures=signatures, metadata=metadata, target=target)
202+
193203
def to_dict(self) -> dict[str, Any]:
204+
signatures_dict = self.signatures.to_dict()
194205
return {
195-
"pixels": self.pixels.to_dict(),
196-
"signals": self.signals.to_dict(),
206+
"pixels": signatures_dict["pixels"],
207+
"signals": signatures_dict["signals"],
197208
"metadata": self.metadata.to_dict(),
198209
"target": self.target.to_dict() if self.target is not None else None,
199210
}
200211

201212
def to_dataframe(self) -> pd.DataFrame:
202-
combined_df = pd.concat([self.pixels, self.signals, self.metadata], axis=1)
213+
combined_df = pd.concat([self.signatures.to_dataframe(), self.metadata], axis=1)
203214
if self.target is not None:
204215
target_series = self.target.to_dataframe()
205216
combined_df = pd.concat([combined_df, target_series], axis=1)
206217
return combined_df
207218

219+
def to_dataframe_multiindex(self) -> pd.DataFrame:
220+
signatures_df = self.signatures.to_dataframe_multiindex()
221+
222+
metadata_columns = pd.MultiIndex.from_tuples(
223+
[("metadata", col) for col in self.metadata.columns], names=["category", "field"]
224+
)
225+
metadata_df = pd.DataFrame(self.metadata.values, columns=metadata_columns)
226+
227+
combined_df = pd.concat([signatures_df, metadata_df], axis=1)
228+
229+
if self.target is not None:
230+
target_df = self.target.to_dataframe()
231+
if isinstance(self.target, ClassificationTarget):
232+
target_columns = pd.MultiIndex.from_tuples(
233+
[("target", col) for col in target_df.columns],
234+
names=["category", "field"],
235+
)
236+
elif isinstance(self.target, RegressionTarget):
237+
target_columns = pd.MultiIndex.from_tuples(
238+
[("target", self.target.name)],
239+
names=["category", "field"],
240+
)
241+
else:
242+
raise InvalidInputError(
243+
self.target,
244+
"Invalid target type. Expected ClassificationTarget or RegressionTarget.",
245+
)
246+
target_df = pd.DataFrame(target_df.values, columns=target_columns)
247+
combined_df = pd.concat([combined_df, target_df], axis=1)
248+
249+
return combined_df
250+
208251
def reset_index(self) -> "TabularDatasetData":
209252
return TabularDatasetData(
210-
pixels=self.pixels.reset_index(drop=True),
211-
signals=self.signals.reset_index(drop=True),
253+
signatures=self.signatures.reset_index(),
212254
metadata=self.metadata.reset_index(drop=True),
213255
target=self.target.reset_index() if self.target is not None else None,
214256
)
257+
258+
def copy(self) -> "TabularDatasetData":
259+
return TabularDatasetData(
260+
signatures=self.signatures.copy(),
261+
metadata=self.metadata.copy(),
262+
target=self.target.model_copy() if self.target is not None else None,
263+
)

siapy/datasets/tabular.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from siapy.core.types import ImageContainerType
1010
from siapy.datasets.schemas import TabularDatasetData
1111
from siapy.entities import Signatures, SpectralImage, SpectralImageSet
12+
from siapy.entities.helpers import get_signatures_within_convex_hull
1213

1314
__all__ = [
1415
"TabularDataset",
@@ -62,9 +63,8 @@ def process_image_data(self) -> None:
6263
self.data_entities.clear()
6364
for image_idx, image in enumerate(self.image_set):
6465
for shape_idx, shape in enumerate(image.geometric_shapes.shapes):
65-
convex_hulls = shape.get_pixels_within_convex_hull()
66-
for geometry_idx, pixels in enumerate(convex_hulls):
67-
signatures = image.to_signatures(pixels)
66+
signatures_hull = get_signatures_within_convex_hull(image, shape)
67+
for geometry_idx, signatures in enumerate(signatures_hull):
6868
entity = TabularDataEntity(
6969
image_idx=image_idx,
7070
shape_idx=shape_idx,
@@ -79,8 +79,7 @@ def process_image_data(self) -> None:
7979

8080
def generate_dataset_data(self, mean_signatures: bool = True) -> TabularDatasetData:
8181
self._check_data_entities()
82-
pixels_dfs = []
83-
signals_dfs = []
82+
signatures_dfs = []
8483
metadata_dfs = []
8584
for entity in self.data_entities:
8685
signatures_df = entity.signatures.to_dataframe().dropna()
@@ -104,17 +103,13 @@ def generate_dataset_data(self, mean_signatures: bool = True) -> TabularDatasetD
104103
"Sanity check failed! The columns in metadata_df do not match MetaDataEntity fields."
105104
)
106105

107-
signatures = Signatures.from_dataframe(signatures_df)
108-
109-
pixels_dfs.append(signatures.pixels.df)
110-
signals_dfs.append(signatures.signals.df)
106+
signatures_dfs.append(signatures_df)
111107
metadata_dfs.append(metadata_df)
112108

113-
return TabularDatasetData(
114-
pixels=pd.concat(pixels_dfs, ignore_index=True),
115-
signals=pd.concat(signals_dfs, ignore_index=True),
116-
metadata=pd.concat(metadata_dfs, ignore_index=True),
117-
)
109+
signatures_concat = pd.concat(signatures_dfs, ignore_index=True)
110+
metadata_concat = pd.concat(metadata_dfs, ignore_index=True)
111+
signatures = Signatures.from_dataframe(signatures_concat)
112+
return TabularDatasetData(signatures=signatures, metadata=metadata_concat)
118113

119114
def _check_data_entities(self) -> None:
120115
if not self.data_entities:

0 commit comments

Comments
 (0)