Skip to content

Commit 3cbb81c

Browse files
Fix docs building and rename AttributeInfo attribute (#1950)
This PR fixes the failing documentation builds, renames AttributeInfo.annotation to AttributeInfo.field, and bumps the version to 2.0.0.

<!-- Contributing guide: https://github.com/open-edge-platform/datumaro/blob/develop/contributing.md -->
<!-- Please add a summary of changes. You may use Copilot to auto-generate the PR description but please consider including any other relevant facts which Copilot may be unaware of (such as design choices and testing procedure). Add references to the relevant issues and pull requests if any like so: Resolves #111 and #222. Depends on #1000 (for series of dependent commits). -->

### Checklist
<!-- Put an 'x' in all the boxes that apply -->
- [ ] I have added tests to cover my changes or documented any manual tests.
- [ ] I have updated the [documentation](https://github.com/open-edge-platform/datumaro/tree/develop/docs) accordingly

---------

Signed-off-by: Albert van Houten <[email protected]>
1 parent e7e3b42 commit 3cbb81c

21 files changed

+174
-203
lines changed

.github/workflows/docs_latest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
sudo apt-get install pandoc graphviz
3434
- name: Build-Docs
3535
run: |
36-
uv pip install -r docs/requirements.txt
36+
uv pip install -r docs/requirements.txt --system
3737
cd docs
3838
make clean
3939
make html

.github/workflows/docs_stable.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
sudo apt-get install pandoc graphviz
3333
- name: Build-Docs
3434
run: |
35-
uv pip install -r docs/requirements.txt
35+
uv pip install -r docs/requirements.txt --system
3636
cd docs
3737
make clean
3838
make html

src/datumaro/components/annotation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ class ExtractedMask(Mask):
486486
and 1 for foreground.
487487
488488
>>> import numpy as np
489-
>>> from datumaro.components.annotation import ExtractedMask
489+
>>> from datumaro.components.annotation import ExtractedMask
490490
>>>
491491
>>> index_mask = np.random.randint(low=0, high=2, size=(10, 10), dtype=np.uint8)
492492
>>> mask1 = ExtractedMask(index_mask=index_mask, index=0, label=0) # 0 for background

src/datumaro/experimental/converter_registry.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -616,11 +616,11 @@ def _group_fields_by_semantic(schema: Schema) -> dict[Semantic, _SchemaState]:
616616
groups: dict[Semantic, dict[Type[Field], AttributeSpec[Field]]] = defaultdict(dict)
617617

618618
for attr_name, attr_info in schema.attributes.items():
619-
semantic = attr_info.annotation.semantic
619+
semantic = attr_info.field.semantic
620620

621-
field_type = type(attr_info.annotation)
621+
field_type = type(attr_info.field)
622622
attr_spec = AttributeSpec(
623-
name=attr_name, field=attr_info.annotation, categories=attr_info.categories
623+
name=attr_name, field=attr_info.field, categories=attr_info.categories
624624
)
625625
groups[semantic][field_type] = attr_spec
626626

@@ -915,8 +915,8 @@ def find_conversion_path(
915915
# We do not want to include those attributes into the inferred_categories.
916916
inferred_categories: dict[str, Categories] = {}
917917
for attr_name, attr_info in to_schema.attributes.items():
918-
semantic = attr_info.annotation.semantic
919-
attr_spec = target_groups[semantic].field_to_attr_spec[type(attr_info.annotation)]
918+
semantic = attr_info.field.semantic
919+
attr_spec = target_groups[semantic].field_to_attr_spec[type(attr_info.field)]
920920
if attr_spec.categories is not None:
921921
inferred_categories[attr_name] = attr_spec.categories
922922

src/datumaro/experimental/dataset.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,15 @@ def infer_schema(cls) -> Schema:
110110
final_type = annotation
111111
else:
112112
final_type = type_origin if type_origin is not None else annotation
113-
attributes[name] = AttributeInfo(type=final_type, annotation=field_annotation)
113+
attributes[name] = AttributeInfo(type=final_type, field=field_annotation)
114114
return Schema(attributes=attributes)
115115

116116
def evaluate_lazy_field(self, name: str) -> Any:
117117
row_df = self._transforms.apply([name])
118118

119119
# Now extract the value from the converted dataframe
120120
attr_info = self._transforms.schema.attributes[name]
121-
value = attr_info.annotation.from_polars(name, 0, row_df, attr_info.type)
121+
value = attr_info.field.from_polars(name, 0, row_df, attr_info.type)
122122

123123
return value
124124

@@ -144,7 +144,7 @@ def __get__(self, instance, _):
144144

145145
# Now extract the value from the converted dataframe
146146
attr_info = self._transforms.schema.attributes[self._attr_name]
147-
value = attr_info.annotation.from_polars(self._attr_name, 0, row_df, attr_info.type)
147+
value = attr_info.field.from_polars(self._attr_name, 0, row_df, attr_info.type)
148148

149149
# Cache the value and set it as a real attribute
150150
setattr(instance, self._attr_name, value)
@@ -236,7 +236,7 @@ def _generate_polars_schema(self) -> pl.Schema:
236236
"""Generate a Polars schema from the dataset's field definitions."""
237237
schema: dict[str, pl.DataType] = {}
238238
for key, attr_info in self._schema.attributes.items():
239-
schema.update(attr_info.annotation.to_polars_schema(key))
239+
schema.update(attr_info.field.to_polars_schema(key))
240240
return pl.Schema(schema)
241241

242242
def append(self, sample: DType):
@@ -251,7 +251,7 @@ def append(self, sample: DType):
251251

252252
series_data: dict[str, pl.Series] = {}
253253
for key, attr_info in self._schema.attributes.items():
254-
series_data.update(attr_info.annotation.to_polars(key, getattr(sample, key)))
254+
series_data.update(attr_info.field.to_polars(key, getattr(sample, key)))
255255

256256
new_row = pl.DataFrame(series_data).cast(dict(self.df.schema)) # type: ignore
257257

@@ -311,9 +311,7 @@ def __getitem__(self, row_idx: int) -> DType:
311311
for key, attr_info in self._schema.attributes.items():
312312
if key not in lazy_attributes:
313313
# This attribute is directly available
314-
direct_attributes[key] = attr_info.annotation.from_polars(
315-
key, 0, row_df, attr_info.type
316-
)
314+
direct_attributes[key] = attr_info.field.from_polars(key, 0, row_df, attr_info.type)
317315

318316
# If there are lazy converters, create a dynamic class with descriptors
319317
dtype = self._dtype
@@ -390,7 +388,7 @@ def __setitem__(self, row_idx: int, sample: DType):
390388

391389
series_data: dict[str, pl.Series] = {}
392390
for key, attr_info in self._schema.attributes.items():
393-
series_data.update(attr_info.annotation.to_polars(key, getattr(sample, key)))
391+
series_data.update(attr_info.field.to_polars(key, getattr(sample, key)))
394392

395393
updated_row = pl.DataFrame(series_data).cast(dict(self.df.schema)) # type: ignore
396394

@@ -482,7 +480,7 @@ def filter_by_subset(self, subset: Subset) -> Dataset[DType]:
482480
A new Dataset with items of the given subset.
483481
"""
484482
for subset_column_name, attribute_info in self.schema.attributes.items():
485-
if isinstance(attribute_info.annotation, SubsetField):
483+
if isinstance(attribute_info.field, SubsetField):
486484
break
487485
else:
488486
raise RuntimeError(

src/datumaro/experimental/export_import.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,16 @@ def _export_images_from_dataset(
7171
image_fields = []
7272
for name, attr_info in dataset.schema.attributes.items():
7373
if isinstance(
74-
attr_info.annotation,
74+
attr_info.field,
7575
(ImageCallableField, ImagePathField, InstanceMaskCallableField, MaskCallableField),
7676
):
77-
image_fields.append((name, attr_info.annotation))
77+
image_fields.append((name, attr_info.field))
7878

7979
if not image_fields:
8080
return image_paths
8181

8282
# Export images for each field
83-
for field_name, field_annotation in image_fields:
83+
for field_name, field in image_fields:
8484
image_paths[field_name] = {}
8585

8686
for idx in range(len(dataset)):
@@ -98,7 +98,7 @@ def _export_images_from_dataset(
9898
continue
9999

100100
# Handle different field types
101-
if isinstance(field_annotation, ImagePathField):
101+
if isinstance(field, ImagePathField):
102102
# For ImagePathField: Copy image directly from filesystem
103103
try:
104104
source_path = Path(value)
@@ -118,7 +118,7 @@ def _export_images_from_dataset(
118118
print(f"Warning: Failed to copy image from {value}: {e}")
119119
continue
120120

121-
elif isinstance(field_annotation, ImageCallableField):
121+
elif isinstance(field, ImageCallableField):
122122
# Call the callable to get the image data
123123
if not callable(value):
124124
continue
@@ -163,7 +163,7 @@ def _export_images_from_dataset(
163163
)
164164
continue
165165

166-
elif isinstance(field_annotation, (InstanceMaskCallableField, MaskCallableField)):
166+
elif isinstance(field, (InstanceMaskCallableField, MaskCallableField)):
167167
# Call the callable to get the mask data
168168
if not callable(value):
169169
continue
@@ -371,13 +371,13 @@ def _import_dataset_from_dir(
371371
if images_base_dir.exists():
372372
# Identify image-related fields from schema
373373
for field_name, attr_info in schema.attributes.items():
374-
annotation = getattr(attr_info, "annotation", None)
375-
if annotation is None:
374+
field = getattr(attr_info, "field", None)
375+
if field is None:
376376
continue
377377

378-
is_path_field = isinstance(annotation, ImagePathField)
378+
is_path_field = isinstance(field, ImagePathField)
379379
is_callable_field = isinstance(
380-
annotation, (ImageCallableField, InstanceMaskCallableField, MaskCallableField)
380+
field, (ImageCallableField, InstanceMaskCallableField, MaskCallableField)
381381
)
382382

383383
if not (is_path_field or is_callable_field):

src/datumaro/experimental/filtering/filter_registry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ def create_filtering_plan(schema: Schema) -> FilteringPlan:
146146
attributes: List[str] = []
147147

148148
for field_name, field_info in schema.attributes.items():
149-
filter_cls = FilterRegistry.get_filter(type(field_info.annotation))
149+
filter_cls = FilterRegistry.get_filter(type(field_info.field))
150150
if filter_cls is not None:
151-
filter_instance = filter_cls(AttributeSpec(field_name, field_info.annotation))
151+
filter_instance = filter_cls(AttributeSpec(field_name, field_info.field))
152152
filters.append(FilterEntry(field_name, filter_instance))
153153
attributes.append(field_name)
154154

0 commit comments

Comments
 (0)