Skip to content

Commit 66689ab

Browse files
authored
Base mmsegmentation dataset (#3060)
* added base segmentation dataset to inherit from * linter fixes
1 parent 54e7356 commit 66689ab

File tree

3 files changed

+588
-23
lines changed

3 files changed

+588
-23
lines changed

deeplake/integrations/mmlab/segmentation/basedataset.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@
1818
from mmengine.dataset import Compose # type: ignore
1919

2020

21-
class BaseDataset(Dataset):
21+
class DeeplakeBaseDataset(Dataset):
2222
r"""
23-
@brief A modified copy of OpenMMLab's BaseDataset.
23+
@brief DeeplakeBaseDataset: a modified copy of OpenMMLab's BaseDataset.
2424
25-
This class is a direct copy of OpenMMLab's `BaseDataset`, with modifications
25+
`DeeplakeBaseDataset` is a direct copy of OpenMMLab's `BaseDataset`, with modifications
2626
to remove forced filesystem initialization (`force_init`) and customize the
2727
dataset length retrieval.
2828
2929
@note
30-
- We do not use the original `BaseDataset` because it enforces local filesystem
30+
- We do not use the original OpenMMLab `BaseDataset` because it enforces local filesystem
3131
dataset initialization, which is incompatible with our cloud-based dataset.
3232
- Instead of relying on local file scans, this version retrieves dataset size
3333
from a cloud storage backend.
@@ -95,27 +95,27 @@ class BaseDataset(Dataset):
9595
lazy_init (bool, optional): Whether to load annotation during
9696
instantiation. In some cases, such as visualization, only the meta
9797
information of the dataset is needed, which is not necessary to
98-
load annotation file. ``Basedataset`` can skip load annotations to
98+
load the annotation file. ``DeeplakeBaseDataset`` can skip loading annotations to
9999
save time by setting ``lazy_init=True``. Defaults to False.
100-
max_refetch (int, optional): If ``Basedataset.prepare_data`` get a
100+
max_refetch (int, optional): If ``DeeplakeBaseDataset.prepare_data`` gets a
101101
None img. The maximum extra number of cycles to get a valid
102102
image. Defaults to 1000.
103103
104104
Note:
105-
BaseDataset collects meta information from ``annotation file`` (the
106-
lowest priority), ``BaseDataset.METAINFO``(medium) and ``metainfo
105+
DeeplakeBaseDataset collects meta information from ``annotation file`` (the
106+
lowest priority), ``DeeplakeBaseDataset.METAINFO``(medium) and ``metainfo
107107
parameter`` (highest) passed to constructors. The lower priority meta
108108
information will be overwritten by higher one.
109109
110110
Note:
111111
Dataset wrappers such as ``ConcatDataset``, ``RepeatDataset``, etc.
112-
should not inherit from ``BaseDataset`` since ``get_subset`` and
112+
should not inherit from ``DeeplakeBaseDataset`` since ``get_subset`` and
113113
``get_subset_`` could produce ambiguous meaning sub-dataset which
114114
conflicts with original dataset.
115115
116116
Examples:
117117
>>> # Assume the annotation file is given above.
118-
>>> class CustomDataset(BaseDataset):
118+
>>> class CustomDataset(DeeplakeBaseDataset):
119119
>>> METAINFO: dict = dict(task_name='custom_task',
120120
>>> dataset_type='custom_type')
121121
>>> metainfo=dict(task_name='custom_task_name')
@@ -193,7 +193,7 @@ def get_data_info(self, idx: int) -> dict:
193193
return data_info
194194

195195
def full_init(self):
196-
"""Load annotation file and set ``BaseDataset._fully_initialized`` to True."""
196+
"""Load annotation file and set ``DeeplakeBaseDataset._fully_initialized`` to True."""
197197
if self._fully_initialized:
198198
return
199199

@@ -204,7 +204,7 @@ def metainfo(self) -> dict:
204204
"""Get meta information of dataset.
205205
206206
Returns:
207-
dict: meta information collected from ``BaseDataset.METAINFO``,
207+
dict: meta information collected from ``DeeplakeBaseDataset.METAINFO``,
208208
annotation file and metainfo argument during instantiation.
209209
"""
210210
return copy.deepcopy(self._metainfo)
@@ -330,7 +330,7 @@ def load_data_list(self) -> List[dict]:
330330
raw_data_list = annotations["data_list"]
331331

332332
# Meta information load from annotation file will not influence the
333-
# existed meta information load from `BaseDataset.METAINFO` and
333+
# existing meta information loaded from `DeeplakeBaseDataset.METAINFO` and
334334
# `metainfo` arguments defined in constructor.
335335
for k, v in metainfo.items():
336336
self._metainfo.setdefault(k, v)
@@ -454,7 +454,7 @@ def get_subset_(self, indices: Union[Sequence[int], int]) -> None:
454454
the index given in indices.
455455
456456
Examples:
457-
>>> dataset = BaseDataset('path/to/ann_file')
457+
>>> dataset = DeeplakeBaseDataset('path/to/ann_file')
458458
>>> len(dataset)
459459
100
460460
>>> dataset.get_subset_(90)
@@ -483,7 +483,7 @@ def get_subset_(self, indices: Union[Sequence[int], int]) -> None:
483483
else:
484484
self.data_list = self._get_unserialized_subset(indices)
485485

486-
def get_subset(self, indices: Union[Sequence[int], int]) -> "BaseDataset":
486+
def get_subset(self, indices: Union[Sequence[int], int]) -> "DeeplakeBaseDataset":
487487
"""Return a subset of dataset.
488488
489489
This method will return a subset of original dataset. If type of
@@ -494,7 +494,7 @@ def get_subset(self, indices: Union[Sequence[int], int]) -> "BaseDataset":
494494
given in indices.
495495
496496
Examples:
497-
>>> dataset = BaseDataset('path/to/ann_file')
497+
>>> dataset = DeeplakeBaseDataset('path/to/ann_file')
498498
>>> len(dataset)
499499
100
500500
>>> subdataset = dataset.get_subset(90)
@@ -518,7 +518,7 @@ def get_subset(self, indices: Union[Sequence[int], int]) -> "BaseDataset":
518518
index of dataset.
519519
520520
Returns:
521-
BaseDataset: A subset of dataset.
521+
DeeplakeBaseDataset: A subset of dataset.
522522
"""
523523
# Get subset of data from serialized data or data information list
524524
# according to `self.serialize_data`. Since `_get_serialized_subset`
@@ -662,7 +662,7 @@ def prepare_data(self, idx) -> Any:
662662
def __len__(self):
663663
return len(self.deeplake_dataset)
664664

665-
def _copy_without_annotation(self, memo=dict()) -> "BaseDataset":
665+
def _copy_without_annotation(self, memo=dict()) -> "DeeplakeBaseDataset":
666666
"""Deepcopy for all attributes other than ``data_list``,
667667
``data_address`` and ``data_bytes``.
668668
@@ -671,7 +671,7 @@ def _copy_without_annotation(self, memo=dict()) -> "BaseDataset":
671671
correctly.
672672
673673
Returns:
674-
BaseDataset
674+
DeeplakeBaseDataset
675675
"""
676676
cls = self.__class__
677677
other = cls.__new__(cls)

0 commit comments

Comments
 (0)