1818from mmengine .dataset import Compose # type: ignore
1919
2020
21- class BaseDataset (Dataset ):
21+ class DeeplakeBaseDataset (Dataset ):
2222 r"""
23- @brief A modified copy of OpenMMLab's BaseDataset .
23+ @brief A modified copy of OpenMMLab's `BaseDataset`.
2424
25- This class is a direct copy of OpenMMLab's `BaseDataset `, with modifications
25+ This class is a direct copy of OpenMMLab's `BaseDataset`, with modifications
2626 to remove forced filesystem initialization (`force_init`) and customize the
2727 dataset length retrieval.
2828
2929 @note
30- - We do not use the original `BaseDataset ` because it enforces local filesystem
30+ - We do not use the original `BaseDataset` because it enforces local filesystem
3131 dataset initialization, which is incompatible with our cloud-based dataset.
3232 - Instead of relying on local file scans, this version retrieves dataset size
3333 from a cloud storage backend.
@@ -95,27 +95,27 @@ class BaseDataset(Dataset):
9595 lazy_init (bool, optional): Whether to load annotation during
9696 instantiation. In some cases, such as visualization, only the meta
9797 information of the dataset is needed, which is not necessary to
98- load annotation file. ``Basedataset `` can skip load annotations to
98+ load annotation file. ``DeeplakeBaseDataset`` can skip loading annotations to
9999 save time by setting ``lazy_init=True``. Defaults to False.
100- max_refetch (int, optional): If ``Basedataset .prepare_data`` get a
100+ max_refetch (int, optional): If ``DeeplakeBaseDataset.prepare_data`` gets a
101101 None img, the maximum number of extra cycles to try for a valid
102102 image. Defaults to 1000.
103103
104104 Note:
105- BaseDataset collects meta information from ``annotation file`` (the
106- lowest priority), ``BaseDataset .METAINFO``(medium) and ``metainfo
105+ DeeplakeBaseDataset collects meta information from ``annotation file`` (the
106+ lowest priority), ``DeeplakeBaseDataset .METAINFO``(medium) and ``metainfo
107107 parameter`` (highest) passed to constructors. The lower priority meta
108108 information will be overwritten by higher one.
109109
110110 Note:
111111 Dataset wrappers such as ``ConcatDataset``, ``RepeatDataset``, etc.
112- should not inherit from ``BaseDataset `` since ``get_subset`` and
112+ should not inherit from ``DeeplakeBaseDataset `` since ``get_subset`` and
113113 ``get_subset_`` could produce ambiguous meaning sub-dataset which
114114 conflicts with original dataset.
115115
116116 Examples:
117117 >>> # Assume the annotation file is given above.
118- >>> class CustomDataset(BaseDataset ):
118+ >>> class CustomDataset(DeeplakeBaseDataset ):
119119 >>> METAINFO: dict = dict(task_name='custom_task',
120120 >>> dataset_type='custom_type')
121121 >>> metainfo=dict(task_name='custom_task_name')
@@ -193,7 +193,7 @@ def get_data_info(self, idx: int) -> dict:
193193 return data_info
194194
195195 def full_init (self ):
196- """Load annotation file and set ``BaseDataset ._fully_initialized`` to True."""
196+ """Load annotation file and set ``DeeplakeBaseDataset ._fully_initialized`` to True."""
197197 if self ._fully_initialized :
198198 return
199199
@@ -204,7 +204,7 @@ def metainfo(self) -> dict:
204204 """Get meta information of dataset.
205205
206206 Returns:
207- dict: meta information collected from ``BaseDataset .METAINFO``,
207+ dict: meta information collected from ``DeeplakeBaseDataset .METAINFO``,
208208 annotation file and metainfo argument during instantiation.
209209 """
210210 return copy .deepcopy (self ._metainfo )
@@ -330,7 +330,7 @@ def load_data_list(self) -> List[dict]:
330330 raw_data_list = annotations ["data_list" ]
331331
332332 # Meta information load from annotation file will not influence the
333- # existed meta information load from `BaseDataset .METAINFO` and
333+ # existed meta information load from `DeeplakeBaseDataset .METAINFO` and
334334 # `metainfo` arguments defined in constructor.
335335 for k , v in metainfo .items ():
336336 self ._metainfo .setdefault (k , v )
@@ -454,7 +454,7 @@ def get_subset_(self, indices: Union[Sequence[int], int]) -> None:
454454 the index given in indices.
455455
456456 Examples:
457- >>> dataset = BaseDataset ('path/to/ann_file')
457+ >>> dataset = DeeplakeBaseDataset ('path/to/ann_file')
458458 >>> len(dataset)
459459 100
460460 >>> dataset.get_subset_(90)
@@ -483,7 +483,7 @@ def get_subset_(self, indices: Union[Sequence[int], int]) -> None:
483483 else :
484484 self .data_list = self ._get_unserialized_subset (indices )
485485
486- def get_subset (self , indices : Union [Sequence [int ], int ]) -> "BaseDataset " :
486+ def get_subset (self , indices : Union [Sequence [int ], int ]) -> "DeeplakeBaseDataset " :
487487 """Return a subset of dataset.
488488
489489 This method will return a subset of original dataset. If type of
@@ -494,7 +494,7 @@ def get_subset(self, indices: Union[Sequence[int], int]) -> "BaseDataset":
494494 given in indices.
495495
496496 Examples:
497- >>> dataset = BaseDataset ('path/to/ann_file')
497+ >>> dataset = DeeplakeBaseDataset ('path/to/ann_file')
498498 >>> len(dataset)
499499 100
500500 >>> subdataset = dataset.get_subset(90)
@@ -518,7 +518,7 @@ def get_subset(self, indices: Union[Sequence[int], int]) -> "BaseDataset":
518518 index of dataset.
519519
520520 Returns:
521- BaseDataset : A subset of dataset.
521+ DeeplakeBaseDataset : A subset of dataset.
522522 """
523523 # Get subset of data from serialized data or data information list
524524 # according to `self.serialize_data`. Since `_get_serialized_subset`
@@ -662,7 +662,7 @@ def prepare_data(self, idx) -> Any:
662662 def __len__ (self ):
663663 return len (self .deeplake_dataset )
664664
665- def _copy_without_annotation (self , memo = dict ()) -> "BaseDataset " :
665+ def _copy_without_annotation (self , memo = dict ()) -> "DeeplakeBaseDataset " :
666666 """Deepcopy for all attributes other than ``data_list``,
667667 ``data_address`` and ``data_bytes``.
668668
@@ -671,7 +671,7 @@ def _copy_without_annotation(self, memo=dict()) -> "BaseDataset":
671671 correctly.
672672
673673 Returns:
674- BaseDataset
674+ DeeplakeBaseDataset
675675 """
676676 cls = self .__class__
677677 other = cls .__new__ (cls )
0 commit comments