@@ -56,21 +56,17 @@ def __init__(
56
56
self ,
57
57
* ,
58
58
metadata : DatasetMetadata ,
59
- storage_dir : Path ,
59
+ path_to_dataset : Path ,
60
60
lock : asyncio .Lock ,
61
- directory_name : str | None = None ,
62
61
) -> None :
63
62
"""Initialize a new instance.
64
63
65
64
Preferably use the `FileSystemDatasetClient.open` class method to create a new instance.
66
65
"""
67
66
self ._metadata = metadata
68
67
69
- self ._storage_dir = storage_dir
70
- """The base directory where the storage data are being persisted."""
71
-
72
- self ._directory_name = directory_name
73
- """The directory name to use for this dataset. If None, uses metadata.name or default."""
68
+ self ._path_to_dataset = path_to_dataset
69
+ """The full path to the dataset directory."""
74
70
75
71
self ._lock = lock
76
72
"""A lock to ensure that only one operation is performed at a time."""
@@ -82,14 +78,7 @@ async def get_metadata(self) -> DatasetMetadata:
82
78
@property
83
79
def path_to_dataset (self ) -> Path :
84
80
"""The full path to the dataset directory."""
85
- # Use the explicit directory name if provided, otherwise fall back to metadata.name or default
86
- if self ._directory_name is not None :
87
- return self ._storage_dir / self ._STORAGE_SUBDIR / self ._directory_name
88
-
89
- if self ._metadata .name is None :
90
- return self ._storage_dir / self ._STORAGE_SUBDIR / self ._STORAGE_SUBSUBDIR_DEFAULT
91
-
92
- return self ._storage_dir / self ._STORAGE_SUBDIR / self ._metadata .name
81
+ return self ._path_to_dataset
93
82
94
83
@property
95
84
def path_to_metadata (self ) -> Path :
@@ -124,12 +113,12 @@ async def open(
124
113
ValueError: If a dataset with the specified ID is not found, if metadata is invalid,
125
114
or if both name and alias are provided.
126
115
"""
127
- # Validate parameters - exactly one of name or alias should be provided (or neither for default)
128
- if name is not None and alias is not None :
129
- raise ValueError ('Cannot specify both name and alias parameters' )
116
+ # Validate parameters
117
+ specified_params = sum (1 for param in [id , name , alias ] if param is not None )
118
+ if specified_params > 1 :
119
+ raise ValueError ('Only one of "id", "name", or "alias" can be specified, not multiple.' )
130
120
131
- storage_dir = Path (configuration .storage_dir )
132
- dataset_base_path = storage_dir / cls ._STORAGE_SUBDIR
121
+ dataset_base_path = Path (configuration .storage_dir ) / cls ._STORAGE_SUBDIR
133
122
134
123
if not dataset_base_path .exists ():
135
124
await asyncio .to_thread (dataset_base_path .mkdir , parents = True , exist_ok = True )
@@ -141,21 +130,20 @@ async def open(
141
130
if not dataset_dir .is_dir ():
142
131
continue
143
132
144
- metadata_path = dataset_dir / METADATA_FILENAME
145
- if not metadata_path .exists ():
133
+ path_to_metadata = dataset_dir / METADATA_FILENAME
134
+ if not path_to_metadata .exists ():
146
135
continue
147
136
148
137
try :
149
- file = await asyncio .to_thread (metadata_path .open )
138
+ file = await asyncio .to_thread (path_to_metadata .open )
150
139
try :
151
140
file_content = json .load (file )
152
141
metadata = DatasetMetadata (** file_content )
153
142
if metadata .id == id :
154
143
client = cls (
155
144
metadata = metadata ,
156
- storage_dir = storage_dir ,
145
+ path_to_dataset = dataset_base_path / dataset_dir ,
157
146
lock = asyncio .Lock (),
158
- directory_name = dataset_dir .name , # Use the actual directory name
159
147
)
160
148
await client ._update_metadata (update_accessed_at = True )
161
149
found = True
@@ -170,48 +158,29 @@ async def open(
170
158
171
159
# Get a new instance by name or alias.
172
160
else :
173
- # Determine the directory name and metadata name based on whether this is a named or alias storage
174
- if alias is not None :
175
- # For alias storages, use the alias as directory name and set metadata.name to None
176
- # Special case: alias='default' should use the same directory as default storage
177
- directory_name = None if alias == 'default' else alias
178
- actual_name = None
179
- elif name is not None :
180
- # For named storages, use the name as both directory name and metadata.name
181
- directory_name = name
182
- actual_name = name
183
- else :
184
- # For default storage (no name or alias), use None for both - same as alias='default'
185
- directory_name = None
186
- actual_name = None
187
-
188
- dataset_path = (
189
- dataset_base_path / cls ._STORAGE_SUBSUBDIR_DEFAULT
190
- if directory_name is None
191
- else dataset_base_path / directory_name
192
- )
193
- metadata_path = dataset_path / METADATA_FILENAME
161
+ dataset_dir = Path (name ) if name else Path (alias ) if alias else Path ('default' )
162
+ path_to_dataset = dataset_base_path / dataset_dir
163
+ path_to_metadata = path_to_dataset / METADATA_FILENAME
194
164
195
165
# If the dataset directory exists, reconstruct the client from the metadata file.
196
- if dataset_path .exists () and metadata_path .exists ():
197
- file = await asyncio .to_thread (open , metadata_path )
166
+ if path_to_dataset .exists () and path_to_metadata .exists ():
167
+ file = await asyncio .to_thread (open , path_to_metadata )
198
168
try :
199
169
file_content = json .load (file )
200
170
finally :
201
171
await asyncio .to_thread (file .close )
202
172
try :
203
173
metadata = DatasetMetadata (** file_content )
204
- # For aliases, ensure the metadata.name is None
205
- if alias is not None :
206
- metadata = metadata .model_copy (update = {'name' : None })
207
174
except ValidationError as exc :
208
- raise ValueError (f'Invalid metadata file for dataset "{ name } "' ) from exc
175
+ raise ValueError (f'Invalid metadata file for dataset "{ name or alias } "' ) from exc
176
+
177
+ # Update metadata name to match the resolution.
178
+ metadata .name = name
209
179
210
180
client = cls (
211
181
metadata = metadata ,
212
- storage_dir = storage_dir ,
182
+ path_to_dataset = path_to_dataset ,
213
183
lock = asyncio .Lock (),
214
- directory_name = directory_name ,
215
184
)
216
185
217
186
await client ._update_metadata (update_accessed_at = True )
@@ -221,17 +190,16 @@ async def open(
221
190
now = datetime .now (timezone .utc )
222
191
metadata = DatasetMetadata (
223
192
id = crypto_random_object_id (),
224
- name = actual_name , # Use actual_name which will be None for aliases
193
+ name = name ,
225
194
created_at = now ,
226
195
accessed_at = now ,
227
196
modified_at = now ,
228
197
item_count = 0 ,
229
198
)
230
199
client = cls (
231
200
metadata = metadata ,
232
- storage_dir = storage_dir ,
201
+ path_to_dataset = path_to_dataset ,
233
202
lock = asyncio .Lock (),
234
- directory_name = directory_name ,
235
203
)
236
204
await client ._update_metadata ()
237
205
0 commit comments