@@ -101,10 +101,10 @@ fs.ls() # list folders/files in datastore datastorename
101
101
# output example:
102
102
# folder1
103
103
# folder2
104
- # file1 .csv
104
+ # file3 .csv
105
105
106
106
# use an open context
107
- with fs.open(' ./folder /file1.csv' ) as f:
107
+ with fs.open(' ./folder1 /file1.csv' ) as f:
108
108
# do some process
109
109
process_file(f)
110
110
```
@@ -116,18 +116,22 @@ from azureml.fsspec import AzureMachineLearningFileSystem
116
116
# instantiate file system using following URI
117
117
fs = AzureMachineLearningFileSystem(' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastore/datastorename' )
118
118
119
- fs.upload(lpath = ' ./data/upload_files/crime-spring.csv' , rpath = f ' data/fsspec ' , recursive = False , ** {' overwrite' : True })
120
- fs.upload(lpath = ' ./data/upload_folder/' , rpath = f ' data/fsspec_folder ' , recursive = True , ** {' overwrite' : True })
119
+ # you can specify recursive as False to upload a file
120
+ fs.upload(lpath = ' data/upload_files/crime-spring.csv' , rpath = ' data/fsspec' , recursive = False , ** {' overwrite' : True })
121
+
122
+ # you need to specify recursive as True to upload a folder
123
+ fs.upload(lpath = ' data/upload_folder/' , rpath = ' data/fsspec_folder' , recursive = True , ** {' overwrite' : True })
121
124
122
125
```
123
126
124
127
### Download files via AzureMachineLearningFileSystem
125
128
``` python
126
129
# you can specify recursive as False to download a file
127
- fs.download(rpath = f ' data/fsspec/crime-spring.csv ' , lpath = ' ./data/download_files/, recursive=False)
130
+ # Downloading overwrite option is set to be MERGE_WITH_OVERWRITE
131
+ fs.download(rpath = ' data/fsspec/crime-spring.csv' , lpath = ' data/download_files/' , recursive = False )
128
132
129
133
# you need to specify recursive as True to download a folder
130
- fs.download(rpath = f ' data/fsspec_folder ' , lpath = f ' ./ data/download_folder/' , recursive = True )
134
+ fs.download(rpath = ' data/fsspec_folder' , lpath = ' data/download_folder/' , recursive = True )
131
135
```
132
136
133
137
# ## Examples
@@ -153,14 +157,14 @@ import pandas as pd
153
157
from azureml.fsspec import AzureMachineLearningFileSystem
154
158
155
159
# define the URI - update <> placeholders
156
- uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/*.csv '
160
+ uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
157
161
158
162
# create the filesystem
159
163
fs = AzureMachineLearningFileSystem(uri)
160
164
161
165
# append csv files in folder to a list
162
166
dflist = []
163
- for path in fs.ls():
167
+ for path in fs.ls(' /<folder>/*.csv ' ):
164
168
with fs.open(path) as f:
165
169
dflist.append(pd.read_csv(f))
166
170
@@ -192,14 +196,14 @@ import pandas as pd
192
196
from azureml.fsspec import AzureMachineLearningFileSystem
193
197
194
198
# define the URI - update <> placeholders
195
- uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/*.parquet '
199
+ uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
196
200
197
201
# create the filesystem
198
202
fs = AzureMachineLearningFileSystem(uri)
199
203
200
204
# append csv files in folder to a list
201
205
dflist = []
202
- for path in fs.ls():
206
+ for path in fs.ls(' /<folder>/*.parquet ' ):
203
207
with fs.open(path) as f:
204
208
dflist.append(pd.read_parquet(f))
205
209
@@ -247,14 +251,14 @@ from PIL import Image
247
251
from azureml.fsspec import AzureMachineLearningFileSystem
248
252
249
253
# define the URI - update <> placeholders
250
- uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/<image.jpeg> '
254
+ uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
251
255
252
256
# create the filesystem
253
257
fs = AzureMachineLearningFileSystem(uri)
254
258
255
- with fs.open() as f:
259
+ with fs.open(' /<folder>/<image.jpeg> ' ) as f:
256
260
img = Image.open(f)
257
- img.show()
261
+ img.show()
258
262
```
259
263
260
264
# ### PyTorch custom dataset example
@@ -328,16 +332,16 @@ from azureml.fsspec import AzureMachineLearningFileSystem
328
332
from torch.utils.data import DataLoader
329
333
330
334
# define the URI - update <> placeholders
331
- uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/ '
335
+ uri = ' azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
332
336
333
337
# create the filesystem
334
338
fs = AzureMachineLearningFileSystem(uri)
335
339
336
340
# create the dataset
337
341
training_data = CustomImageDataset(
338
342
filesystem = fs,
339
- annotations_file = ' <datastore_name>/<path> /annotations.csv' ,
340
- img_dir = ' <datastore_name> /<path_to_images>/'
343
+ annotations_file = ' /annotations.csv' ,
344
+ img_dir = ' /<path_to_images>/'
341
345
)
342
346
343
347
# Preparing your data for training with DataLoaders
0 commit comments