Skip to content

Commit a56e69a

Browse files
authored
Update how-to-access-data-interactive.md
1 parent 24dc663 commit a56e69a

File tree

1 file changed

+20
-16
lines changed

1 file changed

+20
-16
lines changed

articles/machine-learning/how-to-access-data-interactive.md

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,10 @@ fs.ls() # list folders/files in datastore datastorename
101101
# output example:
102102
# folder1
103103
# folder2
104-
# file1.csv
104+
# file3.csv
105105

106106
# use an open context
107-
with fs.open('./folder/file1.csv') as f:
107+
with fs.open('./folder1/file1.csv') as f:
108108
# do some process
109109
process_file(f)
110110
```
@@ -116,18 +116,22 @@ from azureml.fsspec import AzureMachineLearningFileSystem
116116
# instantiate file system using following URI
117117
fs = AzureMachineLearningFileSystem('azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/datastorename')
118118

119-
fs.upload(lpath='./data/upload_files/crime-spring.csv', rpath=f'data/fsspec', recursive=False, **{'overwrite': True})
120-
fs.upload(lpath='./data/upload_folder/', rpath=f'data/fsspec_folder', recursive=True, **{'overwrite': True})
119+
# you can specify recursive as False to upload a file
120+
fs.upload(lpath='data/upload_files/crime-spring.csv', rpath='data/fsspec', recursive=False, **{'overwrite': True})
121+
122+
# you need to specify recursive as True to upload a folder
123+
fs.upload(lpath='data/upload_folder/', rpath='data/fsspec_folder', recursive=True, **{'overwrite': True})
121124

122125
```
123126

124127
### Download files via AzureMachineLearningFileSystem
125128
```python
126129
# you can specify recursive as False to download a file
127-
fs.download(rpath=f'data/fsspec/crime-spring.csv', lpath='./data/download_files/, recursive=False)
130+
# The overwrite option for downloads is set to MERGE_WITH_OVERWRITE
131+
fs.download(rpath='data/fsspec/crime-spring.csv', lpath='data/download_files/', recursive=False)
128132

129133
# you need to specify recursive as True to download a folder
130-
fs.download(rpath=f'data/fsspec_folder', lpath=f'./data/download_folder/', recursive=True)
134+
fs.download(rpath='data/fsspec_folder', lpath='data/download_folder/', recursive=True)
131135
```
132136

133137
### Examples
@@ -153,14 +157,14 @@ import pandas as pd
153157
from azureml.fsspec import AzureMachineLearningFileSystem
154158

155159
# define the URI - update <> placeholders
156-
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/*.csv'
160+
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
157161

158162
# create the filesystem
159163
fs = AzureMachineLearningFileSystem(uri)
160164

161165
# append csv files in folder to a list
162166
dflist = []
163-
for path in fs.ls():
167+
for path in fs.ls('/<folder>/*.csv'):
164168
with fs.open(path) as f:
165169
dflist.append(pd.read_csv(f))
166170

@@ -192,14 +196,14 @@ import pandas as pd
192196
from azureml.fsspec import AzureMachineLearningFileSystem
193197

194198
# define the URI - update <> placeholders
195-
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/*.parquet'
199+
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
196200

197201
# create the filesystem
198202
fs = AzureMachineLearningFileSystem(uri)
199203

200204
# append parquet files in folder to a list
201205
dflist = []
202-
for path in fs.ls():
206+
for path in fs.ls('/<folder>/*.parquet'):
203207
with fs.open(path) as f:
204208
dflist.append(pd.read_parquet(f))
205209

@@ -247,14 +251,14 @@ from PIL import Image
247251
from azureml.fsspec import AzureMachineLearningFileSystem
248252

249253
# define the URI - update <> placeholders
250-
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/<image.jpeg>'
254+
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
251255

252256
# create the filesystem
253257
fs = AzureMachineLearningFileSystem(uri)
254258

255-
with fs.open() as f:
259+
with fs.open('/<folder>/<image.jpeg>') as f:
256260
img = Image.open(f)
257-
img.show()
261+
img.show()
258262
```
259263

260264
#### PyTorch custom dataset example
@@ -328,16 +332,16 @@ from azureml.fsspec import AzureMachineLearningFileSystem
328332
from torch.utils.data import DataLoader
329333

330334
# define the URI - update <> placeholders
331-
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/'
335+
uri = 'azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>'
332336

333337
# create the filesystem
334338
fs = AzureMachineLearningFileSystem(uri)
335339

336340
# create the dataset
337341
training_data = CustomImageDataset(
338342
filesystem=fs,
339-
annotations_file='<datastore_name>/<path>/annotations.csv',
340-
img_dir='<datastore_name>/<path_to_images>/'
343+
annotations_file='/annotations.csv',
344+
img_dir='/<path_to_images>/'
341345
)
342346

343347
# Preparing your data for training with DataLoaders

0 commit comments

Comments
 (0)