diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py index cd3ed06d4c..4cbd4f9109 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py @@ -4,6 +4,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import DefaultAzureCredential from azure.ai.ml import MLClient @@ -103,28 +104,22 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[6] + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # Extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py index e67ebe6593..9d25126377 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py @@ -4,6 +4,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import DefaultAzureCredential from azure.ai.ml import MLClient @@ -106,28 +107,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[6] + local_data_path = ( + repo_root + / "sample-data" + / "image-classification" + / "multilabelFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url 
- urllib.request.urlretrieve(download_url, filename=data_file) - # Extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py b/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py index 7027170e4a..fdf7682443 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import subprocess import sys import urllib @@ -156,27 +157,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create data folder if it doesnt exist. os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root + / "sample-data" + / "image-instance-segmentation" + / "odFridgeObjectsMask.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # Extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py b/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py index 79735c4b8c..c68b589519 100644 --- a/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py @@ -6,6 +6,7 @@ import xml.etree.ElementTree as ET from zipfile import ZipFile +from pathlib import Path from azure.identity import DefaultAzureCredential from azure.ai.ml import MLClient @@ -159,27 +160,23 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # create data folder if it doesnt exist. 
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) + print(f"Using local data from {local_data_path}") # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/inference/image-classification/prepare_data.py b/cli/foundation-models/system/inference/image-classification/prepare_data.py index d53a90d01c..940dfa8478 100644 --- a/cli/foundation-models/system/inference/image-classification/prepare_data.py +++ b/cli/foundation-models/system/inference/image-classification/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> None: @@ -20,33 +21,33 @@ os.makedirs(dataset_parent_dir, exist_ok=True) # download data + repo_root = Path(__file__).resolve().parents[5] if is_multilabel_dataset == 0: - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) else: - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" - print(f"Downloading data from {download_url}") + local_data_path = ( + repo_root + / "sample-data" + / "image-classification" + / "multilabelFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/image-embeddings/prepare_data.py b/cli/foundation-models/system/inference/image-embeddings/prepare_data.py index e5ef7bf09a..285574780b 100644 ---
a/cli/foundation-models/system/inference/image-embeddings/prepare_data.py +++ b/cli/foundation-models/system/inference/image-embeddings/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path import random import string @@ -19,31 +20,23 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/image-instance-segmentation/prepare_data.py b/cli/foundation-models/system/inference/image-instance-segmentation/prepare_data.py index f5598b83d2..828bb0cbfa 100644 --- a/cli/foundation-models/system/inference/image-instance-segmentation/prepare_data.py +++ b/cli/foundation-models/system/inference/image-instance-segmentation/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -17,32 +18,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root + / "sample-data" + / "image-instance-segmentation" + / "odFridgeObjectsMask.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git 
a/cli/foundation-models/system/inference/image-object-detection/prepare_data.py b/cli/foundation-models/system/inference/image-object-detection/prepare_data.py index e497f4d791..7019f9e1d9 100644 --- a/cli/foundation-models/system/inference/image-object-detection/prepare_data.py +++ b/cli/foundation-models/system/inference/image-object-detection/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path def download_and_unzip(dataset_parent_dir: str) -> None: @@ -17,32 +18,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git a/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py b/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py index 97a069fc13..d561f5670c 100644 --- a/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py +++ b/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path import random import string @@ -19,31 +20,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: 
print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git a/cli/foundation-models/system/inference/image-to-text/prepare_data.py b/cli/foundation-models/system/inference/image-to-text/prepare_data.py index 83a37ac9c2..27581698e0 100644 --- a/cli/foundation-models/system/inference/image-to-text/prepare_data.py +++ b/cli/foundation-models/system/inference/image-to-text/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path def download_and_unzip(dataset_parent_dir: str) -> None: @@ -17,31 +18,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git a/cli/foundation-models/system/inference/mask-generation/prepare_data.py b/cli/foundation-models/system/inference/mask-generation/prepare_data.py index 16bdd73e8d..1b7c972771 100644 --- a/cli/foundation-models/system/inference/mask-generation/prepare_data.py +++ b/cli/foundation-models/system/inference/mask-generation/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path def download_and_unzip(dataset_parent_dir: str) -> None: @@ -17,32 +18,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, 
filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py index b6ac22befd..173840d7bb 100644 --- a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py +++ b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path def download_and_unzip(dataset_parent_dir: str) -> None: @@ -15,33 +16,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None: :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git a/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py b/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py index 02e6327104..496e866379 100644 --- a/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py +++ b/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py @@ -6,6 +6,7 @@ import urllib.request import pandas as pd from zipfile import ZipFile +from pathlib import Path # Change this to match the inference dataset LABELS = "water_bottle, milk_bottle, carton, can" @@ -20,31 +21,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset 
path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py index d76cd78d2c..06cd50ba0b 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py @@ -3,6 +3,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -98,33 +99,36 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) -def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): - - # Create directory, if it does not exist - os.makedirs(dataset_parent_dir, exist_ok=True) - - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path.""" + repo_root = Path(__file__).resolve().parents[4] + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) + print(f"Using local data from {local_data_path}") - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] - # Get dataset path for later use + # Extract current dataset name from dataset path + dataset_name = os.path.basename(local_data_path).split(".")[0] dataset_dir = os.path.join(dataset_parent_dir, dataset_name) + print(f"Dataset directory: {dataset_dir}") - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir + + +def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): + # Create directory, if it does not exist + os.makedirs(dataset_parent_dir, exist_ok=True) + + # Use local file instead of downloading + dataset_dir = unzip(dataset_parent_dir) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py 
b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py index a725d918de..0eb838d5a2 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py @@ -3,6 +3,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -103,31 +104,29 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[4] + local_data_path = ( + repo_root + / "sample-data" + / "image-classification" + / "multilabelFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/prepare_data.py index 4d6115d6f2..2ecaa12e20 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/prepare_data.py @@ -1,5 +1,6 @@ import argparse import os +from pathlib import Path import sys import subprocess import urllib @@ -39,27 +40,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # create data folder if it doesnt exist.
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip" + # Local data + repo_root = Path(__file__).resolve().parents[4] + local_data_path = ( + repo_root + / "sample-data" + / "image-instance-segmentation" + / "odFridgeObjectsMask.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py index 4f49de01dc..38f6ad6940 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py @@ -5,6 +5,7 @@ import xml.etree.ElementTree as ET from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -140,29 +141,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url # create data folder if it doesnt exist. 
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[4] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-instance-segmentation-task-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-instance-segmentation-task-fridge-items-pipeline/prepare_data.py index 10a7941299..fe4ce68586 100644 --- a/cli/jobs/pipelines/automl/image-instance-segmentation-task-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-instance-segmentation-task-fridge-items-pipeline/prepare_data.py @@ -1,5 +1,6 @@ import argparse import os +from pathlib import Path import sys import subprocess import urllib @@ -39,27 +40,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # create data folder if it doesnt exist.
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root + / "sample-data" + / "image-instance-segmentation" + / "odFridgeObjectsMask.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py index 57fdc34ac6..de87d7ad73 100644 --- a/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py @@ -3,6 +3,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -100,31 +101,26 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py index a6d03ac51e..79daca91a6 100644 ---
a/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py @@ -3,6 +3,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -106,28 +107,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root + / "sample-data" + / "image-classification" + / "multilabelFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py index 02e7e36278..f503bbc4a2 100644 --- a/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py @@ -5,6 +5,7 @@ import xml.etree.ElementTree as ET from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -140,29 +141,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url # create data folder if it doesnt exist. 
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = ( + repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" + ) # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/sample-data/image-classification/fridgeObjects.zip b/sample-data/image-classification/fridgeObjects.zip new file mode 100644 index 0000000000..1ea6e4a1e6 Binary files /dev/null and b/sample-data/image-classification/fridgeObjects.zip differ diff --git a/sample-data/image-classification/multilabelFridgeObjects.zip b/sample-data/image-classification/multilabelFridgeObjects.zip new file mode 100644 index 0000000000..7bb702ff8f Binary files /dev/null and b/sample-data/image-classification/multilabelFridgeObjects.zip differ diff --git a/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip b/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip new file mode 100644 index 0000000000..099b09aedc Binary files /dev/null and b/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip differ diff --git a/sample-data/image-object-detection/odFridgeObjects.zip b/sample-data/image-object-detection/odFridgeObjects.zip new file mode 100644 index 0000000000..273696796d Binary files /dev/null and b/sample-data/image-object-detection/odFridgeObjects.zip differ diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb index e34cb260e5..645087e0ed 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb @@ -278,6 +278,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -287,27 +288,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[6]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset 
url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb index ebf56f8863..31257ce30c 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb @@ -275,6 +275,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -284,27 +285,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[6]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb b/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb index 16d87dd7bc..41046a4ce0 100644 --- a/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-instance-segmentation/image-instance-segmentation.ipynb @@ -273,6 +273,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import 
ZipFile\n", "\n", @@ -282,27 +283,25 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root\n", + " / \"sample-data\"\n", + " / \"image-instance-segmentation\"\n", + " / \"odFridgeObjectsMask.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -557,6 +556,7 @@ " name=\"model_evaluation_pipeline\", label=\"latest\"\n", ")\n", "\n", + "\n", "# define the pipeline job\n", "@pipeline()\n", "def evaluation_pipeline(mlflow_model):\n", diff --git a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb index 1d96fff4fa..ae54d99431 100644 --- a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb @@ -274,6 +274,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -283,29 +284,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -542,6 +536,7 @@ " 
name=\"model_evaluation_pipeline\", label=\"latest\"\n", ")\n", "\n", + "\n", "# define the pipeline job\n", "@pipeline()\n", "def evaluation_pipeline(mlflow_model):\n", diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb index 87626de632..b1382c33f8 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb @@ -285,6 +285,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -294,27 +295,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[6]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -644,18 +640,22 @@ "def create_pipeline_transformers():\n", " \"\"\"Create pipeline.\"\"\"\n", "\n", - " transformers_pipeline_component: PipelineComponent = pipeline_component_transformers_func(\n", - " compute_model_import=model_import_cluster_name,\n", - " compute_finetune=finetune_cluster_name,\n", - " compute_model_evaluation=model_eval_cluster_name,\n", - " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", - " validation_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " # test data\n", - " # Using the same data for validation and test. 
If you want to use a different dataset for test, specify it below\n", - " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " instance_count=instance_count,\n", - " process_count_per_instance=process_count_per_instance,\n", - " **pipeline_component_args,\n", + " transformers_pipeline_component: PipelineComponent = (\n", + " pipeline_component_transformers_func(\n", + " compute_model_import=model_import_cluster_name,\n", + " compute_finetune=finetune_cluster_name,\n", + " compute_model_evaluation=model_eval_cluster_name,\n", + " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", + " validation_data=Input(\n", + " type=AssetTypes.MLTABLE, path=validation_mltable_path\n", + " ),\n", + " # test data\n", + " # Using the same data for validation and test. If you want to use a different dataset for test, specify it below\n", + " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", + " instance_count=instance_count,\n", + " process_count_per_instance=process_count_per_instance,\n", + " **pipeline_component_args,\n", + " )\n", " )\n", " return {\n", " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb index 90b1b07ead..71f238da84 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb @@ -282,6 +282,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -291,27 +292,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[6]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -644,18 +640,22 @@ "def 
create_pipeline_transformers():\n", " \"\"\"Create pipeline.\"\"\"\n", "\n", - " transformers_pipeline_component: PipelineComponent = pipeline_component_transformers_func(\n", - " compute_model_import=model_import_cluster_name,\n", - " compute_finetune=finetune_cluster_name,\n", - " compute_model_evaluation=model_eval_cluster_name,\n", - " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", - " validation_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " # test data\n", - " # Using the same data for validation and test. If you want to use a different dataset for test, specify it below\n", - " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " instance_count=instance_count,\n", - " process_count_per_instance=process_count_per_instance,\n", - " **pipeline_component_args,\n", + " transformers_pipeline_component: PipelineComponent = (\n", + " pipeline_component_transformers_func(\n", + " compute_model_import=model_import_cluster_name,\n", + " compute_finetune=finetune_cluster_name,\n", + " compute_model_evaluation=model_eval_cluster_name,\n", + " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", + " validation_data=Input(\n", + " type=AssetTypes.MLTABLE, path=validation_mltable_path\n", + " ),\n", + " # test data\n", + " # Using the same data for validation and test. If you want to use a different dataset for test, specify it below\n", + " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", + " instance_count=instance_count,\n", + " process_count_per_instance=process_count_per_instance,\n", + " **pipeline_component_args,\n", + " )\n", " )\n", " return {\n", " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. 
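Throughout these edits the repository root is located by walking a fixed number of parents up from the current file: Path(__file__).resolve().parents[N] in the prepare_data.py scripts, and Path.cwd().resolve().parents[N] in the notebooks, where N must match each file's depth in the repo. A minimal sketch of that assumption, using a hypothetical notebook location; the cwd variant only holds when the kernel is started from the notebook's own folder:

from pathlib import Path

# Hypothetical notebook folder, six levels below the repo root:
#   <repo>/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification
# parents[0] is the folder's parent, so parents[6] lands at <repo> only at this exact depth.
repo_root = Path.cwd().resolve().parents[6]

# Scripts can anchor on their own file instead, which does not depend on the
# working directory:
# repo_root = Path(__file__).resolve().parents[6]

# Cheap sanity check; fails fast if the depth assumption is wrong.
assert (repo_root / "sample-data").is_dir(), "adjust the parents[...] index"
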
Registering the model is required to deploy the model to an online or batch endpoint.\n", diff --git a/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb b/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb index 9390841a79..f4f9265481 100644 --- a/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.ipynb @@ -296,6 +296,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -305,27 +306,25 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root\n", + " / \"sample-data\"\n", + " / \"image-instance-segmentation\"\n", + " / \"odFridgeObjectsMask.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -642,18 +641,22 @@ "def create_pipeline_mmdetection():\n", " \"\"\"Create pipeline.\"\"\"\n", "\n", - " mmdetection_pipeline_component: PipelineComponent = pipeline_component_mmdetection_func(\n", - " compute_model_import=model_import_cluster_name,\n", - " compute_finetune=finetune_cluster_name,\n", - " compute_model_evaluation=model_eval_cluster_name,\n", - " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", - " validation_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " # test data\n", - " # Using the same data for validation and test. 
If you want to use a different dataset for test, specify it below\n", - " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " instance_count=instance_count,\n", - " process_count_per_instance=process_count_per_instance,\n", - " **pipeline_component_args,\n", + " mmdetection_pipeline_component: PipelineComponent = (\n", + " pipeline_component_mmdetection_func(\n", + " compute_model_import=model_import_cluster_name,\n", + " compute_finetune=finetune_cluster_name,\n", + " compute_model_evaluation=model_eval_cluster_name,\n", + " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", + " validation_data=Input(\n", + " type=AssetTypes.MLTABLE, path=validation_mltable_path\n", + " ),\n", + " # test data\n", + " # Using the same data for validation and test. If you want to use a different dataset for test, specify it below\n", + " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", + " instance_count=instance_count,\n", + " process_count_per_instance=process_count_per_instance,\n", + " **pipeline_component_args,\n", + " )\n", " )\n", " return {\n", " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", diff --git a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb index a4402eeb9d..2e25a27b6c 100644 --- a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb @@ -301,6 +301,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -310,29 +311,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -728,18 +722,22 @@ "def create_pipeline_mmdetection():\n", " \"\"\"Create pipeline.\"\"\"\n", "\n", - " mmdetection_pipeline_component: PipelineComponent = 
pipeline_component_mmdetection_func(\n", - " compute_model_import=model_import_cluster_name,\n", - " compute_finetune=finetune_cluster_name,\n", - " compute_model_evaluation=model_eval_cluster_name,\n", - " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", - " validation_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " # test data\n", - " # Using the same data for validation and test. If you want to use a different dataset for test, specify it below\n", - " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", - " instance_count=instance_count,\n", - " process_count_per_instance=process_count_per_instance,\n", - " **pipeline_component_args,\n", + " mmdetection_pipeline_component: PipelineComponent = (\n", + " pipeline_component_mmdetection_func(\n", + " compute_model_import=model_import_cluster_name,\n", + " compute_finetune=finetune_cluster_name,\n", + " compute_model_evaluation=model_eval_cluster_name,\n", + " training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),\n", + " validation_data=Input(\n", + " type=AssetTypes.MLTABLE, path=validation_mltable_path\n", + " ),\n", + " # test data\n", + " # Using the same data for validation and test. If you want to use a different dataset for test, specify it below\n", + " test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),\n", + " instance_count=instance_count,\n", + " process_count_per_instance=process_count_per_instance,\n", + " **pipeline_component_args,\n", + " )\n", " )\n", " return {\n", " # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.\n", diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb index 4d737dcc7d..e1b9541764 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb @@ -170,6 +170,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -180,31 +181,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# 
Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb index 44092dc677..18dd78d63b 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb @@ -136,6 +136,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -145,27 +146,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb index 632d1dfd44..c57ace426d 100644 --- a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,31 +172,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = 
os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb index 7d9c025610..97e1ee009e 100644 --- a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb @@ -127,6 +127,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -136,27 +137,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { @@ -324,7 +320,11 @@ ] } ], - "metadata": {}, + "metadata": { + "language_info": { + "name": "python" + } + }, "nbformat": 4, "nbformat_minor": 2 } diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb index 624a988816..222e614b1e 100644 --- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import 
shutil\n", "from zipfile import ZipFile\n", @@ -171,31 +172,25 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root\n", + " / \"sample-data\"\n", + " / \"image-instance-segmentation\"\n", + " / \"odFridgeObjectsMask.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb index 3500f26cf8..c9ff21d54d 100644 --- a/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-instance-segmentation/image-instance-segmentation-online-endpoint.ipynb @@ -127,36 +127,32 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", "# Change to a different location if you prefer\n", "dataset_parent_dir = \"./data\"\n", "\n", - "# Create data folder if it doesnt exist.\n", - "os.makedirs(dataset_parent_dir, exist_ok=True)\n", - "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root\n", + " / \"sample-data\"\n", + " / \"image-instance-segmentation\"\n", + " / \"odFridgeObjectsMask.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " 
zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb index 93f946a615..fdaa2964de 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,31 +172,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb index 6c5cea7f50..28e43f50af 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb @@ -127,6 +127,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -136,27 +137,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = 
os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb index b610478e7b..b150bc1a8b 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb @@ -166,6 +166,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -176,31 +177,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb index c67d6e02c4..e9b5f73101 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb @@ -130,6 +130,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -139,27 +140,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, 
exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb index 0d52ef7bcf..9b02964c09 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb @@ -102,6 +102,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -111,34 +112,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", + "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb index 
c767466d37..991720f7af 100644 --- a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,31 +172,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb index 8b6c237d51..8c5e5049a1 100644 --- a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb @@ -126,6 +126,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -135,27 +136,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " 
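In the batch-endpoint notebooks these edits also drop the shutil.rmtree cleanup that used to clear a previous extraction. ZipFile.extractall overwrites files present in the archive but leaves behind anything the archive no longer contains, so if a clean dataset_dir matters across reruns, a guard along these lines could be kept (a sketch with hypothetical paths):

import os
import shutil
from zipfile import ZipFile

dataset_parent_dir = "./data"  # hypothetical
local_data_path = "sample-data/image-object-detection/odFridgeObjects.zip"  # hypothetical
dataset_dir = os.path.join(dataset_parent_dir, "odFridgeObjects")

# Remove any earlier extraction so stale files cannot linger alongside new ones.
if os.path.exists(dataset_dir):
    shutil.rmtree(dataset_dir)

with ZipFile(local_data_path, "r") as zip_file:  # zip_file avoids shadowing the builtin zip
    zip_file.extractall(path=dataset_parent_dir)
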
print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb index 4d2011cdf1..d97145ef70 100644 --- a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb @@ -153,6 +153,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -163,31 +164,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb index 5ea054e763..a759652c4a 100644 --- a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb @@ -119,6 +119,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -128,27 +129,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later 
use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb index 49cc039cd7..55e0f66c90 100644 --- a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,31 +172,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb index b5a7edb022..ec6bce60bf 100644 --- a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb @@ -126,6 +126,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -135,27 +136,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# 
Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb index 29ddef08c3..9574e0bd07 100644 --- a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb @@ -166,6 +166,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -176,31 +177,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb 
b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb index 87bc9cbf70..42bef99ce5 100644 --- a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb @@ -133,6 +133,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -142,27 +143,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb index 01f8f364d8..67054a7b80 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb @@ -136,6 +136,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -145,34 +146,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[4]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", + "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb index ba864d1f23..7600bcf808 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb @@ -134,6 +134,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -143,27 +144,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[4]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-instance-segmentation-task-fridge-items/automl-image-instance-segmentation-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-instance-segmentation-task-fridge-items/automl-image-instance-segmentation-task-fridge-items.ipynb index 2e5b374dd5..3674a5c969 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-instance-segmentation-task-fridge-items/automl-image-instance-segmentation-task-fridge-items.ipynb +++ 
b/sdk/python/jobs/automl-standalone-jobs/automl-image-instance-segmentation-task-fridge-items/automl-image-instance-segmentation-task-fridge-items.ipynb @@ -133,6 +133,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -142,27 +143,25 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[4]\n", + "local_data_path = (\n", + " repo_root\n", + " / \"sample-data\"\n", + " / \"image-instance-segmentation\"\n", + " / \"odFridgeObjectsMask.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb index c93937e816..b7ef8e3a26 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb @@ -118,6 +118,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -127,27 +128,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[4]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, 
filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb index 1e622208dc..a8bc72f9e5 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb @@ -133,6 +133,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -142,27 +143,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[4]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb index 60e96335fc..b21f82dd86 100644 --- a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb +++ b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb @@ -114,6 +114,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -123,27 +124,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = 
\"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb index 6766bce0f9..e0a09369b4 100644 --- a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb +++ b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb @@ -110,6 +110,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -119,27 +120,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-instance-segmentation-in-pipeline/automl-image-instance-segmentation-in-pipeline.ipynb 
b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-instance-segmentation-in-pipeline/automl-image-instance-segmentation-in-pipeline.ipynb index 36c6723d21..0a7e5370d1 100644 --- a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-instance-segmentation-in-pipeline/automl-image-instance-segmentation-in-pipeline.ipynb +++ b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-instance-segmentation-in-pipeline/automl-image-instance-segmentation-in-pipeline.ipynb @@ -109,6 +109,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -118,27 +119,25 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root\n", + " / \"sample-data\"\n", + " / \"image-instance-segmentation\"\n", + " / \"odFridgeObjectsMask.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb index b294129270..232a731739 100644 --- a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb +++ b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb @@ -109,6 +109,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -118,27 +119,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# Local data\n", + "repo_root = Path.cwd().resolve().parents[5]\n", + "local_data_path = (\n", + " repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", + ")\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.basename(local_data_path).split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = 
os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "# Extract files\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, {