diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py index cd3ed06d4c..40f9a54a29 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py @@ -103,28 +103,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + # Use local data path + print("Using local data.") + local_data_path = "/sample-data/image-classification/fridgeObjects.zip" - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # Extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py 
b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py index e67ebe6593..a3d971c98e 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py @@ -106,28 +106,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + # Use local data path + print("Using local data.") + local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip" - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # Extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py b/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py index 79735c4b8c..172f3d427c 100644 --- 
a/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py @@ -154,37 +154,26 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): :param ml_client: Azure ML client :param dataset_parent_dir: Path to the dataset folder """ - # Download data from public url - - # create data folder if it doesnt exist. + # Create data folder if it doesn't exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] - # Get dataset path for later use - dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") + # Use local data path + print("Using local data.") + local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip" - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + + # Use the extracted directory + extracted_dir = os.path.join(dataset_parent_dir, os.path.basename(local_data_path).split(".")[0]) # Upload data and create a data asset URI folder print("Uploading data to blob storage") my_data = Data( - path=dataset_dir, + path=extracted_dir, type=AssetTypes.URI_FOLDER, description="Fridge-items images Object detection", name="fridge-items-images-od-ft", @@ -198,7 +187,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): 
print(uri_folder_data_asset.path) create_jsonl_and_mltable_files( - uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir + uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=extracted_dir ) diff --git a/cli/foundation-models/system/inference/image-classification/prepare_data.py b/cli/foundation-models/system/inference/image-classification/prepare_data.py index d53a90d01c..05fd79d292 100644 --- a/cli/foundation-models/system/inference/image-classification/prepare_data.py +++ b/cli/foundation-models/system/inference/image-classification/prepare_data.py @@ -8,10 +8,10 @@ from zipfile import ZipFile -def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> str: + """Use local dataset and unzip it. - :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str :param is_multilabel_dataset: flag to indicate if dataset is multi-label or not :type is_multilabel_dataset: int @@ -19,34 +19,27 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # Use local data path if is_multilabel_dataset == 0: - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + local_data_path = "/sample-data/image-classification/fridgeObjects.zip" else: - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" - print(f"Downloading data from {download_url}") + local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip" + print(f"Using local data from {local_data_path}") - # Extract current 
dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -142,7 +135,7 @@ def prepare_data_for_batch_inference(dataset_dir: str, is_multilabel: int = 0) - args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.abspath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/image-embeddings/prepare_data.py b/cli/foundation-models/system/inference/image-embeddings/prepare_data.py index e5ef7bf09a..775f63206d 100644 --- a/cli/foundation-models/system/inference/image-embeddings/prepare_data.py +++ b/cli/foundation-models/system/inference/image-embeddings/prepare_data.py @@ -10,40 +10,33 @@ import string -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Use local dataset and unzip it. 
- :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Use local data path + local_data_path = "/sample-data/image-classification/fridgeObjects.zip" + print(f"Using local data from {local_data_path}") - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -137,7 +130,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/image-object-detection/prepare_data.py b/cli/foundation-models/system/inference/image-object-detection/prepare_data.py index 
e497f4d791..443164d264 100644 --- a/cli/foundation-models/system/inference/image-object-detection/prepare_data.py +++ b/cli/foundation-models/system/inference/image-object-detection/prepare_data.py @@ -8,41 +8,33 @@ from zipfile import ZipFile -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Use local dataset and unzip it. - :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # Use local data path + local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip" + print(f"Using local data from {local_data_path}") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -118,7 +110,7 @@ def 
prepare_data_for_online_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.abspath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py b/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py index 97a069fc13..feb3c098cc 100644 --- a/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py +++ b/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py @@ -10,40 +10,33 @@ import string -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Use local dataset and unzip it. - :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Use local data path + local_data_path = "/sample-data/image-classification/fridgeObjects.zip" + print(f"Using local data from {local_data_path}") - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # 
Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -221,7 +214,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/image-to-text/prepare_data.py b/cli/foundation-models/system/inference/image-to-text/prepare_data.py index 83a37ac9c2..327c2df011 100644 --- a/cli/foundation-models/system/inference/image-to-text/prepare_data.py +++ b/cli/foundation-models/system/inference/image-to-text/prepare_data.py @@ -8,40 +8,33 @@ from zipfile import ZipFile -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Use local dataset and unzip it. 
- :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Use local data path + local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip" + print(f"Using local data from {local_data_path}") - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -134,7 +127,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/mask-generation/prepare_data.py b/cli/foundation-models/system/inference/mask-generation/prepare_data.py index 
16bdd73e8d..301c24c0aa 100644 --- a/cli/foundation-models/system/inference/mask-generation/prepare_data.py +++ b/cli/foundation-models/system/inference/mask-generation/prepare_data.py @@ -8,41 +8,33 @@ from zipfile import ZipFile -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Use local dataset and unzip it. - :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # Use local data path + local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip" + print(f"Using local data from {local_data_path}") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -181,7 +173,7 @@ def 
prepare_data_for_online_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.abspath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py index b6ac22befd..52fb49f4a7 100644 --- a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py +++ b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py @@ -8,40 +8,33 @@ from zipfile import ZipFile -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Use local dataset and unzip it. - :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Use local data path + local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip" + print(f"Using local data from {local_data_path}") - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = 
os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir @@ -159,7 +152,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), args.data_path ), diff --git a/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py b/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py index 02e6327104..b7122ea7f3 100644 --- a/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py +++ b/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py @@ -11,40 +11,32 @@ LABELS = "water_bottle, milk_bottle, carton, can" -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path. 
- :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # Local data path + local_data_path = "/sample-data/image-classification/fridgeObjects.zip" + print(f"Using local data from {local_data_path}") - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset path + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir @@ -151,7 +143,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = unzip( dataset_parent_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), args.data_path ), diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py 
b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py index d76cd78d2c..77b257f35f 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py @@ -105,7 +105,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # download data print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + download_url = "/data-samples/image-classification/fridgeObjects.zip" # Extract current dataset name from dataset url dataset_name = os.path.basename(download_url).split(".")[0] @@ -115,13 +115,8 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Get the name of zip file data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(download_url, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py index a725d918de..b1ab00106d 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py @@ -108,26 +108,18 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # download data print("Downloading data.") - download_url = 
"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + download_url = "/data-samples/image-classification/multilabelFridgeObjects.zip" # Extract current dataset name from dataset url dataset_name = os.path.basename(download_url).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(download_url, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py index 4f49de01dc..63a3352901 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py @@ -136,6 +136,18 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path.""" + local_data_path = "/data-samples/image-object-detection/odFridgeObjects.zip" + dataset_name = os.path.basename(local_data_path).split(".")[0] + dataset_dir = os.path.join(dataset_parent_dir, dataset_name) + with ZipFile(local_data_path, "r") as zip: + print("extracting files...") + zip.extractall(path=dataset_parent_dir) + print("done") + return dataset_dir + 
+ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url @@ -143,26 +155,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): os.makedirs(dataset_parent_dir, exist_ok=True) # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] - # Get dataset path for later use - dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: - print("extracting files...") - zip.extractall(path=dataset_parent_dir) - print("done") - # delete zip file - os.remove(data_file) + dataset_dir = unzip(dataset_parent_dir) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py index 57fdc34ac6..92607e3c92 100644 --- a/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py @@ -105,26 +105,18 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # download data print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + download_url = "/data-samples/image-classification/fridgeObjects.zip" # Extract current dataset name from dataset url dataset_name = 
os.path.basename(download_url).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(download_url, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py index a6d03ac51e..5daf593036 100644 --- a/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py @@ -101,6 +101,18 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path.""" + local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip" + with ZipFile(local_data_path, "r") as zip: + print("extracting files...") + zip.extractall(path=dataset_parent_dir) + print("done") + dataset_name = os.path.basename(local_data_path).split(".")[0] + dataset_dir = os.path.join(dataset_parent_dir, dataset_name) + return dataset_dir + + + def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist @@ -108,26 +120,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # download data print("Downloading data.") - 
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] - # Get dataset path for later use - dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: - print("extracting files...") - zip.extractall(path=dataset_parent_dir) - print("done") - # delete zip file - os.remove(data_file) + dataset_dir = unzip(dataset_parent_dir) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py index 02e7e36278..1f6553153f 100644 --- a/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py @@ -136,6 +136,18 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path.""" + local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip" + dataset_name = os.path.basename(local_data_path).split(".")[0] + dataset_dir = os.path.join(dataset_parent_dir, dataset_name) + with ZipFile(local_data_path, "r") as zip: + print("extracting files...") + zip.extractall(path=dataset_parent_dir) + print("done") + return dataset_dir + + + def 
upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Download data from public url @@ -143,26 +156,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): os.makedirs(dataset_parent_dir, exist_ok=True) # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] - # Get dataset path for later use - dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: - print("extracting files...") - zip.extractall(path=dataset_parent_dir) - print("done") - # delete zip file - os.remove(data_file) + dataset_dir = unzip(dataset_parent_dir) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/sample-data/image-classification/fridgeObjects.zip b/sample-data/image-classification/fridgeObjects.zip new file mode 100644 index 0000000000..1ea6e4a1e6 Binary files /dev/null and b/sample-data/image-classification/fridgeObjects.zip differ diff --git a/sample-data/image-classification/multilabelFridgeObjects.zip b/sample-data/image-classification/multilabelFridgeObjects.zip new file mode 100644 index 0000000000..7bb702ff8f Binary files /dev/null and b/sample-data/image-classification/multilabelFridgeObjects.zip differ diff --git a/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip b/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip new file mode 100644 index 0000000000..099b09aedc Binary files /dev/null and b/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip differ diff --git 
a/sample-data/image-object-detection/odFridgeObjects.zip b/sample-data/image-object-detection/odFridgeObjects.zip new file mode 100644 index 0000000000..273696796d Binary files /dev/null and b/sample-data/image-object-detection/odFridgeObjects.zip differ diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb index e34cb260e5..c71d3106c8 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb @@ -278,7 +278,6 @@ "outputs": [], "source": [ "import os\n", - "import urllib\n", "from zipfile import ZipFile\n", "\n", "# Change to a different location if you prefer\n", @@ -287,27 +286,23 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data path\n", + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = 
os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb index ebf56f8863..fbf55ac11b 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb @@ -284,27 +284,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, 
filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/multilabelFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb index 1d96fff4fa..0b1f50b6db 100644 --- a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb @@ -283,29 +283,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = 
os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-object-detection/odFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb index 87626de632..3db2d862f6 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb @@ -294,27 +294,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = 
\"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb index 90b1b07ead..6e55a0db39 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb +++ 
b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb @@ -291,27 +291,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/multilabelFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb 
b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb index a4402eeb9d..d224e258c6 100644 --- a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb @@ -310,29 +310,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-object-detection/odFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return 
dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb index 4d737dcc7d..2a8af1fcb5 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb @@ -179,32 +179,22 @@ "\n", "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", - "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = 
os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb index 44092dc677..00c94fe7a0 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb @@ -145,27 +145,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset 
url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb index 632d1dfd44..d97ce36c2f 100644 --- a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb @@ -171,31 +171,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - 
"os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb index 7d9c025610..a04fc0f3ec 100644 --- a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb @@ -136,27 +136,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = 
os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + "dataset_dir = unzip(dataset_parent_dir)" ] }, { @@ -324,7 +319,11 @@ ] } ], - "metadata": {}, + "metadata": { + "language_info": { + "name": "python" + } + }, "nbformat": 4, "nbformat_minor": 2 } diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb index 93f946a615..5767ef5401 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb @@ -171,31 +171,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "if 
os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-object-detection/odFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb index 6c5cea7f50..a99831e238 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb @@ -136,27 +136,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", - 
"\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-object-detection/odFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb index b610478e7b..53e12b3ba5 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb @@ -176,31 +176,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# 
Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb index c67d6e02c4..a37c873a2c 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb +++ 
b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb @@ -139,27 +139,22 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb index 0d52ef7bcf..8074081a93 100644 
--- a/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb @@ -111,34 +111,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-classification/fridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, { diff --git 
a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb index c767466d37..a1c19093f2 100644 --- a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb @@ -171,31 +171,22 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", - "\n", - "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", - "# Get dataset path for later use\n", - "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", - "\n", - "if os.path.exists(dataset_dir):\n", - " shutil.rmtree(dataset_dir)\n", - "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", - "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", - " print(\"extracting files...\")\n", - " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + "local_data_path = \"/data-samples/image-object-detection/odFridgeObjects.zip\"\n", + "\n", + "def unzip(dataset_parent_dir):\n", + " # Extract current dataset name from dataset url\n", + " dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + " # Get dataset path for later use\n", + " dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", + "\n", + " # extract files\n", + " with ZipFile(local_data_path, \"r\") as zip:\n", + " print(\"extracting 
files...\")\n", + " zip.extractall(path=dataset_parent_dir)\n", + " print(\"done\")\n", + " return dataset_dir\n", + "\n", + "dataset_dir = unzip(dataset_parent_dir)" ] }, {