Azure · hakotesova · Oct 3, 2025 · Oct 3, 2025
@@ -103,28 +103,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # Download data
-    print("Downloading data.")
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
+    # Use local data path
+    print("Using local data.")
+    local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # Extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # Delete zip file
-    os.remove(data_file)
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")

@@ -106,28 +106,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # Download data
-    print("Downloading data.")
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
+    # Use local data path
+    print("Using local data.")
+    local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip"
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # Extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # Delete zip file
-    os.remove(data_file)
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")

@@ -154,37 +154,26 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     :param ml_client: Azure ML client
     :param dataset_parent_dir: Path to the dataset folder
     """
-    # Download data from public url
-
-    # create data folder if it doesnt exist.
+    # Create data folder if it doesn't exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
-
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
-    # Get dataset path for later use
-    dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
-
-    # Get the data zip file path
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
+    # Use local data path
+    print("Using local data.")
+    local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip"
 
-    # Download the dataset
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
+    # Dataset folder path: the parent dir joined with the archive name minus its extension
+    extracted_dir = os.path.join(dataset_parent_dir, os.path.basename(local_data_path).split(".")[0])
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")
     my_data = Data(
-        path=dataset_dir,
+        path=extracted_dir,
         type=AssetTypes.URI_FOLDER,
         description="Fridge-items images Object detection",
         name="fridge-items-images-od-ft",
@@ -198,7 +187,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     print(uri_folder_data_asset.path)
 
     create_jsonl_and_mltable_files(
-        uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
+        uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=extracted_dir
     )
 
 

@@ -8,45 +8,38 @@
 from zipfile import ZipFile
 
 
-def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> None:
-    """Download image dataset and unzip it.
+def unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> str:
+    """Unzip the local image dataset archive and return the dataset directory path.
 
-    :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
+    :param dataset_parent_dir: dataset parent directory to which dataset will be extracted
     :type dataset_parent_dir: str
     :param is_multilabel_dataset: flag to indicate if dataset is multi-label or not
     :type is_multilabel_dataset: int
     """
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
+    # Use local data path
     if is_multilabel_dataset == 0:
-        download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
+        local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
     else:
-        download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+        local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip"
+    print(f"Using local data from {local_data_path}")
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 
@@ -142,7 +135,7 @@ def prepare_data_for_batch_inference(dataset_dir: str, is_multilabel: int = 0) -
     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
 
-    dataset_dir = download_and_unzip(
+    dataset_dir = unzip(
         dataset_parent_dir=os.path.join(
             os.path.dirname(os.path.abspath(__file__)), args.data_path
         ),

@@ -10,40 +10,33 @@
 import string
 
 
-def download_and_unzip(dataset_parent_dir: str) -> None:
-    """Download image dataset and unzip it.
+def unzip(dataset_parent_dir: str) -> str:
+    """Unzip the local image dataset archive and return the dataset directory path.
 
-    :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
+    :param dataset_parent_dir: dataset parent directory to which dataset will be extracted
     :type dataset_parent_dir: str
     """
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+    # Use local data path
+    local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
+    print(f"Using local data from {local_data_path}")
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 
@@ -137,7 +130,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None:
     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
 
-    dataset_dir = download_and_unzip(
+    dataset_dir = unzip(
         dataset_parent_dir=os.path.join(
             os.path.dirname(os.path.realpath(__file__)), args.data_path
         ),

@@ -8,41 +8,33 @@
 from zipfile import ZipFile
 
 
-def download_and_unzip(dataset_parent_dir: str) -> None:
-    """Download image dataset and unzip it.
+def unzip(dataset_parent_dir: str) -> str:
+    """Unzip the local image dataset archive and return the dataset directory path.
 
-    :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
+    :param dataset_parent_dir: dataset parent directory to which dataset will be extracted
     :type dataset_parent_dir: str
     """
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
+    # Use local data path
+    local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip"
+    print(f"Using local data from {local_data_path}")
 
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
-
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 
@@ -118,7 +110,7 @@ def prepare_data_for_online_inference(dataset_dir: str) -> None:
     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
 
-    dataset_dir = download_and_unzip(
+    dataset_dir = unzip(
         dataset_parent_dir=os.path.join(
             os.path.dirname(os.path.abspath(__file__)), args.data_path
         ),

@@ -10,40 +10,33 @@
 import string
 
 
-def download_and_unzip(dataset_parent_dir: str) -> None:
-    """Download image dataset and unzip it.
+def unzip(dataset_parent_dir: str) -> str:
+    """Unzip the local image dataset archive and return the dataset directory path.
 
-    :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
+    :param dataset_parent_dir: dataset parent directory to which dataset will be extracted
     :type dataset_parent_dir: str
     """
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+    # Use local data path
+    local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
+    print(f"Using local data from {local_data_path}")
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 
@@ -221,7 +214,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None:
     args, unknown = parser.parse_known_args()
     args_dict = vars(args)
 
-    dataset_dir = download_and_unzip(
+    dataset_dir = unzip(
         dataset_parent_dir=os.path.join(
             os.path.dirname(os.path.realpath(__file__)), args.data_path
         ),