Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -103,28 +103,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
# Use local data path
print("Using local data.")
local_data_path = "/sample-data/image-classification/fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,28 +106,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
# Use local data path
print("Using local data.")
local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,37 +154,26 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
:param ml_client: Azure ML client
:param dataset_parent_dir: Path to the dataset folder
"""
# Download data from public url

# create data folder if it doesnt exist.
# Create data folder if it doesn't exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
# Use local data path
print("Using local data.")
local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip"

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

# Use the extracted directory
extracted_dir = os.path.join(dataset_parent_dir, os.path.basename(local_data_path).split(".")[0])

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
my_data = Data(
path=dataset_dir,
path=extracted_dir,
type=AssetTypes.URI_FOLDER,
description="Fridge-items images Object detection",
name="fridge-items-images-od-ft",
Expand All @@ -198,7 +187,7 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
print(uri_folder_data_asset.path)

create_jsonl_and_mltable_files(
uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=dataset_dir
uri_folder_data_path=uri_folder_data_asset.path, dataset_dir=extracted_dir
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,45 +8,38 @@
from zipfile import ZipFile


def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> None:
"""Download image dataset and unzip it.
def unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> str:
"""Use local dataset and unzip it.

:param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
:param dataset_parent_dir: dataset parent directory to which dataset will be extracted
:type dataset_parent_dir: str
:param is_multilabel_dataset: flag to indicate if dataset is multi-label or not
:type is_multilabel_dataset: int
"""
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
# Use local data path
if is_multilabel_dataset == 0:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
else:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
print(f"Downloading data from {download_url}")
local_data_path = "/sample-data/image-classification/multilabelFridgeObjects.zip"
print(f"Using local data from {local_data_path}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down Expand Up @@ -142,7 +135,7 @@ def prepare_data_for_batch_inference(dataset_dir: str, is_multilabel: int = 0) -
args, unknown = parser.parse_known_args()
args_dict = vars(args)

dataset_dir = download_and_unzip(
dataset_dir = unzip(
dataset_parent_dir=os.path.join(
os.path.dirname(os.path.abspath(__file__)), args.data_path
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,33 @@
import string


def download_and_unzip(dataset_parent_dir: str) -> None:
"""Download image dataset and unzip it.
def unzip(dataset_parent_dir: str) -> str:
"""Use local dataset and unzip it.

:param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
:param dataset_parent_dir: dataset parent directory to which dataset will be extracted
:type dataset_parent_dir: str
"""
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")
# Use local data path
local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
print(f"Using local data from {local_data_path}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down Expand Up @@ -137,7 +130,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None:
args, unknown = parser.parse_known_args()
args_dict = vars(args)

dataset_dir = download_and_unzip(
dataset_dir = unzip(
dataset_parent_dir=os.path.join(
os.path.dirname(os.path.realpath(__file__)), args.data_path
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,33 @@
from zipfile import ZipFile


def download_and_unzip(dataset_parent_dir: str) -> None:
"""Download image dataset and unzip it.
def unzip(dataset_parent_dir: str) -> str:
"""Use local dataset and unzip it.

:param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
:param dataset_parent_dir: dataset parent directory to which dataset will be extracted
:type dataset_parent_dir: str
"""
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
# Use local data path
local_data_path = "/sample-data/image-object-detection/odFridgeObjects.zip"
print(f"Using local data from {local_data_path}")

download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down Expand Up @@ -118,7 +110,7 @@ def prepare_data_for_online_inference(dataset_dir: str) -> None:
args, unknown = parser.parse_known_args()
args_dict = vars(args)

dataset_dir = download_and_unzip(
dataset_dir = unzip(
dataset_parent_dir=os.path.join(
os.path.dirname(os.path.abspath(__file__)), args.data_path
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,33 @@
import string


def download_and_unzip(dataset_parent_dir: str) -> None:
"""Download image dataset and unzip it.
def unzip(dataset_parent_dir: str) -> str:
"""Use local dataset and unzip it.

:param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
:param dataset_parent_dir: dataset parent directory to which dataset will be extracted
:type dataset_parent_dir: str
"""
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")
# Use local data path
local_data_path = "/sample-data/image-classification/fridgeObjects.zip"
print(f"Using local data from {local_data_path}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down Expand Up @@ -221,7 +214,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None:
args, unknown = parser.parse_known_args()
args_dict = vars(args)

dataset_dir = download_and_unzip(
dataset_dir = unzip(
dataset_parent_dir=os.path.join(
os.path.dirname(os.path.realpath(__file__)), args.data_path
),
Expand Down
Loading
Loading