#
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
import argparse
import asyncio
import json
import time
from pathlib import Path

import httpx


async def stream_file(client, url, filename, semaphore):
    """Stream a single file from `url` to `filename`, with concurrency capped by `semaphore`."""
    async with semaphore:
        start_time = time.time()
        total_bytes = 0
        # Stream the response body to disk chunk by chunk instead of buffering it all in memory.
        async with client.stream("GET", url) as stream:
            with Path(filename).open("wb") as file:
                async for data in stream.aiter_bytes():
                    file.write(data)
                    total_bytes += len(data)
        download_time = time.time() - start_time

        total_mb = total_bytes / (1024 * 1024)
        speed_mb_per_s = total_mb / download_time if download_time > 0 else 0
        print(f"Downloaded {url} - {total_mb:.2f} MB in {download_time:.2f}s ({speed_mb_per_s:.2f} MB/s)")


async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--data_dir",
        type=Path,
        required=True,
        help="Directory to store downloaded models and datasets",
    )
    parser.add_argument(
        "-j",
        "--json_path",
        type=Path,
        required=True,
        help="Path to the JSON file with model information",
    )
    args = parser.parse_args()

    with args.json_path.open("r") as f:
        models_data = json.load(f)

    base_path = "https://storage.geti.intel.com/geti_predict/test/"
    semaphore = asyncio.Semaphore(10)
    args.data_dir.mkdir(parents=True, exist_ok=True)
    async with httpx.AsyncClient(timeout=60.0) as client:
        tasks = []
        for model_entry in models_data:
            model_name = model_entry["name"]
            download_url = base_path + model_name
            save_path = args.data_dir / model_name
            save_path.parent.mkdir(parents=True, exist_ok=True)
            tasks.append(stream_file(client, download_url, save_path, semaphore))

            # OpenVINO IR models ship as an .xml/.bin pair, so queue the matching weights file as well.
            if model_name.endswith(".xml"):
                tasks.append(
                    stream_file(client, download_url.replace(".xml", ".bin"), save_path.with_suffix(".bin"), semaphore),
                )

        print(f"Starting download of {len(tasks)} files with max 10 concurrent downloads...")
        await asyncio.gather(*tasks)
        print(f"All {len(tasks)} files downloaded successfully!")


if __name__ == "__main__":
    asyncio.run(main())
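
The manifest passed via `--json_path` is expected to be a JSON array in which each entry has at least a `name` field; that value is appended to the storage base URL and reused as the relative path of the saved file under `--data_dir`, so nested directories are created automatically. A minimal sketch of such a manifest, with hypothetical file names:

```json
[
  { "name": "classification/model.xml" },
  { "name": "detection/model.xml" },
  { "name": "datasets/sample_dataset.zip" }
]
```

Entries ending in `.xml` additionally trigger a download of the matching `.bin` weights file.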
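
Assuming the script is saved as `download_test_data.py` (the file and manifest names below are assumptions, not part of the diff), a typical invocation could be:

```sh
python download_test_data.py --data_dir data --json_path tests/models.json
```

All download tasks are created up front, but the `asyncio.Semaphore(10)` acquired inside `stream_file` keeps at most ten transfers in flight at any time.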