From 5393fdde9cc7d9db06bb2741c4d948aa321907ea Mon Sep 17 00:00:00 2001 From: PiotrBLL Date: Thu, 19 Dec 2024 02:04:28 +0100 Subject: [PATCH 01/10] Add Intel Gaudi HPU device usage --- clip/clip.py | 52 ++++++++++++++++++++++++++++++++++++++++----------- clip/utils.py | 30 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 clip/utils.py diff --git a/clip/clip.py b/clip/clip.py index 398a6282c..334f06490 100644 --- a/clip/clip.py +++ b/clip/clip.py @@ -12,9 +12,11 @@ from .model import build_model from .simple_tokenizer import SimpleTokenizer as _Tokenizer +from .utils import get_device_initial try: from torchvision.transforms import InterpolationMode + BICUBIC = InterpolationMode.BICUBIC except ImportError: BICUBIC = Image.BICUBIC @@ -51,13 +53,24 @@ def _download(url: str, root: str): raise RuntimeError(f"{download_target} exists and is not a regular file") if os.path.isfile(download_target): - if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256: + if ( + hashlib.sha256(open(download_target, "rb").read()).hexdigest() + == expected_sha256 + ): return download_target else: - warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") + warnings.warn( + f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file" + ) with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: - with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop: + with tqdm( + total=int(source.info().get("Content-Length")), + ncols=80, + unit="iB", + unit_scale=True, + unit_divisor=1024, + ) as loop: while True: buffer = source.read(8192) if not buffer: @@ -91,7 +104,12 @@ def available_models() -> List[str]: return list(_MODELS.keys()) -def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", jit: bool = False, download_root: str = None): +def load( + name: str, + device: Union[str, torch.device] = get_device_initial(), + jit: bool = False, + download_root: str = None, +): """Load a CLIP model Parameters @@ -100,7 +118,7 @@ def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_a A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict device : Union[str, torch.device] - The device to put the loaded model + The device to put the loaded model, by default it uses the device returned by `clip.get_device_initial()` jit : bool Whether to load the optimized JIT model or more hackable non-JIT model (default). 
@@ -123,10 +141,12 @@ def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_a else: raise RuntimeError(f"Model {name} not found; available models = {available_models()}") - with open(model_path, 'rb') as opened_file: + with open(model_path, "rb") as opened_file: try: # loading JIT archive - model = torch.jit.load(opened_file, map_location=device if jit else "cpu").eval() + model = torch.jit.load( + opened_file, map_location=device if jit else "cpu" + ).eval() state_dict = None except RuntimeError: # loading saved state dict @@ -171,9 +191,11 @@ def patch_device(module): patch_device(model.encode_image) patch_device(model.encode_text) - # patch dtype to float32 on CPU - if str(device) == "cpu": - float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + # patch dtype to float32 on CPU, HPU + if str(device) in ["cpu", "hpu"]: + float_holder = torch.jit.trace( + lambda: torch.ones([]).float(), example_inputs=[] + ) float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] float_node = float_input.node() @@ -199,10 +221,18 @@ def patch_float(module): model.float() + if str(device) == "hpu": + if torch.hpu.is_available(): + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + + model = wrap_in_hpu_graph(model) + model = model.eval().to(torch.device(device)) return model, _transform(model.input_resolution.item()) -def tokenize(texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False) -> Union[torch.IntTensor, torch.LongTensor]: +def tokenize( + texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False +) -> Union[torch.IntTensor, torch.LongTensor]: """ Returns the tokenized representation of given input string(s) diff --git a/clip/utils.py b/clip/utils.py new file mode 100644 index 000000000..738489549 --- /dev/null +++ b/clip/utils.py @@ -0,0 +1,30 @@ +import importlib.util + +import torch + + +def get_device_initial(preferred_device=None): + """ + Determine the appropriate device to use (cuda, hpu, or cpu). + Args: + preferred_device (str): User-preferred device ('cuda', 'hpu', or 'cpu'). + + Returns: + str: Device string ('cuda', 'hpu', or 'cpu'). 
+ """ + # Check for HPU support + if importlib.util.find_spec("habana_frameworks") is not None: + from habana_frameworks.torch.utils.library_loader import load_habana_module + + load_habana_module() + if torch.hpu.is_available(): + if preferred_device == "hpu" or preferred_device is None: + return "hpu" + + # Check for CUDA (GPU support) + if torch.cuda.is_available(): + if preferred_device == "cuda" or preferred_device is None: + return "cuda" + + # Default to CPU + return "cpu" From deb2964c2639c02644e724781ef231e4541780ec Mon Sep 17 00:00:00 2001 From: PiotrBLL Date: Thu, 19 Dec 2024 02:05:11 +0100 Subject: [PATCH 02/10] Add test - `test_hpu_support` --- tests/test_consistency.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_consistency.py b/tests/test_consistency.py index f2c6fd4fe..f1b6fae22 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -6,7 +6,7 @@ import clip -@pytest.mark.parametrize('model_name', clip.available_models()) +@pytest.mark.parametrize("model_name", clip.available_models()) def test_consistency(model_name): device = "cpu" jit_model, transform = clip.load(model_name, device=device, jit=True) @@ -23,3 +23,22 @@ def test_consistency(model_name): py_probs = logits_per_image.softmax(dim=-1).cpu().numpy() assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1) + + +@pytest.mark.parametrize("model_name", clip.available_models()) +def test_hpu_support(model_name): + device = "hpu" + jit_model, transform = clip.load(model_name, device=device, jit=True) + py_model, _ = clip.load(model_name, device=device, jit=False) + + image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) + text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) + + with torch.no_grad(): + logits_per_image, _ = jit_model(image, text) + jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy() + + logits_per_image, _ = py_model(image, text) + py_probs = logits_per_image.softmax(dim=-1).cpu().numpy() + + assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1) From e8d6206c164bf1f064cd3d36e06426aba9d845bd Mon Sep 17 00:00:00 2001 From: PiotrBLL Date: Thu, 19 Dec 2024 02:06:10 +0100 Subject: [PATCH 03/10] Add Dockerfile.hpu, requirements_hpu.txt and update README.md with HPU support information --- Dockerfile.hpu | 25 +++++++++++++++++++ README.md | 59 ++++++++++++++++++++++++++++++++++++++++++++ requirements_hpu.txt | 7 ++++++ 3 files changed, 91 insertions(+) create mode 100644 Dockerfile.hpu create mode 100644 requirements_hpu.txt diff --git a/Dockerfile.hpu b/Dockerfile.hpu new file mode 100644 index 000000000..fe729c15a --- /dev/null +++ b/Dockerfile.hpu @@ -0,0 +1,25 @@ +# Use the official Gaudi Docker image with PyTorch +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest + +# Set environment variables for Habana +ENV HABANA_VISIBLE_DEVICES=all +ENV OMPI_MCA_btl_vader_single_copy_mechanism=none +ENV PT_HPU_LAZY_ACC_PAR_MODE=0 +ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=1 + +# Set timezone to UTC and install essential packages +ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC +RUN apt-get update && apt-get install -y \ + tzdata \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +COPY . 
/workspace/clip
+WORKDIR /workspace/clip
+
+# Copy HPU requirements
+COPY requirements_hpu.txt /workspace/requirements_hpu.txt
+
+# Install Python packages
+RUN pip install --upgrade pip \
+    && pip install -r requirements_hpu.txt
diff --git a/README.md b/README.md
index db56b56e2..51264f8e1 100644
--- a/README.md
+++ b/README.md
@@ -193,6 +193,65 @@ print(f"Accuracy = {accuracy:.3f}")
 
 Note that the `C` value should be determined via a hyperparameter sweep using a validation split.
 
+## Intel® Gaudi® HPU Usage
+
+### Build the Docker Image
+To use Intel® Gaudi® HPU for running this project, start by building a Docker image with the appropriate environment setup.
+
+```bash
+docker build -t clip_hpu:latest -f Dockerfile.hpu .
+```
+
+In the `Dockerfile.hpu`, we use the `vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest` base image. Ensure that the version matches your setup.
+See the [PyTorch Docker Images for the Intel® Gaudi® Accelerator](https://developer.habana.ai/catalog/pytorch-container/) for more information.
+
+### Run the Container
+
+```bash
+docker run -it --runtime=habana clip_hpu:latest
+```
+
+Optionally, you can add a mapping volume (`-v`) to access your project directory inside the container. Add the flag `-v /path/to/your/project:/workspace/project` to the `docker run` command.
+Replace `/path/to/your/project` with the path to your project directory on your local machine.
+
+### Command-line Usage with Intel® Gaudi® HPU
+
+To run the notebook with Intel® Gaudi® HPU, use the `--device hpu` option when specifying the device in the code.
+
+For example, modify the device assignment as follows:
+
+```python
+device = 'hpu' if torch.device('hpu').is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'
+model.to(device)
+image_input = image_input.to(device)
+text_tokens = text_tokens.to(device)
+```
+
+### Python Usage with Intel® Gaudi® HPU
+
+To leverage Intel® Gaudi® HPU in Python, ensure that the device is specified as `hpu` during model initialization and tensor manipulation.
+ +```python +import clip +import torch + +# Load the model on HPU +device = "hpu" +model, preprocess = clip.load("ViT-B/32", device=device) + +# Prepare data and move to HPU +image_input = preprocess(image).unsqueeze(0).to(device) +text_tokens = clip.tokenize("a sample text").to(device) + +# Run inference +with torch.no_grad(): + image_features = model.encode_image(image_input) + text_features = model.encode_text(text_tokens) + +print("Inference completed on HPU") +``` + + ## See Also * [OpenCLIP](https://github.com/mlfoundations/open_clip): includes larger and independently trained CLIP models up to ViT-G/14 diff --git a/requirements_hpu.txt b/requirements_hpu.txt new file mode 100644 index 000000000..3eefe7c1a --- /dev/null +++ b/requirements_hpu.txt @@ -0,0 +1,7 @@ +-r requirements.txt +optimum-habana==1.14.1 +transformers==4.45.2 +huggingface-hub==0.26.2 +tiktoken==0.8.0 +torch-geometric==2.6.1 +numba==0.60.0 From d17b83174d11214edef78ef5bba5b0350394123c Mon Sep 17 00:00:00 2001 From: PiotrBLL Date: Thu, 19 Dec 2024 15:24:38 +0100 Subject: [PATCH 04/10] Fix JIT error --- clip/clip.py | 16 ++++++++++++++-- tests/test_consistency.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/clip/clip.py b/clip/clip.py index 334f06490..7565b8930 100644 --- a/clip/clip.py +++ b/clip/clip.py @@ -156,13 +156,25 @@ def load( state_dict = torch.load(opened_file, map_location="cpu") if not jit: - model = build_model(state_dict or model.state_dict()).to(device) + model = build_model(state_dict or model.state_dict()) + + if str(device) == "hpu": + from habana_frameworks.torch.utils.library_loader import load_habana_module + + load_habana_module() + if torch.hpu.is_available(): + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + + model = wrap_in_hpu_graph(model) + model = model.eval().to(torch.device(device)) + else: + model = model.to(device) if str(device) == "cpu": model.float() return model, _transform(model.visual.input_resolution) # patch the device names - device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device("cpu" if device == "hpu" else device)), example_inputs=[]) device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] def _node_get(node: torch._C.Node, key: str): diff --git a/tests/test_consistency.py b/tests/test_consistency.py index f1b6fae22..18000f61b 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -28,7 +28,7 @@ def test_consistency(model_name): @pytest.mark.parametrize("model_name", clip.available_models()) def test_hpu_support(model_name): device = "hpu" - jit_model, transform = clip.load(model_name, device=device, jit=True) + jit_model, transform = clip.load(model_name, device="cpu", jit=True) py_model, _ = clip.load(model_name, device=device, jit=False) image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) From 859a140c046fd6b6858317398688cdfd65cb9779 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Tue, 28 Jan 2025 18:50:41 +0100 Subject: [PATCH 05/10] fix: Remove incorrect code snippet from README-HPU section. 
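
The README now defers to `clip.utils.get_device_initial()` instead of
hard-coding an HPU device string. A short sketch of the intended behaviour
(not part of this diff; the HPU branch assumes a Gaudi host with
`habana_frameworks` installed):

    import clip
    from clip.utils import get_device_initial

    # No argument: pick the best available backend ("hpu" > "cuda" > "cpu").
    device = get_device_initial()

    # A preferred device is honoured only when that backend is available;
    # e.g. get_device_initial("hpu") falls back to "cpu" on a non-Gaudi machine.
    device = get_device_initial("hpu")

    model, preprocess = clip.load("ViT-B/32", device=device)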
--- README.md | 52 +++++++++------------------------------ tests/test_consistency.py | 24 +++++++++--------- 2 files changed, 23 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 51264f8e1..e2f4206f6 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,9 @@ import torch import clip from PIL import Image -device = "cuda" if torch.cuda.is_available() else "cpu" +from clip.utils import get_device_initial + +device = get_device_initial() # "HPU" if using Intel® Gaudi® HPU, "cuda" if using CUDA GPU, "cpu" otherwise model, preprocess = clip.load("ViT-B/32", device=device) image = preprocess(Image.open("CLIP.png")).unsqueeze(0).to(device) @@ -94,8 +96,10 @@ import clip import torch from torchvision.datasets import CIFAR100 +from clip.utils import get_device_initial + # Load the model -device = "cuda" if torch.cuda.is_available() else "cpu" +device = get_device_initial() model, preprocess = clip.load('ViT-B/32', device) # Download the dataset @@ -153,8 +157,10 @@ from torch.utils.data import DataLoader from torchvision.datasets import CIFAR100 from tqdm import tqdm +from clip.utils import get_device_initial + # Load the model -device = "cuda" if torch.cuda.is_available() else "cpu" +device = get_device_initial() model, preprocess = clip.load('ViT-B/32', device) # Load the dataset @@ -209,47 +215,11 @@ See the [PyTorch Docker Images for the Intel® Gaudi® Accelerator](https://deve ```bash docker run -it --runtime=habana clip_hpu:latest -``` - -Optionally, you can add a mapping volume (`-v`) to access your project directory inside the container. Add the flag `-v /path/to/your/project:/workspace/project` to the `docker run` command. -Replace `/path/to/your/project` with the path to your project directory on your local machine. - -### Command-line Usage with Intel® Gaudi® HPU - -To run the notebook with Intel® Gaudi® HPU, use the `--device hpu` option when specifying the device in the code. - -For example, modify the device assignment as follows: - -```python -device = 'hpu' if torch.device('hpu').is_available() else 'cuda' if torch.cuda.is_available() else 'cpu' -model.to(device) -image_input = image_input.to(device) -text_tokens = text_tokens.to(device) -``` +``` ### Python Usage with Intel® Gaudi® HPU -To leverage Intel® Gaudi® HPU in Python, ensure that the device is specified as `hpu` during model initialization and tensor manipulation. - -```python -import clip -import torch - -# Load the model on HPU -device = "hpu" -model, preprocess = clip.load("ViT-B/32", device=device) - -# Prepare data and move to HPU -image_input = preprocess(image).unsqueeze(0).to(device) -text_tokens = clip.tokenize("a sample text").to(device) - -# Run inference -with torch.no_grad(): - image_features = model.encode_image(image_input) - text_features = model.encode_text(text_tokens) - -print("Inference completed on HPU") -``` +You do not need to change the code to leverage Intel® Gaudi® HPU. The `get_device_initial()` function will automatically detect the correct device and return the appropriate device name. So no changes are required. 
## See Also diff --git a/tests/test_consistency.py b/tests/test_consistency.py index 18000f61b..371725031 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -27,18 +27,18 @@ def test_consistency(model_name): @pytest.mark.parametrize("model_name", clip.available_models()) def test_hpu_support(model_name): - device = "hpu" - jit_model, transform = clip.load(model_name, device="cpu", jit=True) - py_model, _ = clip.load(model_name, device=device, jit=False) + devices = ["hpu", "cpu"] + all_probs = [] + for device in devices: + print(f"=== Testing {model_name} on {device} ===") + model, transform = clip.load(model_name, device=device, jit=False) - image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) - text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) - - with torch.no_grad(): - logits_per_image, _ = jit_model(image, text) - jit_probs = logits_per_image.softmax(dim=-1).cpu().numpy() + image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) + text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) - logits_per_image, _ = py_model(image, text) - py_probs = logits_per_image.softmax(dim=-1).cpu().numpy() + with torch.no_grad(): + logits_per_image, _ = model(image, text) + probs = logits_per_image.softmax(dim=-1).cpu().numpy() + all_probs.append(probs) - assert np.allclose(jit_probs, py_probs, atol=0.01, rtol=0.1) + assert np.allclose(all_probs[0], all_probs[1], atol=0.01, rtol=0.1) From fc198e5172fb72d2e045348cdd674521606bdbc5 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Wed, 5 Feb 2025 00:06:35 +0100 Subject: [PATCH 06/10] Add time execution on HPU vs CPU benchmark --- benchmark.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 benchmark.py diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 000000000..c31bbeff5 --- /dev/null +++ b/benchmark.py @@ -0,0 +1,37 @@ +import logging +import time +import numpy as np +import habana_frameworks.torch.core as ht +import torch +from PIL import Image + +import clip +from clip.utils import get_device_initial + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def run_model(model_name, device): + model, transform = clip.load(model_name, device=get_device_initial(device), jit=False) + + image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) + text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) + + with torch.no_grad(): + logits_per_image, _ = model(image, text) + probs = logits_per_image.softmax(dim=-1).cpu().numpy() + return probs + + +if __name__ == "__main__": + logger.info("Running on HPU") + start_time = time.time() + run_model("RN50", "hpu") + end_time = time.time() + logger.info(f"HPU execution time: {end_time - start_time:.4f} seconds") + + logger.info("Running on CPU") + start_time = time.time() + run_model("RN50", "cpu") + end_time = time.time() + logger.info(f"CPU execution time: {end_time - start_time:.4f} seconds") From af0f80c1e647b2ece79ea8cf99b2b3a3343ff410 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Wed, 12 Feb 2025 13:38:36 +0100 Subject: [PATCH 07/10] Add CLIP installation instruction to Dockerfile `pip install -e .` --- Dockerfile.hpu | 3 ++- requirements_hpu.txt | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Dockerfile.hpu b/Dockerfile.hpu index fe729c15a..2fa8d3aa2 100644 --- a/Dockerfile.hpu +++ b/Dockerfile.hpu @@ -22,4 +22,5 @@ COPY requirements_hpu.txt /workspace/requirements_hpu.txt # Install Python packages 
RUN pip install --upgrade pip \ - && pip install -r requirements_hpu.txt + && pip install -r requirements_hpu.txt \ + && pip install -e . \ No newline at end of file diff --git a/requirements_hpu.txt b/requirements_hpu.txt index 3eefe7c1a..5aba6057c 100644 --- a/requirements_hpu.txt +++ b/requirements_hpu.txt @@ -1,7 +1,3 @@ -r requirements.txt optimum-habana==1.14.1 -transformers==4.45.2 -huggingface-hub==0.26.2 -tiktoken==0.8.0 -torch-geometric==2.6.1 -numba==0.60.0 +pytest From 6d3570d0e2d9aa6af3d88a2f0640daed6fe6b433 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Wed, 12 Feb 2025 14:06:40 +0100 Subject: [PATCH 08/10] Add info on how to run tests to README.md --- README.md | 6 ++++++ tests/test_consistency.py | 1 + 2 files changed, 7 insertions(+) diff --git a/README.md b/README.md index e2f4206f6..abda08854 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,12 @@ docker run -it --runtime=habana clip_hpu:latest You do not need to change the code to leverage Intel® Gaudi® HPU. The `get_device_initial()` function will automatically detect the correct device and return the appropriate device name. So no changes are required. +### Run the Tests + +```bash +pytest +``` +This will run the tests and verify that the model is working correctly. ## See Also diff --git a/tests/test_consistency.py b/tests/test_consistency.py index 371725031..9b72208bf 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -2,6 +2,7 @@ import pytest import torch from PIL import Image +import habana_frameworks.torch import clip From 07c9771a30d70171a29357d9e959115f92b1749e Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Wed, 12 Feb 2025 14:08:35 +0100 Subject: [PATCH 09/10] Move image IO ops out of time measurements Run model n times and average the runtime --- benchmark.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/benchmark.py b/benchmark.py index c31bbeff5..0b944775e 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,7 +1,6 @@ import logging import time import numpy as np -import habana_frameworks.torch.core as ht import torch from PIL import Image @@ -11,27 +10,39 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + def run_model(model_name, device): - model, transform = clip.load(model_name, device=get_device_initial(device), jit=False) + model, transform = clip.load( + model_name, device=get_device_initial(device), jit=False + ) image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) with torch.no_grad(): + start_time = time.perf_counter() + logits_per_image, _ = model(image, text) probs = logits_per_image.softmax(dim=-1).cpu().numpy() - return probs + + end_time = time.perf_counter() + logger.info(f"Execution time: {end_time - start_time:.4f} seconds") + return probs, end_time - start_time + + +def run_n_times(model_name, device, n): + times = [] + logger.info(f"Running {model_name} on {device} {n} times") + for _ in range(n): + logger.info(f"Run {_ + 1} of {n}") + _, time = run_model(model_name, device) + times.append(time) + return np.mean(times) if __name__ == "__main__": - logger.info("Running on HPU") - start_time = time.time() - run_model("RN50", "hpu") - end_time = time.time() - logger.info(f"HPU execution time: {end_time - start_time:.4f} seconds") - - logger.info("Running on CPU") - start_time = time.time() - run_model("RN50", "cpu") - end_time = time.time() - logger.info(f"CPU execution time: {end_time 
- start_time:.4f} seconds") + hpu_time = run_n_times("RN50", "hpu", 10) + cpu_time = run_n_times("RN50", "cpu", 10) + + logger.info(f"HPU time: {hpu_time:.4f} seconds") + logger.info(f"CPU time: {cpu_time:.4f} seconds") From 91532816085ba91ffe111d9bc9083304b60f41d1 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Mon, 24 Feb 2025 17:32:29 +0100 Subject: [PATCH 10/10] fix: Remove benchmark.py --- benchmark.py | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) delete mode 100644 benchmark.py diff --git a/benchmark.py b/benchmark.py deleted file mode 100644 index 0b944775e..000000000 --- a/benchmark.py +++ /dev/null @@ -1,48 +0,0 @@ -import logging -import time -import numpy as np -import torch -from PIL import Image - -import clip -from clip.utils import get_device_initial - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def run_model(model_name, device): - model, transform = clip.load( - model_name, device=get_device_initial(device), jit=False - ) - - image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device) - text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device) - - with torch.no_grad(): - start_time = time.perf_counter() - - logits_per_image, _ = model(image, text) - probs = logits_per_image.softmax(dim=-1).cpu().numpy() - - end_time = time.perf_counter() - logger.info(f"Execution time: {end_time - start_time:.4f} seconds") - return probs, end_time - start_time - - -def run_n_times(model_name, device, n): - times = [] - logger.info(f"Running {model_name} on {device} {n} times") - for _ in range(n): - logger.info(f"Run {_ + 1} of {n}") - _, time = run_model(model_name, device) - times.append(time) - return np.mean(times) - - -if __name__ == "__main__": - hpu_time = run_n_times("RN50", "hpu", 10) - cpu_time = run_n_times("RN50", "cpu", 10) - - logger.info(f"HPU time: {hpu_time:.4f} seconds") - logger.info(f"CPU time: {cpu_time:.4f} seconds")
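
With benchmark.py removed, an HPU-vs-CPU timing comparison can still be run
ad hoc. A minimal sketch along the lines of the removed script (assumptions:
executed from the repository root so `CLIP.png` is present, `habana_frameworks`
installed for the HPU run, and `RN50` chosen only as an example model):

    import time

    import numpy as np
    import torch
    from PIL import Image

    import clip
    from clip.utils import get_device_initial


    def mean_runtime(model_name: str, preferred_device: str, n: int = 10) -> float:
        # Resolve the device once so the model and inputs end up on the same
        # backend even when the preferred one is unavailable.
        device = get_device_initial(preferred_device)
        model, transform = clip.load(model_name, device=device, jit=False)

        image = transform(Image.open("CLIP.png")).unsqueeze(0).to(device)
        text = clip.tokenize(["a diagram", "a dog", "a cat"]).to(device)

        durations = []
        with torch.no_grad():
            for _ in range(n):
                start = time.perf_counter()
                logits_per_image, _ = model(image, text)
                logits_per_image.softmax(dim=-1).cpu().numpy()  # pull to host so the run is finished
                durations.append(time.perf_counter() - start)
        return float(np.mean(durations))


    if __name__ == "__main__":
        print(f"HPU mean over 10 runs: {mean_runtime('RN50', 'hpu'):.4f} s")
        print(f"CPU mean over 10 runs: {mean_runtime('RN50', 'cpu'):.4f} s")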