Skip to content

Commit 0e8e865

Browse files
committed
Add GPU option to benchmark
1 parent 1a76abd commit 0e8e865

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

bench/ndarray/matmul_Blosc2PyTorch.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
19
### Matmul performance comparison between Blosc2 and PyTorch with persistent storage
2-
# It is important to force numpy to use mkl as it can speed up the
3-
# blosc2 matmul (which uses np.matmul as a backend) by a factor of 2x:
4-
# conda install numpy mkl
5-
# To download the kevlar.h5 dataset use:
6-
# curl http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5 --output "kevlar.h5"
10+
# For this bench to work, you first need to download the data file at:
11+
# http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5
12+
713
import numpy as np
814
import blosc2
9-
import matplotlib.pyplot as plt
1015
import torch
1116
import pickle
1217
from time import time
1318
import h5py
1419
import hdf5plugin
15-
from tqdm import tqdm # progress bar (pip install tqdm)
16-
import os
20+
from tqdm import tqdm # progress bar
21+
1722
cparams = {
1823
"codec": blosc2.Codec.LZ4,
1924
"filters": [blosc2.Filter.SHUFFLE],
@@ -22,6 +27,17 @@
2227
batch_size = 32
2328
CREATE = True
2429
dtype = np.float32
30+
31+
# Check what's available
32+
print(f"MPS available: {torch.backends.mps.is_available()}")
33+
print(f"CUDA available: {torch.cuda.is_available()}")
34+
35+
# GPU for PyTorch
36+
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
37+
# Prefer CUDA, then Apple MPS, then CPU. PyTorch spells the CUDA device type
# "cuda" ("gpu" is rejected by torch.device), and the MPS choice must not be
# discarded when CUDA is unavailable.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
38+
# device = torch.device("cpu") # Force CPU usage
39+
print(f"Using device: {device}")
40+
2541
if CREATE:
2642
def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
2743
scale=1.0,
@@ -136,9 +152,9 @@ def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
136152
dset_b = f["data"]
137153
dset_out = f["out"]
138154

139-
for i in range(0, len(dset_out), batch_size): # batch of 32
140-
batch_a = torch.from_numpy(dset_a[i:i+batch_size]) # NumPy array slice
141-
batch_b = torch.from_numpy(dset_b[i:i+batch_size]) # NumPy array slice
155+
for i in range(0, len(dset_out), batch_size):
156+
batch_a = torch.from_numpy(dset_a[i:i+batch_size]).to(device)
157+
batch_b = torch.from_numpy(dset_b[i:i+batch_size]).to(device)
142158
# h5py can only store host memory: bring the product back from the accelerator
# (a no-op on CPU) and hand it over as a NumPy array.
dset_out[i:i+batch_size] = torch.matmul(batch_a, batch_b).cpu().numpy()
143159
hdf5_chunks = [dset_a.chunks, dset_b.chunks]
144160
hdf5_chunks_out = dset_out.chunks

0 commit comments

Comments
 (0)