|
| 1 | +####################################################################### |
| 2 | +# Copyright (c) 2019-present, Blosc Development Team <[email protected]> |
| 3 | +# All rights reserved. |
| 4 | +# |
| 5 | +# This source code is licensed under a BSD-style license (found in the |
| 6 | +# LICENSE file in the root directory of this source tree) |
| 7 | +####################################################################### |
| 8 | + |
1 | 9 | ### Matmul performance comparison between Blosc2 and PyTorch with persistent storage |
2 | | -# It is important to force numpy to use mkl as it can speed up the |
3 | | -# blosc2 matmul (which uses np.matmul as a backend) by a factor of 2x: |
4 | | -# conda install numpy mkl |
5 | | -# To download the kevlar.h5 dataset use: |
6 | | -# curl http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5 --output "kevlar.h5" |
| 10 | +# For this bench to work, you first need to download the data file at: |
| 11 | +# http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5 |
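| 12 | +# e.g. with curl:
| 13 | +#   curl http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5 --output "kevlar.h5"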
| 12 | + |
7 | 13 | import numpy as np |
8 | 14 | import blosc2 |
9 | | -import matplotlib.pyplot as plt |
10 | 15 | import torch |
11 | 16 | import pickle |
12 | 17 | from time import time |
13 | 18 | import h5py |
14 | 19 | import hdf5plugin |
15 | | -from tqdm import tqdm # progress bar (pip install tqdm) |
16 | | -import os |
| 20 | +from tqdm import tqdm # progress bar |
| 21 | + |
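| 22 | +# Blosc2 compression params: LZ4 codec plus byte-shuffle, a fast combo for float data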
17 | 22 | cparams = { |
18 | 23 | "codec": blosc2.Codec.LZ4, |
19 | 24 | "filters": [blosc2.Filter.SHUFFLE], |
|
22 | 27 | batch_size = 32 |
23 | 28 | CREATE = True |
24 | 29 | dtype = np.float32 |
| 30 | + |
| 31 | +# Check what's available |
| 32 | +print(f"MPS available: {torch.backends.mps.is_available()}") |
| 33 | +print(f"CUDA available: {torch.cuda.is_available()}") |
| 34 | + |
| 35 | +# GPU for PyTorch |
| 36 | +device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") |
| 37 | +device = torch.device("gpu" if torch.cuda.is_available() else "cpu") |
| 38 | +# device = torch.device("cpu") # Force CPU usage |
| 39 | +print(f"Using device: {device}") |
| 40 | + |
25 | 41 | if CREATE: |
26 | 42 | def build_dense_rowwarp_matrix(out_h=2000, in_h=2167, |
27 | 43 | scale=1.0, |
@@ -136,9 +152,9 @@ def build_dense_rowwarp_matrix(out_h=2000, in_h=2167, |
136 | 152 | dset_b = f["data"] |
137 | 153 | dset_out = f["out"] |
138 | 154 |
|
139 | | - for i in range(0, len(dset_out), batch_size): # batch of 32 |
140 | | - batch_a = torch.from_numpy(dset_a[i:i+batch_size]) # NumPy array slice |
141 | | - batch_b = torch.from_numpy(dset_b[i:i+batch_size]) # NumPy array slice |
| 155 | + for i in range(0, len(dset_out), batch_size): |
| 156 | + batch_a = torch.from_numpy(dset_a[i:i+batch_size]).to(device) |
| 157 | + batch_b = torch.from_numpy(dset_b[i:i+batch_size]).to(device) |
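| 158 | +        # The matmul result lives on `device`; copy it back to host memory before the HDF5 write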
142 | | -        dset_out[i:i+batch_size] = torch.matmul(batch_a, batch_b)
| 159 | +        dset_out[i:i+batch_size] = torch.matmul(batch_a, batch_b).cpu().numpy()
143 | 159 | hdf5_chunks = [dset_a.chunks, dset_b.chunks] |
144 | 160 | hdf5_chunks_out = dset_out.chunks |
|