forked from foundation-model-stack/fastsafetensors
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_multi_paddle.py
More file actions
45 lines (42 loc) · 1.99 KB
/
test_multi_paddle.py
File metadata and controls
45 lines (42 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Copyright 2024- IBM Inc. All rights reserved
# SPDX-License-Identifier: Apache-2.0
import pytest
import torch
import paddle
from safetensors import safe_open
from fastsafetensors import cpp as fstcpp
from fastsafetensors import SafeTensorsFileLoader, SingleGroup, SafeTensorsMetadata
from fastsafetensors.common import paddle_loaded
def test_shuffle_paddle(fstcpp_log, input_files, pg_paddle):
    """Sharded-load test for the paddle framework backend.

    Loads ``input_files`` through ``SafeTensorsFileLoader`` with per-key shard
    dimensions (``h.<i>.mlp.c_proj.weight`` split along dim 0,
    ``h.<i>.mlp.c_fc.weight`` along dim 1, everything else replicated, i.e.
    dim -1), then verifies each materialized device tensor against the
    corresponding slice read directly from the file with safetensors'
    ``safe_open``.

    Args:
        fstcpp_log: fixture enabling fastsafetensors C++ debug logging.
        input_files: fixture providing the safetensors file path list.
        pg_paddle: fixture providing the paddle process-group wrapper.
    """
    if not paddle_loaded:
        # Previously the body was wrapped in `if paddle_loaded:` so the test
        # silently PASSED without paddle installed; skip explicitly instead.
        pytest.skip("paddle is not available")

    device = "gpu" if paddle.device.cuda.device_count() else "cpu"
    loader = SafeTensorsFileLoader(pg_paddle, device, nogds=True, debug_log=True, framework="paddle")
    try:
        loader.add_filenames({0: input_files})
        bufs = loader.copy_files_to_device()
        try:
            # -1 means "replicate"; the MLP weights are column/row sharded.
            key_dims = {key: -1 for key in loader.get_keys()}
            for layer in range(0, 12):
                key_dims[f"h.{layer}.mlp.c_proj.weight"] = 0
                key_dims[f"h.{layer}.mlp.c_fc.weight"] = 1
            tensors = bufs.as_dict(key_dims)

            # Hoist loop-invariant process-group queries out of the key loop.
            world_size = pg_paddle.process_group.size()
            rank = pg_paddle.process_group.rank()

            with safe_open(input_files[0], framework="pt") as f:
                for key in tensors.keys():
                    dim = key_dims[key]
                    if dim == 0 or dim == 1:
                        t = f.get_slice(key)
                        shape = t.get_shape()
                        # Ceil-divide so every rank gets a block and the last
                        # rank absorbs the remainder.
                        block_size = (shape[dim] + world_size - 1) // world_size
                        rank_slices = ()
                        for axis in range(0, len(shape)):
                            if axis < dim:
                                rank_slices += (slice(None, None, None),)
                            elif axis == dim:
                                rank_slices += (slice(rank * block_size, (rank + 1) * block_size, 1),)
                                break  # trailing axes default to full slices
                        t = t[rank_slices]
                        t = t.clone().detach()
                    else:
                        t = f.get_tensor(key)
                    # Compare via numpy round-trip so the torch-read reference
                    # lands on the same paddle device as the loaded tensor.
                    assert paddle.all(paddle.to_tensor(t.numpy(), place=loader.device).equal(tensors[key]))
        finally:
            # Was only closed on success; ensure device buffers are released
            # even when an assertion fails.
            bufs.close()
    finally:
        loader.close()