Skip to content

Commit f5ca9e4

Browse files
authored
[Feature Enhancement] test_compiler supports XPU and supports using numpy to initialize tensors. (#329)
* test_compiler supports XPU. * Format code. * Optimize some code and support using numpy to initialize tensors.
1 parent f83ad6f commit f5ca9e4

File tree

5 files changed

+75
-34
lines changed

5 files changed

+75
-34
lines changed

graph_net/paddle/backend/cinn_backend.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@ def synchronize(self):
1919
if (
2020
paddle.device.is_compiled_with_cuda()
2121
or paddle.device.is_compiled_with_rocm()
22+
or paddle.device.is_compiled_with_xpu()
2223
):
2324
paddle.device.synchronize()

graph_net/paddle/backend/nope_backend.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ def synchronize(self):
1010
if (
1111
paddle.device.is_compiled_with_cuda()
1212
or paddle.device.is_compiled_with_rocm()
13+
or paddle.device.is_compiled_with_xpu()
1314
):
1415
paddle.device.synchronize()

graph_net/paddle/test_compiler.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import random
1313
import platform
1414
import traceback
15+
import subprocess
16+
import re
1517

1618
from graph_net.paddle import utils
1719
from graph_net import path_utils
@@ -40,12 +42,25 @@ def set_seed(random_seed):
4042

4143

4244
def get_hardward_name(args):
    """Return a display name for the hardware selected by ``args.device``.

    * GPU-like devices (as decided by ``test_compiler_util.is_gpu_device``)
      report the name of CUDA device 0.
    * ``"xpu"`` runs ``xpu-smi -L`` and takes the device name from the first
      output line shaped like ``XPU <index>: <name> (UUID: <uuid>)``.
    * ``"cpu"`` reports ``platform.processor()``.

    Any other device, or an XPU whose name cannot be determined, yields
    ``"unknown"``.
    """
    hardware = "unknown"
    if test_compiler_util.is_gpu_device(args.device):
        hardware = paddle.device.cuda.get_device_name(0)
    elif args.device == "xpu":
        try:
            output = subprocess.check_output(["xpu-smi", "-L"], text=True)
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # Best effort: a missing tool, a non-zero exit status or
            # undecodable output must not abort the run; fall back to
            # "unknown" instead.
            output = ""
        pattern = r"XPU\s+(\d+):\s+(.+?)\s+\(UUID:\s*([^)]+)\)"
        matches = (re.match(pattern, line) for line in output.splitlines())
        # Group 2 is the device name; the default covers "no line matched"
        # without relying on a swallowed StopIteration.
        hardware = next((m.group(2) for m in matches if m), "unknown")
    elif args.device == "cpu":
        hardware = platform.processor()
    return hardware
5065

5166

@@ -422,7 +437,7 @@ def test_multi_models(args):
422437
def main(args):
423438
assert os.path.isdir(args.model_path)
424439
assert args.compiler in {"cinn", "nope"}
425-
assert args.device in ["cuda", "dcu", "cpu"]
440+
assert args.device in ["cuda", "dcu", "xpu", "cpu"]
426441

427442
initalize_seed = 123
428443
set_seed(random_seed=initalize_seed)

graph_net/paddle/utils.py

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import inspect
99
import ast
1010
import math
11+
import numpy as np
1112
import paddle
1213

1314
kLiteralTensorSize = 64
@@ -139,6 +140,7 @@ def convert_to_valid_number(data_type, value):
139140

140141

141142
def convert_meta_classes_to_tensors(file_path):
143+
current_device = paddle.device.get_device()
142144
for name, cls in _get_classes(file_path):
143145
attrs = {
144146
k: v
@@ -159,7 +161,7 @@ def convert_meta_classes_to_tensors(file_path):
159161
"info": {
160162
"shape": attrs.get("shape", []),
161163
"dtype": data_type,
162-
"device": attrs.get("device", "gpu"),
164+
"device": attrs.get("device", current_device),
163165
"mean": convert_to_valid_number(data_type, attrs.get("mean", None)),
164166
"std": convert_to_valid_number(data_type, attrs.get("std", None)),
165167
"min_val": convert_to_valid_number(data_type, attrs.get("min_val", 0)),
@@ -188,7 +190,43 @@ def extract_dynamic_shapes(example_inputs):
188190
pass
189191

190192

191-
def replay_tensor(info):
193+
def init_integer_tensor(dtype, shape, min_val, max_val, use_numpy):
    """Create a random integer tensor with values in ``[min_val, max_val]``.

    Args:
        dtype: Integer dtype forwarded unchanged to the sampler.
        shape: Shape of the tensor to create.
        min_val: Smallest value that may be drawn.
        max_val: Largest value that may be drawn (inclusive).
        use_numpy: When true, sample with ``np.random.randint`` and wrap the
            array via ``paddle.to_tensor``; otherwise sample directly with
            ``paddle.randint``.

    Returns:
        The sampled paddle tensor.
    """
    # Both samplers are given ``max_val + 1`` as ``high`` so that ``max_val``
    # itself can be drawn.
    upper_bound = max_val + 1
    if not use_numpy:
        return paddle.randint(
            low=min_val, high=upper_bound, shape=shape, dtype=dtype
        )
    samples = np.random.randint(
        low=min_val, high=upper_bound, size=shape, dtype=dtype
    )
    return paddle.to_tensor(samples)
201+
202+
203+
def init_float_tensor(shape, mean, std, min_val, max_val, use_numpy):
    """Create a random float tensor bounded by ``min_val`` and ``max_val``.

    When both ``mean`` and ``std`` are given, values follow a normal
    distribution restricted to ``[min_val, max_val]``; otherwise they are
    drawn uniformly between ``min_val`` and ``max_val``.

    Args:
        shape: Shape of the tensor to create.
        mean: Mean of the normal distribution, or ``None``.
        std: Standard deviation of the normal distribution, or ``None``.
        min_val: Lower bound for generated values.
        max_val: Upper bound for generated values.
        use_numpy: When true, sample with numpy and convert the array via
            ``paddle.to_tensor``; otherwise use paddle's own samplers with
            dtype ``"float32"``.

    Returns:
        The sampled paddle tensor.
    """
    use_normal = not (mean is None or std is None)

    if not use_numpy:
        if not use_normal:
            return paddle.uniform(
                shape=shape, dtype="float32", min=min_val, max=max_val
            )
        result = paddle.empty(shape=shape, dtype="float32")
        truncated_normal = paddle.nn.initializer.TruncatedNormal(
            mean=mean, std=std, a=min_val, b=max_val
        )
        truncated_normal(result)
        return result

    if use_normal:
        samples = np.random.normal(mean, std, shape)
        # Rejection sampling: redraw only the out-of-range entries until
        # every value lies within [min_val, max_val].
        while True:
            out_of_range = (samples < min_val) | (samples > max_val)
            if not np.any(out_of_range):
                break
            samples[out_of_range] = np.random.normal(
                mean, std, out_of_range.sum()
            )
    else:
        samples = np.random.uniform(low=min_val, high=max_val, size=shape)
    return paddle.to_tensor(samples)
227+
228+
229+
def replay_tensor(info, use_numpy=True):
192230
device = info["info"]["device"]
193231
dtype = info["info"]["dtype"]
194232
shape = info["info"]["shape"]
@@ -201,27 +239,14 @@ def replay_tensor(info):
201239
shape = list(map(lambda i: i if i is not None else 1, shape))
202240
if "data" in info and info["data"] is not None:
203241
return paddle.reshape(info["data"], shape).to(dtype).to(device)
204-
elif dtype == paddle.int32 or dtype == paddle.int64:
205-
return paddle.cast(
206-
paddle.randint(low=min_val, high=max_val + 1, shape=shape, dtype="int64"),
207-
dtype,
208-
).to(device)
209-
elif dtype == paddle.bool:
210-
return paddle.cast(
211-
paddle.randint(low=0, high=2, shape=shape, dtype="int32"),
212-
paddle.bool,
213-
).to(device)
242+
elif dtype in [paddle.int32, paddle.int64, paddle.bool]:
243+
init_dtype = "int32" if dtype == paddle.bool else "int64"
244+
min_val, max_val = 0, 1 if dtype == paddle.bool else min_val, max_val
245+
return (
246+
init_integer_tensor(init_dtype, shape, min_val, max_val, use_numpy)
247+
.to(dtype)
248+
.to(device)
249+
)
214250
else:
215-
if mean is not None and std is not None:
216-
tensor = paddle.empty(shape=shape, dtype="float32")
217-
initializer = paddle.nn.initializer.TruncatedNormal(
218-
mean=mean, std=std, a=min_val, b=max_val
219-
)
220-
initializer(tensor)
221-
return tensor.to(dtype).to(device)
222-
else:
223-
return (
224-
paddle.uniform(shape=shape, dtype="float32", min=min_val, max=max_val)
225-
.to(dtype)
226-
.to(device)
227-
)
251+
tensor = init_float_tensor(shape, mean, std, min_val, max_val, use_numpy)
252+
return tensor.to(dtype).to(device)

graph_net/test_compiler_util.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,19 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
4949
flush=True,
5050
)
5151
selected_gpu_uuid, max_gpu_util, max_mem_util = None, 0.0, 0.0
52-
for i in range(5):
52+
for i in range(3):
5353
synchronizer_func()
5454
time.sleep(1)
5555

5656
cmd = [
5757
"nvidia-smi",
58+
f"--id={selected_gpu_id}",
5859
f"--query-gpu=index,gpu_uuid,utilization.gpu,memory.used,memory.total",
5960
"--format=csv,noheader,nounits",
6061
]
6162
output = subprocess.check_output(cmd).decode().strip()
6263
_, selected_gpu_uuid, gpu_util, used_mem, mem_total = next(
63-
line.split(", ")
64-
for line in output.split("\n")
65-
if line.strip() and int(line.split(", ")[0]) == selected_gpu_id
64+
line.split(", ") for line in output.split("\n") if line.strip()
6665
)
6766
gpu_util = float(gpu_util)
6867
mem_util = float(used_mem) * 100 / float(mem_total)
@@ -78,6 +77,7 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
7877
other_tasks = []
7978
cmd = [
8079
"nvidia-smi",
80+
f"--id={selected_gpu_id}",
8181
f"--query-compute-apps=gpu_uuid,pid,used_memory",
8282
"--format=csv,noheader,nounits",
8383
]
@@ -86,8 +86,7 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
8686
line
8787
for line in output.split("\n")
8888
if line.strip()
89-
and (line.split(", ")[0] == selected_gpu_uuid)
90-
and (line.split(", ")[1] != current_pid)
89+
if line.split(", ")[1] != current_pid
9190
]
9291
# Note: in docker container, the current_pid maybe different from that captured by nvidia-smi.
9392
print(

0 commit comments

Comments
 (0)