Skip to content

Commit cf4533d

Browse files
authored
Cherry pick install check (#18326)
* test=release/1.5, add multi-GPU install check * test=develop, refine code to use cuda_devices
1 parent c8d00cb commit cf4533d

File tree

1 file changed

+99
-24
lines changed

1 file changed

+99
-24
lines changed

python/paddle/fluid/install_check.py

Lines changed: 99 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,18 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from .framework import Program, program_guard, unique_name, default_startup_program
15+
import os
16+
from .framework import Program, program_guard, unique_name, cuda_places, cpu_places
1617
from .param_attr import ParamAttr
1718
from .initializer import Constant
1819
from . import layers
1920
from . import backward
2021
from .dygraph import Layer, nn
2122
from . import executor
22-
23+
from . import optimizer
2324
from . import core
25+
from . import compiler
26+
import logging
2427
import numpy as np
2528

2629
__all__ = ['run_check']
@@ -45,25 +48,97 @@ def run_check():
4548
This function should be called only if you need to verify the installation
4649
'''
4750
print("Running Verify Fluid Program ... ")
48-
prog = Program()
49-
startup_prog = Program()
50-
scope = core.Scope()
51-
with executor.scope_guard(scope):
52-
with program_guard(prog, startup_prog):
53-
with unique_name.guard():
54-
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
55-
inp = layers.data(
56-
name="inp", shape=[2, 2], append_batch_size=False)
57-
simple_layer = SimpleLayer("simple_layer")
58-
out = simple_layer(inp)
59-
param_grads = backward.append_backward(
60-
out, parameter_list=[simple_layer._fc1._w.name])[0]
61-
exe = executor.Executor(core.CPUPlace(
62-
) if not core.is_compiled_with_cuda() else core.CUDAPlace(0))
63-
exe.run(default_startup_program())
64-
exe.run(feed={inp.name: np_inp},
65-
fetch_list=[out.name, param_grads[1].name])
66-
67-
print(
68-
"Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now!"
69-
)
51+
52+
device_list = []
53+
if core.is_compiled_with_cuda():
54+
try:
55+
core.get_cuda_device_count()
56+
except Exception as e:
57+
logging.warning(
58+
"You are using GPU version Paddle Fluid, But Your CUDA Device is not set properly"
59+
"\n Original Error is {}".format(e))
60+
return 0
61+
device_list = cuda_places()
62+
else:
63+
device_list = [core.CPUPlace(), core.CPUPlace()]
64+
65+
np_inp_single = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
66+
inp = []
67+
for i in range(len(device_list)):
68+
inp.append(np_inp_single)
69+
np_inp_muti = np.array(inp)
70+
np_inp_muti = np_inp_muti.reshape(len(device_list), 2, 2)
71+
72+
def test_parallerl_exe():
73+
train_prog = Program()
74+
startup_prog = Program()
75+
scope = core.Scope()
76+
with executor.scope_guard(scope):
77+
with program_guard(train_prog, startup_prog):
78+
with unique_name.guard():
79+
build_strategy = compiler.BuildStrategy()
80+
build_strategy.enable_inplace = True
81+
build_strategy.memory_optimize = True
82+
inp = layers.data(name="inp", shape=[2, 2])
83+
simple_layer = SimpleLayer("simple_layer")
84+
out = simple_layer(inp)
85+
exe = executor.Executor(
86+
core.CUDAPlace(0) if core.is_compiled_with_cuda() and
87+
(core.get_cuda_device_count() > 0) else core.CPUPlace())
88+
loss = layers.mean(out)
89+
loss.persistable = True
90+
optimizer.SGD(learning_rate=0.01).minimize(loss)
91+
startup_prog.random_seed = 1
92+
compiled_prog = compiler.CompiledProgram(
93+
train_prog).with_data_parallel(
94+
build_strategy=build_strategy,
95+
loss_name=loss.name,
96+
places=device_list)
97+
exe.run(startup_prog)
98+
99+
exe.run(compiled_prog,
100+
feed={inp.name: np_inp_muti},
101+
fetch_list=[loss.name])
102+
103+
def test_simple_exe():
104+
train_prog = Program()
105+
startup_prog = Program()
106+
scope = core.Scope()
107+
with executor.scope_guard(scope):
108+
with program_guard(train_prog, startup_prog):
109+
with unique_name.guard():
110+
inp0 = layers.data(
111+
name="inp", shape=[2, 2], append_batch_size=False)
112+
simple_layer0 = SimpleLayer("simple_layer")
113+
out0 = simple_layer0(inp0)
114+
param_grads = backward.append_backward(
115+
out0, parameter_list=[simple_layer0._fc1._w.name])[0]
116+
exe0 = executor.Executor(
117+
core.CUDAPlace(0) if core.is_compiled_with_cuda() and
118+
(core.get_cuda_device_count() > 0) else core.CPUPlace())
119+
exe0.run(startup_prog)
120+
exe0.run(feed={inp0.name: np_inp_single},
121+
fetch_list=[out0.name, param_grads[1].name])
122+
123+
test_simple_exe()
124+
125+
print("Your Paddle Fluid works well on SINGLE GPU or CPU.")
126+
try:
127+
test_parallerl_exe()
128+
print("Your Paddle Fluid works well on MUTIPLE GPU or CPU.")
129+
print(
130+
"Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now"
131+
)
132+
except Exception as e:
133+
logging.warning(
134+
"Your Paddle Fluid has some problem with multiple GPU. This may be caused by:"
135+
"\n 1. There is only 1 or 0 GPU visible on your Device;"
136+
"\n 2. No.1 or No.2 GPU or both of them are occupied now"
137+
"\n 3. Wrong installation of NVIDIA-NCCL2, please follow instruction on https://github.com/NVIDIA/nccl-tests "
138+
"\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html"
139+
)
140+
141+
print("\n Original Error is: {}".format(e))
142+
print(
143+
"Your Paddle Fluid is installed successfully ONLY for SINGLE GPU or CPU! "
144+
"\n Let's start deep Learning with Paddle Fluid now")

0 commit comments

Comments
 (0)