Skip to content

Commit 189ac02

Browse files
authored
test=develop, add distributed tools (#22623) (#22637)
1 parent 77428e8 commit 189ac02

File tree

4 files changed

+698
-1
lines changed

4 files changed

+698
-1
lines changed

python/paddle/fluid/incubate/fleet/utils/fleet_barrier_util.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
1616
from paddle.fluid.contrib.utils import HDFSClient
1717
import os
18+
import time
1819

1920

2021
def check_all_trainers_ready(ready_path, epoch):

python/paddle/fluid/incubate/fleet/utils/fleet_util.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,19 @@
2323
import time
2424
import paddle.fluid as fluid
2525
from paddle.fluid.log_helper import get_logger
26-
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
26+
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet as fleet_pslib
27+
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet as fleet_transpiler
2728
from . import hdfs
2829
from .hdfs import *
30+
from . import utils
2931

3032
# Public API of this module: only FleetUtil is exported by a star-import.
__all__ = ["FleetUtil"]
3133

3234
# Module-level logger shared by the helpers in this file; records are
# emitted at INFO level as "<time>-<level>: <message>".
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
3436

37+
# Default fleet implementation used by this module. FleetUtil.__init__
# rebinds this global according to its ``mode`` argument.
fleet = fleet_pslib
38+
3539

3640
class FleetUtil(object):
3741
"""
@@ -46,6 +50,16 @@ class FleetUtil(object):
4650
4751
"""
4852

53+
def __init__(self, mode="pslib"):
    """Select the fleet implementation bound to the module-level ``fleet``.

    Nothing is stored on the instance: the module-global ``fleet`` is
    rebound instead, so the most recently constructed ``FleetUtil``
    decides which implementation code in this module sees.

    Args:
        mode(str): ``"pslib"`` binds ``fleet`` to ``fleet_pslib``;
            ``"transpiler"`` binds it to ``fleet_transpiler``.
            Default is ``"pslib"``.

    Raises:
        ValueError: if ``mode`` is neither ``"pslib"`` nor
            ``"transpiler"``.
    """
    global fleet
    if mode == "pslib":
        chosen_fleet = fleet_pslib
    elif mode == "transpiler":
        chosen_fleet = fleet_transpiler
    else:
        raise ValueError(
            "Please choose one mode from [\"pslib\", \"transpiler\"]")
    # Rebind only after the mode has been validated.
    fleet = chosen_fleet
62+
4963
def rank0_print(self, s):
5064
"""
5165
Worker of rank 0 print some log.
@@ -1535,3 +1549,69 @@ def print_global_metrics(self,
15351549
(print_prefix, auc, bucket_error, mae, rmse,
15361550
actual_ctr, predicted_ctr, copc, mean_predict_qvalue,
15371551
total_ins_num))
1552+
1553+
def program_type_trans(self, prog_dir, prog_fn, is_text):
    """Forward to ``utils.program_type_trans`` and return its result.

    Args:
        prog_dir: forwarded unchanged as the first argument
            (presumably the directory holding the program file;
            confirm against ``utils.program_type_trans``).
        prog_fn: forwarded unchanged as the second argument
            (presumably the program file name; confirm).
        is_text: forwarded unchanged as the third argument
            (presumably whether the program file is in text format;
            confirm).

    Returns:
        Whatever ``utils.program_type_trans`` returns.
    """
    converted = utils.program_type_trans(prog_dir, prog_fn, is_text)
    return converted
1555+
1556+
def draw_from_program_file(self, model_filename, is_text, output_dir,
                           output_filename):
    """Load a program from a file and draw its global block.

    The program is loaded with ``utils.load_program`` and its global
    block is handed to ``utils.graphviz``.

    Args:
        model_filename: program file passed to ``utils.load_program``.
        is_text: second argument of ``utils.load_program``
            (presumably whether the file is in text format; confirm).
        output_dir: forwarded to ``utils.graphviz``.
        output_filename: forwarded to ``utils.graphviz``.
    """
    loaded_program = utils.load_program(model_filename, is_text)
    main_block = loaded_program.global_block()
    utils.graphviz(main_block, output_dir, output_filename)
1561+
1562+
def draw_from_program(self, program, output_dir, output_name):
    """Draw the global block of an already loaded program.

    Args:
        program: object exposing ``global_block()``.
        output_dir: forwarded to ``utils.graphviz``.
        output_name: forwarded to ``utils.graphviz``.
    """
    main_block = program.global_block()
    utils.graphviz(main_block, output_dir, output_name)
1565+
1566+
def check_two_programs(self, config):
    """Check a pruned program against its train program.

    Both programs are loaded with ``utils.load_program`` and compared by
    ``utils.check_pruned_program_vars``. The outcome is logged at INFO
    level and returned.

    Args:
        config: object providing the attributes read here:
            ``train_prog_path``, ``is_text_train_program``,
            ``pruned_prog_path``, ``is_text_pruned_program``, ``draw``
            and, when ``draw`` is truthy, ``draw_out_name``.

    Returns:
        The value returned by ``utils.check_pruned_program_vars``; a
        truthy value is reported as success.
    """
    train_program = utils.load_program(config.train_prog_path,
                                       config.is_text_train_program)
    pruned_program = utils.load_program(config.pruned_prog_path,
                                        config.is_text_pruned_program)

    if config.draw:
        # Use the pruned program's own directory as the output directory.
        self.draw_from_program(pruned_program,
                               os.path.dirname(config.pruned_prog_path),
                               config.draw_out_name)

    matched = utils.check_pruned_program_vars(train_program, pruned_program)
    if not matched:
        _logger.info(
            "check_programs failed. pruned program and train program not match!"
        )
    else:
        _logger.info("check_programs succeed.")
    return matched
1583+
1584+
def check_vars_and_dump(self, config):
    """Run ``utils.check_saved_vars_try_dump`` with settings from ``config``.

    A start message is logged before the call. The "succeed" message is
    logged whenever the call returns without raising, regardless of the
    value it returns.

    Args:
        config: object providing the attributes read here:
            ``dump_model_dir``, ``dump_program_filename``,
            ``is_text_dump_program``, ``feed_config``, ``fetch_config``,
            ``batch_size`` and ``save_params_filename``.

    Returns:
        Whatever ``utils.check_saved_vars_try_dump`` returns.
    """
    _logger.info("start check_vars_and_dump.")
    # Positional order matches the parameter order of the utils helper.
    dump_args = (
        config.dump_model_dir,
        config.dump_program_filename,
        config.is_text_dump_program,
        config.feed_config,
        config.fetch_config,
        config.batch_size,
        config.save_params_filename,
    )
    outcome = utils.check_saved_vars_try_dump(*dump_args)
    _logger.info("check_vars_and_dump succeed.")
    return outcome
1592+
1593+
def parse_program_proto(self, prog_path, is_text, output_dir):
    """
    Parse program.proto into a more readable format.

    The program is loaded with ``utils.load_program`` and passed to
    ``utils.parse_program``, which generates three files:
        output_dir/vars_all.log,
        output_dir/vars_persistable.log,
        output_dir/ops.log.

    Args:
        prog_path(str): path of the proto file to be parsed.
        is_text(bool): whether the proto file is in human-readable
            (text) format rather than binary.
        output_dir(str): directory that receives the generated files.

    Examples:
        .. code-block:: python

          from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil
          fleet_util = FleetUtil()
          program_path = "./program.pbtxt"
          is_text = True
          output_dir = "/tmp/"
          fleet_util.parse_program_proto(program_path, is_text, output_dir)
    """
    loaded_program = utils.load_program(prog_path, is_text)
    utils.parse_program(loaded_program, output_dir)

0 commit comments

Comments
 (0)