Skip to content

Commit c8c1937

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/GraphNet into subgraph_dataset
2 parents bf54b98 + 5b16d63 commit c8c1937

File tree

2,959 files changed

+52934
-5216
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,959 files changed

+52934
-5216
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
samples/transformers-auto-model/dbmdz_electra-large-discriminator-finetuned-conll03-english

graph_net/config/empty_cstr_torch_samples_list.txt

Lines changed: 151 additions & 487 deletions
Large diffs are not rendered by default.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
samples/transformers-auto-model/microsoft_xclip-base-patch32-16-frames

graph_net/dimension_generalizer.py

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
from pathlib import Path
1212
from dataclasses import asdict
1313
import graph_net.graph_net_json_file_util as gn_json
14+
from collections import OrderedDict
15+
import copy
16+
from graph_net.hash_util import get_sha256_hash
1417

1518

1619
class ApplyDimGenPasses:
@@ -49,7 +52,12 @@ def __call__(self, rel_model_path):
4952
output_dir = Path(self.config["output_dir"])
5053
output_dir.mkdir(parents=True, exist_ok=True)
5154
generalized_model_path = output_dir / rel_model_path
52-
if self.config["resume"] and (generalized_model_path / "model.py").exists():
55+
if (
56+
self.config["resume"]
57+
and generalized_model_path.exists()
58+
and generalized_model_path.is_dir()
59+
and len(list(generalized_model_path.iterdir())) > 0
60+
):
5361
return
5462
tensor_metas = self._get_tensor_metas(model_path)
5563
tensor_meta_attrs_list = [asdict(tensor_meta) for tensor_meta in tensor_metas]
@@ -64,6 +72,7 @@ def __call__(self, rel_model_path):
6472
)
6573
dim_axes_pairs = self._get_dim_axes_pairs(dyn_dim_cstrs)
6674
if len(dim_axes_pairs) == 0:
75+
# f-string is required: without the prefix the text "{model_path=}" is
# printed literally instead of the path of the sample being skipped.
print(f"No symbolic dims found. {model_path=}")
6776
return
6877

6978
def get_generalized():
@@ -74,19 +83,80 @@ def get_generalized():
7483
inputs=inputs,
7584
)
7685

77-
with get_generalized() as generalized_model_py_path:
78-
self._save_generalized_model_path(rel_model_path, generalized_model_py_path)
86+
with get_generalized() as tmp_model_py_path:
87+
from_model_path = Path(self.config["model_path_prefix"]) / rel_model_path
88+
triples = self._get_reified_tensor_metas(from_model_path, dyn_dim_cstrs)
89+
for symbol2example_value, cur_tensor_metas, cur_dyn_dim_cstrs in triples:
90+
to_model_path = self._get_to_model_path(
91+
rel_model_path, symbol2example_value
92+
)
93+
print(f"{str(to_model_path)=}")
94+
self._copy_sample_model_path(from_model_path, to_model_path)
95+
self._save_generalized_model_path(to_model_path, tmp_model_py_path)
96+
self._save_tensor_metas_as_weight_meta(to_model_path, cur_tensor_metas)
97+
self._save_dyn_dim_cstrs(to_model_path, cur_dyn_dim_cstrs)
7998

8099
self._check_num_handled_models()
81100

82-
def _save_generalized_model_path(self, rel_model_path, generalized_model_py_path):
83-
from_model_path = Path(self.config["model_path_prefix"]) / rel_model_path
84-
to_model_path = Path(self.config["output_dir"]) / rel_model_path
85-
print(f"{str(to_model_path)=}")
101+
def _get_reified_tensor_metas(self, from_model_path, dyn_dim_cstrs):
    """Yield one reified variant of the sample per tuple of reified dims.

    Args:
        from_model_path: Directory of the original sample.
        dyn_dim_cstrs: Dynamic-dimension constraints of the sample. Each
            variant is derived from a deep copy of this object.

    Yields:
        ``(symbol2example_value, tensor_metas, dyn_dim_cstrs)`` triples, where
        ``symbol2example_value`` is an ``OrderedDict`` pairing each symbol
        with its dim value and the other two items are per-variant deep
        copies updated for those values.
    """
    base_tensor_metas = self._get_tensor_metas(str(from_model_path))
    symbols, reified_dims = self._get_symbols_and_reified_dims(
        from_model_path, dyn_dim_cstrs
    )
    for dims in reified_dims:
        symbol2example_value = OrderedDict(zip(symbols, dims))
        # Deep copies keep the variants independent of each other.
        variant_cstrs = copy.deepcopy(dyn_dim_cstrs)
        variant_tensor_metas = copy.deepcopy(base_tensor_metas)
        variant_cstrs.update_symbol2example_value(symbol2example_value)
        update_tensor_metas_by_dyn_dim_cstr(variant_tensor_metas, variant_cstrs)
        yield symbol2example_value, variant_tensor_metas, variant_cstrs
113+
114+
def _get_symbols_and_reified_dims(self, from_model_path, dyn_dim_cstrs):
    """Return the symbols of a sample and the dim tuples its reifier produces.

    The reifier name is read from the sample's JSON file under the
    ``gn_json.kSymbolicDimensionReifier`` key and resolved via ``get_reifier``.

    Args:
        from_model_path: Directory of the original sample.
        dyn_dim_cstrs: Constraints whose ``symbols`` must equal the symbols
            reported by the reifier.

    Returns:
        ``(symbols, reified_dims)``; every entry of ``reified_dims`` has
        exactly one value per symbol.

    Raises:
        AssertionError: If the reifier does not match, reports more or less
            than one symbol group, or disagrees with ``dyn_dim_cstrs``.
    """
    sample_dir = str(from_model_path)
    sample_json = gn_json.read_json(sample_dir)
    reifier_name = sample_json[gn_json.kSymbolicDimensionReifier]
    from graph_net.torch.sym_dim_reifiers.reifier_mgr import get_reifier

    reifier = get_reifier(reifier_name)(sample_dir)
    # NOTE(review): if `match` is a method rather than a property/attribute,
    # this assertion is always true (bound methods are truthy) - confirm.
    assert reifier.match
    symbols2reified_dims = reifier.reify()
    # Exactly one group of symbols is supported per sample.
    assert len(symbols2reified_dims) == 1
    symbols, reified_dims = next(iter(symbols2reified_dims.items()))
    assert tuple(symbols) == tuple(dyn_dim_cstrs.symbols)
    assert all(len(symbols) == len(dims) for dims in reified_dims)
    return symbols, reified_dims
128+
129+
def _save_dyn_dim_cstrs(self, to_model_path, dyn_dim_cstrs):
130+
cstr_code = dyn_dim_cstrs.serialize_to_py_str()
131+
(to_model_path / "input_tensor_constraints.py").write_text(cstr_code)
132+
133+
def _save_tensor_metas_as_weight_meta(self, to_model_path, tensor_metas):
134+
weight_meta_code = "\n".join(
135+
tensor_meta.serialize_to_py_str() for tensor_meta in tensor_metas
136+
)
137+
(to_model_path / "weight_meta.py").write_text(weight_meta_code)
138+
139+
def _get_to_model_path(self, rel_model_path, symbol2example_value):
140+
sym_dim_str = "_".join(
141+
f"{sym_name}_{dim}"
142+
for symbol, dim in symbol2example_value.items()
143+
for sym_name in [symbol.name]
144+
)
145+
sub_module_name = f"{os.path.basename(rel_model_path)}__{sym_dim_str}"
146+
to_model_path = (
147+
Path(self.config["output_dir"]) / rel_model_path / sub_module_name
148+
)
149+
return to_model_path
150+
151+
def _copy_sample_model_path(self, from_model_path, to_model_path):
86152
to_model_path.mkdir(parents=True, exist_ok=True)
87153
shutil.copytree(Path(from_model_path), Path(to_model_path), dirs_exist_ok=True)
88-
generalized_model_py_code = Path(generalized_model_py_path).read_text()
154+
155+
def _save_generalized_model_path(self, to_model_path, tmp_model_py_path):
    """Store the generalized model source and its hash in ``to_model_path``.

    Writes the text of ``tmp_model_py_path`` to ``model.py`` and the
    ``get_sha256_hash`` of that same text to ``graph_hash.txt``.

    Args:
        to_model_path: Existing output directory (a ``pathlib.Path``).
        tmp_model_py_path: Path of the generated model source file.
    """
    model_code = Path(tmp_model_py_path).read_text()
    model_file = to_model_path / "model.py"
    model_file.write_text(model_code)
    hash_file = to_model_path / "graph_hash.txt"
    hash_file.write_text(get_sha256_hash(model_code))
90160

91161
def _get_dim_axes_pairs(self, dyn_dim_cstrs):
92162
sym_input_shapes = dyn_dim_cstrs.get_sorted_symbolic_input_shapes()

graph_net/hash_util.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import hashlib
2+
3+
4+
def get_sha256_hash(content):
    """Return the hexadecimal SHA-256 digest of a text string.

    Args:
        content: ``str`` to hash; encoded with ``str.encode()`` defaults.

    Returns:
        The digest as a 64-character lowercase hex string.
    """
    return hashlib.sha256(content.encode()).hexdigest()

graph_net/model_path_handler.py

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import traceback
21
import argparse
32
from graph_net.imp_util import load_module
43
import logging
54
import sys
65
import json
76
import base64
7+
import subprocess
88

99
logging.basicConfig(
1010
level=logging.WARNING, format="%(asctime)s [%(levelname)s] %(message)s"
@@ -37,33 +37,49 @@ def _get_handler(args):
3737

3838
def main(args):
3939
handler = _get_handler(args)
40-
for model_path in _get_model_paths(args):
41-
print(f"{model_path=}")
40+
if args.model_path is not None:
41+
handle_model_path(handler, args.model_path)
42+
elif args.use_subprocess:
43+
handle_model_path_list_in_subprocess(args)
44+
else:
45+
handle_model_path_list_in_current_process(handler, args)
46+
47+
48+
def handle_model_path_list_in_current_process(handler, args):
49+
for model_path in _get_model_path_list(args):
4250
try:
43-
handler(model_path)
51+
handle_model_path(handler, model_path)
4452
except KeyboardInterrupt:
45-
sys.exit(-1)
46-
except Exception as e:
47-
print("--- Concise Error Message ---")
48-
print(e)
53+
print("KeyboardInterrupt")
54+
return
4955

50-
print("\n--- Full Traceback ---")
51-
traceback.print_exc()
5256

57+
def handle_model_path_list_in_subprocess(args):
    """Handle every listed model path in its own child Python process.

    For each path from ``_get_model_path_list(args)`` this runs
    ``python -m graph_net.model_path_handler --model-path=<path>`` with the
    current interpreter and waits for it to finish. A failing child does not
    stop the loop; its non-zero exit status is reported and the next path is
    processed.

    Args:
        args: Parsed command-line arguments; ``model_path_list`` and
            ``handler_config`` are read.
    """
    for model_path in _get_model_path_list(args):
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters reach the child verbatim. The "--opt=value" form
        # also keeps values that start with "-" from being read as options.
        cmd = [
            sys.executable,
            "-m",
            "graph_net.model_path_handler",
            f"--model-path={model_path}",
        ]
        if args.handler_config is not None:
            cmd.append(f"--handler-config={args.handler_config}")
        try:
            returncode = subprocess.run(cmd, check=False).returncode
        except KeyboardInterrupt:
            print("KeyboardInterrupt")
            return
        if returncode != 0:
            print(f"{model_path=} failed with {returncode=}", flush=True)
5365

54-
def _get_model_paths(args):
55-
assert args.model_path is not None or args.model_path_list is not None
56-
if args.model_path is not None:
57-
yield args.model_path
58-
if args.model_path_list is not None:
59-
with open(args.model_path_list) as f:
60-
yield from (
61-
clean_line
62-
for line in f
63-
for clean_line in [line.strip()]
64-
if len(clean_line) > 0
65-
if not clean_line.startswith("#")
66-
)
66+
67+
def handle_model_path(handler, model_path):
    """Announce ``model_path`` on stdout, then pass it to ``handler``.

    Args:
        handler: Callable taking a single model path.
        model_path: Path handed to ``handler`` unchanged.
    """
    # Flushed immediately so the line appears before any handler output.
    print(f"{model_path=}", flush=True)
    handler(model_path)
70+
71+
72+
def _get_model_path_list(args):
73+
assert args.model_path is None
74+
assert args.model_path_list is not None
75+
with open(args.model_path_list) as f:
76+
yield from (
77+
clean_line
78+
for line in f
79+
for clean_line in [line.strip()]
80+
if len(clean_line) > 0
81+
if not clean_line.startswith("#")
82+
)
6783

6884

6985
if __name__ == "__main__":
@@ -89,5 +105,11 @@ def _get_model_paths(args):
89105
default=None,
90106
help="handler configuration string",
91107
)
108+
parser.add_argument(
109+
"--use-subprocess",
110+
action="store_true",
111+
default=False,
112+
help="use subprocess",
113+
)
92114
args = parser.parse_args()
93115
main(args=args)

graph_net/test/decomposer_validator_test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,4 @@ echo "Results saved in: $FILE_PATH/ES_result.png"
4848
echo ""
4949
echo "IMPORTANT: Please verify if the curve in ES_result.png is a straight line"
5050
echo "If the curve is NOT a straight line, please check the log file: $FILE_PATH/log.log"
51-
echo "=================================================="
51+
echo "=================================================="

graph_net/test/naive_graph_decomposer_test.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,16 @@ config_json_str=$(cat <<EOF
1111
"handler_path": "$GRAPH_NET_ROOT/torch/naive_graph_decomposer.py",
1212
"handler_class_name": "NaiveDecomposerExtractor",
1313
"handler_config": {
14+
"model_path_prefix": "$GRAPH_NET_ROOT/../",
1415
"output_dir": "/tmp/naive_decompose_workspace",
1516
"split_positions": [8, 16, 32],
16-
"group_head_and_tail": true,
17-
"filter_path":"$GRAPH_NET_ROOT/torch/naive_subgraph_filter.py",
18-
"filter_config": {}
17+
"chain_style": true,
18+
"group_head_and_tail": true
1919
}
2020
}
2121
EOF
2222
)
2323
CONFIG=$(echo $config_json_str | base64 -w 0)
2424

25-
python3 -m graph_net.model_path_handler --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --handler-config=$CONFIG
25+
# python3 -m graph_net.model_path_handler --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --handler-config=$CONFIG
26+
python3 -m graph_net.model_path_handler --model-path-list $GRAPH_NET_ROOT/config/decomposition_error_tmp_torch_samples_list.txt --handler-config=$CONFIG

graph_net/tools/_get_in_tensor_symbolic_shapes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import sympy
12
from pathlib import Path
23
from graph_net.dynamic_dim_constraints import DynamicDimConstraints
34
import graph_net.graph_net_json_file_util as gn_json
@@ -27,6 +28,10 @@ def __call__(self, model_path):
2728
dyn_dim_cstrs = DynamicDimConstraints.unserialize_from_py_file(
2829
str(input_tensor_cstr_filepath)
2930
)
31+
for shape, name in dyn_dim_cstrs.input_shapes:
32+
if not any(isinstance(dim, sympy.Expr) for dim in shape):
33+
continue
34+
print(f"{shape=} {name=}")
3035
input_shapes_str = str(dyn_dim_cstrs.serialize_symbolic_input_shapes_to_str())
3136
print(f"get-in-tensor-symbolic-shapes {input_shapes_str} {model_path}")
3237

graph_net/tools/batch_apply_dim_gen_passes.sh renamed to graph_net/tools/apply_dim_gen_passes.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ config_json_str=$(cat <<EOF
1111
"handler_path": "$GRAPH_NET_ROOT/dimension_generalizer.py",
1212
"handler_class_name": "ApplyDimGenPasses",
1313
"handler_config": {
14-
"resume": true,
14+
"resume": false,
1515
"output_dir": "/tmp/dimension_generalized_samples",
1616
"model_path_prefix": "$GRAPH_NET_ROOT/../",
1717
"dimension_generalizer_filepath": "$GRAPH_NET_ROOT/torch/static_to_dynamic.py",
1818
"dimension_generalizer_class_name": "StaticToDynamic",
19-
"limits_handled_models": 9999999,
19+
"limits_handled_models": 10,
2020
"last_model_log_file": "/tmp/a.py"
2121
}
2222
}

0 commit comments

Comments
 (0)