Skip to content

Commit 5415f4a

Browse files
committed
merge develop
2 parents c90ed7d + ed5f8b7 commit 5415f4a

7 files changed

+1169
-3
lines changed

.pre-commit-config.yaml

100644 → 100755
File mode changed.

graph_net/config/todo_torch_samples_list.txt

Lines changed: 577 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
graph-net-test-compiler-log [Processing] /work/GraphNet/samples/transformers-auto-model/hf-tiny-model-private_tiny-random-BlipModel
2+
graph-net-test-compiler-log [Config] model: hf-tiny-model-private/tiny-random-BlipModel
3+
graph-net-test-compiler-log [Config] device: cuda
4+
graph-net-test-compiler-log [Config] hardware: NVIDIA A100-SXM4-80GB
5+
graph-net-test-compiler-log [Config] compiler: inductor
6+
graph-net-test-compiler-log [Config] warmup: 5
7+
graph-net-test-compiler-log [Config] trials: 20
8+
graph-net-test-compiler-log [Config] compile_framework_version: 2.7.1+cu126
9+
[Profiling] Using device: cuda NVIDIA A100-SXM4-80GB, warm up 5, trials 20
10+
Trial 1: e2e=6.64949 ms, gpu=6.46451 ms
11+
Trial 2: e2e=6.58774 ms, gpu=6.47782 ms
12+
Trial 3: e2e=6.57296 ms, gpu=6.47270 ms
13+
Trial 4: e2e=6.59156 ms, gpu=6.47782 ms
14+
Trial 5: e2e=6.53505 ms, gpu=6.43277 ms
15+
Trial 6: e2e=6.50096 ms, gpu=6.40000 ms
16+
Trial 7: e2e=6.55699 ms, gpu=6.45632 ms
17+
Trial 8: e2e=8.09932 ms, gpu=7.97901 ms
18+
Trial 9: e2e=8.67844 ms, gpu=8.54016 ms
19+
Trial 10: e2e=9.74393 ms, gpu=9.60102 ms
20+
Trial 11: e2e=9.88483 ms, gpu=9.73722 ms
21+
Trial 12: e2e=7.18832 ms, gpu=7.06867 ms
22+
Trial 13: e2e=6.62160 ms, gpu=6.51571 ms
23+
Trial 14: e2e=6.54459 ms, gpu=6.43482 ms
24+
Trial 15: e2e=6.50930 ms, gpu=6.40819 ms
25+
Trial 16: e2e=6.50382 ms, gpu=6.40410 ms
26+
Trial 17: e2e=7.00188 ms, gpu=6.89766 ms
27+
Trial 18: e2e=6.83117 ms, gpu=6.72461 ms
28+
Trial 19: e2e=6.63400 ms, gpu=6.51878 ms
29+
Trial 20: e2e=6.61588 ms, gpu=6.51059 ms
30+
[Profiling] Using device: cuda NVIDIA A100-SXM4-80GB, warm up 5, trials 20
31+
Trial 1: e2e=3.27635 ms, gpu=3.16826 ms
32+
Trial 2: e2e=3.16405 ms, gpu=3.07814 ms
33+
Trial 3: e2e=3.12924 ms, gpu=3.04947 ms
34+
Trial 4: e2e=3.09730 ms, gpu=3.02285 ms
35+
Trial 5: e2e=3.04723 ms, gpu=2.97267 ms
36+
Trial 6: e2e=3.06034 ms, gpu=2.98598 ms
37+
Trial 7: e2e=3.05533 ms, gpu=2.98086 ms
38+
Trial 8: e2e=3.02792 ms, gpu=2.95526 ms
39+
Trial 9: e2e=3.03411 ms, gpu=2.96346 ms
40+
Trial 10: e2e=2.95377 ms, gpu=2.88563 ms
41+
Trial 11: e2e=2.94375 ms, gpu=2.87642 ms
42+
Trial 12: e2e=2.94924 ms, gpu=2.88154 ms
43+
Trial 13: e2e=2.93207 ms, gpu=2.86413 ms
44+
Trial 14: e2e=2.91133 ms, gpu=2.84467 ms
45+
Trial 15: e2e=2.89869 ms, gpu=2.82214 ms
46+
Trial 16: e2e=2.88749 ms, gpu=2.82112 ms
47+
Trial 17: e2e=2.88129 ms, gpu=2.81293 ms
48+
Trial 18: e2e=2.91824 ms, gpu=2.83648 ms
49+
Trial 19: e2e=2.90084 ms, gpu=2.83238 ms
50+
Trial 20: e2e=2.89059 ms, gpu=2.82419 ms
51+
graph-net-test-compiler-log [Datatype][eager]: float32 float32 float32 float32 float32 float32 float32 float32
52+
graph-net-test-compiler-log [Datatype][compiled]: float32 float32 float32 float32 float32 float32 float32 float32
53+
graph-net-test-compiler-log [DataType] eager:['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32'] compiled:['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32'] match:True
54+
graph-net-test-compiler-log [Correctness][equal]: 1 1 0 0 0 0 0 0
55+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-10_rtol_1.00E-06]: 1 1 0 0 0 0 0 0
56+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-10_rtol_2.56E-04]: 1 1 1 1 0 0 1 0
57+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-10_rtol_1.69E-12]: 1 1 0 0 0 0 0 0
58+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-14_rtol_1.00E-14]: 1 1 0 0 0 0 0 0
59+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-09_rtol_3.98E-06]: 1 1 0 0 0 0 1 0
60+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-09_rtol_5.85E-04]: 1 1 1 1 0 0 1 0
61+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-09_rtol_2.54E-11]: 1 1 0 0 0 0 0 0
62+
graph-net-test-compiler-log [Correctness][all_close_atol_2.51E-13_rtol_2.51E-13]: 1 1 0 0 0 0 0 0
63+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-08_rtol_1.58E-05]: 1 1 1 1 0 0 1 0
64+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-08_rtol_1.34E-03]: 1 1 1 1 0 0 1 0
65+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-08_rtol_3.82E-10]: 1 1 0 1 0 0 0 0
66+
graph-net-test-compiler-log [Correctness][all_close_atol_6.31E-12_rtol_6.31E-12]: 1 1 0 0 0 0 0 0
67+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-07_rtol_6.31E-05]: 1 1 1 1 0 0 1 0
68+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-07_rtol_3.06E-03]: 1 1 1 1 0 0 1 0
69+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-07_rtol_5.75E-09]: 1 1 0 1 0 0 1 0
70+
graph-net-test-compiler-log [Correctness][all_close_atol_1.58E-10_rtol_1.58E-10]: 1 1 0 0 0 0 0 0
71+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-06_rtol_2.51E-04]: 1 1 1 1 0 0 1 0
72+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-06_rtol_7.00E-03]: 1 1 1 1 0 0 1 0
73+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-06_rtol_8.65E-08]: 1 1 1 1 0 0 1 0
74+
graph-net-test-compiler-log [Correctness][all_close_atol_3.98E-09_rtol_3.98E-09]: 1 1 0 0 0 0 0 0
75+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-05_rtol_1.00E-03]: 1 1 1 1 0 0 1 0
76+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-05_rtol_1.60E-02]: 1 1 1 1 0 0 1 0
77+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-05_rtol_1.30E-06]: 1 1 1 1 0 0 1 0
78+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-07_rtol_1.00E-07]: 1 1 0 1 0 0 1 0
79+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-04_rtol_3.98E-03]: 1 1 1 1 0 0 1 0
80+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-04_rtol_3.66E-02]: 1 1 1 1 0 0 1 0
81+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-04_rtol_1.96E-05]: 1 1 1 1 0 0 1 0
82+
graph-net-test-compiler-log [Correctness][all_close_atol_2.51E-06_rtol_2.51E-06]: 1 1 1 1 0 0 1 0
83+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-03_rtol_1.58E-02]: 1 1 1 1 0 0 1 0
84+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-03_rtol_8.36E-02]: 1 1 1 1 0 0 1 0
85+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-03_rtol_2.94E-04]: 1 1 1 1 0 0 1 0
86+
graph-net-test-compiler-log [Correctness][all_close_atol_6.31E-05_rtol_6.31E-05]: 1 1 1 1 0 0 1 0
87+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-02_rtol_6.31E-02]: 1 1 1 1 0 0 1 0
88+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-02_rtol_1.91E-01]: 1 1 1 1 0 0 1 0
89+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-02_rtol_4.42E-03]: 1 1 1 1 0 0 1 0
90+
graph-net-test-compiler-log [Correctness][all_close_atol_1.58E-03_rtol_1.58E-03]: 1 1 1 1 0 0 1 0
91+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-01_rtol_2.51E-01]: 1 1 1 1 0 0 1 0
92+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-01_rtol_4.37E-01]: 1 1 1 1 0 0 1 0
93+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E-01_rtol_6.65E-02]: 1 1 1 1 0 0 1 0
94+
graph-net-test-compiler-log [Correctness][all_close_atol_3.98E-02_rtol_3.98E-02]: 1 1 1 1 0 0 1 0
95+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+00_rtol_1.00E+00]: 1 1 1 1 0 0 1 0
96+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+00_rtol_1.00E+00]: 1 1 1 1 0 0 1 0
97+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+00_rtol_1.00E+00]: 1 1 1 1 0 0 1 0
98+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+00_rtol_1.00E+00]: 1 1 1 1 0 0 1 0
99+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+01_rtol_3.98E+00]: 1 1 1 1 0 0 1 0
100+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+01_rtol_2.29E+00]: 1 1 1 1 0 0 1 0
101+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+01_rtol_1.50E+01]: 1 1 1 1 0 0 1 0
102+
graph-net-test-compiler-log [Correctness][all_close_atol_2.51E+01_rtol_2.51E+01]: 1 1 1 1 0 0 1 0
103+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+02_rtol_1.58E+01]: 1 1 1 1 0 0 1 0
104+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+02_rtol_5.23E+00]: 1 1 1 1 0 0 1 0
105+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+02_rtol_2.26E+02]: 1 1 1 1 0 0 1 0
106+
graph-net-test-compiler-log [Correctness][all_close_atol_6.31E+02_rtol_6.31E+02]: 1 1 1 1 0 0 1 0
107+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+03_rtol_6.31E+01]: 1 1 1 1 0 0 1 0
108+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+03_rtol_1.20E+01]: 1 1 1 1 0 0 1 0
109+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+03_rtol_3.40E+03]: 1 1 1 1 0 0 1 0
110+
graph-net-test-compiler-log [Correctness][all_close_atol_1.58E+04_rtol_1.58E+04]: 1 1 1 1 0 0 1 0
111+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+04_rtol_2.51E+02]: 1 1 1 1 0 0 1 0
112+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+04_rtol_2.73E+01]: 1 1 1 1 0 0 1 0
113+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+04_rtol_5.11E+04]: 1 1 1 1 0 0 1 0
114+
graph-net-test-compiler-log [Correctness][all_close_atol_3.98E+05_rtol_3.98E+05]: 1 1 1 1 0 0 1 0
115+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+05_rtol_1.00E+03]: 1 1 1 1 0 0 1 0
116+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+05_rtol_6.25E+01]: 1 1 1 1 0 0 1 0
117+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+05_rtol_7.69E+05]: 1 1 1 1 0 0 1 0
118+
graph-net-test-compiler-log [Correctness][all_close_atol_1.00E+07_rtol_1.00E+07]: 1 1 1 1 0 0 1 0
119+
graph-net-test-compiler-log [Correctness][max_diff]: 0.0 0.0 4.76837158203125e-07 7.450580596923828e-09 nan nan 4.470348358154297e-08 nan
120+
graph-net-test-compiler-log [Correctness][mean_diff]: 0.0 0.0 8.794346229024086e-08 2.4363089323742315e-09 nan nan 1.3707904145121574e-08 nan
121+
graph-net-test-compiler-log [Result] status: success
122+
graph-net-test-compiler-log [Performance][eager]: {"e2e": {"mean": 7.14259, "std": 1.04778, "min": 6.50096, "max": 9.88483}, "gpu": {"mean": 7.02612, "std": 1.03642, "min": 6.4, "max": 9.73722}}
123+
graph-net-test-compiler-log [Performance][compiled]: {"e2e": {"mean": 2.99796, "std": 0.106326, "min": 2.88129, "max": 3.27635}, "gpu": {"mean": 2.92393, "std": 0.0989045, "min": 2.81293, "max": 3.16826}}
124+
graph-net-test-compiler-log [Speedup][e2e]: 2.38248
125+
graph-net-test-compiler-log [Speedup][gpu]: 2.40297
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/bin/bash
2+
3+
GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(graph_net.__file__))")
4+
5+
LOG_FILE="$GRAPH_NET_ROOT/test/log_file_for_subgraph_decompose_and_evaluation_step.log"
6+
OUTPUT_DIR="/tmp/decompose_and_evaluation_workspace"
7+
TOLERANCE=3
8+
INITIAL_MAX_SIZE=2048
9+
10+
test_config_json_str=$(cat <<EOF
11+
{
12+
"module_name": "graph_net.torch.test_compiler",
13+
"arguments": {
14+
"compiler": "nope",
15+
"device": "cuda",
16+
"warmup": 5,
17+
"trials": 20
18+
}
19+
}
20+
EOF
21+
)
22+
23+
TEST_CONFIG_B64=$(echo "$test_config_json_str" | base64 -w 0)
24+
25+
echo "Starting GraphNet Auto-Debugger"
26+
echo "--------------------------------------------------------"
27+
echo "Log File: $LOG_FILE"
28+
echo "Output Dir: $OUTPUT_DIR"
29+
echo "Init Size: $INITIAL_MAX_SIZE"
30+
echo "--------------------------------------------------------"
31+
32+
python3 -m graph_net.torch.subgraph_decompose_and_evaluation_step \
33+
--log-file="$LOG_FILE" \
34+
--output-dir="$OUTPUT_DIR" \
35+
--test-config="$TEST_CONFIG_B64" \
36+
--tolerance="$TOLERANCE" \
37+
--max-subgraph-size="$INITIAL_MAX_SIZE"
38+
39+
if [ $? -ne 0 ]; then
40+
echo ""
41+
echo "[ERROR] Task failed! Please check logs and fix bugs before proceeding."
42+
exit 1
43+
fi
44+
45+
echo ""
46+
echo ">>> Pass execution finished."
47+
echo ">>> Run this script again to execute the NEXT pass if needed."

graph_net/torch/decompose_util.py

100644 → 100755
Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,13 +215,22 @@ def _get_submodule_inputs_and_outputs(
215215
)
216216
node_list = list(gm.graph.nodes)
217217

218+
def _hashable(obj):
219+
if isinstance(obj, slice):
220+
return ("__slice__", obj.start, obj.stop, obj.step)
221+
elif isinstance(obj, (list, tuple)):
222+
return tuple(_hashable(x) for x in obj)
223+
else:
224+
return obj
225+
218226
def get_related_node(node):
219227
for arg in node.args:
220228
if isinstance(arg, tuple):
221-
yield from arg
229+
for x in arg:
230+
yield _hashable(x)
222231
else:
223-
yield arg
224-
yield node
232+
yield _hashable(arg)
233+
yield _hashable(node)
225234

226235
for node in node_list[0:start_node_idx]:
227236
for related_node in get_related_node(node):

0 commit comments

Comments
 (0)