Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions samples/transformers-auto-model/Qwen1.5-0.5B/graph_net.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
"num_nodes_required": 1,
"dynamic": false,
"model_name": "Qwen/Qwen1.5-0.5B",
"heuristic_tag": "unknown"
}
"heuristic_tag": "nlp"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"framework": "torch",
"num_devices_required": 1,
"num_nodes_required": 1,
"source": "huggingface_hub",
"heuristic_tag": "nlp"
}
130 changes: 130 additions & 0 deletions tools/check_and_count_samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import os
import json


def check_completeness(samples_dir):
    """Verify that every sample directory under *samples_dir* is complete.

    A sample is any directory (outside ``shape_patches_*`` trees) that
    contains a ``model.py``. A complete sample additionally provides
    ``graph_hash.txt``, ``graph_net.json``, ``input_meta.py`` and
    ``weight_meta.py``. Missing files are reported on stdout.

    Returns:
        bool: True when every sample is complete, False otherwise.
    """
    samples_missing_hash = []
    samples_missing_json = []
    samples_missing_meta = []
    for root, _dirs, files in os.walk(samples_dir):
        # Skip shape-patch directories and anything that is not a sample.
        if "shape_patches_" in root or "model.py" not in files:
            continue
        if not os.path.exists(os.path.join(root, "graph_hash.txt")):
            samples_missing_hash.append(root)
        if not os.path.exists(os.path.join(root, "graph_net.json")):
            samples_missing_json.append(root)
        has_input = os.path.exists(os.path.join(root, "input_meta.py"))
        has_weight = os.path.exists(os.path.join(root, "weight_meta.py"))
        if not (has_input and has_weight):
            samples_missing_meta.append(root)

    all_samples_complete = not (
        samples_missing_hash or samples_missing_json or samples_missing_meta
    )

    if not all_samples_complete:
        print(f"Check completeness result for {samples_dir}:")
        print(f"1. {len(samples_missing_hash)} samples missing graph_hash.txt")
        for model_path in samples_missing_hash:
            print(f" - {model_path}")

        print(f"2. {len(samples_missing_json)} samples missing graph_net.json")
        for model_path in samples_missing_json:
            print(f" - {model_path}")

        print(
            f"3. {len(samples_missing_meta)} samples missing input_meta.py or weight_meta.py"
        )
        for model_path in samples_missing_meta:
            print(f" - {model_path}")
        print()

    return all_samples_complete


def check_redandancy(samples_dir):
    """Detect samples under *samples_dir* that share an identical graph hash.

    Walks the tree looking for ``graph_hash.txt`` files and groups sample
    directories by the hash they contain. Any hash owned by more than one
    sample is reported on stdout.

    NOTE(review): the function name retains a historical misspelling of
    "redundancy"; it is referenced by ``main`` and kept for compatibility.

    Returns:
        tuple: ``(has_duplicates, graph_hash2model_paths)`` where
        ``has_duplicates`` is True when any hash maps to more than one
        sample, and ``graph_hash2model_paths`` maps each hash to a sorted
        list of the sample paths that share it.
    """
    graph_hash2model_paths = {}
    for root, _dirs, files in os.walk(samples_dir):
        if "graph_hash.txt" in files:
            graph_hash_path = os.path.join(root, "graph_hash.txt")
            # Use a context manager so the handle is closed deterministically
            # (the original `open(...).read()` leaked the file object).
            with open(graph_hash_path) as f:
                graph_hash = f.read()
            # setdefault replaces the `in dict.keys()` membership test.
            graph_hash2model_paths.setdefault(graph_hash, []).append(root)

    has_duplicates = False
    print(f"Totally {len(graph_hash2model_paths)} unique graphs under {samples_dir}.")
    for graph_hash, model_paths in graph_hash2model_paths.items():
        # Sort before reporting so both the returned mapping and the printed
        # list are deterministic (os.walk order is platform-dependent).
        model_paths = sorted(model_paths)
        graph_hash2model_paths[graph_hash] = model_paths
        if len(model_paths) > 1:
            has_duplicates = True
            # Fixed typo in the message: "grap_hash" -> "graph_hash".
            print(f"Redundant models detected for graph_hash {graph_hash}:")
            for model_path in model_paths:
                print(f" {model_path}")
    return has_duplicates, graph_hash2model_paths


def count_samples(samples_dir, framework):
    """Count samples for *framework* and print a per-source breakdown.

    Each immediate subdirectory of *samples_dir* is treated as a model
    source. Every ``graph_net.json`` found below it counts as one sample,
    except that samples sharing the same valid ``model_name`` are counted
    only once (first occurrence wins). Samples without a usable
    ``model_name`` are always counted.
    """
    total = 0
    per_source = {}
    seen_model_names = set()
    for source in os.listdir(samples_dir):
        source_dir = os.path.join(samples_dir, source)
        if not os.path.isdir(source_dir):
            continue
        per_source[source] = 0
        for root, _dirs, files in os.walk(source_dir):
            if "graph_net.json" not in files:
                continue
            with open(os.path.join(root, "graph_net.json"), "r") as f:
                meta = json.load(f)
            model_name = meta.get("model_name", None)
            if model_name is not None and model_name != "NO_VALID_MATCH_FOUND":
                if model_name in seen_model_names:
                    # Same model already counted under another path: skip.
                    continue
                seen_model_names.add(model_name)
            total += 1
            per_source[source] += 1

    print(f"Number of {framework} samples: {total}")
    for name, number in per_source.items():
        print(f"- {name:24}: {number}")
    print()


def main():
    """CI entry point: verify sample completeness and uniqueness, then count.

    Scans both sample trees (torch and paddle), reporting every problem it
    finds, and exits with a non-zero status via ``SystemExit`` when any
    sample is incomplete or redundant so CI can fail the build.
    """
    filename = os.path.abspath(__file__)
    # Repository root is one level above the tools/ directory holding this file.
    root_dir = os.path.dirname(os.path.dirname(filename))

    framework2dirname = {
        "torch": "samples",
        "paddle": "paddle_samples",
    }

    all_samples_complete = True
    for samples_dirname in framework2dirname.values():
        samples_dir = os.path.join(root_dir, samples_dirname)
        # Call check_completeness before combining so every directory is
        # checked and reported; the original `all and check(...)` form
        # short-circuited and skipped later directories after one failure.
        is_complete = check_completeness(samples_dir)
        all_samples_complete = all_samples_complete and is_complete
    if not all_samples_complete:
        # Explicit exit instead of `assert`, which is stripped under -O.
        # Also fixes the "incompleted" typo in the message.
        raise SystemExit("Please fix the incomplete samples!")

    all_samples_has_duplicates = False
    for samples_dirname in framework2dirname.values():
        samples_dir = os.path.join(root_dir, samples_dirname)
        has_duplicates, _graph_hash2model_paths = check_redandancy(samples_dir)
        all_samples_has_duplicates = all_samples_has_duplicates or has_duplicates
        print()
    if all_samples_has_duplicates:
        raise SystemExit("Please remove the redundant samples!")

    for framework, samples_dirname in framework2dirname.items():
        count_samples(os.path.join(root_dir, samples_dirname), framework)


if __name__ == "__main__":
    main()
5 changes: 2 additions & 3 deletions tools/ci/check_validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ function prepare_torch_env() {
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126 > /dev/null
[ $? -ne 0 ] && LOG "[FATAL] Install torch2.9.0 failed!" && exit -1
else
python ${GRAPH_NET_EXTRACT_WORKSPACE}/tools/count_sample.py
LOG "[INFO] This pull request doesn't change any torch samples, skip the CI."
fi
}
Expand All @@ -62,7 +61,6 @@ function prepare_paddle_env() {
[ $? -ne 0 ] && LOG "[FATAL] Install paddlepaddle-develop failed!" && exit -1
python -c "import paddle; print('[PaddlePaddle Commit]', paddle.version.commit)"
else
python ${GRAPH_NET_EXTRACT_WORKSPACE}/tools/count_sample.py
LOG "[INFO] This pull request doesn't change any paddle samples, skip the CI."
fi
}
Expand Down Expand Up @@ -165,7 +163,8 @@ function main() {
check_validation_info=$(check_paddle_validation)
check_validation_code=$?
summary_problems $check_validation_code "$check_validation_info"
python ${GRAPH_NET_EXTRACT_WORKSPACE}/tools/count_sample.py
python ${GRAPH_NET_EXTRACT_WORKSPACE}/tools/check_and_count_samples.py >&2
[ $? -ne 0 ] && LOG "[FATAL] Check completeness or redundancy failed!" && exit -1
LOG "[INFO] check_validation run success and no error!"
}

Expand Down
40 changes: 0 additions & 40 deletions tools/count_sample.py

This file was deleted.