Skip to content

Commit 04f3c15

Browse files
authored
[New Sample] Add NLP samples, including rocketqa, uer, utc models. (#260)
* Add rocketqa models. * Update graph_hash. * Remove redundant samples. * Optimize check_redundant_incrementally.py. * Add uer samples. * Add utc samples. * Fix path.
1 parent 57b5f8b commit 04f3c15

File tree

96 files changed

+57246
-22
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+57246
-22
lines changed

.github/workflows/Validate-GPU.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161
-v "/home/data/cfs/.ccache:/root/.ccache" \
6262
-v "/dev/shm:/dev/shm" \
6363
-v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
64-
-v ${{ github.workspace }}:/graphnet \
64+
-v ${{ github.workspace }}:${{ github.workspace }} \
6565
-e python \
6666
-e core_index \
6767
-e BRANCH \
@@ -73,7 +73,7 @@ jobs:
7373
-e CACHE_DIR \
7474
-e GITHUB_API_TOKEN \
7575
-e CFS_DIR \
76-
-w /graphnet --network host ${docker_image}
76+
-w ${{ github.workspace }} --network host ${docker_image}
7777
7878
- name: Run check
7979
env:

graph_net/paddle/check_redundant_incrementally.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,26 @@ def main(args):
4646
assert os.path.isdir(
4747
args.graph_net_samples_path
4848
), f"args.graph_net_samples_path ({args.graph_net_samples_path}) is not a directory!"
49+
50+
current_model_graph_hash_pathes = set()
51+
if args.model_path:
52+
assert os.path.isdir(
53+
args.model_path
54+
), f"args.model_path {args.model_path} is not a directory!"
55+
current_model_graph_hash_pathes = set(
56+
graph_hash_path
57+
for model_path in get_recursively_model_pathes(args.model_path)
58+
for graph_hash_path in [f"{model_path}/graph_hash.txt"]
59+
)
60+
4961
find_redundant = False
5062
graph_hash2graph_net_model_path = {}
5163
for model_path in get_recursively_model_pathes(args.graph_net_samples_path):
5264
graph_hash_path = f"{model_path}/graph_hash.txt"
53-
if os.path.isfile(graph_hash_path):
65+
if (
66+
os.path.isfile(graph_hash_path)
67+
and graph_hash_path not in current_model_graph_hash_pathes
68+
):
5469
graph_hash = open(graph_hash_path).read()
5570
if graph_hash not in graph_hash2graph_net_model_path.keys():
5671
graph_hash2graph_net_model_path[graph_hash] = [graph_hash_path]
@@ -60,29 +75,24 @@ def main(args):
6075
print(
6176
f"Totally {len(graph_hash2graph_net_model_path)} unique samples under {args.graph_net_samples_path}."
6277
)
63-
for graph_hash, graph_paths in graph_hash2graph_net_model_path.items():
64-
if len(graph_paths) > 1:
65-
print(f"Redundant models detected for grap_hash {graph_hash}:")
66-
for model_path in graph_paths:
67-
print(f" {model_path}")
68-
assert (
69-
not find_redundant
70-
), f"Redundant models detected under {args.graph_net_samples_path}."
7178

7279
if args.model_path:
73-
assert os.path.isdir(
74-
args.model_path
75-
), f"args.model_path {args.model_path} is not a directory!"
76-
current_model_graph_hash_pathes = set(
77-
graph_hash_path
78-
for model_path in get_recursively_model_pathes(args.model_path)
79-
for graph_hash_path in [f"{model_path}/graph_hash.txt"]
80-
)
80+
# Check whether the specified model is redundant.
8181
for current_model_graph_hash_path in current_model_graph_hash_pathes:
8282
graph_hash = open(current_model_graph_hash_path).read()
8383
assert (
8484
graph_hash not in graph_hash2graph_net_model_path
85-
), f"Redundant models detected. old-model-path:{current_model_graph_hash_path}, new-model-path:{graph_hash2graph_net_model_path[graph_hash]}."
85+
), f"Redundant models detected.\n\tgraph_hash:{graph_hash}, newly-added-model-path:{current_model_graph_hash_path}, existing-model-path:{graph_hash2graph_net_model_path[graph_hash]}."
86+
else:
87+
# Check whether there are redundant samples under samples directory.
88+
for graph_hash, graph_paths in graph_hash2graph_net_model_path.items():
89+
if len(graph_paths) > 1:
90+
print(f"Redundant models detected for grap_hash {graph_hash}:")
91+
for model_path in graph_paths:
92+
print(f" {model_path}")
93+
assert (
94+
not find_redundant
95+
), f"Redundant models detected under {args.graph_net_samples_path}."
8696

8797

8898
if __name__ == "__main__":

graph_net/paddle/samples_util.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33

44

55
def get_default_samples_directory():
6-
return f"{os.path.dirname(graph_net.__file__)}/../paddle_samples"
6+
graph_net_root = os.path.dirname(os.path.dirname(graph_net.__file__))
7+
return f"{graph_net_root}/paddle_samples"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
62cc3d05adaf6e4219e2b653fec24cce7290406e2f80064a1e914ebc82570775
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"framework": "paddle",
3-
"model_name": "ernie-search-base-dual-encoder-marco-en",
3+
"model_name": "rocketqa-base-cross-encoder",
44
"num_devices_required": 1,
55
"num_nodes_required": 1
66
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
class Program_weight_tensor_data_0:
2+
name = "data_0"
3+
shape = [1, 21]
4+
dtype = "int64"
5+
data = [
6+
1,
7+
6368,
8+
30,
9+
3441,
10+
5254,
11+
2775,
12+
7208,
13+
42,
14+
1675,
15+
6433,
16+
7946,
17+
4640,
18+
31618,
19+
7476,
20+
34874,
21+
1662,
22+
4968,
23+
36810,
24+
9478,
25+
42,
26+
2,
27+
]
28+
29+
30+
class Program_weight_tensor_data_1:
31+
name = "data_1"
32+
shape = [1, 21]
33+
dtype = "int64"
34+
data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

0 commit comments

Comments
 (0)