Skip to content

Commit 7c1087c

Browse files
authored
Igbh dataset download script improvement (#632)
* changes for igbh dataset download * changed logic precedence * handle if env not in dependency
1 parent 1336831 commit 7c1087c

File tree

2 files changed

+63
-41
lines changed

2 files changed

+63
-41
lines changed

automation/script/module.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3605,7 +3605,9 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
36053605
"update_tags_from_env_with_prefix", {})
36063606
for t in update_tags_from_env_with_prefix:
36073607
for key in update_tags_from_env_with_prefix[t]:
3608-
if str(env.get(key, '')).strip() != '':
3608+
if str(d.get('env', {}).get(key, '')).strip() != '':
3609+
d['tags'] += "," + t + str(d.get('env')[key])
3610+
elif str(env.get(key, '')).strip() != '':
36093611
d['tags'] += "," + t + str(env[key])
36103612

36113613
for key in clean_env_keys_deps:

script/get-dataset-mlperf-inference-igbh/_cm.yaml

Lines changed: 60 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ prehook_deps:
3131
extra_cache_tags: dataset,igbh,paper,node_feat
3232
force_cache: true
3333
enable_if_env:
34-
CM_IGBH_DATASET_TYPE: full
34+
CM_IGBH_DATASET_TYPE:
35+
- 'full'
3536
names:
3637
- dae
37-
tags: download-and-extract
38+
tags: download-and-extract,_wget
3839
update_tags_from_env_with_prefix:
3940
_url.:
4041
- CM_PACKAGE_URL
@@ -46,10 +47,11 @@ prehook_deps:
4647
extra_cache_tags: dataset,igbh,paper,node_label_19
4748
force_cache: true
4849
enable_if_env:
49-
CM_IGBH_DATASET_TYPE: full
50+
CM_IGBH_DATASET_TYPE:
51+
- 'full'
5052
names:
5153
- dae
52-
tags: download-and-extract
54+
tags: download-and-extract,_wget
5355
update_tags_from_env_with_prefix:
5456
_url.:
5557
- CM_PACKAGE_URL
@@ -61,10 +63,11 @@ prehook_deps:
6163
extra_cache_tags: dataset,igbh,paper,node_label_2K
6264
force_cache: true
6365
enable_if_env:
64-
CM_IGBH_DATASET_TYPE: full
66+
CM_IGBH_DATASET_TYPE:
67+
- 'full'
6568
names:
6669
- dae
67-
tags: download-and-extract
70+
tags: download-and-extract,_wget
6871
update_tags_from_env_with_prefix:
6972
_url.:
7073
- CM_PACKAGE_URL
@@ -76,10 +79,11 @@ prehook_deps:
7679
extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping
7780
force_cache: true
7881
enable_if_env:
79-
CM_IGBH_DATASET_TYPE: full
82+
CM_IGBH_DATASET_TYPE:
83+
- 'full'
8084
names:
8185
- dae
82-
tags: download-and-extract
86+
tags: download-and-extract,_wget
8387
update_tags_from_env_with_prefix:
8488
_url.:
8589
- CM_PACKAGE_URL
@@ -92,10 +96,11 @@ prehook_deps:
9296
extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index
9397
force_cache: true
9498
enable_if_env:
95-
CM_IGBH_DATASET_TYPE: full
99+
CM_IGBH_DATASET_TYPE:
100+
- 'full'
96101
names:
97102
- dae
98-
tags: download-and-extract
103+
tags: download-and-extract,_wget
99104
update_tags_from_env_with_prefix:
100105
_url.:
101106
- CM_PACKAGE_URL
@@ -108,10 +113,11 @@ prehook_deps:
108113
extra_cache_tags: dataset,igbh,author,author_id_index_mapping
109114
force_cache: true
110115
enable_if_env:
111-
CM_IGBH_DATASET_TYPE: full
116+
CM_IGBH_DATASET_TYPE:
117+
- 'full'
112118
names:
113119
- dae
114-
tags: download-and-extract
120+
tags: download-and-extract,_wget
115121
update_tags_from_env_with_prefix:
116122
_url.:
117123
- CM_PACKAGE_URL
@@ -123,10 +129,11 @@ prehook_deps:
123129
extra_cache_tags: dataset,igbh,author,node_feat
124130
force_cache: true
125131
enable_if_env:
126-
CM_IGBH_DATASET_TYPE: full
132+
CM_IGBH_DATASET_TYPE:
133+
- 'full'
127134
names:
128135
- dae
129-
tags: download-and-extract
136+
tags: download-and-extract,_wget
130137
update_tags_from_env_with_prefix:
131138
_url.:
132139
- CM_PACKAGE_URL
@@ -139,10 +146,11 @@ prehook_deps:
139146
extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping
140147
force_cache: true
141148
enable_if_env:
142-
CM_IGBH_DATASET_TYPE: full
149+
CM_IGBH_DATASET_TYPE:
150+
- 'full'
143151
names:
144152
- dae
145-
tags: download-and-extract
153+
tags: download-and-extract,_wget
146154
update_tags_from_env_with_prefix:
147155
_url.:
148156
- CM_PACKAGE_URL
@@ -154,10 +162,11 @@ prehook_deps:
154162
extra_cache_tags: dataset,igbh,conference,node_feat
155163
force_cache: true
156164
enable_if_env:
157-
CM_IGBH_DATASET_TYPE: full
165+
CM_IGBH_DATASET_TYPE:
166+
- 'full'
158167
names:
159168
- dae
160-
tags: download-and-extract
169+
tags: download-and-extract,_wget
161170
update_tags_from_env_with_prefix:
162171
_url.:
163172
- CM_PACKAGE_URL
@@ -170,10 +179,11 @@ prehook_deps:
170179
extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping
171180
force_cache: true
172181
enable_if_env:
173-
CM_IGBH_DATASET_TYPE: full
182+
CM_IGBH_DATASET_TYPE:
183+
- 'full'
174184
names:
175185
- dae
176-
tags: download-and-extract
186+
tags: download-and-extract,_wget
177187
update_tags_from_env_with_prefix:
178188
_url.:
179189
- CM_PACKAGE_URL
@@ -185,10 +195,11 @@ prehook_deps:
185195
extra_cache_tags: dataset,igbh,institute,node_feat
186196
force_cache: true
187197
enable_if_env:
188-
CM_IGBH_DATASET_TYPE: full
198+
CM_IGBH_DATASET_TYPE:
199+
- 'full'
189200
names:
190201
- dae
191-
tags: download-and-extract
202+
tags: download-and-extract,_wget
192203
update_tags_from_env_with_prefix:
193204
_url.:
194205
- CM_PACKAGE_URL
@@ -201,10 +212,11 @@ prehook_deps:
201212
extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping
202213
force_cache: true
203214
enable_if_env:
204-
CM_IGBH_DATASET_TYPE: full
215+
CM_IGBH_DATASET_TYPE:
216+
- 'full'
205217
names:
206218
- dae
207-
tags: download-and-extract
219+
tags: download-and-extract,_wget
208220
update_tags_from_env_with_prefix:
209221
_url.:
210222
- CM_PACKAGE_URL
@@ -216,10 +228,11 @@ prehook_deps:
216228
extra_cache_tags: dataset,igbh,journal,node_feat
217229
force_cache: true
218230
enable_if_env:
219-
CM_IGBH_DATASET_TYPE: full
231+
CM_IGBH_DATASET_TYPE:
232+
- 'full'
220233
names:
221234
- dae
222-
tags: download-and-extract
235+
tags: download-and-extract,_wget
223236
update_tags_from_env_with_prefix:
224237
_url.:
225238
- CM_PACKAGE_URL
@@ -232,10 +245,11 @@ prehook_deps:
232245
extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping
233246
force_cache: true
234247
enable_if_env:
235-
CM_IGBH_DATASET_TYPE: full
248+
CM_IGBH_DATASET_TYPE:
249+
- 'full'
236250
names:
237251
- dae
238-
tags: download-and-extract
252+
tags: download-and-extract,_wget
239253
update_tags_from_env_with_prefix:
240254
_url.:
241255
- CM_PACKAGE_URL
@@ -247,10 +261,11 @@ prehook_deps:
247261
extra_cache_tags: dataset,igbh,fos,node_feat
248262
force_cache: true
249263
enable_if_env:
250-
CM_IGBH_DATASET_TYPE: full
264+
CM_IGBH_DATASET_TYPE:
265+
- 'full'
251266
names:
252267
- dae
253-
tags: download-and-extract
268+
tags: download-and-extract,_wget
254269
update_tags_from_env_with_prefix:
255270
_url.:
256271
- CM_PACKAGE_URL
@@ -263,10 +278,11 @@ prehook_deps:
263278
extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index
264279
force_cache: true
265280
enable_if_env:
266-
CM_IGBH_DATASET_TYPE: full
281+
CM_IGBH_DATASET_TYPE:
282+
- 'full'
267283
names:
268284
- dae
269-
tags: download-and-extract
285+
tags: download-and-extract,_wget
270286
update_tags_from_env_with_prefix:
271287
_url.:
272288
- CM_PACKAGE_URL
@@ -279,10 +295,11 @@ prehook_deps:
279295
extra_cache_tags: dataset,igbh,paper_published_journal,edge_index
280296
force_cache: true
281297
enable_if_env:
282-
CM_IGBH_DATASET_TYPE: full
298+
CM_IGBH_DATASET_TYPE:
299+
- 'full'
283300
names:
284301
- dae
285-
tags: download-and-extract
302+
tags: download-and-extract,_wget
286303
update_tags_from_env_with_prefix:
287304
_url.:
288305
- CM_PACKAGE_URL
@@ -295,10 +312,11 @@ prehook_deps:
295312
extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index
296313
force_cache: true
297314
enable_if_env:
298-
CM_IGBH_DATASET_TYPE: full
315+
CM_IGBH_DATASET_TYPE:
316+
- 'full'
299317
names:
300318
- dae
301-
tags: download-and-extract
319+
tags: download-and-extract,_wget
302320
update_tags_from_env_with_prefix:
303321
_url.:
304322
- CM_PACKAGE_URL
@@ -311,10 +329,11 @@ prehook_deps:
311329
extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index
312330
force_cache: true
313331
enable_if_env:
314-
CM_IGBH_DATASET_TYPE: full
332+
CM_IGBH_DATASET_TYPE:
333+
- 'full'
315334
names:
316335
- dae
317-
tags: download-and-extract
336+
tags: download-and-extract,_wget
318337
update_tags_from_env_with_prefix:
319338
_url.:
320339
- CM_PACKAGE_URL
@@ -327,10 +346,11 @@ prehook_deps:
327346
extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index
328347
force_cache: true
329348
enable_if_env:
330-
CM_IGBH_DATASET_TYPE: full
349+
CM_IGBH_DATASET_TYPE:
350+
- 'full'
331351
names:
332352
- dae
333-
tags: download-and-extract
353+
tags: download-and-extract,_wget
334354
update_tags_from_env_with_prefix:
335355
_url.:
336356
- CM_PACKAGE_URL

0 commit comments

Comments
 (0)