Skip to content

Commit a3bd8fe

Browse files
committed
Moved domain_oid check to prevent an unnecessary extraction rule from being processed
1 parent ccedd54 commit a3bd8fe

File tree

1 file changed

+37
-36
lines changed

1 file changed

+37
-36
lines changed

notebooks/enterprise-search/elastic-crawler-to-open-crawler-migration.ipynb

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -269,49 +269,50 @@
269269
" config_oid = source[\"configuration_oid\"]\n",
270270
" domain_oid = source[\"domain_oid\"]\n",
271271
"\n",
272-
" all_rules = source[\"rules\"]\n",
273-
" all_url_filters = source[\"url_filters\"]\n",
272+
" # ensure the domain oid actually exists in our in-memory data structure\n",
273+
" if domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]:\n",
274+
" all_rules = source[\"rules\"]\n",
275+
" all_url_filters = source[\"url_filters\"]\n",
274276
"\n",
275-
" # extract url filters\n",
276-
" url_filters = []\n",
277-
" if all_url_filters:\n",
278-
" url_filters = [\n",
279-
" {\n",
280-
" \"type\": all_url_filters[0][\"filter\"],\n",
281-
" \"pattern\": all_url_filters[0][\"pattern\"],\n",
282-
" }\n",
283-
" ]\n",
277+
" # extract url filters\n",
278+
" url_filters = []\n",
279+
" if all_url_filters:\n",
280+
" url_filters = [\n",
281+
" {\n",
282+
" \"type\": all_url_filters[0][\"filter\"],\n",
283+
" \"pattern\": all_url_filters[0][\"pattern\"],\n",
284+
" }\n",
285+
" ]\n",
284286
"\n",
285-
" # extract rulesets\n",
286-
" action_translation_map = {\n",
287-
" \"fixed\": \"set\",\n",
288-
" \"extracted\": \"extract\",\n",
289-
" }\n",
287+
" # extract rulesets\n",
288+
" action_translation_map = {\n",
289+
" \"fixed\": \"set\",\n",
290+
" \"extracted\": \"extract\",\n",
291+
" }\n",
290292
"\n",
291-
" ruleset = {}\n",
292-
" if all_rules:\n",
293-
" ruleset = [\n",
293+
" ruleset = {}\n",
294+
" if all_rules:\n",
295+
" ruleset = [\n",
296+
" {\n",
297+
" \"action\": action_translation_map[\n",
298+
" all_rules[0][\"content_from\"][\"value_type\"]\n",
299+
" ],\n",
300+
" \"field_name\": all_rules[0][\"field_name\"],\n",
301+
" \"selector\": all_rules[0][\"selector\"],\n",
302+
" \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n",
303+
" \"value\": all_rules[0][\"content_from\"][\"value\"],\n",
304+
" \"source\": all_rules[0][\"source_type\"],\n",
305+
" }\n",
306+
" ]\n",
307+
"\n",
308+
" # populate the in-memory data structure\n",
309+
" temp_extraction_rulesets = [\n",
294310
" {\n",
295-
" \"action\": action_translation_map[\n",
296-
" all_rules[0][\"content_from\"][\"value_type\"]\n",
297-
" ],\n",
298-
" \"field_name\": all_rules[0][\"field_name\"],\n",
299-
" \"selector\": all_rules[0][\"selector\"],\n",
300-
" \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n",
301-
" \"value\": all_rules[0][\"content_from\"][\"value\"],\n",
302-
" \"source\": all_rules[0][\"source_type\"],\n",
311+
" \"url_filters\": url_filters,\n",
312+
" \"rules\": ruleset,\n",
303313
" }\n",
304314
" ]\n",
305315
"\n",
306-
" # populate the in-memory data structure\n",
307-
" temp_extraction_rulesets = [\n",
308-
" {\n",
309-
" \"url_filters\": url_filters,\n",
310-
" \"rules\": ruleset,\n",
311-
" }\n",
312-
" ]\n",
313-
"\n",
314-
" if domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]:\n",
315316
" print(\n",
316317
" f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n",
317318
" )\n",

0 commit comments

Comments (0)