Skip to content

Commit c7debe3

Browse files
committed
Cleanup and more output
1 parent 356f845 commit c7debe3

File tree

1 file changed

+19
-8
lines changed

1 file changed

+19
-8
lines changed

notebooks/enterprise-search/elastic_crawler_to_open_crawler_migration.ipynb

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@
233233
" # populate the in-memory data structure\n",
234234
" inflight_configuration_data[crawler_oid][\"domains_temp\"][\n",
235235
" domain_oid\n",
236-
" ] = temp_domain_conf"
236+
" ] = temp_domain_conf\n",
237+
" print()"
237238
]
238239
},
239240
{
@@ -258,7 +259,7 @@
258259
" _source=[\"configuration_oid\", \"domain_oid\", \"rules\", \"url_filters\"],\n",
259260
")\n",
260261
"\n",
261-
"extr_count = 0\n",
262+
"extr_count = 1\n",
262263
"for exr_rule in extraction_rules[\"hits\"][\"hits\"]:\n",
263264
" source = exr_rule[\"_source\"]\n",
264265
"\n",
@@ -306,12 +307,14 @@
306307
" \"rules\": ruleset,\n",
307308
" }\n",
308309
" ]\n",
310+
"\n",
311+
" print(\n",
312+
" f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n",
313+
" )\n",
309314
" extr_count += 1\n",
310315
" inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n",
311316
" \"extraction_rulesets\"\n",
312-
" ] = temp_extraction_rulesets\n",
313-
"\n",
314-
"print(f\"{extr_count} total extraction rules found!\")"
317+
" ] = temp_extraction_rulesets"
315318
]
316319
},
317320
{
@@ -348,7 +351,6 @@
348351
"\n",
349352
" # ? comes from Quartz Cron, regular cron doesn't handle it well\n",
350353
" repackaged_definition = repackaged_definition.replace(\"?\", \"*\")\n",
351-
" print(repackaged_definition)\n",
352354
" return repackaged_definition\n",
353355
"\n",
354356
"\n",
@@ -402,15 +404,16 @@
402404
"source": [
403405
"# Final transform of the in-memory data structure to a form we can dump to YAML\n",
404406
"# for each crawler, collect all of its domain configurations into a list\n",
405-
"for crawler_config in inflight_configuration_data.values():\n",
407+
"for crawler_oid, crawler_config in inflight_configuration_data.items():\n",
406408
" all_crawler_domains = []\n",
407409
"\n",
408410
" for domain_config in crawler_config[\"domains_temp\"].values():\n",
409411
" all_crawler_domains.append(domain_config)\n",
410412
" # create a new key called \"domains\" that points to a list of domain configs only - no domain_oid values as keys\n",
411413
" crawler_config[\"domains\"] = all_crawler_domains\n",
412414
" # delete the temporary domain key\n",
413-
" del crawler_config[\"domains_temp\"]"
415+
" del crawler_config[\"domains_temp\"]\n",
416+
" print(f\"Transform for {crawler_oid} complete!\")"
414417
]
415418
},
416419
{
@@ -518,6 +521,14 @@
518521
" \"--------------------------------------------------------------------------------\"\n",
519522
" )"
520523
]
524+
},
525+
{
526+
"cell_type": "code",
527+
"execution_count": null,
528+
"id": "7aaee4e8-c388-4b22-a8ad-a657550d92c7",
529+
"metadata": {},
530+
"outputs": [],
531+
"source": []
521532
}
522533
],
523534
"metadata": {

0 commit comments

Comments
 (0)