Skip to content

Commit 8cf33a7

Browse files
committed
Fixed related bug where a crawler with multiple extraction rules was only receiving the last-known extraction rule and not all extraction rules
1 parent 5ca678d commit 8cf33a7

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

notebooks/enterprise-search/elastic-crawler-to-open-crawler-migration.ipynb

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@
275275
" and domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]\n",
276276
" ):\n",
277277
"\n",
278+
" # initialize extraction rulesets as an empty array if it doesn't exist yet\n",
279+
" if (\n",
280+
" not \"extraction_rulesets\"\n",
281+
" in inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid]\n",
282+
" ):\n",
283+
" inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n",
284+
" \"extraction_rulesets\"\n",
285+
" ] = []\n",
286+
"\n",
278287
" all_rules = source[\"rules\"]\n",
279288
" all_url_filters = source[\"url_filters\"]\n",
280289
"\n",
@@ -294,7 +303,7 @@
294303
" \"extracted\": \"extract\",\n",
295304
" }\n",
296305
"\n",
297-
" ruleset = {}\n",
306+
" ruleset = []\n",
298307
" if all_rules:\n",
299308
" ruleset = [\n",
300309
" {\n",
@@ -309,13 +318,10 @@
309318
" }\n",
310319
" ]\n",
311320
"\n",
312-
" # populate the in-memory data structure\n",
313-
" temp_extraction_rulesets = [\n",
314-
" {\n",
315-
" \"url_filters\": url_filters,\n",
316-
" \"rules\": ruleset,\n",
317-
" }\n",
318-
" ]\n",
321+
" temp_extraction_rulesets = {\n",
322+
" \"url_filters\": url_filters,\n",
323+
" \"rules\": ruleset,\n",
324+
" }\n",
319325
"\n",
320326
" print(\n",
321327
" f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n",
@@ -324,7 +330,7 @@
324330
"\n",
325331
" inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n",
326332
" \"extraction_rulesets\"\n",
327-
" ] = temp_extraction_rulesets"
333+
" ].append(temp_extraction_rulesets)"
328334
]
329335
},
330336
{

0 commit comments

Comments
 (0)