|
269 | 269 | " config_oid = source[\"configuration_oid\"]\n", |
270 | 270 | " domain_oid = source[\"domain_oid\"]\n", |
271 | 271 | "\n", |
272 | | - " all_rules = source[\"rules\"]\n", |
273 | | - " all_url_filters = source[\"url_filters\"]\n", |
274 | | - "\n", |
275 | | - " # extract url filters\n", |
276 | | - " url_filters = []\n", |
277 | | - " if all_url_filters:\n", |
278 | | - " url_filters = [\n", |
279 | | - " {\n", |
280 | | - " \"type\": all_url_filters[0][\"filter\"],\n", |
281 | | - " \"pattern\": all_url_filters[0][\"pattern\"],\n", |
282 | | - " }\n", |
283 | | - " ]\n", |
284 | | - "\n", |
285 | | - " # extract rulesets\n", |
286 | | - " action_translation_map = {\n", |
287 | | - " \"fixed\": \"set\",\n", |
288 | | - " \"extracted\": \"extract\",\n", |
289 | | - " }\n", |
| 272 | + " # ensure the config and domain oids actually exist in our in-memory data structure\n", |
| 273 | + " if (\n", |
| 274 | + " config_oid in inflight_configuration_data\n", |
| 275 | + " and domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]\n", |
| 276 | + " ):\n", |
| 277 | + "\n", |
| 278 | + " # initialize extraction rulesets as an empty array if it doesn't exist yet\n", |
| 279 | + " if (\n", |
| 280 | + " not \"extraction_rulesets\"\n", |
| 281 | + " in inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid]\n", |
| 282 | + " ):\n", |
| 283 | + " inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n", |
| 284 | + " \"extraction_rulesets\"\n", |
| 285 | + " ] = []\n", |
| 286 | + "\n", |
| 287 | + " all_rules = source[\"rules\"]\n", |
| 288 | + " all_url_filters = source[\"url_filters\"]\n", |
| 289 | + "\n", |
| 290 | + " # extract url filters\n", |
| 291 | + " url_filters = []\n", |
| 292 | + " if all_url_filters:\n", |
| 293 | + " url_filters = [\n", |
| 294 | + " {\n", |
| 295 | + " \"type\": all_url_filters[0][\"filter\"],\n", |
| 296 | + " \"pattern\": all_url_filters[0][\"pattern\"],\n", |
| 297 | + " }\n", |
| 298 | + " ]\n", |
290 | 299 | "\n", |
291 | | - " ruleset = {}\n", |
292 | | - " if all_rules:\n", |
293 | | - " ruleset = [\n", |
294 | | - " {\n", |
295 | | - " \"action\": action_translation_map[\n", |
296 | | - " all_rules[0][\"content_from\"][\"value_type\"]\n", |
297 | | - " ],\n", |
298 | | - " \"field_name\": all_rules[0][\"field_name\"],\n", |
299 | | - " \"selector\": all_rules[0][\"selector\"],\n", |
300 | | - " \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n", |
301 | | - " \"value\": all_rules[0][\"content_from\"][\"value\"],\n", |
302 | | - " \"source\": all_rules[0][\"source_type\"],\n", |
303 | | - " }\n", |
304 | | - " ]\n", |
| 300 | + " # extract rulesets\n", |
| 301 | + " action_translation_map = {\n", |
| 302 | + " \"fixed\": \"set\",\n", |
| 303 | + " \"extracted\": \"extract\",\n", |
| 304 | + " }\n", |
305 | 305 | "\n", |
306 | | - " # populate the in-memory data structure\n", |
307 | | - " temp_extraction_rulesets = [\n", |
308 | | - " {\n", |
| 306 | + " ruleset = []\n", |
| 307 | + " if all_rules:\n", |
| 308 | + " ruleset = [\n", |
| 309 | + " {\n", |
| 310 | + " \"action\": action_translation_map[\n", |
| 311 | + " all_rules[0][\"content_from\"][\"value_type\"]\n", |
| 312 | + " ],\n", |
| 313 | + " \"field_name\": all_rules[0][\"field_name\"],\n", |
| 314 | + " \"selector\": all_rules[0][\"selector\"],\n", |
| 315 | + " \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n", |
| 316 | + " \"value\": all_rules[0][\"content_from\"][\"value\"],\n", |
| 317 | + " \"source\": all_rules[0][\"source_type\"],\n", |
| 318 | + " }\n", |
| 319 | + " ]\n", |
| 320 | + "\n", |
| 321 | + " temp_extraction_rulesets = {\n", |
309 | 322 | " \"url_filters\": url_filters,\n", |
310 | 323 | " \"rules\": ruleset,\n", |
311 | 324 | " }\n", |
312 | | - " ]\n", |
313 | 325 | "\n", |
314 | | - " print(\n", |
315 | | - " f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n", |
316 | | - " )\n", |
317 | | - " extr_count += 1\n", |
318 | | - " inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n", |
319 | | - " \"extraction_rulesets\"\n", |
320 | | - " ] = temp_extraction_rulesets" |
| 326 | + " print(\n", |
| 327 | + " f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n", |
| 328 | + " )\n", |
| 329 | + " extr_count += 1\n", |
| 330 | + "\n", |
| 331 | + " inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n", |
| 332 | + " \"extraction_rulesets\"\n", |
| 333 | + " ].append(temp_extraction_rulesets)" |
321 | 334 | ] |
322 | 335 | }, |
323 | 336 | { |
|
0 commit comments