|
265 | 265 | " structured_data = []\n", |
266 | 266 | "\n", |
267 | 267 | " # Combine paragraph and table content\n", |
268 | | - " for p_number, contents in {**paragraph_content, **table_content}.items():\n", |
| 268 | + " for p_number in set(paragraph_content.keys()).union(table_content.keys()):\n", |
269 | 269 | " concatenated_text = \"\"\n", |
270 | | - " for content in contents:\n", |
271 | | - " concatenated_text += content[\"content_text\"] + \"\\n\"\n", |
| 270 | + "\n", |
| 271 | + " if p_number in paragraph_content:\n", |
| 272 | + " for content in paragraph_content[p_number]:\n", |
| 273 | + " concatenated_text += content[\"content_text\"] + \"\\n\"\n", |
| 274 | + "\n", |
| 275 | + " if p_number in table_content:\n", |
| 276 | + " for content in table_content[p_number]:\n", |
| 277 | + " concatenated_text += content[\"content_text\"] + \"\\n\"\n", |
272 | 278 | "\n", |
273 | 279 | " page_content_concatenated[p_number] = concatenated_text.strip()\n", |
274 | 280 | "\n", |
|
360 | 366 | "ES_URL = os.getenv(\"ES_URL\")\n", |
361 | 367 | "ES_API_KEY = os.getenv(\"ES_API_KEY\")\n", |
362 | 368 | "\n", |
363 | | - "es = Elasticsearch(hosts=ES_URL, api_key=ES_API_KEY, request_timeout=60)" |
| 369 | + "es = Elasticsearch(hosts=ES_URL, api_key=ES_API_KEY, request_timeout=300)" |
364 | 370 | ] |
365 | 371 | }, |
366 | 372 | { |
|
0 commit comments