Skip to content

Commit 3cae572

Browse files
committed
Add retry with backoff for ES connection timeouts and increase default request timeout
1 parent 17a80e0 commit 3cae572

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

ddbj_search_converter/es/bulk_insert.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,19 @@
11
"""Elasticsearch bulk insert operations."""
22

33
import json
4+
import time
45
from collections.abc import Iterator
56
from pathlib import Path
67
from typing import Any
78

9+
from elastic_transport import ConnectionTimeout
810
from pydantic import BaseModel
911

1012
from ddbj_search_converter.config import Config
1113
from ddbj_search_converter.es.client import check_index_exists, get_es_client, refresh_index, set_refresh_interval
1214
from ddbj_search_converter.es.index import IndexName
1315
from ddbj_search_converter.es.settings import BULK_INSERT_SETTINGS
16+
from ddbj_search_converter.logging.logger import log_warn
1417
from elasticsearch import helpers
1518

1619

@@ -102,8 +105,18 @@ def bulk_insert_jsonl(
102105
"""
103106
es_client = get_es_client(config)
104107

105-
if not check_index_exists(es_client, index):
106-
raise Exception(f"Index '{index}' does not exist.")
108+
# ES が過負荷の場合に備えてリトライする
109+
for attempt in range(3):
110+
try:
111+
if not check_index_exists(es_client, index):
112+
raise Exception(f"Index '{index}' does not exist.")
113+
break
114+
except ConnectionTimeout:
115+
if attempt == 2:
116+
raise
117+
wait = 30 * (attempt + 1)
118+
log_warn(f"ES connection timed out, retrying in {wait}s (attempt {attempt + 1}/3)")
119+
time.sleep(wait)
107120

108121
total_docs = 0
109122
success_count = 0

ddbj_search_converter/es/client.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
def get_es_client(config: Config) -> Elasticsearch:
1010
"""Return a cached Elasticsearch client for the given config."""
1111
if config.es_url not in _clients:
12-
_clients[config.es_url] = Elasticsearch(config.es_url)
12+
_clients[config.es_url] = Elasticsearch(config.es_url, request_timeout=120)
1313
return _clients[config.es_url]
1414

1515

0 commit comments

Comments
 (0)