@@ -65,12 +65,15 @@ def deprecated(message: str):
     Args:
         message: The deprecation message to show in the warning.
     """
+
     def decorator(func):
         @wraps(func)
         def wrapper(*args, **kwargs):
             warnings.warn(message, DeprecationWarning, stacklevel=2)
             return func(*args, **kwargs)
+
         return wrapper
+
     return decorator
 
 
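For reference, the decorator this hunk reformats is used like so; a minimal, runnable sketch (the `old_function` example and the warning text are illustrative, not from the PR):

```python
import warnings
from functools import wraps


def deprecated(message: str):
    """Mark a callable as deprecated; callers get a DeprecationWarning."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(message, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator


@deprecated("old_function is deprecated; use new_function instead")
def old_function() -> int:
    return 42


warnings.simplefilter("always", DeprecationWarning)
old_function()  # emits a DeprecationWarning with the message above
```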
@@ -113,9 +116,7 @@ async def _startup_backfill() -> None:
             if namespace and project_id:
                 namespaces_projects.add((namespace, project_id))
     except Exception as e:
-        logger.error(
-            "Failed to scan existing data for startup backfill: %s", str(e)
-        )
+        logger.error("Failed to scan existing data for startup backfill: %s", str(e))
         return
 
     logger.info(
@@ -261,18 +262,17 @@ class EmbedResponse(BaseModel):
261262 "yes" ,
262263}
263264# Check if we're in testing mode
264- TESTING_MODE : bool = (
265- "pytest" in sys .modules or
266- os .environ .get ("TESTING" , "" ).lower () in {"1" , "true" , "yes" }
267- )
265+ TESTING_MODE : bool = "pytest" in sys .modules or os .environ .get (
266+ "TESTING" , ""
267+ ).lower () in {"1" , "true" , "yes" }
268268
269269API_KEY = os .environ .get ("CF_API_KEY" )
270270
271271# Validate required configuration
272272if not API_KEY or not API_KEY .strip ():
273273 if TESTING_MODE :
274274 # Use dummy key for testing
275- API_KEY = "test-key"
275+ API_KEY = "test-key" # pragma: allowlist secret
276276 logger .info ("Using test API key for testing mode" )
277277 else :
278278 logger .error (
@@ -374,9 +374,7 @@ def _get_env_int(key: str, default: int) -> int:
 BACKFILL_DELAY_SECONDS = 0.1  # Delay between backfill task scheduling
 
 # Search optimization configuration
-FILE_SIZE_WARNING_THRESHOLD = _get_env_int(
-    "CF_FILE_SIZE_THRESHOLD", 50 * 1024 * 1024
-)
+FILE_SIZE_WARNING_THRESHOLD = _get_env_int("CF_FILE_SIZE_THRESHOLD", 50 * 1024 * 1024)
 EMBEDDING_CACHE_SIZE = _get_env_int("CF_EMBEDDING_CACHE_SIZE", 10000)
 EMBEDDING_DIMENSION = 32  # Derived from _embed_text slice length (digest[:32])
 
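The body of `_get_env_int` is not part of this diff; assuming the obvious behavior from its signature and these call sites, it likely looks something like the sketch below (the ValueError fallback is a guess):

```python
import os


def _get_env_int(key: str, default: int) -> int:
    """Read an integer setting from the environment, else use the default."""
    raw = os.environ.get(key)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


# With CF_FILE_SIZE_THRESHOLD unset, this yields 52428800 (50 MiB).
print(_get_env_int("CF_FILE_SIZE_THRESHOLD", 50 * 1024 * 1024))
```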
@@ -1046,41 +1044,28 @@ def _schedule_background_backfill(namespace: str, project_id: str) -> str:
         )
         return existing_task
 
-    logger.info(
-        "Scheduling background backfill for %s (task: %s)", key, task_id
-    )
-    future = _io_pool.submit(
-        _safe_backfill_index, namespace, project_id, task_id
-    )
+    logger.info("Scheduling background backfill for %s (task: %s)", key, task_id)
+    future = _io_pool.submit(_safe_backfill_index, namespace, project_id, task_id)
 
     # Add done-callback to clean up completed futures
     def _cleanup_future(fut: Future[None]) -> None:
         with _backfill_tasks_lock:
-            if (
-                task_id in _backfill_tasks
-                and _backfill_tasks[task_id] is fut
-            ):
+            if task_id in _backfill_tasks and _backfill_tasks[task_id] is fut:
                 del _backfill_tasks[task_id]
-                logger.debug(
-                    "Cleaned up completed backfill task %s", task_id
-                )
+                logger.debug("Cleaned up completed backfill task %s", task_id)
 
     future.add_done_callback(_cleanup_future)
     _backfill_tasks[task_id] = future
     return task_id
 
 
-def _safe_backfill_index(
-    namespace: str, project_id: str, task_id: str
-) -> None:
+def _safe_backfill_index(namespace: str, project_id: str, task_id: str) -> None:
     """Safely backfill index with guards and timeout protection."""
     start_time = time.time()
     bucket = DATA_DIR / "store.jsonl"
 
     if not bucket.exists():
-        logger.debug(
-            "No data file found for backfill of %s:%s", namespace, project_id
-        )
+        logger.debug("No data file found for backfill of %s:%s", namespace, project_id)
         return
 
     try:
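The done-callback pattern in the hunk above, isolated as a self-contained sketch: the identity check (`is fut`) ensures a rescheduled task reusing the same id is not evicted by a stale callback. Names here loosely mirror the diff; the sleep is a stand-in for the real backfill work:

```python
import threading
import time
from concurrent.futures import Future, ThreadPoolExecutor

_io_pool = ThreadPoolExecutor(max_workers=2)
_tasks: dict[str, Future[None]] = {}
_tasks_lock = threading.Lock()


def _backfill() -> None:
    time.sleep(0.1)  # stand-in for the real I/O-bound work


def schedule(task_id: str) -> str:
    future = _io_pool.submit(_backfill)

    def _cleanup(fut: Future[None]) -> None:
        with _tasks_lock:
            # Remove the entry only if it still points at this exact future.
            if task_id in _tasks and _tasks[task_id] is fut:
                del _tasks[task_id]

    future.add_done_callback(_cleanup)
    _tasks[task_id] = future
    return task_id


schedule("ns:proj")
_io_pool.shutdown(wait=True)
print(_tasks)  # {} once the callback has fired
```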
@@ -1108,9 +1093,7 @@ def _scan_file() -> tuple[list[str], list[str]]:
                     and data.get("project_id") == project_id
                 ):
                     backfill_ids.append(data["id"])  # type: ignore[index]
-                    backfill_texts.append(
-                        data["text"]
-                    )  # type: ignore[index]
+                    backfill_texts.append(data["text"])  # type: ignore[index]
 
                     # Enforce item limit
                     if len(backfill_ids) >= MAX_BACKFILL_ITEMS:
@@ -1153,12 +1136,8 @@ def _scan_file() -> tuple[list[str], list[str]]:
             )
             break
 
-        batch_ids = missing_ids[
-            batch_start:batch_start + BACKFILL_BATCH_SIZE
-        ]
-        batch_texts = missing_texts[
-            batch_start:batch_start + BACKFILL_BATCH_SIZE
-        ]
+        batch_ids = missing_ids[batch_start : batch_start + BACKFILL_BATCH_SIZE]
+        batch_texts = missing_texts[batch_start : batch_start + BACKFILL_BATCH_SIZE]
 
         # Retry mechanism for batch processing
         success = False
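For context, the slice reformatting above belongs to a batching-with-retry loop. A minimal sketch of that shape (`index_batch`, `MAX_RETRIES`, and the batch size here are illustrative stand-ins, not values from the PR):

```python
BACKFILL_BATCH_SIZE = 2
MAX_RETRIES = 3


def index_batch(ids: list[str], texts: list[str]) -> None:
    # Stand-in for the real embedding/indexing step.
    print(f"indexed {len(ids)} items")


def process_in_batches(ids: list[str], texts: list[str]) -> None:
    """Walk (id, text) pairs in fixed-size batches, retrying each batch."""
    for batch_start in range(0, len(ids), BACKFILL_BATCH_SIZE):
        batch_ids = ids[batch_start : batch_start + BACKFILL_BATCH_SIZE]
        batch_texts = texts[batch_start : batch_start + BACKFILL_BATCH_SIZE]
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                index_batch(batch_ids, batch_texts)
                break
            except RuntimeError:
                if attempt == MAX_RETRIES:
                    raise


process_in_batches(["a", "b", "c"], ["t1", "t2", "t3"])
```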