From f663c457108dc304b630c4c982974c74a978be60 Mon Sep 17 00:00:00 2001
From: Ben
Date: Sat, 17 Jan 2026 05:50:55 +0000
Subject: [PATCH] =?UTF-8?q?fix(usage=5Fmanager):=20=F0=9F=90=9B=20prevent?=
 =?UTF-8?q?=20stale=20API=20responses=20from=20resetting=20quota=20count?=
 =?UTF-8?q?=20(#75)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the background refresh receives remainingFraction: 1.0 (100% remaining)
from a stale/cached API response, the calculated used_requests becomes 0,
which incorrectly wiped out local request tracking.

Use max(current_count, used_requests) so the API can only increase our count
(if we missed requests), never decrease it.

Applied to both the primary model and grouped models to keep their state
consistent.
---
 src/rotator_library/usage_manager.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/rotator_library/usage_manager.py b/src/rotator_library/usage_manager.py
index dd2bd480..8a477851 100644
--- a/src/rotator_library/usage_manager.py
+++ b/src/rotator_library/usage_manager.py
@@ -3244,8 +3244,13 @@ async def update_quota_baseline(
         max_requests = model_data.get("quota_max_requests")
 
         # Sync local request count to API's authoritative value
-        model_data["request_count"] = used_requests
-        model_data["requests_at_baseline"] = used_requests
+        # Use max() to prevent API from resetting our count if it returns stale/cached 100%
+        # The API can only increase our count (if we missed requests), not decrease it
+        # See: https://github.com/Mirrowel/LLM-API-Key-Proxy/issues/75
+        current_count = model_data.get("request_count", 0)
+        synced_count = max(current_count, used_requests)
+        model_data["request_count"] = synced_count
+        model_data["requests_at_baseline"] = synced_count
 
         # Update baseline fields
         model_data["baseline_remaining_fraction"] = remaining_fraction
@@ -3254,7 +3259,7 @@
         # Update max_requests and quota_display
         if max_requests is not None:
             model_data["quota_max_requests"] = max_requests
-            model_data["quota_display"] = f"{used_requests}/{max_requests}"
+            model_data["quota_display"] = f"{synced_count}/{max_requests}"
 
         # Handle reset_timestamp: only trust it when quota has been used (< 100%)
         # API returns garbage reset times for unused quota
@@ -3339,19 +3344,19 @@
                     "approx_cost": 0.0,
                 },
             )
-            # Sync request tracking
-            other_model_data["request_count"] = used_requests
+            # Sync request tracking (use synced_count to prevent reset bug)
+            other_model_data["request_count"] = synced_count
             if max_requests is not None:
                 other_model_data["quota_max_requests"] = max_requests
                 other_model_data["quota_display"] = (
-                    f"{used_requests}/{max_requests}"
+                    f"{synced_count}/{max_requests}"
                 )
             # Sync baseline fields
             other_model_data["baseline_remaining_fraction"] = (
                 remaining_fraction
            )
             other_model_data["baseline_fetched_at"] = now_ts
-            other_model_data["requests_at_baseline"] = used_requests
+            other_model_data["requests_at_baseline"] = synced_count
             # Sync reset timestamp if valid
             if valid_reset_ts:
                 other_model_data["quota_reset_ts"] = reset_timestamp
@@ -3381,7 +3386,7 @@
 
         lib_logger.debug(
             f"Updated quota baseline for {mask_credential(credential)} model={model}: "
-            f"remaining={remaining_fraction:.2%}, synced_request_count={used_requests}"
+            f"remaining={remaining_fraction:.2%}, synced_request_count={synced_count}"
         )
 
         await self._save_usage()
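
A minimal, standalone sketch of the sync rule this patch introduces, for reviewers who want
to reason about it outside the diff. The helper function name, the dict shape, and the
derivation of used_requests from remaining_fraction are illustrative assumptions; in the
library the logic lives inside the async update_quota_baseline() method in usage_manager.py,
as shown above.

    # sketch.py -- illustrative only, not part of the patch
    def sync_request_count(model_data: dict, remaining_fraction: float, max_requests: int) -> int:
        """Sync the local request count against an API-reported quota baseline."""
        # Assumed derivation: used = (1 - remaining) * max, rounded to an int.
        used_requests = round((1.0 - remaining_fraction) * max_requests)
        current_count = model_data.get("request_count", 0)
        # The API may only raise the count (requests we missed), never lower it,
        # so a stale response claiming 100% remaining cannot reset local tracking.
        synced_count = max(current_count, used_requests)
        model_data["request_count"] = synced_count
        model_data["requests_at_baseline"] = synced_count
        model_data["quota_display"] = f"{synced_count}/{max_requests}"
        return synced_count

    if __name__ == "__main__":
        data = {"request_count": 42}
        # Stale response claims 100% remaining; the local count of 42 is kept.
        assert sync_request_count(data, remaining_fraction=1.0, max_requests=100) == 42
        # A fresh response showing more usage than tracked raises the count.
        assert sync_request_count(data, remaining_fraction=0.4, max_requests=100) == 60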