
Commit 4a6d334

Clément VALENTIN and claude committed
feat: cache scraped offers + auto-create providers with defaults
- Add Redis cache between preview and refresh to avoid double scraping
- Preview caches offers (TTL 5min), refresh uses cache if available
- Fix progression continuity: 80% → 82% → 90% → 100% with cache
- Auto-create missing providers with default scraper URLs from classes
- Fix provider query param (was sent in body, now in query string)
- Use datetime.fromisoformat() instead of dateutil (no extra dep)
- Faster polling (500ms) during sync for responsive progress
- Close preview modal immediately when applying changes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent f72cbd4 commit 4a6d334

File tree

8 files changed: +333 -87 lines changed


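The core of the change is a preview → refresh handoff through Redis: preview stores the scraped offers under a short-lived key, refresh consumes and then deletes them instead of scraping again. Below is a minimal standalone sketch of that pattern, assuming a local Redis at redis://localhost:6379 and redis-py's asyncio client; the key name and 300-second TTL mirror the helpers added to apps/api/src/routers/admin.py further down, but this is an illustration, not code from the commit.

import asyncio
import json

import redis.asyncio as redis

REDIS_URL = "redis://localhost:6379"  # assumption: local Redis, not the app's settings.REDIS_URL
CACHE_TTL = 300  # 5 minutes, as in SCRAPED_OFFERS_CACHE_TTL


async def preview(provider: str, offers: list[dict]) -> None:
    """Preview side: store scraped offers under a short-lived key."""
    client = redis.from_url(REDIS_URL, decode_responses=True)
    await client.setex(f"scraped_offers:{provider}", CACHE_TTL, json.dumps(offers))
    await client.close()


async def refresh(provider: str) -> list[dict] | None:
    """Refresh side: reuse cached offers if preview ran recently, then clear them."""
    client = redis.from_url(REDIS_URL, decode_responses=True)
    key = f"scraped_offers:{provider}"
    cached = await client.get(key)
    if cached:
        await client.delete(key)  # single use: clear after consuming
    await client.close()
    return json.loads(cached) if cached else None


async def main() -> None:
    await preview("EDF", [{"name": "Tarif Bleu", "offer_type": "BASE"}])
    print(await refresh("EDF"))  # offers served from cache, no second scrape
    print(await refresh("EDF"))  # None: cache cleared, a real scrape would run


if __name__ == "__main__":
    asyncio.run(main())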
apps/api/Dockerfile

Lines changed: 27 additions & 10 deletions
@@ -1,26 +1,43 @@
-FROM python:3.12-slim
+# =============================================================================
+# Stage 1: Dependencies (cached separately from code)
+# =============================================================================
+FROM python:3.12-slim AS deps
 
 WORKDIR /app
 
-# Install system dependencies
+# Install system dependencies (rarely changes - cached)
 RUN apt-get update && apt-get install -y \
     gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy ONLY dependency files first (better cache)
+COPY pyproject.toml README.md ./
+
+# Install dependencies (cached if pyproject.toml unchanged)
+# uv crashes under QEMU emulation, so we use pip for multi-arch builds
+RUN pip install --no-cache-dir .
+
+# =============================================================================
+# Stage 2: Production image
+# =============================================================================
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install runtime dependencies only (no gcc needed)
+RUN apt-get update && apt-get install -y \
     tzdata \
     && rm -rf /var/lib/apt/lists/*
 
 # Set timezone
 ENV TZ=Europe/Paris
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
-# Copy dependency files
-COPY pyproject.toml .
-COPY README.md .
-
-# Install dependencies using pip (compatible with cross-platform builds)
-# uv crashes under QEMU emulation, so we use pip for multi-arch builds
-RUN pip install --no-cache-dir .
+# Copy installed packages from deps stage
+COPY --from=deps /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=deps /usr/local/bin /usr/local/bin
 
-# Copy application code
+# Copy application code (changes frequently - last layer)
 COPY . .
 
 # Expose port

apps/api/src/routers/admin.py

Lines changed: 100 additions & 33 deletions
@@ -6,13 +6,15 @@
 import logging
 import json
 import asyncio
-from typing import Optional
+from typing import Optional, List
 from ..models import User, PDL, EnergyProvider, EnergyOffer
 from ..models.database import get_db
 from ..middleware import require_admin, require_permission, get_current_user
 from ..schemas import APIResponse
-from ..services import rate_limiter, cache_service
+from ..services import rate_limiter
 from ..services.price_update_service import PriceUpdateService
+from ..config import settings
+import redis.asyncio as redis
 
 logger = logging.getLogger(__name__)
 
@@ -27,6 +29,55 @@
     "progress": 0
 }
 
+# Cache for scraped offers (avoids re-scraping between preview and refresh)
+# 5-minute TTL - scraped offers are reused if the refresh happens soon after
+SCRAPED_OFFERS_CACHE_TTL = 300  # 5 minutes
+
+
+async def _get_redis_client():
+    """Get Redis client"""
+    return await redis.from_url(settings.REDIS_URL, encoding="utf-8", decode_responses=True)
+
+
+async def _cache_scraped_offers(provider: str, offers: List[dict]) -> None:
+    """Cache scraped offers to avoid scraping twice"""
+    try:
+        client = await _get_redis_client()
+        cache_key = f"scraped_offers:{provider}"
+        await client.setex(cache_key, SCRAPED_OFFERS_CACHE_TTL, json.dumps(offers))
+        await client.close()
+        logger.info(f"Cached {len(offers)} scraped offers for {provider} (TTL: {SCRAPED_OFFERS_CACHE_TTL}s)")
+    except Exception as e:
+        logger.error(f"Failed to cache offers for {provider}: {e}")
+
+
+async def _get_cached_offers(provider: str) -> List[dict] | None:
+    """Return scraped offers from the cache if available"""
+    try:
+        client = await _get_redis_client()
+        cache_key = f"scraped_offers:{provider}"
+        cached = await client.get(cache_key)
+        await client.close()
+        if cached:
+            offers = json.loads(cached)
+            logger.info(f"Found {len(offers)} cached offers for {provider}")
+            return offers
+    except Exception as e:
+        logger.error(f"Failed to get cached offers for {provider}: {e}")
+    return None
+
+
+async def _clear_cached_offers(provider: str) -> None:
+    """Remove scraped offers from the cache after use"""
+    try:
+        client = await _get_redis_client()
+        cache_key = f"scraped_offers:{provider}"
+        await client.delete(cache_key)
+        await client.close()
+        logger.info(f"Cleared cached offers for {provider}")
+    except Exception as e:
+        logger.error(f"Failed to clear cached offers for {provider}: {e}")
+
 
 def _update_scraper_progress(step: str, progress: int):
     """Update scraper progress status"""
@@ -952,6 +1003,10 @@ async def preview_offers_update(
            }
        )
 
+    # Cache scraped offers for later refresh (avoids re-scraping)
+    if preview_result.get("scraped_offers"):
+        await _cache_scraped_offers(provider, preview_result["scraped_offers"])
+
     return APIResponse(
         success=True,
         data={
@@ -1099,9 +1154,25 @@ async def refresh_offers(
            }
        )
 
-    _update_scraper_progress(f"Téléchargement des tarifs {provider}", 20)
-    result = await service.update_provider(provider)
-    _update_scraper_progress("Mise à jour de la base de données", 80)
+    # Check for cached offers from preview (avoids re-scraping)
+    cached_offers = await _get_cached_offers(provider)
+
+    if cached_offers:
+        # Continue from where preview left off (80%)
+        # Preview: 0% → 20% (download) → 80% (analysis done)
+        # Refresh with cache: 80% → 90% (DB update) → 100% (done)
+        _update_scraper_progress(f"Utilisation des données en cache pour {provider}", 82)
+    else:
+        _update_scraper_progress(f"Téléchargement des tarifs {provider}", 20)
+
+    result = await service.update_provider(provider, cached_offers=cached_offers)
+
+    # Progress depends on whether we used cache
+    _update_scraper_progress("Mise à jour de la base de données", 90 if cached_offers else 80)
+
+    # Clear cache after use
+    if cached_offers:
+        await _clear_cached_offers(provider)
 
     if not result.get("success"):
         return APIResponse(
@@ -1116,8 +1187,9 @@ async def refresh_offers(
         return APIResponse(
             success=True,
             data={
-                "message": f"Successfully updated {provider}",
-                "result": result
+                "message": f"Successfully updated {provider}" + (" (from cache)" if cached_offers else ""),
+                "result": result,
+                "used_cache": cached_offers is not None
             }
         )
     else:
@@ -1350,31 +1422,40 @@ async def list_available_scrapers(
 
 @router.get("/providers", response_model=APIResponse)
 async def list_providers(
-    include_missing_scrapers: bool = Query(False, description="Include providers with scrapers that don't exist in DB yet"),
     current_user: User = Depends(require_permission('offers')),
     db: AsyncSession = Depends(get_db)
 ) -> APIResponse:
     """
     List all energy providers
 
-    Args:
-        include_missing_scrapers: If True, also returns providers with scrapers not yet in DB
+    Automatically creates missing providers from scrapers with default values.
 
     Returns:
         APIResponse with list of providers
     """
     try:
+        # First, ensure all scrapers have corresponding providers in DB
+        service = PriceUpdateService(db)
+        existing_result = await db.execute(select(EnergyProvider.name))
+        existing_names = {row[0] for row in existing_result.fetchall()}
+
+        # Create missing providers with defaults
+        for scraper_name in PriceUpdateService.SCRAPERS.keys():
+            if scraper_name not in existing_names:
+                logger.info(f"Auto-creating missing provider: {scraper_name}")
+                await service._get_or_create_provider(scraper_name)
+
+        await db.commit()
+
+        # Now fetch all providers
         result = await db.execute(
             select(EnergyProvider).order_by(EnergyProvider.name)
         )
         providers = result.scalars().all()
 
         providers_data = []
-        existing_names = set()
 
         for provider in providers:
-            existing_names.add(provider.name)
-
             # Count active offers
             offers_result = await db.execute(
                 select(func.count()).select_from(EnergyOffer).where(
@@ -1389,6 +1470,11 @@ async def list_providers(
             # Check if this provider has a scraper
             has_scraper = provider.name in PriceUpdateService.SCRAPERS
 
+            # Get default URLs if provider has none
+            scraper_urls = provider.scraper_urls
+            if not scraper_urls and has_scraper:
+                scraper_urls = PriceUpdateService.get_default_scraper_urls(provider.name)
+
             providers_data.append({
                 "id": provider.id,
                 "name": provider.name,
@@ -1397,30 +1483,11 @@ async def list_providers(
                 "is_active": provider.is_active,
                 "active_offers_count": offers_count,
                 "has_scraper": has_scraper,
-                "scraper_urls": provider.scraper_urls,
+                "scraper_urls": scraper_urls,
                 "created_at": provider.created_at.isoformat(),
                 "updated_at": provider.updated_at.isoformat(),
             })
 
-        # Add providers with scrapers that don't exist in DB yet
-        if include_missing_scrapers:
-            for scraper_name in PriceUpdateService.SCRAPERS.keys():
-                if scraper_name not in existing_names:
-                    # Generate a placeholder ID and default values
-                    providers_data.append({
-                        "id": f"scraper-{scraper_name.lower().replace(' ', '-')}",
-                        "name": scraper_name,
-                        "logo_url": None,
-                        "website": None,
-                        "is_active": False,
-                        "active_offers_count": 0,
-                        "has_scraper": True,
-                        "scraper_urls": None,
-                        "created_at": None,
-                        "updated_at": None,
-                        "not_in_database": True,  # Flag to indicate this is a placeholder
-                    })
-
         # Sort by name
         providers_data.sort(key=lambda x: x["name"])

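The refresh path above only advances the shared progress status (82 → 90 → 100 with cache, 20 → 80 → 100 without); the "faster polling (500ms)" item in the commit message refers to the client reading that status. A rough poll-loop sketch follows, written in Python for illustration: the /api/admin/scraper-status path, the APIResponse envelope shape, and the "step" field are assumptions based on _update_scraper_progress, not taken from this diff.

import asyncio

import httpx

STATUS_URL = "http://localhost:8000/api/admin/scraper-status"  # hypothetical endpoint path


async def poll_scraper_progress(interval_s: float = 0.5) -> None:
    """Poll the scraper status every 500 ms until progress reaches 100%."""
    async with httpx.AsyncClient() as client:
        while True:
            resp = await client.get(STATUS_URL)
            status = resp.json().get("data", {})  # assumed response envelope
            print(f'{status.get("step", "?")}: {status.get("progress", 0)}%')
            if status.get("progress", 0) >= 100:
                break
            await asyncio.sleep(interval_s)


if __name__ == "__main__":
    asyncio.run(poll_scraper_progress())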
apps/api/src/services/cache.py

Lines changed: 25 additions & 0 deletions
@@ -71,6 +71,31 @@ async def delete(self, key: str) -> bool:
         except Exception:
             return False
 
+    async def get_raw(self, key: str) -> Optional[str]:
+        """Get cached value without decryption (for non-sensitive data)"""
+        if not self.redis_client:
+            return None
+
+        try:
+            data = await self.redis_client.get(key)
+            if data:
+                return data.decode() if isinstance(data, bytes) else data
+            return None
+        except Exception:
+            return None
+
+    async def set_raw(self, key: str, value: str, ttl: Optional[int] = None) -> bool:
+        """Cache value without encryption (for non-sensitive data)"""
+        if not self.redis_client:
+            return False
+
+        try:
+            cache_ttl = ttl if ttl is not None else self.ttl
+            await self.redis_client.setex(key, cache_ttl, value)
+            return True
+        except Exception:
+            return False
+
     async def delete_pattern(self, pattern: str) -> int:
         """Delete all keys matching pattern"""
         if not self.redis_client:

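These raw helpers skip the encryption applied by the regular get/set path, which is all plain scraped-offer JSON needs. A sketch of how the admin router could route through this service instead of opening its own Redis connection, assuming cache_service is a ready-to-use instance exported by ..services (the name the removed import in admin.py pointed at); illustrative only, not code from this commit.

import json

from ..services import cache_service  # assumed shared instance exposing set_raw/get_raw


async def cache_offers_via_service(provider: str, offers: list[dict]) -> list[dict] | None:
    """Store scraped offers without encryption, then read them back (illustrative only)."""
    key = f"scraped_offers:{provider}"
    # set_raw stores the JSON as-is and takes an explicit 5-minute TTL
    stored = await cache_service.set_raw(key, json.dumps(offers), ttl=300)
    if not stored:
        return None  # Redis unavailable: both helpers fail soft, like the methods above
    raw = await cache_service.get_raw(key)
    return json.loads(raw) if raw else None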
apps/api/src/services/price_scrapers/base.py

Lines changed: 21 additions & 6 deletions
@@ -103,9 +103,14 @@ def __init__(
         self.valid_from = valid_from
         self.valid_to = valid_to
 
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary for database insertion"""
-        return {
+    def to_dict(self, for_json: bool = False) -> Dict[str, Any]:
+        """
+        Convert to dictionary for database insertion or JSON serialization.
+
+        Args:
+            for_json: If True, convert datetime to ISO strings for JSON serialization
+        """
+        result = {
             "name": self.name,
             "offer_type": self.offer_type,
             "description": self.description,
@@ -131,12 +136,22 @@ def to_dict(self) -> Dict[str, Any]:
             "peak_day_price": self.peak_day_price,
             "hc_schedules": self.hc_schedules,
             "power_kva": self.power_kva,
-            "valid_from": self.valid_from,
-            "valid_to": self.valid_to,
-            "price_updated_at": datetime.now(UTC),
             "is_active": True,
         }
 
+        if for_json:
+            # Convert datetime to ISO string for JSON serialization (cache)
+            result["valid_from"] = self.valid_from.isoformat() if self.valid_from else None
+            result["valid_to"] = self.valid_to.isoformat() if self.valid_to else None
+            result["price_updated_at"] = datetime.now(UTC).isoformat()
+        else:
+            # Keep native datetime for database insertion
+            result["valid_from"] = self.valid_from
+            result["valid_to"] = self.valid_to
+            result["price_updated_at"] = datetime.now(UTC)
+
+        return result
+
 
 class BasePriceScraper(ABC):
     """Abstract base class for price scrapers"""

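The for_json branch exists so cached offers survive json.dumps; on the way back, the commit message notes that stdlib datetime.fromisoformat() is used instead of dateutil. A small standalone check of that round trip: the field names mirror the dict above, but the parsing helper is illustrative, not code from this commit.

import json
from datetime import UTC, datetime

# JSON-friendly shape, as produced by to_dict(for_json=True)
offer = {
    "name": "Tarif Bleu",
    "valid_from": datetime(2025, 2, 1, tzinfo=UTC).isoformat(),
    "valid_to": None,
    "price_updated_at": datetime.now(UTC).isoformat(),
}

cached = json.dumps(offer)     # what goes into Redis
restored = json.loads(cached)  # what the refresh path reads back


def parse_dt(value: str | None) -> datetime | None:
    """Illustrative helper: ISO strings back to aware datetimes, None passes through."""
    return datetime.fromisoformat(value) if value else None


print(parse_dt(restored["valid_from"]))              # 2025-02-01 00:00:00+00:00
print(parse_dt(restored["valid_to"]))                # None
print(type(parse_dt(restored["price_updated_at"])))  # <class 'datetime.datetime'>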
0 commit comments
