Skip to content

Commit 87c9c1d

Browse files
m4dm4rtig4n, Clément VALENTIN, and claude
authored
fix: implement Ekwateur HTML parsing and update Dec 2025 prices (#57)
* fix(scrapers): implement Ekwateur HTML parsing and update December 2025 prices

  Replace stub _parse_html with actual BeautifulSoup-based HTML parsing to extract pricing from Ekwateur's website tables instead of always falling back to manual data. Update fallback prices to December 2025 and remove obsolete offers (Prix variable, VE) no longer available on their website.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* fix(scrapers): improve Ekwateur HTML parsing to match actual page structure

  Update table detection logic to work with Ekwateur's actual HTML structure:
  - Look for "heures creuses" instead of "€/kWh" or "kWh" in table detection
  - Handle combined "heures pleines / heures creuses" header pattern
  - Add debug logging for parsed prices

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

---------

Co-authored-by: Clément VALENTIN <[email protected]>
Co-authored-by: Claude <[email protected]>
1 parent ca5df43 commit 87c9c1d

File tree

1 file changed

+197
-96
lines changed

1 file changed

+197
-96
lines changed

apps/api/src/services/price_scrapers/ekwateur_scraper.py

Lines changed: 197 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"""Ekwateur price scraper - Fetches tariffs from Ekwateur website"""
2+
import re
23
from typing import List
34
import httpx
45
from datetime import datetime, UTC
6+
from bs4 import BeautifulSoup
57

68
from .base import BasePriceScraper, OfferData
79

@@ -12,46 +14,22 @@ class EkwateurScraper(BasePriceScraper):
1214
# Ekwateur pricing page URL
1315
PRICING_URL = "https://ekwateur.fr/prix-kwh-electricite-abonnement-ekwateur/"
1416

15-
# Fallback: Manual pricing data (updated November 2025)
17+
# Fallback: Manual pricing data (updated December 2025)
1618
# Source: https://ekwateur.fr/prix-kwh-electricite-abonnement-ekwateur/
1719
# Note: Ekwateur only provides pricing for 3, 6, 9 kVA on their website
1820
FALLBACK_PRICES = {
19-
"VARIABLE_BASE": {
20-
# Électricité verte - Prix variable - Option Base
21-
# Prix TTC novembre 2025
22-
3: {"subscription": 15.89, "kwh": 0.2000},
23-
6: {"subscription": 19.70, "kwh": 0.2000},
24-
9: {"subscription": 23.65, "kwh": 0.2018},
25-
},
26-
"VARIABLE_HC_HP": {
27-
# Électricité verte - Prix variable - Heures Creuses
28-
3: {"subscription": 15.96, "hp": 0.2189, "hc": 0.1704},
29-
6: {"subscription": 20.10, "hp": 0.2189, "hc": 0.1704},
30-
9: {"subscription": 24.28, "hp": 0.2189, "hc": 0.1704},
31-
},
3221
"FIXE_BASE": {
3322
# Électricité verte - Prix fixe - Option Base
34-
3: {"subscription": 11.73, "kwh": 0.1791},
35-
6: {"subscription": 19.70, "kwh": 0.1791},
36-
9: {"subscription": 23.65, "kwh": 0.2015},
23+
# Prix TTC décembre 2025
24+
3: {"subscription": 11.78, "kwh": 0.1606},
25+
6: {"subscription": 15.57, "kwh": 0.1606},
26+
9: {"subscription": 19.655, "kwh": 0.1606},
3727
},
3828
"FIXE_HC_HP": {
3929
# Électricité verte - Prix fixe - Heures Creuses
40-
3: {"subscription": 15.08, "hp": 0.2257, "hc": 0.1770},
41-
6: {"subscription": 15.74, "hp": 0.2257, "hc": 0.1770},
42-
9: {"subscription": 24.28, "hp": 0.2257, "hc": 0.1770},
43-
},
44-
"VE_BASE": {
45-
# Électricité verte - Spéciale véhicule électrique - Option Base
46-
3: {"subscription": 15.89, "kwh": 0.1929},
47-
6: {"subscription": 19.70, "kwh": 0.1929},
48-
9: {"subscription": 23.65, "kwh": 0.2015},
49-
},
50-
"VE_HC_HP": {
51-
# Électricité verte - Spéciale véhicule électrique - Heures Creuses
52-
3: {"subscription": 15.96, "hp": 0.2257, "hc": 0.1347},
53-
6: {"subscription": 20.10, "hp": 0.2257, "hc": 0.1347},
54-
9: {"subscription": 24.28, "hp": 0.2257, "hc": 0.1347},
30+
3: {"subscription": 15.13, "hp": 0.17914, "hc": 0.14026},
31+
6: {"subscription": 15.84, "hp": 0.17914, "hc": 0.1426},
32+
9: {"subscription": 20.48, "hp": 0.17914, "hc": 0.1426},
5533
},
5634
}
5735

@@ -107,54 +85,206 @@ async def fetch_offers(self) -> List[OfferData]:
10785
raise Exception(f"Échec complet du scraping Ekwateur (y compris fallback) : {' | '.join(errors)}")
10886

10987
def _parse_html(self, html: str) -> List[OfferData]:
110-
"""Parse HTML from Ekwateur pricing page"""
111-
# For now, return empty list to use fallback
112-
# HTML parsing can be implemented later with BeautifulSoup or regex
113-
return []
88+
"""
89+
Parse HTML from Ekwateur pricing page.
11490
115-
def _get_fallback_offers(self) -> List[OfferData]:
116-
"""Generate offers from fallback pricing data"""
91+
The page contains 2 tables:
92+
- Table 1: kWh prices (Base, HP, HC) per power level (3, 6, 9 kVA)
93+
- Table 2: Subscription prices per power level
94+
"""
11795
offers = []
96+
soup = BeautifulSoup(html, "html.parser")
11897

119-
# Date: November 2025
120-
valid_from = datetime(2025, 11, 1, 0, 0, 0, 0, tzinfo=UTC)
98+
# Find all pricing tables
99+
tables = soup.find_all("table")
100+
if len(tables) < 2:
101+
self.logger.warning(f"Expected at least 2 tables, found {len(tables)}")
102+
return []
121103

122-
# Électricité verte - Prix variable - BASE
123-
for power, prices in self.FALLBACK_PRICES["VARIABLE_BASE"].items():
124-
offers.append(
125-
OfferData(
126-
name=f"Électricité verte - Prix variable - Base {power} kVA",
127-
offer_type="BASE",
128-
description=f"Offre d'électricité 100% verte à prix variable indexé sur le marché - Option Base - {power} kVA",
129-
subscription_price=prices["subscription"],
130-
base_price=prices["kwh"],
131-
power_kva=power,
132-
valid_from=valid_from,
104+
# Parse pricing data from tables
105+
kwh_data = self._parse_kwh_table(tables)
106+
subscription_data = self._parse_subscription_table(tables)
107+
108+
if not kwh_data or not subscription_data:
109+
self.logger.warning("Failed to parse pricing tables")
110+
return []
111+
112+
# Current date for valid_from
113+
valid_from = datetime.now(UTC).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
114+
115+
# Generate offers: Prix fixe BASE
116+
for power in [3, 6, 9]:
117+
base_key = f"base_{power}"
118+
if base_key in kwh_data and base_key in subscription_data:
119+
offers.append(
120+
OfferData(
121+
name=f"Électricité verte - Prix fixe - Base {power} kVA",
122+
offer_type="BASE",
123+
description=f"Offre d'électricité 100% verte à prix fixe - Option Base - {power} kVA",
124+
subscription_price=subscription_data[base_key],
125+
base_price=kwh_data[base_key],
126+
power_kva=power,
127+
valid_from=valid_from,
128+
)
133129
)
134-
)
135130

136-
# Électricité verte - Prix variable - HC/HP
137-
for power, prices in self.FALLBACK_PRICES["VARIABLE_HC_HP"].items():
138-
offers.append(
139-
OfferData(
140-
name=f"Électricité verte - Prix variable - Heures Creuses {power} kVA",
141-
offer_type="HC_HP",
142-
description=f"Offre d'électricité 100% verte à prix variable indexé sur le marché - Heures Creuses - {power} kVA",
143-
subscription_price=prices["subscription"],
144-
hp_price=prices["hp"],
145-
hc_price=prices["hc"],
146-
power_kva=power,
147-
valid_from=valid_from,
131+
# Generate offers: Prix fixe HC/HP
132+
for power in [3, 6, 9]:
133+
hp_key = f"hp_{power}"
134+
hc_key = f"hc_{power}"
135+
sub_key = f"hchp_{power}"
136+
if hp_key in kwh_data and hc_key in kwh_data and sub_key in subscription_data:
137+
offers.append(
138+
OfferData(
139+
name=f"Électricité verte - Prix fixe - Heures Creuses {power} kVA",
140+
offer_type="HC_HP",
141+
description=f"Offre d'électricité 100% verte à prix fixe - Heures Creuses - {power} kVA",
142+
subscription_price=subscription_data[sub_key],
143+
hp_price=kwh_data[hp_key],
144+
hc_price=kwh_data[hc_key],
145+
power_kva=power,
146+
valid_from=valid_from,
147+
)
148148
)
149-
)
149+
150+
self.logger.info(f"Parsed {len(offers)} offers from HTML")
151+
return offers
152+
153+
def _parse_kwh_table(self, tables: list) -> dict:
    """
    Extract the per-kWh prices from the kWh pricing table.

    Only tables whose text contains "heures creuses" are inspected. Inside
    such a table, a row is considered when it has at least two cells and its
    text mentions "prix fixe" or "électricité". At most one decimal number is
    read from each cell (decimal comma accepted), and it is kept only when it
    lies strictly between 0.05 and 0.60. The first row that yields at least
    nine such prices wins; any extra prices in that row are ignored.

    Expected row layout (per the original author's notes):
    Base (3, 6, 9 kVA) | Heures pleines (3, 6, 9 kVA) | Heures creuses (3, 6, 9 kVA)

    Args:
        tables: Table elements exposing ``get_text()`` and ``find_all()``
            (BeautifulSoup tags in the caller).

    Returns:
        Dict mapping 'base_3', 'base_6', 'base_9', 'hp_3', 'hp_6', 'hp_9',
        'hc_3', 'hc_6', 'hc_9' to float prices, or an empty dict when no row
        provides nine in-range prices.
    """
    # Key order mirrors the column order of the price row.
    keys = [f"{option}_{power}" for option in ("base", "hp", "hc") for power in (3, 6, 9)]
    price_pattern = re.compile(r"(\d+\.\d+)")

    for table in tables:
        if "heures creuses" not in table.get_text().lower():
            continue

        for row in table.find_all("tr"):
            cells = row.find_all(["td", "th"])
            if len(cells) < 2:
                continue

            row_text = row.get_text().lower()
            if "prix fixe" not in row_text and "électricité" not in row_text:
                continue

            prices = []
            for cell in cells:
                cell_text = cell.get_text().strip().replace(",", ".").replace("€", "")
                match = price_pattern.search(cell_text)
                if match is None:
                    continue
                # The pattern only matches "digits.digits", so float() cannot fail here.
                price = float(match.group(1))
                # Range filter drops subscription amounts and other unrelated numbers.
                if 0.05 < price < 0.60:
                    prices.append(price)

            if len(prices) >= 9:
                # zip() stops at the nine keys, so surplus prices are discarded.
                data = dict(zip(keys, prices))
                self.logger.info(f"Parsed kWh prices: {data}")
                return data

    return {}
211+
212+
def _parse_subscription_table(self, tables: list) -> dict:
    """
    Extract the monthly subscription prices from the subscription table.

    A table is treated as the subscription table only when its text contains
    both "heures creuses" and "heures pleines", and either its header text
    (the ``<thead>`` when present, otherwise the whole table text) contains
    the combined "heures pleines / heures creuses" label or the table text
    mentions "kva" exactly six times. Tables that do not meet these
    conditions are skipped.

    Inside the selected table, a row is considered when it has at least two
    cells and its text mentions "prix fixe" or "électricité". At most one
    decimal number is read from each cell (decimal comma accepted), and it is
    kept only when it lies strictly between 5.0 and 60.0. The first row that
    yields at least six such prices wins; extra prices are ignored.

    Expected row layout (per the original author's notes):
    Base (3, 6, 9 kVA) | Heures pleines/Heures creuses (3, 6, 9 kVA)

    Args:
        tables: Table elements exposing ``get_text()``, ``find()`` and
            ``find_all()`` (BeautifulSoup tags in the caller).

    Returns:
        Dict mapping 'base_3', 'base_6', 'base_9', 'hchp_3', 'hchp_6',
        'hchp_9' to float prices, or an empty dict when no row provides six
        in-range prices.
    """
    # Key order mirrors the column order of the price row.
    keys = ("base_3", "base_6", "base_9", "hchp_3", "hchp_6", "hchp_9")
    price_pattern = re.compile(r"(\d+\.\d+)")

    for table in tables:
        text = table.get_text().lower()

        # Skip tables that do not mention both tariff periods. Previously such
        # tables fell through to row parsing and could be mistaken for the
        # subscription table.
        if "heures creuses" not in text or "heures pleines" not in text:
            continue

        thead = table.find("thead")
        header_text = thead.get_text().lower() if thead is not None else text
        # The kWh table lists the two periods separately, so it matches neither
        # the combined label nor the six-"kva" count and is skipped here.
        if "heures pleines / heures creuses" not in header_text and text.count("kva") != 6:
            continue

        for row in table.find_all("tr"):
            cells = row.find_all(["td", "th"])
            if len(cells) < 2:
                continue

            row_text = row.get_text().lower()
            if "prix fixe" not in row_text and "électricité" not in row_text:
                continue

            prices = []
            for cell in cells:
                cell_text = cell.get_text().strip().replace(",", ".").replace("€", "")
                match = price_pattern.search(cell_text)
                if match is None:
                    continue
                # The pattern only matches "digits.digits", so float() cannot fail here.
                price = float(match.group(1))
                # Range filter drops per-kWh prices and other unrelated numbers.
                if 5.0 < price < 60.0:
                    prices.append(price)

            if len(prices) >= 6:
                # zip() stops at the six keys, so surplus prices are discarded.
                data = dict(zip(keys, prices))
                self.logger.info(f"Parsed subscription prices: {data}")
                return data

    return {}
273+
274+
def _get_fallback_offers(self) -> List[OfferData]:
275+
"""Generate offers from fallback pricing data (December 2025)"""
276+
offers = []
277+
278+
# Date: December 2025
279+
valid_from = datetime(2025, 12, 1, 0, 0, 0, 0, tzinfo=UTC)
150280

151281
# Électricité verte - Prix fixe - BASE
152282
for power, prices in self.FALLBACK_PRICES["FIXE_BASE"].items():
153283
offers.append(
154284
OfferData(
155285
name=f"Électricité verte - Prix fixe - Base {power} kVA",
156286
offer_type="BASE",
157-
description=f"Offre d'électricité 100% verte à prix fixe pendant 1 an - Option Base - {power} kVA",
287+
description=f"Offre d'électricité 100% verte à prix fixe - Option Base - {power} kVA",
158288
subscription_price=prices["subscription"],
159289
base_price=prices["kwh"],
160290
power_kva=power,
@@ -168,36 +298,7 @@ def _get_fallback_offers(self) -> List[OfferData]:
168298
OfferData(
169299
name=f"Électricité verte - Prix fixe - Heures Creuses {power} kVA",
170300
offer_type="HC_HP",
171-
description=f"Offre d'électricité 100% verte à prix fixe pendant 1 an - Heures Creuses - {power} kVA",
172-
subscription_price=prices["subscription"],
173-
hp_price=prices["hp"],
174-
hc_price=prices["hc"],
175-
power_kva=power,
176-
valid_from=valid_from,
177-
)
178-
)
179-
180-
# Électricité verte - Spéciale VE - BASE
181-
for power, prices in self.FALLBACK_PRICES["VE_BASE"].items():
182-
offers.append(
183-
OfferData(
184-
name=f"Électricité verte - Spéciale VE - Base {power} kVA",
185-
offer_type="BASE",
186-
description=f"Offre d'électricité 100% verte spéciale véhicule électrique - Option Base - {power} kVA",
187-
subscription_price=prices["subscription"],
188-
base_price=prices["kwh"],
189-
power_kva=power,
190-
valid_from=valid_from,
191-
)
192-
)
193-
194-
# Électricité verte - Spéciale VE - HC/HP
195-
for power, prices in self.FALLBACK_PRICES["VE_HC_HP"].items():
196-
offers.append(
197-
OfferData(
198-
name=f"Électricité verte - Spéciale VE - Heures Creuses {power} kVA",
199-
offer_type="HC_HP",
200-
description=f"Offre d'électricité 100% verte spéciale véhicule électrique - Heures Creuses - {power} kVA - HC renforcées",
301+
description=f"Offre d'électricité 100% verte à prix fixe - Heures Creuses - {power} kVA",
201302
subscription_price=prices["subscription"],
202303
hp_price=prices["hp"],
203304
hc_price=prices["hc"],

0 commit comments

Comments
 (0)