@@ -21,32 +21,31 @@ class PrimeoEnergiePriceScraper(BasePriceScraper):
2121 # Priméo Énergie pricing PDF URL
2222 TARIFF_PDF_URL = "https://particuliers.primeo-energie.fr/wp-content/uploads/GT-Offre-Fixe-20_.pdf"
2323
24- # Fallback: Manual pricing data (updated 2025-12-05 from PDF)
24+ # Fallback: Manual pricing data TTC (updated 2025-12-05 from PDF)
2525 # Source: https://particuliers.primeo-energie.fr/wp-content/uploads/GT-Offre-Fixe-20_.pdf
2626 # Prices valid from 04/08/2025 - price locked until 31/12/2026
27- # Note: -20% on the HT (excluding tax) kWh price compared to the TRV
27+ # Note: TTC prices (all taxes included)
2828 FALLBACK_PRICES = {  # tariff option -> power key (kVA) -> {"subscription", per-kWh price fields}
2929 "FIXE_BASE" : {
30- 3 : {"subscription" : 8.51 , "kwh" : 0.1327 },
31- 6 : {"subscription" : 11.07 , "kwh" : 0.1327 },
32- 9 : {"subscription" : 13.79 , "kwh" : 0.1327 },
33- 12 : {"subscription" : 16.51 , "kwh" : 0.1327 },
34- 15 : {"subscription" : 19.07 , "kwh" : 0.1327 },
35- 18 : {"subscription" : 21.60 , "kwh" : 0.1327 },
36- 24 : {"subscription" : 27.18 , "kwh" : 0.1327 },
37- 30 : {"subscription" : 32.45 , "kwh" : 0.1327 },
38- 36 : {"subscription" : 37.88 , "kwh" : 0.1327 },
30+ 3 : {"subscription" : 11.73 , "kwh" : 0.1634 },
31+ 6 : {"subscription" : 15.47 , "kwh" : 0.1634 },
32+ 9 : {"subscription" : 19.43 , "kwh" : 0.1634 },
33+ 12 : {"subscription" : 23.32 , "kwh" : 0.1634 },
34+ 15 : {"subscription" : 27.06 , "kwh" : 0.1634 },
35+ 18 : {"subscription" : 30.76 , "kwh" : 0.1634 },
36+ 24 : {"subscription" : 38.80 , "kwh" : 0.1634 },
37+ 30 : {"subscription" : 46.44 , "kwh" : 0.1634 },
38+ 36 : {"subscription" : 54.29 , "kwh" : 0.1634 },
3939 },
4040 "FIXE_HC_HP" : {  # the added (+) rows start at 6 kVA; only the removed (-) rows had a 3 kVA entry
41- 3 : {"subscription" : 11.74 , "hp" : 0.1434 , "hc" : 0.1147 },
42- 6 : {"subscription" : 15.47 , "hp" : 0.1434 , "hc" : 0.1147 },
43- 9 : {"subscription" : 19.39 , "hp" : 0.1434 , "hc" : 0.1147 },
44- 12 : {"subscription" : 23.32 , "hp" : 0.1434 , "hc" : 0.1147 },
45- 15 : {"subscription" : 27.06 , "hp" : 0.1434 , "hc" : 0.1147 },
46- 18 : {"subscription" : 30.76 , "hp" : 0.1434 , "hc" : 0.1147 },
47- 24 : {"subscription" : 38.80 , "hp" : 0.1434 , "hc" : 0.1147 },
48- 30 : {"subscription" : 46.44 , "hp" : 0.1434 , "hc" : 0.1147 },
49- 36 : {"subscription" : 54.29 , "hp" : 0.1434 , "hc" : 0.1147 },
41+ 6 : {"subscription" : 15.74 , "hp" : 0.1736 , "hc" : 0.1380 },
42+ 9 : {"subscription" : 19.81 , "hp" : 0.1736 , "hc" : 0.1380 },
43+ 12 : {"subscription" : 23.76 , "hp" : 0.1736 , "hc" : 0.1380 },
44+ 15 : {"subscription" : 27.49 , "hp" : 0.1736 , "hc" : 0.1380 },
45+ 18 : {"subscription" : 31.34 , "hp" : 0.1736 , "hc" : 0.1380 },
46+ 24 : {"subscription" : 39.47 , "hp" : 0.1736 , "hc" : 0.1380 },
47+ 30 : {"subscription" : 47.02 , "hp" : 0.1736 , "hc" : 0.1380 },
48+ 36 : {"subscription" : 54.61 , "hp" : 0.1736 , "hc" : 0.1380 },
5049 },
5150 }
5251
@@ -113,7 +112,7 @@ def _parse_pdf(self, text: str) -> List[OfferData]:
113112 - HC/HP option: subscription prices per kVA + HP and HC prices
114113
115114 The PDF text is extracted with pdfminer and contains mixed tables.
116- We need to parse the HT (hors taxes) prices, not TTC .
115+ We extract the TTC (toutes taxes comprises ) prices from the lower table .
117116 """
118117 offers = []
119118 valid_from = datetime .now (UTC ).replace (day = 1 , hour = 0 , minute = 0 , second = 0 , microsecond = 0 )
@@ -162,25 +161,24 @@ def _parse_pdf(self, text: str) -> List[OfferData]:
162161
163162 def _extract_base_prices (self , text : str ) -> dict :
164163 """
165- Extract BASE tariff prices from PDF text.
164+ Extract BASE tariff TTC prices from PDF text.
166165
167- The PDF text when split by 'kVA' gives parts like:
168- - Part 1: "8,516 " = price 8.51 for 3 kVA, "6" is start of next power
169- - Part 2: "11,0711,309 " = price 11.07 for 6 kVA (+ TRV), "9" is next power
170- etc.
166+ The PDF structure concatenates values like: "8,516 kVA" where 8,51 is for 3 kVA.
167+ For BASE, there's only the Primeo price (no TRV column visible in data).
171168
172- BASE section has 9 powers (3-36 kVA), then HC/HP section follows.
169+ The BASE subscriptions in the PDF are actually HT values.
170+ We need to look at the "Tarif TTC" section for kWh prices.
171+
172+ TTC BASE kWh price: 0,1634 €/kWh (found in Tarif TTC section)
173+ BASE subscriptions: We use the values from the table (HT basis, same as display)
173174 """
174175 prices = {}
175176
176- # Extract the kWh BASE price (HT) - look for 0,1327 pattern
177- kwh_price = 0.1327 # Default
178- kwh_matches = re .findall (r"0[,\.]1[23]\d{2}" , text )
179- for m in kwh_matches :
180- val = float (m .replace ("," , "." ))
181- if 0.12 < val < 0.15 :
182- kwh_price = val
183- break
177+ # Extract the kWh BASE price TTC - look for 0,1634 pattern
178+ kwh_price = 0.1634 # Default TTC
179+ kwh_match = re .search (r"0[,\.]163\d" , text )
180+ if kwh_match :
181+ kwh_price = float (kwh_match .group (0 ).replace ("," , "." ))
184182
185183 # Split by 'kVA' and parse each part
186184 parts = text .split ("kVA" )
@@ -189,8 +187,7 @@ def _extract_base_prices(self, text: str) -> dict:
189187 base_powers = [3 , 6 , 9 , 12 , 15 , 18 , 24 , 30 , 36 ]
190188 subscription_mapping = {}
191189
192- # Find the starting index for BASE section
193- # BASE section starts after headers, look for part containing "3 "
190+ # Find the starting index for BASE section (first "3 " pattern)
194191 start_idx = None
195192 for i , part in enumerate (parts ):
196193 if part .strip ().endswith ("3 " ) or part .strip ().endswith ("3" ) or "3 " in part [- 5 :]:
@@ -202,31 +199,31 @@ def _extract_base_prices(self, text: str) -> dict:
202199 part_idx = start_idx + i
203200 if part_idx < len (parts ):
204201 part = parts [part_idx ]
205- # Extract the first price from this part (Primeo price)
206- # Format: "8,516 " -> price is 8,51 (exactly 2 decimals)
202+ # Extract the first price (Primeo price - these are the displayed values)
207203 price_match = re .match (r"(\d+[,\.]\d{2})" , part )
208204 if price_match :
209205 price = float (price_match .group (1 ).replace ("," , "." ))
210- if 5 < price < 45 : # Valid subscription range for BASE
206+ if 5 < price < 45 : # Valid subscription range
211207 subscription_mapping [power ] = price
212208
213209 # Fallback to hardcoded values if extraction failed
210+ # Note: These are the values displayed in the PDF (effective prices)
214211 fallback = {
215- 3 : 8.51 ,
216- 6 : 11.07 ,
217- 9 : 13.79 ,
218- 12 : 16.51 ,
219- 15 : 19.07 ,
220- 18 : 21.60 ,
221- 24 : 27.18 ,
222- 30 : 32.45 ,
223- 36 : 37.88 ,
212+ 3 : 11.73 ,
213+ 6 : 15.47 ,
214+ 9 : 19.43 ,
215+ 12 : 23.32 ,
216+ 15 : 27.06 ,
217+ 18 : 30.76 ,
218+ 24 : 38.80 ,
219+ 30 : 46.44 ,
220+ 36 : 54.29 ,
224221 }
225222 for power in fallback :
226223 if power not in subscription_mapping :
227224 subscription_mapping [power ] = fallback [power ]
228225
229- # Build the prices dict
226+ # Build the prices dict with TTC kWh price
230227 for power , subscription in subscription_mapping .items ():
231228 prices [power ] = {
232229 "subscription" : subscription ,
@@ -237,84 +234,93 @@ def _extract_base_prices(self, text: str) -> dict:
237234
238235 def _extract_hc_hp_prices (self , text : str ) -> dict :
239236 """
240- Extract HC/HP tariff prices from PDF text.
237+ Extract HC/HP tariff TTC prices from PDF text.
238+
239+ The PDF concatenates values like: "15,4715,749 kVA" where:
240+ - 15,47 is Primeo HT price for 6 kVA
241+ - 15,74 is TRV/TTC price for 6 kVA
242+ - 9 is the start of next power (9 kVA)
243+
244+ We extract the SECOND price (TTC) from each part.
241245
242- HC/HP section comes after BASE section in the PDF.
243- The split parts look like:
244- - Part 10: "11,746 " = price 11.74 for 3 kVA (HC/HP)
245- - Part 11: "15,4715,749 " = price 15.47 for 6 kVA
246- etc .
246+ TTC kWh prices:
247+ - HP TTC: 0,1736 €/kWh
248+ - HC TTC: 0,1380 €/kWh
249+
250+ Note: HC/HP starts at 6 kVA (no 3 kVA option for HC/HP) .
247251 """
248252 prices = {}
249253
250- # Extract HP and HC kWh prices (HT)
251- hp_price = 0.1434 # Default
252- hc_price = 0.1147 # Default
253-
254- # Look for HP pattern (around 0.14xx)
255- hp_match = re .search (r"0[,\.]14\d{2}" , text )
254+ # Extract HP and HC kWh prices TTC
255+ hp_price = 0.1736 # Default TTC
256+ hp_match = re .search (r"0[,\.]173\d" , text )
256257 if hp_match :
257258 hp_price = float (hp_match .group (0 ).replace ("," , "." ))
258259
259- # Look for HC pattern (around 0.11xx)
260- hc_match = re .search (r"0[,\.]11\d{2} " , text )
260+ hc_price = 0.1380 # Default TTC
261+ hc_match = re .search (r"0[,\.]138\d " , text )
261262 if hc_match :
262263 hc_price = float (hc_match .group (0 ).replace ("," , "." ))
263264
264265 # Split by 'kVA' and parse HC/HP section
265266 parts = text .split ("kVA" )
266267
267- # HC/HP powers (no 3 kVA in standard HC/HP, but Primeo might include it )
268- hchp_powers = [3 , 6 , 9 , 12 , 15 , 18 , 24 , 30 , 36 ]
268+ # HC/HP powers (starts at 6 kVA )
269+ hchp_powers = [6 , 9 , 12 , 15 , 18 , 24 , 30 , 36 ]
269270 subscription_mapping = {}
270271
271- # Find the starting index for HC/HP section
272- # It comes after BASE section (9 entries) and some headers
273- # Look for the second occurrence of "3 " pattern (HC/HP table)
272+ # Find the HC/HP section (2nd occurrence of "3 " pattern)
274273 occurrences = []
275274 for i , part in enumerate (parts ):
276275 if part .strip ().endswith ("3 " ) or part .strip ().endswith ("3" ) or (len (part ) > 2 and "3 " in part [- 5 :]):
277276 occurrences .append (i )
278277
279- # The second occurrence is the HC/HP section
278+ # The 2nd occurrence (index 1) is the HC/HP section
280279 if len (occurrences ) >= 2 :
281- start_idx = occurrences [1 ] + 1
280+ start_idx = occurrences [1 ] + 1 # Start after the "3 " marker (which is 3 kVA HT entry)
281+ # Part at start_idx is for 3 kVA (11,74), next part (start_idx + 1) is for 6 kVA
282+ start_idx += 1 # Skip 3 kVA, start from 6 kVA
283+
282284 for i , power in enumerate (hchp_powers ):
283285 part_idx = start_idx + i
284286 if part_idx < len (parts ):
285287 part = parts [part_idx ]
286- # Extract the first price from this part (exactly 2 decimals)
287- price_match = re .match (r"(\d+[,\.]\d{2})" , part )
288- if price_match :
289- price = float (price_match .group (1 ).replace ("," , "." ))
290- if 10 < price < 60 : # Valid subscription range for HC/HP
288+ # Extract the SECOND price (TTC) from this part
289+ # Format: "15,4715,749" -> first=15,47 (HT), second=15,74 (TTC)
290+ all_prices = re .findall (r"(\d+[,\.]\d{2})" , part )
291+ if len (all_prices ) >= 2 :
292+ # Second price is TTC
293+ price = float (all_prices [1 ].replace ("," , "." ))
294+ if 10 < price < 60 : # Valid TTC subscription range
295+ subscription_mapping [power ] = price
296+ elif len (all_prices ) == 1 :
297+ # Only one price found, use it (might be the last entry)
298+ price = float (all_prices [0 ].replace ("," , "." ))
299+ if 10 < price < 60 :
291300 subscription_mapping [power ] = price
292301
293- # Fallback to hardcoded values
302+ # Fallback to hardcoded TTC values
294303 fallback = {
295- 3 : 11.74 ,
296- 6 : 15.47 ,
297- 9 : 19.39 ,
298- 12 : 23.32 ,
299- 15 : 27.06 ,
300- 18 : 30.76 ,
301- 24 : 38.80 ,
302- 30 : 46.44 ,
303- 36 : 54.29 ,
304+ 6 : 15.74 ,
305+ 9 : 19.81 ,
306+ 12 : 23.76 ,
307+ 15 : 27.49 ,
308+ 18 : 31.34 ,
309+ 24 : 39.47 ,
310+ 30 : 47.02 ,
311+ 36 : 54.61 ,
304312 }
305313 for power in fallback :
306314 if power not in subscription_mapping :
307315 subscription_mapping [power ] = fallback [power ]
308316
309- # Build the prices dict (exclude 3 kVA if not valid for HC/HP )
317+ # Build the prices dict (HC/HP is 6+ kVA only )
310318 for power , subscription in subscription_mapping .items ():
311- # Standard HC/HP is 6+ kVA, but include 3 if Primeo offers it
312- if power >= 3 :
313- prices [power ] = {
314- "subscription" : subscription ,
315- "hp" : hp_price ,
316- "hc" : hc_price ,
317- }
319+ prices [power ] = {
320+ "subscription" : subscription ,
321+ "hp" : hp_price ,
322+ "hc" : hc_price ,
323+ }
318324
319325 return prices
320326
0 commit comments