Skip to content

Commit 73207bc

Browse files
author
Pablo Martin-Gomez
committed
Add Clikodoc platform
1 parent ea5ed32 commit 73207bc

File tree

8 files changed

+524
-3
lines changed

8 files changed

+524
-3
lines changed

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@ stats: ## Run the statistic scripts
2727
doctoscrap: ## Scrap all doctolib centers, output : data/output/doctolib-centers.json
2828
venv/bin/python -m scraper.doctolib.doctolib_center_scrap
2929

30-
keldocscrap: ## Scrap all doctolib centers, output : data/output/keldoc-centers.json
30+
keldocscrap: ## Scrap all keldoc centers, output : data/output/keldoc-centers.json
3131
venv/bin/python -m scraper.keldoc.keldoc_center_scrap
3232

3333
maiiascrap: ## Retrieve maiia centers from API
3434
venv/bin/python -m scraper.maiia.maiia_center_scrap
3535

36+
clikodocscrap: ## Scrap all clikodoc centers, output : data/output/clikodoc_centers.json
37+
venv/bin/python -m scraper.clikodoc.clikodoc_center_scrap
38+
3639
lint: install
3740
venv/bin/pip install black
3841
venv/bin/black $$(git ls-files | grep .py$$)

config.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,34 @@
398398
"0": "Dimanche"
399399
}
400400
}
401+
},
402+
"clikodoc": {
403+
"enabled": true,
404+
"timeout": 25,
405+
"recognized_urls": [
406+
"https://www.clikodoc.com"
407+
],
408+
"api": {
409+
"homepage": "https://www.clikodoc.com",
410+
"doctors": "https://www.clikodoc.com/getDoctorsListForAllState",
411+
"next_available_date": "https://www.clikodoc.com/foreapicntrl/nextavaildatelite?module=patients",
412+
"slots": "https://www.clikodoc.com/getSlotsForBookingV2"
413+
},
414+
"doctors_options": {
415+
"speciality_id": "9",
416+
"city_id": "0"
417+
},
418+
"slot_limit": 3,
419+
"days_limit": 50,
420+
"filters": {
421+
"first_injection_typeids": [
422+
"699",
423+
"737"
424+
]
425+
},
426+
"center_scraper": {
427+
"result_path": "data/output/clikodoc_centers.json"
428+
}
401429
}
402430
}
403431
}

scraper/clikodoc/clikodoc.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
import httpx
2+
from bs4 import BeautifulSoup
3+
import json
4+
from urllib.parse import urlencode
5+
from pathlib import Path
6+
import re
7+
8+
from functools import wraps
9+
from typing import NamedTuple, Optional, Tuple, Dict, List, Iterator
10+
from datetime import datetime, timedelta, date
11+
from dataclasses import dataclass, field
12+
13+
from scraper.pattern.center_info import INTERVAL_SPLIT_DAYS, CHRONODOSES
14+
from scraper.pattern.vaccine import Vaccine
15+
from scraper.pattern.scraper_request import ScraperRequest
16+
from scraper.pattern.scraper_result import GENERAL_PRACTITIONER
17+
from utils.vmd_logger import get_logger
18+
from utils.vmd_config import get_conf_platform, get_config
19+
from scraper.profiler import Profiling
20+
21+
import httpx
22+
from .clikodoc_api import get_next_available_date, get_slots, DEFAULT_CLIENT
23+
24+
# Platform section of the configuration; every setting below is read from it
# with a fallback so a missing key does not break the import.
CLIKODOC_CONF = get_conf_platform("clikodoc")
# When falsy, fetch_slots() and center_iterator() return without doing anything.
CLIKODOC_ENABLED = CLIKODOC_CONF.get("enabled", False)
# Base URL of the "data-auto" section of the global configuration.
DATA_AUTO_URL = get_config().get("data-auto", {}).get("base_url", "")
# Path of the JSON file holding the scraped centers.
CLIKODOC_CENTERS_PATH = CLIKODOC_CONF.get("center_scraper", {}).get("result_path", "")
# Number of days the slot search moves forward after each slots request.
SLOT_LIMIT = CLIKODOC_CONF.get("slot_limit", 3)
# Number of days after the first available date that are searched for slots.
DAYS_LIMIT = CLIKODOC_CONF.get("days_limit", 50)


logger = get_logger()

# Module-level cache of doctors, filled on first use by _populate_doctors_list().
doctors_list = []
35+
36+
37+
@dataclass(order=True)
class Slot:
    """A bookable appointment slot.

    Slots are ordered and compared by ``timestamp`` only, so ``min(slots)``
    yields the earliest slot whatever its motive.
    """

    # Start of the appointment.
    timestamp: datetime
    # Identifier of the visit motive; ignored by comparisons.
    motive: str = field(compare=False)
41+
42+
43+
def _populate_doctors_list() -> None:
    """Load the Clikodoc centers from the local JSON file into ``doctors_list``.

    The file location comes from ``CLIKODOC_CENTERS_PATH``. The module-level
    ``doctors_list`` is rebound to the parsed content.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    global doctors_list
    center_path = Path(CLIKODOC_CENTERS_PATH)
    # TODO: refreshing this file from f"{DATA_AUTO_URL}{center_path}" is
    # currently disabled; only the local copy is read.
    # JSON is decoded as UTF-8 explicitly rather than with the locale default,
    # so the result does not depend on the host configuration.
    data = json.loads(center_path.read_text(encoding="utf-8"))
    doctors_list = data
    logger.info(f"Found {len(data)} Clikodoc centers (external scraper).")
54+
55+
56+
def _get_first_available_date(doctorid: str, request: ScraperRequest = None) -> Optional[date]:
    """Return the next available date reported for a doctor, if any.

    Args:
        doctorid: identifier of the doctor passed to ``get_next_available_date``.
        request: scraper request forwarded to the API helper.

    Returns:
        The ``nextavaildate`` field parsed as a ``date`` when the response is
        truthy and its ``status`` is ``"success"``; ``None`` otherwise.
    """
    response = get_next_available_date(doctorid, request=request)

    if response and response["status"] == "success":
        return date.fromisoformat(response["nextavaildate"])
    return None
63+
64+
65+
def _get_slots(
    doctorid: str, motiveid: str, start_date: date, end_date: date, request: Optional[ScraperRequest] = None
) -> List[Slot]:
    """Collect the slots of one motive by walking forward from ``start_date``.

    ``get_slots`` is called for successive start dates, ``SLOT_LIMIT`` days
    apart, until ``end_date`` is reached or three consecutive requests come
    back without any slot.

    Args:
        doctorid: identifier of the doctor.
        motiveid: identifier of the visit motive; stored on every ``Slot``.
        start_date: first date requested.
        end_date: the walk stops once the requested date reaches it.
        request: scraper request forwarded to the API helper.

    Returns:
        Every slot found, in the order the API returned them.
    """
    max_empty_attempts = 3
    all_slots: List[Slot] = []

    window_start = start_date
    attempts_left = max_empty_attempts
    while window_start < end_date and attempts_left > 0:
        data = get_slots(doctorid, motiveid, window_start, request=request)
        # The next request always starts SLOT_LIMIT days later, whether or not
        # this one returned anything.
        window_start += timedelta(days=SLOT_LIMIT)

        # A falsy payload or one without a "slots" entry is handled like an
        # empty window instead of raising, mirroring the guard in
        # _get_first_available_date.
        # NOTE(review): assumes get_slots returns a dict or a falsy value - confirm.
        slots_by_day = data.get("slots") if data else None
        if not slots_by_day:
            attempts_left -= 1
            continue

        # Only consecutive empty windows count towards giving up.
        attempts_left = max_empty_attempts
        for day, day_slots in slots_by_day.items():
            all_slots.extend(
                Slot(datetime.strptime(f"{day} {slot['start']}", "%Y-%m-%d %H:%M"), motiveid) for slot in day_slots
            )

    return all_slots
84+
85+
86+
def _count_slots(slots: List["Slot"], start_date: datetime, end_date: datetime) -> int:
    """Count the slots whose timestamp lies within ``[start_date, end_date]``.

    Both bounds are inclusive: the caller builds ``end_date`` as the last
    second of the window, and a slot starting exactly at ``start_date``
    belongs to the window too.

    Args:
        slots: objects exposing a ``timestamp`` attribute.
        start_date: first instant of the window.
        end_date: last instant of the window.

    Returns:
        The number of slots inside the window.
    """
    return sum(1 for slot in slots if start_date <= slot.timestamp <= end_date)
88+
89+
90+
@Profiling.measure("clikodoc_slot")
def fetch_slots(request: ScraperRequest, client: httpx.Client = DEFAULT_CLIENT) -> Optional[str]:
    """Find the first online availability of the Clikodoc doctor behind ``request.url``.

    The doctor is looked up in ``doctors_list`` (loaded on first use) by its
    ``rdv_site_web``. Slots are then collected for every motive that is not
    phone-only, over ``DAYS_LIMIT`` days starting at the doctor's next
    available date. On success the request is updated with the vaccine type,
    the total slot count and the per-interval appointment schedules.

    Args:
        request: scraper request describing the center; updated in place.
        client: HTTP client; accepted for interface compatibility but not used
            by this function.

    Returns:
        The earliest slot timestamp as an ISO 8601 string, or ``None`` when
        the platform is disabled, the doctor is unknown, or no slot is found.
    """
    if not CLIKODOC_ENABLED:
        return None

    if not doctors_list:
        _populate_doctors_list()

    doctor = next((doc for doc in doctors_list if doc["rdv_site_web"] == request.url), None)
    if doctor is None:
        return None

    # Phone-only motives are skipped when collecting slots.
    bookable_motives = [motive for motive in doctor["motives"] if motive["onlyByPhone"] is not True]
    if not bookable_motives:
        return None

    # The next available date is requested per doctor, not per motive, so one
    # call is enough instead of one per motive.
    first_date = _get_first_available_date(doctor["user_id"], request=request)
    if not first_date:
        return None

    last_date = first_date + timedelta(days=DAYS_LIMIT)
    slots: List[Slot] = []
    for motive in bookable_motives:
        slots.extend(_get_slots(doctor["user_id"], motive["id"], first_date, last_date, request=request))

    if not slots:
        return None

    first_availability = min(slots).timestamp
    request.add_vaccine_type(Vaccine.ASTRAZENECA)
    request.update_appointment_count(len(slots))

    # Build the appointment_schedules array with names and dates. Every window
    # starts at the request start date and ends one second before the next day.
    start_date = datetime.fromisoformat(request.get_start_date())
    chronodose_end = start_date + timedelta(days=CHRONODOSES["Interval"], seconds=-1)
    # NOTE(review): the chronodose total is always reported as 0 here - confirm
    # this is intended for this platform.
    appointment_schedules = [
        {"name": "chronodose", "from": start_date.isoformat(), "to": chronodose_end.isoformat(), "total": 0}
    ]
    for n in INTERVAL_SPLIT_DAYS:
        end_date = start_date + timedelta(days=n, seconds=-1)
        appointment_schedules.append(
            {
                "name": f"{n}_days",
                "from": start_date.isoformat(),
                "to": end_date.isoformat(),
                "total": _count_slots(slots, start_date, end_date),
            }
        )

    request.update_appointment_schedules(appointment_schedules)
    logger.info(f"appointment_schedules: {request.appointment_schedules}")
    return first_availability.isoformat()
145+
146+
147+
def center_iterator() -> Iterator[Dict]:
    """Yield one center description per doctor in ``doctors_list``.

    Nothing is yielded when the platform is disabled. The doctors list is
    loaded on first use. Any exception raised while loading or converting a
    doctor is logged and ends the iteration.

    Yields:
        A dict describing the center: identifiers, address, phone number,
        coordinates, business hours, the ``"clikodoc"`` iterator tag and the
        ``GENERAL_PRACTITIONER`` type.
    """
    if not CLIKODOC_ENABLED:
        return
    try:
        if not doctors_list:
            _populate_doctors_list()
        for doctor in doctors_list:
            # Keys are written in the order downstream consumers receive them.
            yield {
                "gid": doctor["gid"],
                "rdv_site_web": doctor["rdv_site_web"],
                "com_cp": doctor["location"]["com_zipcode"],
                "com_insee": doctor["location"]["com_insee"],
                "address": doctor["location"]["full_address"],
                "nom": doctor["doctor_name"],
                "phone_number": doctor["phone"],
                "location": {
                    "long_coor1": doctor["location"]["longitude"],
                    "lat_coor1": doctor["location"]["latitude"],
                    "com_nom": doctor["location"]["com_name"],
                    "com_cp": doctor["location"]["com_zipcode"],
                },
                "iterator": "clikodoc",
                "type": GENERAL_PRACTITIONER,
                "business_hours": doctor["business_hours"],
            }
    except Exception:
        logger.exception("Unable to scrape clikodoc centers")
174+
175+
176+
if __name__ == "__main__":
    # Manual smoke run: print every center this platform exposes.
    # (The previous call targeted print_token, which this module neither
    # defines nor imports.)
    for center in center_iterator():
        print(center)

0 commit comments

Comments
 (0)