Skip to content

Commit 8a24d6d

Browse files
committed
[RP] Adapt to new website
1 parent d4b8f95 commit 8a24d6d

File tree

1 file changed: +11 -6 lines changed

1 file changed: +11 -6 lines changed

jedeschule/spiders/rheinland_pfalz.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,24 +11,25 @@ class RheinlandPfalzSpider(CrawlSpider, SchoolSpider):
1111
name = "rheinland-pfalz"
1212
# Note, one could also use the geo portal:
1313
# https://www.geoportal.rlp.de/spatial-objects/350/collections/schulstandorte/items?f=html&limit=4000
14-
start_urls = ["https://schulen.bildung-rp.de"]
14+
start_urls = ["https://bildung.rlp.de/schulen"]
1515
rules = [
1616
Rule(
17-
LinkExtractor(allow="https://schulen.bildung-rp.de/einzelanzeige.html?.*"),
17+
LinkExtractor(allow="https://bildung.rlp.de/schulen/einzelanzeige.*"),
1818
callback="parse_school",
1919
follow=False,
2020
)
2121
]
2222

2323
# get the information
2424
def parse_school(self, response):
25-
container = response.css("#wfqbeResults")
25+
container = response.css(".rlp-schooldatabase-detail")
2626
item = {"name": container.css("h1::text").get()}
2727
for row in container.css("tr"):
2828
key, value = row.css("td")
2929
value_parts = value.css("*::text").extract()
30+
cleaned = [part.strip() for part in value_parts]
3031
item[key.css("::text").extract_first().replace(":", "")] = (
31-
value_parts[0] if len(value_parts) == 1 else value_parts
32+
cleaned[0] if len(cleaned) == 1 else cleaned
3233
)
3334
item["id"] = item["Schulnummer"]
3435

@@ -39,9 +40,13 @@ def parse_school(self, response):
3940
yield item
4041

4142
def normalize(self, item: Item) -> School:
42-
zip, city = item.get("Anschrift")[-1].split("\xa0")
43+
zip, city = item.get("Anschrift")[-1].rsplit(" ")
4344
email_parts = item.get("E-Mail")
44-
email = email_parts[0].replace("(at)", "@") + email_parts[2]
45+
email = (
46+
email_parts[0].replace("(at)", "@") + email_parts[2]
47+
if email_parts is not None
48+
else None
49+
)
4550
return School(
4651
name=item.get("name"),
4752
id="RP-{}".format(item.get("id")),

0 commit comments

Comments
 (0)