Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 12 additions & 24 deletions jedeschule/spiders/hamburg.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,28 @@
import xml.etree.ElementTree as ET

from scrapy import Item

from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.items import School
from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.wfs_basic_parsers import parse_geojson_features


class HamburgSpider(SchoolSpider):
name = "hamburg"

start_urls = [
"https://geodienste.hamburg.de/HH_WFS_Schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=de.hh.up:nicht_staatliche_schulen,de.hh.up:staatliche_schulen&srsname=EPSG:4326"
"https://api.hamburg.de/datasets/v1/schulen/collections/staatliche_schulen/items"
"?limit=1000",
"https://api.hamburg.de/datasets/v1/schulen/collections/nicht_staatliche_schulen/items"
"?limit=1000"
]


def parse(self, response):
namespaces = {
"gml": "http://www.opengis.net/gml",
custom_settings = {
"DEFAULT_REQUEST_HEADERS": {
"Accept": "application/geo+json, application/json, */*"
}
}

elem = ET.fromstring(response.body)

for member in elem:
data_elem = {}
for attr in member[0]:
if attr.tag == "{https://registry.gdi-de.org/id/de.hh.up}the_geom":
# This nested entry contains the coordinates that we would like to expand
lon, lat = attr.findtext(
"gml:Point/gml:pos", namespaces=namespaces
).split(" ")
data_elem["lat"] = lat
data_elem["lon"] = lon
continue
# strip the namespace before returning
data_elem[attr.tag.split("}", 1)[1]] = attr.text
yield data_elem
def parse(self, response, **kwargs):
    """Yield every item produced by ``parse_geojson_features`` for *response*.

    The extra keyword arguments are accepted but not used here.
    """
    # NOTE(review): the helper appears to yield one properties dict per
    # GeoJSON feature with "lon"/"lat" added -- confirm in wfs_basic_parsers.
    yield from parse_geojson_features(response)

@staticmethod
def normalize(item: Item) -> School:
Expand Down
1 change: 0 additions & 1 deletion jedeschule/wfs_basic_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@ def parse_geojson_features(response: Response):
properties["lon"] = coords[0]
properties["lat"] = coords[1]


yield properties
95 changes: 95 additions & 0 deletions test/test_hamburg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import unittest

from scrapy.http import TextResponse

from jedeschule.spiders.hamburg import HamburgSpider


class TestHamburgSpider(unittest.TestCase):
    """Exercise ``HamburgSpider.parse`` against a canned GeoJSON response."""

    def test_parse(self):
        """A one-feature collection yields one record with coordinates and contact data."""
        # Sample feature collection containing a single school feature.
        payload = """
{
"type": "FeatureCollection",
"numberReturned": 1,
"numberMatched": 453,
"timeStamp": "2025-07-14T19:20:02Z",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
10.047106063058099,
53.601522503676144
]
},
"properties": {
"abschluss": "Allgemeine Hochschulreife|erster allgemeinbildender Schulabschluss|Erweiterter erster allgemeinbildender Schulabschluss|mittlerer Schulabschluss|schulischer Teil der Fachhochschulreife",
"adresse_ort": "22307 Hamburg",
"adresse_strasse_hausnr": "Benzenbergweg 2",
"ansprechp_klasse_5": "Nadine Kalsow",
"ansprechp_buero": "Janka Gierck",
"anzahl_schueler": 996,
"anzahl_schueler_gesamt": "1261 an 2 Standorten",
"bezirk": "Hamburg-Nord",
"fax": "+49 40 428 88 15 22",
"fremdsprache": "Englisch|Französisch|Spanisch|Spanisch",
"fremdsprache_mit_klasse": "Englisch ab Klasse 5|Französisch ab Klasse 7|Spanisch ab Klasse 11|Spanisch ab Klasse 7",
"ganztagsform": "GTS teilweise gebunden",
"is_rebbz": "true",
"kapitelbezeichnung": "Stadtteilschulen",
"lgv_standortk_erwachsenenbildung": "No",
"name_schulleiter": "Bianca Thies",
"name_stellv_schulleiter": "Christian Pape",
"name_oberstufenkoordinator": "Frau Scheuermann-Andersen *49 40 428 88 15-61",
"name_verwaltungsleitung": "Grit Sobottka",
"rebbz_homepage": "http://rebbz-winterhude.hamburg.de/",
"rechtsform": "staatlich",
"schueleranzahl_schuljahr": "2024",
"schul_email": "[email protected]",
"schul_homepage": "https://helmuthhuebener.de",
"schul_id": "5043-0",
"schul_telefonnr": "+49 40 428 88 15 0",
"schulaufsicht": "Christine Zopff",
"schulform": "Stadtteilschule",
"schulinspektion_link": "https://www.hamburg.de/politik-und-verwaltung/behoerden/schulbehoerde/themen/schulaufsicht/inspektionsberichte/weiterfuehrende-schulen-hamburg-nord",
"schulname": "Stadtteilschule Helmuth Hübener",
"schultyp": "Hauptstandort",
"sozialindex": "Stufe 2",
"stadtteil": "Barmbek-Nord",
"standort_id": "431",
"zuegigkeit_kl_5": "7",
"zustaendiges_rebbz": "ReBBZ Winterhude"
},
"id": 875415
}
],
"links": []
}
"""

        # Wrap the payload in a response object, then run it through the spider.
        fake_response = TextResponse(
            url="http://test_webserver.com",
            body=payload.encode("utf-8"),
            encoding="utf-8",
        )
        hamburg = HamburgSpider()

        records = list(hamburg.parse(fake_response))
        self.assertEqual(len(records), 1)

        record = records[0]

        # Coordinates are floats, so compare them approximately.
        self.assertAlmostEqual(record["lon"], 10.047106063058099)
        self.assertAlmostEqual(record["lat"], 53.601522503676144)

        # Plain string properties must come through unchanged.
        expected_values = (
            ("schul_id", "5043-0"),
            ("schulname", "Stadtteilschule Helmuth Hübener"),
            ("adresse_ort", "22307 Hamburg"),
            ("adresse_strasse_hausnr", "Benzenbergweg 2"),
            ("schul_telefonnr", "+49 40 428 88 15 0"),
            ("fax", "+49 40 428 88 15 22"),
            ("schul_email", "[email protected]"),
            ("schul_homepage", "https://helmuthhuebener.de"),
        )
        for field, wanted in expected_values:
            self.assertEqual(record[field], wanted)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()