File tree Expand file tree Collapse file tree 3 files changed +6
-9
lines changed
Expand file tree Collapse file tree 3 files changed +6
-9
lines changed Original file line number Diff line number Diff line change 2121# Crawl responsibly by identifying yourself (and your website) on the user-agent
2222USER_AGENT = "jedeschule (+http://jedeschule.codefor.de/docs)"
2323
24- REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
25-
2624# Obey robots.txt rules
2725ROBOTSTXT_OBEY = False
2826
Original file line number Diff line number Diff line change @@ -61,9 +61,9 @@ def normalize(item: Item) -> School:
6161 return School (
6262 name = item .get ("name" ).strip (),
6363 id = "HB-{}" .format (item .get ("id" )),
64- address = re .split ("\d{5}" , item .get ("Anschrift:" ).strip ())[0 ].strip (),
65- zip = re .findall ("\d{5}" , item .get ("Anschrift:" ).strip ())[0 ],
66- city = re .split ("\d{5}" , item .get ("Anschrift:" ).strip ())[1 ].strip (),
64+ address = re .split (r"\d{5}" , item .get ("Anschrift:" ).strip ())[0 ].strip (),
65+ zip = re .findall (r"\d{5}" , item .get ("Anschrift:" ).strip ())[0 ],
66+ city = re .split (r"\d{5}" , item .get ("Anschrift:" ).strip ())[1 ].strip (),
6767 website = item .get ("Internet" ).strip () if item .get ("Internet" ) else None ,
6868 email = item .get ("E-Mail-Adresse" ).strip (),
6969 fax = BremenSpider .fix_number (item .get ("Telefax" )),
Original file line number Diff line number Diff line change @@ -54,10 +54,9 @@ def normalize(item: Item) -> School:
5454 return School (
5555 name = item .get ("Name" ),
5656 id = "ST-{}" .format (item .get ("ID" )),
57- address = re .split ("\d{5}" , item .get ("Adresse" ).strip ())[0 ].strip (),
58- zip = re .findall ("\d{5}" , item .get ("Adresse" ).strip ())[0 ],
59- city = re .split ("\d{5}" , item .get ("Adresse" ).strip ())[1 ].strip (),
60- # address=item.get('Adresse'),
57+ address = re .split (r"\d{5}" , item .get ("Adresse" ).strip ())[0 ].strip (),
58+ zip = re .findall (r"\d{5}" , item .get ("Adresse" ).strip ())[0 ],
59+ city = re .split (r"\d{5}" , item .get ("Adresse" ).strip ())[1 ].strip (),
6160 website = item .get ("Homepage" ),
6261 email = item .get ("E-Mail" ),
6362 fax = item .get ("Telefax" ),
You can’t perform that action at this time.
0 commit comments