-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevents_example.py
More file actions
61 lines (38 loc) · 1.39 KB
/
events_example.py
File metadata and controls
61 lines (38 loc) · 1.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# import urllib.request
import requests
from bs4 import BeautifulSoup as bs
from html_table_parser import parser_functions as parse
import pprint
# Shared pretty-printer used by the script section to dump the scraped events.
pp = pprint.PrettyPrinter(indent=4, width=100)
__author__ = 'oswaldjones'
# Page downloaded by fetch_html(); the script reads the event rows from the
# second <table> element found in this page.
URL = 'https://apm.activecommunities.com/santamonicarecreation/Activity_Search'
def event_dict(name, first_date, number, category, days, time, location, open, action):
    """Build a plain dict describing one event from a row of table cells.

    Every argument is a cell object exposing a ``.text`` attribute. ``name``
    must additionally support ``.find('a')``, because the event link is read
    from the anchor element inside the name cell.

    Args:
        name: Cell holding the event name and (usually) a link to the event.
        first_date: Cell whose text is stored under ``'first_date'``.
        number: Cell whose text is stored under ``'number'``.
        category: Cell whose text is stored under ``'category'``.
        days: Cell whose text is stored under ``'days'``.
        time: Cell whose text is stored under ``'time'``.
        location: Cell whose text is stored under ``'location'``.
        open: Cell whose text is stored under ``'open'``. The name shadows
            the builtin but is kept so keyword callers keep working.
        action: Accepted so a full table row can be unpacked into this
            function; its value is intentionally not stored.

    Returns:
        dict with the keys ``name``, ``first_date``, ``number``, ``category``,
        ``days``, ``time``, ``location``, ``open`` and ``url``. ``url`` is the
        ``href`` of the first anchor in the name cell, or ``None`` when the
        cell contains no anchor.
    """
    # The url is taken from the link in the name cell. A cell without a link
    # yields None from find(), so guard before asking for the href.
    link = name.find('a')
    url = link.get('href') if link is not None else None

    # The action column is not interesting, so it is deliberately left out.
    return {
        'name': name.text,
        'first_date': first_date.text,
        'number': number.text,
        'category': category.text,
        'days': days.text,
        'time': time.text,
        'location': location.text,
        'open': open.text,
        'url': url,
    }
def fetch_html(timeout=30):
    """Download the page at ``URL`` and return its raw body.

    Args:
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 30.

    Returns:
        The response body as bytes (``Response.content``).

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
    """
    # Without an explicit timeout, requests can wait indefinitely on a
    # stalled connection.
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on error responses instead of handing an error page to the
    # HTML parser.
    page.raise_for_status()
    return page.content
if __name__ == '__main__':
    soup = bs(fetch_html(), "html.parser")
    # The event listing is read from the second <table> on the page.
    event_table = soup.find_all('table')[1]

    # text_only=False keeps the soup cells, which event_dict needs in order
    # to read the href attribute from the name column.
    grid = parse.make2d(event_table, text_only=False)

    # The first two rows are skipped (presumably header rows -- confirm
    # against the page); event_dict assigns explicit keys to each column.
    events = [event_dict(*cells) for cells in grid[2:]]

    pp.pprint(events)