-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape_episodes.py
More file actions
30 lines (22 loc) · 900 Bytes
/
scrape_episodes.py
File metadata and controls
30 lines (22 loc) · 900 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import subprocess
import xml.etree.ElementTree as ET
from data import rssxml
# Parse the RSS document held in `rssxml` into an element tree; `root` is the
# top-level element that the item loop at the bottom of the script walks.
root = ET.XML(rssxml)
def download_item(item):
    """Write one RSS item's metadata to JSON and download its cover image.

    Args:
        item: An ``xml.etree.ElementTree.Element`` for an RSS ``<item>``.

    Side effects:
        * Runs ``wget`` to save the item's iTunes image to
          ``site/images/{guid}.jpg`` (skipped when the item has no image URL).
        * Writes the collected fields to ``metadata/{guid}.json`` as UTF-8.

    A missing ``<enclosure>`` or iTunes ``<image>`` element is recorded as
    ``None`` in the JSON instead of raising ``AttributeError``.
    """
    guid = item.findtext('guid')

    # find() returns None when the element is absent, so guard before .get().
    enclosure = item.find('enclosure')
    image = item.find('{http://www.itunes.com/dtds/podcast-1.0.dtd}image')

    data = {
        "guid": guid,
        "title": item.findtext('title'),
        "description": item.findtext('description'),
        "link": item.findtext('link'),
        "enclosure_url": enclosure.get('url') if enclosure is not None else None,
        "pubDate": item.findtext('pubDate'),
        "image": image.get('href') if image is not None else None,
    }

    if data['image']:
        # wget's exit status is not checked, so a failed image download does
        # not prevent the metadata file from being written.
        subprocess.run(["wget", "-O", f"site/images/{guid}.jpg", data['image']])

    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # is pinned to UTF-8 rather than left to the platform's locale default.
    with open(f'metadata/{guid}.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    # Audio download is currently disabled; uncomment to also fetch the enclosure:
    # subprocess.run(['wget', '-O', f'hasadnapodcasts/{guid}.mp3', data['enclosure_url']])
# Script driver: process every <item> under the feed's <channel>, in document
# order, handing each one to download_item.
for item in root.iterfind('channel/item'):
    download_item(item)