-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathclient.py
More file actions
44 lines (34 loc) · 1.26 KB
/
client.py
File metadata and controls
44 lines (34 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import logging
import requests
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
class ArticleNotFound(RuntimeError):
    """Raised when an article query returns no matching page.

    Subclasses ``RuntimeError`` so existing handlers for that type keep
    working.
    """
class Client(requests.Session):
    """MediaWiki API client built on top of ``requests.Session``."""

    def __init__(self, base_url='wikipedia.org/w/api.php', lang="en"):
        """Create a session bound to a single API endpoint.

        :param base_url: API endpoint. A value starting with
            ``wikipedia.org`` is treated as scheme-less and expanded to
            ``https://<lang>.<base_url>``; any other value is used verbatim.
        :param lang: language subdomain used only when ``base_url`` is
            expanded as described above.
        """
        super().__init__()
        # The MediaWiki Action API endpoint is ``/w/api.php``; the previous
        # default (``/w/api/php``) pointed at a path that does not exist.
        if base_url.startswith('wikipedia.org'):
            self.base_url = f'https://{lang}.{base_url}'
        else:
            self.base_url = base_url

    def fetch_page(self, title, method='GET'):
        """Query for a page by title and return its entry from the response.

        :param title: page title, or a URL that ``_parse_title`` reduces to
            a title.
        :param method: HTTP method passed to ``Session.request``.
        :returns: the value stored under the first key of
            ``response["query"]["pages"]``.
        :raises ArticleNotFound: if the API reports no matching page.
        :raises requests.HTTPError: if the response status is an error
            (via ``raise_for_status``).
        """
        params = {
            'prop': 'revisions',
            'format': 'json',
            'action': 'query',
            # NOTE(review): 'explaintext' is documented for prop=extracts;
            # presumably ignored with prop=revisions - confirm before removing.
            'explaintext': '',
            'titles': _parse_title(title),
            'rvprop': 'content',
        }
        response = self.request(method, self.base_url, params=params)
        response.raise_for_status()
        pages = response.json()["query"]["pages"]

        # Only one title is requested, so use the first key of 'pages'.
        # An empty mapping falls through to the same "not found" path
        # instead of surfacing as a bare IndexError.
        page_id = next(iter(pages), '-1')
        if page_id == '-1':
            raise ArticleNotFound('no matching articles returned')
        return pages[page_id]
def _parse_title(s):
# extract title from, potentially, a URL
return s.split('/')[-1].split('#')[0].split('?')[0]