Skip to content

Commit 5fec005

Browse files
authored
Add a table with names in English for any available mk_individual_id (#352)
* fix for newer SQLAlchemy * start to add a members_eng pipeline * data flow starts to work * add a slow option * use query to get mk_individual_id
1 parent d04e605 commit 5fec005

5 files changed

Lines changed: 90 additions & 3 deletions

File tree

airflow/knesset_data_pipelines/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def main(load_dotenv):
1616
for module_name, function_name in [
1717
('.google_drive_upload.cli', 'google_drive_upload'),
1818
('.committees.cli', 'committees'),
19+
( '.members_eng.cli', 'members_eng'),
1920
]:
2021
main.add_command(getattr(importlib.import_module(module_name, __package__), function_name))
2122

airflow/knesset_data_pipelines/committees/common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .. import db
2-
2+
from sqlalchemy import text
33

44
def get_committee_parents(committee, committees):
55
parent_committee_id = committee['parent_committee_id']
@@ -21,14 +21,14 @@ def get_committees_tree():
2121
'parent_committee_id': row.parent_committee_id,
2222
'name': row.name,
2323
'category_desc': row.category_desc,
24-
} for row in conn.execute('''
24+
} for row in conn.execute(text('''
2525
select
2626
"CommitteeID" as committee_id,
2727
"ParentCommitteeID" as parent_committee_id,
2828
"Name" as name,
2929
"CategoryDesc" as category_desc
3030
from committees_kns_committee
31-
''')
31+
'''))
3232
}
3333
for committee in committees.values():
3434
committee['parent_committee_ids'] = get_committee_parents(committee, committees)

airflow/knesset_data_pipelines/members_eng/__init__.py

Whitespace-only changes.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import click
2+
3+
4+
@click.group()
def members_eng():
    # Click group for the members_eng pipeline; the callback itself does
    # nothing.
    # NOTE(review): the command registered on this group further down is also
    # named `members_eng`, so that later definition rebinds the module-level
    # name and this group object is no longer reachable by name - confirm this
    # is intended.
    return None
7+
8+
@members_eng.command()
@click.option('--slow', is_flag=True)
def members_eng(**kwargs):
    '''Create a table of knesset member names in english
    '''
    # The pipeline module is imported only when the command runs
    # (presumably to keep CLI start-up light - TODO confirm).
    from .members_eng import main as run_pipeline

    # kwargs carries the parsed click options (currently only `slow`).
    run_pipeline(**kwargs)
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import os
2+
import requests
3+
from textwrap import dedent
4+
import time
5+
import traceback
6+
import json
7+
8+
import dataflows as DF
9+
from pyquery import PyQuery as pq
10+
11+
from .. import db, config
12+
from ..get_retry_response_content import get_retry_response_content
13+
14+
def get_members_id():
    """Yield every ``mk_individual_id`` returned by the oknesset backend.

    The members endpoint is queried twice - first with ``is_current=true``,
    then with ``is_current=false`` - and the ``mk_individual_id`` of each
    returned item is yielded in response order. Ids are not de-duplicated.
    The second request is only issued once the first list is exhausted.

    Raises:
        requests.HTTPError: if the backend answers with an error status.
        requests.Timeout: if the backend does not answer within the timeout.
    """
    url = "https://backend.oknesset.org/members"
    for is_current in ("true", "false"):
        # Without a timeout a stalled connection would hang the pipeline
        # forever; raise_for_status turns an HTTP error into a clear failure
        # instead of an obscure KeyError/TypeError on the error payload.
        response = requests.get(f"{url}?is_current={is_current}", timeout=60)
        response.raise_for_status()
        for item in response.json():
            yield item['mk_individual_id']
22+
23+
def iterate_members(slow: bool = False):
    """Yield a row with the English name of each Knesset member.

    For every id produced by ``get_members_id`` the member header is fetched
    from the knesset.gov.il API with ``languageKey=en``. Members whose details
    cannot be fetched, cannot be parsed, or have no ``Name`` are skipped, so a
    single bad response never aborts the whole run.

    Args:
        slow: when true, wait 20 instead of 10 seconds between retries and
            sleep one extra second after every successful fetch.

    Yields:
        dict with the keys ``NameEng`` and ``mk_individual_id``.
    """
    delay = 20 if slow else 10
    for member_id in get_members_id():
        url = f"https://knesset.gov.il/WebSiteApi/knessetapi/MKs/GetMkdetailsHeader?mkId={member_id}&languageKey=en"
        print(f"getting {url}")
        try:
            content = get_retry_response_content(
                url, None, None, None, retry_num=1,
                num_retries=10, seconds_between_retries=delay,
                skip_not_found_errors=True)
        except Exception:
            # Best effort: report the failure and move on to the next member.
            traceback.print_exc()
            print(f'failed to get {url}')
            continue
        if slow:
            time.sleep(1)
        try:
            data = json.loads(content)
        except (TypeError, ValueError):
            # TypeError: content is not str/bytes (e.g. None);
            # ValueError: content is not valid JSON (JSONDecodeError) or
            # cannot be decoded. Treated like any other unparsable response.
            data = None
        # Only a non-empty JSON object can carry a 'Name' field.
        if not data or not isinstance(data, dict):
            print(f" failed to parse {content=}")
            continue
        name = data.get('Name', '')
        if not name:
            continue
        yield {
            "NameEng": name,
            "mk_individual_id": member_id,
        }
52+
53+
def main(slow=False):
    """Build the ``member_english_names`` table.

    Rows from ``iterate_members`` are dumped to a CSV datapackage under
    ``<KNESSET_PIPELINES_DATA_PATH>/members/member_english_names`` and to a
    temporary SQL table. The temporary table is then swapped in for the real
    one inside a single transaction.

    Args:
        slow: forwarded to ``iterate_members``.
    """
    # Function-scope import so this block is self-contained; newer SQLAlchemy
    # versions no longer accept plain strings in Connection.execute().
    from sqlalchemy import text

    table_name = 'member_english_names'
    temp_table_name = f'__temp__{table_name}'
    DF.Flow(
        iterate_members(slow=slow),
        DF.update_resource(-1, name=table_name, path=f'{table_name}.csv'),
        DF.dump_to_path(os.path.join(config.KNESSET_PIPELINES_DATA_PATH, 'members', table_name)),
        DF.dump_to_sql(
            {temp_table_name: {'resource-name': table_name}},
            db.get_db_engine(),
            batch_size=100000,
        ),
    ).process()
    with db.get_db_engine().connect() as conn:
        # Drop + rename run in one transaction, so the swap is committed or
        # rolled back as a unit.
        with conn.begin():
            conn.execute(text(dedent(f'''
                drop table if exists {table_name};
                alter table {temp_table_name} rename to {table_name};
            ''')))
72+

0 commit comments

Comments
 (0)