Skip to content

Commit b839ac4

Browse files
authored
Merge pull request #671 from praekeltfoundation/update-mcxp-migration-scripts
Update MCxP migration scripts
2 parents 8cb7dc1 + d2d27f3 commit b839ac4

File tree

4 files changed

+194
-16
lines changed

4 files changed

+194
-16
lines changed

scripts/migrate_to_turn/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ The `FIELD_MAPPING` variable should be updated with all the fields we want to mo
1717

1818
The script will write all the contacts to a file with the start and end date in the name.
1919

20-
It will also output the latest modified on date in the batch, this can then be used as a start date to get the next batch.
20+
It will also output the oldest modified-on date in the batch; this can then be used as the end date to fetch the next batch when syncing from newest to oldest.
2121

2222
### update_turn_contacts.py
2323

@@ -82,9 +82,9 @@ The key is the field name in Rapidpro, the value determines the rest:
8282
1. Make sure the `FIELD_MAPPING` is configured
8383
1. Set the required environment variables: `TURN_TOKEN` and `RAPIDPRO_TOKEN`.
8484
1. Update the start date, end date, limit and field mapping in `fetch_rapidpro_contacts.py` script
85-
1. Run `python scripts/migrate_to_turn/fetch_rapidpro_contacts.py` and take note of the last modified date and the filename.
85+
1. Run `python scripts/migrate_to_turn/fetch_rapidpro_contacts.py` and take note of the oldest modified date and the filename.
8686
1. Run `python scripts/migrate_to_turn/update_turn_contacts.py contacts-2025-01-01-2025-01-07.csv > update_turn_contacts.json`
8787
1. Use jq to check if there were any errors `jq .response.status update_turn_contacts.json | sort | uniq -c`
8888
1. To retry errors, run `cat update_turn_contacts.json | python scripts/migrate_to_rapidpro/retry_requests.py > update_turn_contacts2.json`
8989
1. Repeat previous two steps until all contacts successfully completed.
90-
1. Update the start and end date in `fetch_rapidpro_contacts.py` script. Repeat from step 3.
90+
1. Update the end date in the `fetch_rapidpro_contacts.py` script to the oldest modified date reported by the previous batch. Repeat from step 3.

scripts/migrate_to_turn/fetch_rapidpro_contacts.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,42 @@
33
from datetime import datetime
44

55
import pytz
6-
from process_fields import process_datetime
6+
from process_fields import (
7+
get_user_babies,
8+
get_user_tier,
9+
get_user_type,
10+
process_datetime,
11+
to_lowercase,
12+
)
713
from temba_client.v2 import TembaClient
814

915
RAPIDPRO_URL = "https://rapidpro.qa.momconnect.co.za"
1016

11-
START_DATE = "2025-01-20 01:13:06"
12-
END_DATE = "2025-01-20 19:13:06"
17+
START_DATE = "2025-11-01 01:13:06"
18+
END_DATE = "2026-01-12 19:13:06"
1319
LIMIT = 1000
1420

1521
# TODO: add all the fields here: <rapidpro-field-name>: <details>
1622
FIELD_MAPPING = {
17-
"edd": {"turn_name": "test", "process": process_datetime, "type": "custom"},
23+
"edd": {
24+
"turn_name": "pregnancy_expected_due_date",
25+
"process": process_datetime,
26+
"type": "custom",
27+
},
1828
"name": {"turn_name": "name", "type": "default"},
1929
"language": {"turn_name": "language", "type": "default"},
30+
"research_consent": {
31+
"turn_name": "research_consent",
32+
"process": to_lowercase,
33+
"type": "custom",
34+
},
35+
"clinic_code": {"turn_name": "clinic_code", "type": "custom"},
36+
}
37+
38+
NEW_TURN_FIELD_MAPPING = {
39+
"user_tier": {"process": get_user_tier},
40+
"user_type": {"process": get_user_type},
41+
"babies": {"process": get_user_babies},
2042
}
2143

2244
client = TembaClient(RAPIDPRO_URL, os.environ["RAPIDPRO_TOKEN"])
@@ -30,11 +52,14 @@ def get_field_data(contact):
3052
if turn_details["type"] == "default":
3153
data[turn_field] = getattr(contact, rapidpro_field)
3254
else:
33-
data[turn_field] = contact.fields[rapidpro_field]
55+
data[turn_field] = contact.fields.get(rapidpro_field)
3456

3557
if "process" in turn_details:
3658
data[turn_field] = turn_details["process"](data[turn_field])
3759

60+
for new_field, details in NEW_TURN_FIELD_MAPPING.items():
61+
data[new_field] = details["process"](contact)
62+
3863
return data
3964

4065

@@ -52,9 +77,9 @@ def is_opted_out(contact):
5277
def get_rapidpro_contacts(start_date=None, end_date=None):
5378
print(f"> Getting rapidpro contacts from {start_date} to {end_date}")
5479
contacts = []
55-
latest_date = start_date.replace(tzinfo=pytz.utc)
80+
oldest_date = end_date.replace(tzinfo=pytz.utc)
5681
for contact_batch in client.get_contacts(
57-
before=end_date, after=start_date, reverse=True
82+
before=end_date, after=start_date
5883
).iterfetches(retry_on_rate_exceed=True):
5984
for contact in contact_batch:
6085
wa_id = get_wa_id(contact)
@@ -68,23 +93,23 @@ def get_rapidpro_contacts(start_date=None, end_date=None):
6893
contacts.append(data)
6994

7095
modified_on = contact.modified_on.astimezone(pytz.utc)
71-
if modified_on > latest_date:
72-
latest_date = modified_on
96+
if modified_on < oldest_date:
97+
oldest_date = modified_on
7398

7499
if len(contacts) >= LIMIT:
75-
return contacts, latest_date
100+
return contacts, oldest_date
76101

77-
return contacts, latest_date
102+
return contacts, oldest_date
78103

79104

80105
if __name__ == "__main__":
81106
start_date = datetime.strptime(START_DATE, "%Y-%m-%d %H:%M:%S")
82107
end_date = datetime.strptime(END_DATE, "%Y-%m-%d %H:%M:%S")
83108

84-
contacts, latest_date = get_rapidpro_contacts(start_date, end_date)
109+
contacts, oldest_date = get_rapidpro_contacts(start_date, end_date)
85110

86111
print(f"Found: {len(contacts)}")
87-
print(f"Latest modified on date: {latest_date}")
112+
print(f"Oldest modified on date: {oldest_date}")
88113

89114
if contacts:
90115
start = START_DATE.split(" ")[0]

scripts/migrate_to_turn/process_fields.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from datetime import datetime
23

34
import pytz
@@ -11,6 +12,12 @@ def is_datetime(date):
1112
return True
1213

1314

15+
def to_lowercase(value):
    """
    Lower-case string values; pass anything else through untouched.

    Non-string inputs (None, numbers, ...) are returned exactly as given.
    """
    return value.lower() if isinstance(value, str) else value
19+
20+
1421
def process_datetime(value):
1522
if not value:
1623
return
@@ -20,3 +27,46 @@ def process_datetime(value):
2027
value = datetime.fromisoformat(value)
2128
value = value.astimezone(pytz.utc).isoformat()
2229
return value
30+
31+
32+
def get_user_tier(contact):
    """
    Placeholder for working out a contact's user tier.

    The contact is currently ignored and None is always returned.
    """
    # TODO: figure out user tier based on rapidpro fields
    return None
35+
36+
37+
def get_user_type(contact):
    """
    Placeholder for working out a contact's user type.

    The contact is currently ignored and None is always returned.
    """
    # TODO: figure out user type based on rapidpro fields
    return None
40+
41+
42+
def get_user_babies(contact):
    """
    Build a JSON-encoded list of baby records from a contact's DOB fields.

    Looks up ``baby_dob1`` to ``baby_dob3`` in ``contact.fields``. A contact
    with no ``fields`` attribute, or a falsy one, is treated as having no
    fields. Empty values and values that ``is_datetime`` rejects are skipped.

    Returns the JSON string of the collected records, or None when no
    record was produced.
    """
    contact_fields = getattr(contact, "fields", {}) or {}
    raw_dobs = (contact_fields.get(f"baby_dob{slot}") for slot in (1, 2, 3))

    records = []
    for raw_dob in raw_dobs:
        if not raw_dob:
            continue

        # Drop a trailing "Z" marker and anything from a "+" offset onwards
        # before validating and parsing the value.
        cleaned = raw_dob.replace("Z", "").split("+")[0]
        if not is_datetime(cleaned):
            continue

        born = datetime.fromisoformat(cleaned)
        record = {
            "all_vaccines_received": "all",
            "appointments_attended": 0,
            "baby_birth_day": born.day,
            "baby_birth_month": born.month,
            "baby_birth_year": born.year,
            "name": "",
            "next_vacc_day": 0,
            "next_vacc_month": 0,
            "next_vacc_year": 0,
            "pregnancy_edd": 0,
            "vaccination_status_at_reg": "all",
        }
        records.append(record)

    return json.dumps(records) if records else None
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import json
2+
from datetime import datetime
3+
from unittest import TestCase
4+
5+
import pytz
6+
7+
from scripts.migrate_to_turn.process_fields import (
8+
get_user_babies,
9+
get_user_tier,
10+
get_user_type,
11+
is_datetime,
12+
process_datetime,
13+
)
14+
15+
16+
class IsDatetimeTests(TestCase):
    def test_valid_isoformat(self):
        """
        Date-only and date-time ISO format strings are accepted
        """
        for candidate in ("2024-01-01", "2024-01-01T12:30:00"):
            self.assertTrue(is_datetime(candidate))

    def test_invalid_isoformat(self):
        """
        Non-date strings and None are rejected
        """
        for candidate in ("not-a-date", None):
            self.assertFalse(is_datetime(candidate))
30+
31+
32+
class ProcessDatetimeTests(TestCase):
    def test_none_returns_none(self):
        """
        Empty inputs (None and the empty string) produce None
        """
        for empty in (None, ""):
            self.assertIsNone(process_datetime(empty))

    def test_invalid_returns_none(self):
        """
        Unparseable input produces None
        """
        result = process_datetime("not-a-date")
        self.assertIsNone(result)

    def test_strips_timezone_and_converts_to_utc(self):
        """
        A "Z" or "+02:00" suffix is dropped and the result is expressed in UTC
        """
        # The expected value is built from the naive wall-clock time, so the
        # original offset does not influence it.
        naive = datetime.fromisoformat("2024-01-01T12:30:00")
        expected = naive.astimezone(pytz.utc).isoformat()

        for value in ("2024-01-01T12:30:00Z", "2024-01-01T12:30:00+02:00"):
            with self.subTest(value=value):
                self.assertEqual(process_datetime(value), expected)
56+
57+
58+
class GetUserTierTests(TestCase):
    def test_default(self):
        """
        An arbitrary contact object yields None
        """
        tier = get_user_tier(object())
        self.assertIsNone(tier)
64+
65+
66+
class GetUserTypeTests(TestCase):
    def test_default(self):
        """
        An arbitrary contact object yields None
        """
        user_type = get_user_type(object())
        self.assertIsNone(user_type)
72+
73+
74+
class GetUserBabiesTests(TestCase):
    def test_default(self):
        """
        A contact without a fields attribute yields None
        """
        result = get_user_babies(object())
        self.assertIsNone(result)

    def test_baby_dobs_convert_to_babies(self):
        """
        A single baby_dob1 value becomes a one-entry JSON babies payload
        """

        class Contact:
            fields = {"baby_dob1": "2025-01-02"}

        expected_baby = {
            "all_vaccines_received": "all",
            "appointments_attended": 0,
            "baby_birth_day": 2,
            "baby_birth_month": 1,
            "baby_birth_year": 2025,
            "name": "",
            "next_vacc_day": 0,
            "next_vacc_month": 0,
            "next_vacc_year": 0,
            "pregnancy_edd": 0,
            "vaccination_status_at_reg": "all",
        }
        expected_payload = json.dumps([expected_baby])

        self.assertEqual(get_user_babies(Contact()), expected_payload)

0 commit comments

Comments
 (0)