Skip to content

Commit ff5462f

Browse files
authored
Adding support for "iframe" source_version in normalize_tweet
1 parent 2a999af commit ff5462f

File tree

9 files changed

+1266
-37
lines changed

9 files changed

+1266
-37
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ When setting `extract_referenced_posts` to `True` it will instead return a list
265265
* **locale** *(pytz.timezone as str, optional)*: timezone used to convert dates. If not given, will default to UTC.
266266
* **extract_referenced_posts** *(bool, optional)*: whether to return in the output, in addition to the tweet to be normalized, also normalized data for each other referenced tweets found in the payload data (including retweeted and quoted tweets). If `False`, the function will return a `dict`, if `True` a `list` of `dict`. Defaults to `False`.
267267
* **collection_source** *(string, optional)*: optional information to add to the `collected_via` field of the normalized tweet to indicate where it was collected from.
268+
* **source_version** *(str, optional)*: version of the Twitter payload. Must be either "v1" or "iframe". Defaults to "v1".
268269

269270
### normalize_tweets_payload_v2
270271

test/anonymizers_test.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ def test_anonymize_normalized_tweet(self):
7777
"user_id",
7878
"user_name",
7979
"user_verified",
80+
"user_verified_type",
81+
"user_is_blue_verified",
8082
"user_description",
8183
"user_url",
8284
"user_image",
@@ -139,6 +141,8 @@ def test_anonymize_normalized_tweet(self):
139141
"",
140142
"",
141143
"",
144+
"",
145+
"",
142146
"2787",
143147
"1663",
144148
"286",
@@ -198,6 +202,8 @@ def test_anonymize_normalized_tweet(self):
198202
"",
199203
"",
200204
"",
205+
"",
206+
"",
201207
"20948",
202208
"462",
203209
"213",
@@ -257,6 +263,8 @@ def test_anonymize_normalized_tweet(self):
257263
"",
258264
"",
259265
"",
266+
"",
267+
"",
260268
"6082",
261269
"4554",
262270
"372",
@@ -316,6 +324,8 @@ def test_anonymize_normalized_tweet(self):
316324
"",
317325
"",
318326
"",
327+
"",
328+
"",
319329
"5098",
320330
"133",
321331
"1931",

test/normalizers_test.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,3 +230,48 @@ def test_incomplete_includes(self):
230230

231231
assert exc_info.value.kind == "user"
232232
assert exc_info.value.key == "1217864994852941825"
233+
234+
def test_normalize_tweet_without_api_key(self):
235+
# tz = timezone("Europe/Paris")
236+
237+
tests = get_json_resource("normalization-no-api-key.json")
238+
fn = partial(normalize_tweet, source_version="iframe")
239+
240+
# from test.utils import dump_json_resource
241+
# for idx, test in enumerate(tests):
242+
# tests[idx]['normalized'] = fn(test['source'], extract_referenced_tweets=True)
243+
# dump_json_resource(tests, 'normalization-no-api-key.json')
244+
245+
# With referenced tweets
246+
for test in tests:
247+
result = fn(test["source"], extract_referenced_tweets=True)
248+
249+
assert isinstance(result, list)
250+
assert set(t["id"] for t in result) == set(
251+
t["id"] for t in test["normalized"]
252+
)
253+
254+
for tweet in result:
255+
assert "collection_time" in tweet and isinstance(
256+
tweet["collection_time"], str
257+
)
258+
259+
for t1, t2 in zip(result, test["normalized"]):
260+
compare_tweets(test["source"]["id_str"], t1, t2)
261+
262+
# With single output
263+
for test in tests:
264+
tweet = fn(test["source"])
265+
266+
assert isinstance(tweet, dict)
267+
268+
_id = test["source"]["id_str"]
269+
compare_tweets(
270+
_id, tweet, next(t for t in test["normalized"] if t["id"] == _id)
271+
)
272+
273+
# With custom collection_source
274+
for test in tests:
275+
tweet = fn(test["source"], collection_source="unit_test")
276+
277+
assert tweet["collected_via"] == ["unit_test"]

0 commit comments

Comments
 (0)