diff --git a/test/normalizers_test.py b/test/normalizers_test.py index 8faed76..73a99a6 100644 --- a/test/normalizers_test.py +++ b/test/normalizers_test.py @@ -144,6 +144,7 @@ def test_normalize_tweets_payload_v2(self): get_json_resource("payload-v2-geo.json"), get_json_resource("payload-v2-tweet-retweet.json"), get_json_resource("payload-v2-video.json"), + get_json_resource("payload-v2-singletweet.json"), ] ntweets = [] diff --git a/test/resources/normalized-tweets-v2-all.json b/test/resources/normalized-tweets-v2-all.json index c9b391d..2c9c446 100644 --- a/test/resources/normalized-tweets-v2-all.json +++ b/test/resources/normalized-tweets-v2-all.json @@ -1109,7 +1109,55 @@ "collected_via": [ "test" ], - "collection_time": "2023-05-12T14:42:08.206096", + "collection_time": "2026-02-23T14:17:43.608759", + "hashtags": [], + "id": "1618628579440619521", + "impression_count": 0, + "lang": "ja", + "like_count": 0, + "links": [], + "local_time": "2023-01-26T15:15:00", + "match_query": true, + "media_files": [], + "media_types": [], + "media_urls": [], + "mentioned_ids": [ + "135064055" + ], + "mentioned_names": [ + "kojiharunyan" + ], + "possibly_sensitive": false, + "quote_count": 0, + "reply_count": 0, + "retweet_count": 0, + "source_name": null, + "text": "@kojiharunyan ใ‚ใƒผใ‚“็ฅž3๐Ÿฐ๐Ÿฐ๐Ÿฐ๐Ÿ’ž", + "timestamp_utc": 1674746100, + "to_tweetid": "1618619004586950657", + "to_userid": "135064055", + "to_username": "kojiharunyan", + "url": "https://twitter.com/TOMOKAPEACE/status/1618628579440619521", + "user_created_at": "2015-11-14T13:35:31", + "user_description": "๏ผ‘ๆœฌไผšใƒ•ใ‚งใ‚ขใƒชใƒผใ‚บโ™กๅฅฝใใซใฏๅ…จๅŠ›โ™กใƒ‡ใ‚ฃใ‚บใƒ‹ใƒผใจใƒŸใƒƒใ‚ญใƒผใจๅคง้˜ชใจใใ ใ‚‰ใชใ„ใ“ใจๅคงๅฅฝใใฃใฃโ™ก๐Ÿ‘ถ๐Ÿป๐Ÿผ๐Ÿ’•", + "user_followers": 250, + "user_friends": 44, + "user_id": "4234967894", + "user_image": "https://pbs.twimg.com/profile_images/1421660585549123584/MO7HGIV1_normal.jpg", + "user_lists": 3, + "user_location": "ใ‚‰ใถใ‚ใ‚“ใฉใดใƒผใ™โ˜บ๏ธŽโค๏ธŽ", + "user_name": "โค๏ธŽ๐’•๐’๐’Ž๐’๐’Œ๐’‚๐’‘๐’†๐’‚๐’„๐’†โค๏ธŽ", + "user_screen_name": "TOMOKAPEACE", + "user_timestamp_utc": 1447508131, + "user_tweets": 14905, + "user_url": null, + "user_verified": false + }, + { + "collected_via": [ + "test" + ], + "collection_time": "2026-02-23T14:17:43.608715", "hashtags": [], "id": "1656684087061381121", "impression_count": 19422, diff --git a/test/resources/normalized-tweets-v2.json b/test/resources/normalized-tweets-v2.json index fcefcd3..72a31f4 100644 --- a/test/resources/normalized-tweets-v2.json +++ b/test/resources/normalized-tweets-v2.json @@ -965,5 +965,53 @@ "user_tweets": 165675, "user_url": "https://bfmbusiness.bfmtv.com/", "user_verified": false + }, + { + "collected_via": [ + "test" + ], + "collection_time": "2026-02-23T14:17:43.605763", + "hashtags": [], + "id": "1618628579440619521", + "impression_count": 0, + "lang": "ja", + "like_count": 0, + "links": [], + "local_time": "2023-01-26T16:15:00", + "match_query": true, + "media_files": [], + "media_types": [], + "media_urls": [], + "mentioned_ids": [ + "135064055" + ], + "mentioned_names": [ + "kojiharunyan" + ], + "possibly_sensitive": false, + "quote_count": 0, + "reply_count": 0, + "retweet_count": 0, + "source_name": null, + "text": "@kojiharunyan ใ‚ใƒผใ‚“็ฅž3๐Ÿฐ๐Ÿฐ๐Ÿฐ๐Ÿ’ž", + "timestamp_utc": 1674746100, + "to_tweetid": "1618619004586950657", + "to_userid": "135064055", + "to_username": "kojiharunyan", + "url": "https://twitter.com/TOMOKAPEACE/status/1618628579440619521", + "user_created_at": "2015-11-14T14:35:31", + "user_description": "๏ผ‘ๆœฌไผšใƒ•ใ‚งใ‚ขใƒชใƒผใ‚บโ™กๅฅฝใใซใฏๅ…จๅŠ›โ™กใƒ‡ใ‚ฃใ‚บใƒ‹ใƒผใจใƒŸใƒƒใ‚ญใƒผใจๅคง้˜ชใจใใ ใ‚‰ใชใ„ใ“ใจๅคงๅฅฝใใฃใฃโ™ก๐Ÿ‘ถ๐Ÿป๐Ÿผ๐Ÿ’•", + "user_followers": 250, + "user_friends": 44, + "user_id": "4234967894", + "user_image": "https://pbs.twimg.com/profile_images/1421660585549123584/MO7HGIV1_normal.jpg", + "user_lists": 3, + "user_location": "ใ‚‰ใถใ‚ใ‚“ใฉใดใƒผใ™โ˜บ๏ธŽโค๏ธŽ", + "user_name": "โค๏ธŽ๐’•๐’๐’Ž๐’๐’Œ๐’‚๐’‘๐’†๐’‚๐’„๐’†โค๏ธŽ", + "user_screen_name": "TOMOKAPEACE", + "user_timestamp_utc": 1447508131, + "user_tweets": 14905, + "user_url": null, + "user_verified": false } ] \ No newline at end of file diff --git a/test/resources/payload-v2-singletweet.json b/test/resources/payload-v2-singletweet.json new file mode 100644 index 0000000..c2b7ec9 --- /dev/null +++ b/test/resources/payload-v2-singletweet.json @@ -0,0 +1 @@ +{"data":{"attachments":{},"author_id":"4234967894","context_annotations":[{"domain":{"id":"46","name":"Business Taxonomy","description":"Categories within Brand Verticals that narrow down the scope of Brands"},"entity":{"id":"1557696802391859201","name":"Entertainment & Leisure Business","description":"Brands, companies, advertisers and every non-person handle with the profit intent related to movies, music, television, franchises, venues, theme parks, toys, tourism, hotels"}},{"domain":{"id":"10","name":"Person","description":"Named people in the world like Nelson Mandela"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"54","name":"Musician","description":"A musician in the world, like Adele or Bob Dylan"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"55","name":"Music Genre","description":"A category for a musical style, like Pop, Rock, or Rap"},"entity":{"id":"810938095772123136","name":"J-pop","description":"J-POP"}},{"domain":{"id":"56","name":"Actor","description":"An actor or actress in the world, like Kate Winslet or Leonardo DiCaprio"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"58","name":"Entertainment Personality","description":"An entertainment personality in the world, like Anderson Cooper or Miranda Sings"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"847524658970636288","name":"Music","description":"Music and radio"}}],"conversation_id":"1618619004586950657","created_at":"2023-01-26T15:15:00.000Z","edit_controls":{"edits_remaining":5,"is_edit_eligible":false,"editable_until":"2023-01-26T15:45:00.000Z"},"edit_history_tweet_ids":["1618628579440619521"],"entities":{"mentions":[{"start":0,"end":13,"username":"kojiharunyan","id":"135064055"}]},"geo":{},"id":"1618628579440619521","in_reply_to_user_id":"135064055","lang":"ja","possibly_sensitive":false,"public_metrics":{"retweet_count":0,"reply_count":0,"like_count":0,"quote_count":0,"impression_count":0},"referenced_tweets":[{"type":"replied_to","id":"1618619004586950657"}],"reply_settings":"everyone","text":"@kojiharunyan ใ‚ใƒผใ‚“็ฅž3๐Ÿฐ๐Ÿฐ๐Ÿฐ๐Ÿ’ž"},"includes":{"users":[{"created_at":"2015-11-14T13:35:31.000Z","description":"๏ผ‘ๆœฌไผšใƒ•ใ‚งใ‚ขใƒชใƒผใ‚บโ™กๅฅฝใใซใฏๅ…จๅŠ›โ™กใƒ‡ใ‚ฃใ‚บใƒ‹ใƒผใจใƒŸใƒƒใ‚ญใƒผใจๅคง้˜ชใจใใ ใ‚‰ใชใ„ใ“ใจๅคงๅฅฝใใฃใฃโ™ก๐Ÿ‘ถ๐Ÿป๐Ÿผ๐Ÿ’•","id":"4234967894","location":"ใ‚‰ใถใ‚ใ‚“ใฉใดใƒผใ™โ˜บ๏ธŽโค๏ธŽ","name":"โค๏ธŽ๐’•๐’๐’Ž๐’๐’Œ๐’‚๐’‘๐’†๐’‚๐’„๐’†โค๏ธŽ","profile_image_url":"https://pbs.twimg.com/profile_images/1421660585549123584/MO7HGIV1_normal.jpg","protected":false,"public_metrics":{"followers_count":250,"following_count":44,"tweet_count":14905,"listed_count":3},"username":"TOMOKAPEACE","verified":false},{"created_at":"2010-04-20T06:57:44.000Z","description":"@herlipto_info Founder & CCO ๐Ÿˆ https://t.co/PJhnBqxIny","entities":{"url":{"urls":[{"start":0,"end":23,"url":"https://t.co/KHC89msjYl","expanded_url":"https://www.instagram.com/nyanchan22","display_url":"instagram.com/nyanchan22"}]},"description":{"urls":[{"start":31,"end":54,"url":"https://t.co/PJhnBqxIny","expanded_url":"https://www.herlipto.jp","display_url":"herlipto.jp"}],"mentions":[{"start":0,"end":14,"username":"herlipto_info"}]}},"id":"135064055","location":"Tokyo","name":"ๅฐๅถ‹ ้™ฝ่œ","pinned_tweet_id":"1495930377663561730","profile_image_url":"https://pbs.twimg.com/profile_images/877735653819142144/WEAzcEyJ_normal.jpg","protected":false,"public_metrics":{"followers_count":3117015,"following_count":231,"tweet_count":13569,"listed_count":14549},"url":"https://t.co/KHC89msjYl","username":"kojiharunyan","verified":true}],"tweets":[{"attachments":{},"author_id":"4234967894","context_annotations":[{"domain":{"id":"46","name":"Business Taxonomy","description":"Categories within Brand Verticals that narrow down the scope of Brands"},"entity":{"id":"1557696802391859201","name":"Entertainment & Leisure Business","description":"Brands, companies, advertisers and every non-person handle with the profit intent related to movies, music, television, franchises, venues, theme parks, toys, tourism, hotels"}},{"domain":{"id":"10","name":"Person","description":"Named people in the world like Nelson Mandela"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"54","name":"Musician","description":"A musician in the world, like Adele or Bob Dylan"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"55","name":"Music Genre","description":"A category for a musical style, like Pop, Rock, or Rap"},"entity":{"id":"810938095772123136","name":"J-pop","description":"J-POP"}},{"domain":{"id":"56","name":"Actor","description":"An actor or actress in the world, like Kate Winslet or Leonardo DiCaprio"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"58","name":"Entertainment Personality","description":"An entertainment personality in the world, like Anderson Cooper or Miranda Sings"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"847524658970636288","name":"Music","description":"Music and radio"}}],"conversation_id":"1618619004586950657","created_at":"2023-01-26T15:15:00.000Z","edit_controls":{"edits_remaining":5,"is_edit_eligible":false,"editable_until":"2023-01-26T15:45:00.000Z"},"edit_history_tweet_ids":["1618628579440619521"],"entities":{"mentions":[{"start":0,"end":13,"username":"kojiharunyan","id":"135064055"}]},"geo":{},"id":"1618628579440619521","in_reply_to_user_id":"135064055","lang":"ja","possibly_sensitive":false,"public_metrics":{"retweet_count":0,"reply_count":0,"like_count":0,"quote_count":0,"impression_count":0},"referenced_tweets":[{"type":"replied_to","id":"1618619004586950657"}],"reply_settings":"everyone","text":"@kojiharunyan ใ‚ใƒผใ‚“็ฅž3๐Ÿฐ๐Ÿฐ๐Ÿฐ๐Ÿ’ž"},{"attachments":{"media_keys":["13_1618618834424070144"]},"author_id":"135064055","context_annotations":[{"domain":{"id":"10","name":"Person","description":"Named people in the world like Nelson Mandela"},"entity":{"id":"987356481501650946","name":"AKB48","description":"AKB48"}},{"domain":{"id":"29","name":"Events [Entity Service]","description":"Real world events. "},"entity":{"id":"1319305164751163393","name":"New Years Eve","description":"This entity includes all conversations for New Years Eve for all years."}},{"domain":{"id":"29","name":"Events [Entity Service]","description":"Real world events. "},"entity":{"id":"1457786335108816904","name":"New Year's day","description":"This entity includes all conversations for the New Year's day for all years. "}},{"domain":{"id":"46","name":"Business Taxonomy","description":"Categories within Brand Verticals that narrow down the scope of Brands"},"entity":{"id":"1557696802391859201","name":"Entertainment & Leisure Business","description":"Brands, companies, advertisers and every non-person handle with the profit intent related to movies, music, television, franchises, venues, theme parks, toys, tourism, hotels"}},{"domain":{"id":"54","name":"Musician","description":"A musician in the world, like Adele or Bob Dylan"},"entity":{"id":"987356481501650946","name":"AKB48","description":"AKB48"}},{"domain":{"id":"55","name":"Music Genre","description":"A category for a musical style, like Pop, Rock, or Rap"},"entity":{"id":"810938095772123136","name":"J-pop","description":"J-POP"}},{"domain":{"id":"119","name":"Holiday","description":"Holidays like Christmas or Halloween"},"entity":{"id":"1319305164751163393","name":"New Years Eve","description":"This entity includes all conversations for New Years Eve for all years."}},{"domain":{"id":"119","name":"Holiday","description":"Holidays like Christmas or Halloween"},"entity":{"id":"1457786335108816904","name":"New Year's day","description":"This entity includes all conversations for the New Year's day for all years. "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"847524658970636288","name":"Music","description":"Music and radio"}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"847548225712119808","name":"Holidays","description":"Holidays"}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"987356481501650946","name":"AKB48","description":"AKB48"}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"1291095284756357120","name":"Cultural events"}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"1296381531510849536","name":"New Years Eve\t"}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"1436053202835689485","name":"Events"}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"1457786335108816904","name":"New Year's day","description":"This entity includes all conversations for the New Year's day for all years. "}},{"domain":{"id":"10","name":"Person","description":"Named people in the world like Nelson Mandela"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"54","name":"Musician","description":"A musician in the world, like Adele or Bob Dylan"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"55","name":"Music Genre","description":"A category for a musical style, like Pop, Rock, or Rap"},"entity":{"id":"810938095772123136","name":"J-pop","description":"J-POP"}},{"domain":{"id":"56","name":"Actor","description":"An actor or actress in the world, like Kate Winslet or Leonardo DiCaprio"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"58","name":"Entertainment Personality","description":"An entertainment personality in the world, like Anderson Cooper or Miranda Sings"},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"844395767762595840","name":"Haruna Kojima","description":"Haruna Kojima (ๅฐๅถ‹้™ฝ่œ) "}},{"domain":{"id":"131","name":"Unified Twitter Taxonomy","description":"A taxonomy of user interests. "},"entity":{"id":"847524658970636288","name":"Music","description":"Music and radio"}}],"conversation_id":"1618619004586950657","created_at":"2023-01-26T14:36:57.000Z","edit_controls":{"edits_remaining":5,"is_edit_eligible":true,"editable_until":"2023-01-26T15:06:57.000Z"},"edit_history_tweet_ids":["1618619004586950657"],"entities":{"urls":[{"start":135,"end":158,"url":"https://t.co/VyPXVTbAAu","expanded_url":"https://twitter.com/kojiharunyan/status/1618619004586950657/video/1","display_url":"pic.twitter.com/VyPXVTbAAu","media_key":"13_1618618834424070144"}]},"geo":{},"id":"1618619004586950657","lang":"ja","possibly_sensitive":false,"public_metrics":{"retweet_count":82,"reply_count":12,"like_count":917,"quote_count":1,"impression_count":46392},"reply_settings":"everyone","text":"ใพใ ่ผ‰ใ›ใฆใชใ‹ใฃใŸๆœ‰ๅ‰ใฎๅ†ฌไผ‘ใฟๆ–ฐๅนดไผšใฎ่ฃๅด๐Ÿ˜‚AKB48ๆ™‚ไปฃใ‹ใ‚‰ใŠไธ–่ฉฑใซใชใฃใฆใ‚‹ใƒ˜ใ‚ขใƒกใ‚คใ‚ฏใ•ใ‚“ใŒใ‚ปใƒƒใƒˆไธญ๐Ÿ’‡โ€โ™€๏ธๅฝ“ๆ™‚ใ‹ใ‚‰ใฉใ‚“ใชใซ่ธŠใฃใฆใ‚‚ใƒ˜ใ‚ขใ‚ขใ‚ฏใ‚ปใŒๅ–ใ‚Œใชใ„ใ‚ˆใ†ใซ้ซชใจใƒ”ใƒณใงๅœŸๅฐใ‚’ไฝœใฃใฆใ—ใฃใ‹ใ‚Šๅ›บๅฎšใ—ใฆใใ‚Œใ‚‹ใ‚“ใ ใ‘ใฉใ€ๆœฌ็•ชใ‚ฎใƒชใ‚ฎใƒชใซ็”ฐไธญใ•ใ‚“ใ‚‚ไป˜ใ‘ใ‚‹ใ“ใจใŒใงใใพใ—ใŸ๐Ÿฅน๐Ÿ‘๐Ÿปโœจ https://t.co/VyPXVTbAAu"}]}} \ No newline at end of file diff --git a/twitwi/normalizers.py b/twitwi/normalizers.py index 67e46a8..e5d079e 100644 --- a/twitwi/normalizers.py +++ b/twitwi/normalizers.py @@ -879,7 +879,13 @@ def normalize_tweets_payload_v2( output = [] already_seen = {} - for item in payload["data"]: + ## payload can contain just a single tweet + if isinstance(payload["data"], dict): + items = [payload["data"]] + elif isinstance(payload["data"], list): + items = payload["data"] + + for item in items: normalized_tweets = normalize_tweet_v2( item, users_by_id=users_by_id, diff --git a/twitwi/utils.py b/twitwi/utils.py index 6016b56..6c2766f 100644 --- a/twitwi/utils.py +++ b/twitwi/utils.py @@ -127,7 +127,8 @@ def validate_payload_v2(payload): else: return False - if not isinstance(payload["data"], list): + # there can also just be a single tweet in the payload + if not isinstance(payload["data"], (list, dict)): return False # NOTE: not sure it cannot be absent altogether