Skip to content

Commit db80da3

Browse files
committed
Latest learnings from running the migration logic
1 parent 228078a commit db80da3

File tree

1 file changed

+26
-9
lines changed

1 file changed

+26
-9
lines changed

migrate_collections.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,19 @@
1818
one_day = timedelta(days=1)
1919

2020

21-
# Function to create a composite key
22-
def create_key(timestamp, metadata):
23-
return f"{timestamp}_{metadata['device_id']}_{metadata.get('name', 'NA')}_{metadata.get('user_id', 'NA')}"
21+
TIME_WINDOW = timedelta(seconds=3)
22+
23+
24+
def find_nearby_key(timestamp, metadata):
25+
for (key_time, key_device_id, key_name, key_user_id) in buffer.keys():
26+
if (
27+
key_device_id == metadata["device_id"]
28+
and key_name == metadata.get("name", "NA")
29+
and key_user_id == metadata.get("user_id", "NA")
30+
and abs(key_time - timestamp) <= TIME_WINDOW
31+
):
32+
return (key_time, key_device_id, key_name, key_user_id)
33+
return None
2434

2535

2636
while start_date <= datetime(2023, 11, 10):
@@ -33,18 +43,27 @@ def create_key(timestamp, metadata):
3343
collection = db[collection_name]
3444
for document in collection.find({"timestamp": {"$gte": start_date, "$lt": end_date}}):
3545
unit = document["metadata"].get("unit")
36-
key = create_key(document["timestamp"], document["metadata"])
3746

38-
# This will check if a buffer entry exists for the given timestamp and metadata
39-
# If it doesn't exist, it initializes a new dictionary for it
47+
key = (
48+
document["timestamp"],
49+
document["metadata"]["device_id"],
50+
document["metadata"].get("name", "NA"),
51+
document["metadata"].get("user_id", "NA"),
52+
)
53+
nearby_key = find_nearby_key(document["timestamp"], document["metadata"])
54+
55+
if nearby_key:
56+
key = nearby_key
57+
58+
# Initialize the key if it doesn't exist yet in the buffer
4059
if key not in buffer:
4160
buffer[key] = {
4261
"metadata": {
4362
"device_id": document["metadata"]["device_id"],
4463
"name": document["metadata"].get("name"),
4564
"user_id": document["metadata"].get("user_id"),
4665
},
47-
"timestamp": document["timestamp"],
66+
"timestamp": key[0], # first part of the key is the timestamp
4867
}
4968

5069
buffer[key][new_collections[collection_name]] = document.get(
@@ -55,11 +74,9 @@ def create_key(timestamp, metadata):
5574

5675
all_documents = sorted(buffer.values(), key=itemgetter("timestamp"))
5776

58-
# Insert the batch of documents for the current day
5977
if all_documents:
6078
db["FreeTier"].insert_many(all_documents)
6179

62-
# Move to the next day
6380
start_date = end_date
6481

6582
db["Migration"].update_one({"migration_name": "FreeTier"}, {"$set": {"migration_complete": True}})

0 commit comments

Comments
 (0)