Skip to content

Commit 3198ea1

Browse files
committed
add current oneshot scripts
1 parent 371f4eb commit 3198ea1

File tree

4 files changed

+156
-0
lines changed

4 files changed

+156
-0
lines changed

oneshots/redis_hit_all.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import ujson
2+
from placedump.common import get_redis
3+
4+
redis = get_redis()
5+
6+
for x in range(0, 1000):
7+
for y in range(0, 1000):
8+
redis.sadd(
9+
"queue:pixels",
10+
ujson.dumps(
11+
{
12+
"x": str(x),
13+
"y": str(y),
14+
"board": "0",
15+
}
16+
),
17+
)

oneshots/url_table_backfill.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import sys
2+
from concurrent.futures import ThreadPoolExecutor
3+
4+
import ujson as json
5+
from placedump.common import get_redis
6+
from placedump.constants import socket_key
7+
from placedump.model import URL, sm
8+
from placedump.tasks.pixels import download_url
9+
from sqlalchemy import func, select
10+
11+
db = sm()
12+
redis = get_redis()
13+
sql_url = set()
14+
to_backfill = set()
15+
last_id = "0"
16+
pool = ThreadPoolExecutor(max_workers=8)
17+
18+
for url in db.execute(select(URL)).scalars():
19+
sql_url.add(url.url)
20+
21+
print(len(sql_url), "in database")
22+
23+
while True:
24+
results = redis.xread({socket_key: last_id}, count=10000)
25+
if not results:
26+
break
27+
messages = results[0][1]
28+
for message_id, message in messages:
29+
last_id = message_id
30+
decoded = json.loads(message["message"])
31+
32+
# get canvas id
33+
34+
# get url
35+
try:
36+
if decoded["type"] in ["ka", "connection_ack"]:
37+
continue
38+
39+
if (
40+
decoded["payload"]["data"]["subscribe"]["data"]["__typename"]
41+
== "ConfigurationMessageData"
42+
):
43+
continue
44+
except KeyError:
45+
continue
46+
47+
try:
48+
url = decoded["subscribe"]["data"]["name"]
49+
canvas_id = decoded.get("canvas_id", int(decoded.get("id", 2)) - 2)
50+
except KeyError:
51+
try:
52+
url = decoded["payload"]["data"]["subscribe"]["data"]["name"]
53+
canvas_id = int(decoded.get("id", 2)) - 2
54+
except KeyError:
55+
continue
56+
57+
if url not in sql_url:
58+
to_backfill.add((canvas_id, url))
59+
60+
print(len(to_backfill), "to backfill")
61+
62+
63+
for item in to_backfill:
64+
download_url.apply_async(
65+
args=(item[0], item[1]),
66+
priority=10,
67+
)
68+
sys.stdout.write(".")
69+
sys.stdout.flush()

oneshots/url_table_backfill_b2.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import datetime
2+
import sys
3+
from concurrent.futures import ThreadPoolExecutor
4+
5+
import ujson as json
6+
from placedump.common import get_b2_api, get_redis
7+
from placedump.constants import socket_key
8+
from placedump.model import URL, Pixel, sm
9+
from placedump.tasks.pixels import download_url
10+
from sqlalchemy import func, select
11+
12+
db = sm()
13+
b2 = get_b2_api()
14+
sql_url = set()
15+
bucket = b2.get_bucket_by_name("erin-reddit-afd2022")
16+
17+
for url in db.execute(select(URL)).scalars():
18+
sql_url.add(url.url)
19+
20+
print(len(sql_url), "in database")
21+
22+
urls_to_insert = []
23+
24+
with sm() as db_insert:
25+
for file_version, folder_name in bucket.ls(
26+
folder_to_list="hot-potato.reddit.com/media/canvas-images", latest_only=True
27+
):
28+
fixed_url = "https://" + file_version.file_name
29+
if fixed_url in sql_url:
30+
continue
31+
32+
timestamp = float(file_version.upload_timestamp) / 1000.0
33+
timestamp = datetime.datetime.fromtimestamp(timestamp)
34+
35+
urls_to_insert.append(
36+
{
37+
"url": fixed_url,
38+
"fetched": timestamp,
39+
}
40+
)
41+
42+
if len(urls_to_insert) > 1024:
43+
db_insert.bulk_insert_mappings(
44+
URL,
45+
urls_to_insert,
46+
)
47+
db_insert.commit()
48+
print("Added 1024 URLs.")
49+
urls_to_insert.clear()
50+
51+
db_insert.bulk_insert_mappings(
52+
URL,
53+
urls_to_insert,
54+
)
55+
db_insert.commit()
56+
print(f"Added {len(urls_to_insert)} URLs.")

scripts/dump-redis.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from placedump.common import get_redis
2+
from placedump.constants import socket_key
3+
4+
redis = get_redis()
5+
last_id = "0"
6+
7+
while True:
8+
results = redis.xread({socket_key: last_id}, count=10000)
9+
if not results:
10+
break
11+
messages = results[0][1]
12+
for message_id, message in messages:
13+
last_id = message_id
14+
print(message["message"], end="")

0 commit comments

Comments
 (0)