Skip to content

Commit 8221fff

Browse files
committed
fix db to store code with special characters correctly
1 parent 6f0fcf0 commit 8221fff

File tree

2 files changed

+94
-1
lines changed

2 files changed

+94
-1
lines changed

src/libkernelbot/leaderboard_db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,7 @@ def get_submission_by_id(self, submission_id: int) -> Optional["SubmissionItem"]
784784
user_id=submission[3],
785785
submission_time=submission[4],
786786
done=submission[5],
787-
code=submission[6],
787+
code=bytes(submission[6]).decode("utf-8"),
788788
runs=runs,
789789
)
790790

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""
2+
Fix code table
3+
Uses bytea to store user-submitted code so we're safe to have any sort of special characters.
4+
"""
5+
6+
from yoyo import step
7+
8+
__depends__ = {"20250617_01_c5mrF-task-split"}
9+
10+
"""
11+
Yoyo migration to convert code_files table from TEXT to BYTEA
12+
"""
13+
14+
15+
def _legacy_code_to_bytes(code_text: str) -> bytes:
    """Return the UTF-8 bytes for one value of the legacy TEXT ``old_code`` column.

    Rows written by the old, broken code path contain a backslash, an ``x`` and
    then the hex dump of the source instead of the source itself; those are
    unwrapped first. Every other value is encoded unchanged.
    """
    if code_text.startswith("\\x"):
        try:
            code_text = bytes.fromhex(code_text[2:]).decode("utf-8")
        except ValueError:
            # Not a hex dump of UTF-8 text (UnicodeDecodeError is a ValueError
            # as well), so this is genuine source that merely begins with
            # backslash-x. Keep it verbatim rather than aborting the migration.
            pass
    return code_text.encode("utf-8")


def convert_code_to_bytea(conn) -> None:
    """Copy every legacy TEXT ``old_code`` value into the BYTEA ``code`` column.

    Forward data step of the migration. Hex-escaped rows left behind by the old
    code path are unwrapped, and rows whose bytes turn out to be identical are
    merged: submissions are re-pointed at the first row seen with that content
    and the duplicate ``code_files`` row is deleted.

    No hash is written here; the ``hash`` column is dropped before this step
    and re-added as a generated column afterwards.

    Args:
        conn: database connection handed to the step function; only
            ``conn.cursor()`` is used.
    """
    cursor = conn.cursor()

    # Get all existing records
    cursor.execute("SELECT id, old_code FROM leaderboard.code_files")
    records = cursor.fetchall()

    # normalised code bytes -> id of the code_files row that keeps them
    existing_codes = {}

    for record_id, code_text in records:
        code_bytes = _legacy_code_to_bytes(code_text)

        # with the old broken code and experimentation, it is possible that we
        # got some duplicates; fix this here
        if code_bytes in existing_codes:
            cursor.execute(
                "UPDATE leaderboard.submission SET code_id = %s WHERE code_id = %s",
                (existing_codes[code_bytes], record_id),
            )
            cursor.execute("DELETE FROM leaderboard.code_files WHERE id = %s", (record_id,))
            continue

        existing_codes[code_bytes] = record_id

        # Store the normalised bytes in the new BYTEA column
        cursor.execute(
            "UPDATE leaderboard.code_files SET code = %s WHERE id = %s", (code_bytes, record_id)
        )
46+
47+
48+
def convert_bytea_to_text(conn) -> None:
    """Copy every BYTEA ``code`` value back into the TEXT ``old_code`` column.

    Rollback counterpart of the forward conversion. Each value is decoded as
    UTF-8 and bound as ``str``. Binding ``bytes`` to a TEXT column would store
    the hex-escaped form of the data rather than the source text, which is the
    corruption the forward step has to repair.

    No hash is written here.

    Args:
        conn: database connection handed to the step function; only
            ``conn.cursor()`` is used.

    Raises:
        UnicodeDecodeError: if a stored value is not valid UTF-8.
    """
    cursor = conn.cursor()

    # Get all existing records
    cursor.execute("SELECT id, code FROM leaderboard.code_files")
    records = cursor.fetchall()

    for record_id, code_bytes in records:
        # bytes(...) normalises whatever buffer type the driver returned
        code_text = bytes(code_bytes).decode("utf-8")
        cursor.execute(
            "UPDATE leaderboard.code_files SET old_code = %s WHERE id = %s",
            (code_text, record_id),
        )
61+
62+
63+
steps = [
    # Each step(...) below is given its forward action first and its rollback
    # second (yoyo's step(apply, rollback) convention).
    #
    # prepare the table columns
    #   forward:  drop the hash column, keep the existing TEXT data under the
    #             name old_code, and add an empty BYTEA code column to fill.
    #   rollback: drop the BYTEA column, rename old_code back to code, and
    #             re-create hash as a generated column over code::bytea.
    # NOTE(review): the rollback re-creates hash without NOT NULL / UNIQUE,
    # unlike the forward definition in the last step -- confirm this matches
    # the pre-migration schema.
    step(
        """
        ALTER TABLE leaderboard.code_files DROP COLUMN hash;
        ALTER TABLE leaderboard.code_files RENAME COLUMN code TO old_code;
        ALTER TABLE leaderboard.code_files ADD COLUMN code BYTEA NOT NULL DEFAULT '';
        """,
        """
        ALTER TABLE leaderboard.code_files DROP COLUMN code;
        ALTER TABLE leaderboard.code_files RENAME COLUMN old_code TO code;
        ALTER TABLE leaderboard.code_files ADD COLUMN hash TEXT
            GENERATED ALWAYS AS (encode(sha256(code::bytea), 'hex')) STORED;
        """,
    ),
    # run the conversion
    #   forward:  copy old_code into code as bytes, merging duplicate rows.
    #   rollback: copy code back into old_code.
    step(convert_code_to_bytea, convert_bytea_to_text),
    # clean up the table and reintroduce hashes
    #   forward:  drop the legacy column, add hash as a generated, unique,
    #             non-null sha256 hex digest of code, and remove the temporary
    #             '' default from code.
    #   rollback: bring back an empty old_code column for the data rollback to
    #             fill, and drop hash.
    # NOTE(review): old_code is re-added as a nullable TEXT column, so after a
    # full rollback code is nullable -- confirm against the original schema.
    step(
        """
        ALTER TABLE leaderboard.code_files DROP COLUMN old_code;
        ALTER TABLE leaderboard.code_files ADD COLUMN hash TEXT
            GENERATED ALWAYS AS (encode(sha256(code), 'hex')) STORED NOT NULL UNIQUE;
        ALTER TABLE leaderboard.code_files ALTER COLUMN code DROP DEFAULT;
        """,
        """
        ALTER TABLE leaderboard.code_files ADD COLUMN old_code TEXT;
        ALTER TABLE leaderboard.code_files DROP COLUMN hash;
        """,
    ),
]

0 commit comments

Comments
 (0)