|
| 1 | +""" |
| 2 | +Fix code table |
| 3 | +Uses bytea to store user-submitted code so we're safe to have any sort of special characters. |
| 4 | +""" |
| 5 | + |
| 6 | +from yoyo import step |
| 7 | + |
| 8 | +__depends__ = {"20250617_01_c5mrF-task-split"} |
| 9 | + |
| 10 | +""" |
| 11 | +Yoyo migration to convert code_files table from TEXT to BYTEA |
| 12 | +""" |
| 13 | + |
| 14 | + |
def _decode_legacy_code(code_text):
    """Return the raw bytes represented by one legacy ``old_code`` value.

    Values written by the old, broken code path look like ``\\x<hex digits>``
    (a hex dump stored as text); everything else is the source code itself.
    """
    if code_text.startswith("\\x"):
        try:
            # Keep the decoded bytes as-is: the target column is BYTEA, so
            # there is no reason to require the payload to be valid UTF-8.
            return bytes.fromhex(code_text[2:])
        except ValueError:
            # Not a hex dump after all (e.g. user code that merely starts
            # with a literal "\x"); fall through and store the text itself
            # instead of aborting the whole migration.
            pass
    return code_text.encode("utf-8")


def convert_code_to_bytea(conn):
    """Fill the new BYTEA ``code`` column from the legacy TEXT ``old_code``.

    Every row of ``leaderboard.code_files`` is read, its ``old_code`` value is
    turned into bytes, and the result is written to ``code``.  Rows whose
    bytes are identical to an earlier row are duplicates: submissions that
    reference them are re-pointed to the first row seen with that content and
    the duplicate row is deleted.

    Args:
        conn: DB-API connection supplied by yoyo; its cursor must accept
            ``%s`` placeholders.
    """
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT id, old_code FROM leaderboard.code_files")
        records = cursor.fetchall()

        # Maps code bytes -> id of the row kept for that content.
        existing_codes = {}

        for record_id, code_text in records:
            code_bytes = _decode_legacy_code(code_text)

            # With the old broken code and experimentation, the same content
            # may have been stored more than once; collapse it here.
            kept_id = existing_codes.get(code_bytes)
            if kept_id is not None:
                cursor.execute(
                    "UPDATE leaderboard.submission SET code_id = %s WHERE code_id = %s",
                    (kept_id, record_id),
                )
                cursor.execute(
                    "DELETE FROM leaderboard.code_files WHERE id = %s", (record_id,)
                )
                continue

            existing_codes[code_bytes] = record_id
            cursor.execute(
                "UPDATE leaderboard.code_files SET code = %s WHERE id = %s",
                (code_bytes, record_id),
            )
    finally:
        cursor.close()
| 46 | + |
| 47 | + |
def convert_bytea_to_text(conn):
    """Rollback helper: copy each row's BYTEA ``code`` into ``old_code``.

    Reads every ``(id, code)`` pair from ``leaderboard.code_files`` and issues
    one UPDATE per row.  A value that is not valid UTF-8 raises
    ``UnicodeDecodeError`` and stops the rollback.

    Args:
        conn: DB-API connection supplied by yoyo.
    """
    cursor = conn.cursor()
    cursor.execute("SELECT id, code FROM leaderboard.code_files")
    rows = cursor.fetchall()

    update_sql = "UPDATE leaderboard.code_files SET old_code = %s WHERE id = %s"
    for row_id, stored in rows:
        # bytes() accepts buffer-like values as well as plain bytes; the
        # decode step doubles as a UTF-8 validity check.
        text = bytes(stored).decode("utf-8")
        # NOTE(review): the parameter is bound as bytes, not str. How the
        # driver/database stores bytes into the text column is not visible
        # here -- confirm this is the intended representation.
        payload = text.encode("utf-8")
        cursor.execute(update_sql, (payload, row_id))
| 61 | + |
| 62 | + |
# Phase 1 (apply): drop the generated hash, park the TEXT data under
# ``old_code`` and add the new BYTEA ``code`` column.
_PREPARE_COLUMNS_SQL = """
        ALTER TABLE leaderboard.code_files DROP COLUMN hash;
        ALTER TABLE leaderboard.code_files RENAME COLUMN code TO old_code;
        ALTER TABLE leaderboard.code_files ADD COLUMN code BYTEA NOT NULL DEFAULT '';
        """

# Phase 1 (rollback): remove the BYTEA column, restore the TEXT column under
# its original name and re-create the generated hash over it.
_RESTORE_COLUMNS_SQL = """
        ALTER TABLE leaderboard.code_files DROP COLUMN code;
        ALTER TABLE leaderboard.code_files RENAME COLUMN old_code TO code;
        ALTER TABLE leaderboard.code_files ADD COLUMN hash TEXT
            GENERATED ALWAYS AS (encode(sha256(code::bytea), 'hex')) STORED;
        """

# Phase 3 (apply): discard the legacy column, add the hash as a generated,
# unique column over the BYTEA data and drop the temporary default.
_FINALIZE_SQL = """
        ALTER TABLE leaderboard.code_files DROP COLUMN old_code;
        ALTER TABLE leaderboard.code_files ADD COLUMN hash TEXT
            GENERATED ALWAYS AS (encode(sha256(code), 'hex')) STORED NOT NULL UNIQUE;
        ALTER TABLE leaderboard.code_files ALTER COLUMN code DROP DEFAULT;
        """

# Phase 3 (rollback): bring back an empty ``old_code`` column for the data
# rollback to fill, and drop the hash.
_UNFINALIZE_SQL = """
        ALTER TABLE leaderboard.code_files ADD COLUMN old_code TEXT;
        ALTER TABLE leaderboard.code_files DROP COLUMN hash;
        """

# Each step pairs an apply action with its rollback counterpart; the order is
# schema preparation, data conversion, schema clean-up.
steps = [
    step(_PREPARE_COLUMNS_SQL, _RESTORE_COLUMNS_SQL),
    step(convert_code_to_bytea, convert_bytea_to_text),
    step(_FINALIZE_SQL, _UNFINALIZE_SQL),
]
0 commit comments