Skip to content

Commit 016960a

Browse files
committed
Fix negative ints and add MARK ops
1 parent a447216 commit 016960a

File tree

2 files changed

+118
-3
lines changed

2 files changed

+118
-3
lines changed

modal-js/src/pickle.test.ts

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
import { expect, test } from "vitest";
1+
import { describe, expect, test } from "vitest";
22
import { dumps, loads, type Protocol } from "./pickle";
3+
import { Buffer } from "buffer";
34

45
test("PickleUnpickle", () => {
56
const sample = {
@@ -14,3 +15,58 @@ test("PickleUnpickle", () => {
1415
expect(back).toEqual(sample);
1516
}
1617
});
18+
19+
// Python pickle compatibility tests (v4)
20+
// Using `python -c "import pickle, base64; print(base64.b64encode(pickle.dumps(..., protocol=4)).decode())"`
21+
const testCases = [
22+
{
23+
name: "MinusOne",
24+
b64: "gASVBgAAAAAAAABK/////y4=",
25+
expected: -1,
26+
},
27+
{
28+
// [b'1', b'2', b'3']
29+
name: "BytesList - uses MARK and APPENDS",
30+
b64: "gASVEQAAAAAAAABdlChDATGUQwEylEMBM5RlLg==",
31+
expected: [
32+
new Uint8Array([49]),
33+
new Uint8Array([50]),
34+
new Uint8Array([51]),
35+
],
36+
},
37+
{
38+
name: "SimpleList",
39+
b64: "gASVCwAAAAAAAABdlChLAUsCSwNlLg==",
40+
expected: [1, 2, 3],
41+
},
42+
{
43+
name: "SimpleDict",
44+
b64: "gASVEQAAAAAAAAB9lCiMAWGUSwGMAWKUSwJ1Lg==",
45+
expected: { a: 1, b: 2 },
46+
},
47+
// Integer edge cases
48+
{ name: "BININT1_0", b64: "gARLAC4=", expected: 0 },
49+
{ name: "BININT1_255", b64: "gARL/y4=", expected: 255 },
50+
{ name: "BININT2_-32768", b64: "gASVBgAAAAAAAABKAID//y4=", expected: -32768 },
51+
{ name: "BININT2_32767", b64: "gASVBAAAAAAAAABN/38u", expected: 32767 },
52+
{
53+
name: "BININT4_-2147483648",
54+
b64: "gASVBgAAAAAAAABKAAAAgC4=",
55+
expected: -2147483648,
56+
},
57+
{
58+
name: "BININT4_2147483647",
59+
b64: "gASVBgAAAAAAAABK////fy4=",
60+
expected: 2147483647,
61+
},
62+
];
63+
64+
describe("Python compatibility", () => {
65+
for (const { name, b64, expected } of testCases) {
66+
test(name, () => {
67+
const buf = Buffer.from(b64, "base64");
68+
const val = loads(new Uint8Array(buf));
69+
expect(val).toEqual(expected);
70+
});
71+
}
72+
});

modal-js/src/pickle.ts

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ const enum Op {
4848
APPEND = 0x61, // a
4949
EMPTY_DICT = 0x7d, // }
5050
SETITEM = 0x73, // s
51+
MARK = 0x28, // ( (mark stack position)
5152

5253
// Memo / frame machinery
5354
BINPUT = 0x71, // q idx(1)
@@ -56,6 +57,8 @@ const enum Op {
5657
LONG_BINGET = 0x6a, // j idx(4)
5758
MEMOIZE = 0x94, // \x94 (≥4)
5859
FRAME = 0x95, // \x95 size(8) (proto‑5)
60+
APPENDS = 0x65, // e
61+
SETITEMS = 0x75, // u
5962
}
6063

6164
// ─── Binary helpers ─────────────────────────────────────────
@@ -118,6 +121,15 @@ class Reader {
118121
const hi = this.uint32LE() >>> 0;
119122
return hi * 2 ** 32 + lo;
120123
}
124+
int32LE() {
125+
const v = new DataView(
126+
this.buf.buffer,
127+
this.buf.byteOffset + this.pos,
128+
4,
129+
).getInt32(0, true);
130+
this.pos += 4;
131+
return v;
132+
}
121133
float64BE() {
122134
const v = new DataView(
123135
this.buf.buffer,
@@ -276,6 +288,9 @@ export function loads(buf: Uint8Array): any {
276288
void size; // silence tsclint
277289
}
278290

291+
// Unique marker for stack operations (cannot be confused with user data)
292+
const MARK = Symbol("pickle-mark");
293+
279294
while (!r.eof()) {
280295
const op = r.byte();
281296
switch (op) {
@@ -302,8 +317,7 @@ export function loads(buf: Uint8Array): any {
302317
break;
303318
}
304319
case Op.BININT4: {
305-
const n = r.uint32LE();
306-
push(n >>> 31 ? n - 0x1_0000_0000 : n);
320+
push(r.int32LE());
307321
break;
308322
}
309323
case Op.BINFLOAT:
@@ -386,6 +400,51 @@ export function loads(buf: Uint8Array): any {
386400
/* ignore */ break;
387401
}
388402

403+
case Op.MARK:
404+
push(MARK);
405+
break;
406+
407+
case Op.APPENDS: {
408+
// Pops all items after the last MARK and appends them to the list below the MARK
409+
// Find the last MARK
410+
const markIndex = stack.lastIndexOf(MARK);
411+
if (markIndex === -1) {
412+
throw new PickleError("APPENDS without MARK");
413+
}
414+
const lst = stack[markIndex - 1];
415+
if (!Array.isArray(lst)) {
416+
throw new PickleError("APPENDS expects a list below MARK");
417+
}
418+
const items = stack.slice(markIndex + 1);
419+
lst.push(...items);
420+
stack.length = markIndex - 1; // Remove everything after the list
421+
push(lst);
422+
break;
423+
}
424+
425+
case Op.SETITEMS: {
426+
// Sets multiple key-value pairs in a dict after the last MARK
427+
// Find the last MARK
428+
const markIndex = stack.lastIndexOf(MARK);
429+
if (markIndex === -1) {
430+
throw new PickleError("SETITEMS without MARK");
431+
}
432+
const d = stack[markIndex - 1];
433+
if (typeof d !== "object" || d === null || Array.isArray(d)) {
434+
throw new PickleError("SETITEMS expects a dict below MARK");
435+
}
436+
const items = stack.slice(markIndex + 1);
437+
// Set key-value pairs (items come in pairs: key, value, key, value, ...)
438+
for (let i = 0; i < items.length; i += 2) {
439+
if (i + 1 < items.length) {
440+
d[items[i]] = items[i + 1];
441+
}
442+
}
443+
stack.length = markIndex - 1; // Remove everything after the dict
444+
push(d);
445+
break;
446+
}
447+
389448
default:
390449
throw new PickleError(`Unsupported opcode 0x${op.toString(16)}`);
391450
}

0 commit comments

Comments
 (0)