Skip to content

Commit d77781a

Browse files
committed
Fix PyCode Constructor
1 parent 3b48dcc commit d77781a

File tree

9 files changed

+513
-190
lines changed

9 files changed

+513
-190
lines changed

Lib/test/test_code.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,6 @@ class List(list):
222222
obj = List([1, 2, 3])
223223
self.assertEqual(obj[0], "Foreign getitem: 1")
224224

225-
# TODO: RUSTPYTHON
226-
@unittest.expectedFailure
227225
def test_constructor(self):
228226
def func(): pass
229227
co = func.__code__
@@ -255,8 +253,6 @@ def test_qualname(self):
255253
CodeTest.test_qualname.__qualname__
256254
)
257255

258-
# TODO: RUSTPYTHON
259-
@unittest.expectedFailure
260256
def test_replace(self):
261257
def func():
262258
x = 1
@@ -297,8 +293,6 @@ def func2():
297293
self.assertEqual(new_code.co_varnames, code2.co_varnames)
298294
self.assertEqual(new_code.co_nlocals, code2.co_nlocals)
299295

300-
# TODO: RUSTPYTHON
301-
@unittest.expectedFailure
302296
def test_nlocals_mismatch(self):
303297
def func():
304298
x = 1
@@ -362,8 +356,6 @@ def func():
362356
with self.assertWarns(DeprecationWarning):
363357
func.__code__.co_lnotab
364358

365-
# TODO: RUSTPYTHON
366-
@unittest.expectedFailure
367359
def test_invalid_bytecode(self):
368360
def foo():
369361
pass

compiler/codegen/src/ir.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{IndexMap, IndexSet, error::InternalError};
44
use rustpython_compiler_core::{
55
OneIndexed, SourceLocation,
66
bytecode::{
7-
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
7+
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Instructions, Label,
88
OpArg, PyCodeLocationInfoKind,
99
},
1010
};
@@ -214,7 +214,7 @@ impl CodeInfo {
214214
qualname: qualname.unwrap_or(obj_name),
215215

216216
max_stackdepth,
217-
instructions: instructions.into_boxed_slice(),
217+
instructions: Instructions::Parsed(instructions.into_boxed_slice()),
218218
locations: locations.into_boxed_slice(),
219219
constants: constants.into_iter().collect(),
220220
names: name_cache.into_iter().collect(),

compiler/core/src/bytecode.rs

Lines changed: 100 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -191,11 +191,55 @@ impl ConstantBag for BasicBag {
191191
}
192192
}
193193

194+
/// Container for bytecode instructions - either parsed or raw
195+
#[derive(Clone)]
196+
pub enum Instructions {
197+
/// Parsed bytecode instructions
198+
Parsed(Box<[CodeUnit]>),
199+
/// Raw bytecode bytes (may contain invalid opcodes)
200+
Raw(Box<[u8]>),
201+
}
202+
203+
impl Instructions {
204+
/// Get as parsed instructions, panics if raw
205+
pub fn as_parsed(&self) -> &[CodeUnit] {
206+
match self {
207+
Instructions::Parsed(units) => units,
208+
Instructions::Raw(_) => panic!("Cannot get parsed instructions from raw bytecode"),
209+
}
210+
}
211+
212+
/// Get as raw bytes (borrowed)
213+
pub fn as_bytes(&self) -> &[u8] {
214+
match self {
215+
Instructions::Parsed(units) => {
216+
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
217+
let (_, instructions_bytes, _) = unsafe { units.align_to() };
218+
instructions_bytes
219+
}
220+
Instructions::Raw(bytes) => bytes,
221+
}
222+
}
223+
224+
/// Check if this contains raw bytecode
225+
pub fn is_raw(&self) -> bool {
226+
matches!(self, Instructions::Raw(_))
227+
}
228+
229+
/// Get the number of instructions (or byte pairs for raw)
230+
pub fn len(&self) -> usize {
231+
match self {
232+
Instructions::Parsed(units) => units.len(),
233+
Instructions::Raw(bytes) => bytes.len() / 2,
234+
}
235+
}
236+
}
237+
194238
/// Primary container of a single code object. Each python function has
195239
/// a code object. Also a module has a code object.
196240
#[derive(Clone)]
197241
pub struct CodeObject<C: Constant = ConstantData> {
198-
pub instructions: Box<[CodeUnit]>,
242+
pub instructions: Instructions,
199243
pub locations: Box<[SourceLocation]>,
200244
pub flags: CodeFlags,
201245
/// Number of positional-only arguments
@@ -1179,11 +1223,18 @@ impl<C: Constant> CodeObject<C> {
11791223
/// Return the labels targeted by the instructions of this CodeObject
11801224
pub fn label_targets(&self) -> BTreeSet<Label> {
11811225
let mut label_targets = BTreeSet::new();
1182-
let mut arg_state = OpArgState::default();
1183-
for instruction in &*self.instructions {
1184-
let (instruction, arg) = arg_state.get(*instruction);
1185-
if let Some(l) = instruction.label_arg() {
1186-
label_targets.insert(l.get(arg));
1226+
match &self.instructions {
1227+
Instructions::Parsed(units) => {
1228+
let mut arg_state = OpArgState::default();
1229+
for instruction in &**units {
1230+
let (instruction, arg) = arg_state.get(*instruction);
1231+
if let Some(l) = instruction.label_arg() {
1232+
label_targets.insert(l.get(arg));
1233+
}
1234+
}
1235+
}
1236+
Instructions::Raw(_) => {
1237+
// Raw bytecode doesn't have parsed labels
11871238
}
11881239
}
11891240
label_targets
@@ -1195,46 +1246,54 @@ impl<C: Constant> CodeObject<C> {
11951246
expand_code_objects: bool,
11961247
level: usize,
11971248
) -> fmt::Result {
1198-
let label_targets = self.label_targets();
1199-
let line_digits = (3).max(self.locations.last().unwrap().row.to_string().len());
1200-
let offset_digits = (4).max(self.instructions.len().to_string().len());
1201-
let mut last_line = OneIndexed::MAX;
1202-
let mut arg_state = OpArgState::default();
1203-
for (offset, &instruction) in self.instructions.iter().enumerate() {
1204-
let (instruction, arg) = arg_state.get(instruction);
1205-
// optional line number
1206-
let line = self.locations[offset].row;
1207-
if line != last_line {
1208-
if last_line != OneIndexed::MAX {
1209-
writeln!(f)?;
1210-
}
1211-
last_line = line;
1212-
write!(f, "{line:line_digits$}")?;
1213-
} else {
1214-
for _ in 0..line_digits {
1215-
write!(f, " ")?;
1216-
}
1249+
match &self.instructions {
1250+
Instructions::Raw(bytes) => {
1251+
writeln!(f, "Raw bytecode ({} bytes): {:?}", bytes.len(), bytes)?;
1252+
return Ok(());
12171253
}
1218-
write!(f, " ")?;
1254+
Instructions::Parsed(units) => {
1255+
let label_targets = self.label_targets();
1256+
let line_digits = (3).max(self.locations.last().unwrap().row.to_string().len());
1257+
let offset_digits = (4).max(units.len().to_string().len());
1258+
let mut last_line = OneIndexed::MAX;
1259+
let mut arg_state = OpArgState::default();
1260+
for (offset, &instruction) in units.iter().enumerate() {
1261+
let (instruction, arg) = arg_state.get(instruction);
1262+
// optional line number
1263+
let line = self.locations[offset].row;
1264+
if line != last_line {
1265+
if last_line != OneIndexed::MAX {
1266+
writeln!(f)?;
1267+
}
1268+
last_line = line;
1269+
write!(f, "{line:line_digits$}")?;
1270+
} else {
1271+
for _ in 0..line_digits {
1272+
write!(f, " ")?;
1273+
}
1274+
}
1275+
write!(f, " ")?;
12191276

1220-
// level indent
1221-
for _ in 0..level {
1222-
write!(f, " ")?;
1223-
}
1277+
// level indent
1278+
for _ in 0..level {
1279+
write!(f, " ")?;
1280+
}
12241281

1225-
// arrow and offset
1226-
let arrow = if label_targets.contains(&Label(offset as u32)) {
1227-
">>"
1228-
} else {
1229-
" "
1230-
};
1231-
write!(f, "{arrow} {offset:offset_digits$} ")?;
1282+
// arrow and offset
1283+
let arrow = if label_targets.contains(&Label(offset as u32)) {
1284+
">>"
1285+
} else {
1286+
" "
1287+
};
1288+
write!(f, "{arrow} {offset:offset_digits$} ")?;
12321289

1233-
// instruction
1234-
instruction.fmt_dis(arg, f, self, expand_code_objects, 21, level)?;
1235-
writeln!(f)?;
1290+
// instruction
1291+
instruction.fmt_dis(arg, f, self, expand_code_objects, 21, level)?;
1292+
writeln!(f)?;
1293+
}
1294+
Ok(())
1295+
}
12361296
}
1237-
Ok(())
12381297
}
12391298

12401299
/// Recursively display this CodeObject

compiler/core/src/marshal.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,15 +184,24 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
184184
bag: Bag,
185185
) -> Result<CodeObject<Bag::Constant>> {
186186
let len = rdr.read_u32()?;
187-
let instructions = rdr.read_slice(len * 2)?;
188-
let instructions = instructions
187+
let instructions_bytes = rdr.read_slice(len * 2)?;
188+
189+
// Try to parse as valid instructions
190+
let instructions = match instructions_bytes
189191
.chunks_exact(2)
190192
.map(|cu| {
191193
let op = Instruction::try_from(cu[0])?;
192194
let arg = OpArgByte(cu[1]);
193195
Ok(CodeUnit { op, arg })
194196
})
195-
.collect::<Result<Box<[CodeUnit]>>>()?;
197+
.collect::<Result<Box<[CodeUnit]>>>()
198+
{
199+
Ok(parsed) => Instructions::Parsed(parsed),
200+
Err(_) => {
201+
// Contains invalid opcodes, store as raw
202+
Instructions::Raw(instructions_bytes.to_vec().into_boxed_slice())
203+
}
204+
};
196205

197206
let len = rdr.read_u32()?;
198207
let locations = (0..len)
@@ -649,9 +658,9 @@ pub fn serialize_value<W: Write, D: Dumpable>(
649658
}
650659

651660
pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
652-
write_len(buf, code.instructions.len());
653-
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
654-
let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
661+
// Write instructions as bytes
662+
let instructions_bytes = code.instructions.as_bytes();
663+
write_len(buf, instructions_bytes.len() / 2);
655664
buf.write_slice(instructions_bytes);
656665

657666
write_len(buf, code.locations.len());

stdlib/src/dis.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ mod decl {
3838

3939
/// Print the `Display` rendering of the code object `co` to standard output.
///
/// Always returns `Ok(())`; nothing in the body produces a Python error.
#[pyfunction]
4040
fn disassemble(co: PyRef<PyCode>) -> PyResult<()> {
41-
print!("{}", &co.code);
41+
// NOTE(review): `&**co` presumably dereferences `PyRef<PyCode>` down to the
// underlying code object so its `Display` impl is used — confirm against
// the `Deref` impls of `PyRef` and `PyCode`.
print!("{}", &**co);
4242
Ok(())
4343
}
4444

0 commit comments

Comments
 (0)