@@ -191,11 +191,55 @@ impl ConstantBag for BasicBag {
191191 }
192192}
193193
194+ /// Container for bytecode instructions - either parsed or raw
195+ #[ derive( Clone ) ]
196+ pub enum Instructions {
197+ /// Parsed bytecode instructions
198+ Parsed ( Box < [ CodeUnit ] > ) ,
199+ /// Raw bytecode bytes (may contain invalid opcodes)
200+ Raw ( Box < [ u8 ] > ) ,
201+ }
202+
203+ impl Instructions {
204+ /// Get as parsed instructions, panics if raw
205+ pub fn as_parsed ( & self ) -> & [ CodeUnit ] {
206+ match self {
207+ Instructions :: Parsed ( units) => units,
208+ Instructions :: Raw ( _) => panic ! ( "Cannot get parsed instructions from raw bytecode" ) ,
209+ }
210+ }
211+
212+ /// Get as raw bytes (borrowed)
213+ pub fn as_bytes ( & self ) -> & [ u8 ] {
214+ match self {
215+ Instructions :: Parsed ( units) => {
216+ // SAFETY: it's ok to transmute CodeUnit to [u8; 2]
217+ let ( _, instructions_bytes, _) = unsafe { units. align_to ( ) } ;
218+ instructions_bytes
219+ }
220+ Instructions :: Raw ( bytes) => bytes,
221+ }
222+ }
223+
224+ /// Check if this contains raw bytecode
225+ pub fn is_raw ( & self ) -> bool {
226+ matches ! ( self , Instructions :: Raw ( _) )
227+ }
228+
229+ /// Get the number of instructions (or byte pairs for raw)
230+ pub fn len ( & self ) -> usize {
231+ match self {
232+ Instructions :: Parsed ( units) => units. len ( ) ,
233+ Instructions :: Raw ( bytes) => bytes. len ( ) / 2 ,
234+ }
235+ }
236+ }
237+
194238/// Primary container of a single code object. Each python function has
195239/// a code object. Also a module has a code object.
196240#[ derive( Clone ) ]
197241pub struct CodeObject < C : Constant = ConstantData > {
198- pub instructions : Box < [ CodeUnit ] > ,
242+ pub instructions : Instructions ,
199243 pub locations : Box < [ SourceLocation ] > ,
200244 pub flags : CodeFlags ,
201245 /// Number of positional-only arguments
@@ -1179,11 +1223,18 @@ impl<C: Constant> CodeObject<C> {
11791223 /// Return the labels targeted by the instructions of this CodeObject
11801224 pub fn label_targets ( & self ) -> BTreeSet < Label > {
11811225 let mut label_targets = BTreeSet :: new ( ) ;
1182- let mut arg_state = OpArgState :: default ( ) ;
1183- for instruction in & * self . instructions {
1184- let ( instruction, arg) = arg_state. get ( * instruction) ;
1185- if let Some ( l) = instruction. label_arg ( ) {
1186- label_targets. insert ( l. get ( arg) ) ;
1226+ match & self . instructions {
1227+ Instructions :: Parsed ( units) => {
1228+ let mut arg_state = OpArgState :: default ( ) ;
1229+ for instruction in & * * units {
1230+ let ( instruction, arg) = arg_state. get ( * instruction) ;
1231+ if let Some ( l) = instruction. label_arg ( ) {
1232+ label_targets. insert ( l. get ( arg) ) ;
1233+ }
1234+ }
1235+ }
1236+ Instructions :: Raw ( _) => {
1237+ // Raw bytecode doesn't have parsed labels
11871238 }
11881239 }
11891240 label_targets
@@ -1195,46 +1246,54 @@ impl<C: Constant> CodeObject<C> {
11951246 expand_code_objects : bool ,
11961247 level : usize ,
11971248 ) -> fmt:: Result {
1198- let label_targets = self . label_targets ( ) ;
1199- let line_digits = ( 3 ) . max ( self . locations . last ( ) . unwrap ( ) . row . to_string ( ) . len ( ) ) ;
1200- let offset_digits = ( 4 ) . max ( self . instructions . len ( ) . to_string ( ) . len ( ) ) ;
1201- let mut last_line = OneIndexed :: MAX ;
1202- let mut arg_state = OpArgState :: default ( ) ;
1203- for ( offset, & instruction) in self . instructions . iter ( ) . enumerate ( ) {
1204- let ( instruction, arg) = arg_state. get ( instruction) ;
1205- // optional line number
1206- let line = self . locations [ offset] . row ;
1207- if line != last_line {
1208- if last_line != OneIndexed :: MAX {
1209- writeln ! ( f) ?;
1210- }
1211- last_line = line;
1212- write ! ( f, "{line:line_digits$}" ) ?;
1213- } else {
1214- for _ in 0 ..line_digits {
1215- write ! ( f, " " ) ?;
1216- }
1249+ match & self . instructions {
1250+ Instructions :: Raw ( bytes) => {
1251+ writeln ! ( f, "Raw bytecode ({} bytes): {:?}" , bytes. len( ) , bytes) ?;
1252+ return Ok ( ( ) ) ;
12171253 }
1218- write ! ( f, " " ) ?;
1254+ Instructions :: Parsed ( units) => {
1255+ let label_targets = self . label_targets ( ) ;
1256+ let line_digits = ( 3 ) . max ( self . locations . last ( ) . unwrap ( ) . row . to_string ( ) . len ( ) ) ;
1257+ let offset_digits = ( 4 ) . max ( units. len ( ) . to_string ( ) . len ( ) ) ;
1258+ let mut last_line = OneIndexed :: MAX ;
1259+ let mut arg_state = OpArgState :: default ( ) ;
1260+ for ( offset, & instruction) in units. iter ( ) . enumerate ( ) {
1261+ let ( instruction, arg) = arg_state. get ( instruction) ;
1262+ // optional line number
1263+ let line = self . locations [ offset] . row ;
1264+ if line != last_line {
1265+ if last_line != OneIndexed :: MAX {
1266+ writeln ! ( f) ?;
1267+ }
1268+ last_line = line;
1269+ write ! ( f, "{line:line_digits$}" ) ?;
1270+ } else {
1271+ for _ in 0 ..line_digits {
1272+ write ! ( f, " " ) ?;
1273+ }
1274+ }
1275+ write ! ( f, " " ) ?;
12191276
1220- // level indent
1221- for _ in 0 ..level {
1222- write ! ( f, " " ) ?;
1223- }
1277+ // level indent
1278+ for _ in 0 ..level {
1279+ write ! ( f, " " ) ?;
1280+ }
12241281
1225- // arrow and offset
1226- let arrow = if label_targets. contains ( & Label ( offset as u32 ) ) {
1227- ">>"
1228- } else {
1229- " "
1230- } ;
1231- write ! ( f, "{arrow} {offset:offset_digits$} " ) ?;
1282+ // arrow and offset
1283+ let arrow = if label_targets. contains ( & Label ( offset as u32 ) ) {
1284+ ">>"
1285+ } else {
1286+ " "
1287+ } ;
1288+ write ! ( f, "{arrow} {offset:offset_digits$} " ) ?;
12321289
1233- // instruction
1234- instruction. fmt_dis ( arg, f, self , expand_code_objects, 21 , level) ?;
1235- writeln ! ( f) ?;
1290+ // instruction
1291+ instruction. fmt_dis ( arg, f, self , expand_code_objects, 21 , level) ?;
1292+ writeln ! ( f) ?;
1293+ }
1294+ Ok ( ( ) )
1295+ }
12361296 }
1237- Ok ( ( ) )
12381297 }
12391298
12401299 /// Recursively display this CodeObject
0 commit comments