@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
1919use std:: ptr:: NonNull ;
2020
2121pub use self :: bindings:: * ;
22- use ruby_prism_sys:: { pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t} ;
22+ use ruby_prism_sys:: {
23+ pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_read, pm_options_t, pm_options_version_t,
24+ pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25+ } ;
2326
2427/// A range in the source file.
2528pub struct Location < ' pr > {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428431 source : & ' pr [ u8 ] ,
429432 parser : NonNull < pm_parser_t > ,
430433 node : NonNull < pm_node_t > ,
434+ options_string : Vec < u8 > ,
435+ options : NonNull < pm_options_t > ,
431436}
432437
433438impl < ' pr > ParseResult < ' pr > {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529534 pub fn node ( & self ) -> Node < ' _ > {
530535 Node :: new ( self . parser , self . node . as_ptr ( ) )
531536 }
537+
538+ /// Returns the serialized representation of the parse result.
539+ #[ must_use]
540+ pub fn serialize ( & self ) -> Vec < u8 > {
541+ let mut buffer = Buffer :: default ( ) ;
542+ unsafe {
543+ pm_serialize ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) , & mut buffer. buffer ) ;
544+ }
545+ buffer. value ( ) . into ( )
546+ }
532547}
533548
534549impl < ' pr > Drop for ParseResult < ' pr > {
@@ -537,10 +552,178 @@ impl<'pr> Drop for ParseResult<'pr> {
537552 pm_node_destroy ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) ) ;
538553 pm_parser_free ( self . parser . as_ptr ( ) ) ;
539554 drop ( Box :: from_raw ( self . parser . as_ptr ( ) ) ) ;
555+
556+ _ = self . options ;
557+ _ = self . options_string ;
558+
559+ // pm_options_free(self.options.as_ptr());
560+ // drop(Box::from_raw(self.options.as_ptr()));
561+ }
562+ }
563+ }
564+
/// A scope of locals surrounding the code that is being parsed.
#[derive(Clone, Debug, Default)]
pub struct OptionsScope {
    /// Flags describing the set of forwarding parameters in this scope.
    pub forwarding_flags: u8,
    /// Names of the local variables defined in this scope.
    pub locals: Vec<String>,
}
575+
576+ /// The options that can be passed to the parser.
577+ #[ derive( Debug , Clone ) ]
578+ pub struct Options {
579+ /** The name of the file that is currently being parsed. */
580+ pub filepath : String ,
581+ /**
582+ * The line within the file that the parse starts on. This value is
583+ * 1-indexed.
584+ */
585+ pub line : i32 ,
586+ /**
587+ * The name of the encoding that the source file is in. Note that this must
588+ * correspond to a name that can be found with Encoding.find in Ruby.
589+ */
590+ pub encoding : String ,
591+ /**
592+ * Whether or not the frozen string literal option has been set.
593+ * May be:
594+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
595+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
596+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
597+ */
598+ pub frozen_string_literal : Option < bool > ,
599+ /** A bitset of the various options that were set on the command line. */
600+ pub command_line : u8 ,
601+ /**
602+ * The version of prism that we should be parsing with. This is used to
603+ * allow consumers to specify which behavior they want in case they need to
604+ * parse exactly as a specific version of CRuby.
605+ */
606+ pub version : pm_options_version_t ,
607+ /**
608+ * Whether or not the encoding magic comments should be respected. This is a
609+ * niche use-case where you want to parse a file with a specific encoding
610+ * but ignore any encoding magic comments at the top of the file.
611+ */
612+ pub encoding_locked : bool ,
613+ /**
614+ * When the file being parsed is the main script, the shebang will be
615+ * considered for command-line flags (or for implicit -x). The caller needs
616+ * to pass this information to the parser so that it can behave correctly.
617+ */
618+ pub main_script : bool ,
619+ /**
620+ * When the file being parsed is considered a "partial" script, jumps will
621+ * not be marked as errors if they are not contained within loops/blocks.
622+ * This is used in the case that you're parsing a script that you know will
623+ * be embedded inside another script later, but you do not have that context
624+ * yet. For example, when parsing an ERB template that will be evaluated
625+ * inside another script.
626+ */
627+ pub partial_script : bool ,
628+ /**
629+ * Whether or not the parser should freeze the nodes that it creates. This
630+ * makes it possible to have a deeply frozen AST that is safe to share
631+ * between concurrency primitives.
632+ */
633+ pub freeze : bool ,
634+ /**
635+ * The scopes surrounding the code that is being parsed. For most parses
636+ * this will be empty, but for evals it will be the locals that are in scope
637+ * surrounding the eval. Scopes are ordered from the outermost scope to the
638+ * innermost one.
639+ */
640+ pub scopes : Vec < OptionsScope > ,
641+ }
642+
643+ impl Default for Options {
644+ fn default ( ) -> Self {
645+ Self {
646+ filepath : String :: new ( ) ,
647+ line : 1 ,
648+ encoding : String :: new ( ) ,
649+ frozen_string_literal : None ,
650+ command_line : 0 ,
651+ version : pm_options_version_t:: PM_OPTIONS_VERSION_LATEST ,
652+ encoding_locked : false ,
653+ main_script : true ,
654+ partial_script : false ,
655+ freeze : false ,
656+ scopes : Vec :: new ( ) ,
540657 }
541658 }
542659}
543660
661+ impl Options {
662+ #[ allow( clippy:: cast_possible_truncation) ]
663+ fn to_binary_string ( & self ) -> Vec < u8 > {
664+ let mut output = Vec :: new ( ) ;
665+
666+ output. extend ( ( self . filepath . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
667+ output. extend ( self . filepath . as_bytes ( ) ) ;
668+ output. extend ( self . line . to_ne_bytes ( ) ) ;
669+ output. extend ( ( self . encoding . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
670+ output. extend ( self . encoding . as_bytes ( ) ) ;
671+ output. extend ( self . frozen_string_literal . map_or_else ( || 0i8 , |frozen| if frozen { 1 } else { -1 } ) . to_ne_bytes ( ) ) ;
672+ output. push ( self . command_line ) ;
673+ output. extend ( ( self . version as u8 ) . to_ne_bytes ( ) ) ;
674+ output. push ( self . encoding_locked . into ( ) ) ;
675+ output. push ( self . main_script . into ( ) ) ;
676+ output. push ( self . partial_script . into ( ) ) ;
677+ output. push ( self . freeze . into ( ) ) ;
678+ output. extend ( ( self . scopes . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
679+ for scope in & self . scopes {
680+ output. extend ( ( scope. locals . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
681+ output. extend ( scope. forwarding_flags . to_ne_bytes ( ) ) ;
682+ for local in & scope. locals {
683+ output. extend ( ( local. len ( ) as u32 ) . to_ne_bytes ( ) ) ;
684+ output. extend ( local. as_bytes ( ) ) ;
685+ }
686+ }
687+ output
688+ }
689+ }
690+
691+ struct Buffer {
692+ buffer : pm_buffer_t ,
693+ }
694+
695+ impl Default for Buffer {
696+ fn default ( ) -> Self {
697+ let buffer = unsafe {
698+ let mut uninit = MaybeUninit :: < pm_buffer_t > :: uninit ( ) ;
699+ let initialized = pm_buffer_init ( uninit. as_mut_ptr ( ) ) ;
700+ assert ! ( initialized) ;
701+ uninit. assume_init ( )
702+ } ;
703+ Self { buffer }
704+ }
705+ }
706+
707+ impl Buffer {
708+ fn length ( & self ) -> usize {
709+ unsafe { pm_buffer_length ( & self . buffer ) }
710+ }
711+
712+ fn value ( & self ) -> & [ u8 ] {
713+ unsafe {
714+ let value = pm_buffer_value ( & self . buffer ) ;
715+ let value = value. cast :: < u8 > ( ) . cast_const ( ) ;
716+ std:: slice:: from_raw_parts ( value, self . length ( ) )
717+ }
718+ }
719+ }
720+
721+ impl Drop for Buffer {
722+ fn drop ( & mut self ) {
723+ unsafe { pm_buffer_free ( & mut self . buffer ) }
724+ }
725+ }
726+
544727/// Parses the given source string and returns a parse result.
545728///
546729/// # Panics
@@ -549,25 +732,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549732///
550733#[ must_use]
551734pub fn parse ( source : & [ u8 ] ) -> ParseResult < ' _ > {
735+ parse_with_options ( source, & Options :: default ( ) )
736+ }
737+
738+ /// Parses the given source string and returns a parse result.
739+ ///
740+ /// # Panics
741+ ///
742+ /// Panics if the parser fails to initialize.
743+ ///
744+ #[ must_use]
745+ pub fn parse_with_options < ' pr > ( source : & ' pr [ u8 ] , options : & Options ) -> ParseResult < ' pr > {
746+ let options_string = options. to_binary_string ( ) ;
552747 unsafe {
553748 let uninit = Box :: new ( MaybeUninit :: < pm_parser_t > :: uninit ( ) ) ;
554749 let uninit = Box :: into_raw ( uninit) ;
555750
556- pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , std:: ptr:: null ( ) ) ;
751+ let options = Box :: into_raw ( Box :: new ( MaybeUninit :: < pm_options_t > :: zeroed ( ) ) ) ;
752+ pm_options_read ( ( * options) . as_mut_ptr ( ) , options_string. as_ptr ( ) . cast ( ) ) ;
753+ let options = NonNull :: new ( ( * options) . assume_init_mut ( ) ) . unwrap ( ) ;
754+
755+ pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , options. as_ptr ( ) ) ;
557756
558757 let parser = ( * uninit) . assume_init_mut ( ) ;
559758 let parser = NonNull :: new_unchecked ( parser) ;
560759
561760 let node = pm_parse ( parser. as_ptr ( ) ) ;
562761 let node = NonNull :: new_unchecked ( node) ;
563762
564- ParseResult { source, parser, node }
763+ ParseResult { source, parser, node, options_string, options }
764+ }
765+ }
766+
767+ /// Serializes the given source string and returns a parse result.
768+ ///
769+ /// # Panics
770+ ///
771+ /// Panics if the parser fails to initialize.
772+ #[ must_use]
773+ pub fn serialize_parse ( source : & [ u8 ] , options : & Options ) -> Vec < u8 > {
774+ let mut buffer = Buffer :: default ( ) ;
775+ let opts = options. to_binary_string ( ) ;
776+ unsafe {
777+ pm_serialize_parse ( & mut buffer. buffer , source. as_ptr ( ) , source. len ( ) , opts. as_ptr ( ) . cast ( ) ) ;
565778 }
779+ buffer. value ( ) . into ( )
566780}
567781
568782#[ cfg( test) ]
569783mod tests {
570- use super :: parse;
784+ use super :: { parse, parse_with_options , serialize_parse } ;
571785
572786 #[ test]
573787 fn comments_test ( ) {
@@ -1157,6 +1371,28 @@ end
11571371 assert ! ( ( value - 1.0 ) . abs( ) < f64 :: EPSILON ) ;
11581372 }
11591373
// Checks that `serialize_parse` produces the same bytes as parsing with
// `parse_with_options` and then calling `serialize()` on the result, and that
// the `filepath` option shows up in the pretty-printed `__FILE__` node.
1374+ #[ test]
1375+ fn serialize_parse_test ( ) {
1376+ let source = r#"__FILE__"# ;
1377+ let options = crate :: Options { filepath : "test.rb" . to_string ( ) , ..Default :: default ( ) } ;
1378+ let bytes = serialize_parse ( source. as_ref ( ) , & options) ;
1379+
1380+ let result = parse_with_options ( source. as_bytes ( ) , & options) ;
1381+
// Both serialization paths must agree for the same source and options.
1382+ assert_eq ! ( bytes, result. serialize( ) ) ;
1383+
1384+ let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1385+ +-- locals: []
1386+ +-- statements:
1387+ @ StatementsNode (location: (1,0)-(1,8))
1388+ +-- body: (length: 1)
1389+ +-- @ SourceFileNode (location: (1,0)-(1,8))
1390+ +-- StringFlags: nil
1391+ +-- filepath: "test.rb"
1392+ "# ;
1393+ assert_eq ! ( expected, result. node( ) . pretty_print( ) . as_str( ) ) ;
1394+ }
1395+
11601396 #[ test]
11611397 fn node_field_lifetime_test ( ) {
11621398 // The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,
0 commit comments