1- /*
1+ /*
22
33 Copyright (C) 2019. rollrat All Rights Reserved.
44
@@ -24,7 +24,9 @@ public class SimpleRegex
2424 public SimpleRegex ( ) { }
2525 public List < string > build_errors = new List < string > ( ) ;
2626 public diagram Diagram ;
27- public const char e_closure = ( char ) 0 ;
27+ public const char e_closure = ( char ) 0xFFFF ;
28+
29+ public const int byte_size = 256 ;
2830
2931 public class transition_node
3032 {
@@ -225,6 +227,14 @@ private diagram make_nfa(string pattern)
225227 case 'r' :
226228 ch_list . Add ( '\t ' ) ;
227229 break ;
230+ case 'x' :
231+ char ch2 ;
232+ ch2 = ( char ) ( pattern [ i + 1 ] >= 'A' ? ( pattern [ i + 1 ] - 'A' + 10 ) : pattern [ i + 1 ] - '0' ) ;
233+ ch2 <<= 4 ;
234+ ch2 |= ( char ) ( pattern [ i + 2 ] >= 'A' ? ( pattern [ i + 2 ] - 'A' + 10 ) : pattern [ i + 2 ] - '0' ) ;
235+ i += 2 ;
236+ ch_list . Add ( ch2 ) ;
237+ break ;
228238
229239 default :
230240 build_errors . Add ( $ "{ pattern [ i ] } escape character not found!") ;
@@ -245,11 +255,11 @@ private diagram make_nfa(string pattern)
245255 var ends_point2 = new transition_node { index = index_count ++ , transition = new List < Tuple < char , transition_node > > ( ) } ;
246256 if ( inverse )
247257 {
248- var set = new bool [ 128 ] ;
258+ var set = new bool [ byte_size ] ;
249259 var nch_list = new List < char > ( ) ;
250260 foreach ( var ch2 in ch_list )
251261 set [ ch2 ] = true ;
252- for ( int j = 0 ; j < 128 ; j ++ )
262+ for ( int j = 0 ; j < byte_size ; j ++ )
253263 if ( ! set [ j ] )
254264 nch_list . Add ( ( char ) j ) ;
255265 ch_list . Clear ( ) ;
@@ -270,7 +280,7 @@ private diagram make_nfa(string pattern)
270280
271281 case '.' :
272282 var ends_point3 = new transition_node { index = index_count ++ , transition = new List < Tuple < char , transition_node > > ( ) } ;
273- for ( int i2 = 0 ; i2 < 128 ; i2 ++ )
283+ for ( int i2 = 0 ; i2 < byte_size ; i2 ++ )
274284 {
275285 cur . transition . Add ( new Tuple < char , transition_node > ( ( char ) i2 , ends_point3 ) ) ;
276286 }
@@ -293,7 +303,7 @@ private diagram make_nfa(string pattern)
293303 ch = pattern [ i ] ;
294304 else
295305 {
296- switch ( ch )
306+ switch ( pattern [ i ] )
297307 {
298308 case 'n' :
299309 ch = '\n ' ;
@@ -304,9 +314,16 @@ private diagram make_nfa(string pattern)
304314 case 'r' :
305315 ch = '\r ' ;
306316 break ;
317+ case 'x' :
318+ ch = ( char ) ( pattern [ i + 1 ] >= 'A' ? ( pattern [ i + 1 ] - 'A' + 10 ) : pattern [ i + 1 ] - '0' ) ;
319+ ch <<= 4 ;
320+ ch |= ( char ) ( pattern [ i + 2 ] >= 'A' ? ( pattern [ i + 2 ] - 'A' + 10 ) : pattern [ i + 2 ] - '0' ) ;
321+ i += 2 ;
322+ break ;
307323
308324 default :
309325 build_errors . Add ( $ "{ pattern [ i ] } escape character not found!") ;
326+ ch = pattern [ i ] ;
310327 break ;
311328 }
312329
@@ -557,7 +574,7 @@ private bool opt_nfa(diagram dia)
557574 check [ tn . index ] = true ;
558575
559576 // Delete unnecessary e-closure with pull left
560- if ( tn . transition . Count == 1 && tn . transition [ 0 ] . Item1 == 0 )
577+ if ( tn . transition . Count == 1 && tn . transition [ 0 ] . Item1 == e_closure )
561578 {
562579 var index_left = tn . index ;
563580 var index_right = tn . transition [ 0 ] . Item2 . index ;
@@ -576,14 +593,14 @@ private bool opt_nfa(diagram dia)
576593
577594 // Delete recursive e-closure
578595 for ( int i = 0 ; i < tn . transition . Count ; i ++ )
579- if ( tn . transition [ i ] . Item1 == 0 && tn . transition [ i ] . Item2 . index == tn . index )
596+ if ( tn . transition [ i ] . Item1 == e_closure && tn . transition [ i ] . Item2 . index == tn . index )
580597 tn . transition . RemoveAt ( i -- ) ;
581598
582599 // Merge rounding e-closure
583600 for ( int i = 0 ; i < tn . transition . Count ; i ++ )
584- if ( tn . transition [ i ] . Item1 == 0 )
601+ if ( tn . transition [ i ] . Item1 == e_closure )
585602 for ( int j = 0 ; j < tn . transition [ i ] . Item2 . transition . Count ; j ++ )
586- if ( tn . transition [ i ] . Item2 . transition [ j ] . Item1 == 0 && tn . transition [ i ] . Item2 . transition [ j ] . Item2 . index == tn . index )
603+ if ( tn . transition [ i ] . Item2 . transition [ j ] . Item1 == e_closure && tn . transition [ i ] . Item2 . transition [ j ] . Item2 . index == tn . index )
587604 {
588605 var index_left = tn . index ;
589606 var index_right = tn . transition [ i ] . Item2 . index ;
@@ -615,7 +632,7 @@ private bool opt_nfa(diagram dia)
615632 var index_right = tn . index ;
616633
617634 for ( int i = 0 ; i < dia . nodes [ index_left ] . transition . Count ; i ++ )
618- if ( dia . nodes [ index_left ] . transition [ i ] . Item2 . index == dia . nodes [ index_right ] . index && dia . nodes [ index_left ] . transition [ i ] . Item1 == 0 )
635+ if ( dia . nodes [ index_left ] . transition [ i ] . Item2 . index == dia . nodes [ index_right ] . index && dia . nodes [ index_left ] . transition [ i ] . Item1 == e_closure )
619636 {
620637 if ( dia . nodes [ index_left ] . transition [ i ] . Item2 . is_acceptable )
621638 {
@@ -647,7 +664,7 @@ private bool opt_nfa(diagram dia)
647664 dia . nodes [ top ] . is_acceptable = true ;
648665 if ( inverse_transition . ContainsKey ( top ) )
649666 foreach ( var inv in inverse_transition [ top ] )
650- if ( dia . nodes [ inv ] . transition . Where ( x => x . Item2 . index == top ) . First ( ) . Item1 == 0 )
667+ if ( dia . nodes [ inv ] . transition . Where ( x => x . Item2 . index == top ) . First ( ) . Item1 == e_closure )
651668 acc_nodes . Enqueue ( inv ) ;
652669 }
653670
@@ -698,11 +715,11 @@ private diagram nfa2dfa(diagram dia)
698715 var check = new List < bool > ( dia . count_of_vertex ) ;
699716 check . AddRange ( Enumerable . Repeat ( false , dia . count_of_vertex ) ) ;
700717 var e_q = new Queue < int > ( ) ;
701- d_q . ToList ( ) . Where ( qe => qe . Item1 == 0 ) . ToList ( ) . ForEach ( qee => { e_q . Enqueue ( qee . Item2 ) ; } ) ;
718+ d_q . ToList ( ) . Where ( qe => qe . Item1 == e_closure ) . ToList ( ) . ForEach ( qee => { e_q . Enqueue ( qee . Item2 ) ; } ) ;
702719
703720 foreach ( var qe in d_q )
704721 {
705- if ( qe . Item1 == 0 )
722+ if ( qe . Item1 == e_closure )
706723 e_q . Enqueue ( qe . Item2 ) ;
707724 else
708725 check [ qe . Item2 ] = true ;
@@ -714,7 +731,7 @@ private diagram nfa2dfa(diagram dia)
714731 if ( check [ d ] ) continue ;
715732 check [ d ] = true ;
716733 foreach ( var tns in dia . nodes [ d ] . transition )
717- if ( tns . Item1 == 0 )
734+ if ( tns . Item1 == e_closure )
718735 e_q . Enqueue ( tns . Item2 . index ) ;
719736 else
720737 d_q . Enqueue ( new Tuple < char , int > ( tns . Item1 , tns . Item2 . index ) ) ;
@@ -725,13 +742,13 @@ private diagram nfa2dfa(diagram dia)
725742 while ( d_q . Count != 0 )
726743 {
727744 var dd = d_q . Dequeue ( ) ;
728- if ( dd . Item1 == 0 ) continue ;
745+ if ( dd . Item1 == e_closure ) continue ;
729746 if ( dic . ContainsKey ( dd . Item1 ) )
730747 dic [ dd . Item1 ] . Add ( dd . Item2 ) ;
731748 else
732749 dic . Add ( dd . Item1 , new HashSet < int > { dd . Item2 } ) ;
733750 foreach ( var node in dia . nodes [ dd . Item2 ] . transition )
734- if ( node . Item1 == 0 )
751+ if ( node . Item1 == e_closure )
735752 dic [ dd . Item1 ] . Add ( node . Item2 . index ) ;
736753 }
737754
@@ -1056,8 +1073,8 @@ public Scanner CreateScannerInstance(string delimiter = "\n\r ")
10561073 var accept_table = new string [ diagram . count_of_vertex ] ;
10571074 for ( int i = 0 ; i < table . Length ; i ++ )
10581075 {
1059- table [ i ] = new int [ 255 ] ;
1060- for ( int j = 0 ; j < 255 ; j ++ )
1076+ table [ i ] = new int [ SimpleRegex . byte_size ] ;
1077+ for ( int j = 0 ; j < SimpleRegex . byte_size ; j ++ )
10611078 table [ i ] [ j ] = - 1 ;
10621079 }
10631080
@@ -1080,7 +1097,9 @@ public Scanner CreateScannerInstance(string delimiter = "\n\r ")
10801097 /// </summary>
10811098 public class Scanner
10821099 {
1100+ [ JsonProperty ]
10831101 int [ ] [ ] transition_table ;
1102+ [ JsonProperty ]
10841103 string [ ] accept_table ;
10851104 string target ;
10861105 int pos = 0 ;
@@ -1155,7 +1174,7 @@ public Tuple<string, string, int, int> Next()
11551174 err_pos . Add ( pos ) ;
11561175 continue ;
11571176 }
1158- return new Tuple < string , string , int , int > ( accept_table [ node_pos ] , builder . ToString ( ) , cur_line + 1 , cur_column + 1 ) ;
1177+ return new Tuple < string , string , int , int > ( accept_table [ node_pos ] , builder . ToString ( ) , cur_line + 1 , cur_column + 1 ) ;
11591178
11601179 default :
11611180 if ( target [ pos ] == '\n ' ) { current_line ++ ; current_column = 1 ; } else current_column ++ ;
@@ -1167,7 +1186,7 @@ public Tuple<string, string, int, int> Next()
11671186 }
11681187 if ( accept_table [ node_pos ] == null )
11691188 throw new Exception ( $ "[SCANNER] Pattern not found! L:{ cur_line } , C:{ cur_column } , D:'{ builder . ToString ( ) } '") ;
1170- return new Tuple < string , string , int , int > ( accept_table [ node_pos ] , builder . ToString ( ) , cur_line + 1 , cur_column + 1 ) ;
1189+ return new Tuple < string , string , int , int > ( accept_table [ node_pos ] , builder . ToString ( ) , cur_line + 1 , cur_column + 1 ) ;
11711190 }
11721191
11731192 public Tuple < string , string , int , int > Lookahead ( )
@@ -1177,5 +1196,42 @@ public Tuple<string, string, int, int> Lookahead()
11771196 pos = npos ;
11781197 return result ;
11791198 }
1199+
/// <summary>
/// Rebuilds a <see cref="Scanner"/> from JSON text by handing it to
/// <c>JsonConvert.DeserializeObject</c>.
/// </summary>
/// <param name="json">JSON text describing a scanner.</param>
/// <returns>Whatever the deserializer produces for <paramref name="json"/>.</returns>
public static Scanner FromString(string json)
{
    return JsonConvert.DeserializeObject<Scanner>(json);
}
/// <summary>
/// Serializes this scanner to JSON with no extra formatting
/// (<c>Formatting.None</c>).
/// </summary>
/// <returns>The JSON text produced by <c>JsonConvert.SerializeObject</c>.</returns>
public override string ToString()
{
    return JsonConvert.SerializeObject(this, Formatting.None);
}
/// <summary>
/// Emits C# source text for a class that embeds this scanner's
/// <c>transition_table</c> and <c>accept_table</c> as array initializers.
/// </summary>
/// <param name="class_name">Identifier written after <c>public class</c> in the output.</param>
/// <returns>The generated source; every emitted line ends with CRLF.</returns>
public string ToCSCode(string class_name)
{
    // One indentation level. Adding and removing use the same unit so the
    // two operations can never get out of step.
    const string indent_unit = "    ";

    var builder = new StringBuilder();
    var indent = "";

    Action up_indent = () => { indent += indent_unit; };

    // Drop one level. Clamp to empty instead of slicing past the start of
    // the string, which would throw ArgumentOutOfRangeException.
    Action down_indent = () =>
    {
        indent = indent.Length > indent_unit.Length
            ? indent.Substring(indent_unit.Length)
            : "";
    };

    Action<string> append = (string s) => { builder.Append($"{indent}{s}\r\n"); };

    // Render an accept name as a C# string literal. The name is written
    // verbatim (no padding) so the generated table holds the same values as
    // this scanner's table; backslashes and quotes are escaped so the
    // emitted code stays well-formed.
    Func<string, string> quote = (string s) =>
        "\"" + s.Replace("\\", "\\\\").Replace("\"", "\\\"") + "\"";

    append("public class " + class_name);
    append("{");
    up_indent();

    // Transition table: one "new int[] { ... }," row per state, with the
    // numbers right-aligned to width 4 for readability.
    append("int[][] transition_table = new int[][] {");
    up_indent();
    foreach (var gt in transition_table)
        append("new int[] {" + string.Join(",", gt.Select(x => x.ToString().PadLeft(4))) + " },");
    down_indent();
    append("};");
    append("");

    // Accept table: a single line; states with no accept name emit null.
    append("string[] accept_table = new string[] {");
    up_indent();
    append(string.Join(",", accept_table.Select(x => x != null ? quote(x) : "null")));
    down_indent();
    append("};");
    append("");

    down_indent();
    append("}");
    return builder.ToString();
}
11801236 }
11811237}
0 commit comments