@@ -45,6 +45,7 @@ class Error < RDoc::Error
4545
4646 attr_accessor :continue
4747 attr_accessor :lex_state
48+ attr_accessor :first_in_method_statement
4849 attr_reader :reader
4950
5051 class << self
@@ -112,6 +113,8 @@ def initialize(content, options)
112113 @indent_stack = [ ]
113114 @lex_state = :EXPR_BEG
114115 @space_seen = false
116+ @first_in_method_statement = false
117+ @after_question = false
115118
116119 @continue = false
117120 @line = ""
@@ -352,6 +355,7 @@ def token
352355 begin
353356 tk = @OP . match ( self )
354357 @space_seen = tk . kind_of? ( TkSPACE )
358+ @first_in_method_statement = false if !@space_seen && @first_in_method_statement
355359 rescue SyntaxError => e
356360 raise Error , "syntax error: #{ e . message } " if
357361 @exception_on_syntax_error
@@ -378,7 +382,13 @@ def token
378382 else
379383 tk = tk1
380384 end
385+ elsif ( TkPLUS === tk or TkMINUS === tk ) and peek ( 0 ) =~ /\d / then
386+ tk1 = token
387+ set_token_position tk . seek , tk . line_no , tk . char_no
388+ tk = Token ( tk1 . class , tk . text + tk1 . text )
381389 end
390+ @after_question = false if @after_question and !( TkQUESTION === tk )
391+
382392 # Tracer.off
383393 tk
384394 end
@@ -450,15 +460,18 @@ def lex_init()
450460 proc { |op , io | @prev_char_no == 0 && peek ( 0 ) =~ /\s / } ) do
451461 |op , io |
452462 @ltype = "="
453- res = ''
454- nil until getc == "\n "
463+ res = op
464+ until ( ch = getc ) == "\n " do
465+ res << ch
466+ end
467+ res << ch
455468
456469 until ( peek_equal? ( "=end" ) && peek ( 4 ) =~ /\s / ) do
457470 ( ch = getc )
458471 res << ch
459472 end
460473
461- gets # consume =end
474+ res << gets # consume =end
462475
463476 @ltype = nil
464477 Token ( TkRD_COMMENT , res )
@@ -593,6 +606,7 @@ def lex_init()
593606 |op , io |
594607 if @lex_state == :EXPR_END
595608 @lex_state = :EXPR_BEG
609+ @after_question = true
596610 Token ( TkQUESTION )
597611 else
598612 ch = getc
@@ -602,6 +616,7 @@ def lex_init()
602616 Token ( TkQUESTION )
603617 else
604618 @lex_state = :EXPR_END
619+ ch << getc if "\\ " == ch
605620 Token ( TkCHAR , "?#{ ch } " )
606621 end
607622 end
@@ -727,7 +742,7 @@ def lex_int2
727742 if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state
728743 @lex_state = :EXPR_ARG
729744 Token ( TkId , op )
730- elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
745+ elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID || @first_in_method_statement
731746 identify_string ( op )
732747 elsif peek ( 0 ) == '='
733748 getc
@@ -883,7 +898,8 @@ def lex_int2
883898 identify_quotation
884899 elsif peek ( 0 ) == '='
885900 getc
886- Token ( TkOPASGN , :% )
901+ @lex_state = :EXPR_BEG
902+ Token ( TkOPASGN , '%' )
887903 elsif @lex_state == :EXPR_ARG and @space_seen and peek ( 0 ) !~ /\s /
888904 identify_quotation
889905 else
@@ -985,7 +1001,7 @@ def identify_identifier
9851001
9861002 ungetc
9871003
988- if ( ch == "!" || ch == "?" ) && token [ 0 , 1 ] =~ /\w / && peek ( 0 ) != "="
1004+ if ( ( ch == "!" && peek ( 1 ) != "=" ) || ch == "?" ) && token [ 0 , 1 ] =~ /\w /
9891005 token . concat getc
9901006 end
9911007
@@ -1046,12 +1062,7 @@ def identify_identifier
10461062 @indent_stack . push token_c
10471063 end
10481064 else
1049- if peek ( 0 ) == ':' and !peek_match? ( /^::/ )
1050- token . concat getc
1051- token_c = TkSYMBOL
1052- else
1053- token_c = TkIDENTIFIER
1054- end
1065+ token_c = TkIDENTIFIER
10551066 end
10561067
10571068 elsif DEINDENT_CLAUSE . include? ( token )
@@ -1063,37 +1074,42 @@ def identify_identifier
10631074 @lex_state = :EXPR_END
10641075 end
10651076 end
1077+ if token_c . ancestors . include? ( TkId ) and peek ( 0 ) == ':' and !peek_match? ( /^::/ )
1078+ token . concat getc
1079+ token_c = TkSYMBOL
1080+ end
10661081 return Token ( token_c , token )
10671082 end
10681083 end
10691084
10701085 if @lex_state == :EXPR_FNAME
10711086 @lex_state = :EXPR_END
1072- if peek ( 0 ) == '='
1087+ if peek ( 0 ) == '=' and peek ( 1 ) != '>'
10731088 token . concat getc
10741089 end
10751090 elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT ||
1076- @lex_state == :EXPR_ARG
1091+ @lex_state == :EXPR_ARG || @lex_state == :EXPR_MID
10771092 @lex_state = :EXPR_ARG
10781093 else
10791094 @lex_state = :EXPR_END
10801095 end
10811096
10821097 if token [ 0 , 1 ] =~ /[A-Z]/
10831098 if token [ -1 ] =~ /[!?]/
1084- return Token ( TkIDENTIFIER , token )
1099+ token_c = TkIDENTIFIER
10851100 else
1086- return Token ( TkCONSTANT , token )
1101+ token_c = TkCONSTANT
10871102 end
10881103 elsif token [ token . size - 1 , 1 ] =~ /[!?]/
1089- return Token ( TkFID , token )
1104+ token_c = TkFID
10901105 else
1091- if peek ( 0 ) == ':' and !peek_match? ( /^::/ )
1092- token . concat getc
1093- return Token ( TkSYMBOL , token )
1094- else
1095- return Token ( TkIDENTIFIER , token )
1096- end
1106+ token_c = TkIDENTIFIER
1107+ end
1108+ if peek ( 0 ) == ':' and !peek_match? ( /^::/ )
1109+ token . concat getc
1110+ return Token ( TkSYMBOL , token )
1111+ else
1112+ return Token ( token_c , token )
10971113 end
10981114 end
10991115
@@ -1132,7 +1148,7 @@ def identify_here_document(op)
11321148 indent : indent ,
11331149 started : false
11341150 }
1135- @lex_state = :EXPR_BEG
1151+ @lex_state = :EXPR_END
11361152 Token ( RDoc ::RubyLex ::TkHEREDOCBEG , start_token )
11371153 end
11381154
@@ -1331,13 +1347,13 @@ def identify_string(ltype, quoted = ltype, type = nil)
13311347 ungetc
13321348 end
13331349 elsif ch == '\\'
1334- if %w[ ' / ] . include? @ltype then
1350+ case @ltype
1351+ when "'" then
13351352 case ch = getc
1336- when "\n " , "'"
1337- when @ltype
1353+ when "'" , '\\' then
13381354 str << ch
13391355 else
1340- ungetc
1356+ str << ch
13411357 end
13421358 else
13431359 str << read_escape
@@ -1359,7 +1375,10 @@ def identify_string(ltype, quoted = ltype, type = nil)
13591375 end
13601376 end
13611377
1362- if subtype
1378+ if peek ( 0 ) == ':' and !peek_match? ( /^::/ ) and :EXPR_BEG == @lex_state and !@after_question
1379+ str . concat getc
1380+ return Token ( TkSYMBOL , str )
1381+ elsif subtype
13631382 Token ( DLtype2Token [ ltype ] , str )
13641383 else
13651384 Token ( Ltype2Token [ ltype ] , str )
0 commit comments