@@ -106,6 +106,8 @@ def initialize(content, options)
106106 @rests = [ ]
107107 @seek = 0
108108
109+ @heredoc_queue = [ ]
110+
109111 @indent = 0
110112 @indent_stack = [ ]
111113 @lex_state = :EXPR_BEG
@@ -464,21 +466,43 @@ def lex_init()
464466
465467 @OP . def_rule ( "\n " ) do |op , io |
466468 print "\\ n\n " if RDoc ::RubyLex . debug?
467- case @lex_state
468- when :EXPR_BEG , :EXPR_FNAME , :EXPR_DOT
469- @continue = true
470- else
471- @continue = false
472- @lex_state = :EXPR_BEG
473- until ( @indent_stack . empty? ||
474- [ TkLPAREN , TkLBRACK , TkLBRACE ,
475- TkfLPAREN , TkfLBRACK , TkfLBRACE ] . include? ( @indent_stack . last ) )
476- @indent_stack . pop
469+ unless @heredoc_queue . empty?
470+ info = @heredoc_queue [ 0 ]
471+ if !info [ :started ] # "\n"
472+ info [ :started ] = true
473+ ungetc "\n "
474+ elsif info [ :heredoc_end ] . nil? # heredoc body
475+ tk , heredoc_end = identify_here_document_body ( info [ :quoted ] , info [ :lt ] , info [ :indent ] )
476+ info [ :heredoc_end ] = heredoc_end
477+ ungetc "\n "
478+ else # heredoc end
479+ @heredoc_queue . shift
480+ @lex_state = :EXPR_BEG
481+ tk = Token ( TkHEREDOCEND , info [ :heredoc_end ] )
482+ if !@heredoc_queue . empty?
483+ @heredoc_queue [ 0 ] [ :started ] = true
484+ ungetc "\n "
485+ end
477486 end
478487 end
479- @current_readed = @readed
480- @here_readed . clear
481- Token ( TkNL )
488+ unless tk
489+ case @lex_state
490+ when :EXPR_BEG , :EXPR_FNAME , :EXPR_DOT
491+ @continue = true
492+ else
493+ @continue = false
494+ @lex_state = :EXPR_BEG
495+ until ( @indent_stack . empty? ||
496+ [ TkLPAREN , TkLBRACK , TkLBRACE ,
497+ TkfLPAREN , TkfLBRACK , TkfLBRACE ] . include? ( @indent_stack . last ) )
498+ @indent_stack . pop
499+ end
500+ end
501+ @current_readed = @readed
502+ @here_readed . clear
503+ tk = Token ( TkNL )
504+ end
505+ tk
482506 end
483507
484508 @OP . def_rules ( "=" ) do
@@ -509,6 +533,12 @@ def lex_init()
509533 tk
510534 end
511535
536+ @OP . def_rules ( "->" ) do
537+ |op , io |
538+ @lex_state = :EXPR_ENDFN
539+ Token ( op )
540+ end
541+
512542 @OP . def_rules ( "!" , "!=" , "!~" ) do
513543 |op , io |
514544 case @lex_state
@@ -527,8 +557,8 @@ def lex_init()
527557 if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
528558 ( @lex_state != :EXPR_ARG || @space_seen )
529559 c = peek ( 0 )
530- if /\S / =~ c && ( /["'`]/ =~ c || /\w / =~ c || c == "-" )
531- tk = identify_here_document
560+ if /\S / =~ c && ( /["'`]/ =~ c || /\w / =~ c || c == "-" || c == "~" )
561+ tk = identify_here_document ( op )
532562 end
533563 end
534564 unless tk
@@ -837,14 +867,11 @@ def lex_int2
837867
838868 @OP . def_rule ( '\\' ) do
839869 |op , io |
840- if getc == "\n "
870+ if peek ( 0 ) == "\n "
841871 @space_seen = true
842872 @continue = true
843- Token ( TkSPACE )
844- else
845- ungetc
846- Token ( "\\ " )
847873 end
874+ Token ( "\\ " )
848875 end
849876
850877 @OP . def_rule ( '%' ) do
@@ -1053,7 +1080,11 @@ def identify_identifier
10531080 end
10541081
10551082 if token [ 0 , 1 ] =~ /[A-Z]/
1056- return Token ( TkCONSTANT , token )
1083+ if token [ -1 ] =~ /[!?]/
1084+ return Token ( TkIDENTIFIER , token )
1085+ else
1086+ return Token ( TkCONSTANT , token )
1087+ end
10571088 elsif token [ token . size - 1 , 1 ] =~ /[!?]/
10581089 return Token ( TkFID , token )
10591090 else
@@ -1066,77 +1097,63 @@ def identify_identifier
10661097 end
10671098 end
10681099
##
# Lexes the opening of a here document, i.e. the part that follows the "<<"
# operator: an optional "-" or "~" flag and then either a quoted identifier
# ('EOS', "EOS", `EOS`) or a bare word identifier.
#
# Only the opening is consumed here.  A description of the heredoc is pushed
# onto @heredoc_queue so that the body can be read once the current line is
# finished (the "\n" operator rule drains that queue and calls
# #identify_here_document_body).
#
# +op+ is the operator text that was already matched (normally "<<").  It is
# not modified.
#
# Returns a TkHEREDOCBEG token whose text is the complete opening as it was
# written, e.g. <tt><<-EOS</tt> or <tt><<~'END OF'</tt>.

def identify_here_document(op)
  ch = getc

  # Build the token text on a copy: appending to +op+ itself would change
  # (or, for a frozen string, fail on) the string owned by the caller.
  start_token = op.dup

  indent = nil
  if ch == "-" || ch == "~"
    start_token.concat ch
    ch = getc
    indent = true
  end

  if /['"`]/ =~ ch
    # Quoted identifier: everything up to the matching quote is the
    # terminator, the quote character decides the literal type.
    lt = ch
    start_token.concat lt
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
    start_token.concat quoted
    start_token.concat lt
  else
    # Bare identifier: behaves like a double-quoted heredoc.
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    start_token.concat quoted
    ungetc # give back the first character that is not part of the identifier
  end

  @heredoc_queue << {
    quoted: quoted,
    lt: lt,
    indent: indent,
    started: false
  }
  @lex_state = :EXPR_BEG
  Token(RDoc::RubyLex::TkHEREDOCBEG, start_token)
end

##
# Reads the body of a here document line by line (via +gets+) until the
# terminator line is found.
#
# +quoted+ is the terminator identifier, +lt+ the literal type to expose in
# @ltype while the body is being read, and +indent+ is truthy when the
# terminator may be surrounded by whitespace (the "-" / "~" forms).
#
# Returns a two element array: the TkHEREDOC token holding the body text
# (line endings normalised to "\n") and the raw terminator line.
#
# Raises Error when the input ends before the terminator is seen.

def identify_here_document_body(quoted, lt, indent)
  ltback, @ltype = @ltype, lt

  doc = ""
  heredoc_end = nil
  while l = gets
    # Normalise CRLF to LF.  The group must be non-capturing "(?:\r)?";
    # "(:?\r)?" would also swallow a literal ":" that precedes the "\r".
    l = l.sub(/(?:\r)?\n\z/, "\n")
    if (indent ? l.strip : l.chomp) == quoted
      heredoc_end = l
      break
    end
    doc << l
  end
  raise Error, "Missing terminating #{quoted} for string" unless heredoc_end

  @ltype = ltback
  @lex_state = :EXPR_BEG
  [Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end]
end
11411158
11421159 def identify_quotation
@@ -1163,7 +1180,7 @@ def identify_number(op = "")
11631180
11641181 num = op
11651182
1166- if peek ( 0 ) == "0" && peek ( 1 ) !~ /[.eE ]/
1183+ if peek ( 0 ) == "0" && peek ( 1 ) !~ /[.eEri ]/
11671184 num << getc
11681185
11691186 case peek ( 0 )
@@ -1292,7 +1309,7 @@ def identify_string(ltype, quoted = ltype, type = nil)
12921309 str = if ltype == quoted and %w[ " ' / ] . include? ltype then
12931310 ltype . dup
12941311 else
1295- "%#{ type or PERCENT_LTYPE . key ltype } #{ PERCENT_PAREN_REV [ quoted ] ||quoted } "
1312+ "%#{ type } #{ PERCENT_PAREN_REV [ quoted ] ||quoted } "
12961313 end
12971314
12981315 subtype = nil
0 commit comments