@@ -18,8 +18,10 @@ package gengo
1818
1919import (
2020 "bytes"
21+ "errors"
2122 "fmt"
2223 "strings"
24+ "text/scanner"
2325 "unicode"
2426)
2527
@@ -104,8 +106,10 @@ func ExtractSingleBoolCommentTag(marker string, key string, defaultVal bool, lin
104106// - 'marker' + "key=value"
105107// - 'marker' + "key()=value"
106108// - 'marker' + "key(arg)=value"
109+ // - 'marker' + "key(`raw string`)=value"
107110//
108- // The arg is optional. If not specified (either as "key=value" or as
111+ // The arg is optional. It may be a Go identifier or a raw string literal
112+ // enclosed in back-ticks. If not specified (either as "key=value" or as
109113// "key()=value"), the resulting Tag will have an empty Args list.
110114//
111115// The value is optional. If not specified, the resulting Tag will have "" as
@@ -169,12 +173,10 @@ func ExtractFunctionStyleCommentTags(marker string, tagNames []string, lines []s
169173 if ! strings .HasPrefix (line , marker ) {
170174 continue
171175 }
172- line = stripTrailingComment (line )
173- kv := strings .SplitN (line [len (marker ):], "=" , 2 )
174- key := kv [0 ]
175- val := ""
176- if len (kv ) == 2 {
177- val = kv [1 ]
176+ body := stripTrailingComment (line [len (marker ):])
177+ key , val , err := splitKeyValScanner (body )
178+ if err != nil {
179+ return nil , err
178180 }
179181
180182 tag := Tag {}
@@ -260,31 +262,99 @@ func parseTagKey(input string, tagNames []string) (string, []string, error) {
260262// '(', including the trailing ')'.
261263//
262264// At the moment this assumes that the entire string between the opening '('
263- // and the trailing ')' is a single Go-style identifier token, but in the
264- // future could be extended to have multiple arguments with actual syntax. The
265- // single token may consist only of letters and digits. Whitespace is not
266- // allowed.
265+ // and the trailing ')' is a single Go-style identifier token OR a raw string
266+ // literal. The single Go-style token may consist only of letters and digits
267+ // and whitespace is not allowed.
267268func parseTagArgs (input string ) ([]string , error ) {
268- // This is really dumb, but should be extendable to a "real" parser if
269- // needed.
270- runes := []rune (input )
271- for i , r := range runes {
272- if unicode .IsLetter (r ) || unicode .IsDigit (r ) {
273- continue
274- }
275- if r == ',' {
276- return nil , fmt .Errorf ("multiple arguments are not supported: %q" , input )
269+ s := initArgScanner (input )
270+ var args []string
271+ if s .Peek () != ')' {
272+ // Arg found.
273+ arg , err := parseArg (s )
274+ if err != nil {
275+ return nil , err
277276 }
278- if r == ')' {
279- if i != len (runes )- 1 {
280- return nil , fmt .Errorf ("unexpected characters after ')': %q" , string (runes [i :]))
277+ args = append (args , arg )
278+ }
279+ // Expect one closing ')' after the arg.
280+ if s .Scan () != ')' {
281+ return nil , fmt .Errorf ("no closing ')' found: %q" , input )
282+ }
283+ // Expect no whitespace, etc. after the one ')'.
284+ if s .Scan () != scanner .EOF {
285+ pos := s .Pos ().Offset - len (s .TokenText ())
286+ return nil , fmt .Errorf ("unexpected characters after ')': %q" , input [pos :])
287+ }
288+ return args , nil
289+ }
290+
// argScanner is a text/scanner.Scanner that keeps every error it reports in
// errs instead of letting the scanner print it.
type argScanner struct {
	*scanner.Scanner
	errs []error // errors reported so far, oldest first
}

// initArgScanner returns an argScanner reading from input. The scanner only
// recognizes identifiers and raw strings as multi-character tokens, returns
// whitespace as ordinary characters rather than skipping it, and appends each
// reported error (annotated with the input and the current token position)
// to errs.
func initArgScanner(input string) *argScanner {
	as := &argScanner{Scanner: new(scanner.Scanner)}

	// Init resets Mode, Whitespace and Error, so it has to run first.
	as.Init(strings.NewReader(input))
	as.Whitespace = 0
	as.Mode = scanner.ScanIdents | scanner.ScanRawStrings

	as.Error = func(_ *scanner.Scanner, msg string) {
		err := fmt.Errorf("error parsing %q at %v: %s", input, as.Position, msg)
		as.errs = append(as.errs, err)
	}
	return as
}

// unexpectedTokenError records an "expected X but got Y" error through the
// scanner's error handler and returns all errors recorded so far, joined
// into one.
func (s *argScanner) unexpectedTokenError(expected string, token string) error {
	msg := fmt.Sprintf("expected %s but got (%q)", expected, token)
	s.Error(s.Scanner, msg)
	return errors.Join(s.errs...)
}
314+
315+ func parseArg (s * argScanner ) (string , error ) {
316+ switch tok := s .Scan (); tok {
317+ case scanner .RawString :
318+ return s .TokenText (), nil
319+ case scanner .Ident :
320+ txt := s .TokenText ()
321+ for _ , r := range txt {
322+ if ! unicode .IsLetter (r ) && ! unicode .IsDigit (r ) {
323+ return "" , s .unexpectedTokenError ("letter or digit" , txt )
281324 }
282- if i == 0 {
283- return nil , nil
325+ }
326+ return txt , nil
327+ case ',' :
328+ return "" , fmt .Errorf ("multiple arguments are not supported" )
329+ default :
330+ return "" , s .unexpectedTokenError ("Go-style identifier or raw string" , s .TokenText ())
331+ }
332+ }
333+
// splitKeyValScanner splits a tag body of the form key[=val].
//
// The input is tokenized left to right and split at the first '=' that is not
// inside a raw (back-tick) string literal. Only raw strings are recognized as
// literals; a '=' inside a double-quoted string still splits. Text before the
// '=' becomes the key, with surrounding whitespace trimmed. Text after it
// becomes the val, returned verbatim and never tokenized. If no '=' is found,
// the whole trimmed input is the key and val is empty.
//
// Tokens other than '=' are skipped. A non-nil error is returned, with empty
// key and val, if the scanner reports a problem while looking for the '=',
// for example a raw string that is never closed.
func splitKeyValScanner(input string) (key, val string, err error) {
	var s scanner.Scanner
	s.Init(strings.NewReader(input))
	s.Mode = scanner.ScanIdents | scanner.ScanRawStrings

	// Without a handler text/scanner prints errors to os.Stderr and carries
	// on; collect them so they can be returned to the caller instead.
	var errs []error
	s.Error = func(sc *scanner.Scanner, msg string) {
		pos := sc.Position
		if !pos.IsValid() {
			// No token has been started yet; fall back to the read position.
			pos = sc.Pos()
		}
		errs = append(errs, fmt.Errorf("error parsing %q at %v: %s", input, pos, msg))
	}

	for {
		tok := s.Scan()
		if len(errs) > 0 {
			return "", "", errors.Join(errs...)
		}
		switch tok {
		case scanner.EOF:
			return strings.TrimSpace(input), "", nil
		case '=':
			// Position.Offset is the byte offset of the '=' just scanned.
			// Everything before it (trimmed) is the key, everything after
			// it (not trimmed) is the value.
			eq := s.Position.Offset
			return strings.TrimSpace(input[:eq]), input[eq+1:], nil
		}
	}
}
0 commit comments