Skip to content

Commit 221fad0

Browse files
authored
Merge pull request #296 from aaron-prindle/rawstring-tag-parsing
feat: add raw string support to gengo comment tag arg parsing
2 parents e3bc6f1 + fc15268 commit 221fad0

File tree

2 files changed

+155
-28
lines changed

2 files changed

+155
-28
lines changed

v2/comments.go

Lines changed: 98 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ package gengo
1818

1919
import (
2020
"bytes"
21+
"errors"
2122
"fmt"
2223
"strings"
24+
"text/scanner"
2325
"unicode"
2426
)
2527

@@ -104,8 +106,10 @@ func ExtractSingleBoolCommentTag(marker string, key string, defaultVal bool, lin
104106
// - 'marker' + "key=value"
105107
// - 'marker' + "key()=value"
106108
// - 'marker' + "key(arg)=value"
109+
// - 'marker' + "key(`raw string`)=value"
107110
//
108-
// The arg is optional. If not specified (either as "key=value" or as
111+
// The arg is optional. It may be a Go identifier or a raw string literal
112+
// enclosed in back-ticks. If not specified (either as "key=value" or as
109113
// "key()=value"), the resulting Tag will have an empty Args list.
110114
//
111115
// The value is optional. If not specified, the resulting Tag will have "" as
@@ -169,12 +173,10 @@ func ExtractFunctionStyleCommentTags(marker string, tagNames []string, lines []s
169173
if !strings.HasPrefix(line, marker) {
170174
continue
171175
}
172-
line = stripTrailingComment(line)
173-
kv := strings.SplitN(line[len(marker):], "=", 2)
174-
key := kv[0]
175-
val := ""
176-
if len(kv) == 2 {
177-
val = kv[1]
176+
body := stripTrailingComment(line[len(marker):])
177+
key, val, err := splitKeyValScanner(body)
178+
if err != nil {
179+
return nil, err
178180
}
179181

180182
tag := Tag{}
@@ -260,31 +262,99 @@ func parseTagKey(input string, tagNames []string) (string, []string, error) {
260262
// '(', including the trailing ')'.
261263
//
262264
// At the moment this assumes that the entire string between the opening '('
263-
// and the trailing ')' is a single Go-style identifier token, but in the
264-
// future could be extended to have multiple arguments with actual syntax. The
265-
// single token may consist only of letters and digits. Whitespace is not
266-
// allowed.
265+
// and the trailing ')' is a single Go-style identifier token OR a raw string
266+
// literal. The single Go-style token may consist only of letters and digits
267+
// and whitespace is not allowed.
267268
// parseTagArgs parses the argument portion of a function-style tag key: the
// text that follows the opening '(' up to and including the closing ')'.
//
// The text before ')' must be empty, a single identifier consisting only of
// letters and digits, or a single raw string literal enclosed in back-ticks.
// A raw string argument is returned verbatim, including its back-ticks.
// Nothing, not even whitespace, may follow the closing ')'.
//
// It returns a nil slice when no argument is present, and an error when the
// input has multiple arguments, an unsupported token, no closing ')', or
// anything after the closing ')'.
func parseTagArgs(input string) ([]string, error) {
	s := initArgScanner(input)
	var args []string
	if s.Peek() != ')' {
		// Arg found.
		arg, err := parseArg(s)
		if err != nil {
			return nil, err
		}
		args = append(args, arg)
	}
	// Expect exactly one closing ')' next. Distinguish the failure modes so
	// the caller is told what is actually wrong instead of always being told
	// that the ')' is missing.
	switch tok := s.Scan(); tok {
	case ')':
		// Properly closed.
	case ',':
		return nil, fmt.Errorf("multiple arguments are not supported: %q", input)
	case scanner.EOF:
		return nil, fmt.Errorf("no closing ')' found: %q", input)
	default:
		return nil, fmt.Errorf("unexpected token %q after argument: %q", s.TokenText(), input)
	}
	// Expect no whitespace, etc. after the one ')'.
	if s.Scan() != scanner.EOF {
		// Pos() is the offset just past the offending token; back up by the
		// token length to quote everything from the token onwards.
		pos := s.Pos().Offset - len(s.TokenText())
		return nil, fmt.Errorf("unexpected characters after ')': %q", input[pos:])
	}
	return args, nil
}

// argScanner wraps a text/scanner.Scanner and accumulates every error that
// is reported through the scanner's Error callback.
type argScanner struct {
	*scanner.Scanner
	errs []error
}

// initArgScanner returns an argScanner reading from input. The scanner
// recognizes identifiers and raw strings, treats no character as skippable
// whitespace (so whitespace shows up as a token), and records errors in errs
// rather than using text/scanner's default handler.
func initArgScanner(input string) *argScanner {
	s := &argScanner{Scanner: &scanner.Scanner{}}

	s.Init(strings.NewReader(input))
	s.Mode = scanner.ScanIdents | scanner.ScanRawStrings
	s.Whitespace = 0

	s.Error = func(_ *scanner.Scanner, msg string) {
		s.errs = append(s.errs,
			fmt.Errorf("error parsing %q at %v: %s", input, s.Position, msg))
	}
	return s
}

// unexpectedTokenError records an "expected X but got Y" error through the
// scanner's Error callback and returns all errors collected so far, joined.
func (s *argScanner) unexpectedTokenError(expected string, token string) error {
	s.Error(s.Scanner, fmt.Sprintf("expected %s but got (%q)", expected, token))
	return errors.Join(s.errs...)
}

// parseArg scans a single argument from s and returns its text. A raw string
// is returned with its back-ticks; an identifier is accepted only if every
// rune is a letter or a digit.
func parseArg(s *argScanner) (string, error) {
	switch tok := s.Scan(); tok {
	case scanner.RawString:
		// text/scanner still yields a RawString token for an unterminated
		// literal and only reports the problem through the Error callback,
		// so surface any collected error here instead of returning the
		// malformed text as a valid argument.
		if len(s.errs) > 0 {
			return "", errors.Join(s.errs...)
		}
		return s.TokenText(), nil
	case scanner.Ident:
		txt := s.TokenText()
		// The scanner's identifier rule also admits '_', which is not
		// allowed in a tag argument.
		for _, r := range txt {
			if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
				return "", s.unexpectedTokenError("letter or digit", txt)
			}
		}
		return txt, nil
	case ',':
		return "", errors.New("multiple arguments are not supported")
	default:
		return "", s.unexpectedTokenError("Go-style identifier or raw string", s.TokenText())
	}
}
333+
334+
// splitKeyValScanner splits a tag body of the form key[=val]. It scans left
// to right and stops at the first "=" that is not inside a raw string literal
// (text enclosed in back-ticks). Double-quoted strings are not recognized, so
// an "=" inside double quotes still splits.
//
// Text before the "=" becomes the key, trimmed of surrounding whitespace.
// Text after the "=" becomes the val, not trimmed. If no "=" is found, the
// whole trimmed input is returned as the key and val is empty.
//
// An error is returned if the scanner reports a problem in the key portion,
// for example a raw string that is never terminated.
func splitKeyValScanner(input string) (key, val string, err error) {
	var s scanner.Scanner
	s.Init(strings.NewReader(input))
	s.Mode = scanner.ScanIdents | scanner.ScanRawStrings

	// Collect scanner errors so they can be returned to the caller. Without
	// a handler text/scanner writes them to os.Stderr and carries on.
	var errs []error
	s.Error = func(sc *scanner.Scanner, msg string) {
		pos := sc.Position
		if !pos.IsValid() {
			pos = sc.Pos()
		}
		errs = append(errs, fmt.Errorf("error parsing %q at %v: %s", input, pos, msg))
	}

	for {
		// Errors recorded before this Scan call belong to text that precedes
		// the token it returns. Errors recorded during the call may come from
		// the one-character lookahead past that token.
		before := len(errs)
		switch tok := s.Scan(); tok {
		case scanner.EOF:
			if len(errs) > 0 {
				return "", "", errors.Join(errs...)
			}
			return strings.TrimSpace(input), "", nil
		case '=':
			// Only errors in the key portion matter; the value is passed
			// through verbatim and is not validated here.
			if before > 0 {
				return "", "", errors.Join(errs[:before]...)
			}
			// Split at the first top-level '='. Pos() is the offset just
			// past the '=' token.
			end := s.Pos().Offset
			start := end - len(s.TokenText())
			return strings.TrimSpace(input[:start]), input[end:], nil
		}
	}
}

v2/comments_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,31 @@ func TestExtractExtendedCommentTags(t *testing.T) {
217217
Tag{"pfx2Foo", nil, "val1"},
218218
Tag{"pfx2Foo", mkstrs("arg"), "val2"}),
219219
},
220+
}, {
221+
name: "raw arg with =, ), and space",
222+
comments: []string{
223+
"+rawEq(`a=b c=d )`)=xyz",
224+
},
225+
expect: map[string][]Tag{
226+
"rawEq": mktags(Tag{"rawEq", mkstrs("`a=b c=d )`"), "xyz"}),
227+
},
228+
}, {
229+
name: "raw arg no value",
230+
comments: []string{
231+
"+onlyRaw(`zzz`)",
232+
},
233+
expect: map[string][]Tag{
234+
"onlyRaw": mktags(Tag{"onlyRaw", mkstrs("`zzz`"), ""}),
235+
},
236+
}, {
237+
name: "raw string arg complex",
238+
comments: []string{
239+
"+rawTag(`[self.foo==10, ()), {}}, \"foo\", 'foo']`)=val",
240+
},
241+
expect: map[string][]Tag{
242+
"rawTag": mktags(
243+
Tag{"rawTag", mkstrs("`[self.foo==10, ()), {}}, \"foo\", 'foo']`"), "val"}),
244+
},
220245
}}
221246

222247
for _, tc := range cases {
@@ -246,6 +271,9 @@ func TestParseTagKey(t *testing.T) {
246271
{"trailingSpace(arg) ", "", nil, true},
247272
{"argWithDash(arg-name) ", "", nil, true},
248273
{"argWithUnder(arg_name) ", "", nil, true},
274+
{"withRaw(`a = b`)", "withRaw", mkss("`a = b`"), false},
275+
{"badRaw(missing`)", "", nil, true},
276+
{"badMix(arg,`raw`)", "", nil, true},
249277
}
250278
for _, tc := range cases {
251279
key, args, err := parseTagKey(tc.input, nil)
@@ -331,6 +359,11 @@ func TestParseTagArgs(t *testing.T) {
331359
{"noClosingParen", nil, true},
332360
{"extraParen))", nil, true},
333361
{"trailingSpace) ", nil, true},
362+
{"`hasRawQuotes`)", mkss("`hasRawQuotes`"), false},
363+
{"`raw with =`)", mkss("`raw with =`"), false},
364+
{"`raw` )", nil, true},
365+
{"`raw`bad)", nil, true},
366+
{"`first``second`)", nil, true},
334367
}
335368
for _, tc := range cases {
336369
ret, err := parseTagArgs(tc.input)
@@ -355,3 +388,27 @@ func TestParseTagArgs(t *testing.T) {
355388
}
356389
}
357390
}
391+
392+
func TestSplitKeyValScanner(t *testing.T) {
393+
cases := []struct {
394+
input string
395+
key string
396+
val string
397+
}{
398+
{`foo=bar`, "foo", "bar"},
399+
{`foo = bar`, "foo", " bar"},
400+
{`keyWithRaw(` + "`a=b`" + `)=value`, "keyWithRaw(`a=b`)", "value"},
401+
{`noValue`, "noValue", ""},
402+
{`rawKey=` + "`x=y`", "rawKey", "`x=y`"},
403+
}
404+
405+
for _, c := range cases {
406+
k, v, err := splitKeyValScanner(c.input)
407+
if err != nil {
408+
t.Fatalf("[%q] unexpected err: %v", c.input, err)
409+
}
410+
if k != c.key || v != c.val {
411+
t.Errorf("[%q] got (%q,%q) want (%q,%q)", c.input, k, v, c.key, c.val)
412+
}
413+
}
414+
}

0 commit comments

Comments
 (0)