Skip to content

Commit ba7cc5f

Browse files
authored
Fix panics from symbolParserV2 (#338)
The added test case contains an escaped multi-byte string that's valid UTF8 that caused a panic before the fix. The fix applied is to advance the entire multi-byte sequence instead of capping it to a single byte. This caused a few other test failures, namely because we were validating unescaped identifiers were valid UTF8 where elsewhere we assume they're ASCII. The follow-up fix applied was to treat these in a uniform way.
1 parent 7421625 commit ba7cc5f

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

bindings/go/scip/symbol_parser.go

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package scip
33
import (
44
"fmt"
55
"strings"
6-
"unicode"
76

87
"github.com/cockroachdb/errors"
98
"github.com/sourcegraph/beaut"
@@ -469,10 +468,6 @@ func (e unrecognizedDescriptorError) Error() string {
469468
return fmt.Sprintf("unrecognized descriptor %q", e.value)
470469
}
471470

472-
func isIdentifierCharacter(r rune) bool {
473-
return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '+' || r == '$' || r == '_'
474-
}
475-
476471
func (z *symbolParserV2) advanceOneByte(b byte) {
477472
assert(z.currentRune == rune(b), "passed in byte does not match current rune")
478473
nextRune, nextRuneByteLength := z.peekNext()
@@ -484,7 +479,7 @@ func (z *symbolParserV2) advanceOneByte(b byte) {
484479

485480
func (z *symbolParserV2) advanceRune() {
486481
nextRune, nextRuneByteLength := z.peekNext()
487-
z.advance(nextRune, min(nextRuneByteLength, 1))
482+
z.advance(nextRune, nextRuneByteLength)
488483
}
489484

490485
func (z *symbolParserV2) acceptOneByte(b byte, what parseCtx) error {
@@ -503,7 +498,7 @@ func (z *symbolParserV2) acceptIdentifier(what parseCtx, sw *stringWriter) error
503498
start := z.byteIndex
504499
slen := len(z.SymbolString)
505500
for z.byteIndex < slen {
506-
if !isIdentifierCharacter(z.currentRune) {
501+
if !shared.IsSimpleIdentifierCharacter(z.currentRune) {
507502
break
508503
}
509504
z.advanceRune()

bindings/go/scip/symbol_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,19 @@ func TestParseSymbol(t *testing.T) {
108108
},
109109
},
110110
},
111+
{
112+
Symbol: "a b c d `F⃗`.", Expected: &Symbol{
113+
Scheme: "a",
114+
Package: &Package{
115+
Manager: "b",
116+
Name: "c",
117+
Version: "d",
118+
},
119+
Descriptors: []*Descriptor{{
120+
Name: "F⃗", Suffix: Descriptor_Term,
121+
}},
122+
},
123+
},
111124
}
112125
for _, test := range tests {
113126
t.Run(test.Symbol, func(t *testing.T) {
@@ -127,6 +140,8 @@ func TestParseSymbolError(t *testing.T) {
127140
"lsif-java maven package 1.0.0 java/io/File#Entry.trailingstring",
128141
"lsif-java maven package 1.0.0 java/io/File#Entry.unrecognizedSuffix@",
129142
"lsif-java maven package 1.0.0 java/io/File#Entry.nonSimpλeIdentifier.",
143+
"lsif-java maven package 1.0.0 java/io/File#Entry.`unterminatedEscapedIdentifier",
144+
"lsif-java maven package 1.0.0 java/io/File#Entry.[UnterminatedDescriptorSuffix",
130145
"local 🧠",
131146
"local ",
132147
"local &&&",

0 commit comments

Comments
 (0)