Skip to content

Commit 4217868

Browse files
patjakdevclaude
andcommitted
x/exp/schema: fix reserved keyword handling and add validation
Add a tokenReservedKeyword token type to the schema parser's lexer, matching the approach used in the main Cedar parser. Previously, reserved keywords like "true", "false", "in", "if", etc. were lexed as plain identifiers, which meant the parser silently accepted them in positions where they should be rejected (e.g. `entity true;`, `type if = String;`). Bugs fixed: - Reserved Cedar keywords were accepted as entity, type, and action names, namespace path components, and attribute names without quoting. The parser now rejects these with a clear error message. - __cedar as a definition name (entity, type, enum) was silently accepted. These are now rejected while still allowing __cedar as an action name, attribute name, and type reference prefix, which matches the Cedar Rust behavior. - Duplicate annotations (e.g. `@doc("a") @doc("b")`) were silently accepted with last-wins semantics. The parser now rejects duplicates. - Duplicate principal, resource, or context declarations within appliesTo were silently accepted. The parser now rejects duplicates. - Empty principal or resource type lists in appliesTo (e.g. `principal: []`) were silently accepted, producing a meaningless empty list. The parser now rejects these. - appliesTo blocks missing a principal or resource declaration were accepted. The parser now requires both. - MarshalSchema emitted reserved keywords as bare identifiers in attribute and action names (e.g. `true: String`), producing output that could not be re-parsed. isValidIdent now checks for reserved keywords and the marshaler quotes them. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-Off-By: Patrick Jakubowski <patrick.jakubowski@strongdm.com>
1 parent 682b8a0 commit 4217868

File tree

6 files changed

+195
-19
lines changed

6 files changed

+195
-19
lines changed

internal/parser/cedar_tokenize.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@ type Token struct {
3636
Text string
3737
}
3838

39-
// N.B. "is" is included here for compatibility with the Rust implementation. The Cedar specification does not list
40-
// "is" as a reserved keyword
41-
var reservedKeywords = []string{"true", "false", "if", "then", "else", "in", "like", "has", "is"}
39+
var reservedKeywords = []string{"true", "false", "if", "then", "else", "in", "like", "has", "is", "__cedar"}
40+
41+
// IsReservedKeyword reports whether s is a reserved Cedar keyword.
42+
func IsReservedKeyword(s string) bool {
43+
return slices.Contains(reservedKeywords, s)
44+
}
4245

4346
func (t Token) isEOF() bool {
4447
return t.Type == TokenEOF
@@ -488,7 +491,7 @@ redo:
488491

489492
// last minute check for reserved keywords
490493
text := s.tokenText()
491-
if tt == TokenIdent && slices.Contains(reservedKeywords, text) {
494+
if tt == TokenIdent && IsReservedKeyword(text) {
492495
tt = TokenReservedKeyword
493496
}
494497

x/exp/schema/internal/parser/marshal.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"slices"
88
"strings"
99

10+
cedarparser "github.com/cedar-policy/cedar-go/internal/parser"
1011
"github.com/cedar-policy/cedar-go/types"
1112
"github.com/cedar-policy/cedar-go/x/exp/schema/ast"
1213
)
@@ -311,7 +312,7 @@ func isValidIdent(s string) bool {
311312
}
312313
}
313314
}
314-
return true
315+
return !cedarparser.IsReservedKeyword(s)
315316
}
316317

317318
// quoteCedar produces a double-quoted string literal using only Cedar-valid

x/exp/schema/internal/parser/parser.go

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package parser
44
import (
55
"fmt"
66
"slices"
7+
"strings"
78

89
"github.com/cedar-policy/cedar-go/types"
910
"github.com/cedar-policy/cedar-go/x/exp/schema/ast"
@@ -86,6 +87,8 @@ func tokenName(tt tokenType) string {
8687
return "'?'"
8788
case tokenEquals:
8889
return "'='"
90+
case tokenReservedKeyword:
91+
return "reserved keyword"
8992
default:
9093
return "unknown"
9194
}
@@ -99,6 +102,8 @@ func tokenDesc(tok token) string {
99102
return fmt.Sprintf("identifier %q", tok.Text)
100103
case tokenString:
101104
return fmt.Sprintf("string %q", tok.Text)
105+
case tokenReservedKeyword:
106+
return fmt.Sprintf("reserved keyword %q", tok.Text)
102107
default:
103108
return fmt.Sprintf("%q", tok.Text)
104109
}
@@ -145,6 +150,9 @@ func (p *parser) parseNamespace(annotations ast.Annotations) (parsedNamespace, e
145150
if err != nil {
146151
return parsedNamespace{}, err
147152
}
153+
if slices.Contains(strings.Split(string(path), "::"), "__cedar") {
154+
return parsedNamespace{}, fmt.Errorf("%s: the name %q contains \"__cedar\", which is reserved", p.tok.Pos, path)
155+
}
148156
if err := p.expect(tokenLBrace); err != nil {
149157
return parsedNamespace{}, err
150158
}
@@ -213,7 +221,7 @@ func (p *parser) parseEntity(annotations ast.Annotations, schema *ast.Schema) er
213221

214222
// Parse optional 'in' clause
215223
var memberOf []ast.EntityTypeRef
216-
if p.tok.Type == tokenIdent && p.tok.Text == "in" {
224+
if p.tok.Type == tokenReservedKeyword && p.tok.Text == "in" {
217225
if err := p.readToken(); err != nil {
218226
return err
219227
}
@@ -333,7 +341,7 @@ func (p *parser) parseAction(annotations ast.Annotations, schema *ast.Schema) er
333341

334342
// Parse optional 'in' clause
335343
var memberOf []ast.ParentRef
336-
if p.tok.Type == tokenIdent && p.tok.Text == "in" {
344+
if p.tok.Type == tokenReservedKeyword && p.tok.Text == "in" {
337345
if err := p.readToken(); err != nil {
338346
return err
339347
}
@@ -431,7 +439,7 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
431439
if err := p.readToken(); err != nil {
432440
return nil, err
433441
}
434-
if p.tok.Type != tokenIdent {
442+
if p.tok.Type != tokenIdent && p.tok.Type != tokenReservedKeyword {
435443
return nil, p.errorf("expected annotation name, got %s", tokenDesc(p.tok))
436444
}
437445
key := types.Ident(p.tok.Text)
@@ -459,6 +467,9 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
459467
if annotations == nil {
460468
annotations = ast.Annotations{}
461469
}
470+
if _, ok := annotations[key]; ok {
471+
return nil, p.errorf("duplicate annotation %q", key)
472+
}
462473
if hasValue {
463474
annotations[key] = value
464475
} else {
@@ -469,8 +480,10 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
469480
}
470481

471482
// parsePath parses IDENT { '::' IDENT }
483+
// As a special case, "__cedar" is accepted as the first component even though it is
484+
// a reserved keyword, because it is valid as a type reference prefix (e.g. __cedar::String).
472485
func (p *parser) parsePath() (types.Path, error) {
473-
if p.tok.Type != tokenIdent {
486+
if p.tok.Type != tokenIdent && (p.tok.Type != tokenReservedKeyword || p.tok.Text != "__cedar") {
474487
return "", p.errorf("expected identifier, got %s", tokenDesc(p.tok))
475488
}
476489
path := p.tok.Text
@@ -494,8 +507,10 @@ func (p *parser) parsePath() (types.Path, error) {
494507

495508
// parsePathForRef parses a path that may include a trailing '::' followed by a string literal
496509
// for action parent references. Returns the path and whether a string was found.
510+
// As a special case, "__cedar" is accepted as the first component even though it is
511+
// a reserved keyword, because it is valid as a type reference prefix (e.g. __cedar::String).
497512
func (p *parser) parsePathForRef() (path types.Path, str types.String, qualified bool, err error) {
498-
if p.tok.Type != tokenIdent {
513+
if p.tok.Type != tokenIdent && (p.tok.Type != tokenReservedKeyword || p.tok.Text != "__cedar") {
499514
return "", "", false, p.errorf("expected identifier, got %s", tokenDesc(p.tok))
500515
}
501516
pathStr := p.tok.Text
@@ -570,14 +585,17 @@ func (p *parser) parseNames() ([]types.String, error) {
570585
}
571586

572587
func (p *parser) parseName() (types.String, error) {
573-
switch p.tok.Type {
574-
case tokenIdent:
588+
// Weirdly, Cedar schemas allow __cedar as an attribute or action name without
589+
// double quotes, while all other reserved keywords require double quotes
590+
switch {
591+
case p.tok.Type == tokenIdent,
592+
p.tok.Type == tokenReservedKeyword && p.tok.Text == "__cedar":
575593
name := types.String(p.tok.Text)
576594
if err := p.readToken(); err != nil {
577595
return "", err
578596
}
579597
return name, nil
580-
case tokenString:
598+
case p.tok.Type == tokenString:
581599
name := types.String(p.tok.Text)
582600
if err := p.readToken(); err != nil {
583601
return "", err
@@ -674,6 +692,9 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
674692
return nil, err
675693
}
676694
at := &ast.AppliesTo{}
695+
hasPrincipal := false
696+
hasResource := false
697+
hasContext := false
677698
for p.tok.Type != tokenRBrace {
678699
if p.tok.Type == tokenEOF {
679700
return nil, p.errorf("expected '}' to close appliesTo, got EOF")
@@ -683,6 +704,10 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
683704
}
684705
switch p.tok.Text {
685706
case "principal":
707+
if hasPrincipal {
708+
return nil, p.errorf("duplicate principal declaration in appliesTo")
709+
}
710+
hasPrincipal = true
686711
if err := p.readToken(); err != nil {
687712
return nil, err
688713
}
@@ -693,8 +718,15 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
693718
if err != nil {
694719
return nil, err
695720
}
721+
if len(refs) == 0 {
722+
return nil, p.errorf("principal types must not be empty")
723+
}
696724
at.Principals = refs
697725
case "resource":
726+
if hasResource {
727+
return nil, p.errorf("duplicate resource declaration in appliesTo")
728+
}
729+
hasResource = true
698730
if err := p.readToken(); err != nil {
699731
return nil, err
700732
}
@@ -705,8 +737,15 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
705737
if err != nil {
706738
return nil, err
707739
}
740+
if len(refs) == 0 {
741+
return nil, p.errorf("resource types must not be empty")
742+
}
708743
at.Resources = refs
709744
case "context":
745+
if hasContext {
746+
return nil, p.errorf("duplicate context declaration in appliesTo")
747+
}
748+
hasContext = true
710749
if err := p.readToken(); err != nil {
711750
return nil, err
712751
}
@@ -727,6 +766,12 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
727766
}
728767
}
729768
}
769+
if !hasPrincipal {
770+
return nil, p.errorf("appliesTo must include a principal declaration")
771+
}
772+
if !hasResource {
773+
return nil, p.errorf("appliesTo must include a resource declaration")
774+
}
730775
return at, p.readToken() // consume '}'
731776
}
732777

x/exp/schema/internal/parser/parser_internal_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ func TestTokenName(t *testing.T) {
137137
{tokenDoubleColon, "'::'"},
138138
{tokenQuestion, "'?'"},
139139
{tokenEquals, "'='"},
140+
{tokenReservedKeyword, "reserved keyword"},
140141
{tokenType(999), "unknown"},
141142
}
142143
for _, tt := range tests {
@@ -158,6 +159,8 @@ func TestIsValidIdent(t *testing.T) {
158159
testutil.Equals(t, isValidIdent(""), false)
159160
testutil.Equals(t, isValidIdent("1abc"), false)
160161
testutil.Equals(t, isValidIdent("foo bar"), false)
162+
testutil.Equals(t, isValidIdent("in"), false)
163+
testutil.Equals(t, isValidIdent("__cedar"), false)
161164
}
162165

163166
func TestLexerBadStringEscape(t *testing.T) {

0 commit comments

Comments
 (0)