Skip to content

Commit 490ec1e

Browse files
patjakdevclaude
andcommitted
x/exp/schema: fix reserved keyword handling and add validation
Add a tokenReservedKeyword token type to the schema parser's lexer, matching the approach used in the main Cedar parser. Previously, reserved keywords like "true", "false", "in", "if", etc. were lexed as plain identifiers, which meant the parser silently accepted them in positions where they should be rejected (e.g. `entity true;`, `type if = String;`). Bugs fixed: - Reserved Cedar keywords were accepted as entity, type, and action names, namespace path components, and attribute names without quoting. The parser now rejects these with a clear error message. - __cedar as a definition name (entity, type, enum) was silently accepted. These are now rejected while still allowing __cedar as an action name, attribute name, and type reference prefix, which matches the Cedar Rust behavior. - Duplicate annotations (e.g. `@doc("a") @doc("b")`) were silently accepted with last-wins semantics. The parser now rejects duplicates. - Duplicate principal, resource, or context declarations within appliesTo were silently accepted. The parser now rejects duplicates. - Empty principal or resource type lists in appliesTo (e.g. `principal: []`) were silently accepted, producing a meaningless empty list. The parser now rejects these. - appliesTo blocks missing a principal or resource declaration were accepted. The parser now requires both. - MarshalSchema emitted reserved keywords as bare identifiers in attribute and action names (e.g. `true: String`), producing output that could not be re-parsed. isValidIdent now checks for reserved keywords and the marshaler quotes them. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-Off-By: Patrick Jakubowski <patrick.jakubowski@strongdm.com>
1 parent f8c7a1b commit 490ec1e

File tree

6 files changed

+195
-19
lines changed

6 files changed

+195
-19
lines changed

internal/parser/cedar_tokenize.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@ type Token struct {
3636
Text string
3737
}
3838

39-
// N.B. "is" is included here for compatibility with the Rust implementation. The Cedar specification does not list
40-
// "is" as a reserved keyword
41-
var reservedKeywords = []string{"true", "false", "if", "then", "else", "in", "like", "has", "is"}
39+
var reservedKeywords = []string{"true", "false", "if", "then", "else", "in", "like", "has", "is", "__cedar"}
40+
41+
// IsReservedKeyword reports whether s is a reserved Cedar keyword.
42+
func IsReservedKeyword(s string) bool {
43+
return slices.Contains(reservedKeywords, s)
44+
}
4245

4346
func (t Token) isEOF() bool {
4447
return t.Type == TokenEOF
@@ -488,7 +491,7 @@ redo:
488491

489492
// last minute check for reserved keywords
490493
text := s.tokenText()
491-
if tt == TokenIdent && slices.Contains(reservedKeywords, text) {
494+
if tt == TokenIdent && IsReservedKeyword(text) {
492495
tt = TokenReservedKeyword
493496
}
494497

x/exp/schema/internal/parser/marshal.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"slices"
88
"strings"
99

10+
cedarparser "github.com/cedar-policy/cedar-go/internal/parser"
1011
"github.com/cedar-policy/cedar-go/types"
1112
"github.com/cedar-policy/cedar-go/x/exp/schema/ast"
1213
)
@@ -311,7 +312,7 @@ func isValidIdent(s string) bool {
311312
}
312313
}
313314
}
314-
return true
315+
return !cedarparser.IsReservedKeyword(s)
315316
}
316317

317318
// quoteCedar produces a double-quoted string literal using only Cedar-valid

x/exp/schema/internal/parser/parser.go

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package parser
44
import (
55
"fmt"
66
"slices"
7+
"strings"
78

89
"github.com/cedar-policy/cedar-go/types"
910
"github.com/cedar-policy/cedar-go/x/exp/schema/ast"
@@ -86,6 +87,8 @@ func tokenName(tt tokenType) string {
8687
return "'?'"
8788
case tokenEquals:
8889
return "'='"
90+
case tokenReservedKeyword:
91+
return "reserved keyword"
8992
default:
9093
return "unknown"
9194
}
@@ -99,6 +102,8 @@ func tokenDesc(tok token) string {
99102
return fmt.Sprintf("identifier %q", tok.Text)
100103
case tokenString:
101104
return fmt.Sprintf("string %q", tok.Text)
105+
case tokenReservedKeyword:
106+
return fmt.Sprintf("reserved keyword %q", tok.Text)
102107
default:
103108
return fmt.Sprintf("%q", tok.Text)
104109
}
@@ -142,6 +147,9 @@ func (p *parser) parseNamespace(annotations ast.Annotations) (parsedNamespace, e
142147
if err != nil {
143148
return parsedNamespace{}, err
144149
}
150+
if slices.Contains(strings.Split(string(path), "::"), "__cedar") {
151+
return parsedNamespace{}, fmt.Errorf("%s: the name %q contains \"__cedar\", which is reserved", p.tok.Pos, path)
152+
}
145153
if err := p.expect(tokenLBrace); err != nil {
146154
return parsedNamespace{}, err
147155
}
@@ -211,7 +219,7 @@ func (p *parser) parseEntity(annotations ast.Annotations, namespace *types.Path,
211219

212220
// Parse optional 'in' clause
213221
var memberOf []ast.EntityTypeRef
214-
if p.tok.Type == tokenIdent && p.tok.Text == "in" {
222+
if p.tok.Type == tokenReservedKeyword && p.tok.Text == "in" {
215223
if err := p.readToken(); err != nil {
216224
return err
217225
}
@@ -319,7 +327,7 @@ func (p *parser) parseAction(annotations ast.Annotations, namespace *types.Path,
319327

320328
// Parse optional 'in' clause
321329
var memberOf []ast.ParentRef
322-
if p.tok.Type == tokenIdent && p.tok.Text == "in" {
330+
if p.tok.Type == tokenReservedKeyword && p.tok.Text == "in" {
323331
if err := p.readToken(); err != nil {
324332
return err
325333
}
@@ -410,7 +418,7 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
410418
if err := p.readToken(); err != nil {
411419
return nil, err
412420
}
413-
if p.tok.Type != tokenIdent {
421+
if p.tok.Type != tokenIdent && p.tok.Type != tokenReservedKeyword {
414422
return nil, p.errorf("expected annotation name, got %s", tokenDesc(p.tok))
415423
}
416424
key := types.Ident(p.tok.Text)
@@ -438,6 +446,9 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
438446
if annotations == nil {
439447
annotations = ast.Annotations{}
440448
}
449+
if _, ok := annotations[key]; ok {
450+
return nil, p.errorf("duplicate annotation %q", key)
451+
}
441452
if hasValue {
442453
annotations[key] = value
443454
} else {
@@ -448,8 +459,10 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
448459
}
449460

450461
// parsePath parses IDENT { '::' IDENT }
462+
// As a special case, "__cedar" is accepted as the first component even though it is
463+
// a reserved keyword, because it is valid as a type reference prefix (e.g. __cedar::String).
451464
func (p *parser) parsePath() (types.Path, error) {
452-
if p.tok.Type != tokenIdent {
465+
if p.tok.Type != tokenIdent && (p.tok.Type != tokenReservedKeyword || p.tok.Text != "__cedar") {
453466
return "", p.errorf("expected identifier, got %s", tokenDesc(p.tok))
454467
}
455468
path := p.tok.Text
@@ -473,8 +486,10 @@ func (p *parser) parsePath() (types.Path, error) {
473486

474487
// parsePathForRef parses a path that may include a trailing '::' followed by a string literal
475488
// for action parent references. Returns the path and whether a string was found.
489+
// As a special case, "__cedar" is accepted as the first component even though it is
490+
// a reserved keyword, because it is valid as a type reference prefix (e.g. __cedar::String).
476491
func (p *parser) parsePathForRef() (path types.Path, str types.String, qualified bool, err error) {
477-
if p.tok.Type != tokenIdent {
492+
if p.tok.Type != tokenIdent && (p.tok.Type != tokenReservedKeyword || p.tok.Text != "__cedar") {
478493
return "", "", false, p.errorf("expected identifier, got %s", tokenDesc(p.tok))
479494
}
480495
pathStr := p.tok.Text
@@ -549,14 +564,17 @@ func (p *parser) parseNames() ([]types.String, error) {
549564
}
550565

551566
func (p *parser) parseName() (types.String, error) {
552-
switch p.tok.Type {
553-
case tokenIdent:
567+
// Weirdly, Cedar schemas allow __cedar as an attribute or action name without
568+
// double quotes, while all other reserved keywords require double quotes
569+
switch {
570+
case p.tok.Type == tokenIdent,
571+
p.tok.Type == tokenReservedKeyword && p.tok.Text == "__cedar":
554572
name := types.String(p.tok.Text)
555573
if err := p.readToken(); err != nil {
556574
return "", err
557575
}
558576
return name, nil
559-
case tokenString:
577+
case p.tok.Type == tokenString:
560578
name := types.String(p.tok.Text)
561579
if err := p.readToken(); err != nil {
562580
return "", err
@@ -653,6 +671,9 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
653671
return nil, err
654672
}
655673
at := &ast.AppliesTo{}
674+
hasPrincipal := false
675+
hasResource := false
676+
hasContext := false
656677
for p.tok.Type != tokenRBrace {
657678
if p.tok.Type == tokenEOF {
658679
return nil, p.errorf("expected '}' to close appliesTo, got EOF")
@@ -662,6 +683,10 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
662683
}
663684
switch p.tok.Text {
664685
case "principal":
686+
if hasPrincipal {
687+
return nil, p.errorf("duplicate principal declaration in appliesTo")
688+
}
689+
hasPrincipal = true
665690
if err := p.readToken(); err != nil {
666691
return nil, err
667692
}
@@ -672,8 +697,15 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
672697
if err != nil {
673698
return nil, err
674699
}
700+
if len(refs) == 0 {
701+
return nil, p.errorf("principal types must not be empty")
702+
}
675703
at.Principals = refs
676704
case "resource":
705+
if hasResource {
706+
return nil, p.errorf("duplicate resource declaration in appliesTo")
707+
}
708+
hasResource = true
677709
if err := p.readToken(); err != nil {
678710
return nil, err
679711
}
@@ -684,8 +716,15 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
684716
if err != nil {
685717
return nil, err
686718
}
719+
if len(refs) == 0 {
720+
return nil, p.errorf("resource types must not be empty")
721+
}
687722
at.Resources = refs
688723
case "context":
724+
if hasContext {
725+
return nil, p.errorf("duplicate context declaration in appliesTo")
726+
}
727+
hasContext = true
689728
if err := p.readToken(); err != nil {
690729
return nil, err
691730
}
@@ -706,6 +745,12 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
706745
}
707746
}
708747
}
748+
if !hasPrincipal {
749+
return nil, p.errorf("appliesTo must include a principal declaration")
750+
}
751+
if !hasResource {
752+
return nil, p.errorf("appliesTo must include a resource declaration")
753+
}
709754
return at, p.readToken() // consume '}'
710755
}
711756

x/exp/schema/internal/parser/parser_internal_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ func TestTokenName(t *testing.T) {
137137
{tokenDoubleColon, "'::'"},
138138
{tokenQuestion, "'?'"},
139139
{tokenEquals, "'='"},
140+
{tokenReservedKeyword, "reserved keyword"},
140141
{tokenType(999), "unknown"},
141142
}
142143
for _, tt := range tests {
@@ -158,6 +159,8 @@ func TestIsValidIdent(t *testing.T) {
158159
testutil.Equals(t, isValidIdent(""), false)
159160
testutil.Equals(t, isValidIdent("1abc"), false)
160161
testutil.Equals(t, isValidIdent("foo bar"), false)
162+
testutil.Equals(t, isValidIdent("in"), false)
163+
testutil.Equals(t, isValidIdent("__cedar"), false)
161164
}
162165

163166
func TestLexerBadStringEscape(t *testing.T) {

0 commit comments

Comments
 (0)