x/exp/schema: fix reserved keyword handling and add validation

patjakdev · claude · patjakdev · commit 490ec1e46384 · 2026-02-10T13:55:31.000-08:00
Add a tokenReservedKeyword token type to the schema parser's lexer, matching the approach used in the main Cedar parser. Previously, reserved keywords like "true", "false", "in", "if", etc. were lexed as plain identifiers, which meant the parser silently accepted them in positions where they should be rejected (e.g. `entity true;`, `type if = String;`).

Bugs fixed:

- Reserved Cedar keywords were accepted as entity, type, and action names, namespace path components, and attribute names without quoting. The parser now rejects these with a clear error message.
- __cedar as a definition name (entity, type, enum) was silently accepted. Such definitions are now rejected, while __cedar is still allowed as an action name, attribute name, and type reference prefix, which matches the Cedar Rust behavior.
- Duplicate annotations (e.g. `@doc("a") @doc("b")`) were silently accepted with last-wins semantics. The parser now rejects duplicates.
- Duplicate principal, resource, or context declarations within appliesTo were silently accepted. The parser now rejects duplicates.
- Empty principal or resource type lists in appliesTo (e.g. `principal: []`) were silently accepted, producing a meaningless empty list. The parser now rejects these.
- appliesTo blocks missing a principal or resource declaration were accepted. The parser now requires both.
- MarshalSchema emitted reserved keywords as bare identifiers in attribute and action names (e.g. `true: String`), producing output that could not be re-parsed. isValidIdent now checks for reserved keywords and the marshaler quotes them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-Off-By: Patrick Jakubowski <patrick.jakubowski@strongdm.com>
diff --git a/internal/parser/cedar_tokenize.go b/internal/parser/cedar_tokenize.go
@@ -36,9 +36,12 @@ type Token struct {
 	Text string
 }
 
-// N.B. "is" is included here for compatibility with the Rust implementation. The Cedar specification does not list
-// "is" as a reserved keyword
-var reservedKeywords = []string{"true", "false", "if", "then", "else", "in", "like", "has", "is"}
+var reservedKeywords = []string{"true", "false", "if", "then", "else", "in", "like", "has", "is", "__cedar"}
+
+// IsReservedKeyword reports whether s exactly (case-sensitively) matches an entry of reservedKeywords, which includes "__cedar".
+func IsReservedKeyword(s string) bool {
+	return slices.Contains(reservedKeywords, s)
+}
 
 func (t Token) isEOF() bool {
 	return t.Type == TokenEOF
@@ -488,7 +491,7 @@ redo:
 
 	// last minute check for reserved keywords
 	text := s.tokenText()
-	if tt == TokenIdent && slices.Contains(reservedKeywords, text) {
+	if tt == TokenIdent && IsReservedKeyword(text) {
 		tt = TokenReservedKeyword
 	}
 
diff --git a/x/exp/schema/internal/parser/marshal.go b/x/exp/schema/internal/parser/marshal.go
@@ -7,6 +7,7 @@ import (
 	"slices"
 	"strings"
 
+	cedarparser "github.com/cedar-policy/cedar-go/internal/parser"
 	"github.com/cedar-policy/cedar-go/types"
 	"github.com/cedar-policy/cedar-go/x/exp/schema/ast"
 )
@@ -311,7 +312,7 @@ func isValidIdent(s string) bool {
 			}
 		}
 	}
-	return true
+	return !cedarparser.IsReservedKeyword(s)
 }
 
 // quoteCedar produces a double-quoted string literal using only Cedar-valid
diff --git a/x/exp/schema/internal/parser/parser.go b/x/exp/schema/internal/parser/parser.go
@@ -4,6 +4,7 @@ package parser
 import (
 	"fmt"
 	"slices"
+	"strings"
 
 	"github.com/cedar-policy/cedar-go/types"
 	"github.com/cedar-policy/cedar-go/x/exp/schema/ast"
@@ -86,6 +87,8 @@ func tokenName(tt tokenType) string {
 		return "'?'"
 	case tokenEquals:
 		return "'='"
+	case tokenReservedKeyword:
+		return "reserved keyword"
 	default:
 		return "unknown"
 	}
@@ -99,6 +102,8 @@ func tokenDesc(tok token) string {
 		return fmt.Sprintf("identifier %q", tok.Text)
 	case tokenString:
 		return fmt.Sprintf("string %q", tok.Text)
+	case tokenReservedKeyword:
+		return fmt.Sprintf("reserved keyword %q", tok.Text)
 	default:
 		return fmt.Sprintf("%q", tok.Text)
 	}
@@ -142,6 +147,9 @@ func (p *parser) parseNamespace(annotations ast.Annotations) (parsedNamespace, e
 	if err != nil {
 		return parsedNamespace{}, err
 	}
+	if slices.Contains(strings.Split(string(path), "::"), "__cedar") {
+		return parsedNamespace{}, fmt.Errorf("%s: the name %q contains \"__cedar\", which is reserved", p.tok.Pos, path)
+	}
 	if err := p.expect(tokenLBrace); err != nil {
 		return parsedNamespace{}, err
 	}
@@ -211,7 +219,7 @@ func (p *parser) parseEntity(annotations ast.Annotations, namespace *types.Path,
 
 	// Parse optional 'in' clause
 	var memberOf []ast.EntityTypeRef
-	if p.tok.Type == tokenIdent && p.tok.Text == "in" {
+	if p.tok.Type == tokenReservedKeyword && p.tok.Text == "in" {
 		if err := p.readToken(); err != nil {
 			return err
 		}
@@ -319,7 +327,7 @@ func (p *parser) parseAction(annotations ast.Annotations, namespace *types.Path,
 
 	// Parse optional 'in' clause
 	var memberOf []ast.ParentRef
-	if p.tok.Type == tokenIdent && p.tok.Text == "in" {
+	if p.tok.Type == tokenReservedKeyword && p.tok.Text == "in" {
 		if err := p.readToken(); err != nil {
 			return err
 		}
@@ -410,7 +418,7 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
 		if err := p.readToken(); err != nil {
 			return nil, err
 		}
-		if p.tok.Type != tokenIdent {
+		if p.tok.Type != tokenIdent && p.tok.Type != tokenReservedKeyword {
 			return nil, p.errorf("expected annotation name, got %s", tokenDesc(p.tok))
 		}
 		key := types.Ident(p.tok.Text)
@@ -438,6 +446,9 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
 		if annotations == nil {
 			annotations = ast.Annotations{}
 		}
+		if _, ok := annotations[key]; ok {
+			return nil, p.errorf("duplicate annotation %q", key)
+		}
 		if hasValue {
 			annotations[key] = value
 		} else {
@@ -448,8 +459,10 @@ func (p *parser) parseAnnotations() (ast.Annotations, error) {
 }
 
 // parsePath parses IDENT { '::' IDENT }
+// As a special case, "__cedar" is accepted as the first component even though it is
+// a reserved keyword, because it is valid as a type reference prefix (e.g. __cedar::String).
 func (p *parser) parsePath() (types.Path, error) {
-	if p.tok.Type != tokenIdent {
+	if p.tok.Type != tokenIdent && (p.tok.Type != tokenReservedKeyword || p.tok.Text != "__cedar") {
 		return "", p.errorf("expected identifier, got %s", tokenDesc(p.tok))
 	}
 	path := p.tok.Text
@@ -473,8 +486,10 @@ func (p *parser) parsePath() (types.Path, error) {
 
 // parsePathForRef parses a path that may include a trailing '::' followed by a string literal
 // for action parent references. Returns the path and whether a string was found.
+// As a special case, "__cedar" is accepted as the first component even though it is
+// a reserved keyword, because it is valid as a type reference prefix (e.g. __cedar::String).
 func (p *parser) parsePathForRef() (path types.Path, str types.String, qualified bool, err error) {
-	if p.tok.Type != tokenIdent {
+	if p.tok.Type != tokenIdent && (p.tok.Type != tokenReservedKeyword || p.tok.Text != "__cedar") {
 		return "", "", false, p.errorf("expected identifier, got %s", tokenDesc(p.tok))
 	}
 	pathStr := p.tok.Text
@@ -549,14 +564,17 @@ func (p *parser) parseNames() ([]types.String, error) {
 }
 
 func (p *parser) parseName() (types.String, error) {
-	switch p.tok.Type {
-	case tokenIdent:
+	// Weirdly, Cedar schemas allow __cedar as an attribute or action name without
+	// double quotes, while all other reserved keywords require double quotes
+	switch {
+	case p.tok.Type == tokenIdent,
+		p.tok.Type == tokenReservedKeyword && p.tok.Text == "__cedar":
 		name := types.String(p.tok.Text)
 		if err := p.readToken(); err != nil {
 			return "", err
 		}
 		return name, nil
-	case tokenString:
+	case p.tok.Type == tokenString:
 		name := types.String(p.tok.Text)
 		if err := p.readToken(); err != nil {
 			return "", err
@@ -653,6 +671,9 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
 		return nil, err
 	}
 	at := &ast.AppliesTo{}
+	hasPrincipal := false
+	hasResource := false
+	hasContext := false
 	for p.tok.Type != tokenRBrace {
 		if p.tok.Type == tokenEOF {
 			return nil, p.errorf("expected '}' to close appliesTo, got EOF")
@@ -662,6 +683,10 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
 		}
 		switch p.tok.Text {
 		case "principal":
+			if hasPrincipal {
+				return nil, p.errorf("duplicate principal declaration in appliesTo")
+			}
+			hasPrincipal = true
 			if err := p.readToken(); err != nil {
 				return nil, err
 			}
@@ -672,8 +697,15 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
 			if err != nil {
 				return nil, err
 			}
+			if len(refs) == 0 {
+				return nil, p.errorf("principal types must not be empty")
+			}
 			at.Principals = refs
 		case "resource":
+			if hasResource {
+				return nil, p.errorf("duplicate resource declaration in appliesTo")
+			}
+			hasResource = true
 			if err := p.readToken(); err != nil {
 				return nil, err
 			}
@@ -684,8 +716,15 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
 			if err != nil {
 				return nil, err
 			}
+			if len(refs) == 0 {
+				return nil, p.errorf("resource types must not be empty")
+			}
 			at.Resources = refs
 		case "context":
+			if hasContext {
+				return nil, p.errorf("duplicate context declaration in appliesTo")
+			}
+			hasContext = true
 			if err := p.readToken(); err != nil {
 				return nil, err
 			}
@@ -706,6 +745,12 @@ func (p *parser) parseAppliesTo() (*ast.AppliesTo, error) {
 			}
 		}
 	}
+	if !hasPrincipal {
+		return nil, p.errorf("appliesTo must include a principal declaration")
+	}
+	if !hasResource {
+		return nil, p.errorf("appliesTo must include a resource declaration")
+	}
 	return at, p.readToken() // consume '}'
 }
 
diff --git a/x/exp/schema/internal/parser/parser_internal_test.go b/x/exp/schema/internal/parser/parser_internal_test.go
@@ -137,6 +137,7 @@ func TestTokenName(t *testing.T) {
 		{tokenDoubleColon, "'::'"},
 		{tokenQuestion, "'?'"},
 		{tokenEquals, "'='"},
+		{tokenReservedKeyword, "reserved keyword"},
 		{tokenType(999), "unknown"},
 	}
 	for _, tt := range tests {
@@ -158,6 +159,8 @@ func TestIsValidIdent(t *testing.T) {
 	testutil.Equals(t, isValidIdent(""), false)
 	testutil.Equals(t, isValidIdent("1abc"), false)
 	testutil.Equals(t, isValidIdent("foo bar"), false)
+	testutil.Equals(t, isValidIdent("in"), false)
+	testutil.Equals(t, isValidIdent("__cedar"), false)
 }
 
 func TestLexerBadStringEscape(t *testing.T) {
diff --git a/x/exp/schema/internal/parser/parser_test.go b/x/exp/schema/internal/parser/parser_test.go
diff --git a/x/exp/schema/internal/parser/token.go b/x/exp/schema/internal/parser/token.go

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@ import (`
`7`	`7`	`"slices"`
`8`	`8`	`"strings"`
`9`	`9`
	`10`	`+ cedarparser "github.com/cedar-policy/cedar-go/internal/parser"`
`10`	`11`	`"github.com/cedar-policy/cedar-go/types"`
`11`	`12`	`"github.com/cedar-policy/cedar-go/x/exp/schema/ast"`
`12`	`13`	`)`
`@@ -311,7 +312,7 @@ func isValidIdent(s string) bool {`
`311`	`312`	`}`
`312`	`313`	`}`
`313`	`314`	`}`
`314`		`- return true`
	`315`	`+ return !cedarparser.IsReservedKeyword(s)`
`315`	`316`	`}`
`316`	`317`
`317`	`318`	`// quoteCedar produces a double-quoted string literal using only Cedar-valid`