Skip to content

Commit 2a5043c

Browse files
committed
test: add regression tests for nested starform advanced-format parsing
Cover the bug fixed in the previous commit, where nested sub-expressions containing *-forms were flattened into atoms at the wrong nesting level.

New tests in pkg/persist/persist_test.go:

TestAdvancedToCanonical — extended with:
- nested_starform_range_(regression): (outer (inner (* range numeric ge 080)))
- multiple_nested_starforms: full facetec-scan rule with two range predicates
- nested_wildcard_starform
- flat_starform_range

TestParseAdvanced_NestedStarForms:
- Directly tests parseAdvanced() at the sexp.Element tree level
- Asserts that inner sub-lists contain *starform.Range values (not plain atoms)
- Covers single-level, two-level, and facetec-scan (dual range) patterns

TestLoadFile_AdvancedNestedRangeRules:
- End-to-end: writes a .spoc file in advanced format with comments, loads it via LoadFile, and checks that the canonical string output matches exactly

TestAdvTokenize_NestedParens:
- Verifies that advTokenize preserves nested parenthesised groups as single opaque tokens (the property the old tokenizer lacked)
1 parent 6e1ecff commit 2a5043c

File tree

1 file changed

+245
-0
lines changed

1 file changed

+245
-0
lines changed

pkg/persist/persist_test.go

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"testing"
77

88
"github.com/sirosfoundation/go-spocp/pkg/sexp"
9+
"github.com/sirosfoundation/go-spocp/pkg/starform"
910
)
1011

1112
func TestSaveLoadCanonical(t *testing.T) {
@@ -427,6 +428,28 @@ func TestAdvancedToCanonical(t *testing.T) {
427428
advanced: "(http (action GET) (path index.html))",
428429
want: "(4:http(6:action3:GET)(4:path10:index.html))",
429430
},
431+
// Regression: nested starform was previously flattened into atoms at
432+
// the wrong nesting level by the broken advancedToCanonical pipeline.
433+
{
434+
name: "nested starform range (regression)",
435+
advanced: "(outer (inner (* range numeric ge 080)))",
436+
want: "(5:outer(5:inner(1:*5:range7:numeric2:ge3:080)))",
437+
},
438+
{
439+
name: "multiple nested starforms (facetec-scan pattern)",
440+
advanced: "(facetec-scan (liveness-score (* range numeric ge 080)) (face-match-level (* range numeric ge 06)) (doc-type passport) (mrz-verified true))",
441+
want: "(12:facetec-scan(14:liveness-score(1:*5:range7:numeric2:ge3:080))(16:face-match-level(1:*5:range7:numeric2:ge2:06))(8:doc-type8:passport)(12:mrz-verified4:true))",
442+
},
443+
{
444+
name: "nested wildcard starform",
445+
advanced: "(outer (inner (*)))",
446+
want: "(5:outer(5:inner(1:*)))",
447+
},
448+
{
449+
name: "flat starform range",
450+
advanced: "(resource (* range numeric ge 010))",
451+
want: "(8:resource(1:*5:range7:numeric2:ge3:010))",
452+
},
430453
}
431454

432455
for _, tt := range tests {
@@ -575,3 +598,225 @@ func TestLoadBinaryErrors(t *testing.T) {
575598
t.Error("Expected error for truncated file")
576599
}
577600
}
601+
602+
// TestParseAdvanced_NestedStarForms is a regression test for the bug where
603+
// parseAdvanced (and advancedToCanonical) would flatten nested sub-expressions
604+
// containing *-forms into atoms at the wrong depth.
605+
//
606+
// Before the fix, `(outer (inner (* range numeric ge 080)))` produced a list
607+
// whose inner element was NOT a sub-list but a series of sibling atoms, causing
608+
// range predicates in policy rules to be silently ignored during evaluation.
609+
func TestParseAdvanced_NestedStarForms(t *testing.T) {
610+
tests := []struct {
611+
name string
612+
input string
613+
checkInner func(t *testing.T, elem sexp.Element)
614+
}{
615+
{
616+
name: "single level nesting with range",
617+
input: "(outer (* range numeric ge 080))",
618+
checkInner: func(t *testing.T, elem sexp.Element) {
619+
list, ok := elem.(*sexp.List)
620+
if !ok {
621+
t.Fatalf("expected *sexp.List, got %T", elem)
622+
}
623+
if list.Tag != "outer" {
624+
t.Fatalf("expected tag 'outer', got %q", list.Tag)
625+
}
626+
if len(list.Elements) != 1 {
627+
t.Fatalf("expected 1 element, got %d", len(list.Elements))
628+
}
629+
r, ok := list.Elements[0].(*starform.Range)
630+
if !ok {
631+
t.Fatalf("expected *starform.Range as child, got %T", list.Elements[0])
632+
}
633+
if r.RangeType != starform.RangeNumeric {
634+
t.Errorf("expected RangeNumeric, got %v", r.RangeType)
635+
}
636+
if r.LowerBound == nil || r.LowerBound.Op != starform.OpGE || r.LowerBound.Value != "080" {
637+
t.Errorf("unexpected LowerBound: %+v", r.LowerBound)
638+
}
639+
},
640+
},
641+
{
642+
// Regression: this exact pattern was broken before the fix.
643+
name: "two levels of nesting with range",
644+
input: "(outer (inner (* range numeric ge 080)))",
645+
checkInner: func(t *testing.T, elem sexp.Element) {
646+
outer, ok := elem.(*sexp.List)
647+
if !ok {
648+
t.Fatalf("expected *sexp.List for outer, got %T", elem)
649+
}
650+
if outer.Tag != "outer" {
651+
t.Fatalf("outer tag: got %q, want 'outer'", outer.Tag)
652+
}
653+
if len(outer.Elements) != 1 {
654+
t.Fatalf("outer should have 1 child, got %d; elements: %v",
655+
len(outer.Elements), outer.Elements)
656+
}
657+
inner, ok := outer.Elements[0].(*sexp.List)
658+
if !ok {
659+
t.Fatalf("outer.Elements[0]: expected *sexp.List (inner), got %T — "+
660+
"this is the regression: starform was flattened", outer.Elements[0])
661+
}
662+
if inner.Tag != "inner" {
663+
t.Fatalf("inner tag: got %q, want 'inner'", inner.Tag)
664+
}
665+
if len(inner.Elements) != 1 {
666+
t.Fatalf("inner should have 1 child, got %d", len(inner.Elements))
667+
}
668+
r, ok := inner.Elements[0].(*starform.Range)
669+
if !ok {
670+
t.Fatalf("inner.Elements[0]: expected *starform.Range, got %T", inner.Elements[0])
671+
}
672+
if r.RangeType != starform.RangeNumeric {
673+
t.Errorf("expected RangeNumeric, got %v", r.RangeType)
674+
}
675+
if r.LowerBound == nil || r.LowerBound.Value != "080" {
676+
t.Errorf("unexpected LowerBound: %+v", r.LowerBound)
677+
}
678+
},
679+
},
680+
{
681+
// The exact facetec-scan pattern from facetec-api/rules/default.spoc.
682+
name: "facetec-scan rule with two range predicates",
683+
input: "(facetec-scan (liveness-score (* range numeric ge 080)) (face-match-level (* range numeric ge 06)) (doc-type passport) (mrz-verified true))",
684+
checkInner: func(t *testing.T, elem sexp.Element) {
685+
outer, ok := elem.(*sexp.List)
686+
if !ok {
687+
t.Fatalf("expected *sexp.List for facetec-scan, got %T", elem)
688+
}
689+
if outer.Tag != "facetec-scan" {
690+
t.Fatalf("tag: got %q, want 'facetec-scan'", outer.Tag)
691+
}
692+
if len(outer.Elements) != 4 {
693+
t.Fatalf("expected 4 children, got %d", len(outer.Elements))
694+
}
695+
696+
// Check liveness-score sub-list.
697+
ls, ok := outer.Elements[0].(*sexp.List)
698+
if !ok {
699+
t.Fatalf("Elements[0]: expected *sexp.List (liveness-score), got %T", outer.Elements[0])
700+
}
701+
if ls.Tag != "liveness-score" {
702+
t.Errorf("liveness-score tag: got %q", ls.Tag)
703+
}
704+
if len(ls.Elements) != 1 {
705+
t.Fatalf("liveness-score should have 1 child, got %d", len(ls.Elements))
706+
}
707+
lr, ok := ls.Elements[0].(*starform.Range)
708+
if !ok {
709+
t.Fatalf("liveness-score child: expected *starform.Range, got %T", ls.Elements[0])
710+
}
711+
if lr.LowerBound == nil || lr.LowerBound.Value != "080" {
712+
t.Errorf("liveness-score range bound: %+v", lr.LowerBound)
713+
}
714+
715+
// Check face-match-level sub-list.
716+
fm, ok := outer.Elements[1].(*sexp.List)
717+
if !ok {
718+
t.Fatalf("Elements[1]: expected *sexp.List (face-match-level), got %T", outer.Elements[1])
719+
}
720+
if fm.Tag != "face-match-level" {
721+
t.Errorf("face-match-level tag: got %q", fm.Tag)
722+
}
723+
fr, ok := fm.Elements[0].(*starform.Range)
724+
if !ok {
725+
t.Fatalf("face-match-level child: expected *starform.Range, got %T", fm.Elements[0])
726+
}
727+
if fr.LowerBound == nil || fr.LowerBound.Value != "06" {
728+
t.Errorf("face-match-level range bound: %+v", fr.LowerBound)
729+
}
730+
},
731+
},
732+
}
733+
734+
for _, tt := range tests {
735+
t.Run(tt.name, func(t *testing.T) {
736+
elem, err := parseAdvanced(tt.input)
737+
if err != nil {
738+
t.Fatalf("parseAdvanced(%q): %v", tt.input, err)
739+
}
740+
tt.checkInner(t, elem)
741+
})
742+
}
743+
}
744+
745+
// TestLoadFile_AdvancedNestedRangeRules is an end-to-end regression test:
746+
// write a .spoc file in advanced format containing rules with nested starform
747+
// range predicates, load it, and verify the canonical representation matches.
748+
func TestLoadFile_AdvancedNestedRangeRules(t *testing.T) {
749+
tmpDir := t.TempDir()
750+
rulesFile := filepath.Join(tmpDir, "test.spoc")
751+
752+
content := `; accept passports
753+
(facetec-scan (liveness-score (* range numeric ge 080)) (face-match-level (* range numeric ge 06)) (doc-type passport) (mrz-verified true))
754+
; accept driving licences
755+
(facetec-scan (liveness-score (* range numeric ge 080)) (face-match-level (* range numeric ge 06)) (doc-type dl) (barcode-verified true))
756+
`
757+
if err := os.WriteFile(rulesFile, []byte(content), 0644); err != nil {
758+
t.Fatalf("write rules file: %v", err)
759+
}
760+
761+
opts := LoadOptions{Format: FormatAdvanced, SkipInvalid: false, Comments: []string{"#", "//", ";"}}
762+
rules, err := LoadFile(rulesFile, opts)
763+
if err != nil {
764+
t.Fatalf("LoadFile: %v", err)
765+
}
766+
if len(rules) != 2 {
767+
t.Fatalf("expected 2 rules, got %d", len(rules))
768+
}
769+
770+
want := []string{
771+
"(12:facetec-scan(14:liveness-score(1:*5:range7:numeric2:ge3:080))(16:face-match-level(1:*5:range7:numeric2:ge2:06))(8:doc-type8:passport)(12:mrz-verified4:true))",
772+
"(12:facetec-scan(14:liveness-score(1:*5:range7:numeric2:ge3:080))(16:face-match-level(1:*5:range7:numeric2:ge2:06))(8:doc-type2:dl)(16:barcode-verified4:true))",
773+
}
774+
for i, rule := range rules {
775+
got := rule.String()
776+
if got != want[i] {
777+
t.Errorf("rule[%d]:\n got: %s\n want: %s", i, got, want[i])
778+
}
779+
}
780+
}
781+
782+
// TestAdvTokenize_NestedParens verifies that advTokenize preserves nested
783+
// parenthesised groups as single tokens (this is the property that
784+
// distinguishes it from the old broken tokenize implementation).
785+
func TestAdvTokenize_NestedParens(t *testing.T) {
786+
tests := []struct {
787+
input string
788+
want []string
789+
}{
790+
{
791+
input: "http GET",
792+
want: []string{"http", "GET"},
793+
},
794+
{
795+
input: "(action GET) (path /api)",
796+
want: []string{"(action GET)", "(path /api)"},
797+
},
798+
{
799+
// The critical case: a sub-expression containing a further nested group.
800+
input: "(liveness-score (* range numeric ge 080)) (doc-type passport)",
801+
want: []string{"(liveness-score (* range numeric ge 080))", "(doc-type passport)"},
802+
},
803+
{
804+
input: `"quoted string" plain`,
805+
want: []string{`"quoted string"`, "plain"},
806+
},
807+
}
808+
809+
for _, tt := range tests {
810+
got := advTokenize(tt.input)
811+
if len(got) != len(tt.want) {
812+
t.Errorf("advTokenize(%q): got %d tokens %v, want %d tokens %v",
813+
tt.input, len(got), got, len(tt.want), tt.want)
814+
continue
815+
}
816+
for i := range tt.want {
817+
if got[i] != tt.want[i] {
818+
t.Errorf("advTokenize(%q)[%d]: got %q, want %q", tt.input, i, got[i], tt.want[i])
819+
}
820+
}
821+
}
822+
}

0 commit comments

Comments
 (0)