@@ -13,6 +13,7 @@ import (
1313 "strings"
1414
1515 "github.com/sirosfoundation/go-spocp/pkg/sexp"
16+ "github.com/sirosfoundation/go-spocp/pkg/starform"
1617)
1718
1819// FileFormat represents the format of a ruleset file
@@ -121,10 +122,8 @@ func loadText(r io.Reader, opts LoadOptions) ([]sexp.Element, error) {
121122 var err error
122123
123124 if opts .Format == FormatAdvanced {
124- // Convert advanced form to canonical, then parse
125- canonical := advancedToCanonical (line )
126- parser := sexp .NewParser (canonical )
127- elem , err = parser .Parse ()
125+ // Parse advanced form directly into sexp.Element (handles star-forms)
126+ elem , err = parseAdvanced (line )
128127 } else {
129128 // Parse canonical form directly
130129 parser := sexp .NewParser (line )
@@ -350,102 +349,202 @@ func isBinaryFile(filename string) bool {
350349 strings .HasSuffix (filename , ".bin" )
351350}
352351
353- // advancedToCanonical converts advanced form to canonical form
354- // This is a simple implementation - for production use, you might want
355- // a more sophisticated parser
356- func advancedToCanonical (advanced string ) string {
357- // Remove outer parentheses if present
358- advanced = strings .TrimSpace (advanced )
359- if strings .HasPrefix (advanced , "(" ) && strings .HasSuffix (advanced , ")" ) {
360- advanced = advanced [1 : len (advanced )- 1 ]
352+ // parseAdvanced parses a human-readable advanced-form S-expression string
353+ // directly into a sexp.Element tree, without going through canonical form.
354+ // This replaces the old advancedToCanonical + sexp.NewParser two-step approach
355+ // which was incomplete for deeply nested expressions.
356+ //
357+ // The advanced format is:
358+ //
359+ // atom → any whitespace-delimited token without parens
360+ // list → "(" atom element* ")"
361+ // star-wildcard → "(*)"
362+ // star-range → "(* range <type> <op> <val> [<op> <val>])"
363+ // star-prefix → "(* prefix <value>)"
364+ // star-suffix → "(* suffix <value>)"
365+ // star-set → "(* set <element>...)"
366+ func parseAdvanced (s string ) (sexp.Element , error ) {
367+ s = strings .TrimSpace (s )
368+ tokens := advTokenize (s )
369+ if len (tokens ) == 0 {
370+ return nil , fmt .Errorf ("empty expression" )
361371 }
362-
363- // Split into tokens
364- tokens := tokenize (advanced )
365-
366- // Convert to canonical
367- return tokensToCanonical (tokens )
372+ elem , rest , err := advParseElement (tokens )
373+ if err != nil {
374+ return nil , err
375+ }
376+ if len (rest ) > 0 {
377+ return nil , fmt .Errorf ("unexpected trailing tokens: %v" , rest )
378+ }
379+ return elem , nil
368380}
369381
370- func tokenize (s string ) []string {
382+ // advTokenize splits an advanced-form string into a flat list of string tokens.
383+ // Sub-expressions in parens are kept as single opaque tokens (with parens).
384+ // Quoted strings are kept as single tokens (including the quote characters).
385+ func advTokenize (s string ) []string {
371386 var tokens []string
372387 var current strings.Builder
373388 depth := 0
374389 inQuote := false
375390
376- for i , ch := range s {
377- switch ch {
378- case '(' :
379- if ! inQuote {
380- if current .Len () > 0 {
381- tokens = append (tokens , current .String ())
382- current .Reset ()
383- }
384- depth ++
391+ runes := []rune (s )
392+ for i := 0 ; i < len (runes ); i ++ {
393+ ch := runes [i ]
394+ switch {
395+ case ch == '"' :
396+ inQuote = ! inQuote
397+ current .WriteRune (ch )
398+ case inQuote :
399+ current .WriteRune (ch )
400+ case ch == '(' :
401+ if depth > 0 {
385402 current .WriteRune (ch )
386403 } else {
387- current .WriteRune (ch )
388- }
389- case ')' :
390- if ! inQuote {
391- current .WriteRune (ch )
392- depth --
393- if depth == 0 {
404+ if current .Len () > 0 {
394405 tokens = append (tokens , current .String ())
395406 current .Reset ()
396407 }
397- } else {
398408 current .WriteRune (ch )
399409 }
400- case '"' :
401- inQuote = ! inQuote
410+ depth ++
411+ case ch == ')' :
402412 current .WriteRune (ch )
403- case ' ' , '\t' , '\n' , '\r' :
404- if ! inQuote && depth == 0 {
405- if current .Len () > 0 {
406- tokens = append ( tokens , current .String () )
407- current . Reset ()
408- }
409- } else {
413+ depth --
414+ if depth == 0 {
415+ tokens = append ( tokens , current .String ())
416+ current .Reset ( )
417+ }
418+ case ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' :
419+ if depth > 0 {
410420 current .WriteRune (ch )
421+ } else if current .Len () > 0 {
422+ tokens = append (tokens , current .String ())
423+ current .Reset ()
411424 }
412425 default :
413426 current .WriteRune (ch )
414427 }
415-
416- // Handle last character
417- if i == len (s )- 1 && current .Len () > 0 {
418- tokens = append (tokens , current .String ())
419- }
420428 }
421-
429+ if current .Len () > 0 {
430+ tokens = append (tokens , current .String ())
431+ }
422432 return tokens
423433}
424434
425- func tokensToCanonical (tokens []string ) string {
435+ // advParseElement parses the first element from tokens and returns it along
436+ // with the remaining unconsumed tokens.
437+ func advParseElement (tokens []string ) (sexp.Element , []string , error ) {
426438 if len (tokens ) == 0 {
427- return ""
439+ return nil , nil , fmt .Errorf ("unexpected end of tokens" )
440+ }
441+ tok := tokens [0 ]
442+ if ! strings .HasPrefix (tok , "(" ) {
443+ // Plain atom
444+ return sexp .NewAtom (tok ), tokens [1 :], nil
445+ }
446+ // It's a list — strip the outer parens and tokenize its contents
447+ inner := strings .TrimSpace (tok [1 : len (tok )- 1 ])
448+ innerTokens := advTokenize (inner )
449+ if len (innerTokens ) == 0 {
450+ return nil , nil , fmt .Errorf ("empty list" )
451+ }
452+ tag := innerTokens [0 ]
453+ if tag == "*" {
454+ // Star form
455+ elem , err := advParseStarForm (innerTokens [1 :])
456+ if err != nil {
457+ return nil , nil , err
458+ }
459+ return elem , tokens [1 :], nil
460+ }
461+ // Regular list: tag + elements
462+ var elems []sexp.Element
463+ rest := innerTokens [1 :]
464+ for len (rest ) > 0 {
465+ var elem sexp.Element
466+ var err error
467+ elem , rest , err = advParseElement (rest )
468+ if err != nil {
469+ return nil , nil , err
470+ }
471+ elems = append (elems , elem )
428472 }
473+ return sexp .NewList (tag , elems ... ), tokens [1 :], nil
474+ }
429475
430- // Single token
431- if len (tokens ) == 1 {
432- token := tokens [0 ]
433- if strings .HasPrefix (token , "(" ) {
434- return tokensToCanonical (tokenize (token [1 : len (token )- 1 ]))
476+ // advParseStarForm constructs the appropriate starform type from the tokens
477+ // that follow the "*" tag inside a star-form list.
478+ func advParseStarForm (args []string ) (sexp.Element , error ) {
479+ if len (args ) == 0 {
480+ return & starform.Wildcard {}, nil
481+ }
482+ switch args [0 ] {
483+ case "range" :
484+ return advParseRange (args [1 :])
485+ case "prefix" :
486+ if len (args ) != 2 {
487+ return nil , fmt .Errorf ("prefix star-form expects 1 argument, got %d" , len (args )- 1 )
435488 }
436- return fmt .Sprintf ("%d:%s" , len (token ), token )
489+ return & starform.Prefix {Value : args [1 ]}, nil
490+ case "suffix" :
491+ if len (args ) != 2 {
492+ return nil , fmt .Errorf ("suffix star-form expects 1 argument, got %d" , len (args )- 1 )
493+ }
494+ return & starform.Suffix {Value : args [1 ]}, nil
495+ case "set" :
496+ var elems []sexp.Element
497+ rest := args [1 :]
498+ for len (rest ) > 0 {
499+ elem , remaining , err := advParseElement (rest )
500+ if err != nil {
501+ return nil , err
502+ }
503+ elems = append (elems , elem )
504+ rest = remaining
505+ }
506+ return & starform.Set {Elements : elems }, nil
507+ default :
508+ return nil , fmt .Errorf ("unknown star-form type: %q" , args [0 ])
437509 }
510+ }
438511
439- // Multiple tokens form a list
440- var buf strings.Builder
441- buf .WriteString ("(" )
442- for _ , token := range tokens {
443- if strings .HasPrefix (token , "(" ) {
444- buf .WriteString (tokensToCanonical (tokenize (token [1 : len (token )- 1 ])))
445- } else {
446- buf .WriteString (fmt .Sprintf ("%d:%s" , len (token ), token ))
512+ // advParseRange parses the contents of a range star-form: <type> (<op> <val>)...
513+ func advParseRange (args []string ) (sexp.Element , error ) {
514+ if len (args ) == 0 {
515+ return nil , fmt .Errorf ("range star-form requires a type argument" )
516+ }
517+ r := & starform.Range {RangeType : starform .RangeType (args [0 ])}
518+ i := 1
519+ for i + 1 < len (args ) {
520+ op := starform .RangeOp (args [i ])
521+ val := args [i + 1 ]
522+ bound := & starform.RangeBound {Op : op , Value : val }
523+ switch op {
524+ case starform .OpGE , starform .OpGT :
525+ r .LowerBound = bound
526+ case starform .OpLE , starform .OpLT :
527+ r .UpperBound = bound
528+ default :
529+ return nil , fmt .Errorf ("unknown range operator: %q" , op )
447530 }
531+ i += 2
448532 }
449- buf .WriteString (")" )
450- return buf .String ()
533+ return r , nil
534+ }
535+
536+ // advancedToCanonical converts advanced form to canonical form.
537+ // Kept for backward compatibility with saveAdvanced; new loading code uses
538+ // parseAdvanced directly.
539+ func advancedToCanonical (advanced string ) string {
540+ elem , err := parseAdvanced (advanced )
541+ if err != nil {
542+ return ""
543+ }
544+ return elem .String ()
545+ }
546+
547+ // tokenize splits a string into tokens (kept for tests that reference it).
548+ func tokenize (s string ) []string {
549+ return advTokenize (s )
451550}
0 commit comments