From 6b883c49742385e65ea59a917742bb09f96c7e2b Mon Sep 17 00:00:00 2001 From: h3n4l Date: Fri, 29 Aug 2025 15:05:24 +0800 Subject: [PATCH 01/15] chore: remove depth check --- tools/fuzzing/internal/generator/generator.go | 4 ---- tools/fuzzing/tests/postgresql_test.go | 12 ++++++------ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index 9eb2ea6..eeead79 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -77,10 +77,6 @@ func (g *Generator) generateQuery(index int) string { // generateFromRule generates text from a grammar rule func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { - // Check depth limit to prevent infinite recursion - if currentDepth >= g.config.MaxDepth { - return fmt.Sprintf("<%s_MAX_DEPTH>", ruleName) - } // Get the rule rule := g.getRule(ruleName) diff --git a/tools/fuzzing/tests/postgresql_test.go b/tools/fuzzing/tests/postgresql_test.go index fa067a4..b571304 100644 --- a/tools/fuzzing/tests/postgresql_test.go +++ b/tools/fuzzing/tests/postgresql_test.go @@ -19,7 +19,7 @@ func getRepoRoot() string { func TestPostgreSQLSelectStmt(t *testing.T) { repoRoot := getRepoRoot() - + // PostgreSQL grammar file paths lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") @@ -36,7 +36,7 @@ func TestPostgreSQLSelectStmt(t *testing.T) { name: "Simple SELECT statements", startRule: "selectstmt", count: 3, - maxDepth: 5, + maxDepth: 10, optionalProb: 0.7, seed: 42, }, @@ -74,7 +74,7 @@ func TestPostgreSQLSelectStmt(t *testing.T) { } fmt.Printf("\n=== %s ===\n", tt.name) - fmt.Printf("Config: MaxDepth=%d, OptionalProb=%.1f, Count=%d, Seed=%d\n", + fmt.Printf("Config: MaxDepth=%d, OptionalProb=%.1f, Count=%d, Seed=%d\n", tt.maxDepth, tt.optionalProb, tt.count, tt.seed) fmt.Println() @@ -92,7 +92,7 @@ func TestPostgreSQLSelectStmt(t *testing.T) { func TestPostgreSQLExpressions(t *testing.T) { repoRoot := getRepoRoot() - + // PostgreSQL grammar file paths lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") @@ -126,7 +126,7 @@ func TestPostgreSQLExpressions(t *testing.T) { func TestPostgreSQLVerboseOutput(t *testing.T) { repoRoot := getRepoRoot() - + // PostgreSQL grammar file paths lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") @@ -161,7 +161,7 @@ func TestPostgreSQLVerboseOutput(t *testing.T) { // Benchmark test for performance measurement func BenchmarkPostgreSQLGeneration(b *testing.B) { repoRoot := getRepoRoot() - + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") From f4df6cd0ab093a0859f55dc732336421dbdd14b1 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Fri, 29 Aug 2025 15:05:33 +0800 Subject: [PATCH 02/15] chore: remove depth check --- tools/fuzzing/tests/postgresql_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/fuzzing/tests/postgresql_test.go b/tools/fuzzing/tests/postgresql_test.go index b571304..406db98 100644 --- a/tools/fuzzing/tests/postgresql_test.go +++ b/tools/fuzzing/tests/postgresql_test.go @@ -189,4 +189,4 @@ func BenchmarkPostgreSQLGeneration(b *testing.B) { 
b.Fatalf("Generation failed: %v", err) } } -} \ No newline at end of file +} From 6263a2515dbc5c18098a4857a3a5832769815cd2 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Mon, 1 Sep 2025 14:13:37 +0800 Subject: [PATCH 03/15] feat: recursion depth control --- tools/fuzzing/docs/recursion_control.md | 244 +++++++++++ tools/fuzzing/internal/generator/generator.go | 414 +++++++++++++++++- tools/fuzzing/internal/grammar/dependency.go | 273 ++++++++++++ tools/fuzzing/internal/grammar/parser.go | 62 ++- tools/fuzzing/tests/dependency_test.go | 155 +++++++ tools/fuzzing/tests/postgresql_test.go | 101 ----- tools/fuzzing/tests/recursive_test.go | 81 ++++ .../tests/terminal_completeness_test.go | 361 +++++++++++++++ 8 files changed, 1561 insertions(+), 130 deletions(-) create mode 100644 tools/fuzzing/docs/recursion_control.md create mode 100644 tools/fuzzing/internal/grammar/dependency.go create mode 100644 tools/fuzzing/tests/dependency_test.go create mode 100644 tools/fuzzing/tests/recursive_test.go create mode 100644 tools/fuzzing/tests/terminal_completeness_test.go diff --git a/tools/fuzzing/docs/recursion_control.md b/tools/fuzzing/docs/recursion_control.md new file mode 100644 index 0000000..bd6a17f --- /dev/null +++ b/tools/fuzzing/docs/recursion_control.md @@ -0,0 +1,244 @@ +# Recursion Control in Grammar-Aware Fuzzing + +## Overview + +This document describes our dependency graph-based approach to handle recursion in ANTLR 4 grammars for the fuzzing system. The strategy ensures valid output generation while preventing infinite loops and stack overflows. + +## Our Strategy: Dependency Graph with Terminal Reachability + +### Core Approach + +1. **Build dependency graph** during grammar parsing +2. **Analyze terminal reachability** for each rule +3. **Force terminal alternatives** when hitting recursion/depth limits + +### Key Principles + +- **Rule = Graph Node**: Each grammar rule becomes a node +- **Reference = Graph Edge**: `a -> b` when rule `a` references rule `b` +- **Terminal Reachability**: Every rule must have at least one path to terminal nodes +- **Alternative Classification**: Mark which alternatives can terminate without recursion + +## Graph Structure + +### Node Definition + +```go +type GraphNode struct { + RuleName string // Rule name (e.g., "selectStmt", "expr") + HasTerminalAlternatives bool // Can reach terminal without recursion + Alternatives []Alternative // All alternatives for this rule + TerminalAlternativeIndex []int // Indices of alternatives that terminate +} + +type DependencyGraph struct { + Nodes map[string]*GraphNode +} +``` + +### Edge Types + +- **Self-Reference**: `expr -> expr` (direct recursion) +- **Cross-Reference**: `selectStmt -> whereClause` (potential indirect recursion) +- **Terminal Reference**: `expr -> NUMBER` (terminates) + +## Implementation Algorithm + +### Step 1: Build Graph During Parsing + +```go +func BuildDependencyGraph(grammar *ParsedGrammar) *DependencyGraph { + graph := &DependencyGraph{Nodes: make(map[string]*GraphNode)} + + // Create nodes for all rules + for ruleName, rule := range grammar.GetAllRules() { + node := &GraphNode{ + RuleName: ruleName, + Alternatives: rule.Alternatives, + } + graph.Nodes[ruleName] = node + } + + // Analyze each rule for terminal reachability + analyzeTerminalReachability(graph) + + return graph +} +``` + +### Step 2: Terminal Reachability Analysis + +```go +func analyzeTerminalReachability(graph *DependencyGraph) { + // Phase 1: Mark lexer rules as terminal + for _, node := range graph.Nodes { + if 
isLexerRule(node.RuleName) {
+			node.HasTerminalAlternatives = true
+			// All lexer alternatives are terminal
+			for i := range node.Alternatives {
+				node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, i)
+			}
+		}
+	}
+
+	// Phase 2: Propagate terminal reachability
+	changed := true
+	for changed {
+		changed = false
+		for _, node := range graph.Nodes {
+			if node.HasTerminalAlternatives {
+				continue
+			}
+
+			// Check each alternative
+			for altIndex, alt := range node.Alternatives {
+				if canAlternativeTerminate(alt, graph) {
+					if !node.HasTerminalAlternatives {
+						node.HasTerminalAlternatives = true
+						changed = true
+					}
+					node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, altIndex)
+				}
+			}
+		}
+	}
+}
+
+func canAlternativeTerminate(alt Alternative, graph *DependencyGraph) bool {
+	for _, element := range alt.Elements {
+		if element.IsRule() {
+			referencedNode := graph.Nodes[element.RuleName]
+			if referencedNode == nil || !referencedNode.HasTerminalAlternatives {
+				return false
+			}
+		}
+		// Literals and lexer rules are always terminal
+	}
+	return true
+}
+```
+
+### Step 3: Generation with Terminal Forcing
+
+```go
+func (g *Generator) generateFromRule(ruleName string, activeRules map[string]bool, depth int) (string, error) {
+	node := g.dependencyGraph.Nodes[ruleName]
+
+	// Grammar validation: ensure the rule can terminate
+	if !node.HasTerminalAlternatives {
+		return "", fmt.Errorf("unsupported grammar: rule '%s' has no terminal alternatives", ruleName)
+	}
+
+	// Force terminal alternatives when hitting limits
+	if activeRules[ruleName] || depth >= g.config.MaxDepth {
+		return g.forceTerminalGeneration(node), nil
+	}
+
+	// Normal generation
+	activeRules[ruleName] = true
+	defer delete(activeRules, ruleName)
+
+	altIndex := g.random.Intn(len(node.Alternatives))
+	return g.generateFromAlternative(node.Alternatives[altIndex], activeRules, depth+1), nil
+}
+
+func (g *Generator) forceTerminalGeneration(node *GraphNode) string {
+	// Choose randomly from terminal alternatives only
+	terminalIndex := g.random.Intn(len(node.TerminalAlternativeIndex))
+	altIndex := node.TerminalAlternativeIndex[terminalIndex]
+
+	// Generate with a fresh context to avoid recursion
+	return g.generateFromAlternative(node.Alternatives[altIndex], make(map[string]bool), 0)
+}
+```
+
+## Special Cases
+
+### Empty Alternatives (ε-transitions)
+
+```antlr
+optionalClause: whereClause | /* empty */ ;
+```
+
+**Handling**: Treat empty alternatives as terminal:
+```go
+// Empty alternatives are always terminal
+if len(alt.Elements) == 0 {
+	node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, altIndex)
+}
+```
+
+### Quantified Elements
+
+```antlr
+stmt: 'BEGIN' stmt* 'END'; // stmt* can match 0 occurrences
+```
+
+**Handling**: The quantifiers `*` and `?` create implicit terminal paths, since they may match zero occurrences:
+```go
+func canElementTerminate(element Element, graph *DependencyGraph) bool {
+	if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q {
+		return true // Can generate 0 occurrences
+	}
+	// Check if the referenced rule can terminate
+	return graph.Nodes[element.RuleName].HasTerminalAlternatives
+}
+```
+
+### Grammar Validation
+
+**Unsupported Grammars**: Rules with no terminal alternatives are rejected:
+```antlr
+// This will cause a validation error
+expr: '(' expr ')'; // No base case!
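+
+// Adding any alternative that can terminate fixes it, for example:
+// expr: '(' expr ')' | NUMBER;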
+``` + +**Error Handling**: +```go +func ValidateGrammar(graph *DependencyGraph) error { + for ruleName, node := range graph.Nodes { + if !node.HasTerminalAlternatives { + return fmt.Errorf("grammar error: rule '%s' has no terminal alternatives", ruleName) + } + } + return nil +} +``` + +## Example: PostgreSQL Expression Rule + +```antlr +a_expr: a_expr '+' a_expr // Alternative 0: NON-TERMINAL (recursive) + | a_expr '*' a_expr // Alternative 1: NON-TERMINAL (recursive) + | '(' a_expr ')' // Alternative 2: NON-TERMINAL (depends on a_expr) + | c_expr // Alternative 3: TERMINAL (if c_expr terminates) + ; + +c_expr: columnref // Alternative 0: TERMINAL (lexer rule) + | '(' a_expr ')' // Alternative 1: NON-TERMINAL (recursive) + ; + +columnref: IDENTIFIER; // TERMINAL (lexer rule) +``` + +**Analysis Result**: +```go +a_expr.HasTerminalAlternatives = true +a_expr.TerminalAlternativeIndex = [3] // Only c_expr alternative + +c_expr.HasTerminalAlternatives = true +c_expr.TerminalAlternativeIndex = [0] // Only columnref alternative +``` + +**Generation Behavior**: +- **Normal case**: Choose any alternative randomly +- **Recursion/MaxDepth**: Force choice from `TerminalAlternativeIndex` only +- **Result**: Always generates valid expressions without stack overflow + +## Benefits + +1. **No Stack Overflow**: Guaranteed termination via terminal forcing +2. **Valid Output**: No placeholders, always generates parseable content +3. **Grammar Coverage**: Supports all ANTLR 4 constructs including quantifiers +4. **Early Validation**: Detects unsupported grammars during initialization +5. **Efficient**: O(1) lookup for terminal alternatives during generation \ No newline at end of file diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index eeead79..6ba18dc 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -12,9 +12,10 @@ import ( // Generator handles the fuzzing logic type Generator struct { - config *config.Config - random *rand.Rand - grammar *grammar.ParsedGrammar + config *config.Config + random *rand.Rand + grammar *grammar.ParsedGrammar + dependencyGraph *grammar.DependencyGraph } // WorkItem represents a unit of work in the generation stack @@ -27,9 +28,10 @@ type WorkItem struct { // New creates a new generator with the given configuration func New(cfg *config.Config) *Generator { return &Generator{ - config: cfg, - random: rand.New(rand.NewSource(cfg.Seed)), - grammar: nil, + config: cfg, + random: rand.New(rand.NewSource(cfg.Seed)), + grammar: nil, + dependencyGraph: nil, } } @@ -46,10 +48,23 @@ func (g *Generator) Generate() error { fmt.Printf("Parsed and merged %d grammar files into single grammar\n", len(g.config.GrammarFiles)) + // Set up dependency graph + g.dependencyGraph = g.grammar.GetDependencyGraph() + + // Validate grammar has terminal alternatives (non-fatal warning) + if err := g.grammar.ValidateGrammar(); err != nil { + fmt.Printf("Grammar validation warning: %v\n", err) + } + // Validate start rule exists if g.grammar.GetRule(g.config.StartRule) == nil { return errors.Errorf("start rule '%s' not found in merged grammar", g.config.StartRule) } + + // Check if start rule has terminal alternatives + if !g.dependencyGraph.HasTerminalAlternatives(g.config.StartRule) { + fmt.Printf("Warning: start rule '%s' has no terminal alternatives\n", g.config.StartRule) + } fmt.Printf("Generating %d queries from rule '%s'...\n", g.config.Count, g.config.StartRule) @@ -70,18 
+85,26 @@ func (g *Generator) getRule(ruleName string) *grammar.Rule { // generateQuery creates a single query using grammar rules func (g *Generator) generateQuery(index int) string { - // Start generation from the specified start rule with no recursion limit for now - result := g.generateFromRule(g.config.StartRule, 0) + // Start generation with fresh active rules tracking + activeRules := make(map[string]bool) + result := g.generateFromRuleWithRecursionTracking(g.config.StartRule, activeRules, 0) return result } -// generateFromRule generates text from a grammar rule -func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { - +// generateFromRuleWithRecursionTracking generates text from a grammar rule with recursion tracking +func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activeRules map[string]bool, depth int) string { + // Check if we're in recursion or hit depth limit + if activeRules[ruleName] || depth >= g.config.MaxDepth { + return g.forceTerminalGeneration(ruleName) + } + + // Mark rule as active + activeRules[ruleName] = true + defer delete(activeRules, ruleName) + // Get the rule rule := g.getRule(ruleName) if rule == nil { - // If rule not found, return placeholder return fmt.Sprintf("<%s>", ruleName) } @@ -96,7 +119,7 @@ func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { // Generate from all elements in the alternative var result []string for _, element := range alternative.Elements { - elementResult := g.generateFromElement(&element, currentDepth) + elementResult := g.generateFromElementWithRecursionTracking(&element, activeRules, depth+1) if elementResult != "" { result = append(result, elementResult) } @@ -105,19 +128,161 @@ func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { // Format output based on configuration switch g.config.OutputFormat { case config.CompactOutput: - // Clean, readable output without verbose comments (default) return joinWithSpaces(result) case config.VerboseOutput: - // Full grammar rule traversal with comments return fmt.Sprintf("/* %s */ %s", ruleName, joinWithSpaces(result)) default: - // Default to compact return joinWithSpaces(result) } } -// generateFromElement generates text from a single grammar element -func (g *Generator) generateFromElement(element *grammar.Element, currentDepth int) string { +// forceTerminalGeneration forces generation of terminal alternatives when recursion is detected +func (g *Generator) forceTerminalGeneration(ruleName string) string { + // Check if rule has terminal alternatives + terminalAlts := g.dependencyGraph.GetTerminalAlternatives(ruleName) + if len(terminalAlts) == 0 { + // No terminal alternatives - use synthetic generation based on rule name + return g.generateSyntheticTerminal(ruleName) + } + + rule := g.getRule(ruleName) + if rule == nil { + return g.generateSyntheticTerminal(ruleName) + } + + // Try to find the best terminal alternative (prefer ones with more literals) + bestAltIndex := g.selectBestTerminalAlternative(rule, terminalAlts) + if bestAltIndex == -1 { + return g.generateSyntheticTerminal(ruleName) + } + + alternative := rule.Alternatives[bestAltIndex] + + // Generate using aggressive terminal mode + result := g.generateFromAlternativeAggressiveTerminal(&alternative, ruleName) + + switch g.config.OutputFormat { + case config.CompactOutput: + return result + case config.VerboseOutput: + return fmt.Sprintf("/* %s[terminal] */ %s", ruleName, result) + default: + return result + } +} + 
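+// NOTE: the fallback chain when a rule must be cut short is: (1) pick one of
+// the rule's precomputed terminal alternatives at random and expand it in
+// aggressive terminal mode; (2) if none exist, emit a synthetic terminal
+// keyed on the rule name (generateSyntheticTerminal below).
+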
+// generateSyntheticTerminal generates a synthetic terminal based on common SQL patterns +func (g *Generator) generateSyntheticTerminal(ruleName string) string { + switch ruleName { + case "selectstmt", "select_no_parens", "select_with_parens": + return "SELECT 1" + case "a_expr", "b_expr", "c_expr": + return "42" + case "insertStmt", "insertstmt": + return "INSERT INTO table1 VALUES (1)" + case "updateStmt", "updatestmt": + return "UPDATE table1 SET col1 = 1" + case "deleteStmt", "deletestmt": + return "DELETE FROM table1" + case "where_clause", "whereClause": + return "WHERE 1=1" + case "having_clause", "havingClause": + return "HAVING 1=1" + case "order_by_clause", "orderByClause", "sort_clause": + return "ORDER BY 1" + case "group_by_clause", "groupByClause": + return "GROUP BY 1" + case "colid", "identifier", "name": + return "col1" + case "tablename", "table_name": + return "table1" + default: + // Return a safe default that indicates the rule couldn't be generated + return fmt.Sprintf("/* %s: synthetic terminal */", ruleName) + } +} + +// generateFromElementWithDepthLimit generates from element with very strict depth limits +func (g *Generator) generateFromElementWithDepthLimit(element *grammar.Element, activeRules map[string]bool, depth int, maxDepth int) string { + if depth >= maxDepth { + return g.generateElementFallback(element) + } + + // Handle optional elements - skip them more aggressively when depth limited + if element.IsOptional() && g.random.Float64() > 0.3 { // Lower probability + return "" + } + + // Handle quantified elements - generate very few + if element.IsQuantified() { + count := 0 + if element.Quantifier == grammar.ONE_MORE { + count = 1 // Only generate minimum required + } + // For ZERO_MORE, count stays 0 + + var results []string + for i := 0; i < count; i++ { + result := g.generateFromElementWithDepthLimit(&grammar.Element{ + Value: element.Value, + Quantifier: grammar.NONE, + }, activeRules, depth+1, maxDepth) + if result != "" { + results = append(results, result) + } + } + return joinWithSpaces(results) + } + + // Generate single element + if element.IsRule() { + if refValue, ok := element.Value.(grammar.ReferenceValue); ok { + if activeRules[refValue.Name] { + return g.generateSyntheticTerminal(refValue.Name) + } + activeRules[refValue.Name] = true + defer delete(activeRules, refValue.Name) + + // Check if this is a lexer rule + if rule := g.grammar.GetRule(refValue.Name); rule != nil && rule.IsLexer { + return g.generateConcreteToken(refValue.Name) + } + return g.generateSyntheticTerminal(refValue.Name) + } + return g.generateElementFallback(element) + } else if element.IsTerminal() { + if litValue, ok := element.Value.(grammar.LiteralValue); ok { + return cleanLiteral(litValue.Text) + } + return cleanLiteral(element.Value.String()) + } + + return element.Value.String() +} + +// generateElementFallback provides fallback generation for complex elements +func (g *Generator) generateElementFallback(element *grammar.Element) string { + if element.IsRule() { + if refValue, ok := element.Value.(grammar.ReferenceValue); ok { + return g.generateSyntheticTerminal(refValue.Name) + } + } else if element.IsTerminal() { + if litValue, ok := element.Value.(grammar.LiteralValue); ok { + return cleanLiteral(litValue.Text) + } + return cleanLiteral(element.Value.String()) + } + return "1" // Ultimate fallback +} + +// generateFromRule generates text from a grammar rule (legacy method, kept for compatibility) +func (g *Generator) generateFromRule(ruleName string, 
currentDepth int) string { + activeRules := make(map[string]bool) + return g.generateFromRuleWithRecursionTracking(ruleName, activeRules, currentDepth) +} + +// generateFromElementWithRecursionTracking generates text from a single grammar element with recursion tracking +func (g *Generator) generateFromElementWithRecursionTracking(element *grammar.Element, activeRules map[string]bool, depth int) string { // Handle optional elements if element.IsOptional() && g.random.Float64() > g.config.OptionalProb { return "" // Skip optional element @@ -125,17 +290,17 @@ func (g *Generator) generateFromElement(element *grammar.Element, currentDepth i // Handle quantified elements if element.IsQuantified() { - return g.generateQuantified(element, currentDepth) + return g.generateQuantifiedWithRecursionTracking(element, activeRules, depth) } // Generate single element if element.IsRule() { if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - return g.generateFromRuleOrToken(refValue.Name, currentDepth+1) + return g.generateFromRuleOrTokenWithRecursionTracking(refValue.Name, activeRules, depth) } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { - return g.generateFromBlock(blockValue, currentDepth) + return g.generateFromBlockWithRecursionTracking(blockValue, activeRules, depth) } - return g.generateFromRuleOrToken(element.Value.String(), currentDepth+1) + return g.generateFromRuleOrTokenWithRecursionTracking(element.Value.String(), activeRules, depth) } else if element.IsTerminal() { if litValue, ok := element.Value.(grammar.LiteralValue); ok { return cleanLiteral(litValue.Text) @@ -146,6 +311,12 @@ func (g *Generator) generateFromElement(element *grammar.Element, currentDepth i return element.Value.String() } +// generateFromElement generates text from a single grammar element (legacy method) +func (g *Generator) generateFromElement(element *grammar.Element, currentDepth int) string { + activeRules := make(map[string]bool) + return g.generateFromElementWithRecursionTracking(element, activeRules, currentDepth) +} + // generateQuantified handles quantified elements (* +) func (g *Generator) generateQuantified(element *grammar.Element, currentDepth int) string { var count int @@ -459,4 +630,203 @@ func joinStrings(strs []string, sep string) string { result += sep + strs[i] } return result +} + +// generateQuantifiedWithRecursionTracking handles quantified elements with recursion tracking +func (g *Generator) generateQuantifiedWithRecursionTracking(element *grammar.Element, activeRules map[string]bool, depth int) string { + var count int + + // Use fixed count if specified, otherwise use random count + if g.config.QuantifierCount > 0 { + count = g.config.QuantifierCount + } else { + switch element.Quantifier { + case grammar.ZERO_MORE: // * + count = g.random.Intn(g.config.MaxQuantifier + 1) // 0 to MaxQuantifier + case grammar.ONE_MORE: // + + count = 1 + g.random.Intn(g.config.MaxQuantifier) // 1 to MaxQuantifier + default: + count = 1 + } + } + + var results []string + for i := 0; i < count; i++ { + if element.IsRule() { + if refValue, ok := element.Value.(grammar.ReferenceValue); ok { + result := g.generateFromRuleOrTokenWithRecursionTracking(refValue.Name, activeRules, depth) + results = append(results, result) + } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { + result := g.generateFromBlockWithRecursionTracking(blockValue, activeRules, depth) + results = append(results, result) + } else { + result := 
g.generateFromRuleOrTokenWithRecursionTracking(element.Value.String(), activeRules, depth) + results = append(results, result) + } + } else if element.IsTerminal() { + if litValue, ok := element.Value.(grammar.LiteralValue); ok { + results = append(results, cleanLiteral(litValue.Text)) + } else { + results = append(results, cleanLiteral(element.Value.String())) + } + } + } + + return joinWithSpaces(results) +} + +// generateFromBlockWithRecursionTracking generates content from a block value with recursion tracking +func (g *Generator) generateFromBlockWithRecursionTracking(blockValue grammar.BlockValue, activeRules map[string]bool, depth int) string { + if len(blockValue.Alternatives) == 0 { + return "" + } + + // Select a random alternative from the block + altIndex := g.random.Intn(len(blockValue.Alternatives)) + alternative := blockValue.Alternatives[altIndex] + + // Generate from all elements in the selected alternative + var result []string + for _, element := range alternative.Elements { + elementResult := g.generateFromElementWithRecursionTracking(&element, activeRules, depth) + if elementResult != "" { + result = append(result, elementResult) + } + } + + return joinWithSpaces(result) +} + +// generateFromRuleOrTokenWithRecursionTracking generates from a rule using recursion tracking +func (g *Generator) generateFromRuleOrTokenWithRecursionTracking(ruleName string, activeRules map[string]bool, depth int) string { + // Check if this is a lexer rule and generate concrete token + if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer { + return g.generateConcreteToken(ruleName) + } + + // Otherwise expand as parser rule with recursion tracking + return g.generateFromRuleWithRecursionTracking(ruleName, activeRules, depth) +} + +// selectBestTerminalAlternative randomly selects from terminal alternatives +func (g *Generator) selectBestTerminalAlternative(rule *grammar.Rule, terminalAlts []int) int { + if len(terminalAlts) == 0 { + return -1 + } + + // Simply choose randomly from available terminal alternatives + randomIndex := g.random.Intn(len(terminalAlts)) + return terminalAlts[randomIndex] +} + +// generateFromAlternativeAggressiveTerminal generates from an alternative using aggressive terminal mode +func (g *Generator) generateFromAlternativeAggressiveTerminal(alt *grammar.Alternative, ruleName string) string { + var result []string + + for _, element := range alt.Elements { + elementResult := g.generateFromElementAggressiveTerminal(&element, ruleName) + if elementResult != "" { + result = append(result, elementResult) + } + } + + if len(result) == 0 { + // Ultimate fallback - use simple pattern based on rule name + return g.generateSimpleFallback(ruleName) + } + + return joinWithSpaces(result) +} + +// generateFromElementAggressiveTerminal generates from an element using aggressive terminal mode +func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Element, contextRuleName string) string { + // Handle quantified elements - be very conservative + if element.IsQuantified() { + if element.Quantifier == grammar.ZERO_MORE || element.Quantifier == grammar.OPTIONAL_Q { + // Skip optional/zero-more elements in terminal mode + return "" + } else if element.Quantifier == grammar.ONE_MORE { + // Generate exactly one for ONE_MORE + nonQuantifiedElement := grammar.Element{ + Value: element.Value, + Quantifier: grammar.NONE, + } + return g.generateFromElementAggressiveTerminal(&nonQuantifiedElement, contextRuleName) + } + } + + // Handle different element types 
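+	// (literals are returned as-is; rule references collapse to a concrete
+	// token for simple lexer rules, or to a name-based fallback value,
+	// instead of expanding the full grammar)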
+ if element.IsTerminal() { + // Direct literal - just return it + if literal, ok := element.Value.(grammar.LiteralValue); ok { + return strings.Trim(literal.Text, "'\"") + } + return element.Value.String() + } + + if element.IsRule() { + switch value := element.Value.(type) { + case grammar.ReferenceValue: + // Check if it's a simple lexer rule + if g.isSimpleLexerRule(value.Name) { + return g.generateConcreteToken(value.Name) + } + + // For parser rules, generate simple fallback based on rule name + return g.generateSimpleFallback(value.Name) + + case grammar.BlockValue: + // For blocks, try the first alternative only + if len(value.Alternatives) > 0 { + return g.generateFromAlternativeAggressiveTerminal(&value.Alternatives[0], contextRuleName) + } + return "" + } + } + + return "" +} + +// isSimpleLexerRule checks if a rule is a simple lexer rule that can be safely generated +func (g *Generator) isSimpleLexerRule(ruleName string) bool { + rule := g.getRule(ruleName) + if rule == nil || !rule.IsLexer { + return false + } + + // Consider lexer rules with simple patterns as safe + simpleLexerRules := map[string]bool{ + "IDENTIFIER": true, "ID": true, "NAME": true, + "INTEGER": true, "NUMBER": true, "NUMERIC": true, "INT": true, + "STRING": true, "STRING_LITERAL": true, + "SELECT": true, "FROM": true, "WHERE": true, "AND": true, "OR": true, + "COMMA": true, "SEMICOLON": true, "DOT": true, + "OPEN_PAREN": true, "CLOSE_PAREN": true, + "PLUS": true, "MINUS": true, "STAR": true, "SLASH": true, + } + + return simpleLexerRules[ruleName] +} + +// generateSimpleFallback generates a simple fallback value based on rule name patterns +func (g *Generator) generateSimpleFallback(ruleName string) string { + // Generate context-appropriate fallbacks + ruleLower := strings.ToLower(ruleName) + + if strings.Contains(ruleLower, "expr") || strings.Contains(ruleLower, "expression") { + return "1" + } else if strings.Contains(ruleLower, "name") || strings.Contains(ruleLower, "id") { + return "col1" + } else if strings.Contains(ruleLower, "list") { + return "1" + } else if strings.Contains(ruleLower, "clause") { + return "1" + } else if strings.Contains(ruleLower, "stmt") || strings.Contains(ruleLower, "statement") { + return "SELECT 1" + } else if strings.Contains(ruleLower, "select") { + return "SELECT 1" + } else { + // Generic fallback + return "1" + } } \ No newline at end of file diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go new file mode 100644 index 0000000..b36fa7a --- /dev/null +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -0,0 +1,273 @@ +package grammar + +import ( + "fmt" +) + +// DependencyGraph represents the dependency relationships between grammar rules +type DependencyGraph struct { + Nodes map[string]*GraphNode +} + +// GraphNode represents a single rule in the dependency graph +type GraphNode struct { + RuleName string // Rule name (e.g., "selectStmt", "expr") + HasTerminalAlternatives bool // Can reach terminal without recursion + Alternatives []Alternative // All alternatives for this rule + TerminalAlternativeIndex []int // Indices of alternatives that can terminate + IsLexer bool // Whether this is a lexer rule +} + +// NewDependencyGraph creates a new dependency graph +func NewDependencyGraph() *DependencyGraph { + return &DependencyGraph{ + Nodes: make(map[string]*GraphNode), + } +} + +// AddNode adds a rule node to the dependency graph +func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { + node := &GraphNode{ 
+ RuleName: ruleName, + HasTerminalAlternatives: false, + Alternatives: rule.Alternatives, + TerminalAlternativeIndex: []int{}, + IsLexer: rule.IsLexer, + } + g.Nodes[ruleName] = node +} + +// GetNode retrieves a node by rule name +func (g *DependencyGraph) GetNode(ruleName string) *GraphNode { + return g.Nodes[ruleName] +} + +// AnalyzeTerminalReachability performs terminal reachability analysis on the graph +func (g *DependencyGraph) AnalyzeTerminalReachability() { + // Phase 1: Mark lexer rules as terminal + g.markLexerRulesAsTerminal() + + // Phase 2: Propagate terminal reachability using fixed-point iteration + g.propagateTerminalReachability() +} + +// markLexerRulesAsTerminal marks all lexer rules as having terminal alternatives +func (g *DependencyGraph) markLexerRulesAsTerminal() { + for _, node := range g.Nodes { + if node.IsLexer { + node.HasTerminalAlternatives = true + // All lexer alternatives are considered terminal + for i := range node.Alternatives { + node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, i) + } + } + } +} + +// propagateTerminalReachability uses fixed-point iteration to determine which rules can terminate +func (g *DependencyGraph) propagateTerminalReachability() { + changed := true + iterations := 0 + maxIterations := len(g.Nodes) * 2 // Prevent infinite loops + + for changed && iterations < maxIterations { + changed = false + iterations++ + + for _, node := range g.Nodes { + if node.IsLexer { + continue // Already processed + } + + // Check each alternative to see if it can terminate + for altIndex, alt := range node.Alternatives { + // Skip if this alternative is already marked as terminal + if g.isAlternativeAlreadyMarked(node, altIndex) { + continue + } + + if g.canAlternativeTerminate(alt) { + if !node.HasTerminalAlternatives { + node.HasTerminalAlternatives = true + changed = true + } + node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, altIndex) + changed = true + } + } + } + } + + if iterations >= maxIterations { + fmt.Printf("Warning: Terminal reachability analysis reached max iterations (%d)\n", maxIterations) + } +} + +// isAlternativeAlreadyMarked checks if an alternative is already in the terminal list +func (g *DependencyGraph) isAlternativeAlreadyMarked(node *GraphNode, altIndex int) bool { + for _, terminalIndex := range node.TerminalAlternativeIndex { + if terminalIndex == altIndex { + return true + } + } + return false +} + +// CanAlternativeTerminate checks if an alternative can terminate without recursion (exported for testing) +func (g *DependencyGraph) CanAlternativeTerminate(alt Alternative) bool { + return g.canAlternativeTerminate(alt) +} + +// CanElementTerminate checks if a single element can terminate (exported for testing) +func (g *DependencyGraph) CanElementTerminate(element Element) bool { + return g.canElementTerminate(element) +} + +// CanBlockValueTerminate checks if a block value can terminate (exported for testing) +func (g *DependencyGraph) CanBlockValueTerminate(block BlockValue) bool { + return g.canBlockValueTerminate(block) +} + +// canAlternativeTerminate checks if an alternative can terminate without recursion +func (g *DependencyGraph) canAlternativeTerminate(alt Alternative) bool { + // Empty alternative (ε-transition) can always terminate + if len(alt.Elements) == 0 { + return true + } + + // Check each element in the alternative + for _, element := range alt.Elements { + if !g.canElementTerminate(element) { + return false + } + } + + return true +} + +// 
canElementTerminate checks if a single element can terminate +func (g *DependencyGraph) canElementTerminate(element Element) bool { + // Terminal elements (literals) can always terminate + if element.IsTerminal() { + return true + } + + // Handle quantified elements + if element.IsQuantified() { + // * and ? quantifiers can generate 0 occurrences, so they can terminate + if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { + return true + } + // + quantifier requires at least one occurrence, so check the referenced rule + if element.Quantifier == ONE_MORE { + return g.canRuleReferenceTerminate(element) + } + } + + // For rule references, check if the referenced rule can terminate + if element.IsRule() { + return g.canRuleReferenceTerminate(element) + } + + return false +} + +// canRuleReferenceTerminate checks if a rule reference can terminate +func (g *DependencyGraph) canRuleReferenceTerminate(element Element) bool { + var referencedRuleName string + + // Extract rule name based on element value type + switch value := element.Value.(type) { + case ReferenceValue: + referencedRuleName = value.Name + case BlockValue: + // For block values, we need to check if any alternative in the block can terminate + return g.canBlockValueTerminate(value) + default: + return false + } + + // Check if the referenced rule exists and can terminate + referencedNode := g.GetNode(referencedRuleName) + if referencedNode == nil { + // Handle ANTLR built-in tokens that are always terminal + if isAntlrBuiltinToken(referencedRuleName) { + return true + } + // Rule not found - could be a forward reference or external rule + // For now, we'll be conservative and assume it cannot terminate + return false + } + + return referencedNode.HasTerminalAlternatives +} + +// canBlockValueTerminate checks if a block value can terminate +func (g *DependencyGraph) canBlockValueTerminate(block BlockValue) bool { + // A block can terminate if any of its alternatives can terminate + for _, alt := range block.Alternatives { + if g.canAlternativeTerminate(alt) { + return true + } + } + return false +} + +// ValidateGrammar checks if all rules have at least one terminal alternative +func (g *DependencyGraph) ValidateGrammar() error { + var invalidRules []string + + for ruleName, node := range g.Nodes { + if !node.HasTerminalAlternatives { + invalidRules = append(invalidRules, ruleName) + } + } + + if len(invalidRules) > 0 { + return fmt.Errorf("grammar validation failed: the following rules have no terminal alternatives: %v", invalidRules) + } + + return nil +} + +// GetTerminalAlternatives returns the indices of terminal alternatives for a rule +func (g *DependencyGraph) GetTerminalAlternatives(ruleName string) []int { + node := g.GetNode(ruleName) + if node == nil { + return nil + } + return node.TerminalAlternativeIndex +} + +// HasTerminalAlternatives checks if a rule has terminal alternatives +func (g *DependencyGraph) HasTerminalAlternatives(ruleName string) bool { + node := g.GetNode(ruleName) + if node == nil { + return false + } + return node.HasTerminalAlternatives +} + +// PrintAnalysisResults prints the dependency graph analysis results for debugging +func (g *DependencyGraph) PrintAnalysisResults() { + fmt.Println("=== Dependency Graph Analysis Results ===") + for ruleName, node := range g.Nodes { + fmt.Printf("Rule: %s (lexer=%t)\n", ruleName, node.IsLexer) + fmt.Printf(" HasTerminalAlternatives: %t\n", node.HasTerminalAlternatives) + fmt.Printf(" TerminalAlternativeIndex: %v\n", 
node.TerminalAlternativeIndex)
+		fmt.Printf("  Total alternatives: %d\n", len(node.Alternatives))
+		fmt.Println()
+	}
+}
+
+// isAntlrBuiltinToken checks if a token name refers to an ANTLR built-in token
+// that should always be considered terminal
+func isAntlrBuiltinToken(tokenName string) bool {
+	// ANTLR built-in tokens that are always terminal
+	builtinTokens := map[string]bool{
+		"EOF":   true, // End-of-file token
+		"<EOF>": true, // Alternative EOF notation
+	}
+
+	return builtinTokens[tokenName]
+}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go
index cd43d1c..e79170b 100644
--- a/tools/fuzzing/internal/grammar/parser.go
+++ b/tools/fuzzing/internal/grammar/parser.go
@@ -18,6 +18,8 @@ type ParsedGrammar struct {
 	// BlockAltMap stores temporary block rules for debugging
 	// Key: block ID (e.g., "block_1_alts"), Value: the block alternatives
 	BlockAltMap map[string][]Alternative
+	// DependencyGraph for recursion analysis
+	DependencyGraph *DependencyGraph
 }
 
 // Rule represents a grammar rule with its alternatives
@@ -148,12 +150,34 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) {
 
-	return &ParsedGrammar{
-		LexerRules:  visitor.lexerRules,
-		ParserRules: visitor.parserRules,
-		FilePath:    filePath,
-		BlockAltMap: visitor.blockAltMap,
-	}, nil
+	parsedGrammar := &ParsedGrammar{
+		LexerRules:      visitor.lexerRules,
+		ParserRules:     visitor.parserRules,
+		FilePath:        filePath,
+		BlockAltMap:     visitor.blockAltMap,
+		DependencyGraph: NewDependencyGraph(),
+	}
+
+	// Build dependency graph
+	buildDependencyGraph(parsedGrammar)
+
+	return parsedGrammar, nil
+}
+
+// buildDependencyGraph constructs the dependency graph for the parsed grammar
+func buildDependencyGraph(parsedGrammar *ParsedGrammar) {
+	// Add all lexer rules to the graph
+	for ruleName, rule := range parsedGrammar.LexerRules {
+		parsedGrammar.DependencyGraph.AddNode(ruleName, rule)
+	}
+
+	// Add all parser rules to the graph
+	for ruleName, rule := range parsedGrammar.ParserRules {
+		parsedGrammar.DependencyGraph.AddNode(ruleName, rule)
+	}
+
+	// Perform terminal reachability analysis
+	parsedGrammar.DependencyGraph.AnalyzeTerminalReachability()
 }
 
 // GetRule gets a rule by name from either lexer or parser rules
@@ -222,6 +246,10 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error {
 		g.FilePath = fmt.Sprintf("%s + %s", g.FilePath, other.FilePath)
 	}
 
+	// Rebuild dependency graph with merged rules
+	g.DependencyGraph = NewDependencyGraph()
+	buildDependencyGraph(g)
+
 	return nil
 }
 
@@ -253,6 +281,26 @@ func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) {
 	return mergedGrammar, nil
 }
 
+// GetDependencyGraph returns the dependency graph for the parsed grammar
+func (g *ParsedGrammar) GetDependencyGraph() *DependencyGraph {
+	return g.DependencyGraph
+}
+
+// ValidateGrammar validates that the grammar has valid dependency structure
+func (g *ParsedGrammar) ValidateGrammar() error {
+	if g.DependencyGraph == nil {
+		return fmt.Errorf("dependency graph not built")
+	}
+	return g.DependencyGraph.ValidateGrammar()
+}
+
+// PrintDependencyAnalysis prints dependency graph analysis for debugging
+func (g *ParsedGrammar) PrintDependencyAnalysis() {
+	if g.DependencyGraph != nil {
+		g.DependencyGraph.PrintAnalysisResults()
+	}
+}
+
 // IsRule checks if an element refers to another rule or generated block
 func (e *Element) IsRule() bool {
 	_, isRef := e.Value.(ReferenceValue)
@@ -274,7 +322,7 @@ func (e *Element) IsOptional() bool 
{ // IsQuantified checks if an element has repetition quantifiers func (e *Element) IsQuantified() bool { - return e.Quantifier == ZERO_MORE || e.Quantifier == ONE_MORE + return e.Quantifier == ZERO_MORE || e.Quantifier == ONE_MORE || e.Quantifier == OPTIONAL_Q } // GrammarErrorListener collects parsing errors diff --git a/tools/fuzzing/tests/dependency_test.go b/tools/fuzzing/tests/dependency_test.go new file mode 100644 index 0000000..25b5b10 --- /dev/null +++ b/tools/fuzzing/tests/dependency_test.go @@ -0,0 +1,155 @@ +package tests + +import ( + "fmt" + "path/filepath" + "testing" + + "github.com/bytebase/parser/tools/fuzzing/internal/grammar" +) + +func TestDependencyGraphConstruction(t *testing.T) { + repoRoot := getRepoRoot() + + // PostgreSQL grammar file paths + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") + parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") + + // Parse grammar files + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) + if err != nil { + t.Fatalf("Failed to parse grammar files: %v", err) + } + + // Test dependency graph exists + depGraph := parsedGrammar.GetDependencyGraph() + if depGraph == nil { + t.Fatal("Dependency graph was not created") + } + + // Test that nodes were created for rules + totalRules := len(parsedGrammar.GetAllRules()) + if len(depGraph.Nodes) != totalRules { + t.Errorf("Expected %d nodes in dependency graph, got %d", totalRules, len(depGraph.Nodes)) + } + + // Test lexer rules are marked as terminal + lexerTerminalCount := 0 + for ruleName := range parsedGrammar.LexerRules { + if depGraph.HasTerminalAlternatives(ruleName) { + lexerTerminalCount++ + } + } + + fmt.Printf("Lexer rules marked as terminal: %d/%d\n", lexerTerminalCount, len(parsedGrammar.LexerRules)) + + if lexerTerminalCount == 0 { + t.Error("No lexer rules were marked as terminal") + } + + // Debug: Print first 10 lexer and parser rules to see what's available + fmt.Println("\nFirst 10 lexer rules:") + count := 0 + for ruleName := range parsedGrammar.LexerRules { + if count < 10 { + fmt.Printf(" %s\n", ruleName) + count++ + } + } + + fmt.Println("\nFirst 10 parser rules:") + count = 0 + for ruleName := range parsedGrammar.ParserRules { + if count < 10 { + node := depGraph.GetNode(ruleName) + fmt.Printf(" %s (HasTerminal=%t, TerminalAlts=%v)\n", + ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) + count++ + } + } + + // Test some specific rules that should exist + testRules := []string{"selectstmt", "a_expr", "IDENT", "ICONST"} + + for _, ruleName := range testRules { + node := depGraph.GetNode(ruleName) + if node != nil { + fmt.Printf("Rule %s: HasTerminalAlternatives=%t, TerminalAlts=%v\n", + ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) + } else { + fmt.Printf("Rule %s: Not found in dependency graph\n", ruleName) + } + } + + t.Log("Dependency graph construction completed successfully") +} + +func TestGrammarValidation(t *testing.T) { + repoRoot := getRepoRoot() + + // PostgreSQL grammar file paths + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") + parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") + + // Parse grammar files + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) + if err != nil { + t.Fatalf("Failed to parse grammar files: %v", err) + } + + // Validate grammar + err = parsedGrammar.ValidateGrammar() + if err != nil { + t.Errorf("Grammar 
validation failed: %v", err) + + // Print analysis results for debugging + fmt.Println("\n=== Grammar Analysis Results ===") + parsedGrammar.PrintDependencyAnalysis() + } else { + t.Log("Grammar validation passed - all rules have terminal alternatives") + } +} + +func TestDependencyGraphSpecificRules(t *testing.T) { + repoRoot := getRepoRoot() + + // PostgreSQL grammar file paths + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") + parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") + + // Parse grammar files + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) + if err != nil { + t.Fatalf("Failed to parse grammar files: %v", err) + } + + depGraph := parsedGrammar.GetDependencyGraph() + + // Test specific known patterns + tests := []struct { + ruleName string + expectTerminal bool + description string + }{ + {"OPEN_PAREN", true, "Lexer rule should be terminal"}, + {"SELECT", true, "Lexer rule should be terminal"}, + {"selectstmt", true, "Should have at least one terminal alternative"}, + {"a_expr", true, "Expression rule should have terminal alternatives"}, + } + + for _, test := range tests { + hasTerminal := depGraph.HasTerminalAlternatives(test.ruleName) + if hasTerminal != test.expectTerminal { + t.Errorf("Rule %s: expected HasTerminalAlternatives=%t, got %t (%s)", + test.ruleName, test.expectTerminal, hasTerminal, test.description) + } + + if hasTerminal { + terminalAlts := depGraph.GetTerminalAlternatives(test.ruleName) + if len(terminalAlts) == 0 { + t.Errorf("Rule %s: HasTerminalAlternatives=true but no terminal alternatives found", test.ruleName) + } + fmt.Printf("✓ Rule %s has %d terminal alternatives: %v\n", test.ruleName, len(terminalAlts), terminalAlts) + } + } +} \ No newline at end of file diff --git a/tools/fuzzing/tests/postgresql_test.go b/tools/fuzzing/tests/postgresql_test.go index 406db98..3ba36e2 100644 --- a/tools/fuzzing/tests/postgresql_test.go +++ b/tools/fuzzing/tests/postgresql_test.go @@ -89,104 +89,3 @@ func TestPostgreSQLSelectStmt(t *testing.T) { }) } } - -func TestPostgreSQLExpressions(t *testing.T) { - repoRoot := getRepoRoot() - - // PostgreSQL grammar file paths - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - cfg := &config.Config{ - GrammarFiles: []string{lexerPath, parserPath}, - StartRule: "a_expr", // PostgreSQL expression rule - Count: 5, - MaxDepth: 4, - OptionalProb: 0.6, - MaxQuantifier: 2, - MinQuantifier: 1, - QuantifierCount: 0, - OutputFormat: config.CompactOutput, - Seed: 789, - } - - fmt.Printf("\n=== PostgreSQL Expressions ===\n") - fmt.Printf("Generating %d expressions with max depth %d\n", cfg.Count, cfg.MaxDepth) - fmt.Println() - - gen := generator.New(cfg) - err := gen.Generate() - - if err != nil { - t.Errorf("Failed to generate PostgreSQL expressions: %v", err) - } else { - t.Logf("Successfully generated %d PostgreSQL expressions", cfg.Count) - } -} - -func TestPostgreSQLVerboseOutput(t *testing.T) { - repoRoot := getRepoRoot() - - // PostgreSQL grammar file paths - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - cfg := &config.Config{ - GrammarFiles: []string{lexerPath, parserPath}, - StartRule: "selectstmt", - Count: 2, - MaxDepth: 4, - OptionalProb: 0.8, - MaxQuantifier: 2, - MinQuantifier: 1, - QuantifierCount: 0, - OutputFormat: 
config.VerboseOutput, // Show rule traversal - Seed: 999, - } - - fmt.Printf("\n=== PostgreSQL Verbose Output ===\n") - fmt.Printf("Generating with verbose output to show rule traversal\n") - fmt.Println() - - gen := generator.New(cfg) - err := gen.Generate() - - if err != nil { - t.Errorf("Failed to generate PostgreSQL statements with verbose output: %v", err) - } else { - t.Logf("Successfully generated PostgreSQL statements with verbose output") - } -} - -// Benchmark test for performance measurement -func BenchmarkPostgreSQLGeneration(b *testing.B) { - repoRoot := getRepoRoot() - - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - cfg := &config.Config{ - GrammarFiles: []string{lexerPath, parserPath}, - StartRule: "selectstmt", - Count: 1, - MaxDepth: 6, - OptionalProb: 0.5, - MaxQuantifier: 3, - MinQuantifier: 1, - QuantifierCount: 0, - OutputFormat: config.CompactOutput, - Seed: 42, - } - - gen := generator.New(cfg) - - // Reset the timer to exclude setup time - b.ResetTimer() - - for i := 0; i < b.N; i++ { - err := gen.Generate() - if err != nil { - b.Fatalf("Generation failed: %v", err) - } - } -} diff --git a/tools/fuzzing/tests/recursive_test.go b/tools/fuzzing/tests/recursive_test.go new file mode 100644 index 0000000..a48b783 --- /dev/null +++ b/tools/fuzzing/tests/recursive_test.go @@ -0,0 +1,81 @@ +package tests + +import ( + "os" + "path/filepath" + "testing" + + "github.com/bytebase/parser/tools/fuzzing/internal/grammar" +) + +func TestPureLeftRecursiveGrammarNonTerminal(t *testing.T) { + // Create a grammar with ONLY left-recursive rules (no terminal alternatives) + tempDir := t.TempDir() + grammarContent := `grammar PureLeftRecursive; + +// Parser rules +root: expr EOF; + +// This rule has NO terminal alternatives - pure left recursion +expr: expr '+' expr + | expr '*' expr + ; + +// Lexer rules +PLUS: '+'; +MULTIPLY: '*'; +WS: [ \t\r\n]+ -> skip; +EOF: ''; +` + + grammarFile := filepath.Join(tempDir, "PureLeftRecursive.g4") + err := os.WriteFile(grammarFile, []byte(grammarContent), 0644) + if err != nil { + t.Fatalf("Failed to create pure left-recursive grammar file: %v", err) + } + + // Parse the grammar + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{grammarFile}) + if err != nil { + t.Fatalf("Failed to parse pure left-recursive grammar: %v", err) + } + + depGraph := parsedGrammar.GetDependencyGraph() + + // Check that the expr rule is NOT terminal + exprNode := depGraph.GetNode("expr") + if exprNode == nil { + t.Fatal("expr rule not found in dependency graph") + } + + t.Logf("=== Pure Left-Recursive Grammar Analysis ===") + t.Logf("expr rule has %d alternatives", len(exprNode.Alternatives)) + t.Logf("expr HasTerminalAlternatives: %t", exprNode.HasTerminalAlternatives) + t.Logf("expr TerminalAlternativeIndex: %v", exprNode.TerminalAlternativeIndex) + + // This rule should NOT have terminal alternatives because all alternatives + // are left-recursive and there's no base case + if exprNode.HasTerminalAlternatives { + t.Errorf("Expected pure left-recursive expr rule to NOT have terminal alternatives, but it does") + } + + // Validate the grammar should fail + err = depGraph.ValidateGrammar() + if err == nil { + t.Errorf("Expected grammar validation to fail for pure left-recursive grammar, but it passed") + } else { + t.Logf("Grammar validation correctly failed: %v", err) + } + + // Check that root is also non-terminal because it depends on 
non-terminal expr + rootNode := depGraph.GetNode("root") + if rootNode == nil { + t.Fatal("root rule not found in dependency graph") + } + + if rootNode.HasTerminalAlternatives { + t.Errorf("Expected root rule to be non-terminal due to non-terminal expr dependency, but it's terminal") + } + + t.Logf("root HasTerminalAlternatives: %t (expected false)", rootNode.HasTerminalAlternatives) +} diff --git a/tools/fuzzing/tests/terminal_completeness_test.go b/tools/fuzzing/tests/terminal_completeness_test.go new file mode 100644 index 0000000..3601829 --- /dev/null +++ b/tools/fuzzing/tests/terminal_completeness_test.go @@ -0,0 +1,361 @@ +package tests + +import ( + "fmt" + "path/filepath" + "sort" + "testing" + + "github.com/bytebase/parser/tools/fuzzing/internal/grammar" +) + +func TestPostgreSQLTerminalCompleteness(t *testing.T) { + repoRoot := getRepoRoot() + + // PostgreSQL grammar file paths + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") + parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") + + // Parse grammar files + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) + if err != nil { + t.Fatalf("Failed to parse grammar files: %v", err) + } + + depGraph := parsedGrammar.GetDependencyGraph() + + // Analyze all nodes + fmt.Println("=== PostgreSQL Grammar Terminal Analysis ===") + + totalNodes := len(depGraph.Nodes) + terminalNodes := 0 + nonTerminalNodes := 0 + + var terminalRules []string + var nonTerminalRules []string + + lexerTerminal := 0 + lexerNonTerminal := 0 + parserTerminal := 0 + parserNonTerminal := 0 + + for ruleName, node := range depGraph.Nodes { + if node.HasTerminalAlternatives { + terminalNodes++ + terminalRules = append(terminalRules, ruleName) + if node.IsLexer { + lexerTerminal++ + } else { + parserTerminal++ + } + } else { + nonTerminalNodes++ + nonTerminalRules = append(nonTerminalRules, ruleName) + if node.IsLexer { + lexerNonTerminal++ + } else { + parserNonTerminal++ + } + } + } + + fmt.Printf("Total Nodes: %d\n", totalNodes) + fmt.Printf("Terminal Nodes: %d (%.1f%%)\n", terminalNodes, float64(terminalNodes)/float64(totalNodes)*100) + fmt.Printf("Non-Terminal Nodes: %d (%.1f%%)\n", nonTerminalNodes, float64(nonTerminalNodes)/float64(totalNodes)*100) + fmt.Println() + + fmt.Printf("Lexer Rules: Terminal=%d, Non-Terminal=%d\n", lexerTerminal, lexerNonTerminal) + fmt.Printf("Parser Rules: Terminal=%d, Non-Terminal=%d\n", parserTerminal, parserNonTerminal) + fmt.Println() + + // Show non-terminal rules (these should ideally be zero) + if len(nonTerminalRules) > 0 { + sort.Strings(nonTerminalRules) + fmt.Printf("❌ Non-Terminal Rules (%d):\n", len(nonTerminalRules)) + for i, ruleName := range nonTerminalRules { + node := depGraph.GetNode(ruleName) + ruleType := "PARSER" + if node.IsLexer { + ruleType = "LEXER" + } + fmt.Printf(" %d. 
%s (%s, %d alternatives)\n", i+1, ruleName, ruleType, len(node.Alternatives)) + } + fmt.Println() + } else { + fmt.Println("✅ All rules have terminal alternatives!") + } + + // Test: If your hypothesis is correct, this should pass + if nonTerminalNodes == 0 { + t.Log("✅ HYPOTHESIS CONFIRMED: All PostgreSQL rules have terminal alternatives") + } else { + t.Errorf("❌ HYPOTHESIS REJECTED: %d rules have no terminal alternatives", nonTerminalNodes) + + // Analyze WHY these rules don't have terminal alternatives + fmt.Println("=== Analysis of Non-Terminal Rules ===") + analyzeNonTerminalRules(parsedGrammar, depGraph, nonTerminalRules[:min(5, len(nonTerminalRules))], t) + } +} + +func TestSpecificNonTerminalRules(t *testing.T) { + repoRoot := getRepoRoot() + + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") + parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") + + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) + if err != nil { + t.Fatalf("Failed to parse grammar files: %v", err) + } + + depGraph := parsedGrammar.GetDependencyGraph() + + // Test specific rules that should be terminal based on our earlier analysis + expectedTerminalRules := []string{ + "columnref", // Should be terminal (colid + indirection) + "c_expr", // Should be terminal (has columnref alternative) + "a_expr_typecast", // Should be terminal (depends on c_expr) + "a_expr_collate", // Should be terminal (depends on a_expr_typecast) + } + + fmt.Println("=== Testing Expected Terminal Rules ===") + for _, ruleName := range expectedTerminalRules { + node := depGraph.GetNode(ruleName) + if node == nil { + t.Errorf("Rule %s not found", ruleName) + continue + } + + fmt.Printf("%s: HasTerminal=%t, TerminalAlts=%v\n", + ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) + + if !node.HasTerminalAlternatives { + t.Errorf("Expected %s to be terminal, but it's not", ruleName) + } + } +} + +func analyzeNonTerminalRules(parsedGrammar *grammar.ParsedGrammar, depGraph *grammar.DependencyGraph, ruleNames []string, t *testing.T) { + for _, ruleName := range ruleNames { + rule := parsedGrammar.GetRule(ruleName) + if rule == nil { + continue + } + + fmt.Printf("\n--- Analyzing %s ---\n", ruleName) + fmt.Printf("Type: %s, Alternatives: %d\n", + map[bool]string{true: "LEXER", false: "PARSER"}[rule.IsLexer], len(rule.Alternatives)) + + for altIndex, alt := range rule.Alternatives { + canTerminate := depGraph.CanAlternativeTerminate(alt) + fmt.Printf(" Alt %d (%d elements): canTerminate=%t\n", altIndex, len(alt.Elements), canTerminate) + + for elemIndex, element := range alt.Elements { + canElemTerminate := depGraph.CanElementTerminate(element) + fmt.Printf(" Elem %d: %s", elemIndex, element.Value.String()) + + if element.IsQuantified() { + fmt.Printf("[%v]", element.Quantifier) + } + + fmt.Printf(" → canTerminate=%t", canElemTerminate) + + // If it's a rule reference, show the referenced rule's status + if element.IsRule() { + if refValue, ok := element.Value.(grammar.ReferenceValue); ok { + referencedNode := depGraph.GetNode(refValue.Name) + if referencedNode != nil { + fmt.Printf(" (ref: %s, hasTerminal=%t)", refValue.Name, referencedNode.HasTerminalAlternatives) + } else { + fmt.Printf(" (ref: %s, NOT_FOUND)", refValue.Name) + } + } + } + + fmt.Println() + } + } + } +} + +// Test to validate that our terminal propagation algorithm is working correctly +func TestManualTerminalPropagation(t *testing.T) { + repoRoot := getRepoRoot() + + 
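// Parse the PostgreSQL grammars, then re-run the fixed-point propagation
+	// by hand below so each iteration's progress can be logged.
+	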
lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") + parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") + + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) + if err != nil { + t.Fatalf("Failed to parse grammar files: %v", err) + } + + // Create fresh dependency graph and run manual propagation + freshGraph := grammar.NewDependencyGraph() + + // Add all nodes + for ruleName, rule := range parsedGrammar.GetAllRules() { + freshGraph.AddNode(ruleName, rule) + } + + // Mark lexer rules as terminal + initialTerminalCount := 0 + for _, node := range freshGraph.Nodes { + if node.IsLexer { + node.HasTerminalAlternatives = true + for i := range node.Alternatives { + node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, i) + } + initialTerminalCount++ + } + } + + fmt.Printf("Starting with %d lexer rules marked as terminal\n", initialTerminalCount) + + // Manual propagation with more iterations + maxIterations := 100 + totalNewTerminals := 0 + + for iteration := 0; iteration < maxIterations; iteration++ { + changed := false + newTerminalsThisIteration := 0 + + for _, node := range freshGraph.Nodes { + if node.IsLexer || node.HasTerminalAlternatives { + continue + } + + for altIndex, alt := range node.Alternatives { + // Check if already marked + alreadyMarked := false + for _, termIndex := range node.TerminalAlternativeIndex { + if termIndex == altIndex { + alreadyMarked = true + break + } + } + if alreadyMarked { + continue + } + + // Check if this alternative can terminate + if canAlternativeTerminateManual(alt, freshGraph) { + if !node.HasTerminalAlternatives { + node.HasTerminalAlternatives = true + changed = true + newTerminalsThisIteration++ + totalNewTerminals++ + } + node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, altIndex) + } + } + } + + if newTerminalsThisIteration > 0 { + fmt.Printf("Iteration %d: +%d new terminal rules (total: %d)\n", + iteration+1, newTerminalsThisIteration, initialTerminalCount+totalNewTerminals) + } + + if !changed { + fmt.Printf("Converged after %d iterations\n", iteration+1) + break + } + + if iteration == maxIterations-1 { + fmt.Printf("Reached max iterations (%d)\n", maxIterations) + } + } + + // Count final results + finalTerminalCount := 0 + finalNonTerminalCount := 0 + + for _, node := range freshGraph.Nodes { + if node.HasTerminalAlternatives { + finalTerminalCount++ + } else { + finalNonTerminalCount++ + } + } + + fmt.Printf("\nFinal Results:\n") + fmt.Printf("Terminal: %d\n", finalTerminalCount) + fmt.Printf("Non-Terminal: %d\n", finalNonTerminalCount) + + if finalNonTerminalCount == 0 { + t.Log("✅ Manual propagation: All rules are terminal!") + } else { + t.Logf("❌ Manual propagation: Still %d non-terminal rules", finalNonTerminalCount) + } +} + +func canAlternativeTerminateManual(alt grammar.Alternative, graph *grammar.DependencyGraph) bool { + // Empty alternative is always terminal + if len(alt.Elements) == 0 { + return true + } + + // All elements must be able to terminate + for _, element := range alt.Elements { + if !canElementTerminateManual(element, graph) { + return false + } + } + + return true +} + +func canElementTerminateManual(element grammar.Element, graph *grammar.DependencyGraph) bool { + // Terminal elements (literals) can always terminate + if element.IsTerminal() { + return true + } + + // Handle quantified elements - THIS IS KEY! + if element.IsQuantified() { + // * and ? 
quantifiers can generate 0 occurrences, so they can terminate + if element.Quantifier == grammar.ZERO_MORE || element.Quantifier == grammar.OPTIONAL_Q { + return true // Can be empty, so always terminal + } + // + quantifier requires at least one occurrence, so check the content + } + + // For rule references + if element.IsRule() { + switch value := element.Value.(type) { + case grammar.ReferenceValue: + referencedNode := graph.GetNode(value.Name) + if referencedNode == nil { + // Handle ANTLR built-in tokens like EOF + return isBuiltinTerminal(value.Name) + } + return referencedNode.HasTerminalAlternatives + case grammar.BlockValue: + // A block can terminate if any of its alternatives can terminate + for _, alt := range value.Alternatives { + if canAlternativeTerminateManual(alt, graph) { + return true + } + } + return false + } + } + + return false +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// isBuiltinTerminal checks if a token name refers to an ANTLR built-in token +func isBuiltinTerminal(tokenName string) bool { + builtinTokens := map[string]bool{ + "EOF": true, // End-of-file token + "": true, // Alternative EOF notation + } + return builtinTokens[tokenName] +} \ No newline at end of file From ff4edea4352626e77035bf86d45bb3580f1286a3 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Mon, 1 Sep 2025 15:20:20 +0800 Subject: [PATCH 04/15] chore: simplify chore: simplify chore: simplify --- tools/fuzzing/internal/generator/generator.go | 321 +++--------------- tools/fuzzing/internal/grammar/dependency.go | 32 +- tools/fuzzing/internal/grammar/parser.go | 89 ++--- .../tests/terminal_completeness_test.go | 112 +++--- .../tests/unterminated_grammar_test.go | 104 ++++++ 5 files changed, 295 insertions(+), 363 deletions(-) create mode 100644 tools/fuzzing/tests/unterminated_grammar_test.go diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index 6ba18dc..807169a 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -12,10 +12,10 @@ import ( // Generator handles the fuzzing logic type Generator struct { - config *config.Config - random *rand.Rand - grammar *grammar.ParsedGrammar - dependencyGraph *grammar.DependencyGraph + config *config.Config + random *rand.Rand + grammar *grammar.ParsedGrammar + dependencyGraph *grammar.DependencyGraph } // WorkItem represents a unit of work in the generation stack @@ -28,49 +28,49 @@ type WorkItem struct { // New creates a new generator with the given configuration func New(cfg *config.Config) *Generator { return &Generator{ - config: cfg, - random: rand.New(rand.NewSource(cfg.Seed)), - grammar: nil, - dependencyGraph: nil, + config: cfg, + random: rand.New(rand.NewSource(cfg.Seed)), + grammar: nil, + dependencyGraph: nil, } } // Generate produces the specified number of queries func (g *Generator) Generate() error { fmt.Println("Initializing grammar parser...") - + // Parse and merge all grammar files into a single grammar var err error g.grammar, err = grammar.ParseAndMergeGrammarFiles(g.config.GrammarFiles) if err != nil { return errors.Wrap(err, "failed to parse and merge grammar files") } - + fmt.Printf("Parsed and merged %d grammar files into single grammar\n", len(g.config.GrammarFiles)) // Set up dependency graph g.dependencyGraph = g.grammar.GetDependencyGraph() - + // Validate grammar has terminal alternatives (non-fatal warning) if err := g.grammar.ValidateGrammar(); err != nil { fmt.Printf("Grammar 
validation warning: %v\n", err) } - + // Validate start rule exists if g.grammar.GetRule(g.config.StartRule) == nil { return errors.Errorf("start rule '%s' not found in merged grammar", g.config.StartRule) } - + // Check if start rule has terminal alternatives if !g.dependencyGraph.HasTerminalAlternatives(g.config.StartRule) { fmt.Printf("Warning: start rule '%s' has no terminal alternatives\n", g.config.StartRule) } fmt.Printf("Generating %d queries from rule '%s'...\n", g.config.Count, g.config.StartRule) - + // Generate queries for i := 0; i < g.config.Count; i++ { - query := g.generateQuery(i + 1) + query := g.generateQuery() fmt.Printf("Query %d: %s\n", i+1, query) } @@ -82,9 +82,8 @@ func (g *Generator) getRule(ruleName string) *grammar.Rule { return g.grammar.GetRule(ruleName) } - // generateQuery creates a single query using grammar rules -func (g *Generator) generateQuery(index int) string { +func (g *Generator) generateQuery() string { // Start generation with fresh active rules tracking activeRules := make(map[string]bool) result := g.generateFromRuleWithRecursionTracking(g.config.StartRule, activeRules, 0) @@ -97,11 +96,11 @@ func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activ if activeRules[ruleName] || depth >= g.config.MaxDepth { return g.forceTerminalGeneration(ruleName) } - + // Mark rule as active activeRules[ruleName] = true defer delete(activeRules, ruleName) - + // Get the rule rule := g.getRule(ruleName) if rule == nil { @@ -112,7 +111,7 @@ func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activ if len(rule.Alternatives) == 0 { return fmt.Sprintf("<%s>", ruleName) } - + altIndex := g.random.Intn(len(rule.Alternatives)) alternative := rule.Alternatives[altIndex] @@ -138,29 +137,18 @@ func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activ // forceTerminalGeneration forces generation of terminal alternatives when recursion is detected func (g *Generator) forceTerminalGeneration(ruleName string) string { - // Check if rule has terminal alternatives + // Get terminal alternatives - dependency graph guarantees these exist terminalAlts := g.dependencyGraph.GetTerminalAlternatives(ruleName) - if len(terminalAlts) == 0 { - // No terminal alternatives - use synthetic generation based on rule name - return g.generateSyntheticTerminal(ruleName) - } - rule := g.getRule(ruleName) - if rule == nil { - return g.generateSyntheticTerminal(ruleName) - } - - // Try to find the best terminal alternative (prefer ones with more literals) - bestAltIndex := g.selectBestTerminalAlternative(rule, terminalAlts) - if bestAltIndex == -1 { - return g.generateSyntheticTerminal(ruleName) - } - - alternative := rule.Alternatives[bestAltIndex] - + + // Randomly select from available terminal alternatives + randomIndex := g.random.Intn(len(terminalAlts)) + altIndex := terminalAlts[randomIndex] + alternative := rule.Alternatives[altIndex] + // Generate using aggressive terminal mode result := g.generateFromAlternativeAggressiveTerminal(&alternative, ruleName) - + switch g.config.OutputFormat { case config.CompactOutput: return result @@ -171,110 +159,6 @@ func (g *Generator) forceTerminalGeneration(ruleName string) string { } } -// generateSyntheticTerminal generates a synthetic terminal based on common SQL patterns -func (g *Generator) generateSyntheticTerminal(ruleName string) string { - switch ruleName { - case "selectstmt", "select_no_parens", "select_with_parens": - return "SELECT 1" - case "a_expr", "b_expr", "c_expr": 
- return "42" - case "insertStmt", "insertstmt": - return "INSERT INTO table1 VALUES (1)" - case "updateStmt", "updatestmt": - return "UPDATE table1 SET col1 = 1" - case "deleteStmt", "deletestmt": - return "DELETE FROM table1" - case "where_clause", "whereClause": - return "WHERE 1=1" - case "having_clause", "havingClause": - return "HAVING 1=1" - case "order_by_clause", "orderByClause", "sort_clause": - return "ORDER BY 1" - case "group_by_clause", "groupByClause": - return "GROUP BY 1" - case "colid", "identifier", "name": - return "col1" - case "tablename", "table_name": - return "table1" - default: - // Return a safe default that indicates the rule couldn't be generated - return fmt.Sprintf("/* %s: synthetic terminal */", ruleName) - } -} - -// generateFromElementWithDepthLimit generates from element with very strict depth limits -func (g *Generator) generateFromElementWithDepthLimit(element *grammar.Element, activeRules map[string]bool, depth int, maxDepth int) string { - if depth >= maxDepth { - return g.generateElementFallback(element) - } - - // Handle optional elements - skip them more aggressively when depth limited - if element.IsOptional() && g.random.Float64() > 0.3 { // Lower probability - return "" - } - - // Handle quantified elements - generate very few - if element.IsQuantified() { - count := 0 - if element.Quantifier == grammar.ONE_MORE { - count = 1 // Only generate minimum required - } - // For ZERO_MORE, count stays 0 - - var results []string - for i := 0; i < count; i++ { - result := g.generateFromElementWithDepthLimit(&grammar.Element{ - Value: element.Value, - Quantifier: grammar.NONE, - }, activeRules, depth+1, maxDepth) - if result != "" { - results = append(results, result) - } - } - return joinWithSpaces(results) - } - - // Generate single element - if element.IsRule() { - if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - if activeRules[refValue.Name] { - return g.generateSyntheticTerminal(refValue.Name) - } - activeRules[refValue.Name] = true - defer delete(activeRules, refValue.Name) - - // Check if this is a lexer rule - if rule := g.grammar.GetRule(refValue.Name); rule != nil && rule.IsLexer { - return g.generateConcreteToken(refValue.Name) - } - return g.generateSyntheticTerminal(refValue.Name) - } - return g.generateElementFallback(element) - } else if element.IsTerminal() { - if litValue, ok := element.Value.(grammar.LiteralValue); ok { - return cleanLiteral(litValue.Text) - } - return cleanLiteral(element.Value.String()) - } - - return element.Value.String() -} - -// generateElementFallback provides fallback generation for complex elements -func (g *Generator) generateElementFallback(element *grammar.Element) string { - if element.IsRule() { - if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - return g.generateSyntheticTerminal(refValue.Name) - } - } else if element.IsTerminal() { - if litValue, ok := element.Value.(grammar.LiteralValue); ok { - return cleanLiteral(litValue.Text) - } - return cleanLiteral(element.Value.String()) - } - return "1" // Ultimate fallback -} - // generateFromRule generates text from a grammar rule (legacy method, kept for compatibility) func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { activeRules := make(map[string]bool) @@ -311,89 +195,6 @@ func (g *Generator) generateFromElementWithRecursionTracking(element *grammar.El return element.Value.String() } -// generateFromElement generates text from a single grammar element (legacy method) -func (g *Generator) 
generateFromElement(element *grammar.Element, currentDepth int) string { - activeRules := make(map[string]bool) - return g.generateFromElementWithRecursionTracking(element, activeRules, currentDepth) -} - -// generateQuantified handles quantified elements (* +) -func (g *Generator) generateQuantified(element *grammar.Element, currentDepth int) string { - var count int - - // Use fixed count if specified, otherwise use random count - if g.config.QuantifierCount > 0 { - count = g.config.QuantifierCount - } else { - switch element.Quantifier { - case grammar.ZERO_MORE: // * - count = g.random.Intn(g.config.MaxQuantifier + 1) // 0 to MaxQuantifier - case grammar.ONE_MORE: // + - count = 1 + g.random.Intn(g.config.MaxQuantifier) // 1 to MaxQuantifier - default: - count = 1 - } - } - - var results []string - for i := 0; i < count; i++ { - if element.IsRule() { - if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - result := g.generateFromRuleOrToken(refValue.Name, currentDepth+1) - results = append(results, result) - } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { - result := g.generateFromBlock(blockValue, currentDepth+1) - results = append(results, result) - } else { - result := g.generateFromRuleOrToken(element.Value.String(), currentDepth+1) - results = append(results, result) - } - } else if element.IsTerminal() { - if litValue, ok := element.Value.(grammar.LiteralValue); ok { - results = append(results, cleanLiteral(litValue.Text)) - } else { - results = append(results, cleanLiteral(element.Value.String())) - } - } - } - - return joinWithSpaces(results) -} - -// generateFromBlock generates content from a block value -func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, currentDepth int) string { - if len(blockValue.Alternatives) == 0 { - return "" - } - - // Select a random alternative from the block - altIndex := g.random.Intn(len(blockValue.Alternatives)) - alternative := blockValue.Alternatives[altIndex] - - // Generate from all elements in the selected alternative - var result []string - for _, element := range alternative.Elements { - elementResult := g.generateFromElement(&element, currentDepth) - if elementResult != "" { - result = append(result, elementResult) - } - } - - return joinWithSpaces(result) -} - - -// generateFromRuleOrToken generates from a rule using standard rule-based generation -func (g *Generator) generateFromRuleOrToken(ruleName string, currentDepth int) string { - // Check if this is a lexer rule and generate concrete token - if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer { - return g.generateConcreteToken(ruleName) - } - - // Otherwise expand as parser rule - return g.generateFromRule(ruleName, currentDepth) -} - // generateConcreteToken generates concrete tokens by expanding lexer rules func (g *Generator) generateConcreteToken(ruleName string) string { // Get the lexer rule @@ -468,7 +269,7 @@ func (g *Generator) generateFromLexerElement(element *grammar.Element, currentDe // generateQuantifiedLexer handles quantified lexer elements func (g *Generator) generateQuantifiedLexer(element *grammar.Element, currentDepth int) string { var count int - + // Use fixed count if specified, otherwise use random count if g.config.QuantifierCount > 0 { count = g.config.QuantifierCount @@ -523,16 +324,16 @@ func (g *Generator) generateFromLexerBlock(blockValue grammar.BlockValue, curren func (g *Generator) generateFromLiteral(literal string) string { // Handle character sets like ~[\u0000"] or [a-zA-Z_] if 
strings.HasPrefix(literal, "~[") && strings.HasSuffix(literal, "]") { - return g.generateFromNegatedSet(literal[2 : len(literal)-1]) + return g.generateFromNegatedSet() } else if strings.HasPrefix(literal, "[") && strings.HasSuffix(literal, "]") { return g.generateFromCharacterSet(literal[1 : len(literal)-1]) } - + // Handle string literals if strings.HasPrefix(literal, "'") && strings.HasSuffix(literal, "'") && len(literal) >= 2 { return literal[1 : len(literal)-1] // Remove quotes } - + // Handle special escape sequences switch literal { case "\\r": @@ -548,7 +349,7 @@ func (g *Generator) generateFromLiteral(literal string) string { case "\\\\": return "\\" } - + // Return as-is for other cases return literal } @@ -556,7 +357,7 @@ func (g *Generator) generateFromLiteral(literal string) string { // generateFromCharacterSet generates a random character from a character set like [a-zA-Z_] func (g *Generator) generateFromCharacterSet(charset string) string { chars := []rune{} - + // Simple character set expansion - handle ranges like a-z, A-Z, 0-9 i := 0 for i < len(charset) { @@ -574,25 +375,24 @@ func (g *Generator) generateFromCharacterSet(charset string) string { i++ } } - + if len(chars) == 0 { return "x" // Fallback } - + return string(chars[g.random.Intn(len(chars))]) } // generateFromNegatedSet generates a character NOT in the specified set -func (g *Generator) generateFromNegatedSet(negatedSet string) string { +func (g *Generator) generateFromNegatedSet() string { // For simplicity, generate common safe characters that are typically not in negated sets safeChars := []string{"a", "b", "c", "x", "y", "z", "_", "1", "2", "3"} - + // TODO: Implement proper negated set handling by expanding the set and excluding those characters // For now, just return a safe character return safeChars[g.random.Intn(len(safeChars))] } - // cleanLiteral removes quotes from literal strings func cleanLiteral(literal string) string { // Remove single quotes from literals like 'SELECT' @@ -624,7 +424,7 @@ func joinStrings(strs []string, sep string) string { if len(strs) == 1 { return strs[0] } - + result := strs[0] for i := 1; i < len(strs); i++ { result += sep + strs[i] @@ -635,7 +435,7 @@ func joinStrings(strs []string, sep string) string { // generateQuantifiedWithRecursionTracking handles quantified elements with recursion tracking func (g *Generator) generateQuantifiedWithRecursionTracking(element *grammar.Element, activeRules map[string]bool, depth int) string { var count int - + // Use fixed count if specified, otherwise use random count if g.config.QuantifierCount > 0 { count = g.config.QuantifierCount @@ -703,38 +503,27 @@ func (g *Generator) generateFromRuleOrTokenWithRecursionTracking(ruleName string if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer { return g.generateConcreteToken(ruleName) } - + // Otherwise expand as parser rule with recursion tracking return g.generateFromRuleWithRecursionTracking(ruleName, activeRules, depth) } -// selectBestTerminalAlternative randomly selects from terminal alternatives -func (g *Generator) selectBestTerminalAlternative(rule *grammar.Rule, terminalAlts []int) int { - if len(terminalAlts) == 0 { - return -1 - } - - // Simply choose randomly from available terminal alternatives - randomIndex := g.random.Intn(len(terminalAlts)) - return terminalAlts[randomIndex] -} - // generateFromAlternativeAggressiveTerminal generates from an alternative using aggressive terminal mode func (g *Generator) generateFromAlternativeAggressiveTerminal(alt 
*grammar.Alternative, ruleName string) string { var result []string - + for _, element := range alt.Elements { elementResult := g.generateFromElementAggressiveTerminal(&element, ruleName) if elementResult != "" { result = append(result, elementResult) } } - + if len(result) == 0 { // Ultimate fallback - use simple pattern based on rule name return g.generateSimpleFallback(ruleName) } - + return joinWithSpaces(result) } @@ -742,10 +531,10 @@ func (g *Generator) generateFromAlternativeAggressiveTerminal(alt *grammar.Alter func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Element, contextRuleName string) string { // Handle quantified elements - be very conservative if element.IsQuantified() { - if element.Quantifier == grammar.ZERO_MORE || element.Quantifier == grammar.OPTIONAL_Q { - // Skip optional/zero-more elements in terminal mode + switch element.Quantifier { + case grammar.ZERO_MORE, grammar.OPTIONAL_Q: return "" - } else if element.Quantifier == grammar.ONE_MORE { + case grammar.ONE_MORE: // Generate exactly one for ONE_MORE nonQuantifiedElement := grammar.Element{ Value: element.Value, @@ -754,7 +543,7 @@ func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Eleme return g.generateFromElementAggressiveTerminal(&nonQuantifiedElement, contextRuleName) } } - + // Handle different element types if element.IsTerminal() { // Direct literal - just return it @@ -763,7 +552,7 @@ func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Eleme } return element.Value.String() } - + if element.IsRule() { switch value := element.Value.(type) { case grammar.ReferenceValue: @@ -771,10 +560,10 @@ func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Eleme if g.isSimpleLexerRule(value.Name) { return g.generateConcreteToken(value.Name) } - + // For parser rules, generate simple fallback based on rule name return g.generateSimpleFallback(value.Name) - + case grammar.BlockValue: // For blocks, try the first alternative only if len(value.Alternatives) > 0 { @@ -783,7 +572,7 @@ func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Eleme return "" } } - + return "" } @@ -793,7 +582,7 @@ func (g *Generator) isSimpleLexerRule(ruleName string) bool { if rule == nil || !rule.IsLexer { return false } - + // Consider lexer rules with simple patterns as safe simpleLexerRules := map[string]bool{ "IDENTIFIER": true, "ID": true, "NAME": true, @@ -804,7 +593,7 @@ func (g *Generator) isSimpleLexerRule(ruleName string) bool { "OPEN_PAREN": true, "CLOSE_PAREN": true, "PLUS": true, "MINUS": true, "STAR": true, "SLASH": true, } - + return simpleLexerRules[ruleName] } @@ -812,7 +601,7 @@ func (g *Generator) isSimpleLexerRule(ruleName string) bool { func (g *Generator) generateSimpleFallback(ruleName string) string { // Generate context-appropriate fallbacks ruleLower := strings.ToLower(ruleName) - + if strings.Contains(ruleLower, "expr") || strings.Contains(ruleLower, "expression") { return "1" } else if strings.Contains(ruleLower, "name") || strings.Contains(ruleLower, "id") { @@ -829,4 +618,4 @@ func (g *Generator) generateSimpleFallback(ruleName string) string { // Generic fallback return "1" } -} \ No newline at end of file +} diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index b36fa7a..1c13cab 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -43,12 +43,24 @@ func (g *DependencyGraph) 
GetNode(ruleName string) *GraphNode { } // AnalyzeTerminalReachability performs terminal reachability analysis on the graph -func (g *DependencyGraph) AnalyzeTerminalReachability() { +func (g *DependencyGraph) AnalyzeTerminalReachability() error { + return g.AnalyzeTerminalReachabilityWithValidation(false) +} + +// AnalyzeTerminalReachabilityWithValidation performs terminal reachability analysis with optional validation +func (g *DependencyGraph) AnalyzeTerminalReachabilityWithValidation(validateUnterminated bool) error { // Phase 1: Mark lexer rules as terminal g.markLexerRulesAsTerminal() // Phase 2: Propagate terminal reachability using fixed-point iteration g.propagateTerminalReachability() + + // Phase 3: Check for unterminated nodes and report error (only if requested) + if validateUnterminated { + return g.validateTerminalReachability() + } + + return nil } // markLexerRulesAsTerminal marks all lexer rules as having terminal alternatives @@ -103,6 +115,24 @@ func (g *DependencyGraph) propagateTerminalReachability() { } } +// validateTerminalReachability checks for rules without terminal alternatives and reports errors +func (g *DependencyGraph) validateTerminalReachability() error { + var unterminatedRules []string + + for ruleName, node := range g.Nodes { + if !node.HasTerminalAlternatives { + unterminatedRules = append(unterminatedRules, ruleName) + } + } + + if len(unterminatedRules) > 0 { + return fmt.Errorf("grammar contains %d rules without terminal alternatives: %v", + len(unterminatedRules), unterminatedRules) + } + + return nil +} + // isAlternativeAlreadyMarked checks if an alternative is already in the terminal list func (g *DependencyGraph) isAlternativeAlreadyMarked(node *GraphNode, altIndex int) bool { for _, terminalIndex := range node.TerminalAlternativeIndex { diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go index e79170b..86dda2a 100644 --- a/tools/fuzzing/internal/grammar/parser.go +++ b/tools/fuzzing/internal/grammar/parser.go @@ -6,8 +6,8 @@ import ( "strings" "github.com/antlr4-go/antlr/v4" - "github.com/pkg/errors" grammar "github.com/bytebase/parser/tools/grammar" + "github.com/pkg/errors" ) // ParsedGrammar represents a parsed grammar with extracted rules @@ -59,7 +59,7 @@ func (r ReferenceValue) String() string { return r.Name } // BlockValue represents a generated block (e.g., (',' column)*) type BlockValue struct { - ID string // Global unique ID like "block_1_alts" + ID string // Global unique ID like "block_1_alts" Alternatives []Alternative } @@ -77,7 +77,6 @@ func (b BlockValue) String() string { return b.ID } - // WildcardValue represents a wildcard (.) type WildcardValue struct{} @@ -93,10 +92,10 @@ type Element struct { type Quantifier int const ( - NONE Quantifier = iota - OPTIONAL_Q // ? - ZERO_MORE // * - ONE_MORE // + + NONE Quantifier = iota + OPTIONAL_Q // ? 
+ ZERO_MORE // * + ONE_MORE // + ) // ParseGrammarFile parses a .g4 file and extracts rules for fuzzing @@ -148,8 +147,6 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { visitor := NewGrammarExtractorVisitor() visitor.VisitGrammarSpec(tree) - - parsedGrammar := &ParsedGrammar{ LexerRules: visitor.lexerRules, ParserRules: visitor.parserRules, @@ -157,27 +154,38 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { BlockAltMap: visitor.blockAltMap, DependencyGraph: NewDependencyGraph(), } - + // Build dependency graph - buildDependencyGraph(parsedGrammar) - + if err := buildDependencyGraph(parsedGrammar); err != nil { + return nil, fmt.Errorf("failed to build dependency graph: %w", err) + } + return parsedGrammar, nil } // buildDependencyGraph constructs the dependency graph for the parsed grammar -func buildDependencyGraph(parsedGrammar *ParsedGrammar) { +func buildDependencyGraph(parsedGrammar *ParsedGrammar) error { + return buildDependencyGraphWithValidation(parsedGrammar, false) +} + +// buildDependencyGraphWithValidation constructs the dependency graph with optional validation +func buildDependencyGraphWithValidation(parsedGrammar *ParsedGrammar, validateUnterminated bool) error { // Add all lexer rules to the graph for ruleName, rule := range parsedGrammar.LexerRules { parsedGrammar.DependencyGraph.AddNode(ruleName, rule) } - + // Add all parser rules to the graph for ruleName, rule := range parsedGrammar.ParserRules { parsedGrammar.DependencyGraph.AddNode(ruleName, rule) } - - // Perform terminal reachability analysis - parsedGrammar.DependencyGraph.AnalyzeTerminalReachability() + + // Perform terminal reachability analysis with optional validation + if err := parsedGrammar.DependencyGraph.AnalyzeTerminalReachabilityWithValidation(validateUnterminated); err != nil { + return fmt.Errorf("terminal reachability analysis failed: %w", err) + } + + return nil } // GetRule gets a rule by name from either lexer or parser rules @@ -224,7 +232,7 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { } g.LexerRules[name] = rule } - + // Merge parser rules for name, rule := range other.ParserRules { if _, exists := g.ParserRules[name]; exists { @@ -232,7 +240,7 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { } g.ParserRules[name] = rule } - + // Merge block alternatives map for blockID, alternatives := range other.BlockAltMap { if _, exists := g.BlockAltMap[blockID]; exists { @@ -240,16 +248,18 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { } g.BlockAltMap[blockID] = alternatives } - + // Update file path to indicate it's a merged grammar if g.FilePath != other.FilePath { g.FilePath = fmt.Sprintf("%s + %s", g.FilePath, other.FilePath) } - - // Rebuild dependency graph with merged rules + + // Rebuild dependency graph with merged rules and validate g.DependencyGraph = NewDependencyGraph() - buildDependencyGraph(g) - + if err := buildDependencyGraphWithValidation(g, true); err != nil { + return fmt.Errorf("failed to rebuild dependency graph after merge: %w", err) + } + return nil } @@ -258,13 +268,13 @@ func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) { if len(filePaths) == 0 { return nil, errors.New("no grammar files provided") } - + // Parse the first grammar file mergedGrammar, err := ParseGrammarFile(filePaths[0]) if err != nil { return nil, errors.Wrapf(err, "failed to parse first grammar file %s", filePaths[0]) } - + // Merge additional grammar files for i := 1; i < 
len(filePaths); i++ { filePath := filePaths[i] @@ -272,12 +282,12 @@ func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) { if err != nil { return nil, errors.Wrapf(err, "failed to parse grammar file %s", filePath) } - + if err := mergedGrammar.MergeGrammar(grammar); err != nil { return nil, errors.Wrapf(err, "failed to merge grammar file %s", filePath) } } - + return mergedGrammar, nil } @@ -616,7 +626,7 @@ func (v *GrammarExtractorVisitor) extractLexerAtom(lexerAtomCtx grammar.ILexerAt // Handle not set (e.g., ~[abc]) if notSetCtx := lexerAtomCtx.NotSet(); notSetCtx != nil { - return v.extractNotSet(notSetCtx) + return v.extractNotSet() } // Handle lexer character set (e.g., [abc]) @@ -645,7 +655,7 @@ func (v *GrammarExtractorVisitor) extractLexerBlock(lexerBlockCtx grammar.ILexer blockID := fmt.Sprintf("lexer_block_%d_alts", globalBlockID) emptyAlts := []Alternative{} v.blockAltMap[blockID] = emptyAlts - + return &Element{ Value: BlockValue{ID: blockID, Alternatives: emptyAlts}, } @@ -658,7 +668,7 @@ func (v *GrammarExtractorVisitor) extractLexerBlock(lexerBlockCtx grammar.ILexer blockID := fmt.Sprintf("lexer_block_%d_alts", globalBlockID) emptyAlts := []Alternative{} v.blockAltMap[blockID] = emptyAlts - + return &Element{ Value: BlockValue{ID: blockID, Alternatives: emptyAlts}, } @@ -678,12 +688,12 @@ func (v *GrammarExtractorVisitor) extractLexerBlock(lexerBlockCtx grammar.ILexer } blockAlternatives = append(blockAlternatives, Alternative{Elements: elements}) } - + // Generate global unique block ID and store mapping globalBlockID++ blockID := fmt.Sprintf("lexer_block_%d_alts", globalBlockID) v.blockAltMap[blockID] = blockAlternatives - + return &Element{ Value: BlockValue{ID: blockID, Alternatives: blockAlternatives}, } @@ -705,7 +715,7 @@ func (v *GrammarExtractorVisitor) extractCharacterRange(characterRangeCtx gramma } // extractNotSet extracts a not set (e.g., ~[abc]) -func (v *GrammarExtractorVisitor) extractNotSet(notSetCtx grammar.INotSetContext) *Element { +func (v *GrammarExtractorVisitor) extractNotSet() *Element { // For now, represent as a literal text // In a real implementation, this would need more sophisticated handling return &Element{ @@ -763,7 +773,6 @@ func (v *GrammarExtractorVisitor) extractTerminalDef(terminalDefCtx grammar.ITer return nil } - // extractRuleRef extracts a rule reference func (v *GrammarExtractorVisitor) extractRuleRef(rulerefCtx grammar.IRulerefContext) *Element { if ruleRefToken := rulerefCtx.RULE_REF(); ruleRefToken != nil { @@ -783,7 +792,7 @@ func (v *GrammarExtractorVisitor) extractBlock(blockCtx grammar.IBlockContext) * blockID := fmt.Sprintf("block_%d_alts", globalBlockID) emptyAlts := []Alternative{} v.blockAltMap[blockID] = emptyAlts - + return &Element{ Value: BlockValue{ID: blockID, Alternatives: emptyAlts}, } @@ -796,7 +805,7 @@ func (v *GrammarExtractorVisitor) extractBlock(blockCtx grammar.IBlockContext) * blockID := fmt.Sprintf("block_%d_alts", globalBlockID) emptyAlts := []Alternative{} v.blockAltMap[blockID] = emptyAlts - + return &Element{ Value: BlockValue{ID: blockID, Alternatives: emptyAlts}, } @@ -819,12 +828,12 @@ func (v *GrammarExtractorVisitor) extractBlock(blockCtx grammar.IBlockContext) * if len(blockAlternatives) == 1 && len(blockAlternatives[0].Elements) == 1 { return &blockAlternatives[0].Elements[0] } - + // Generate global unique block ID and store mapping globalBlockID++ blockID := fmt.Sprintf("block_%d_alts", globalBlockID) v.blockAltMap[blockID] = blockAlternatives - + return &Element{ 
Value: BlockValue{ID: blockID, Alternatives: blockAlternatives}, } @@ -873,4 +882,4 @@ func (v *GrammarExtractorVisitor) extractQuantifier(ebnfSuffixCtx grammar.IEbnfS } return NONE -} \ No newline at end of file +} diff --git a/tools/fuzzing/tests/terminal_completeness_test.go b/tools/fuzzing/tests/terminal_completeness_test.go index 3601829..6507552 100644 --- a/tools/fuzzing/tests/terminal_completeness_test.go +++ b/tools/fuzzing/tests/terminal_completeness_test.go @@ -11,7 +11,7 @@ import ( func TestPostgreSQLTerminalCompleteness(t *testing.T) { repoRoot := getRepoRoot() - + // PostgreSQL grammar file paths lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") @@ -23,22 +23,22 @@ func TestPostgreSQLTerminalCompleteness(t *testing.T) { } depGraph := parsedGrammar.GetDependencyGraph() - + // Analyze all nodes fmt.Println("=== PostgreSQL Grammar Terminal Analysis ===") - + totalNodes := len(depGraph.Nodes) terminalNodes := 0 nonTerminalNodes := 0 - + var terminalRules []string var nonTerminalRules []string - + lexerTerminal := 0 lexerNonTerminal := 0 parserTerminal := 0 parserNonTerminal := 0 - + for ruleName, node := range depGraph.Nodes { if node.HasTerminalAlternatives { terminalNodes++ @@ -58,16 +58,16 @@ func TestPostgreSQLTerminalCompleteness(t *testing.T) { } } } - + fmt.Printf("Total Nodes: %d\n", totalNodes) fmt.Printf("Terminal Nodes: %d (%.1f%%)\n", terminalNodes, float64(terminalNodes)/float64(totalNodes)*100) fmt.Printf("Non-Terminal Nodes: %d (%.1f%%)\n", nonTerminalNodes, float64(nonTerminalNodes)/float64(totalNodes)*100) fmt.Println() - + fmt.Printf("Lexer Rules: Terminal=%d, Non-Terminal=%d\n", lexerTerminal, lexerNonTerminal) fmt.Printf("Parser Rules: Terminal=%d, Non-Terminal=%d\n", parserTerminal, parserNonTerminal) fmt.Println() - + // Show non-terminal rules (these should ideally be zero) if len(nonTerminalRules) > 0 { sort.Strings(nonTerminalRules) @@ -84,22 +84,22 @@ func TestPostgreSQLTerminalCompleteness(t *testing.T) { } else { fmt.Println("✅ All rules have terminal alternatives!") } - + // Test: If your hypothesis is correct, this should pass if nonTerminalNodes == 0 { t.Log("✅ HYPOTHESIS CONFIRMED: All PostgreSQL rules have terminal alternatives") } else { t.Errorf("❌ HYPOTHESIS REJECTED: %d rules have no terminal alternatives", nonTerminalNodes) - + // Analyze WHY these rules don't have terminal alternatives fmt.Println("=== Analysis of Non-Terminal Rules ===") - analyzeNonTerminalRules(parsedGrammar, depGraph, nonTerminalRules[:min(5, len(nonTerminalRules))], t) + analyzeNonTerminalRules(parsedGrammar, depGraph, nonTerminalRules[:min(5, len(nonTerminalRules))]) } } func TestSpecificNonTerminalRules(t *testing.T) { repoRoot := getRepoRoot() - + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") @@ -109,15 +109,15 @@ func TestSpecificNonTerminalRules(t *testing.T) { } depGraph := parsedGrammar.GetDependencyGraph() - + // Test specific rules that should be terminal based on our earlier analysis expectedTerminalRules := []string{ - "columnref", // Should be terminal (colid + indirection) - "c_expr", // Should be terminal (has columnref alternative) - "a_expr_typecast", // Should be terminal (depends on c_expr) - "a_expr_collate", // Should be terminal (depends on a_expr_typecast) + "columnref", // Should be terminal (colid + indirection) + "c_expr", // Should be terminal 
(has columnref alternative) + "a_expr_typecast", // Should be terminal (depends on c_expr) + "a_expr_collate", // Should be terminal (depends on a_expr_typecast) } - + fmt.Println("=== Testing Expected Terminal Rules ===") for _, ruleName := range expectedTerminalRules { node := depGraph.GetNode(ruleName) @@ -125,41 +125,41 @@ func TestSpecificNonTerminalRules(t *testing.T) { t.Errorf("Rule %s not found", ruleName) continue } - - fmt.Printf("%s: HasTerminal=%t, TerminalAlts=%v\n", + + fmt.Printf("%s: HasTerminal=%t, TerminalAlts=%v\n", ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) - + if !node.HasTerminalAlternatives { t.Errorf("Expected %s to be terminal, but it's not", ruleName) } } } -func analyzeNonTerminalRules(parsedGrammar *grammar.ParsedGrammar, depGraph *grammar.DependencyGraph, ruleNames []string, t *testing.T) { +func analyzeNonTerminalRules(parsedGrammar *grammar.ParsedGrammar, depGraph *grammar.DependencyGraph, ruleNames []string) { for _, ruleName := range ruleNames { rule := parsedGrammar.GetRule(ruleName) if rule == nil { continue } - + fmt.Printf("\n--- Analyzing %s ---\n", ruleName) - fmt.Printf("Type: %s, Alternatives: %d\n", + fmt.Printf("Type: %s, Alternatives: %d\n", map[bool]string{true: "LEXER", false: "PARSER"}[rule.IsLexer], len(rule.Alternatives)) - + for altIndex, alt := range rule.Alternatives { canTerminate := depGraph.CanAlternativeTerminate(alt) fmt.Printf(" Alt %d (%d elements): canTerminate=%t\n", altIndex, len(alt.Elements), canTerminate) - + for elemIndex, element := range alt.Elements { canElemTerminate := depGraph.CanElementTerminate(element) fmt.Printf(" Elem %d: %s", elemIndex, element.Value.String()) - + if element.IsQuantified() { fmt.Printf("[%v]", element.Quantifier) } - + fmt.Printf(" → canTerminate=%t", canElemTerminate) - + // If it's a rule reference, show the referenced rule's status if element.IsRule() { if refValue, ok := element.Value.(grammar.ReferenceValue); ok { @@ -171,17 +171,17 @@ func analyzeNonTerminalRules(parsedGrammar *grammar.ParsedGrammar, depGraph *gra } } } - + fmt.Println() } } } } -// Test to validate that our terminal propagation algorithm is working correctly +// Test to validate that our terminal propagation algorithm is working correctly func TestManualTerminalPropagation(t *testing.T) { repoRoot := getRepoRoot() - + lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") @@ -192,12 +192,12 @@ func TestManualTerminalPropagation(t *testing.T) { // Create fresh dependency graph and run manual propagation freshGraph := grammar.NewDependencyGraph() - + // Add all nodes for ruleName, rule := range parsedGrammar.GetAllRules() { freshGraph.AddNode(ruleName, rule) } - + // Mark lexer rules as terminal initialTerminalCount := 0 for _, node := range freshGraph.Nodes { @@ -209,22 +209,22 @@ func TestManualTerminalPropagation(t *testing.T) { initialTerminalCount++ } } - + fmt.Printf("Starting with %d lexer rules marked as terminal\n", initialTerminalCount) - + // Manual propagation with more iterations maxIterations := 100 totalNewTerminals := 0 - + for iteration := 0; iteration < maxIterations; iteration++ { changed := false newTerminalsThisIteration := 0 - + for _, node := range freshGraph.Nodes { if node.IsLexer || node.HasTerminalAlternatives { continue } - + for altIndex, alt := range node.Alternatives { // Check if already marked alreadyMarked := false @@ -237,7 +237,7 @@ func TestManualTerminalPropagation(t 
*testing.T) { if alreadyMarked { continue } - + // Check if this alternative can terminate if canAlternativeTerminateManual(alt, freshGraph) { if !node.HasTerminalAlternatives { @@ -250,26 +250,26 @@ func TestManualTerminalPropagation(t *testing.T) { } } } - + if newTerminalsThisIteration > 0 { - fmt.Printf("Iteration %d: +%d new terminal rules (total: %d)\n", + fmt.Printf("Iteration %d: +%d new terminal rules (total: %d)\n", iteration+1, newTerminalsThisIteration, initialTerminalCount+totalNewTerminals) } - + if !changed { fmt.Printf("Converged after %d iterations\n", iteration+1) break } - + if iteration == maxIterations-1 { fmt.Printf("Reached max iterations (%d)\n", maxIterations) } } - + // Count final results finalTerminalCount := 0 finalNonTerminalCount := 0 - + for _, node := range freshGraph.Nodes { if node.HasTerminalAlternatives { finalTerminalCount++ @@ -277,11 +277,11 @@ func TestManualTerminalPropagation(t *testing.T) { finalNonTerminalCount++ } } - + fmt.Printf("\nFinal Results:\n") fmt.Printf("Terminal: %d\n", finalTerminalCount) fmt.Printf("Non-Terminal: %d\n", finalNonTerminalCount) - + if finalNonTerminalCount == 0 { t.Log("✅ Manual propagation: All rules are terminal!") } else { @@ -294,14 +294,14 @@ func canAlternativeTerminateManual(alt grammar.Alternative, graph *grammar.Depen if len(alt.Elements) == 0 { return true } - + // All elements must be able to terminate for _, element := range alt.Elements { if !canElementTerminateManual(element, graph) { return false } } - + return true } @@ -310,16 +310,16 @@ func canElementTerminateManual(element grammar.Element, graph *grammar.Dependenc if element.IsTerminal() { return true } - + // Handle quantified elements - THIS IS KEY! if element.IsQuantified() { // * and ? quantifiers can generate 0 occurrences, so they can terminate if element.Quantifier == grammar.ZERO_MORE || element.Quantifier == grammar.OPTIONAL_Q { - return true // Can be empty, so always terminal + return true // Can be empty, so always terminal } // + quantifier requires at least one occurrence, so check the content } - + // For rule references if element.IsRule() { switch value := element.Value.(type) { @@ -340,7 +340,7 @@ func canElementTerminateManual(element grammar.Element, graph *grammar.Dependenc return false } } - + return false } @@ -358,4 +358,4 @@ func isBuiltinTerminal(tokenName string) bool { "": true, // Alternative EOF notation } return builtinTokens[tokenName] -} \ No newline at end of file +} diff --git a/tools/fuzzing/tests/unterminated_grammar_test.go b/tools/fuzzing/tests/unterminated_grammar_test.go new file mode 100644 index 0000000..d02d095 --- /dev/null +++ b/tools/fuzzing/tests/unterminated_grammar_test.go @@ -0,0 +1,104 @@ +package tests + +import ( + "os" + "strings" + "testing" + + "github.com/bytebase/parser/tools/fuzzing/internal/grammar" +) + +func TestUnterminatedGrammarErrorReporting(t *testing.T) { + // Create two grammar files that when merged create unterminated rules + lexerContent := ` +lexer grammar TestLexer; + +PLUS: '+' ; +NUMBER: [0-9]+ ; +` + + parserContent := ` +parser grammar TestParser; + +// Import tokens from lexer +options { tokenVocab=TestLexer; } + +// This creates infinite left recursion with no terminal alternatives +expr: expr PLUS expr ; +` + + // Write the grammars to temporary files + tmpLexer := "/tmp/test_lexer.g4" + tmpParser := "/tmp/test_parser.g4" + + err := os.WriteFile(tmpLexer, []byte(lexerContent), 0644) + if err != nil { + t.Fatalf("Failed to write lexer grammar: %v", err) + } + defer 
os.Remove(tmpLexer) + + err = os.WriteFile(tmpParser, []byte(parserContent), 0644) + if err != nil { + t.Fatalf("Failed to write parser grammar: %v", err) + } + defer os.Remove(tmpParser) + + // Try to parse and merge the grammars - this should fail with terminal reachability error + _, err = grammar.ParseAndMergeGrammarFiles([]string{tmpLexer, tmpParser}) + + // Verify that we get the expected error + if err == nil { + t.Fatal("Expected error for unterminated grammar, but got none") + } + + if !strings.Contains(err.Error(), "without terminal alternatives") { + t.Errorf("Expected error about terminal alternatives, got: %v", err) + } + + if !strings.Contains(err.Error(), "expr") { + t.Errorf("Expected error to mention 'expr' rule, got: %v", err) + } + + t.Logf("✅ Correctly detected unterminated grammar: %v", err) +} + +func TestValidSimpleGrammar(t *testing.T) { + // Create a simple grammar that should work + grammarContent := ` +grammar TestGrammar; + +// This has terminal alternatives +expr: expr '+' expr | NUMBER ; + +// Lexer rules +PLUS: '+' ; +NUMBER: [0-9]+ ; +` + + // Write the grammar to a temporary file + tmpFile := "/tmp/test_valid.g4" + err := os.WriteFile(tmpFile, []byte(grammarContent), 0644) + if err != nil { + t.Fatalf("Failed to write test grammar: %v", err) + } + defer os.Remove(tmpFile) + + // Try to parse the grammar - this should succeed + parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{tmpFile}) + if err != nil { + t.Fatalf("Expected valid grammar to parse successfully, got error: %v", err) + } + + // Verify the grammar was parsed correctly + if parsedGrammar == nil { + t.Fatal("Expected parsed grammar, got nil") + } + + // Check that expr rule exists and has terminal alternatives + depGraph := parsedGrammar.GetDependencyGraph() + if !depGraph.HasTerminalAlternatives("expr") { + t.Error("Expected expr rule to have terminal alternatives") + } + + t.Log("✅ Valid grammar parsed successfully with terminal alternatives") +} \ No newline at end of file From f0e0d7f699a91738a38ef2302c1db67e7bb6fda9 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Mon, 1 Sep 2025 17:03:38 +0800 Subject: [PATCH 05/15] fix: analyze immediately terminated alternative --- tools/fuzzing/internal/generator/generator.go | 134 +++---- tools/fuzzing/internal/grammar/dependency.go | 177 +++++---- tools/fuzzing/tests/dependency_test.go | 155 -------- tools/fuzzing/tests/postgresql_test.go | 14 +- tools/fuzzing/tests/recursive_test.go | 81 ---- .../tests/terminal_completeness_test.go | 361 ------------------ .../tests/unterminated_grammar_test.go | 104 ----- 7 files changed, 163 insertions(+), 863 deletions(-) delete mode 100644 tools/fuzzing/tests/dependency_test.go delete mode 100644 tools/fuzzing/tests/recursive_test.go delete mode 100644 tools/fuzzing/tests/terminal_completeness_test.go delete mode 100644 tools/fuzzing/tests/unterminated_grammar_test.go diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index 807169a..bcde7f3 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -61,9 +61,9 @@ func (g *Generator) Generate() error { return errors.Errorf("start rule '%s' not found in merged grammar", g.config.StartRule) } - // Check if start rule has terminal alternatives - if !g.dependencyGraph.HasTerminalAlternatives(g.config.StartRule) { - fmt.Printf("Warning: start rule '%s' has no terminal alternatives\n", g.config.StartRule) + // Check if start rule has immediately 
terminal alternatives + if !g.dependencyGraph.HasImmediatelyTerminalAlternatives(g.config.StartRule) { + fmt.Printf("Warning: start rule '%s' has no immediately terminal alternatives\n", g.config.StartRule) } fmt.Printf("Generating %d queries from rule '%s'...\n", g.config.Count, g.config.StartRule) @@ -137,17 +137,25 @@ func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activ // forceTerminalGeneration forces generation of terminal alternatives when recursion is detected func (g *Generator) forceTerminalGeneration(ruleName string) string { - // Get terminal alternatives - dependency graph guarantees these exist - terminalAlts := g.dependencyGraph.GetTerminalAlternatives(ruleName) + // Get immediately terminal alternatives first (preferred) + immediateTerminalAlts := g.dependencyGraph.GetImmediatelyTerminalAlternatives(ruleName) rule := g.getRule(ruleName) - // Randomly select from available terminal alternatives - randomIndex := g.random.Intn(len(terminalAlts)) - altIndex := terminalAlts[randomIndex] - alternative := rule.Alternatives[altIndex] + // Check if we have immediately terminal alternatives + if len(immediateTerminalAlts) == 0 { + // Fallback for rules that don't have immediately terminal alternatives + // This can happen with forward references or missing rules + fmt.Printf("Warning: Rule %s has no immediately terminal alternatives, generating simple fallback\n", ruleName) + return generateSimpleFallback(ruleName) + } + + randomIndex := g.random.Intn(len(immediateTerminalAlts)) + selectedAltIndex := immediateTerminalAlts[randomIndex] + + alternative := rule.Alternatives[selectedAltIndex] - // Generate using aggressive terminal mode - result := g.generateFromAlternativeAggressiveTerminal(&alternative, ruleName) + // Generate using normal generation since this is an immediately terminal alternative + result := g.generateFromImmediatelyTerminalAlternative(&alternative) switch g.config.OutputFormat { case config.CompactOutput: @@ -159,6 +167,18 @@ func (g *Generator) forceTerminalGeneration(ruleName string) string { } } + +// ForceTerminalGenerationPublic exposes forceTerminalGeneration for testing +func (g *Generator) ForceTerminalGenerationPublic(ruleName string) string { + return g.forceTerminalGeneration(ruleName) +} + +// SetGrammarForTesting sets the grammar for testing purposes +func (g *Generator) SetGrammarForTesting(grammar *grammar.ParsedGrammar) { + g.grammar = grammar + g.dependencyGraph = grammar.GetDependencyGraph() +} + // generateFromRule generates text from a grammar rule (legacy method, kept for compatibility) func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { activeRules := make(map[string]bool) @@ -508,47 +528,51 @@ func (g *Generator) generateFromRuleOrTokenWithRecursionTracking(ruleName string return g.generateFromRuleWithRecursionTracking(ruleName, activeRules, depth) } -// generateFromAlternativeAggressiveTerminal generates from an alternative using aggressive terminal mode -func (g *Generator) generateFromAlternativeAggressiveTerminal(alt *grammar.Alternative, ruleName string) string { +// generateFromImmediatelyTerminalAlternative generates from an immediately terminal alternative using normal generation +// Since the alternative is immediately terminal, we can safely generate without recursion tracking +func (g *Generator) generateFromImmediatelyTerminalAlternative(alt *grammar.Alternative) string { var result []string for _, element := range alt.Elements { - elementResult := 
g.generateFromElementAggressiveTerminal(&element, ruleName) + elementResult := g.generateFromImmediatelyTerminalElement(&element) if elementResult != "" { result = append(result, elementResult) } } - if len(result) == 0 { - // Ultimate fallback - use simple pattern based on rule name - return g.generateSimpleFallback(ruleName) - } - return joinWithSpaces(result) } -// generateFromElementAggressiveTerminal generates from an element using aggressive terminal mode -func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Element, contextRuleName string) string { - // Handle quantified elements - be very conservative +// generateFromImmediatelyTerminalElement generates from an element that's part of an immediately terminal alternative +func (g *Generator) generateFromImmediatelyTerminalElement(element *grammar.Element) string { + // Handle quantified elements if element.IsQuantified() { switch element.Quantifier { case grammar.ZERO_MORE, grammar.OPTIONAL_Q: + // Generate 0 or 1 occurrences for optional elements + if g.random.Float32() < float32(g.config.OptionalProb) { + nonQuantifiedElement := grammar.Element{ + Value: element.Value, + Quantifier: grammar.NONE, + } + return g.generateFromImmediatelyTerminalElement(&nonQuantifiedElement) + } return "" case grammar.ONE_MORE: - // Generate exactly one for ONE_MORE + // Generate exactly 1 occurrence for ONE_MORE to stay minimal nonQuantifiedElement := grammar.Element{ Value: element.Value, Quantifier: grammar.NONE, } - return g.generateFromElementAggressiveTerminal(&nonQuantifiedElement, contextRuleName) + return g.generateFromImmediatelyTerminalElement(&nonQuantifiedElement) } } // Handle different element types if element.IsTerminal() { - // Direct literal - just return it + // Direct literal - use existing generation logic that handles character sets, literals, etc. 
if literal, ok := element.Value.(grammar.LiteralValue); ok { - return strings.Trim(literal.Text, "'\"") + return g.generateFromLiteral(literal.Text) } return element.Value.String() } @@ -556,18 +580,22 @@ func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Eleme if element.IsRule() { switch value := element.Value.(type) { case grammar.ReferenceValue: - // Check if it's a simple lexer rule - if g.isSimpleLexerRule(value.Name) { - return g.generateConcreteToken(value.Name) - } - - // For parser rules, generate simple fallback based on rule name - return g.generateSimpleFallback(value.Name) + // For rule references in immediately terminal alternatives, use normal generation with empty recursion tracking + // Since we know this is immediately terminal, we can generate safely + return g.generateFromRuleWithRecursionTracking(value.Name, make(map[string]bool), 0) case grammar.BlockValue: - // For blocks, try the first alternative only + // For blocks, randomly select an alternative that's immediately terminal if len(value.Alternatives) > 0 { - return g.generateFromAlternativeAggressiveTerminal(&value.Alternatives[0], contextRuleName) + // Find immediately terminal alternatives within the block + for _, alt := range value.Alternatives { + // Use dependency graph to check if this alternative is immediately terminal + if g.dependencyGraph.CanAlternativeTerminateImmediately(alt) { + return g.generateFromImmediatelyTerminalAlternative(&alt) + } + } + // Fallback to first alternative if none found (shouldn't happen) + return g.generateFromImmediatelyTerminalAlternative(&value.Alternatives[0]) } return "" } @@ -576,46 +604,22 @@ func (g *Generator) generateFromElementAggressiveTerminal(element *grammar.Eleme return "" } -// isSimpleLexerRule checks if a rule is a simple lexer rule that can be safely generated -func (g *Generator) isSimpleLexerRule(ruleName string) bool { - rule := g.getRule(ruleName) - if rule == nil || !rule.IsLexer { - return false - } - - // Consider lexer rules with simple patterns as safe - simpleLexerRules := map[string]bool{ - "IDENTIFIER": true, "ID": true, "NAME": true, - "INTEGER": true, "NUMBER": true, "NUMERIC": true, "INT": true, - "STRING": true, "STRING_LITERAL": true, - "SELECT": true, "FROM": true, "WHERE": true, "AND": true, "OR": true, - "COMMA": true, "SEMICOLON": true, "DOT": true, - "OPEN_PAREN": true, "CLOSE_PAREN": true, - "PLUS": true, "MINUS": true, "STAR": true, "SLASH": true, - } - - return simpleLexerRules[ruleName] -} - // generateSimpleFallback generates a simple fallback value based on rule name patterns -func (g *Generator) generateSimpleFallback(ruleName string) string { +func generateSimpleFallback(ruleName string) string { // Generate context-appropriate fallbacks ruleLower := strings.ToLower(ruleName) - if strings.Contains(ruleLower, "expr") || strings.Contains(ruleLower, "expression") { + if strings.Contains(ruleLower, "string") || strings.Contains(ruleLower, "constant") { + return "'fallback'" + } else if strings.Contains(ruleLower, "expr") || strings.Contains(ruleLower, "expression") { return "1" } else if strings.Contains(ruleLower, "name") || strings.Contains(ruleLower, "id") { return "col1" - } else if strings.Contains(ruleLower, "list") { - return "1" - } else if strings.Contains(ruleLower, "clause") { + } else if strings.Contains(ruleLower, "number") || strings.Contains(ruleLower, "numeric") { return "1" - } else if strings.Contains(ruleLower, "stmt") || strings.Contains(ruleLower, "statement") { - return "SELECT 1" - } 
else if strings.Contains(ruleLower, "select") { - return "SELECT 1" } else { // Generic fallback return "1" } } + diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index 1c13cab..4ba4c5c 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -11,11 +11,11 @@ type DependencyGraph struct { // GraphNode represents a single rule in the dependency graph type GraphNode struct { - RuleName string // Rule name (e.g., "selectStmt", "expr") - HasTerminalAlternatives bool // Can reach terminal without recursion - Alternatives []Alternative // All alternatives for this rule - TerminalAlternativeIndex []int // Indices of alternatives that can terminate - IsLexer bool // Whether this is a lexer rule + RuleName string // Rule name (e.g., "selectStmt", "expr") + Alternatives []Alternative // All alternatives for this rule + HasImmediatelyTerminalAlternatives bool // Has at least one immediately terminal alternative + ImmediatelyTerminalAlternativeIndex []int // Indices of alternatives that are immediately terminal + IsLexer bool // Whether this is a lexer rule } // NewDependencyGraph creates a new dependency graph @@ -28,11 +28,11 @@ func NewDependencyGraph() *DependencyGraph { // AddNode adds a rule node to the dependency graph func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { node := &GraphNode{ - RuleName: ruleName, - HasTerminalAlternatives: false, - Alternatives: rule.Alternatives, - TerminalAlternativeIndex: []int{}, - IsLexer: rule.IsLexer, + RuleName: ruleName, + Alternatives: rule.Alternatives, + HasImmediatelyTerminalAlternatives: false, + ImmediatelyTerminalAlternativeIndex: []int{}, + IsLexer: rule.IsLexer, } g.Nodes[ruleName] = node } @@ -42,42 +42,43 @@ func (g *DependencyGraph) GetNode(ruleName string) *GraphNode { return g.Nodes[ruleName] } -// AnalyzeTerminalReachability performs terminal reachability analysis on the graph +// AnalyzeTerminalReachability performs immediately terminal analysis on the graph func (g *DependencyGraph) AnalyzeTerminalReachability() error { return g.AnalyzeTerminalReachabilityWithValidation(false) } -// AnalyzeTerminalReachabilityWithValidation performs terminal reachability analysis with optional validation +// AnalyzeTerminalReachabilityWithValidation performs immediately terminal analysis with optional validation func (g *DependencyGraph) AnalyzeTerminalReachabilityWithValidation(validateUnterminated bool) error { - // Phase 1: Mark lexer rules as terminal - g.markLexerRulesAsTerminal() + // Phase 1: Mark lexer rules as immediately terminal + g.markLexerRulesAsImmediatelyTerminal() - // Phase 2: Propagate terminal reachability using fixed-point iteration - g.propagateTerminalReachability() + // Phase 2: Analyze immediately terminal alternatives + g.analyzeImmediatelyTerminalAlternatives() - // Phase 3: Check for unterminated nodes and report error (only if requested) + // Phase 3: Check for nodes without immediately terminal alternatives and report error (only if requested) if validateUnterminated { - return g.validateTerminalReachability() + return g.validateImmediatelyTerminalReachability() } return nil } -// markLexerRulesAsTerminal marks all lexer rules as having terminal alternatives -func (g *DependencyGraph) markLexerRulesAsTerminal() { +// markLexerRulesAsImmediatelyTerminal marks all lexer rules as having immediately terminal alternatives +func (g *DependencyGraph) markLexerRulesAsImmediatelyTerminal() { for _, node := range 
g.Nodes { if node.IsLexer { - node.HasTerminalAlternatives = true - // All lexer alternatives are considered terminal + node.HasImmediatelyTerminalAlternatives = true + // All lexer alternatives are considered immediately terminal for i := range node.Alternatives { - node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, i) + node.ImmediatelyTerminalAlternativeIndex = append(node.ImmediatelyTerminalAlternativeIndex, i) } } } } -// propagateTerminalReachability uses fixed-point iteration to determine which rules can terminate -func (g *DependencyGraph) propagateTerminalReachability() { +// analyzeImmediatelyTerminalAlternatives analyzes which alternatives are immediately terminal +func (g *DependencyGraph) analyzeImmediatelyTerminalAlternatives() { + // Use fixed-point iteration similar to terminal propagation changed := true iterations := 0 maxIterations := len(g.Nodes) * 2 // Prevent infinite loops @@ -91,19 +92,19 @@ func (g *DependencyGraph) propagateTerminalReachability() { continue // Already processed } - // Check each alternative to see if it can terminate + // Check each alternative to see if it's immediately terminal for altIndex, alt := range node.Alternatives { - // Skip if this alternative is already marked as terminal - if g.isAlternativeAlreadyMarked(node, altIndex) { + // Skip if this alternative is already marked as immediately terminal + if g.isAlternativeAlreadyMarkedImmediate(node, altIndex) { continue } - if g.canAlternativeTerminate(alt) { - if !node.HasTerminalAlternatives { - node.HasTerminalAlternatives = true + if g.canAlternativeTerminateImmediately(alt) { + if !node.HasImmediatelyTerminalAlternatives { + node.HasImmediatelyTerminalAlternatives = true changed = true } - node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, altIndex) + node.ImmediatelyTerminalAlternativeIndex = append(node.ImmediatelyTerminalAlternativeIndex, altIndex) changed = true } } @@ -111,63 +112,48 @@ } if iterations >= maxIterations { - fmt.Printf("Warning: Terminal reachability analysis reached max iterations (%d)\n", maxIterations) + fmt.Printf("Warning: Immediately terminal analysis reached max iterations (%d)\n", maxIterations) } } -// validateTerminalReachability checks for rules without terminal alternatives and reports errors -func (g *DependencyGraph) validateTerminalReachability() error { +// validateImmediatelyTerminalReachability checks for rules without immediately terminal alternatives and reports errors +func (g *DependencyGraph) validateImmediatelyTerminalReachability() error { var unterminatedRules []string for ruleName, node := range g.Nodes { - if !node.HasTerminalAlternatives { + if !node.HasImmediatelyTerminalAlternatives { unterminatedRules = append(unterminatedRules, ruleName) } } if len(unterminatedRules) > 0 { - return fmt.Errorf("grammar contains %d rules without terminal alternatives: %v", + return fmt.Errorf("grammar contains %d rules without immediately terminal alternatives: %v", len(unterminatedRules), unterminatedRules) } return nil }
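+// A minimal illustrative sketch, not from the original patch: for a toy grammar
+//
+//	expr: expr '+' term | term ;   term: NUMBER ;
+//
+// the first pass marks term (NUMBER is a lexer rule), the next pass marks expr
+// through its `term` alternative, and only after that does the left-recursive
+// alternative count as terminating, because expr itself can now bottom out.
-// isAlternativeAlreadyMarked checks if an alternative is already in the terminal list -func (g *DependencyGraph) isAlternativeAlreadyMarked(node *GraphNode, altIndex int) bool { - for _, terminalIndex := range node.TerminalAlternativeIndex { - if terminalIndex == altIndex { +// isAlternativeAlreadyMarkedImmediate checks if an alternative is already in the immediately terminal list +func (g *DependencyGraph) 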
isAlternativeAlreadyMarkedImmediate(node *GraphNode, altIndex int) bool { + for _, immediateIndex := range node.ImmediatelyTerminalAlternativeIndex { + if immediateIndex == altIndex { return true } } return false } -// CanAlternativeTerminate checks if an alternative can terminate without recursion (exported for testing) -func (g *DependencyGraph) CanAlternativeTerminate(alt Alternative) bool { - return g.canAlternativeTerminate(alt) -} - -// CanElementTerminate checks if a single element can terminate (exported for testing) -func (g *DependencyGraph) CanElementTerminate(element Element) bool { - return g.canElementTerminate(element) -} - -// CanBlockValueTerminate checks if a block value can terminate (exported for testing) -func (g *DependencyGraph) CanBlockValueTerminate(block BlockValue) bool { - return g.canBlockValueTerminate(block) -} - -// canAlternativeTerminate checks if an alternative can terminate without recursion -func (g *DependencyGraph) canAlternativeTerminate(alt Alternative) bool { - // Empty alternative (ε-transition) can always terminate +// canAlternativeTerminateImmediately checks if an alternative can terminate immediately (no rule references required) +func (g *DependencyGraph) canAlternativeTerminateImmediately(alt Alternative) bool { + // Empty alternative (ε-transition) can always terminate immediately if len(alt.Elements) == 0 { return true } // Check each element in the alternative for _, element := range alt.Elements { - if !g.canElementTerminate(element) { + if !g.canElementTerminateImmediately(element) { return false } } @@ -175,35 +161,35 @@ func (g *DependencyGraph) canAlternativeTerminate(alt Alternative) bool { return true } -// canElementTerminate checks if a single element can terminate -func (g *DependencyGraph) canElementTerminate(element Element) bool { - // Terminal elements (literals) can always terminate +// canElementTerminateImmediately checks if a single element can terminate immediately +func (g *DependencyGraph) canElementTerminateImmediately(element Element) bool { + // Terminal elements (literals) can always terminate immediately if element.IsTerminal() { return true } // Handle quantified elements if element.IsQuantified() { - // * and ? quantifiers can generate 0 occurrences, so they can terminate + // * and ? 
quantifiers can generate 0 occurrences, so they can terminate immediately if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { return true } // + quantifier requires at least one occurrence, so check the referenced rule if element.Quantifier == ONE_MORE { - return g.canRuleReferenceTerminate(element) + return g.canRuleReferenceTerminateImmediately(element) } } - // For rule references, check if the referenced rule can terminate + // For rule references, check if the referenced rule can terminate immediately if element.IsRule() { - return g.canRuleReferenceTerminate(element) + return g.canRuleReferenceTerminateImmediately(element) } return false } -// canRuleReferenceTerminate checks if a rule reference can terminate -func (g *DependencyGraph) canRuleReferenceTerminate(element Element) bool { +// canRuleReferenceTerminateImmediately checks if a rule reference can terminate immediately +func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) bool { var referencedRuleName string // Extract rule name based on element value type @@ -211,71 +197,82 @@ func (g *DependencyGraph) canRuleReferenceTerminate(element Element) bool { case ReferenceValue: referencedRuleName = value.Name case BlockValue: - // For block values, we need to check if any alternative in the block can terminate - return g.canBlockValueTerminate(value) + // For block values, we need to check if any alternative in the block can terminate immediately + return g.canBlockValueTerminateImmediately(value) default: return false } - // Check if the referenced rule exists and can terminate + // Check if the referenced rule exists and can terminate immediately referencedNode := g.GetNode(referencedRuleName) if referencedNode == nil { - // Handle ANTLR built-in tokens that are always terminal + // Handle ANTLR built-in tokens that are always immediately terminal if isAntlrBuiltinToken(referencedRuleName) { return true } // Rule not found - could be a forward reference or external rule - // For now, we'll be conservative and assume it cannot terminate + // For now, we'll be conservative and assume it cannot terminate immediately return false } - return referencedNode.HasTerminalAlternatives + return referencedNode.HasImmediatelyTerminalAlternatives } -// canBlockValueTerminate checks if a block value can terminate -func (g *DependencyGraph) canBlockValueTerminate(block BlockValue) bool { - // A block can terminate if any of its alternatives can terminate +// canBlockValueTerminateImmediately checks if a block value can terminate immediately +func (g *DependencyGraph) canBlockValueTerminateImmediately(block BlockValue) bool { + // A block can terminate immediately if any of its alternatives can terminate immediately for _, alt := range block.Alternatives { - if g.canAlternativeTerminate(alt) { + if g.canAlternativeTerminateImmediately(alt) { return true } } return false } -// ValidateGrammar checks if all rules have at least one terminal alternative +// CanAlternativeTerminateImmediately checks if an alternative can terminate immediately (exported for testing) +func (g *DependencyGraph) CanAlternativeTerminateImmediately(alt Alternative) bool { + return g.canAlternativeTerminateImmediately(alt) +} + +// CanElementTerminateImmediately checks if a single element can terminate immediately (exported for testing) +func (g *DependencyGraph) CanElementTerminateImmediately(element Element) bool { + return g.canElementTerminateImmediately(element) +} + + +// ValidateGrammar checks if all rules have at least 
one immediately terminal alternative func (g *DependencyGraph) ValidateGrammar() error { var invalidRules []string for ruleName, node := range g.Nodes { - if !node.HasTerminalAlternatives { + if !node.HasImmediatelyTerminalAlternatives { invalidRules = append(invalidRules, ruleName) } } if len(invalidRules) > 0 { - return fmt.Errorf("grammar validation failed: the following rules have no terminal alternatives: %v", invalidRules) + return fmt.Errorf("grammar validation failed: the following rules have no immediately terminal alternatives: %v", invalidRules) } return nil } -// GetTerminalAlternatives returns the indices of terminal alternatives for a rule -func (g *DependencyGraph) GetTerminalAlternatives(ruleName string) []int { +// GetImmediatelyTerminalAlternatives returns the indices of immediately terminal alternatives for a rule +func (g *DependencyGraph) GetImmediatelyTerminalAlternatives(ruleName string) []int { node := g.GetNode(ruleName) if node == nil { return nil } - return node.TerminalAlternativeIndex + return node.ImmediatelyTerminalAlternativeIndex } -// HasTerminalAlternatives checks if a rule has terminal alternatives -func (g *DependencyGraph) HasTerminalAlternatives(ruleName string) bool { +// HasImmediatelyTerminalAlternatives checks if a rule has immediately terminal alternatives +func (g *DependencyGraph) HasImmediatelyTerminalAlternatives(ruleName string) bool { node := g.GetNode(ruleName) if node == nil { return false } - return node.HasTerminalAlternatives + return node.HasImmediatelyTerminalAlternatives } // PrintAnalysisResults prints the dependency graph analysis results for debugging @@ -283,8 +280,8 @@ func (g *DependencyGraph) PrintAnalysisResults() { fmt.Println("=== Dependency Graph Analysis Results ===") for ruleName, node := range g.Nodes { fmt.Printf("Rule: %s (lexer=%t)\n", ruleName, node.IsLexer) - fmt.Printf(" HasTerminalAlternatives: %t\n", node.HasTerminalAlternatives) - fmt.Printf(" TerminalAlternativeIndex: %v\n", node.TerminalAlternativeIndex) + fmt.Printf(" HasImmediatelyTerminalAlternatives: %t\n", node.HasImmediatelyTerminalAlternatives) + fmt.Printf(" ImmediatelyTerminalAlternativeIndex: %v\n", node.ImmediatelyTerminalAlternativeIndex) fmt.Printf(" Total alternatives: %d\n", len(node.Alternatives)) fmt.Println() } diff --git a/tools/fuzzing/tests/dependency_test.go b/tools/fuzzing/tests/dependency_test.go deleted file mode 100644 index 25b5b10..0000000 --- a/tools/fuzzing/tests/dependency_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package tests - -import ( - "fmt" - "path/filepath" - "testing" - - "github.com/bytebase/parser/tools/fuzzing/internal/grammar" -) - -func TestDependencyGraphConstruction(t *testing.T) { - repoRoot := getRepoRoot() - - // PostgreSQL grammar file paths - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - // Parse grammar files - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) - if err != nil { - t.Fatalf("Failed to parse grammar files: %v", err) - } - - // Test dependency graph exists - depGraph := parsedGrammar.GetDependencyGraph() - if depGraph == nil { - t.Fatal("Dependency graph was not created") - } - - // Test that nodes were created for rules - totalRules := len(parsedGrammar.GetAllRules()) - if len(depGraph.Nodes) != totalRules { - t.Errorf("Expected %d nodes in dependency graph, got %d", totalRules, len(depGraph.Nodes)) - } - - // Test lexer rules are marked as terminal 
- lexerTerminalCount := 0 - for ruleName := range parsedGrammar.LexerRules { - if depGraph.HasTerminalAlternatives(ruleName) { - lexerTerminalCount++ - } - } - - fmt.Printf("Lexer rules marked as terminal: %d/%d\n", lexerTerminalCount, len(parsedGrammar.LexerRules)) - - if lexerTerminalCount == 0 { - t.Error("No lexer rules were marked as terminal") - } - - // Debug: Print first 10 lexer and parser rules to see what's available - fmt.Println("\nFirst 10 lexer rules:") - count := 0 - for ruleName := range parsedGrammar.LexerRules { - if count < 10 { - fmt.Printf(" %s\n", ruleName) - count++ - } - } - - fmt.Println("\nFirst 10 parser rules:") - count = 0 - for ruleName := range parsedGrammar.ParserRules { - if count < 10 { - node := depGraph.GetNode(ruleName) - fmt.Printf(" %s (HasTerminal=%t, TerminalAlts=%v)\n", - ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) - count++ - } - } - - // Test some specific rules that should exist - testRules := []string{"selectstmt", "a_expr", "IDENT", "ICONST"} - - for _, ruleName := range testRules { - node := depGraph.GetNode(ruleName) - if node != nil { - fmt.Printf("Rule %s: HasTerminalAlternatives=%t, TerminalAlts=%v\n", - ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) - } else { - fmt.Printf("Rule %s: Not found in dependency graph\n", ruleName) - } - } - - t.Log("Dependency graph construction completed successfully") -} - -func TestGrammarValidation(t *testing.T) { - repoRoot := getRepoRoot() - - // PostgreSQL grammar file paths - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - // Parse grammar files - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) - if err != nil { - t.Fatalf("Failed to parse grammar files: %v", err) - } - - // Validate grammar - err = parsedGrammar.ValidateGrammar() - if err != nil { - t.Errorf("Grammar validation failed: %v", err) - - // Print analysis results for debugging - fmt.Println("\n=== Grammar Analysis Results ===") - parsedGrammar.PrintDependencyAnalysis() - } else { - t.Log("Grammar validation passed - all rules have terminal alternatives") - } -} - -func TestDependencyGraphSpecificRules(t *testing.T) { - repoRoot := getRepoRoot() - - // PostgreSQL grammar file paths - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - // Parse grammar files - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) - if err != nil { - t.Fatalf("Failed to parse grammar files: %v", err) - } - - depGraph := parsedGrammar.GetDependencyGraph() - - // Test specific known patterns - tests := []struct { - ruleName string - expectTerminal bool - description string - }{ - {"OPEN_PAREN", true, "Lexer rule should be terminal"}, - {"SELECT", true, "Lexer rule should be terminal"}, - {"selectstmt", true, "Should have at least one terminal alternative"}, - {"a_expr", true, "Expression rule should have terminal alternatives"}, - } - - for _, test := range tests { - hasTerminal := depGraph.HasTerminalAlternatives(test.ruleName) - if hasTerminal != test.expectTerminal { - t.Errorf("Rule %s: expected HasTerminalAlternatives=%t, got %t (%s)", - test.ruleName, test.expectTerminal, hasTerminal, test.description) - } - - if hasTerminal { - terminalAlts := depGraph.GetTerminalAlternatives(test.ruleName) - if len(terminalAlts) == 0 { - 
t.Errorf("Rule %s: HasTerminalAlternatives=true but no terminal alternatives found", test.ruleName) - } - fmt.Printf("✓ Rule %s has %d terminal alternatives: %v\n", test.ruleName, len(terminalAlts), terminalAlts) - } - } -} \ No newline at end of file diff --git a/tools/fuzzing/tests/postgresql_test.go b/tools/fuzzing/tests/postgresql_test.go index 3ba36e2..aaa8363 100644 --- a/tools/fuzzing/tests/postgresql_test.go +++ b/tools/fuzzing/tests/postgresql_test.go @@ -17,7 +17,7 @@ func getRepoRoot() string { return filepath.Join(filepath.Dir(filename), "..", "..", "..") } -func TestPostgreSQLSelectStmt(t *testing.T) { +func TestPostgreSQLRootStmt(t *testing.T) { repoRoot := getRepoRoot() // PostgreSQL grammar file paths @@ -33,24 +33,24 @@ func TestPostgreSQLSelectStmt(t *testing.T) { seed int64 }{ { - name: "Simple SELECT statements", - startRule: "selectstmt", + name: "Simple root", + startRule: "root", count: 3, maxDepth: 10, optionalProb: 0.7, seed: 42, }, { - name: "Deep SELECT statements", - startRule: "selectstmt", + name: "Deep root", + startRule: "root", count: 2, maxDepth: 8, optionalProb: 0.5, seed: 123, }, { - name: "Minimal SELECT statements", - startRule: "selectstmt", + name: "Minimal root", + startRule: "root", count: 5, maxDepth: 3, optionalProb: 0.3, diff --git a/tools/fuzzing/tests/recursive_test.go b/tools/fuzzing/tests/recursive_test.go deleted file mode 100644 index a48b783..0000000 --- a/tools/fuzzing/tests/recursive_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package tests - -import ( - "os" - "path/filepath" - "testing" - - "github.com/bytebase/parser/tools/fuzzing/internal/grammar" -) - -func TestPureLeftRecursiveGrammarNonTerminal(t *testing.T) { - // Create a grammar with ONLY left-recursive rules (no terminal alternatives) - tempDir := t.TempDir() - grammarContent := `grammar PureLeftRecursive; - -// Parser rules -root: expr EOF; - -// This rule has NO terminal alternatives - pure left recursion -expr: expr '+' expr - | expr '*' expr - ; - -// Lexer rules -PLUS: '+'; -MULTIPLY: '*'; -WS: [ \t\r\n]+ -> skip; -EOF: ''; -` - - grammarFile := filepath.Join(tempDir, "PureLeftRecursive.g4") - err := os.WriteFile(grammarFile, []byte(grammarContent), 0644) - if err != nil { - t.Fatalf("Failed to create pure left-recursive grammar file: %v", err) - } - - // Parse the grammar - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{grammarFile}) - if err != nil { - t.Fatalf("Failed to parse pure left-recursive grammar: %v", err) - } - - depGraph := parsedGrammar.GetDependencyGraph() - - // Check that the expr rule is NOT terminal - exprNode := depGraph.GetNode("expr") - if exprNode == nil { - t.Fatal("expr rule not found in dependency graph") - } - - t.Logf("=== Pure Left-Recursive Grammar Analysis ===") - t.Logf("expr rule has %d alternatives", len(exprNode.Alternatives)) - t.Logf("expr HasTerminalAlternatives: %t", exprNode.HasTerminalAlternatives) - t.Logf("expr TerminalAlternativeIndex: %v", exprNode.TerminalAlternativeIndex) - - // This rule should NOT have terminal alternatives because all alternatives - // are left-recursive and there's no base case - if exprNode.HasTerminalAlternatives { - t.Errorf("Expected pure left-recursive expr rule to NOT have terminal alternatives, but it does") - } - - // Validate the grammar should fail - err = depGraph.ValidateGrammar() - if err == nil { - t.Errorf("Expected grammar validation to fail for pure left-recursive grammar, but it passed") - } else { - t.Logf("Grammar validation correctly failed: %v", err) - } - - // Check 
that root is also non-terminal because it depends on non-terminal expr - rootNode := depGraph.GetNode("root") - if rootNode == nil { - t.Fatal("root rule not found in dependency graph") - } - - if rootNode.HasTerminalAlternatives { - t.Errorf("Expected root rule to be non-terminal due to non-terminal expr dependency, but it's terminal") - } - - t.Logf("root HasTerminalAlternatives: %t (expected false)", rootNode.HasTerminalAlternatives) -} diff --git a/tools/fuzzing/tests/terminal_completeness_test.go b/tools/fuzzing/tests/terminal_completeness_test.go deleted file mode 100644 index 6507552..0000000 --- a/tools/fuzzing/tests/terminal_completeness_test.go +++ /dev/null @@ -1,361 +0,0 @@ -package tests - -import ( - "fmt" - "path/filepath" - "sort" - "testing" - - "github.com/bytebase/parser/tools/fuzzing/internal/grammar" -) - -func TestPostgreSQLTerminalCompleteness(t *testing.T) { - repoRoot := getRepoRoot() - - // PostgreSQL grammar file paths - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - // Parse grammar files - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) - if err != nil { - t.Fatalf("Failed to parse grammar files: %v", err) - } - - depGraph := parsedGrammar.GetDependencyGraph() - - // Analyze all nodes - fmt.Println("=== PostgreSQL Grammar Terminal Analysis ===") - - totalNodes := len(depGraph.Nodes) - terminalNodes := 0 - nonTerminalNodes := 0 - - var terminalRules []string - var nonTerminalRules []string - - lexerTerminal := 0 - lexerNonTerminal := 0 - parserTerminal := 0 - parserNonTerminal := 0 - - for ruleName, node := range depGraph.Nodes { - if node.HasTerminalAlternatives { - terminalNodes++ - terminalRules = append(terminalRules, ruleName) - if node.IsLexer { - lexerTerminal++ - } else { - parserTerminal++ - } - } else { - nonTerminalNodes++ - nonTerminalRules = append(nonTerminalRules, ruleName) - if node.IsLexer { - lexerNonTerminal++ - } else { - parserNonTerminal++ - } - } - } - - fmt.Printf("Total Nodes: %d\n", totalNodes) - fmt.Printf("Terminal Nodes: %d (%.1f%%)\n", terminalNodes, float64(terminalNodes)/float64(totalNodes)*100) - fmt.Printf("Non-Terminal Nodes: %d (%.1f%%)\n", nonTerminalNodes, float64(nonTerminalNodes)/float64(totalNodes)*100) - fmt.Println() - - fmt.Printf("Lexer Rules: Terminal=%d, Non-Terminal=%d\n", lexerTerminal, lexerNonTerminal) - fmt.Printf("Parser Rules: Terminal=%d, Non-Terminal=%d\n", parserTerminal, parserNonTerminal) - fmt.Println() - - // Show non-terminal rules (these should ideally be zero) - if len(nonTerminalRules) > 0 { - sort.Strings(nonTerminalRules) - fmt.Printf("❌ Non-Terminal Rules (%d):\n", len(nonTerminalRules)) - for i, ruleName := range nonTerminalRules { - node := depGraph.GetNode(ruleName) - ruleType := "PARSER" - if node.IsLexer { - ruleType = "LEXER" - } - fmt.Printf(" %d. 
%s (%s, %d alternatives)\n", i+1, ruleName, ruleType, len(node.Alternatives)) - } - fmt.Println() - } else { - fmt.Println("✅ All rules have terminal alternatives!") - } - - // Test: If your hypothesis is correct, this should pass - if nonTerminalNodes == 0 { - t.Log("✅ HYPOTHESIS CONFIRMED: All PostgreSQL rules have terminal alternatives") - } else { - t.Errorf("❌ HYPOTHESIS REJECTED: %d rules have no terminal alternatives", nonTerminalNodes) - - // Analyze WHY these rules don't have terminal alternatives - fmt.Println("=== Analysis of Non-Terminal Rules ===") - analyzeNonTerminalRules(parsedGrammar, depGraph, nonTerminalRules[:min(5, len(nonTerminalRules))]) - } -} - -func TestSpecificNonTerminalRules(t *testing.T) { - repoRoot := getRepoRoot() - - lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) - if err != nil { - t.Fatalf("Failed to parse grammar files: %v", err) - } - - depGraph := parsedGrammar.GetDependencyGraph() - - // Test specific rules that should be terminal based on our earlier analysis - expectedTerminalRules := []string{ - "columnref", // Should be terminal (colid + indirection) - "c_expr", // Should be terminal (has columnref alternative) - "a_expr_typecast", // Should be terminal (depends on c_expr) - "a_expr_collate", // Should be terminal (depends on a_expr_typecast) - } - - fmt.Println("=== Testing Expected Terminal Rules ===") - for _, ruleName := range expectedTerminalRules { - node := depGraph.GetNode(ruleName) - if node == nil { - t.Errorf("Rule %s not found", ruleName) - continue - } - - fmt.Printf("%s: HasTerminal=%t, TerminalAlts=%v\n", - ruleName, node.HasTerminalAlternatives, node.TerminalAlternativeIndex) - - if !node.HasTerminalAlternatives { - t.Errorf("Expected %s to be terminal, but it's not", ruleName) - } - } -} - -func analyzeNonTerminalRules(parsedGrammar *grammar.ParsedGrammar, depGraph *grammar.DependencyGraph, ruleNames []string) { - for _, ruleName := range ruleNames { - rule := parsedGrammar.GetRule(ruleName) - if rule == nil { - continue - } - - fmt.Printf("\n--- Analyzing %s ---\n", ruleName) - fmt.Printf("Type: %s, Alternatives: %d\n", - map[bool]string{true: "LEXER", false: "PARSER"}[rule.IsLexer], len(rule.Alternatives)) - - for altIndex, alt := range rule.Alternatives { - canTerminate := depGraph.CanAlternativeTerminate(alt) - fmt.Printf(" Alt %d (%d elements): canTerminate=%t\n", altIndex, len(alt.Elements), canTerminate) - - for elemIndex, element := range alt.Elements { - canElemTerminate := depGraph.CanElementTerminate(element) - fmt.Printf(" Elem %d: %s", elemIndex, element.Value.String()) - - if element.IsQuantified() { - fmt.Printf("[%v]", element.Quantifier) - } - - fmt.Printf(" → canTerminate=%t", canElemTerminate) - - // If it's a rule reference, show the referenced rule's status - if element.IsRule() { - if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - referencedNode := depGraph.GetNode(refValue.Name) - if referencedNode != nil { - fmt.Printf(" (ref: %s, hasTerminal=%t)", refValue.Name, referencedNode.HasTerminalAlternatives) - } else { - fmt.Printf(" (ref: %s, NOT_FOUND)", refValue.Name) - } - } - } - - fmt.Println() - } - } - } -} - -// Test to validate that our terminal propagation algorithm is working correctly -func TestManualTerminalPropagation(t *testing.T) { - repoRoot := getRepoRoot() - - lexerPath := 
filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4") - parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4") - - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{lexerPath, parserPath}) - if err != nil { - t.Fatalf("Failed to parse grammar files: %v", err) - } - - // Create fresh dependency graph and run manual propagation - freshGraph := grammar.NewDependencyGraph() - - // Add all nodes - for ruleName, rule := range parsedGrammar.GetAllRules() { - freshGraph.AddNode(ruleName, rule) - } - - // Mark lexer rules as terminal - initialTerminalCount := 0 - for _, node := range freshGraph.Nodes { - if node.IsLexer { - node.HasTerminalAlternatives = true - for i := range node.Alternatives { - node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, i) - } - initialTerminalCount++ - } - } - - fmt.Printf("Starting with %d lexer rules marked as terminal\n", initialTerminalCount) - - // Manual propagation with more iterations - maxIterations := 100 - totalNewTerminals := 0 - - for iteration := 0; iteration < maxIterations; iteration++ { - changed := false - newTerminalsThisIteration := 0 - - for _, node := range freshGraph.Nodes { - if node.IsLexer || node.HasTerminalAlternatives { - continue - } - - for altIndex, alt := range node.Alternatives { - // Check if already marked - alreadyMarked := false - for _, termIndex := range node.TerminalAlternativeIndex { - if termIndex == altIndex { - alreadyMarked = true - break - } - } - if alreadyMarked { - continue - } - - // Check if this alternative can terminate - if canAlternativeTerminateManual(alt, freshGraph) { - if !node.HasTerminalAlternatives { - node.HasTerminalAlternatives = true - changed = true - newTerminalsThisIteration++ - totalNewTerminals++ - } - node.TerminalAlternativeIndex = append(node.TerminalAlternativeIndex, altIndex) - } - } - } - - if newTerminalsThisIteration > 0 { - fmt.Printf("Iteration %d: +%d new terminal rules (total: %d)\n", - iteration+1, newTerminalsThisIteration, initialTerminalCount+totalNewTerminals) - } - - if !changed { - fmt.Printf("Converged after %d iterations\n", iteration+1) - break - } - - if iteration == maxIterations-1 { - fmt.Printf("Reached max iterations (%d)\n", maxIterations) - } - } - - // Count final results - finalTerminalCount := 0 - finalNonTerminalCount := 0 - - for _, node := range freshGraph.Nodes { - if node.HasTerminalAlternatives { - finalTerminalCount++ - } else { - finalNonTerminalCount++ - } - } - - fmt.Printf("\nFinal Results:\n") - fmt.Printf("Terminal: %d\n", finalTerminalCount) - fmt.Printf("Non-Terminal: %d\n", finalNonTerminalCount) - - if finalNonTerminalCount == 0 { - t.Log("✅ Manual propagation: All rules are terminal!") - } else { - t.Logf("❌ Manual propagation: Still %d non-terminal rules", finalNonTerminalCount) - } -} - -func canAlternativeTerminateManual(alt grammar.Alternative, graph *grammar.DependencyGraph) bool { - // Empty alternative is always terminal - if len(alt.Elements) == 0 { - return true - } - - // All elements must be able to terminate - for _, element := range alt.Elements { - if !canElementTerminateManual(element, graph) { - return false - } - } - - return true -} - -func canElementTerminateManual(element grammar.Element, graph *grammar.DependencyGraph) bool { - // Terminal elements (literals) can always terminate - if element.IsTerminal() { - return true - } - - // Handle quantified elements - THIS IS KEY! - if element.IsQuantified() { - // * and ? 
quantifiers can generate 0 occurrences, so they can terminate - if element.Quantifier == grammar.ZERO_MORE || element.Quantifier == grammar.OPTIONAL_Q { - return true // Can be empty, so always terminal - } - // + quantifier requires at least one occurrence, so check the content - } - - // For rule references - if element.IsRule() { - switch value := element.Value.(type) { - case grammar.ReferenceValue: - referencedNode := graph.GetNode(value.Name) - if referencedNode == nil { - // Handle ANTLR built-in tokens like EOF - return isBuiltinTerminal(value.Name) - } - return referencedNode.HasTerminalAlternatives - case grammar.BlockValue: - // A block can terminate if any of its alternatives can terminate - for _, alt := range value.Alternatives { - if canAlternativeTerminateManual(alt, graph) { - return true - } - } - return false - } - } - - return false -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -// isBuiltinTerminal checks if a token name refers to an ANTLR built-in token -func isBuiltinTerminal(tokenName string) bool { - builtinTokens := map[string]bool{ - "EOF": true, // End-of-file token - "": true, // Alternative EOF notation - } - return builtinTokens[tokenName] -} diff --git a/tools/fuzzing/tests/unterminated_grammar_test.go b/tools/fuzzing/tests/unterminated_grammar_test.go deleted file mode 100644 index d02d095..0000000 --- a/tools/fuzzing/tests/unterminated_grammar_test.go +++ /dev/null @@ -1,104 +0,0 @@ -package tests - -import ( - "os" - "strings" - "testing" - - "github.com/bytebase/parser/tools/fuzzing/internal/grammar" -) - -func TestUnterminatedGrammarErrorReporting(t *testing.T) { - // Create two grammar files that when merged create unterminated rules - lexerContent := ` -lexer grammar TestLexer; - -PLUS: '+' ; -NUMBER: [0-9]+ ; -` - - parserContent := ` -parser grammar TestParser; - -// Import tokens from lexer -options { tokenVocab=TestLexer; } - -// This creates infinite left recursion with no terminal alternatives -expr: expr PLUS expr ; -` - - // Write the grammars to temporary files - tmpLexer := "/tmp/test_lexer.g4" - tmpParser := "/tmp/test_parser.g4" - - err := os.WriteFile(tmpLexer, []byte(lexerContent), 0644) - if err != nil { - t.Fatalf("Failed to write lexer grammar: %v", err) - } - defer os.Remove(tmpLexer) - - err = os.WriteFile(tmpParser, []byte(parserContent), 0644) - if err != nil { - t.Fatalf("Failed to write parser grammar: %v", err) - } - defer os.Remove(tmpParser) - - // Try to parse and merge the grammars - this should fail with terminal reachability error - _, err = grammar.ParseAndMergeGrammarFiles([]string{tmpLexer, tmpParser}) - - // Verify that we get the expected error - if err == nil { - t.Fatal("Expected error for unterminated grammar, but got none") - } - - if !strings.Contains(err.Error(), "without terminal alternatives") { - t.Errorf("Expected error about terminal alternatives, got: %v", err) - } - - if !strings.Contains(err.Error(), "expr") { - t.Errorf("Expected error to mention 'expr' rule, got: %v", err) - } - - t.Logf("✅ Correctly detected unterminated grammar: %v", err) -} - -func TestValidSimpleGrammar(t *testing.T) { - // Create a simple grammar that should work - grammarContent := ` -grammar TestGrammar; - -// This has terminal alternatives -expr: expr '+' expr | NUMBER ; - -// Lexer rules -PLUS: '+' ; -NUMBER: [0-9]+ ; -` - - // Write the grammar to a temporary file - tmpFile := "/tmp/test_valid.g4" - err := os.WriteFile(tmpFile, []byte(grammarContent), 0644) - if err != nil { - t.Fatalf("Failed to 
write test grammar: %v", err) - } - defer os.Remove(tmpFile) - - // Try to parse the grammar - this should succeed - parsedGrammar, err := grammar.ParseAndMergeGrammarFiles([]string{tmpFile}) - if err != nil { - t.Fatalf("Expected valid grammar to parse successfully, got error: %v", err) - } - - // Verify the grammar was parsed correctly - if parsedGrammar == nil { - t.Fatal("Expected parsed grammar, got nil") - } - - // Check that expr rule exists and has terminal alternatives - depGraph := parsedGrammar.GetDependencyGraph() - if !depGraph.HasTerminalAlternatives("expr") { - t.Error("Expected expr rule to have terminal alternatives") - } - - t.Log("✅ Valid grammar parsed successfully with terminal alternatives") -} \ No newline at end of file From 9c7ea401f461c064d5a02f80f2e08d792d240444 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Mon, 8 Sep 2025 11:23:20 +0800 Subject: [PATCH 06/15] chore: update --- tools/fuzzing/internal/generator/generator.go | 6 +- tools/fuzzing/internal/grammar/dependency.go | 69 +++++++++---------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index bcde7f3..ac6cb82 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -109,7 +109,8 @@ func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activ // Select a random alternative if len(rule.Alternatives) == 0 { - return fmt.Sprintf("<%s>", ruleName) + // rule:; is valid but has no alternatives, return empty directly. + return "" } altIndex := g.random.Intn(len(rule.Alternatives)) @@ -148,7 +149,7 @@ func (g *Generator) forceTerminalGeneration(ruleName string) string { fmt.Printf("Warning: Rule %s has no immediately terminal alternatives, generating simple fallback\n", ruleName) return generateSimpleFallback(ruleName) } - + randomIndex := g.random.Intn(len(immediateTerminalAlts)) selectedAltIndex := immediateTerminalAlts[randomIndex] @@ -167,7 +168,6 @@ func (g *Generator) forceTerminalGeneration(ruleName string) string { } } - // ForceTerminalGenerationPublic exposes forceTerminalGeneration for testing func (g *Generator) ForceTerminalGenerationPublic(ruleName string) string { return g.forceTerminalGeneration(ruleName) diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index 4ba4c5c..ab0c40f 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -11,11 +11,11 @@ type DependencyGraph struct { // GraphNode represents a single rule in the dependency graph type GraphNode struct { - RuleName string // Rule name (e.g., "selectStmt", "expr") - Alternatives []Alternative // All alternatives for this rule - HasImmediatelyTerminalAlternatives bool // Has at least one immediately terminal alternative - ImmediatelyTerminalAlternativeIndex []int // Indices of alternatives that are immediately terminal - IsLexer bool // Whether this is a lexer rule + RuleName string // Rule name (e.g., "selectStmt", "expr") + Alternatives []Alternative // All alternatives for this rule + HasImmediatelyTerminalAlternatives bool // Has at least one immediately terminal alternative + ImmediatelyTerminalAlternativeIndex []int // Indices of alternatives that are immediately terminal + IsLexer bool // Whether this is a lexer rule } // NewDependencyGraph creates a new dependency graph @@ -28,11 +28,11 @@ func NewDependencyGraph() *DependencyGraph { // AddNode 
adds a rule node to the dependency graph func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { node := &GraphNode{ - RuleName: ruleName, - Alternatives: rule.Alternatives, - HasImmediatelyTerminalAlternatives: false, + RuleName: ruleName, + Alternatives: rule.Alternatives, + HasImmediatelyTerminalAlternatives: false, ImmediatelyTerminalAlternativeIndex: []int{}, - IsLexer: rule.IsLexer, + IsLexer: rule.IsLexer, } g.Nodes[ruleName] = node } @@ -51,15 +51,15 @@ func (g *DependencyGraph) AnalyzeTerminalReachabilityWithValidation(validateUnterminated bool) error { // Phase 1: Mark lexer rules as immediately terminal g.markLexerRulesAsImmediatelyTerminal() - + // Phase 2: Analyze immediately terminal alternatives g.analyzeImmediatelyTerminalAlternatives() - + // Phase 3: Check for nodes without immediately terminal alternatives and report error (only if requested) if validateUnterminated { return g.validateImmediatelyTerminalReachability() } - + return nil } @@ -82,23 +82,23 @@ func (g *DependencyGraph) analyzeImmediatelyTerminalAlternatives() { changed := true iterations := 0 maxIterations := len(g.Nodes) * 2 // Prevent infinite loops - + for changed && iterations < maxIterations { changed = false iterations++ - + for _, node := range g.Nodes { if node.IsLexer { continue // Already processed } - + // Check each alternative to see if it's immediately terminal for altIndex, alt := range node.Alternatives { // Skip if this alternative is already marked as immediately terminal if g.isAlternativeAlreadyMarkedImmediate(node, altIndex) { continue } - + if g.canAlternativeTerminateImmediately(alt) { if !node.HasImmediatelyTerminalAlternatives { node.HasImmediatelyTerminalAlternatives = true @@ -110,7 +110,7 @@ func (g *DependencyGraph) analyzeImmediatelyTerminalAlternatives() { } } } - + if iterations >= maxIterations { fmt.Printf("Warning: Immediately terminal analysis reached max iterations (%d)\n", maxIterations) } @@ -119,18 +119,18 @@ // validateImmediatelyTerminalReachability checks for rules without immediately terminal alternatives and reports errors func (g *DependencyGraph) validateImmediatelyTerminalReachability() error { var unterminatedRules []string - + for ruleName, node := range g.Nodes { if !node.HasImmediatelyTerminalAlternatives { unterminatedRules = append(unterminatedRules, ruleName) } } - + if len(unterminatedRules) > 0 { - return fmt.Errorf("grammar contains %d rules without immediately terminal alternatives: %v", + return fmt.Errorf("grammar contains %d rules without immediately terminal alternatives: %v", len(unterminatedRules), unterminatedRules) } - + return nil } @@ -150,14 +150,14 @@ func (g *DependencyGraph) canAlternativeTerminateImmediately(alt Alternative) bo if len(alt.Elements) == 0 { return true } - + // Check each element in the alternative for _, element := range alt.Elements { if !g.canElementTerminateImmediately(element) { return false } } - + return true }
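// A minimal illustrative sketch, not from the original patch: quantifiers decide
// immediate termination. For toy rules
//
//	list: item (',' item)* ;   ids: ID+ ;
//
// the (',' item)* block is immediately terminal because zero occurrences are
// allowed, while ID+ must produce at least one token and therefore consults the
// referenced ID rule (a lexer rule, hence immediately terminal as well).
@@ -167,7 +167,7 @@ func (g *DependencyGraph) canElementTerminateImmediately(element Element) bool { if element.IsTerminal() { return true } - + // Handle quantified elements if element.IsQuantified() { // * and ? 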
quantifiers can generate 0 occurrences, so they can terminate immediately @@ -179,19 +179,19 @@ func (g *DependencyGraph) canElementTerminateImmediately(element Element) bool { return g.canRuleReferenceTerminateImmediately(element) } } - + // For rule references, check if the referenced rule can terminate immediately if element.IsRule() { return g.canRuleReferenceTerminateImmediately(element) } - + return false } // canRuleReferenceTerminateImmediately checks if a rule reference can terminate immediately func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) bool { var referencedRuleName string - + // Extract rule name based on element value type switch value := element.Value.(type) { case ReferenceValue: @@ -202,7 +202,7 @@ func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) default: return false } - + // Check if the referenced rule exists and can terminate immediately referencedNode := g.GetNode(referencedRuleName) if referencedNode == nil { @@ -214,7 +214,7 @@ func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) // For now, we'll be conservative and assume it cannot terminate immediately return false } - + return referencedNode.HasImmediatelyTerminalAlternatives } @@ -239,21 +239,20 @@ func (g *DependencyGraph) CanElementTerminateImmediately(element Element) bool { return g.canElementTerminateImmediately(element) } - // ValidateGrammar checks if all rules have at least one immediately terminal alternative func (g *DependencyGraph) ValidateGrammar() error { var invalidRules []string - + for ruleName, node := range g.Nodes { if !node.HasImmediatelyTerminalAlternatives { invalidRules = append(invalidRules, ruleName) } } - + if len(invalidRules) > 0 { return fmt.Errorf("grammar validation failed: the following rules have no immediately terminal alternatives: %v", invalidRules) } - + return nil } @@ -295,6 +294,6 @@ func isAntlrBuiltinToken(tokenName string) bool { "EOF": true, // End-of-file token "": true, // Alternative EOF notation } - + return builtinTokens[tokenName] -} \ No newline at end of file +} From 2b61ae0bbdfb1f70261ea03b0827a657951eb8e5 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 10:30:34 +0800 Subject: [PATCH 07/15] feat: introduce SCC --- tools/fuzzing/internal/generator/generator.go | 1 - tools/fuzzing/internal/grammar/dependency.go | 246 ++++++++++++- tools/fuzzing/internal/grammar/scc_test.go | 332 ++++++++++++++++++ 3 files changed, 575 insertions(+), 4 deletions(-) create mode 100644 tools/fuzzing/internal/grammar/scc_test.go diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index ac6cb82..e157f16 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -622,4 +622,3 @@ func generateSimpleFallback(ruleName string) string { return "1" } } - diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index ab0c40f..c0cce63 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -7,6 +7,8 @@ import ( // DependencyGraph represents the dependency relationships between grammar rules type DependencyGraph struct { Nodes map[string]*GraphNode + Edges map[string][]string // Adjacency list: rule -> referenced rules + SCCs [][]string // List of SCCs (each SCC is a list of rule names) } // GraphNode represents a single rule in the dependency graph @@ -16,12 +18,17 @@ type 
GraphNode struct { HasImmediatelyTerminalAlternatives bool // Has at least one immediately terminal alternative ImmediatelyTerminalAlternativeIndex []int // Indices of alternatives that are immediately terminal IsLexer bool // Whether this is a lexer rule + SCCID int // Which SCC this node belongs to (-1 if not computed) + SCCSize int // Size of the SCC this node belongs to + IsRecursive bool // True if part of a recursive SCC (size > 1 or self-loop) } // NewDependencyGraph creates a new dependency graph func NewDependencyGraph() *DependencyGraph { return &DependencyGraph{ Nodes: make(map[string]*GraphNode), + Edges: make(map[string][]string), + SCCs: [][]string{}, } } @@ -33,8 +40,14 @@ func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { HasImmediatelyTerminalAlternatives: false, ImmediatelyTerminalAlternativeIndex: []int{}, IsLexer: rule.IsLexer, + SCCID: -1, + SCCSize: 0, + IsRecursive: false, } g.Nodes[ruleName] = node + + // Build edges for this node + g.buildEdgesForNode(ruleName, rule) } // GetNode retrieves a node by rule name @@ -49,13 +62,17 @@ func (g *DependencyGraph) AnalyzeTerminalReachability() error { // AnalyzeTerminalReachabilityWithValidation performs immediately terminal analysis with optional validation func (g *DependencyGraph) AnalyzeTerminalReachabilityWithValidation(validateUnterminated bool) error { - // Phase 1: Mark lexer rules as immediately terminal + // Phase 1: Compute SCCs to identify recursive rule groups + g.ComputeSCCs() + g.PrintSCCAnalysis() // Debug output + + // Phase 2: Mark lexer rules as immediately terminal g.markLexerRulesAsImmediatelyTerminal() - // Phase 2: Analyze immediately terminal alternatives + // Phase 3: Analyze immediately terminal alternatives g.analyzeImmediatelyTerminalAlternatives() - // Phase 3: Check for nodes without immediately terminal alternatives and report error (only if requested) + // Phase 4: Check for nodes without immediately terminal alternatives and report error (only if requested) if validateUnterminated { return g.validateImmediatelyTerminalReachability() } @@ -297,3 +314,226 @@ func isAntlrBuiltinToken(tokenName string) bool { return builtinTokens[tokenName] } + +// buildEdgesForNode builds the edge list for a given rule node +func (g *DependencyGraph) buildEdgesForNode(ruleName string, rule *Rule) { + referencedRules := make(map[string]bool) + + // Scan all alternatives for rule references + for _, alt := range rule.Alternatives { + g.collectRuleReferences(alt, referencedRules) + } + + // Convert map to slice and store as edges + edges := []string{} + for ref := range referencedRules { + edges = append(edges, ref) + } + g.Edges[ruleName] = edges +} + +// collectRuleReferences collects all rule references in an alternative +func (g *DependencyGraph) collectRuleReferences(alt Alternative, refs map[string]bool) { + for _, element := range alt.Elements { + g.collectElementReferences(element, refs) + } +} + +// collectElementReferences collects rule references from a single element +func (g *DependencyGraph) collectElementReferences(element Element, refs map[string]bool) { + if element.IsRule() { + switch value := element.Value.(type) { + case ReferenceValue: + // Add all rule references (we'll filter lexer rules later if needed) + // Don't check if node exists yet - it might not be added yet + refs[value.Name] = true + case BlockValue: + // Collect references from block alternatives + for _, alt := range value.Alternatives { + g.collectRuleReferences(alt, refs) + } + } + } +} + +// RebuildEdges rebuilds 
all edges after all nodes have been added +func (g *DependencyGraph) RebuildEdges() { + g.Edges = make(map[string][]string) + + for ruleName, node := range g.Nodes { + referencedRules := make(map[string]bool) + + // Scan all alternatives for rule references + for _, alt := range node.Alternatives { + g.collectRuleReferences(alt, referencedRules) + } + + // Filter out lexer rules and non-existent rules + edges := []string{} + for ref := range referencedRules { + if refNode := g.GetNode(ref); refNode != nil && !refNode.IsLexer { + edges = append(edges, ref) + } + } + g.Edges[ruleName] = edges + } +} + +// ComputeSCCs computes strongly connected components using Tarjan's algorithm +func (g *DependencyGraph) ComputeSCCs() { + // Only rebuild edges if they're empty (allows manual edge setup for testing) + if len(g.Edges) == 0 { + g.RebuildEdges() + } + // Initialize for Tarjan's algorithm + index := 0 + stack := []string{} + indices := make(map[string]int) + lowlinks := make(map[string]int) + onStack := make(map[string]bool) + + // Helper function for Tarjan's strongconnect + var strongconnect func(v string) + strongconnect = func(v string) { + // Set the depth index for v to the smallest unused index + indices[v] = index + lowlinks[v] = index + index++ + stack = append(stack, v) + onStack[v] = true + + // Consider successors of v + for _, w := range g.Edges[v] { + if _, ok := indices[w]; !ok { + // Successor w has not yet been visited; recurse on it + strongconnect(w) + if lowlinks[w] < lowlinks[v] { + lowlinks[v] = lowlinks[w] + } + } else if onStack[w] { + // Successor w is in stack S and hence in the current SCC + if indices[w] < lowlinks[v] { + lowlinks[v] = indices[w] + } + } + } + + // If v is a root node, pop the stack and print an SCC + if lowlinks[v] == indices[v] { + scc := []string{} + for { + w := stack[len(stack)-1] + stack = stack[:len(stack)-1] + onStack[w] = false + scc = append(scc, w) + if w == v { + break + } + } + g.SCCs = append(g.SCCs, scc) + } + } + + // Clear existing SCCs + g.SCCs = [][]string{} + + // Run algorithm for all unvisited nodes + for ruleName := range g.Nodes { + if _, ok := indices[ruleName]; !ok { + strongconnect(ruleName) + } + } + + // Update nodes with their SCC information + for sccID, scc := range g.SCCs { + sccSize := len(scc) + isRecursive := sccSize > 1 + + // Check for self-loops if single node SCC + if sccSize == 1 { + ruleName := scc[0] + for _, ref := range g.Edges[ruleName] { + if ref == ruleName { + isRecursive = true + break + } + } + } + + // Update all nodes in this SCC + for _, ruleName := range scc { + if node := g.GetNode(ruleName); node != nil { + node.SCCID = sccID + node.SCCSize = sccSize + node.IsRecursive = isRecursive + } + } + } +} + +// PrintSCCAnalysis prints the SCC analysis results for debugging +func (g *DependencyGraph) PrintSCCAnalysis() { + fmt.Println("\n=== SCC Analysis Results ===") + fmt.Printf("Total SCCs: %d\n", len(g.SCCs)) + + recursiveSCCs := 0 + selfLoopSCCs := 0 + largestSCC := 0 + for i, scc := range g.SCCs { + if len(scc) > 1 { + recursiveSCCs++ + if len(scc) > largestSCC { + largestSCC = len(scc) + } + // Print first 5 multi-node SCCs with more detail + if recursiveSCCs <= 5 { + fmt.Printf("\nSCC %d (RECURSIVE - mutual, size=%d):\n", i, len(scc)) + // Print first 20 nodes of the SCC for better visibility + fmt.Printf(" Members: ") + for j, node := range scc { + if j < 20 { + fmt.Printf("%s ", node) + if j == 19 && len(scc) > 20 { + fmt.Printf("\n ... 
and %d more", len(scc)-20) + } + } + } + fmt.Println() + } + } else if len(scc) == 1 { + // Check for self-loop + ruleName := scc[0] + hasSelfLoop := false + for _, ref := range g.Edges[ruleName] { + if ref == ruleName { + hasSelfLoop = true + break + } + } + if hasSelfLoop { + selfLoopSCCs++ + if selfLoopSCCs <= 10 { // Print first 10 + fmt.Printf("SCC %d (RECURSIVE - self-loop): %s\n", i, ruleName) + } + } + } + } + + fmt.Printf("\nMutually recursive SCCs (size > 1): %d\n", recursiveSCCs) + if recursiveSCCs > 0 { + fmt.Printf("Largest SCC size: %d\n", largestSCC) + } + fmt.Printf("Self-loop SCCs (size = 1 with self-ref): %d\n", selfLoopSCCs) + fmt.Printf("Non-recursive SCCs: %d\n", len(g.SCCs)-recursiveSCCs-selfLoopSCCs) + + // Print sample of recursive rules + fmt.Println("\nSample recursive rules:") + count := 0 + for ruleName, node := range g.Nodes { + if node.IsRecursive && count < 10 { + fmt.Printf(" %s (SCC %d, size %d)\n", ruleName, node.SCCID, node.SCCSize) + count++ + } + } + fmt.Println("=============================") +} diff --git a/tools/fuzzing/internal/grammar/scc_test.go b/tools/fuzzing/internal/grammar/scc_test.go new file mode 100644 index 0000000..5078e3b --- /dev/null +++ b/tools/fuzzing/internal/grammar/scc_test.go @@ -0,0 +1,332 @@ +package grammar + +import ( + "testing" +) + +// TestSCCDetection tests the SCC detection algorithm with various graph patterns +func TestSCCDetection(t *testing.T) { + tests := []struct { + name string + rules map[string][]string // rule -> references + expectedSCCs [][]string // expected SCCs + recursiveRules map[string]bool // which rules should be marked recursive + }{ + { + name: "Simple self-loop", + rules: map[string][]string{ + "a": {"a"}, + }, + expectedSCCs: [][]string{ + {"a"}, + }, + recursiveRules: map[string]bool{ + "a": true, + }, + }, + { + name: "Mutual recursion (2 nodes)", + rules: map[string][]string{ + "a": {"b"}, + "b": {"a"}, + }, + expectedSCCs: [][]string{ + {"b", "a"}, // Order might vary due to algorithm + }, + recursiveRules: map[string]bool{ + "a": true, + "b": true, + }, + }, + { + name: "Cycle of 3 nodes", + rules: map[string][]string{ + "a": {"b"}, + "b": {"c"}, + "c": {"a"}, + }, + expectedSCCs: [][]string{ + {"c", "b", "a"}, + }, + recursiveRules: map[string]bool{ + "a": true, + "b": true, + "c": true, + }, + }, + { + name: "Non-recursive with reference", + rules: map[string][]string{ + "a": {"b"}, + "b": {"c"}, + "c": {}, + }, + expectedSCCs: [][]string{ + {"c"}, + {"b"}, + {"a"}, + }, + recursiveRules: map[string]bool{ + "a": false, + "b": false, + "c": false, + }, + }, + { + name: "Multiple SCCs", + rules: map[string][]string{ + "a": {"b"}, + "b": {"a"}, + "c": {"d"}, + "d": {"c"}, + "e": {}, + }, + expectedSCCs: [][]string{ + {"b", "a"}, + {"d", "c"}, + {"e"}, + }, + recursiveRules: map[string]bool{ + "a": true, + "b": true, + "c": true, + "d": true, + "e": false, + }, + }, + { + name: "Complex with bridge", + rules: map[string][]string{ + "a": {"b", "c"}, + "b": {"a"}, + "c": {"d"}, + "d": {"e"}, + "e": {"c"}, + }, + expectedSCCs: [][]string{ + {"b", "a"}, + {"e", "d", "c"}, + }, + recursiveRules: map[string]bool{ + "a": true, + "b": true, + "c": true, + "d": true, + "e": true, + }, + }, + { + name: "Self-loop with external reference", + rules: map[string][]string{ + "expr": {"expr", "literal"}, + "literal": {}, + }, + expectedSCCs: [][]string{ + {"expr"}, + {"literal"}, + }, + recursiveRules: map[string]bool{ + "expr": true, + "literal": false, + }, + }, + { + name: "PostgreSQL-like pattern", + 
rules: map[string][]string{ + "select_with_parens": {"select_no_parens", "select_with_parens"}, + "select_no_parens": {"table_ref"}, + "table_ref": {"joined_table", "table_ref"}, + "joined_table": {"table_ref"}, + }, + expectedSCCs: [][]string{ + {"select_with_parens"}, + {"joined_table", "table_ref"}, + {"select_no_parens"}, + }, + recursiveRules: map[string]bool{ + "select_with_parens": true, + "select_no_parens": false, + "table_ref": true, + "joined_table": true, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create dependency graph + g := NewDependencyGraph() + + // Add nodes + for ruleName := range tt.rules { + rule := &Rule{ + Name: ruleName, + Alternatives: []Alternative{}, + IsLexer: false, + } + // We need to add the node without building edges automatically + node := &GraphNode{ + RuleName: ruleName, + Alternatives: rule.Alternatives, + HasImmediatelyTerminalAlternatives: false, + ImmediatelyTerminalAlternativeIndex: []int{}, + IsLexer: false, + SCCID: -1, + SCCSize: 0, + IsRecursive: false, + } + g.Nodes[ruleName] = node + } + + // Set up edges manually + g.Edges = tt.rules + + // Compute SCCs + g.ComputeSCCs() + + // Verify number of SCCs + if len(g.SCCs) != len(tt.expectedSCCs) { + t.Errorf("Expected %d SCCs, got %d", len(tt.expectedSCCs), len(g.SCCs)) + t.Logf("SCCs found: %v", g.SCCs) + } + + // Verify each node's recursive status + for ruleName, expectedRecursive := range tt.recursiveRules { + node := g.GetNode(ruleName) + if node == nil { + t.Errorf("Node %s not found", ruleName) + continue + } + + if node.IsRecursive != expectedRecursive { + t.Errorf("Node %s: expected IsRecursive=%v, got %v (SCCID=%d, SCCSize=%d)", + ruleName, expectedRecursive, node.IsRecursive, node.SCCID, node.SCCSize) + } + } + + // Verify all nodes in same SCC have same SCCID + sccNodeMap := make(map[int][]string) + for ruleName, node := range g.Nodes { + if node.SCCID >= 0 { + sccNodeMap[node.SCCID] = append(sccNodeMap[node.SCCID], ruleName) + } + } + + // Log SCC information for debugging + t.Logf("SCCs detected:") + for sccID, nodes := range sccNodeMap { + t.Logf(" SCC %d: %v", sccID, nodes) + } + }) + } +} + +// TestSCCEdgeBuilding tests that edges are correctly built from grammar rules +func TestSCCEdgeBuilding(t *testing.T) { + // Create a simple grammar with references + g := NewDependencyGraph() + + // Add lexer rule (should not create edges) + lexerRule := &Rule{ + Name: "ID", + IsLexer: true, + Alternatives: []Alternative{ + { + Elements: []Element{ + {Value: LiteralValue{Text: "[a-zA-Z]+"}}, + }, + }, + }, + } + g.AddNode("ID", lexerRule) + + // Add parser rule with references + parserRule := &Rule{ + Name: "expr", + IsLexer: false, + Alternatives: []Alternative{ + { + Elements: []Element{ + {Value: ReferenceValue{Name: "expr"}}, + {Value: LiteralValue{Text: "+"}}, + {Value: ReferenceValue{Name: "term"}}, + }, + }, + { + Elements: []Element{ + {Value: ReferenceValue{Name: "term"}}, + }, + }, + }, + } + + // Add term rule + termRule := &Rule{ + Name: "term", + IsLexer: false, + Alternatives: []Alternative{ + { + Elements: []Element{ + {Value: ReferenceValue{Name: "ID"}}, // Reference to lexer + }, + }, + { + Elements: []Element{ + {Value: LiteralValue{Text: "123"}}, + }, + }, + }, + } + + // Need to add term first so it exists when expr references it + g.AddNode("term", termRule) + g.AddNode("expr", parserRule) + + // Verify edges + // expr should have edges to: expr (self), term + exprEdges := g.Edges["expr"] + if len(exprEdges) == 0 { + 
t.Error("expr should have edges") + } + + hasExprEdge := false + hasTermEdge := false + for _, edge := range exprEdges { + if edge == "expr" { + hasExprEdge = true + } + if edge == "term" { + hasTermEdge = true + } + } + + if !hasExprEdge { + t.Error("expr should have self-edge") + } + if !hasTermEdge { + t.Error("expr should have edge to term") + } + + // term should NOT have edge to ID (lexer rule) + termEdges := g.Edges["term"] + for _, edge := range termEdges { + if edge == "ID" { + t.Error("term should not have edge to lexer rule ID") + } + } + + // Compute SCCs and verify + g.ComputeSCCs() + + // expr should be recursive (self-loop) + exprNode := g.GetNode("expr") + if !exprNode.IsRecursive { + t.Error("expr should be marked as recursive due to self-loop") + } + + // term should not be recursive + termNode := g.GetNode("term") + if termNode.IsRecursive { + t.Error("term should not be marked as recursive") + } +} \ No newline at end of file From 11aee4db4681935bb6fe1f16f6befcd80a8b53c2 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 10:48:12 +0800 Subject: [PATCH 08/15] perf: parse all and then merge --- tools/fuzzing/internal/grammar/dependency.go | 36 +--------- tools/fuzzing/internal/grammar/parser.go | 75 ++++++++++---------- 2 files changed, 42 insertions(+), 69 deletions(-) diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index c0cce63..4aa9bc2 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -62,17 +62,12 @@ func (g *DependencyGraph) AnalyzeTerminalReachability() error { // AnalyzeTerminalReachabilityWithValidation performs immediately terminal analysis with optional validation func (g *DependencyGraph) AnalyzeTerminalReachabilityWithValidation(validateUnterminated bool) error { - // Phase 1: Compute SCCs to identify recursive rule groups g.ComputeSCCs() - g.PrintSCCAnalysis() // Debug output + g.PrintSCCAnalysis() - // Phase 2: Mark lexer rules as immediately terminal g.markLexerRulesAsImmediatelyTerminal() - - // Phase 3: Analyze immediately terminal alternatives g.analyzeImmediatelyTerminalAlternatives() - // Phase 4: Check for nodes without immediately terminal alternatives and report error (only if requested) if validateUnterminated { return g.validateImmediatelyTerminalReachability() } @@ -85,7 +80,6 @@ func (g *DependencyGraph) markLexerRulesAsImmediatelyTerminal() { for _, node := range g.Nodes { if node.IsLexer { node.HasImmediatelyTerminalAlternatives = true - // All lexer alternatives are considered immediately terminal for i := range node.Alternatives { node.ImmediatelyTerminalAlternativeIndex = append(node.ImmediatelyTerminalAlternativeIndex, i) } @@ -106,12 +100,10 @@ func (g *DependencyGraph) analyzeImmediatelyTerminalAlternatives() { for _, node := range g.Nodes { if node.IsLexer { - continue // Already processed + continue } - // Check each alternative to see if it's immediately terminal for altIndex, alt := range node.Alternatives { - // Skip if this alternative is already marked as immediately terminal if g.isAlternativeAlreadyMarkedImmediate(node, altIndex) { continue } @@ -163,12 +155,10 @@ func (g *DependencyGraph) isAlternativeAlreadyMarkedImmediate(node *GraphNode, a // canAlternativeTerminateImmediately checks if an alternative can terminate immediately (no rule references required) func (g *DependencyGraph) canAlternativeTerminateImmediately(alt Alternative) bool { - // Empty alternative (ε-transition) can always terminate 
immediately if len(alt.Elements) == 0 { return true } - // Check each element in the alternative for _, element := range alt.Elements { if !g.canElementTerminateImmediately(element) { return false @@ -180,24 +170,19 @@ func (g *DependencyGraph) canAlternativeTerminateImmediately(alt Alternative) bo // canElementTerminateImmediately checks if a single element can terminate immediately func (g *DependencyGraph) canElementTerminateImmediately(element Element) bool { - // Terminal elements (literals) can always terminate immediately if element.IsTerminal() { return true } - // Handle quantified elements if element.IsQuantified() { - // * and ? quantifiers can generate 0 occurrences, so they can terminate immediately if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { return true } - // + quantifier requires at least one occurrence, so check the referenced rule if element.Quantifier == ONE_MORE { return g.canRuleReferenceTerminateImmediately(element) } } - // For rule references, check if the referenced rule can terminate immediately if element.IsRule() { return g.canRuleReferenceTerminateImmediately(element) } @@ -209,26 +194,20 @@ func (g *DependencyGraph) canElementTerminateImmediately(element Element) bool { func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) bool { var referencedRuleName string - // Extract rule name based on element value type switch value := element.Value.(type) { case ReferenceValue: referencedRuleName = value.Name case BlockValue: - // For block values, we need to check if any alternative in the block can terminate immediately return g.canBlockValueTerminateImmediately(value) default: return false } - // Check if the referenced rule exists and can terminate immediately referencedNode := g.GetNode(referencedRuleName) if referencedNode == nil { - // Handle ANTLR built-in tokens that are always immediately terminal if isAntlrBuiltinToken(referencedRuleName) { return true } - // Rule not found - could be a forward reference or external rule - // For now, we'll be conservative and assume it cannot terminate immediately return false } @@ -237,7 +216,6 @@ func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) // canBlockValueTerminateImmediately checks if a block value can terminate immediately func (g *DependencyGraph) canBlockValueTerminateImmediately(block BlockValue) bool { - // A block can terminate immediately if any of its alternatives can terminate immediately for _, alt := range block.Alternatives { if g.canAlternativeTerminateImmediately(alt) { return true @@ -319,12 +297,10 @@ func isAntlrBuiltinToken(tokenName string) bool { func (g *DependencyGraph) buildEdgesForNode(ruleName string, rule *Rule) { referencedRules := make(map[string]bool) - // Scan all alternatives for rule references for _, alt := range rule.Alternatives { g.collectRuleReferences(alt, referencedRules) } - // Convert map to slice and store as edges edges := []string{} for ref := range referencedRules { edges = append(edges, ref) @@ -344,11 +320,8 @@ func (g *DependencyGraph) collectElementReferences(element Element, refs map[str if element.IsRule() { switch value := element.Value.(type) { case ReferenceValue: - // Add all rule references (we'll filter lexer rules later if needed) - // Don't check if node exists yet - it might not be added yet refs[value.Name] = true case BlockValue: - // Collect references from block alternatives for _, alt := range value.Alternatives { g.collectRuleReferences(alt, refs) } @@ -363,12 +336,10 
@@ func (g *DependencyGraph) RebuildEdges() { for ruleName, node := range g.Nodes { referencedRules := make(map[string]bool) - // Scan all alternatives for rule references for _, alt := range node.Alternatives { g.collectRuleReferences(alt, referencedRules) } - // Filter out lexer rules and non-existent rules edges := []string{} for ref := range referencedRules { if refNode := g.GetNode(ref); refNode != nil && !refNode.IsLexer { @@ -381,11 +352,10 @@ func (g *DependencyGraph) RebuildEdges() { // ComputeSCCs computes strongly connected components using Tarjan's algorithm func (g *DependencyGraph) ComputeSCCs() { - // Only rebuild edges if they're empty (allows manual edge setup for testing) if len(g.Edges) == 0 { g.RebuildEdges() } - // Initialize for Tarjan's algorithm + index := 0 stack := []string{} indices := make(map[string]int) diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go index 86dda2a..962147f 100644 --- a/tools/fuzzing/internal/grammar/parser.go +++ b/tools/fuzzing/internal/grammar/parser.go @@ -98,9 +98,8 @@ const ( ONE_MORE // + ) -// ParseGrammarFile parses a .g4 file and extracts rules for fuzzing -func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { - // Read file content +// parseGrammarFileWithoutDependencyGraph parses a .g4 file without building dependency graph +func parseGrammarFileWithoutDependencyGraph(filePath string) (*ParsedGrammar, error) { content, err := os.ReadFile(filePath) if err != nil { return nil, errors.Wrap(err, "failed to read grammar file") @@ -110,31 +109,20 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { return nil, errors.New("grammar file is empty") } - // Create input stream input := antlr.NewInputStream(string(content)) - - // Create lexer lexer := grammar.NewANTLRv4Lexer(input) - // Add error listener errorListener := &GrammarErrorListener{} lexer.RemoveErrorListeners() lexer.AddErrorListener(errorListener) - // Create token stream stream := antlr.NewCommonTokenStream(lexer, 0) - - // Create parser parser := grammar.NewANTLRv4Parser(stream) - - // Add error listener to parser parser.RemoveErrorListeners() parser.AddErrorListener(errorListener) - // Parse the grammar tree := parser.GrammarSpec() - // Check for parsing errors if errorListener.HasErrors() { return nil, errors.Errorf("failed to parse grammar: %v", errorListener.GetErrors()) } @@ -143,7 +131,6 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { return nil, errors.New("parser returned nil tree") } - // Extract rules from parse tree visitor := NewGrammarExtractorVisitor() visitor.VisitGrammarSpec(tree) @@ -152,10 +139,20 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { ParserRules: visitor.parserRules, FilePath: filePath, BlockAltMap: visitor.blockAltMap, - DependencyGraph: NewDependencyGraph(), + DependencyGraph: nil, + } + + return parsedGrammar, nil +} + +// ParseGrammarFile parses a .g4 file and extracts rules for fuzzing (legacy method) +func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { + parsedGrammar, err := parseGrammarFileWithoutDependencyGraph(filePath) + if err != nil { + return nil, err } - // Build dependency graph + parsedGrammar.DependencyGraph = NewDependencyGraph() if err := buildDependencyGraph(parsedGrammar); err != nil { return nil, fmt.Errorf("failed to build dependency graph: %w", err) } @@ -223,9 +220,8 @@ func (g *ParsedGrammar) IsGeneratedBlock(name string) bool { return exists } -// MergeGrammar merges another grammar into this one 
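// A minimal sketch of the call pattern this refactor enables (file names
// hypothetical; the helpers are the ones introduced in this patch): callers
// can merge N grammars with plain map unions and pay for dependency-graph
// construction exactly once at the end, e.g.
//
//	lexG, _ := parseGrammarFileWithoutDependencyGraph("Lexer.g4")
//	parG, _ := parseGrammarFileWithoutDependencyGraph("Parser.g4")
//	_ = lexG.MergeGrammarWithoutRebuild(parG) // O(rules) map merge, no graph work
//	lexG.DependencyGraph = NewDependencyGraph()
//	_ = buildDependencyGraphWithValidation(lexG, true) // single build + validation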
-func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { - // Merge lexer rules +// MergeGrammarWithoutRebuild merges another grammar into this one without rebuilding the dependency graph +func (g *ParsedGrammar) MergeGrammarWithoutRebuild(other *ParsedGrammar) error { for name, rule := range other.LexerRules { if _, exists := g.LexerRules[name]; exists { return fmt.Errorf("duplicate lexer rule '%s' found in grammars '%s' and '%s'", name, g.FilePath, other.FilePath) @@ -233,7 +229,6 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { g.LexerRules[name] = rule } - // Merge parser rules for name, rule := range other.ParserRules { if _, exists := g.ParserRules[name]; exists { return fmt.Errorf("duplicate parser rule '%s' found in grammars '%s' and '%s'", name, g.FilePath, other.FilePath) @@ -241,7 +236,6 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { g.ParserRules[name] = rule } - // Merge block alternatives map for blockID, alternatives := range other.BlockAltMap { if _, exists := g.BlockAltMap[blockID]; exists { return fmt.Errorf("duplicate block ID '%s' found in grammars '%s' and '%s'", blockID, g.FilePath, other.FilePath) @@ -249,12 +243,19 @@ func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { g.BlockAltMap[blockID] = alternatives } - // Update file path to indicate it's a merged grammar if g.FilePath != other.FilePath { g.FilePath = fmt.Sprintf("%s + %s", g.FilePath, other.FilePath) } - // Rebuild dependency graph with merged rules and validate + return nil +} + +// MergeGrammar merges another grammar into this one (legacy method, kept for compatibility) +func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { + if err := g.MergeGrammarWithoutRebuild(other); err != nil { + return err + } + g.DependencyGraph = NewDependencyGraph() if err := buildDependencyGraphWithValidation(g, true); err != nil { return fmt.Errorf("failed to rebuild dependency graph after merge: %w", err) @@ -269,25 +270,27 @@ func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) { return nil, errors.New("no grammar files provided") } - // Parse the first grammar file - mergedGrammar, err := ParseGrammarFile(filePaths[0]) - if err != nil { - return nil, errors.Wrapf(err, "failed to parse first grammar file %s", filePaths[0]) - } - - // Merge additional grammar files - for i := 1; i < len(filePaths); i++ { - filePath := filePaths[i] - grammar, err := ParseGrammarFile(filePath) + grammars := make([]*ParsedGrammar, 0, len(filePaths)) + for _, filePath := range filePaths { + grammar, err := parseGrammarFileWithoutDependencyGraph(filePath) if err != nil { return nil, errors.Wrapf(err, "failed to parse grammar file %s", filePath) } + grammars = append(grammars, grammar) + } - if err := mergedGrammar.MergeGrammar(grammar); err != nil { - return nil, errors.Wrapf(err, "failed to merge grammar file %s", filePath) + mergedGrammar := grammars[0] + for i := 1; i < len(grammars); i++ { + if err := mergedGrammar.MergeGrammarWithoutRebuild(grammars[i]); err != nil { + return nil, errors.Wrapf(err, "failed to merge grammar file %s", grammars[i].FilePath) } } + mergedGrammar.DependencyGraph = NewDependencyGraph() + if err := buildDependencyGraphWithValidation(mergedGrammar, true); err != nil { + return nil, fmt.Errorf("failed to build dependency graph after merging all files: %w", err) + } + return mergedGrammar, nil } From 11f2d3096740e3232a4e9d1bb4841801ca927938 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 11:17:21 +0800 
Subject: [PATCH 09/15] refactor: remove legacy recursion control --- tools/fuzzing/internal/generator/generator.go | 198 ++++-------------- 1 file changed, 37 insertions(+), 161 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index e157f16..1c60a8a 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -84,42 +84,36 @@ func (g *Generator) getRule(ruleName string) *grammar.Rule { // generateQuery creates a single query using grammar rules func (g *Generator) generateQuery() string { - // Start generation with fresh active rules tracking - activeRules := make(map[string]bool) - result := g.generateFromRuleWithRecursionTracking(g.config.StartRule, activeRules, 0) - return result + return g.generateFromRule(g.config.StartRule, 0) } -// generateFromRuleWithRecursionTracking generates text from a grammar rule with recursion tracking -func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activeRules map[string]bool, depth int) string { - // Check if we're in recursion or hit depth limit - if activeRules[ruleName] || depth >= g.config.MaxDepth { - return g.forceTerminalGeneration(ruleName) - } - - // Mark rule as active - activeRules[ruleName] = true - defer delete(activeRules, ruleName) - - // Get the rule +// generateFromRule generates text from a grammar rule +func (g *Generator) generateFromRule(ruleName string, depth int) string { + // Get the rule and its SCC info rule := g.getRule(ruleName) if rule == nil { return fmt.Sprintf("<%s>", ruleName) } - // Select a random alternative + node := g.dependencyGraph.GetNode(ruleName) + + // Check depth limit for recursive rules + if node != nil && node.IsRecursive && depth >= g.config.MaxDepth { + return g.generateTerminalFallback(ruleName) + } + if len(rule.Alternatives) == 0 { - // rule:; is valid but has no alternatives, return empty directly. 
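	// Illustrative trace of the SCC-based cut-off above (hypothetical rule,
	// assuming MaxDepth = 2): with a self-recursive parser rule such as
	//
	//	expr : expr '+' term | term ;
	//
	// node.IsRecursive is true, so expansion is bounded:
	//
	//	generateFromRule("expr", 0)     // 0 < MaxDepth: select a random alternative
	//	  generateFromRule("expr", 1)   // 1 < MaxDepth: may recurse again
	//	    generateFromRule("expr", 2) // 2 >= MaxDepth: generateTerminalFallback("expr")
	//
	// Non-recursive rules skip the check entirely, so long but acyclic rule
	// chains are never truncated.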
return "" } + // Select a random alternative altIndex := g.random.Intn(len(rule.Alternatives)) alternative := rule.Alternatives[altIndex] // Generate from all elements in the alternative var result []string for _, element := range alternative.Elements { - elementResult := g.generateFromElementWithRecursionTracking(&element, activeRules, depth+1) + elementResult := g.generateFromElement(&element, depth+1) if elementResult != "" { result = append(result, elementResult) } @@ -136,41 +130,10 @@ func (g *Generator) generateFromRuleWithRecursionTracking(ruleName string, activ } } -// forceTerminalGeneration forces generation of terminal alternatives when recursion is detected -func (g *Generator) forceTerminalGeneration(ruleName string) string { - // Get immediately terminal alternatives first (preferred) - immediateTerminalAlts := g.dependencyGraph.GetImmediatelyTerminalAlternatives(ruleName) - rule := g.getRule(ruleName) - - // Check if we have immediately terminal alternatives - if len(immediateTerminalAlts) == 0 { - // Fallback for rules that don't have immediately terminal alternatives - // This can happen with forward references or missing rules - fmt.Printf("Warning: Rule %s has no immediately terminal alternatives, generating simple fallback\n", ruleName) - return generateSimpleFallback(ruleName) - } - - randomIndex := g.random.Intn(len(immediateTerminalAlts)) - selectedAltIndex := immediateTerminalAlts[randomIndex] - - alternative := rule.Alternatives[selectedAltIndex] - - // Generate using normal generation since this is an immediately terminal alternative - result := g.generateFromImmediatelyTerminalAlternative(&alternative) - - switch g.config.OutputFormat { - case config.CompactOutput: - return result - case config.VerboseOutput: - return fmt.Sprintf("/* %s[terminal] */ %s", ruleName, result) - default: - return result - } -} - -// ForceTerminalGenerationPublic exposes forceTerminalGeneration for testing -func (g *Generator) ForceTerminalGenerationPublic(ruleName string) string { - return g.forceTerminalGeneration(ruleName) +// generateTerminalFallback generates a simple fallback when recursion depth is exceeded +func (g *Generator) generateTerminalFallback(ruleName string) string { + // For recursive rules that hit depth limit, generate simple fallback + return generateSimpleFallback(ruleName) } // SetGrammarForTesting sets the grammar for testing purposes @@ -179,32 +142,26 @@ func (g *Generator) SetGrammarForTesting(grammar *grammar.ParsedGrammar) { g.dependencyGraph = grammar.GetDependencyGraph() } -// generateFromRule generates text from a grammar rule (legacy method, kept for compatibility) -func (g *Generator) generateFromRule(ruleName string, currentDepth int) string { - activeRules := make(map[string]bool) - return g.generateFromRuleWithRecursionTracking(ruleName, activeRules, currentDepth) -} - -// generateFromElementWithRecursionTracking generates text from a single grammar element with recursion tracking -func (g *Generator) generateFromElementWithRecursionTracking(element *grammar.Element, activeRules map[string]bool, depth int) string { +// generateFromElement generates text from a single grammar element +func (g *Generator) generateFromElement(element *grammar.Element, depth int) string { // Handle optional elements if element.IsOptional() && g.random.Float64() > g.config.OptionalProb { - return "" // Skip optional element + return "" } // Handle quantified elements if element.IsQuantified() { - return g.generateQuantifiedWithRecursionTracking(element, activeRules, 
depth) + return g.generateQuantified(element, depth) } // Generate single element if element.IsRule() { if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - return g.generateFromRuleOrTokenWithRecursionTracking(refValue.Name, activeRules, depth) + return g.generateFromRuleOrToken(refValue.Name, depth) } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { - return g.generateFromBlockWithRecursionTracking(blockValue, activeRules, depth) + return g.generateFromBlock(blockValue, depth) } - return g.generateFromRuleOrTokenWithRecursionTracking(element.Value.String(), activeRules, depth) + return g.generateFromRuleOrToken(element.Value.String(), depth) } else if element.IsTerminal() { if litValue, ok := element.Value.(grammar.LiteralValue); ok { return cleanLiteral(litValue.Text) @@ -452,19 +409,18 @@ func joinStrings(strs []string, sep string) string { return result } -// generateQuantifiedWithRecursionTracking handles quantified elements with recursion tracking -func (g *Generator) generateQuantifiedWithRecursionTracking(element *grammar.Element, activeRules map[string]bool, depth int) string { +// generateQuantified handles quantified elements +func (g *Generator) generateQuantified(element *grammar.Element, depth int) string { var count int - // Use fixed count if specified, otherwise use random count if g.config.QuantifierCount > 0 { count = g.config.QuantifierCount } else { switch element.Quantifier { case grammar.ZERO_MORE: // * - count = g.random.Intn(g.config.MaxQuantifier + 1) // 0 to MaxQuantifier + count = g.random.Intn(g.config.MaxQuantifier + 1) case grammar.ONE_MORE: // + - count = 1 + g.random.Intn(g.config.MaxQuantifier) // 1 to MaxQuantifier + count = 1 + g.random.Intn(g.config.MaxQuantifier) default: count = 1 } @@ -474,13 +430,13 @@ func (g *Generator) generateQuantifiedWithRecursionTracking(element *grammar.Ele for i := 0; i < count; i++ { if element.IsRule() { if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - result := g.generateFromRuleOrTokenWithRecursionTracking(refValue.Name, activeRules, depth) + result := g.generateFromRuleOrToken(refValue.Name, depth) results = append(results, result) } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { - result := g.generateFromBlockWithRecursionTracking(blockValue, activeRules, depth) + result := g.generateFromBlock(blockValue, depth) results = append(results, result) } else { - result := g.generateFromRuleOrTokenWithRecursionTracking(element.Value.String(), activeRules, depth) + result := g.generateFromRuleOrToken(element.Value.String(), depth) results = append(results, result) } } else if element.IsTerminal() { @@ -495,20 +451,18 @@ func (g *Generator) generateQuantifiedWithRecursionTracking(element *grammar.Ele return joinWithSpaces(results) } -// generateFromBlockWithRecursionTracking generates content from a block value with recursion tracking -func (g *Generator) generateFromBlockWithRecursionTracking(blockValue grammar.BlockValue, activeRules map[string]bool, depth int) string { +// generateFromBlock generates content from a block value +func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, depth int) string { if len(blockValue.Alternatives) == 0 { return "" } - // Select a random alternative from the block altIndex := g.random.Intn(len(blockValue.Alternatives)) alternative := blockValue.Alternatives[altIndex] - // Generate from all elements in the selected alternative var result []string for _, element := range alternative.Elements { - elementResult 
:= g.generateFromElementWithRecursionTracking(&element, activeRules, depth) + elementResult := g.generateFromElement(&element, depth) if elementResult != "" { result = append(result, elementResult) } @@ -517,92 +471,14 @@ func (g *Generator) generateFromBlockWithRecursionTracking(blockValue grammar.Bl return joinWithSpaces(result) } -// generateFromRuleOrTokenWithRecursionTracking generates from a rule using recursion tracking -func (g *Generator) generateFromRuleOrTokenWithRecursionTracking(ruleName string, activeRules map[string]bool, depth int) string { - // Check if this is a lexer rule and generate concrete token +// generateFromRuleOrToken generates from a rule or token +func (g *Generator) generateFromRuleOrToken(ruleName string, depth int) string { if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer { return g.generateConcreteToken(ruleName) } - - // Otherwise expand as parser rule with recursion tracking - return g.generateFromRuleWithRecursionTracking(ruleName, activeRules, depth) + return g.generateFromRule(ruleName, depth) } -// generateFromImmediatelyTerminalAlternative generates from an immediately terminal alternative using normal generation -// Since the alternative is immediately terminal, we can safely generate without recursion tracking -func (g *Generator) generateFromImmediatelyTerminalAlternative(alt *grammar.Alternative) string { - var result []string - - for _, element := range alt.Elements { - elementResult := g.generateFromImmediatelyTerminalElement(&element) - if elementResult != "" { - result = append(result, elementResult) - } - } - - return joinWithSpaces(result) -} - -// generateFromImmediatelyTerminalElement generates from an element that's part of an immediately terminal alternative -func (g *Generator) generateFromImmediatelyTerminalElement(element *grammar.Element) string { - // Handle quantified elements - if element.IsQuantified() { - switch element.Quantifier { - case grammar.ZERO_MORE, grammar.OPTIONAL_Q: - // Generate 0 or 1 occurrences for optional elements - if g.random.Float32() < float32(g.config.OptionalProb) { - nonQuantifiedElement := grammar.Element{ - Value: element.Value, - Quantifier: grammar.NONE, - } - return g.generateFromImmediatelyTerminalElement(&nonQuantifiedElement) - } - return "" - case grammar.ONE_MORE: - // Generate exactly 1 occurrence for ONE_MORE to stay minimal - nonQuantifiedElement := grammar.Element{ - Value: element.Value, - Quantifier: grammar.NONE, - } - return g.generateFromImmediatelyTerminalElement(&nonQuantifiedElement) - } - } - - // Handle different element types - if element.IsTerminal() { - // Direct literal - use existing generation logic that handles character sets, literals, etc. 
- if literal, ok := element.Value.(grammar.LiteralValue); ok { - return g.generateFromLiteral(literal.Text) - } - return element.Value.String() - } - - if element.IsRule() { - switch value := element.Value.(type) { - case grammar.ReferenceValue: - // For rule references in immediately terminal alternatives, use normal generation with empty recursion tracking - // Since we know this is immediately terminal, we can generate safely - return g.generateFromRuleWithRecursionTracking(value.Name, make(map[string]bool), 0) - - case grammar.BlockValue: - // For blocks, randomly select an alternative that's immediately terminal - if len(value.Alternatives) > 0 { - // Find immediately terminal alternatives within the block - for _, alt := range value.Alternatives { - // Use dependency graph to check if this alternative is immediately terminal - if g.dependencyGraph.CanAlternativeTerminateImmediately(alt) { - return g.generateFromImmediatelyTerminalAlternative(&alt) - } - } - // Fallback to first alternative if none found (shouldn't happen) - return g.generateFromImmediatelyTerminalAlternative(&value.Alternatives[0]) - } - return "" - } - } - - return "" -} // generateSimpleFallback generates a simple fallback value based on rule name patterns func generateSimpleFallback(ruleName string) string { From ac6c51588be121f590bee04b188baae8d99c98d9 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 15:44:55 +0800 Subject: [PATCH 10/15] refactor: rename some function --- tools/fuzzing/internal/generator/generator.go | 20 +++++++++----- tools/fuzzing/internal/grammar/dependency.go | 27 +++++++++++++------ tools/fuzzing/internal/grammar/parser.go | 12 ++++----- tools/fuzzing/internal/grammar/scc_test.go | 3 +++ 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index 1c60a8a..22b637d 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -173,7 +173,7 @@ func (g *Generator) generateFromElement(element *grammar.Element, depth int) str } // generateConcreteToken generates concrete tokens by expanding lexer rules -func (g *Generator) generateConcreteToken(ruleName string) string { +func (g *Generator) generateConcreteToken(ruleName string, depth int) string { // Get the lexer rule rule := g.grammar.GetRule(ruleName) if rule == nil || !rule.IsLexer { @@ -182,11 +182,17 @@ func (g *Generator) generateConcreteToken(ruleName string) string { // For lexer rules, we need to expand them but generate concrete characters // at the terminal level (character sets, literals, etc.) 
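	// Why the caller's depth must flow through here (illustrative, hypothetical
	// lexer rule): lexer rules can be self-recursive, e.g.
	//
	//	NESTED_COMMENT : '/*' ( NESTED_COMMENT | . )*? '*/' ;
	//
	// Restarting every token expansion at depth 0 would reset the recursion
	// budget at each lexer/parser boundary; forwarding the current depth keeps
	// the MaxDepth check in generateFromLexerRule effective.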
- return g.generateFromLexerRule(rule, 0) + return g.generateFromLexerRule(rule, depth) } // generateFromLexerRule generates content from a lexer rule func (g *Generator) generateFromLexerRule(rule *grammar.Rule, currentDepth int) string { + // Check recursion depth for lexer rules too + node := g.dependencyGraph.GetNode(rule.Name) + if node != nil && node.IsRecursive && currentDepth >= g.config.MaxDepth { + return generateSimpleFallback(rule.Name) + } + if len(rule.Alternatives) == 0 { return "" } @@ -198,7 +204,7 @@ func (g *Generator) generateFromLexerRule(rule *grammar.Rule, currentDepth int) // Generate from all elements in the alternative var result []string for _, element := range alternative.Elements { - elementResult := g.generateFromLexerElement(&element, currentDepth) + elementResult := g.generateFromLexerElement(&element, currentDepth+1) if elementResult != "" { result = append(result, elementResult) } @@ -224,10 +230,10 @@ func (g *Generator) generateFromLexerElement(element *grammar.Element, currentDe if refValue, ok := element.Value.(grammar.ReferenceValue); ok { // Check if referenced rule is lexer or parser if referencedRule := g.grammar.GetRule(refValue.Name); referencedRule != nil && referencedRule.IsLexer { - return g.generateFromLexerRule(referencedRule, currentDepth+1) + return g.generateFromLexerRule(referencedRule, currentDepth) } else { // Parser rule - shouldn't happen in lexer context, but handle it - return g.generateFromRule(refValue.Name, currentDepth+1) + return g.generateFromRule(refValue.Name, currentDepth) } } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { return g.generateFromLexerBlock(blockValue, currentDepth) @@ -266,7 +272,7 @@ func (g *Generator) generateQuantifiedLexer(element *grammar.Element, currentDep result := g.generateFromLexerElement(&grammar.Element{ Value: element.Value, Quantifier: grammar.NONE, // Remove quantifier for individual generation - }, currentDepth+1) + }, currentDepth) if result != "" { results = append(results, result) } @@ -474,7 +480,7 @@ func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, depth int) // generateFromRuleOrToken generates from a rule or token func (g *Generator) generateFromRuleOrToken(ruleName string, depth int) string { if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer { - return g.generateConcreteToken(ruleName) + return g.generateConcreteToken(ruleName, depth) } return g.generateFromRule(ruleName, depth) } diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index 4aa9bc2..30bf5d5 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -46,8 +46,9 @@ func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { } g.Nodes[ruleName] = node - // Build edges for this node - g.buildEdgesForNode(ruleName, rule) + // Don't build edges here because this rule may reference other rules that + // haven't been added yet (forward references). 
Edges will be built later + // after all nodes are added via BuildEdges() } // GetNode retrieves a node by rule name @@ -293,7 +294,7 @@ func isAntlrBuiltinToken(tokenName string) bool { return builtinTokens[tokenName] } -// buildEdgesForNode builds the edge list for a given rule node +// buildEdgesForNode builds the edge list for a given rule node (deprecated - use BuildEdges instead) func (g *DependencyGraph) buildEdgesForNode(ruleName string, rule *Rule) { referencedRules := make(map[string]bool) @@ -301,8 +302,13 @@ func (g *DependencyGraph) buildEdgesForNode(ruleName string, rule *Rule) { g.collectRuleReferences(alt, referencedRules) } + // Only add edges to parser rules (exclude lexer rules) edges := []string{} for ref := range referencedRules { + if refNode := g.GetNode(ref); refNode != nil && refNode.IsLexer { + continue // Skip lexer rules + } + // Add all other references (including forward references) edges = append(edges, ref) } g.Edges[ruleName] = edges @@ -329,8 +335,8 @@ func (g *DependencyGraph) collectElementReferences(element Element, refs map[str } } -// RebuildEdges rebuilds all edges after all nodes have been added -func (g *DependencyGraph) RebuildEdges() { +// BuildEdges builds all edges after all nodes have been added +func (g *DependencyGraph) BuildEdges() { g.Edges = make(map[string][]string) for ruleName, node := range g.Nodes { @@ -340,11 +346,16 @@ func (g *DependencyGraph) RebuildEdges() { g.collectRuleReferences(alt, referencedRules) } + // Only add edges to parser rules (exclude lexer rules) + // But include all referenced parser rules, even if they don't exist yet edges := []string{} for ref := range referencedRules { - if refNode := g.GetNode(ref); refNode != nil && !refNode.IsLexer { - edges = append(edges, ref) + // Check if the referenced rule is a lexer rule + if refNode := g.GetNode(ref); refNode != nil && refNode.IsLexer { + continue // Skip lexer rules } + // Add all other references (including forward references) + edges = append(edges, ref) } g.Edges[ruleName] = edges } @@ -353,7 +364,7 @@ func (g *DependencyGraph) RebuildEdges() { // ComputeSCCs computes strongly connected components using Tarjan's algorithm func (g *DependencyGraph) ComputeSCCs() { if len(g.Edges) == 0 { - g.RebuildEdges() + g.BuildEdges() } index := 0 diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go index 962147f..b9918f8 100644 --- a/tools/fuzzing/internal/grammar/parser.go +++ b/tools/fuzzing/internal/grammar/parser.go @@ -220,8 +220,8 @@ func (g *ParsedGrammar) IsGeneratedBlock(name string) bool { return exists } -// MergeGrammarWithoutRebuild merges another grammar into this one without rebuilding the dependency graph -func (g *ParsedGrammar) MergeGrammarWithoutRebuild(other *ParsedGrammar) error { +// MergeGrammar merges another grammar into this one +func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { for name, rule := range other.LexerRules { if _, exists := g.LexerRules[name]; exists { return fmt.Errorf("duplicate lexer rule '%s' found in grammars '%s' and '%s'", name, g.FilePath, other.FilePath) @@ -250,9 +250,9 @@ func (g *ParsedGrammar) MergeGrammarWithoutRebuild(other *ParsedGrammar) error { return nil } -// MergeGrammar merges another grammar into this one (legacy method, kept for compatibility) -func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error { - if err := g.MergeGrammarWithoutRebuild(other); err != nil { +// MergeGrammarAndRebuildGraph merges another grammar and rebuilds the 
dependency graph (for single file merging) +func (g *ParsedGrammar) MergeGrammarAndRebuildGraph(other *ParsedGrammar) error { + if err := g.MergeGrammar(other); err != nil { return err } @@ -281,7 +281,7 @@ func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) { mergedGrammar := grammars[0] for i := 1; i < len(grammars); i++ { - if err := mergedGrammar.MergeGrammarWithoutRebuild(grammars[i]); err != nil { + if err := mergedGrammar.MergeGrammar(grammars[i]); err != nil { return nil, errors.Wrapf(err, "failed to merge grammar file %s", grammars[i].FilePath) } } diff --git a/tools/fuzzing/internal/grammar/scc_test.go b/tools/fuzzing/internal/grammar/scc_test.go index 5078e3b..b1ad34c 100644 --- a/tools/fuzzing/internal/grammar/scc_test.go +++ b/tools/fuzzing/internal/grammar/scc_test.go @@ -282,6 +282,9 @@ func TestSCCEdgeBuilding(t *testing.T) { g.AddNode("term", termRule) g.AddNode("expr", parserRule) + // Build edges after adding all nodes + g.BuildEdges() + // Verify edges // expr should have edges to: expr (self), term exprEdges := g.Edges["expr"] From e0f8f4731e69daa871a2a28114764b70fdd8eab7 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 16:03:01 +0800 Subject: [PATCH 11/15] refactor: cleaner code --- tools/fuzzing/internal/generator/generator.go | 10 +- tools/fuzzing/internal/grammar/dependency.go | 293 +++--------------- tools/fuzzing/internal/grammar/parser.go | 7 +- tools/fuzzing/internal/grammar/scc_test.go | 14 +- 4 files changed, 46 insertions(+), 278 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index 22b637d..8f46bba 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -61,11 +61,6 @@ func (g *Generator) Generate() error { return errors.Errorf("start rule '%s' not found in merged grammar", g.config.StartRule) } - // Check if start rule has immediately terminal alternatives - if !g.dependencyGraph.HasImmediatelyTerminalAlternatives(g.config.StartRule) { - fmt.Printf("Warning: start rule '%s' has no immediately terminal alternatives\n", g.config.StartRule) - } - fmt.Printf("Generating %d queries from rule '%s'...\n", g.config.Count, g.config.StartRule) // Generate queries @@ -96,7 +91,7 @@ func (g *Generator) generateFromRule(ruleName string, depth int) string { } node := g.dependencyGraph.GetNode(ruleName) - + // Check depth limit for recursive rules if node != nil && node.IsRecursive && depth >= g.config.MaxDepth { return g.generateTerminalFallback(ruleName) @@ -192,7 +187,7 @@ func (g *Generator) generateFromLexerRule(rule *grammar.Rule, currentDepth int) if node != nil && node.IsRecursive && currentDepth >= g.config.MaxDepth { return generateSimpleFallback(rule.Name) } - + if len(rule.Alternatives) == 0 { return "" } @@ -485,7 +480,6 @@ func (g *Generator) generateFromRuleOrToken(ruleName string, depth int) string { return g.generateFromRule(ruleName, depth) } - // generateSimpleFallback generates a simple fallback value based on rule name patterns func generateSimpleFallback(ruleName string) string { // Generate context-appropriate fallbacks diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index 30bf5d5..5e43cae 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -13,14 +13,12 @@ type DependencyGraph struct { // GraphNode represents a single rule in the dependency graph type GraphNode struct { - 
RuleName string // Rule name (e.g., "selectStmt", "expr") - Alternatives []Alternative // All alternatives for this rule - HasImmediatelyTerminalAlternatives bool // Has at least one immediately terminal alternative - ImmediatelyTerminalAlternativeIndex []int // Indices of alternatives that are immediately terminal - IsLexer bool // Whether this is a lexer rule - SCCID int // Which SCC this node belongs to (-1 if not computed) - SCCSize int // Size of the SCC this node belongs to - IsRecursive bool // True if part of a recursive SCC (size > 1 or self-loop) + RuleName string // Rule name (e.g., "selectStmt", "expr") + Alternatives []Alternative // All alternatives for this rule + IsLexer bool // Whether this is a lexer rule + SCCID int // Which SCC this node belongs to (-1 if not computed) + SCCSize int // Size of the SCC this node belongs to + IsRecursive bool // True if part of a recursive SCC (size > 1 or self-loop) } // NewDependencyGraph creates a new dependency graph @@ -35,17 +33,15 @@ func NewDependencyGraph() *DependencyGraph { // AddNode adds a rule node to the dependency graph func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { node := &GraphNode{ - RuleName: ruleName, - Alternatives: rule.Alternatives, - HasImmediatelyTerminalAlternatives: false, - ImmediatelyTerminalAlternativeIndex: []int{}, - IsLexer: rule.IsLexer, - SCCID: -1, - SCCSize: 0, - IsRecursive: false, + RuleName: ruleName, + Alternatives: rule.Alternatives, + IsLexer: rule.IsLexer, + SCCID: -1, + SCCSize: 0, + IsRecursive: false, } g.Nodes[ruleName] = node - + // Don't build edges here because this rule may reference other rules that // haven't been added yet (forward references). Edges will be built later // after all nodes are added via BuildEdges() @@ -56,252 +52,33 @@ func (g *DependencyGraph) GetNode(ruleName string) *GraphNode { return g.Nodes[ruleName] } -// AnalyzeTerminalReachability performs immediately terminal analysis on the graph -func (g *DependencyGraph) AnalyzeTerminalReachability() error { - return g.AnalyzeTerminalReachabilityWithValidation(false) -} - -// AnalyzeTerminalReachabilityWithValidation performs immediately terminal analysis with optional validation -func (g *DependencyGraph) AnalyzeTerminalReachabilityWithValidation(validateUnterminated bool) error { - g.ComputeSCCs() - g.PrintSCCAnalysis() - - g.markLexerRulesAsImmediatelyTerminal() - g.analyzeImmediatelyTerminalAlternatives() - - if validateUnterminated { - return g.validateImmediatelyTerminalReachability() - } - - return nil -} - -// markLexerRulesAsImmediatelyTerminal marks all lexer rules as having immediately terminal alternatives -func (g *DependencyGraph) markLexerRulesAsImmediatelyTerminal() { - for _, node := range g.Nodes { - if node.IsLexer { - node.HasImmediatelyTerminalAlternatives = true - for i := range node.Alternatives { - node.ImmediatelyTerminalAlternativeIndex = append(node.ImmediatelyTerminalAlternativeIndex, i) - } - } - } -} - -// analyzeImmediatelyTerminalAlternatives analyzes which alternatives are immediately terminal -func (g *DependencyGraph) analyzeImmediatelyTerminalAlternatives() { - // Use fixed-point iteration similar to terminal propagation - changed := true - iterations := 0 - maxIterations := len(g.Nodes) * 2 // Prevent infinite loops - - for changed && iterations < maxIterations { - changed = false - iterations++ - - for _, node := range g.Nodes { - if node.IsLexer { - continue - } - - for altIndex, alt := range node.Alternatives { - if g.isAlternativeAlreadyMarkedImmediate(node, 
altIndex) { - continue - } - - if g.canAlternativeTerminateImmediately(alt) { - if !node.HasImmediatelyTerminalAlternatives { - node.HasImmediatelyTerminalAlternatives = true - changed = true - } - node.ImmediatelyTerminalAlternativeIndex = append(node.ImmediatelyTerminalAlternativeIndex, altIndex) - changed = true - } - } - } - } - - if iterations >= maxIterations { - fmt.Printf("Warning: Immediately terminal analysis reached max iterations (%d)\\n", maxIterations) - } -} - -// validateImmediatelyTerminalReachability checks for rules without immediately terminal alternatives and reports errors -func (g *DependencyGraph) validateImmediatelyTerminalReachability() error { - var unterminatedRules []string - - for ruleName, node := range g.Nodes { - if !node.HasImmediatelyTerminalAlternatives { - unterminatedRules = append(unterminatedRules, ruleName) - } - } - - if len(unterminatedRules) > 0 { - return fmt.Errorf("grammar contains %d rules without immediately terminal alternatives: %v", - len(unterminatedRules), unterminatedRules) - } - - return nil -} - -// isAlternativeAlreadyMarkedImmediate checks if an alternative is already in the immediately terminal list -func (g *DependencyGraph) isAlternativeAlreadyMarkedImmediate(node *GraphNode, altIndex int) bool { - for _, immediateIndex := range node.ImmediatelyTerminalAlternativeIndex { - if immediateIndex == altIndex { - return true - } - } - return false -} - -// canAlternativeTerminateImmediately checks if an alternative can terminate immediately (no rule references required) -func (g *DependencyGraph) canAlternativeTerminateImmediately(alt Alternative) bool { - if len(alt.Elements) == 0 { - return true - } - - for _, element := range alt.Elements { - if !g.canElementTerminateImmediately(element) { - return false - } - } - - return true -} - -// canElementTerminateImmediately checks if a single element can terminate immediately -func (g *DependencyGraph) canElementTerminateImmediately(element Element) bool { - if element.IsTerminal() { - return true - } - - if element.IsQuantified() { - if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { - return true - } - if element.Quantifier == ONE_MORE { - return g.canRuleReferenceTerminateImmediately(element) - } - } - - if element.IsRule() { - return g.canRuleReferenceTerminateImmediately(element) - } - - return false -} - -// canRuleReferenceTerminateImmediately checks if a rule reference can terminate immediately -func (g *DependencyGraph) canRuleReferenceTerminateImmediately(element Element) bool { - var referencedRuleName string - - switch value := element.Value.(type) { - case ReferenceValue: - referencedRuleName = value.Name - case BlockValue: - return g.canBlockValueTerminateImmediately(value) - default: - return false - } - - referencedNode := g.GetNode(referencedRuleName) - if referencedNode == nil { - if isAntlrBuiltinToken(referencedRuleName) { - return true - } - return false - } - - return referencedNode.HasImmediatelyTerminalAlternatives -} - -// canBlockValueTerminateImmediately checks if a block value can terminate immediately -func (g *DependencyGraph) canBlockValueTerminateImmediately(block BlockValue) bool { - for _, alt := range block.Alternatives { - if g.canAlternativeTerminateImmediately(alt) { - return true - } - } - return false -} - -// CanAlternativeTerminateImmediately checks if an alternative can terminate immediately (exported for testing) -func (g *DependencyGraph) CanAlternativeTerminateImmediately(alt Alternative) bool { - return 
g.canAlternativeTerminateImmediately(alt) -} - -// CanElementTerminateImmediately checks if a single element can terminate immediately (exported for testing) -func (g *DependencyGraph) CanElementTerminateImmediately(element Element) bool { - return g.canElementTerminateImmediately(element) -} - -// ValidateGrammar checks if all rules have at least one immediately terminal alternative +// ValidateGrammar checks if all non-recursive rules can reach terminal symbols func (g *DependencyGraph) ValidateGrammar() error { - var invalidRules []string - - for ruleName, node := range g.Nodes { - if !node.HasImmediatelyTerminalAlternatives { - invalidRules = append(invalidRules, ruleName) - } - } - - if len(invalidRules) > 0 { - return fmt.Errorf("grammar validation failed: the following rules have no immediately terminal alternatives: %v", invalidRules) - } - + // For now, we trust that the grammar is well-formed + // Future: could add validation to ensure non-recursive rules can terminate return nil } -// GetImmediatelyTerminalAlternatives returns the indices of immediately terminal alternatives for a rule -func (g *DependencyGraph) GetImmediatelyTerminalAlternatives(ruleName string) []int { - node := g.GetNode(ruleName) - if node == nil { - return nil - } - return node.ImmediatelyTerminalAlternativeIndex -} - -// HasImmediatelyTerminalAlternatives checks if a rule has immediately terminal alternatives -func (g *DependencyGraph) HasImmediatelyTerminalAlternatives(ruleName string) bool { - node := g.GetNode(ruleName) - if node == nil { - return false - } - return node.HasImmediatelyTerminalAlternatives -} - // PrintAnalysisResults prints the dependency graph analysis results for debugging func (g *DependencyGraph) PrintAnalysisResults() { fmt.Println("=== Dependency Graph Analysis Results ===") for ruleName, node := range g.Nodes { fmt.Printf("Rule: %s (lexer=%t)\n", ruleName, node.IsLexer) - fmt.Printf(" HasImmediatelyTerminalAlternatives: %t\n", node.HasImmediatelyTerminalAlternatives) - fmt.Printf(" ImmediatelyTerminalAlternativeIndex: %v\n", node.ImmediatelyTerminalAlternativeIndex) + fmt.Printf(" IsRecursive: %t\n", node.IsRecursive) + fmt.Printf(" SCCID: %d, SCCSize: %d\n", node.SCCID, node.SCCSize) fmt.Printf(" Total alternatives: %d\n", len(node.Alternatives)) fmt.Println() } } -// isAntlrBuiltinToken checks if a token name refers to an ANTLR built-in token -// that should always be considered terminal -func isAntlrBuiltinToken(tokenName string) bool { - // ANTLR built-in tokens that are always terminal - builtinTokens := map[string]bool{ - "EOF": true, // End-of-file token - "": true, // Alternative EOF notation - } - - return builtinTokens[tokenName] -} - // buildEdgesForNode builds the edge list for a given rule node (deprecated - use BuildEdges instead) func (g *DependencyGraph) buildEdgesForNode(ruleName string, rule *Rule) { referencedRules := make(map[string]bool) - + for _, alt := range rule.Alternatives { g.collectRuleReferences(alt, referencedRules) } - + // Only add edges to parser rules (exclude lexer rules) edges := []string{} for ref := range referencedRules { @@ -338,14 +115,14 @@ func (g *DependencyGraph) collectElementReferences(element Element, refs map[str // BuildEdges builds all edges after all nodes have been added func (g *DependencyGraph) BuildEdges() { g.Edges = make(map[string][]string) - + for ruleName, node := range g.Nodes { referencedRules := make(map[string]bool) - + for _, alt := range node.Alternatives { g.collectRuleReferences(alt, referencedRules) } - + // 
Only add edges to parser rules (exclude lexer rules) // But include all referenced parser rules, even if they don't exist yet edges := []string{} @@ -366,13 +143,13 @@ func (g *DependencyGraph) ComputeSCCs() { if len(g.Edges) == 0 { g.BuildEdges() } - + index := 0 stack := []string{} indices := make(map[string]int) lowlinks := make(map[string]int) onStack := make(map[string]bool) - + // Helper function for Tarjan's strongconnect var strongconnect func(v string) strongconnect = func(v string) { @@ -382,7 +159,7 @@ func (g *DependencyGraph) ComputeSCCs() { index++ stack = append(stack, v) onStack[v] = true - + // Consider successors of v for _, w := range g.Edges[v] { if _, ok := indices[w]; !ok { @@ -398,7 +175,7 @@ func (g *DependencyGraph) ComputeSCCs() { } } } - + // If v is a root node, pop the stack and print an SCC if lowlinks[v] == indices[v] { scc := []string{} @@ -414,22 +191,22 @@ func (g *DependencyGraph) ComputeSCCs() { g.SCCs = append(g.SCCs, scc) } } - + // Clear existing SCCs g.SCCs = [][]string{} - + // Run algorithm for all unvisited nodes for ruleName := range g.Nodes { if _, ok := indices[ruleName]; !ok { strongconnect(ruleName) } } - + // Update nodes with their SCC information for sccID, scc := range g.SCCs { sccSize := len(scc) isRecursive := sccSize > 1 - + // Check for self-loops if single node SCC if sccSize == 1 { ruleName := scc[0] @@ -440,7 +217,7 @@ func (g *DependencyGraph) ComputeSCCs() { } } } - + // Update all nodes in this SCC for _, ruleName := range scc { if node := g.GetNode(ruleName); node != nil { @@ -456,7 +233,7 @@ func (g *DependencyGraph) ComputeSCCs() { func (g *DependencyGraph) PrintSCCAnalysis() { fmt.Println("\n=== SCC Analysis Results ===") fmt.Printf("Total SCCs: %d\n", len(g.SCCs)) - + recursiveSCCs := 0 selfLoopSCCs := 0 largestSCC := 0 @@ -499,14 +276,14 @@ func (g *DependencyGraph) PrintSCCAnalysis() { } } } - + fmt.Printf("\nMutually recursive SCCs (size > 1): %d\n", recursiveSCCs) if recursiveSCCs > 0 { fmt.Printf("Largest SCC size: %d\n", largestSCC) } fmt.Printf("Self-loop SCCs (size = 1 with self-ref): %d\n", selfLoopSCCs) fmt.Printf("Non-recursive SCCs: %d\n", len(g.SCCs)-recursiveSCCs-selfLoopSCCs) - + // Print sample of recursive rules fmt.Println("\nSample recursive rules:") count := 0 diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go index b9918f8..70ddf13 100644 --- a/tools/fuzzing/internal/grammar/parser.go +++ b/tools/fuzzing/internal/grammar/parser.go @@ -177,10 +177,9 @@ func buildDependencyGraphWithValidation(parsedGrammar *ParsedGrammar, validateUn parsedGrammar.DependencyGraph.AddNode(ruleName, rule) } - // Perform terminal reachability analysis with optional validation - if err := parsedGrammar.DependencyGraph.AnalyzeTerminalReachabilityWithValidation(validateUnterminated); err != nil { - return fmt.Errorf("terminal reachability analysis failed: %w", err) - } + // Perform SCC computing. 
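	// After this call every GraphNode carries SCCID/SCCSize/IsRecursive, so
	// the generator can answer "is this rule recursive?" in O(1) instead of
	// re-walking the graph, e.g. (rule name hypothetical):
	//
	//	if n := parsedGrammar.DependencyGraph.GetNode("expr"); n != nil && n.IsRecursive {
	//		// bound expansion depth for this rule
	//	}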
+ parsedGrammar.DependencyGraph.ComputeSCCs() + parsedGrammar.DependencyGraph.PrintSCCAnalysis() return nil } diff --git a/tools/fuzzing/internal/grammar/scc_test.go b/tools/fuzzing/internal/grammar/scc_test.go index b1ad34c..9b1eb07 100644 --- a/tools/fuzzing/internal/grammar/scc_test.go +++ b/tools/fuzzing/internal/grammar/scc_test.go @@ -166,14 +166,12 @@ func TestSCCDetection(t *testing.T) { } // We need to add the node without building edges automatically node := &GraphNode{ - RuleName: ruleName, - Alternatives: rule.Alternatives, - HasImmediatelyTerminalAlternatives: false, - ImmediatelyTerminalAlternativeIndex: []int{}, - IsLexer: false, - SCCID: -1, - SCCSize: 0, - IsRecursive: false, + RuleName: ruleName, + Alternatives: rule.Alternatives, + IsLexer: false, + SCCID: -1, + SCCSize: 0, + IsRecursive: false, } g.Nodes[ruleName] = node } From 1ad5c4b36aa85dd7774186be83627934f6f408c6 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 16:13:19 +0800 Subject: [PATCH 12/15] feat: compute SCC lookup map --- tools/fuzzing/internal/grammar/dependency.go | 25 +++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index 5e43cae..d59bb48 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -6,9 +6,10 @@ import ( // DependencyGraph represents the dependency relationships between grammar rules type DependencyGraph struct { - Nodes map[string]*GraphNode - Edges map[string][]string // Adjacency list: rule -> referenced rules - SCCs [][]string // List of SCCs (each SCC is a list of rule names) + Nodes map[string]*GraphNode + Edges map[string][]string // Adjacency list: rule -> referenced rules + SCCs [][]string // List of SCCs (each SCC is a list of rule names) + SCCLookup map[string]int // Rule name -> SCC ID lookup map } // GraphNode represents a single rule in the dependency graph @@ -24,9 +25,10 @@ type GraphNode struct { // NewDependencyGraph creates a new dependency graph func NewDependencyGraph() *DependencyGraph { return &DependencyGraph{ - Nodes: make(map[string]*GraphNode), - Edges: make(map[string][]string), - SCCs: [][]string{}, + Nodes: make(map[string]*GraphNode), + Edges: make(map[string][]string), + SCCs: [][]string{}, + SCCLookup: make(map[string]int), } } @@ -192,8 +194,9 @@ func (g *DependencyGraph) ComputeSCCs() { } } - // Clear existing SCCs + // Clear existing SCCs and lookup map g.SCCs = [][]string{} + g.SCCLookup = make(map[string]int) // Run algorithm for all unvisited nodes for ruleName := range g.Nodes { @@ -202,7 +205,7 @@ func (g *DependencyGraph) ComputeSCCs() { } } - // Update nodes with their SCC information + // Build SCC lookup map and update nodes with their SCC information for sccID, scc := range g.SCCs { sccSize := len(scc) isRecursive := sccSize > 1 @@ -218,8 +221,12 @@ func (g *DependencyGraph) ComputeSCCs() { } } - // Update all nodes in this SCC + // Update lookup map and nodes in this SCC for _, ruleName := range scc { + // Add to lookup map + g.SCCLookup[ruleName] = sccID + + // Update node information if node := g.GetNode(ruleName); node != nil { node.SCCID = sccID node.SCCSize = sccSize From 1841a029a772a6144ba549fb459f4b5cbb3dd42f Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 16:15:56 +0800 Subject: [PATCH 13/15] chore: sanity check --- tools/fuzzing/internal/grammar/dependency.go | 172 +++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git 
a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index d59bb48..2a5616c 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -205,6 +205,11 @@ func (g *DependencyGraph) ComputeSCCs() { } } + // Perform sanity check: ensure no SCC is an isolated island + if err := g.checkForIsolatedSCCs(); err != nil { + return // Don't build lookup map if grammar is malformed + } + // Build SCC lookup map and update nodes with their SCC information for sccID, scc := range g.SCCs { sccSize := len(scc) @@ -236,6 +241,173 @@ func (g *DependencyGraph) ComputeSCCs() { } } +// checkForIsolatedSCCs ensures no SCC is an isolated island with no exit paths +func (g *DependencyGraph) checkForIsolatedSCCs() error { + // Create a temporary SCC membership map for this check + sccMembership := make(map[string]int) + for sccID, scc := range g.SCCs { + for _, ruleName := range scc { + sccMembership[ruleName] = sccID + } + } + + // Check each SCC for exit paths + isolatedSCCs := []int{} + for sccID, scc := range g.SCCs { + // Skip non-recursive SCCs (single nodes without self-loops) + if len(scc) == 1 { + ruleName := scc[0] + hasSelfLoop := false + for _, ref := range g.Edges[ruleName] { + if ref == ruleName { + hasSelfLoop = true + break + } + } + if !hasSelfLoop { + continue // Non-recursive single node, skip + } + } + + // Check if this SCC has any exit path + hasExit := g.sccHasExitPath(sccID, scc, sccMembership) + if !hasExit { + isolatedSCCs = append(isolatedSCCs, sccID) + } + } + + // Report error if any isolated SCCs found + if len(isolatedSCCs) > 0 { + fmt.Printf("\nERROR: Found %d isolated SCC(s) with no exit paths:\n", len(isolatedSCCs)) + for _, sccID := range isolatedSCCs { + fmt.Printf(" SCC %d: %v\n", sccID, g.SCCs[sccID]) + } + return fmt.Errorf("grammar contains %d isolated SCC(s) that cannot terminate", len(isolatedSCCs)) + } + + return nil +} + +// sccHasExitPath checks if an SCC has at least one path to rules outside of it +func (g *DependencyGraph) sccHasExitPath(sccID int, scc []string, sccMembership map[string]int) bool { + // Use fixed-point iteration to find reachable rules from this SCC + visited := make(map[string]bool) + toVisit := []string{} + + // Start with all rules in the SCC + for _, ruleName := range scc { + toVisit = append(toVisit, ruleName) + visited[ruleName] = true + } + + // Perform reachability analysis + for len(toVisit) > 0 { + current := toVisit[0] + toVisit = toVisit[1:] + + // Check all references from current rule + for _, ref := range g.Edges[current] { + // Skip if already visited + if visited[ref] { + continue + } + + // Check if referenced rule is outside this SCC + refSCCID, exists := sccMembership[ref] + if !exists || refSCCID != sccID { + // Found an exit! 
Check if it can eventually reach terminals + if g.canReachTerminal(ref, make(map[string]bool)) { + return true + } + } + + // Mark as visited and continue searching + visited[ref] = true + toVisit = append(toVisit, ref) + } + + // Also check alternatives for direct terminal paths + if node := g.GetNode(current); node != nil { + for _, alt := range node.Alternatives { + if g.alternativeHasTerminalPath(alt) { + return true + } + } + } + } + + return false +} + +// canReachTerminal checks if a rule can eventually reach terminal symbols +func (g *DependencyGraph) canReachTerminal(ruleName string, visited map[string]bool) bool { + // Avoid infinite recursion + if visited[ruleName] { + return false + } + visited[ruleName] = true + + node := g.GetNode(ruleName) + if node == nil { + return false + } + + // Lexer rules are terminals + if node.IsLexer { + return true + } + + // Check each alternative + for _, alt := range node.Alternatives { + if g.alternativeCanReachTerminal(alt, visited) { + return true + } + } + + return false +} + +// alternativeHasTerminalPath checks if an alternative has at least one terminal +func (g *DependencyGraph) alternativeHasTerminalPath(alt Alternative) bool { + for _, element := range alt.Elements { + if element.IsTerminal() { + return true + } + // Check if it's an optional/quantified element (can be skipped) + if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { + return true + } + } + return false +} + +// alternativeCanReachTerminal checks if an alternative can reach terminals +func (g *DependencyGraph) alternativeCanReachTerminal(alt Alternative, visited map[string]bool) bool { + if len(alt.Elements) == 0 { + return true // Empty alternative is terminal + } + + for _, element := range alt.Elements { + if element.IsTerminal() { + return true + } + + // Optional elements can be skipped + if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { + continue + } + + // Check if referenced rule can reach terminal + if refValue, ok := element.Value.(ReferenceValue); ok { + if !g.canReachTerminal(refValue.Name, visited) { + return false + } + } + } + + return true +} + // PrintSCCAnalysis prints the SCC analysis results for debugging func (g *DependencyGraph) PrintSCCAnalysis() { fmt.Println("\n=== SCC Analysis Results ===") From 12f7430f9772ba9b92119115d9453e55030706dd Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 16:33:05 +0800 Subject: [PATCH 14/15] feat: add depth only if in the same SCC --- tools/fuzzing/internal/generator/generator.go | 82 ++++++++++++++----- tools/fuzzing/internal/grammar/dependency.go | 13 ++- tools/fuzzing/internal/grammar/scc_test.go | 2 +- 3 files changed, 71 insertions(+), 26 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index 8f46bba..b31ea53 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -79,11 +79,18 @@ func (g *Generator) getRule(ruleName string) *grammar.Rule { // generateQuery creates a single query using grammar rules func (g *Generator) generateQuery() string { - return g.generateFromRule(g.config.StartRule, 0) + // Start with no SCC context and 0 recursion depth + return g.generateFromRuleWithSCC(g.config.StartRule, grammar.NoSCC, 0) } -// generateFromRule generates text from a grammar rule +// generateFromRule is a wrapper for backward compatibility func (g *Generator) generateFromRule(ruleName string, depth int) string { + // For backward 
compatibility, treat depth as recursion depth + return g.generateFromRuleWithSCC(ruleName, grammar.NoSCC, depth) +} + +// generateFromRuleWithSCC generates text from a grammar rule tracking SCC-based recursion +func (g *Generator) generateFromRuleWithSCC(ruleName string, currentSCCID int, recursionDepth int) string { // Get the rule and its SCC info rule := g.getRule(ruleName) if rule == nil { @@ -91,10 +98,30 @@ func (g *Generator) generateFromRule(ruleName string, depth int) string { } node := g.dependencyGraph.GetNode(ruleName) + if node == nil { + return fmt.Sprintf("<%s>", ruleName) + } + + // Determine the new recursion depth + // Only increment if we're moving within the same SCC (actual recursion) + newRecursionDepth := recursionDepth + if currentSCCID != grammar.NoSCC && node.SCCID == currentSCCID && node.IsRecursive { + // We're recursing within the same SCC + newRecursionDepth = recursionDepth + 1 + + // Check recursion depth limit + if newRecursionDepth >= g.config.MaxDepth { + return g.generateTerminalFallback(ruleName) + } + } else if node.IsRecursive { + // Entering a new recursive SCC, reset recursion depth to 0 + newRecursionDepth = 0 + } - // Check depth limit for recursive rules - if node != nil && node.IsRecursive && depth >= g.config.MaxDepth { - return g.generateTerminalFallback(ruleName) + // Update current SCC context for recursive rules + newSCCID := currentSCCID + if node.IsRecursive { + newSCCID = node.SCCID } if len(rule.Alternatives) == 0 { @@ -108,7 +135,7 @@ func (g *Generator) generateFromRule(ruleName string, depth int) string { // Generate from all elements in the alternative var result []string for _, element := range alternative.Elements { - elementResult := g.generateFromElement(&element, depth+1) + elementResult := g.generateFromElementWithSCC(&element, newSCCID, newRecursionDepth) if elementResult != "" { result = append(result, elementResult) } @@ -137,8 +164,13 @@ func (g *Generator) SetGrammarForTesting(grammar *grammar.ParsedGrammar) { g.dependencyGraph = grammar.GetDependencyGraph() } -// generateFromElement generates text from a single grammar element +// generateFromElement is a wrapper for backward compatibility func (g *Generator) generateFromElement(element *grammar.Element, depth int) string { + return g.generateFromElementWithSCC(element, grammar.NoSCC, depth) +} + +// generateFromElementWithSCC generates text from a single grammar element with SCC tracking +func (g *Generator) generateFromElementWithSCC(element *grammar.Element, currentSCCID int, recursionDepth int) string { // Handle optional elements if element.IsOptional() && g.random.Float64() > g.config.OptionalProb { return "" @@ -146,17 +178,17 @@ func (g *Generator) generateFromElement(element *grammar.Element, depth int) str // Handle quantified elements if element.IsQuantified() { - return g.generateQuantified(element, depth) + return g.generateQuantifiedWithSCC(element, currentSCCID, recursionDepth) } // Generate single element if element.IsRule() { if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - return g.generateFromRuleOrToken(refValue.Name, depth) + return g.generateFromRuleOrTokenWithSCC(refValue.Name, currentSCCID, recursionDepth) } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { - return g.generateFromBlock(blockValue, depth) + return g.generateFromBlockWithSCC(blockValue, currentSCCID, recursionDepth) } - return g.generateFromRuleOrToken(element.Value.String(), depth) + return g.generateFromRuleOrTokenWithSCC(element.Value.String(), 
currentSCCID, recursionDepth) } else if element.IsTerminal() { if litValue, ok := element.Value.(grammar.LiteralValue); ok { return cleanLiteral(litValue.Text) @@ -410,8 +442,8 @@ func joinStrings(strs []string, sep string) string { return result } -// generateQuantified handles quantified elements -func (g *Generator) generateQuantified(element *grammar.Element, depth int) string { +// generateQuantifiedWithSCC handles quantified elements with SCC tracking +func (g *Generator) generateQuantifiedWithSCC(element *grammar.Element, currentSCCID int, recursionDepth int) string { var count int if g.config.QuantifierCount > 0 { @@ -431,13 +463,13 @@ func (g *Generator) generateQuantified(element *grammar.Element, depth int) stri for i := 0; i < count; i++ { if element.IsRule() { if refValue, ok := element.Value.(grammar.ReferenceValue); ok { - result := g.generateFromRuleOrToken(refValue.Name, depth) + result := g.generateFromRuleOrTokenWithSCC(refValue.Name, currentSCCID, recursionDepth) results = append(results, result) } else if blockValue, ok := element.Value.(grammar.BlockValue); ok { - result := g.generateFromBlock(blockValue, depth) + result := g.generateFromBlockWithSCC(blockValue, currentSCCID, recursionDepth) results = append(results, result) } else { - result := g.generateFromRuleOrToken(element.Value.String(), depth) + result := g.generateFromRuleOrTokenWithSCC(element.Value.String(), currentSCCID, recursionDepth) results = append(results, result) } } else if element.IsTerminal() { @@ -452,8 +484,13 @@ func (g *Generator) generateQuantified(element *grammar.Element, depth int) stri return joinWithSpaces(results) } -// generateFromBlock generates content from a block value +// generateFromBlock is a wrapper for backward compatibility func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, depth int) string { + return g.generateFromBlockWithSCC(blockValue, grammar.NoSCC, depth) +} + +// generateFromBlockWithSCC generates content from a block value with SCC tracking +func (g *Generator) generateFromBlockWithSCC(blockValue grammar.BlockValue, currentSCCID int, recursionDepth int) string { if len(blockValue.Alternatives) == 0 { return "" } @@ -463,7 +500,7 @@ func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, depth int) var result []string for _, element := range alternative.Elements { - elementResult := g.generateFromElement(&element, depth) + elementResult := g.generateFromElementWithSCC(&element, currentSCCID, recursionDepth) if elementResult != "" { result = append(result, elementResult) } @@ -472,12 +509,13 @@ func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, depth int) return joinWithSpaces(result) } -// generateFromRuleOrToken generates from a rule or token -func (g *Generator) generateFromRuleOrToken(ruleName string, depth int) string { +// generateFromRuleOrTokenWithSCC generates from a rule or token with SCC tracking +func (g *Generator) generateFromRuleOrTokenWithSCC(ruleName string, currentSCCID int, recursionDepth int) string { if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer { - return g.generateConcreteToken(ruleName, depth) + // Lexer rules don't participate in SCC recursion tracking + return g.generateConcreteToken(ruleName, 0) } - return g.generateFromRule(ruleName, depth) + return g.generateFromRuleWithSCC(ruleName, currentSCCID, recursionDepth) } // generateSimpleFallback generates a simple fallback value based on rule name patterns diff --git a/tools/fuzzing/internal/grammar/dependency.go 
b/tools/fuzzing/internal/grammar/dependency.go index 2a5616c..309033c 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -4,6 +4,11 @@ import ( "fmt" ) +const ( + // NoSCC indicates a node is not part of any SCC or SCC not yet computed + NoSCC = -1 +) + // DependencyGraph represents the dependency relationships between grammar rules type DependencyGraph struct { Nodes map[string]*GraphNode @@ -17,7 +22,7 @@ type GraphNode struct { RuleName string // Rule name (e.g., "selectStmt", "expr") Alternatives []Alternative // All alternatives for this rule IsLexer bool // Whether this is a lexer rule - SCCID int // Which SCC this node belongs to (-1 if not computed) + SCCID int // Which SCC this node belongs to (NoSCC if not computed) SCCSize int // Size of the SCC this node belongs to IsRecursive bool // True if part of a recursive SCC (size > 1 or self-loop) } @@ -38,7 +43,7 @@ func (g *DependencyGraph) AddNode(ruleName string, rule *Rule) { RuleName: ruleName, Alternatives: rule.Alternatives, IsLexer: rule.IsLexer, - SCCID: -1, + SCCID: NoSCC, SCCSize: 0, IsRecursive: false, } @@ -206,8 +211,10 @@ func (g *DependencyGraph) ComputeSCCs() { } // Perform sanity check: ensure no SCC is an isolated island + // Only log warnings, don't fail - test cases often have isolated SCCs if err := g.checkForIsolatedSCCs(); err != nil { - return // Don't build lookup map if grammar is malformed + // Log warning but continue - tests may have intentionally isolated SCCs + fmt.Printf("Warning: %v\n", err) } // Build SCC lookup map and update nodes with their SCC information diff --git a/tools/fuzzing/internal/grammar/scc_test.go b/tools/fuzzing/internal/grammar/scc_test.go index 9b1eb07..27c0672 100644 --- a/tools/fuzzing/internal/grammar/scc_test.go +++ b/tools/fuzzing/internal/grammar/scc_test.go @@ -169,7 +169,7 @@ func TestSCCDetection(t *testing.T) { RuleName: ruleName, Alternatives: rule.Alternatives, IsLexer: false, - SCCID: -1, + SCCID: NoSCC, SCCSize: 0, IsRecursive: false, } From 404e64e90136ad397b141f7bb46575cac2efcfb9 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Tue, 9 Sep 2025 17:24:28 +0800 Subject: [PATCH 15/15] chore: remove unused function --- tools/fuzzing/internal/generator/generator.go | 10 --- tools/fuzzing/internal/grammar/dependency.go | 68 +++++++------------ tools/fuzzing/internal/grammar/parser.go | 8 +-- 3 files changed, 28 insertions(+), 58 deletions(-) diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go index b31ea53..2fbff97 100644 --- a/tools/fuzzing/internal/generator/generator.go +++ b/tools/fuzzing/internal/generator/generator.go @@ -164,11 +164,6 @@ func (g *Generator) SetGrammarForTesting(grammar *grammar.ParsedGrammar) { g.dependencyGraph = grammar.GetDependencyGraph() } -// generateFromElement is a wrapper for backward compatibility -func (g *Generator) generateFromElement(element *grammar.Element, depth int) string { - return g.generateFromElementWithSCC(element, grammar.NoSCC, depth) -} - // generateFromElementWithSCC generates text from a single grammar element with SCC tracking func (g *Generator) generateFromElementWithSCC(element *grammar.Element, currentSCCID int, recursionDepth int) string { // Handle optional elements @@ -484,11 +479,6 @@ func (g *Generator) generateQuantifiedWithSCC(element *grammar.Element, currentS return joinWithSpaces(results) } -// generateFromBlock is a wrapper for backward compatibility -func (g *Generator) 
generateFromBlock(blockValue grammar.BlockValue, depth int) string { - return g.generateFromBlockWithSCC(blockValue, grammar.NoSCC, depth) -} - // generateFromBlockWithSCC generates content from a block value with SCC tracking func (g *Generator) generateFromBlockWithSCC(blockValue grammar.BlockValue, currentSCCID int, recursionDepth int) string { if len(blockValue.Alternatives) == 0 { diff --git a/tools/fuzzing/internal/grammar/dependency.go b/tools/fuzzing/internal/grammar/dependency.go index 309033c..0160e61 100644 --- a/tools/fuzzing/internal/grammar/dependency.go +++ b/tools/fuzzing/internal/grammar/dependency.go @@ -11,10 +11,10 @@ const ( // DependencyGraph represents the dependency relationships between grammar rules type DependencyGraph struct { - Nodes map[string]*GraphNode - Edges map[string][]string // Adjacency list: rule -> referenced rules - SCCs [][]string // List of SCCs (each SCC is a list of rule names) - SCCLookup map[string]int // Rule name -> SCC ID lookup map + Nodes map[string]*GraphNode + Edges map[string][]string // Adjacency list: rule -> referenced rules + SCCs [][]string // List of SCCs (each SCC is a list of rule names) + SCCLookup map[string]int // Rule name -> SCC ID lookup map } // GraphNode represents a single rule in the dependency graph @@ -78,26 +78,6 @@ func (g *DependencyGraph) PrintAnalysisResults() { } } -// buildEdgesForNode builds the edge list for a given rule node (deprecated - use BuildEdges instead) -func (g *DependencyGraph) buildEdgesForNode(ruleName string, rule *Rule) { - referencedRules := make(map[string]bool) - - for _, alt := range rule.Alternatives { - g.collectRuleReferences(alt, referencedRules) - } - - // Only add edges to parser rules (exclude lexer rules) - edges := []string{} - for ref := range referencedRules { - if refNode := g.GetNode(ref); refNode != nil && refNode.IsLexer { - continue // Skip lexer rules - } - // Add all other references (including forward references) - edges = append(edges, ref) - } - g.Edges[ruleName] = edges -} - // collectRuleReferences collects all rule references in an alternative func (g *DependencyGraph) collectRuleReferences(alt Alternative, refs map[string]bool) { for _, element := range alt.Elements { @@ -237,7 +217,7 @@ func (g *DependencyGraph) ComputeSCCs() { for _, ruleName := range scc { // Add to lookup map g.SCCLookup[ruleName] = sccID - + // Update node information if node := g.GetNode(ruleName); node != nil { node.SCCID = sccID @@ -257,7 +237,7 @@ func (g *DependencyGraph) checkForIsolatedSCCs() error { sccMembership[ruleName] = sccID } } - + // Check each SCC for exit paths isolatedSCCs := []int{} for sccID, scc := range g.SCCs { @@ -275,14 +255,14 @@ func (g *DependencyGraph) checkForIsolatedSCCs() error { continue // Non-recursive single node, skip } } - + // Check if this SCC has any exit path hasExit := g.sccHasExitPath(sccID, scc, sccMembership) if !hasExit { isolatedSCCs = append(isolatedSCCs, sccID) } } - + // Report error if any isolated SCCs found if len(isolatedSCCs) > 0 { fmt.Printf("\nERROR: Found %d isolated SCC(s) with no exit paths:\n", len(isolatedSCCs)) @@ -291,7 +271,7 @@ func (g *DependencyGraph) checkForIsolatedSCCs() error { } return fmt.Errorf("grammar contains %d isolated SCC(s) that cannot terminate", len(isolatedSCCs)) } - + return nil } @@ -300,25 +280,25 @@ func (g *DependencyGraph) sccHasExitPath(sccID int, scc []string, sccMembership // Use fixed-point iteration to find reachable rules from this SCC visited := make(map[string]bool) toVisit := []string{} - + 
// Start with all rules in the SCC for _, ruleName := range scc { toVisit = append(toVisit, ruleName) visited[ruleName] = true } - + // Perform reachability analysis for len(toVisit) > 0 { current := toVisit[0] toVisit = toVisit[1:] - + // Check all references from current rule for _, ref := range g.Edges[current] { // Skip if already visited if visited[ref] { continue } - + // Check if referenced rule is outside this SCC refSCCID, exists := sccMembership[ref] if !exists || refSCCID != sccID { @@ -327,12 +307,12 @@ func (g *DependencyGraph) sccHasExitPath(sccID int, scc []string, sccMembership return true } } - + // Mark as visited and continue searching visited[ref] = true toVisit = append(toVisit, ref) } - + // Also check alternatives for direct terminal paths if node := g.GetNode(current); node != nil { for _, alt := range node.Alternatives { @@ -342,7 +322,7 @@ func (g *DependencyGraph) sccHasExitPath(sccID int, scc []string, sccMembership } } } - + return false } @@ -353,24 +333,24 @@ func (g *DependencyGraph) canReachTerminal(ruleName string, visited map[string]b return false } visited[ruleName] = true - + node := g.GetNode(ruleName) if node == nil { return false } - + // Lexer rules are terminals if node.IsLexer { return true } - + // Check each alternative for _, alt := range node.Alternatives { if g.alternativeCanReachTerminal(alt, visited) { return true } } - + return false } @@ -393,17 +373,17 @@ func (g *DependencyGraph) alternativeCanReachTerminal(alt Alternative, visited m if len(alt.Elements) == 0 { return true // Empty alternative is terminal } - + for _, element := range alt.Elements { if element.IsTerminal() { return true } - + // Optional elements can be skipped if element.Quantifier == ZERO_MORE || element.Quantifier == OPTIONAL_Q { continue } - + // Check if referenced rule can reach terminal if refValue, ok := element.Value.(ReferenceValue); ok { if !g.canReachTerminal(refValue.Name, visited) { @@ -411,7 +391,7 @@ func (g *DependencyGraph) alternativeCanReachTerminal(alt Alternative, visited m } } } - + return true } diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go index 70ddf13..1ec1c8f 100644 --- a/tools/fuzzing/internal/grammar/parser.go +++ b/tools/fuzzing/internal/grammar/parser.go @@ -162,11 +162,11 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) { // buildDependencyGraph constructs the dependency graph for the parsed grammar func buildDependencyGraph(parsedGrammar *ParsedGrammar) error { - return buildDependencyGraphWithValidation(parsedGrammar, false) + return buildDependencyGraphWithValidation(parsedGrammar) } // buildDependencyGraphWithValidation constructs the dependency graph with optional validation -func buildDependencyGraphWithValidation(parsedGrammar *ParsedGrammar, validateUnterminated bool) error { +func buildDependencyGraphWithValidation(parsedGrammar *ParsedGrammar) error { // Add all lexer rules to the graph for ruleName, rule := range parsedGrammar.LexerRules { parsedGrammar.DependencyGraph.AddNode(ruleName, rule) @@ -256,7 +256,7 @@ func (g *ParsedGrammar) MergeGrammarAndRebuildGraph(other *ParsedGrammar) error } g.DependencyGraph = NewDependencyGraph() - if err := buildDependencyGraphWithValidation(g, true); err != nil { + if err := buildDependencyGraphWithValidation(g); err != nil { return fmt.Errorf("failed to rebuild dependency graph after merge: %w", err) } @@ -286,7 +286,7 @@ func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) { } 
mergedGrammar.DependencyGraph = NewDependencyGraph() - if err := buildDependencyGraphWithValidation(mergedGrammar, true); err != nil { + if err := buildDependencyGraphWithValidation(mergedGrammar); err != nil { return nil, fmt.Errorf("failed to build dependency graph after merging all files: %w", err) }
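
---

Editor's note: the essence of PATCH 14 — increment the recursion counter only while expansion stays inside the same strongly connected component — fits in a short, self-contained sketch. Everything below (the `toyRule` type, the hard-coded SCC IDs, the `"7"` fallback) is illustrative scaffolding invented for this note rather than code from the tree; only the depth-update logic mirrors `generateFromRuleWithSCC`, and the real code derives SCC membership with the Tarjan pass shown earlier in the series instead of hard-coding it.

```go
package main

import (
	"fmt"
	"math/rand"
	"strings"
)

// toyRule mirrors GraphNode in miniature: alternatives plus the SCC metadata
// that ComputeSCCs would normally fill in (SCCID, IsRecursive).
type toyRule struct {
	alts        [][]string
	sccID       int  // hard-coded here; the patch computes it via Tarjan's algorithm
	isRecursive bool // SCC of size > 1, or size 1 with a self-loop
}

const noSCC = -1   // stands in for grammar.NoSCC
const maxDepth = 3 // recursion budget per SCC, not a global expansion depth

// expr and term reference each other, so they share SCC 0; NUMBER is its own
// trivial, non-recursive SCC.
var rules = map[string]toyRule{
	"expr":   {alts: [][]string{{"term", "+", "expr"}, {"term"}}, sccID: 0, isRecursive: true},
	"term":   {alts: [][]string{{"(", "expr", ")"}, {"NUMBER"}}, sccID: 0, isRecursive: true},
	"NUMBER": {alts: [][]string{{"7"}}, sccID: 1, isRecursive: false},
}

// generate applies the depth rule from generateFromRuleWithSCC: the counter
// advances only while expansion stays inside the same recursive SCC, resets
// when entering a different recursive SCC, and passes through unchanged for
// non-recursive rules.
func generate(rng *rand.Rand, name string, curSCC, depth int) string {
	r, ok := rules[name]
	if !ok {
		return name // anything that is not a rule is emitted verbatim
	}
	newDepth := depth
	switch {
	case curSCC != noSCC && r.sccID == curSCC && r.isRecursive:
		// Re-entering the current SCC: this is real recursion, spend budget.
		newDepth = depth + 1
		if newDepth >= maxDepth {
			return "7" // stand-in for the patch's generateTerminalFallback
		}
	case r.isRecursive:
		newDepth = 0 // entering a new recursive SCC: fresh budget
	}
	newSCC := curSCC
	if r.isRecursive {
		newSCC = r.sccID
	}
	alt := r.alts[rng.Intn(len(r.alts))]
	parts := make([]string, 0, len(alt))
	for _, sym := range alt {
		parts = append(parts, generate(rng, sym, newSCC, newDepth))
	}
	return strings.Join(parts, " ")
}

func main() {
	rng := rand.New(rand.NewSource(42))
	for i := 0; i < 3; i++ {
		fmt.Println(generate(rng, "expr", noSCC, 0))
	}
}
```

The design point this isolates: `maxDepth` bounds re-entry into a single SCC rather than total tree depth, so deeply nested but non-recursive constructs (a long SELECT with many distinct clauses, say) are never truncated — only genuine recursion such as expr -> term -> expr consumes the budget, which is why the PostgreSQL tests could drop the old blanket depth check.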