diff --git a/csharp/Platform.RegularExpressions.Transformer.Tests/MarkovAlgorithmsTests.cs b/csharp/Platform.RegularExpressions.Transformer.Tests/MarkovAlgorithmsTests.cs index a7310b1..3ffcdd7 100644 --- a/csharp/Platform.RegularExpressions.Transformer.Tests/MarkovAlgorithmsTests.cs +++ b/csharp/Platform.RegularExpressions.Transformer.Tests/MarkovAlgorithmsTests.cs @@ -30,5 +30,102 @@ public void BinaryToUnaryNumbersTest() var output = transformer.Transform(input); Assert.Equal(expectedOutput, output); } + + /// + /// Tests that terminating rules stop the algorithm immediately when applied. + /// + [Fact] + public void TerminatingRuleStopsAlgorithmTest() + { + var rules = new SubstitutionRule[] + { + ("a", "b", 0, false), // Regular rule: a -> b + ("b", "STOP", 0, true), // Terminating rule: b -> STOP (should stop here) + ("STOP", "c", 0, false), // This rule should never be applied + }; + var transformer = new TextTransformer(rules); + var input = "a"; + var expectedOutput = "STOP"; + var output = transformer.Transform(input); + Assert.Equal(expectedOutput, output); + } + + /// + /// Tests that non-terminating rules continue processing. + /// + [Fact] + public void NonTerminatingRulesContinueProcessingTest() + { + var rules = new SubstitutionRule[] + { + ("a", "b", 0, false), // Regular rule: a -> b + ("b", "c", 0, false), // Regular rule: b -> c + ("c", "d", 0, false), // Regular rule: c -> d + }; + var transformer = new TextTransformer(rules); + var input = "a"; + var expectedOutput = "d"; + var output = transformer.Transform(input); + Assert.Equal(expectedOutput, output); + } + + /// + /// Tests terminating rule with complex pattern. + /// + [Fact] + public void TerminatingRuleWithComplexPatternTest() + { + var rules = new SubstitutionRule[] + { + (@"(\d+)", "[$1]", 0, false), // Wrap numbers in brackets + (@"\[42\]", "FORTY-TWO", 0, true), // Terminating rule for [42] + (@"\[(\d+)\]", "NUM:$1", 0, false), // This should not be applied for [42] + }; + var transformer = new TextTransformer(rules); + var input = "42"; + var expectedOutput = "FORTY-TWO"; + var output = transformer.Transform(input); + Assert.Equal(expectedOutput, output); + } + + /// + /// Tests that terminating rules work with multiple matches in text. + /// + [Fact] + public void TerminatingRuleWithMultipleMatchesTest() + { + var rules = new SubstitutionRule[] + { + ("x", "X", 0, false), // Regular rule: x -> X + ("X", "TERMINATED", 0, true), // Terminating rule: X -> TERMINATED + ("y", "Y", 0, false), // This rule should not be applied + }; + var transformer = new TextTransformer(rules); + var input = "xyx"; // Should transform both x to X, then first X to TERMINATED and stop + var expectedOutput = "TERMINATEDyX"; // Both x converted to X, then first X terminated + var output = transformer.Transform(input); + Assert.Equal(expectedOutput, output); + } + + /// + /// Tests the Wikipedia Markov algorithm example with terminating rule. + /// Example modified to include a terminating condition. + /// + [Fact] + public void WikipediaExampleWithTerminatingRuleTest() + { + var rules = new SubstitutionRule[] + { + ("1", "0|", int.MaxValue), // "1" -> "0|" repeated forever + (@"\|0", "0||", int.MaxValue), // "\|0" -> "0||" repeated forever + ("0", "", int.MaxValue), // "0" -> "" repeated forever + (@"\|\|\|\|\|", "FIVE", true), // Terminating rule: five bars -> FIVE + }; + var transformer = new TextTransformer(rules); + var input = "101"; + var expectedOutput = "FIVE"; // Should transform to ||||| then to FIVE and stop + var output = transformer.Transform(input); + Assert.Equal(expectedOutput, output); + } } } diff --git a/csharp/Platform.RegularExpressions.Transformer/ISubstitutionRule.cs b/csharp/Platform.RegularExpressions.Transformer/ISubstitutionRule.cs index f24e825..563fbf0 100644 --- a/csharp/Platform.RegularExpressions.Transformer/ISubstitutionRule.cs +++ b/csharp/Platform.RegularExpressions.Transformer/ISubstitutionRule.cs @@ -48,5 +48,18 @@ int MaximumRepeatCount [MethodImpl(MethodImplOptions.AggressiveInlining)] get; } + + /// + /// + /// Gets a value indicating whether this rule is terminating. + /// When a terminating rule is applied, the Markov algorithm stops execution. + /// + /// + /// + bool IsTerminating + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + } } } \ No newline at end of file diff --git a/csharp/Platform.RegularExpressions.Transformer/SubstitutionRule.cs b/csharp/Platform.RegularExpressions.Transformer/SubstitutionRule.cs index 150343a..b2b5ae9 100644 --- a/csharp/Platform.RegularExpressions.Transformer/SubstitutionRule.cs +++ b/csharp/Platform.RegularExpressions.Transformer/SubstitutionRule.cs @@ -87,6 +87,21 @@ public int MaximumRepeatCount set; } + /// + /// + /// Gets or sets a value indicating whether this rule is terminating. + /// When a terminating rule is applied, the Markov algorithm stops execution. + /// + /// + /// + public bool IsTerminating + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + set; + } + /// /// /// Initializes a new instance. @@ -113,12 +128,17 @@ public int MaximumRepeatCount /// A match timeout. /// /// + /// + /// A value indicating whether this rule is terminating. + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maximumRepeatCount, RegexOptions? matchPatternOptions, TimeSpan? matchTimeout) + public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maximumRepeatCount, RegexOptions? matchPatternOptions, TimeSpan? matchTimeout, bool isTerminating = false) { MatchPattern = matchPattern; SubstitutionPattern = substitutionPattern; MaximumRepeatCount = maximumRepeatCount; + IsTerminating = isTerminating; OverrideMatchPatternOptions(matchPatternOptions ?? matchPattern.Options, matchTimeout ?? matchPattern.MatchTimeout); } @@ -144,8 +164,12 @@ public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maxi /// A use default options. /// /// + /// + /// A value indicating whether this rule is terminating. + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maximumRepeatCount, bool useDefaultOptions) : this(matchPattern, substitutionPattern, maximumRepeatCount, useDefaultOptions ? DefaultMatchPatternRegexOptions : (RegexOptions?)null, useDefaultOptions ? DefaultMatchTimeout : (TimeSpan?)null) { } + public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maximumRepeatCount, bool useDefaultOptions, bool isTerminating = false) : this(matchPattern, substitutionPattern, maximumRepeatCount, useDefaultOptions ? DefaultMatchPatternRegexOptions : (RegexOptions?)null, useDefaultOptions ? DefaultMatchTimeout : (TimeSpan?)null, isTerminating) { } /// /// @@ -165,8 +189,12 @@ public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maxi /// A maximum repeat count. /// /// + /// + /// A value indicating whether this rule is terminating. + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maximumRepeatCount) : this(matchPattern, substitutionPattern, maximumRepeatCount, true) { } + public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maximumRepeatCount, bool isTerminating = false) : this(matchPattern, substitutionPattern, maximumRepeatCount, true, isTerminating) { } /// /// @@ -182,8 +210,12 @@ public SubstitutionRule(Regex matchPattern, string substitutionPattern, int maxi /// A substitution pattern. /// /// + /// + /// A value indicating whether this rule is terminating. + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public SubstitutionRule(Regex matchPattern, string substitutionPattern) : this(matchPattern, substitutionPattern, 0) { } + public SubstitutionRule(Regex matchPattern, string substitutionPattern, bool isTerminating = false) : this(matchPattern, substitutionPattern, 0, isTerminating) { } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static implicit operator SubstitutionRule(ValueTuple tuple) => new SubstitutionRule(new Regex(tuple.Item1), tuple.Item2); @@ -197,6 +229,18 @@ public SubstitutionRule(Regex matchPattern, string substitutionPattern) : this(m [MethodImpl(MethodImplOptions.AggressiveInlining)] public static implicit operator SubstitutionRule(ValueTuple tuple) => new SubstitutionRule(tuple.Item1, tuple.Item2, tuple.Item3); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static implicit operator SubstitutionRule(ValueTuple tuple) => new SubstitutionRule(new Regex(tuple.Item1), tuple.Item2, tuple.Item3); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static implicit operator SubstitutionRule(ValueTuple tuple) => new SubstitutionRule(tuple.Item1, tuple.Item2, tuple.Item3); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static implicit operator SubstitutionRule(ValueTuple tuple) => new SubstitutionRule(new Regex(tuple.Item1), tuple.Item2, tuple.Item3, tuple.Item4); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static implicit operator SubstitutionRule(ValueTuple tuple) => new SubstitutionRule(tuple.Item1, tuple.Item2, tuple.Item3, tuple.Item4); + /// /// /// Overrides the match pattern options using the specified options. @@ -272,6 +316,10 @@ public override string ToString() sb.Append(" times"); } } + if (IsTerminating) + { + sb.Append(" (terminating)"); + } return sb.ToString(); } } diff --git a/csharp/Platform.RegularExpressions.Transformer/TextSteppedTransformer.cs b/csharp/Platform.RegularExpressions.Transformer/TextSteppedTransformer.cs index 0b7e8f2..b7c09a9 100644 --- a/csharp/Platform.RegularExpressions.Transformer/TextSteppedTransformer.cs +++ b/csharp/Platform.RegularExpressions.Transformer/TextSteppedTransformer.cs @@ -217,14 +217,37 @@ public bool Next() var matchPattern = rule.MatchPattern; var substitutionPattern = rule.SubstitutionPattern; var maximumRepeatCount = rule.MaximumRepeatCount; + var isTerminating = rule.IsTerminating; var replaceCount = 0; var text = Text; + + // If this is a terminating rule, apply it only once and then stop + if (isTerminating) + { + if (matchPattern.IsMatch(text)) + { + text = matchPattern.Replace(text, substitutionPattern, 1); // Apply only once + Text = text; + Current = current; + return false; // Stop processing further rules + } + else + { + // Terminating rule doesn't match, continue to next rule + Text = text; + Current = current; + return true; + } + } + + // Non-terminating rule: apply according to repeat count do { text = matchPattern.Replace(text, substitutionPattern); replaceCount++; } while ((maximumRepeatCount == int.MaxValue || replaceCount <= maximumRepeatCount) && matchPattern.IsMatch(text)); + Text = text; Current = current; return true; diff --git a/csharp/debug_test.cs b/csharp/debug_test.cs new file mode 100644 index 0000000..562e396 --- /dev/null +++ b/csharp/debug_test.cs @@ -0,0 +1,28 @@ +using System; +using Platform.RegularExpressions.Transformer; + +var rules = new SubstitutionRule[] +{ + ("x", "X", false), // Regular rule: x -> X + ("X", "TERMINATED", true), // Terminating rule: X -> TERMINATED + ("y", "Y", false), // This rule should not be applied +}; + +var transformer = new TextTransformer(rules); +var input = "xyx"; + +Console.WriteLine($"Input: {input}"); + +var stepTransformer = new TextSteppedTransformer(rules, input); +var step = 0; + +while (stepTransformer.Next()) +{ + step++; + Console.WriteLine($"Step {step}: {stepTransformer.Text} (Rule {stepTransformer.Current})"); +} + +Console.WriteLine($"Final result: {stepTransformer.Text}"); + +var output = transformer.Transform(input); +Console.WriteLine($"Transform result: {output}"); \ No newline at end of file diff --git a/experiments/DebugTerminating/DebugTerminating.csproj b/experiments/DebugTerminating/DebugTerminating.csproj new file mode 100644 index 0000000..0df28bc --- /dev/null +++ b/experiments/DebugTerminating/DebugTerminating.csproj @@ -0,0 +1,14 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + diff --git a/experiments/DebugTerminating/Program.cs b/experiments/DebugTerminating/Program.cs new file mode 100644 index 0000000..d58d1b9 --- /dev/null +++ b/experiments/DebugTerminating/Program.cs @@ -0,0 +1,27 @@ +using Platform.RegularExpressions.Transformer; + +var rules = new SubstitutionRule[] +{ + ("x", "X", 0, false), // Regular rule: x -> X (single application) + ("X", "TERMINATED", 0, true), // Terminating rule: X -> TERMINATED + ("y", "Y", 0, false), // This rule should not be applied +}; + +var input = "xyx"; + +Console.WriteLine($"Input: {input}"); + +var stepTransformer = new TextSteppedTransformer(rules, input); +var step = 0; + +while (stepTransformer.Next()) +{ + step++; + Console.WriteLine($"Step {step}: {stepTransformer.Text} (Rule {stepTransformer.Current})"); +} + +Console.WriteLine($"Final result: {stepTransformer.Text}"); + +var transformer = new TextTransformer(rules); +var output = transformer.Transform(input); +Console.WriteLine($"Transform result: {output}"); diff --git a/nim/src/Rule.nim b/nim/src/Rule.nim index 7d1c85e..3ab10ae 100644 --- a/nim/src/Rule.nim +++ b/nim/src/Rule.nim @@ -8,17 +8,19 @@ type replace*: string file*: string count*: int + isTerminating*: bool proc `$`*(r: RuleObj): string = ## Converts Rule object to string. - result = fmt"""("{r.rule.pattern}", "{r.replace}", "{r.file}", {r.count})""" + result = fmt"""("{r.rule.pattern}", "{r.replace}", "{r.file}", {r.count}, {r.isTerminating})""" proc Rule*(rule: Regex = re"", replace: string = "", - file: string = "nil", count: int = 0): RuleObj = + file: string = "nil", count: int = 0, isTerminating: bool = false): RuleObj = ## Creates new Rule object. ## ## Keyword Arguments: ## - ``rule`` -- pattern for rule. ## - ``replace`` -- replace for rule. ## - ``count`` -- count for use pattern. - return RuleObj(rule: rule, replace: replace, file: file, count: count) + ## - ``isTerminating`` -- whether this rule is terminating. + return RuleObj(rule: rule, replace: replace, file: file, count: count, isTerminating: isTerminating) diff --git a/nim/src/retranslator.nim b/nim/src/retranslator.nim index c620fe5..ad3552c 100644 --- a/nim/src/retranslator.nim +++ b/nim/src/retranslator.nim @@ -50,6 +50,21 @@ proc transform*(t: TransformerRef, code=""): string = for i in t.rules: count = i.count + + # If this is a terminating rule, apply it only once and then stop + if i.isTerminating: + if tr.find(i.rule).isSome: + tr = tr.replace(i.rule, i.replace, 1) # Apply only once + if t.debug: + echo i, " (terminated)" + return tr # Stop processing further rules + else: + # Terminating rule doesn't match, continue to next rule + if t.debug: + echo i, " (no match)" + continue + + # Non-terminating rule: apply according to repeat count tr = tr.replace(i.rule, i.replace) if t.debug: echo i diff --git a/nim/tests/test_terminating.nim b/nim/tests/test_terminating.nim new file mode 100644 index 0000000..9323a03 --- /dev/null +++ b/nim/tests/test_terminating.nim @@ -0,0 +1,30 @@ +# Test terminating rules functionality +import retranslator + +# Test that terminating rules stop the algorithm immediately when applied +echo "Testing terminating rules..." + +var transformer = Transformer() +transformer.add( + Rule(re"a", "b", count: 0, isTerminating: false), # Regular rule: a -> b + Rule(re"b", "STOP", count: 0, isTerminating: true), # Terminating rule: b -> STOP + Rule(re"STOP", "c", count: 0, isTerminating: false) # This rule should never be applied +) + +let result1 = transformer.transform("a") +echo "Test 1 - Input: 'a', Expected: 'STOP', Actual: '", result1, "'" +assert result1 == "STOP", "Test 1 failed: expected 'STOP', got '" & result1 & "'" + +# Test that terminating rules work with multiple matches +var transformer2 = Transformer() +transformer2.add( + Rule(re"x", "X", count: 0, isTerminating: false), # Regular rule: x -> X + Rule(re"X", "TERMINATED", count: 0, isTerminating: true), # Terminating rule: X -> TERMINATED + Rule(re"y", "Y", count: 0, isTerminating: false) # This rule should not be applied +) + +let result2 = transformer2.transform("xyx") +echo "Test 2 - Input: 'xyx', Expected: 'TERMINATEDyX', Actual: '", result2, "'" +assert result2 == "TERMINATEDyX", "Test 2 failed: expected 'TERMINATEDyX', got '" & result2 & "'" + +echo "All tests passed!" \ No newline at end of file diff --git a/python/retranslator/__init__.py b/python/retranslator/__init__.py index ec3c163..8c0cf9e 100644 --- a/python/retranslator/__init__.py +++ b/python/retranslator/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from .sub_rule import SubRule from .stepped_translator import SteppedTranslator -from .translator import Translator +from .Translator import Translator from .file_translator import FileTranslator from .translator_cli import TranslatorCLI diff --git a/python/retranslator/file_translator.py b/python/retranslator/file_translator.py index 64ae032..a820734 100644 --- a/python/retranslator/file_translator.py +++ b/python/retranslator/file_translator.py @@ -2,7 +2,7 @@ from typing import NoReturn, Optional, List from os import listdir, path, getcwd, mkdir -from .translator import Translator +from .Translator import Translator class FileTranslator(Translator): diff --git a/python/retranslator/stepped_translator.py b/python/retranslator/stepped_translator.py index cffc253..0550cc0 100644 --- a/python/retranslator/stepped_translator.py +++ b/python/retranslator/stepped_translator.py @@ -41,7 +41,19 @@ def next( rule = self.rules[self.current] replace = -1 - + + # If this is a terminating rule, apply it only once and then stop + if rule.is_terminating: + if search(rule.match, self.text): + self.text = sub(rule.match, rule.sub, self.text, count=1) # Apply only once + self.current += 1 + return False # Stop processing further rules + else: + # Terminating rule doesn't match, continue to next rule + self.current += 1 + return True + + # Non-terminating rule: apply according to repeat count while search(rule.match, self.text) and rule.max_repeat > replace: self.text = sub(rule.match, rule.sub, self.text) replace += 1 diff --git a/python/retranslator/sub_rule.py b/python/retranslator/sub_rule.py index 23a8125..e1fd3c6 100644 --- a/python/retranslator/sub_rule.py +++ b/python/retranslator/sub_rule.py @@ -10,6 +10,7 @@ class SubRule: sub: Pattern = r'' # substitution pattern path: Optional[Pattern] = None # path pattern max_repeat: int = 0 # maximum repeat count + is_terminating: bool = False # whether this rule is terminating def __init__( self, @@ -17,6 +18,7 @@ def __init__( sub: Pattern, path: Optional[Pattern] = None, max_repeat: int = 0, + is_terminating: bool = False, regex_options: int = options ): """Initializes Substitution rule. @@ -25,12 +27,14 @@ def __init__( :param sub: substitution pattern :param path: path pattern :param max_repeat: max match repeat + :param is_terminating: whether this rule is terminating :regex_options: regular expression options. by default is Multiline. """ self.match = match self.sub = sub self.path = path self.max_repeat = max_repeat + self.is_terminating = is_terminating self.options = regex_options def __str__( @@ -41,4 +45,6 @@ def __str__( result = f'{result} on files "{self.path}"' if self.max_repeat > 0: result = f'{result} repeated {self.max_repeat} times' + if self.is_terminating: + result = f'{result} (terminating)' return result diff --git a/python/retranslator/translator_cli.py b/python/retranslator/translator_cli.py index 733b2df..734f48c 100644 --- a/python/retranslator/translator_cli.py +++ b/python/retranslator/translator_cli.py @@ -6,7 +6,7 @@ from regex import Pattern -from .translator import Translator +from .Translator import Translator class TranslatorCLI: diff --git a/python/tests.py b/python/tests.py index f49151c..48ced14 100644 --- a/python/tests.py +++ b/python/tests.py @@ -31,6 +31,31 @@ def test_init_stepped_translator(self): pass assert obj.text == '909asd' + def test_terminating_rule_stops_algorithm(self): + """Test that terminating rules stop the algorithm immediately when applied.""" + rules = [ + SubRule(r'a', 'b', max_repeat=0, is_terminating=False), # Regular rule: a -> b + SubRule(r'b', 'STOP', max_repeat=0, is_terminating=True), # Terminating rule: b -> STOP + SubRule(r'STOP', 'c', max_repeat=0, is_terminating=False), # This rule should never be applied + ] + obj = SteppedTranslator(rules, 'a') + while obj.next(): + pass + assert obj.text == 'STOP' + + def test_terminating_rule_with_multiple_matches(self): + """Test that terminating rules work with multiple matches in text.""" + rules = [ + SubRule(r'x', 'X', max_repeat=0, is_terminating=False), # Regular rule: x -> X + SubRule(r'X', 'TERMINATED', max_repeat=0, is_terminating=True), # Terminating rule: X -> TERMINATED + SubRule(r'y', 'Y', max_repeat=0, is_terminating=False), # This rule should not be applied + ] + obj = SteppedTranslator(rules, 'xyx') + while obj.next(): + pass + # Both x converted to X, then first X terminated + assert obj.text == 'TERMINATEDyX' + class Test3Translator(TestCase): def test_init(self):