Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
Expand Down Expand Up @@ -59,6 +59,7 @@
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.regex.RegexLanguage;
import com.oracle.truffle.regex.RegexObject;
import com.oracle.truffle.regex.RegexSyntaxException;

@TruffleLanguage.Registration(name = TRegexTestDummyLanguage.NAME, id = TRegexTestDummyLanguage.ID, characterMimeTypes = TRegexTestDummyLanguage.MIME_TYPE, version = "0.1", dependentLanguages = RegexLanguage.ID)
public class TRegexTestDummyLanguage extends TruffleLanguage<TRegexTestDummyLanguage.DummyLanguageContext> {
Expand Down Expand Up @@ -111,8 +112,12 @@ public Object execute(VirtualFrame frame) {
}
}.getCallTarget();
}
return DummyLanguageContext.get(null).getEnv().parseInternal(
Source.newBuilder(RegexLanguage.ID, src, parsingRequest.getSource().getName()).internal(true).build());
try {
return DummyLanguageContext.get(null).getEnv().parseInternal(
Source.newBuilder(RegexLanguage.ID, src, parsingRequest.getSource().getName()).internal(true).build());
} catch (RegexSyntaxException e) {
throw e.withErrorCodeInMessage();
}
}

@GenerateInline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ public void testParseFlags() {
assertTrue(parse("i").isIgnoreCase());
assertTrue(parse("m").isMultiLine());
assertTrue(parse("s").isDotAll());
assertTrue(parse("t").isTemplate());
assertTrue(parse("u").isUnicodeExplicitlySet());
assertTrue(parse("x").isVerbose());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
Expand Down Expand Up @@ -83,28 +83,29 @@ public void testBenchmarkRegexes() {
testInputStringGenerator(
"([-!#-''*+/-9=?A-Z^-~]+(\\.[-!#-''*+/-9=?A-Z^-~]+)*|\"([ ]!#-[^-~ ]|(\\\\[-~ ]))+\")@[0-9A-Za-z]([0-9A-Za-z-]*[0-9A-Za-z])?(\\.[0-9A-Za-z]([0-9A-Za-z-]*[0-9A-Za-z])?)+");
testInputStringGenerator("(\\S+) (\\S+) (\\S+) \\[([A-Za-z0-9_:/]+\\s[-+]\\d{4})\\] \"(\\S+)\\s?(\\S+)?\\s?(\\S+)?\" (\\d{3}|-) (\\d+|-)\\s?\"?([^\"]*)\"?\\s?\"?([^\"]*)?\"?");
testInputStringGenerator("(?<=(a))\\1");
}

private TruffleString generateInputString(String pattern, String flags, String options, Encodings.Encoding encoding) {
private TruffleString generateInputString(String pattern, String flags, String options, Encodings.Encoding encoding, long rngSeed) {
String sourceString = createSourceString(pattern, flags, options, encoding);
Source source = Source.newBuilder("regex", sourceString, "regexSource").build();
RegexSource regexSource = RegexLanguage.createRegexSource(source);
RegexAST ast = regexSource.getOptions().getFlavor().createParser(language, regexSource, new CompilationBuffer(regexSource.getEncoding())).parse();
return InputStringGenerator.generate(ast, rng.nextLong());
return InputStringGenerator.generate(ast, rngSeed);
}

void testInputStringGenerator(String pattern) {
testInputStringGenerator(pattern, "", getEngineOptions(), getTRegexEncoding());
testInputStringGenerator(pattern, "", getEngineOptions(), getTRegexEncoding(), rng.nextLong());
}

void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding) {
void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding, long rngSeed) {
Value compiledRegex = compileRegex(pattern, flags);
testInputStringGenerator(pattern, flags, options, encoding, compiledRegex);
testInputStringGenerator(pattern, flags, options, encoding, rngSeed, compiledRegex);
}

private void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding, Value compiledRegex) {
private void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding, long rngSeed, Value compiledRegex) {
for (int i = 0; i < 20; i++) {
TruffleString input = generateInputString(pattern, flags, options, encoding);
TruffleString input = generateInputString(pattern, flags, options, encoding, rngSeed);
Assert.assertNotNull(input);
Value result = execRegex(compiledRegex, encoding, input, 0);
Assert.assertTrue(result.getMember("isMatch").asBoolean());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,6 @@
*/
package com.oracle.truffle.regex.tregex.test;

import com.oracle.truffle.regex.charset.Range;
import com.oracle.truffle.regex.tregex.parser.CaseFoldData;
import com.oracle.truffle.regex.tregex.parser.flavors.java.JavaFlags;
import com.oracle.truffle.regex.tregex.string.Encodings;
import com.oracle.truffle.regex.util.EmptyArrays;
import org.graalvm.collections.Pair;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.PolyglotException;
import org.graalvm.polyglot.Value;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
Expand All @@ -62,6 +49,21 @@
import java.util.regex.PatternSyntaxException;
import java.util.stream.Stream;

import org.graalvm.collections.Pair;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.PolyglotException;
import org.graalvm.polyglot.Value;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import com.oracle.truffle.regex.RegexSyntaxException.ErrorCode;
import com.oracle.truffle.regex.charset.Range;
import com.oracle.truffle.regex.tregex.parser.CaseFoldData;
import com.oracle.truffle.regex.tregex.parser.flavors.java.JavaFlags;
import com.oracle.truffle.regex.tregex.string.Encodings;
import com.oracle.truffle.regex.util.EmptyArrays;

public class JavaUtilPatternTests extends RegexTestBase {

public static final String ENGINE_OPTIONS = "Flavor=JavaUtilPattern,MatchingMode=search,JavaJDKVersion=" + Runtime.version().feature();
Expand Down Expand Up @@ -164,6 +166,8 @@ public void documentationSummary() {
// Boundary matchers
test("^", 0, "");
test("$", 0, "");
test("$", 0, "empty");
test("\\Z", 0, "\r\n");
test("\\b", 0, " a", 1);
// test("\\b{g}", 0, "");
test("\\B", 0, "b");
Expand Down Expand Up @@ -1266,6 +1270,112 @@ public void caseFolding() {
});
}

/**
 * Runs a fixed list of machine-generated regression cases.
 *
 * <p>
 * Everything between the {@code GENERATED CODE BEGIN} and {@code GENERATED CODE END} markers is
 * maintained by an automatic update step, as the marker text states, so manual changes belong
 * outside the markers. The marker comment records that the expectations were generated using
 * Java version 24.
 *
 * <p>
 * NOTE(review): each {@code test(...)} call appears to pass pattern, flag string, input, start
 * index, expected match result and then start/end index pairs for the match and its capture
 * groups ({@code -1, -1} presumably marking a group that did not participate) - confirm against
 * the helper's declaration. The single {@code expectSyntaxError(...)} call expects compilation of
 * the pattern to fail with {@code ErrorCode.InvalidNamedGroup}.
 */
@Test
public void generatedTests() {
/* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */

// Generated using Java version 24
test("((A|){7,10}?){10,17}", "", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 0, true, 0, 86, 86, 86, 86, 86);
test("(a{1,30}){1,4}", "", "a", 0, true, 0, 1, 0, 1);
test("((a|){4,6}){4,6}", "", "aaaaaaa", 0, true, 0, 7, 7, 7, 7, 7);
test("((a?){4,6}){4,6}", "", "aaaaaaa", 0, true, 0, 7, 7, 7, 7, 7);
test("((|a){4,6}){4,6}", "", "aaaaaaa", 0, true, 0, 0, 0, 0, 0, 0);
test("((a??){4,6}){4,6}", "", "aaaaaaa", 0, true, 0, 0, 0, 0, 0, 0);
test("((a?){4,6}){4,6}", "", "aaaaaa", 0, true, 0, 6, 6, 6, 6, 6);
test("(a|^){100}", "", "a", 0, true, 0, 0, 0, 0);
test("(a|^){100}", "", "aa", 0, true, 0, 0, 0, 0);
test("(a|^){100}", "", "aa", 1, false);
test("(a|^){100}", "", "ab", 1, false);
test("(.)\\1{2,}", "", "billiam", 0, false);
test("(^_(a{1,2}[:])*a{1,2}[:]a{1,2}([.]a{1,4})?_)+", "", "_a:a:a.aaa_", 0, true, 0, 11, 0, 11, 1, 3, 6, 10);
test("(a{2}|())+$", "", "aaaa", 0, true, 0, 4, 4, 4, 4, 4);
test("^a(b*)\\1{4,6}?", "", "abbbb", 0, true, 0, 1, 1, 1);
test("^a(b*)\\1{4,6}?", "", "abbbbb", 0, true, 0, 6, 1, 2);
test("(?<=|$)", "", "a", 0, true, 0, 0);
test("(?=ab)a", "", "ab", 0, true, 0, 1);
test("(?=()|^)|x", "", "empty", 0, true, 0, 0, 0, 0);
test("a(?<=ba)", "", "ba", 0, true, 1, 2);
test("(?<=(?=|()))", "", "aa", 0, true, 0, 0, -1, -1);
test("\\d\\W", "iv", "4\u017f", 0, true, 0, 2);
test("[\u08bc-\ucf3a]", "iv", "\u03b0", 0, false);
test("a(?:|()\\1){1,2}", "", "a", 0, true, 0, 1, -1, -1);
expectSyntaxError("|(?<\\d\\1)\ub7e4", "", "", getTRegexEncoding(), "error", 0, ErrorCode.InvalidNamedGroup);
test("[a-z][a-z\u2028\u2029].|ab(?<=[a-z]w.)", "", "aac", 0, true, 0, 3);
test("(animation|animation-name)", "", "animation", 0, true, 0, 9, 0, 9);
test("(a|){7,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(a|){7,7}?b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(|a){7,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(|a){7,7}?b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(a||b){7,7}c", "", "aaabc", 0, true, 0, 5, 4, 4);
test("(a||b){7,7}c", "", "aaac", 0, true, 0, 4, 3, 3);
test("(a||b){7,7}c", "", "aaabac", 0, true, 0, 6, 5, 5);
test("($|a){7,7}", "", "aaa", 0, true, 0, 3, 3, 3);
test("($|a){7,7}?", "", "aaa", 0, true, 0, 3, 3, 3);
test("(a|$){7,7}", "", "aaa", 0, true, 0, 3, 3, 3);
test("(a|$){7,7}?", "", "aaa", 0, true, 0, 3, 3, 3);
test("(a|$|b){7,7}", "", "aaab", 0, true, 0, 4, 4, 4);
test("(a|$|b){7,7}", "", "aaa", 0, true, 0, 3, 3, 3);
test("(a|$|b){7,7}", "", "aaaba", 0, true, 0, 5, 5, 5);
test("((?=a)|a){7,7}b", "", "aaa", 0, false);
test("((?=[ab])|a){7,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("((?<=a)|a){7,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("a((?<=a)|a){7,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(a|){0,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(a|){0,7}?b", "", "aaab", 0, true, 0, 4, 2, 3);
test("(|a){0,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(|a){0,7}?b", "", "aaab", 0, true, 0, 4, 2, 3);
test("(a||b){0,7}c", "", "aaabc", 0, true, 0, 5, 4, 4);
test("(a||b){0,7}c", "", "aaac", 0, true, 0, 4, 3, 3);
test("(a||b){0,7}c", "", "aaabac", 0, true, 0, 6, 5, 5);
test("((?=a)|a){0,7}b", "", "aaab", 0, true, 0, 4, 2, 3);
test("((?=[ab])|a){0,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("((?<=a)|a){0,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("a((?<=a)|a){0,7}b", "", "aaab", 0, true, 0, 4, 3, 3);
test("(a*?){11,11}?b", "", "aaaaaaaaaaaaaaaaaaaaaaaaab", 0, true, 0, 26, 10, 25);
test("(?:a(b{0,19})c)", "", "abbbbbbbcdebbbbbbbf", 0, true, 0, 9, 1, 8);
test("(?:a(b{0,19})c)de", "", "abbbbbbbcdebbbbbbbf", 0, true, 0, 11, 1, 8);
test("(?<=a(b{0,19})c)de", "", "abbbbbbbcdebbbbbbbf", 0, true, 9, 11, 1, 8);
test("[\ud0d9](?<=\\S)", "", "\ud0d9", 0, true, 0, 1);
test("[\ud0d9](?<=\\W)", "", "\ud0d9", 0, true, 0, 1);
test("\u0895(?<=\\S)", "", "\u0895", 0, true, 0, 1);
test("\u0895(?<=\\W)", "", "\u0895", 0, true, 0, 1);
test("[\u8053](?<=\\S)", "", "\u8053", 0, true, 0, 1);
test("[\u8053](?<=\\W)", "", "\u8053", 0, true, 0, 1);
test("\u0895(?<=\\S)", "", "\u0895", 0, true, 0, 1);
test("\u0895(?<=\\W)", "", "\u0895", 0, true, 0, 1);
test("\u0895|[\u8053\ud0d9]+(?<=\\S\\W\\S)", "", "\ud0d9\ud0d9\ud0d9\ud0d9", 0, true, 0, 4);
test("a|[bc]+(?<=[abc][abcd][abc])", "", "bbbb", 0, true, 0, 4);
test("a(b*)*c\\1d", "", "abbbbcbbd", 0, true, 0, 9, 3, 5);
test("(|a)||b(?<=cde)|", "", "a", 0, true, 0, 0, 0, 0);
test("^(\\1)?\\D*", "s", "empty", 0, true, 0, 5, -1, -1);
test("abcd(?<=d|c()d)", "", "_abcd", 0, true, 1, 5, -1, -1);
test("\\Dw\u3aa7\\A\\S(?<=\ue3b3|\\A()\\S)", "", "\udad1\udcfaw\u3aa7A\ue3b3", 0, false);
test("a(?:c|b(?=()))*", "", "abc", 0, true, 0, 3, 2, 2);
test("a(?:c|b(?=(c)))*", "", "abc", 0, true, 0, 3, 2, 3);
test("a(?:c|(?<=(a))b)*", "", "abc", 0, true, 0, 3, 0, 1);
test("(a||b){15,18}c", "", "ababaabbaaac", 0, true, 0, 12, 11, 11);
test("(a||b){15,18}?c", "", "ababaabbaaac", 0, true, 0, 12, 11, 11);
test("(?:ab|c|^){103,104}", "", "abcababccabccabababccabcababcccccabcababababccccabcabcabccabcabcccabababccabababcababababccababccabcababcabcabccabababccccabcab", 0, true, 0, 0);
test("((?<=a)bec)*d", "", "abecd", 0, true, 1, 5, 1, 4);
test("(|(^|\\z){2,77}?)?", "", "empty", 0, true, 0, 0, 0, 0, -1, -1);
test("a(|a{15,36}){10,11}", "", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, true, 0, 1, 1, 1);
test("a(|a{15,36}?){10,11}", "", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, true, 0, 1, 1, 1);
test("a(|a{15,36}){10,11}$", "", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, true, 0, 66, 66, 66);
test("a(|a{15,36}?){10,11}b$", "", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", 0, true, 0, 67, 66, 66);
test("(?:a()|b??){22,26}c", "", "aabbbaabaaaaaabaaaac", 0, true, 0, 20, 19, 19);
test("b()(a\\1|){4,4}\\2c", "", "baaaac", 0, false);
test("a((?=b()|)[a-d])+", "", "abbbcbd", 0, true, 0, 7, 6, 7, 6, 6);
test("a(?=b(?<=ab)()|)", "", "ab", 0, true, 0, 1, 2, 2);
test("[ab]*?$(?<=[^b][ab][^b])", "", "aaaaaa", 0, true, 0, 6);
test("([ab]+){0,5}", "", "bbbba", 0, true, 0, 5, 0, 5);
test("[--a]", "v", "empty", 0, false);
test("(?:^\\1|$){10,11}bc", "", "aaaaaabc", 0, false);
test("a(?:|[0-9]+?a|[0-9a]){11,13}?[ab]", "", "a372a466a109585878b", 0, true, 0, 19);
test("\\Z", "", "\r\n", 0, true, 0, 0);

/* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */
}

/**
 * Convenience overload that forwards to the four-argument {@code test} with {@code 0} as the
 * last argument.
 *
 * @param pattern regular expression under test
 * @param flags integer flag value passed through unchanged
 * @param input string passed through unchanged
 */
void test(String pattern, int flags, String input) {
    // NOTE(review): the trailing argument is presumably the start index - confirm against the
    // four-argument overload.
    final int defaultLastArgument = 0;
    test(pattern, flags, input, defaultLastArgument);
}
Expand Down
Loading