|
| 1 | +module d_phobos_ct; |
| 2 | + |
| 3 | +version(CtRegex): |
| 4 | + |
| 5 | +static immutable PATTERNS = [ |
| 6 | + r"y", // misc::literal (legend: each trailing comment gives the group::name label(s) attached to its pattern; getCtRegex compiles every entry of this array) |
| 7 | + r".y", // misc::not_literal |
| 8 | + "[abcdw]", // misc::match_class |
| 9 | + "[ac]", // misc::match_class_in_range |
| 10 | + r"\p{L}", // misc::match_class_unicode / sherlock::letters |
| 11 | + r"^zbc(d|e)", // misc::anchored_literal_long_non_match / misc::anchored_literal_short_non_match |
| 12 | + r"^.bc(d|e)", // misc::anchored_literal_short_match / misc::anchored_literal_long_match |
| 13 | + r"^.bc(d|e)*$", // misc::one_pass_short |
| 14 | + r".bc(d|e)*$", // misc::one_pass_short_not |
| 15 | + r"^abcdefghijklmnopqrstuvwxyz.*$", // misc::one_pass_long_prefix |
| 16 | + r"^.bcdefghijklmnopqrstuvwxyz.*$", // misc::one_pass_long_prefix_not |
| 17 | + r"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", // misc::long_needle1 |
| 18 | + r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", // misc::long_needle2 |
| 19 | + r"[r-z].*bcdefghijklmnopq", // misc::reverse_suffix_no_quadratic |
| 20 | + "ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::easy0 |
| 21 | + r"A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$", // misc::easy1 |
| 22 | + r"[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::medium |
| 23 | + r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$", // misc::hard |
| 24 | + r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ.*", // misc::reallyhard |
| 25 | + r"\w+\s+Holmes", // misc::reallyhard2 |
| 26 | + // Disabled: enabling the pattern below raises compile times from ~40s to ~9m with dmd 2.077.1 |
| 27 | + //r"a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // misc::no_exponential |
| 28 | + r">[^\n]*\n|\n", // dna::find_new_lines |
| 29 | + r"agggtaaa|tttaccct", // dna::variant1 |
| 30 | + r"[cgt]gggtaaa|tttaccc[acg]", // dna::variant2 |
| 31 | + r"a[act]ggtaaa|tttacc[agt]t", // dna::variant3 |
| 32 | + r"ag[act]gtaaa|tttac[agt]ct", // dna::variant4 |
| 33 | + r"agg[act]taaa|ttta[agt]cct", // dna::variant5 |
| 34 | + r"aggg[acg]aaa|ttt[cgt]ccct", // dna::variant6 |
| 35 | + r"agggt[cgt]aa|tt[acg]accct", // dna::variant7 |
| 36 | + r"agggta[cgt]a|t[acg]taccct", // dna::variant8 |
| 37 | + r"agggtaa[cgt]|[acg]ttaccct", // dna::variant9 |
| 38 | + r"B", // dna::subst1 |
| 39 | + r"D", // dna::subst2 |
| 40 | + r"H", // dna::subst3 |
| 41 | + r"K", // dna::subst4 |
| 42 | + r"M", // dna::subst5 |
| 43 | + r"N", // dna::subst6 |
| 44 | + r"R", // dna::subst7 |
| 45 | + r"S", // dna::subst8 |
| 46 | + r"V", // dna::subst9 |
| 47 | + r"W", // dna::subst10 |
| 48 | + r"Y", // dna::subst11 |
| 49 | + r"Sherlock", // sherlock::name_sherlock |
| 50 | + r"Holmes", // sherlock::name_holmes |
| 51 | + r"Sherlock Holmes", // sherlock::name_sherlock_holmes |
| 52 | + r"(?i)Sherlock", // sherlock::name_sherlock_nocase |
| 53 | + r"(?i)Holmes", // sherlock::name_holmes_nocase |
| 54 | + r"(?i)Sherlock Holmes", // sherlock::name_sherlock_holmes_nocase |
| 55 | + r"Sherlock\s+Holmes", // sherlock::name_whitespace |
| 56 | + r"Sherlock|Street", // sherlock::name_alt1 |
| 57 | + r"Sherlock|Holmes", // sherlock::name_alt2 |
| 58 | + r"Sherlock|Holmes|Watson|Irene|Adler|John|Baker", // sherlock::name_alt3 |
| 59 | + r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker", // sherlock::name_alt3_nocase |
| 60 | + r"Sher[a-z]+|Hol[a-z]+", // sherlock::name_alt4 |
| 61 | + r"(?i)Sher[a-z]+|Hol[a-z]+", // sherlock::name_alt4_nocase |
| 62 | + r"Sherlock|Holmes|Watson", // sherlock::name_alt5 |
| 63 | + r"(?i)Sherlock|Holmes|Watson", // sherlock::name_alt5_nocase |
| 64 | + r"zqj", // sherlock::no_match_uncommon |
| 65 | + r"aqj", // sherlock::no_match_common |
| 66 | + r"aei", // sherlock::no_match_really_common |
| 67 | + r"the", // sherlock::the_lower |
| 68 | + r"The", // sherlock::the_upper |
| 69 | + r"(?i)the", // sherlock::the_nocase |
| 70 | + r"the\s+\w+", // sherlock::the_whitespace |
| 71 | + r"\p{Lu}", // sherlock::letters_upper |
| 72 | + r"\p{Ll}", // sherlock::letters_lower |
| 73 | + r"\w+", // sherlock::words |
| 74 | + r"\w+\s+Holmes", // sherlock::before_holmes |
| 75 | + r"\w+\s+Holmes\s+\w+", // sherlock::before_after_holmes |
| 76 | + r"Holmes.{0,25}Watson|Watson.{0,25}Holmes", // sherlock::holmes_cochar_watson |
| 77 | + r"Holmes(?:\s*.+\s*){0,10}Watson|Watson(?:\s*.+\s*){0,10}Holmes", // sherlock::holmes_coword_watson |
| 78 | + `["'][^"']{0,30}[?!.]["']`, // sherlock::quotes |
| 79 | + r"(?m)^Sherlock Holmes|Sherlock Holmes$", // sherlock::line_boundary_sherlock_holmes |
| 80 | + r"\b\w+n\b", // sherlock::word_ending_n |
| 81 | + r"[a-q][^u-z]{13}x", // sherlock::repeated_class_negation |
| 82 | + r"[a-zA-Z]+ing", // sherlock::ing_suffix |
| 83 | + r"\s[a-zA-Z]{0,12}ing\s", // sherlock::ing_suffix_limited_space |
| 84 | +]; |
| 85 | + |
| 86 | +/// Returns an associative array holding one ctRegex-compiled regex per entry of PATTERNS. |
| 87 | +/// Each key is the PATTERNS entry exactly as written, including any inline prefix. |
| 88 | +/// A leading "(?i)" or "(?m)" is cut off before compilation and requested through the |
| 89 | +/// flag string instead ("gi" / "gm"); every other pattern is compiled with flags "g". |
| 90 | +public auto getCtRegex() { |
| 91 | + import std.string; |
| 92 | + import std.regex; |
| 93 | + |
| 94 | + Regex!char[string] compiled; |
| 95 | + |
| 96 | + // Expanded at compile time: one assignment is generated per PATTERNS entry. |
| 97 | + static foreach (src; PATTERNS) { |
| 98 | + static if (src.startsWith("(?m)")) { |
| 99 | + // Drop the inline prefix; the "m" flag takes its place. |
| 100 | + compiled[src] = ctRegex!(src["(?m)".length .. $], "gm"); |
| 101 | + } else static if (src.startsWith("(?i)")) { |
| 102 | + // Drop the inline prefix; the "i" flag takes its place. |
| 103 | + compiled[src] = ctRegex!(src["(?i)".length .. $], "gi"); |
| 104 | + } else { |
| 105 | + compiled[src] = ctRegex!(src, "g"); |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + return compiled; |
| 110 | +} |
0 commit comments