Skip to content

Commit 759c9e8

Browse files
committed
Generate whole regexp by default
1 parent db93fc1 commit 759c9e8

File tree

4 files changed

+38
-23
lines changed

4 files changed

+38
-23
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
s9e\RegexpBuilder is a single-purpose library that generates a regular expression that matches a given list of strings. It is best suited for efficiently finding any of a list of literals within a text.
22

3-
Simply put, given `['foo', 'bar', 'baz']` as input, the library will generate `(?:ba[rz]|foo)`, a regular expression that can match any of the strings `foo`, `bar`, or `baz`.
3+
Simply put, given `['foo', 'bar', 'baz']` as input, the library will generate `ba[rz]|foo`, a regular expression that can match any of the strings `foo`, `bar`, or `baz`.
44

55
[![Build status](https://github.com/s9e/RegexpBuilder/actions/workflows/build.yml/badge.svg)](https://github.com/s9e/RegexpBuilder/actions/workflows/build.yml)
66
[![Code Coverage](https://scrutinizer-ci.com/g/s9e/RegexpBuilder/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/s9e/RegexpBuilder/?branch=master)
@@ -14,7 +14,7 @@ $builder = new s9e\RegexpBuilder\Builder;
1414
echo '/', $builder->build(['foo', 'bar', 'baz']), '/';
1515
```
1616
```
17-
/(?:ba[rz]|foo)/
17+
/ba[rz]|foo/
1818
```
1919

2020

@@ -181,7 +181,7 @@ $builder = new s9e\RegexpBuilder\Builder([
181181
echo '/', $builder->build(['foo?', 'bar*']), '/';
182182
```
183183
```
184-
/(?:bar.*|foo.)/
184+
/bar.*|foo./
185185
```
186186

187187
In the following example, we map `X` to `\d`. Note that sequences produced by meta-characters may appear in character classes if the result is valid.

src/Builder.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ class Builder
4141
*/
4242
public Serializer $serializer;
4343

44+
/**
45+
* @var bool Whether the expression generated is meant to be used whole. If not, alternations
46+
* will be put into a non-capturing group
47+
*/
48+
public bool $standalone = true;
49+
4450
/**
4551
* @param array $config
4652
*/
@@ -80,7 +86,7 @@ public function build(array $strings): string
8086
$strings = $this->meta->replaceMeta($strings);
8187
$strings = $this->runner->run($strings);
8288

83-
return $this->serializer->serializeStrings($strings);
89+
return $this->serializer->serializeStrings($strings, !$this->standalone);
8490
}
8591

8692
/**

tests/BuilderTest.php

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,15 @@
1010
*/
1111
class BuilderTest extends TestCase
1212
{
13+
public function testStandalone()
14+
{
15+
$builder = new Builder;
16+
$this->assertEquals('bar|foo', $builder->build(['foo', 'bar']));
17+
18+
$builder->standalone = false;
19+
$this->assertEquals('(?:bar|foo)', $builder->build(['foo', 'bar']));
20+
}
21+
1322
/**
1423
* @dataProvider getBuilderTests
1524
*/
@@ -31,15 +40,15 @@ public function getBuilderTests()
3140
'foo',
3241
'bar'
3342
],
34-
'(?:bar|foo)'
43+
'bar|foo'
3544
],
3645
[
3746
[
3847
'foo',
3948
'fool',
4049
'bar'
4150
],
42-
'(?:bar|fool?)'
51+
'bar|fool?'
4352
],
4453
[
4554
[
@@ -91,7 +100,7 @@ public function getBuilderTests()
91100
"\xEF\xA4\x80\xEF\xA4\x80",
92101
"\xF0\x9F\x98\x80\xF0\x9F\x98\x80"
93102
],
94-
'(?:\\x{D7FB}\\x{D7FB}|\\x{F900}\\x{F900}|\\x{1F600}\\x{1F600})',
103+
'\\x{D7FB}\\x{D7FB}|\\x{F900}\\x{F900}|\\x{1F600}\\x{1F600}',
95104
['input' => 'Utf8', 'output' => 'PHP']
96105
],
97106
[
@@ -100,7 +109,7 @@ public function getBuilderTests()
100109
"\xEF\xA4\x80\xEF\xA4\x80",
101110
"\xF0\x9F\x98\x80"
102111
],
103-
'(?:\\uD7FB\\uD7FB|\\uF900\\uF900|\\uD83D\\uDE00)',
112+
'\\uD7FB\\uD7FB|\\uF900\\uF900|\\uD83D\\uDE00',
104113
[
105114
'input' => 'Utf8',
106115
'inputOptions' => ['useSurrogates' => true],
@@ -129,7 +138,7 @@ public function getBuilderTests()
129138
],
130139
[
131140
["\n", '.'],
132-
'(?:\\n|.)',
141+
'\\n|.',
133142
['meta' => ['.' => '.'], 'output' => 'PHP']
134143
],
135144
[

tests/ValidationTest.php

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public function getValidationTests()
4040
],
4141
[
4242
// CoalesceSingleCharacterPrefix
43-
'(?:[ab]b|c)',
43+
'[ab]b|c',
4444
['ab', 'bb', 'c']
4545
],
4646
[
@@ -130,7 +130,7 @@ public function getValidationTests()
130130
['best', 'boost', 'bust']
131131
],
132132
[
133-
'(?:b(?:oo)?st|cool)',
133+
'b(?:oo)?st|cool',
134134
['boost', 'bst', 'cool']
135135
],
136136
[
@@ -166,13 +166,13 @@ public function getValidationTests()
166166
['axx', 'ayy', 'bbxx', 'bbyy']
167167
],
168168
[
169-
'(?:a(?:xx|yy)|bb(?:xx|yy)|c)',
169+
'a(?:xx|yy)|bb(?:xx|yy)|c',
170170
['axx', 'ayy', 'bbxx', 'bbyy', 'c']
171171
],
172172
[
173173
// Ensure it doesn't become (?:c|(?:a|bb)(?:xx|yy)|azz) even though it would be
174174
// shorter, because having fewer alternations at the top level is more important
175-
'(?:a(?:xx|yy|zz)|bb(?:xx|yy)|c)',
175+
'a(?:xx|yy|zz)|bb(?:xx|yy)|c',
176176
['axx', 'ayy', 'azz', 'bbxx', 'bbyy', 'c']
177177
],
178178
[
@@ -192,7 +192,7 @@ public function getValidationTests()
192192
['ax', 'ay', 'bx', 'by']
193193
],
194194
[
195-
'(?:[ab][xy]|c)',
195+
'[ab][xy]|c',
196196
['ax', 'ay', 'bx', 'by', 'c']
197197
],
198198
[
@@ -204,15 +204,15 @@ public function getValidationTests()
204204
['03', '04', '13', '14', '3', '4']
205205
],
206206
[
207-
'(?:a[xy]|bb[xy]|c)',
207+
'a[xy]|bb[xy]|c',
208208
['ax', 'ay', 'bbx', 'bby', 'c']
209209
],
210210
[
211-
'(?:[ab][xy]|c|dd[xy])',
211+
'[ab][xy]|c|dd[xy]',
212212
['ax', 'ay', 'bx', 'by', 'c', 'ddx', 'ddy']
213213
],
214214
[
215-
'(?:[ab][xy]|[cd][XY]|[ef]|gg)',
215+
'[ab][xy]|[cd][XY]|[ef]|gg',
216216
['ax', 'ay', 'bx', 'by', 'cX', 'cY', 'dX', 'dY', 'e', 'f', 'gg']
217217
],
218218
[
@@ -235,15 +235,15 @@ public function getValidationTests()
235235
[]
236236
],
237237
[
238-
'(?:[yz]|bar|foo)',
238+
'[yz]|bar|foo',
239239
['foo', 'bar', 'y', 'z']
240240
],
241241
[
242-
'(?:[yz]|ba[rz]|foo)',
242+
'[yz]|ba[rz]|foo',
243243
['foo', 'bar', 'baz', 'y', 'z']
244244
],
245245
[
246-
'(?:a(?:a(?:cc|dd))?|bb(?:cc|dd))',
246+
'a(?:a(?:cc|dd))?|bb(?:cc|dd)',
247247
['a', 'aacc', 'aadd', 'bbcc', 'bbdd']
248248
],
249249
[
@@ -259,7 +259,7 @@ public function getValidationTests()
259259
]
260260
],
261261
[
262-
'(?:[1-7][0-7]?|0)',
262+
'[1-7][0-7]?|0',
263263
array_map('decoct', range(0, 63))
264264
],
265265
[
@@ -279,7 +279,7 @@ public function getValidationTests()
279279
]
280280
],
281281
[
282-
'(?:12?3?|23?|3)',
282+
'12?3?|23?|3',
283283
['1', '12', '123', '13', '2', '23', '3']
284284
],
285285
[
@@ -332,7 +332,7 @@ function ($n)
332332
[
333333
// CoalesceSingleCharacterPrefix should ignore expressions that do not represent a
334334
// single character
335-
'(?:[\\dab]x|\\bx|\\d+x|zz)',
335+
'[\\dab]x|\\bx|\\d+x|zz',
336336
['ax', 'bx', '?x', '*x', '#x', 'zz'],
337337
['meta' => ['*' => '\\d+', '#' => '\\b', '?' => '\\d']]
338338
],

0 commit comments

Comments
 (0)