Skip to content

Commit 6af05d5

Browse files
authored
refactor(support): improve regular expression utilities (#1082)
1 parent d1704e8 commit 6af05d5

File tree

6 files changed

+200
-175
lines changed

6 files changed

+200
-175
lines changed

src/Tempest/Support/src/Regex/functions.php

Lines changed: 71 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,50 @@
55
namespace Tempest\Support\Regex {
66
use Closure;
77
use RuntimeException;
8-
8+
use Stringable;
9+
use Tempest\Support\Arr\ImmutableArray;
10+
11+
use function Tempest\Support\arr;
12+
use function Tempest\Support\Arr\filter;
13+
use function Tempest\Support\Arr\first;
14+
use function Tempest\Support\Arr\get_by_key;
15+
use function Tempest\Support\Arr\wrap;
916
use function Tempest\Support\Str\starts_with;
17+
use function Tempest\Support\Str\strip_end;
1018
use function Tempest\Support\Str\strip_start;
1119

1220
/**
13-
* Returns all portions of the `$subject` that match the given `$pattern`.
21+
* Returns portions of the `$subject` that match the given `$pattern`. If `$global` is set to `true`, returns all matches. Otherwise, only returns the first one.
1422
*
1523
* @param non-empty-string $pattern The pattern to match against.
24+
* @param 0|2|256|512|768 $flags
25+
* @mago-expect best-practices/no-unused-parameter
1626
*/
17-
function get_all_matches(string $subject, string $pattern, int $flags = 0, int $offset = 0): array
27+
function get_matches(Stringable|string $subject, Stringable|string $pattern, bool $global = false, int $flags = 0, int $offset = 0): array
1828
{
19-
return call_preg('preg_match_all', static function () use ($subject, $pattern, $flags, $offset): array {
29+
if (str_ends_with($pattern, 'g')) {
30+
$global = true;
31+
$pattern = strip_end($pattern, 'g');
32+
}
33+
34+
return call_preg($global ? 'preg_match_all' : 'preg_match', static function () use ($subject, $pattern, $global, $flags, $offset): array {
2035
$matches = [];
21-
$result = preg_match_all(
22-
$pattern,
23-
$subject,
24-
$matches,
25-
$flags,
26-
$offset,
27-
);
36+
$result = match ($global) {
37+
true => preg_match_all(
38+
(string) $pattern,
39+
(string) $subject,
40+
$matches,
41+
$flags,
42+
$offset,
43+
),
44+
false => preg_match(
45+
(string) $pattern,
46+
(string) $subject,
47+
$matches,
48+
$flags,
49+
$offset,
50+
),
51+
};
2852

2953
if ($result === false || $result === 0) {
3054
return [];
@@ -35,29 +59,47 @@ function get_all_matches(string $subject, string $pattern, int $flags = 0, int $
3559
}
3660

3761
/**
38-
* Returns the first match of `$pattern` in `$subject`.
62+
* Returns the specified matches of `$pattern` in `$subject`.
3963
*
4064
* @param non-empty-string $pattern The pattern to match against.
41-
* @param 0|256|512|768 $flags
4265
*/
43-
function get_first_match(string $subject, string $pattern, int $flags = 0, int $offset = 0): array
44-
{
45-
return call_preg('preg_match', static function () use ($subject, $pattern, $flags, $offset): array {
46-
$matches = [];
47-
$result = preg_match(
48-
$pattern,
49-
$subject,
50-
$matches,
51-
$flags,
52-
$offset,
53-
);
66+
function get_all_matches(
    Stringable|string $subject,
    Stringable|string $pattern,
    Stringable|string|int|array $matches = 0,
    int $offset = 0,
): array {
    // `$matches` names the capture group(s) to keep in every match set: a single
    // group number/name, or a list of them. The default `0` keeps the full match only.
    //
    // The requested groups are normalised once, up front, instead of on every key
    // comparison. `Stringable` identifiers are cast to strings so that the loose
    // comparison below never compares an object against an integer key (PHP would
    // convert the object to int 1 with a warning and spuriously match group 1).
    $groups = array_map(
        fn (mixed $group): mixed => $group instanceof Stringable ? (string) $group : $group,
        wrap($matches instanceof Stringable ? (string) $matches : $matches),
    );

    // PREG_SET_ORDER yields one array per match, keyed by group number and name.
    $sets = get_matches($subject, $pattern, true, PREG_SET_ORDER, $offset);

    // The comparison is intentionally loose so that '1' selects numeric group 1.
    return arr($sets)
        ->map(fn (array $set) => filter(
            $set,
            fn ($_, string|int $key): bool => in_array($key, $groups, strict: false), // @mago-expect strictness/require-strict-behavior
        ))
        ->toArray();
}
5478

55-
if ($result === false) {
56-
return [];
57-
}
79+
/**
80+
* Returns the specified match of `$pattern` in `$subject`. If no match is specified, returns the first group.
81+
*
82+
* @param non-empty-string $pattern The pattern to match against.
83+
* @param 0|256|512|768 $flags
84+
*/
85+
function get_match(
    Stringable|string $subject,
    Stringable|string $pattern,
    array|Stringable|int|string $match = 1,
    mixed $default = null,
    int $flags = 0,
    int $offset = 0,
): null|int|string|array {
    // `$match` selects what to return: one group number/name (default: group 1),
    // or a list of groups, in which case an array keyed by group is returned.
    // `$default` is returned when a single requested group is absent.
    $result = get_matches($subject, $pattern, false, $flags, $offset);

    if (is_array($match)) {
        return arr($result)
            // Loose comparison on purpose, so that '1' selects numeric group 1.
            ->filter(fn ($_, string|int $key) => in_array($key, $match, strict: false)) // @mago-expect strictness/require-strict-behavior
            // A non-global match yields plain strings per group, while a pattern
            // ending in `g` (or an offset-capturing flag) yields arrays. Only
            // unwrap arrays; pass scalars through instead of failing on the
            // closure's parameter type.
            ->mapWithKeys(fn (mixed $captured, string|int $key) => yield $key => (is_array($captured) ? first($captured) : $captured))
            ->toArray();
    }

    // Cast a Stringable group name so the lookup always receives a scalar key.
    $key = $match instanceof Stringable ? (string) $match : $match;

    return get_by_key($result, $key, $default);
}
62104

63105
/**

src/Tempest/Support/src/Str/ManipulatesString.php

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -577,20 +577,27 @@ public function replaceRegex(array|string $regex, array|string|callable $replace
577577
*
578578
* ### Example
579579
* ```php
580-
* str('10-abc')->match('/(?<id>\d+-)/'); // ['id' => '10']
580+
* str('10-abc')->match('/(?<id>\d+-)/', match: 'id'); // '10-'
581581
* ```
582+
*
583+
* @param non-empty-string $pattern The regular expression to match on
584+
* @param array|Stringable|int|string $match The group number or name to retrieve, or a list of group numbers/names
585+
* @param mixed $default The default value to return if no match is found
586+
* @param 0|256|512|768 $flags
582587
*/
583-
public function match(string $regex): array
588+
public function match(string $pattern, array|Stringable|int|string $match = 1, mixed $default = null, int $flags = 0, int $offset = 0): null|int|string|array
584589
{
585-
return Regex\get_first_match($this->value, $regex);
590+
return Regex\get_match($this->value, $pattern, $match, $default, $flags, $offset);
586591
}
587592

588593
/**
589594
* Gets all portions of the instance that match the given regular expression.
595+
*
596+
* @param non-empty-string $pattern The regular expression to match on
590597
*/
591-
public function matchAll(string $regex, int $flags = 0, int $offset = 0): array
598+
public function matchAll(Stringable|string $pattern, array|Stringable|int|string $matches = 0, int $offset = 0): ImmutableArray
592599
{
593-
return Regex\get_all_matches($this->value, $regex, $flags, $offset);
600+
return new ImmutableArray(Regex\get_all_matches($this->value, $pattern, $matches, $offset));
594601
}
595602

596603
/**

src/Tempest/Support/tests/Regex/FunctionsTest.php

Lines changed: 95 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
use Tempest\Support\Regex\InvalidPatternException;
1010

1111
use function Tempest\Support\Regex\get_all_matches;
12-
use function Tempest\Support\Regex\get_first_match;
12+
use function Tempest\Support\Regex\get_match;
13+
use function Tempest\Support\Regex\get_matches;
1314
use function Tempest\Support\Regex\matches;
1415
use function Tempest\Support\Regex\replace;
1516
use function Tempest\Support\Regex\replace_every;
@@ -80,41 +81,87 @@ public function test_replace_every_with_invalid_pattern(): void
8081
replace_every('April 15, 2003', ['/(\w+) (\d+), (\d+)' => '${1}1,$3']);
8182
}
8283

83-
public function test_get_all_matches(): void
84+
public function test_get_match(): void
8485
{
85-
// simple pattern
86-
$this->assertSame([['Hello', 'Hello']], get_all_matches('Hello world, Hello universe', '/Hello/'));
86+
$this->assertSame('10', get_match('10-abc', '/(?<id>\d+)-.*/', match: 'id'));
87+
$this->assertSame('10', get_match('10-abc', '/(\d+)-.*/', match: 1));
88+
$this->assertSame('10', get_match('10-abc', '/(\d+)-.*/'));
89+
$this->assertSame('10-abc', get_match('10-abc', '/\d+-.*/', match: 0));
90+
$this->assertSame(null, get_match('10-abc', '/\d+-.*/', match: 1));
8791

88-
// named capture groups
89-
$regex = '/(?<adjective>quick|lazy) (?<noun>brown|dog)/';
90-
$matches = get_all_matches('The quick brown fox, then the lazy dog', $regex);
91-
$this->assertSame([
92+
$this->assertSame(
9293
[
93-
'quick brown',
94-
'lazy dog',
94+
'match' => "<href='https://tempestphp.com'>Tempest</href>",
95+
'quote' => "'",
96+
'href' => 'https://tempestphp.com',
9597
],
96-
'adjective' => [
97-
'quick',
98-
'lazy',
98+
get_match("<href='https://tempestphp.com'>Tempest</href>", '/(?<match>\<href=(?<quote>[\"\'])(?<href>.+)\k<quote>\>(?:(?!\<href).)*?\<\/href\>)/g', match: [
99+
'match',
100+
'quote',
101+
'href',
102+
]),
103+
);
104+
}
105+
106+
public function test_all_matches(): void
107+
{
108+
$this->assertSame(
109+
[
110+
['Hello'],
111+
['Hello'],
99112
],
100-
1 => [
101-
'quick',
102-
'lazy',
113+
get_all_matches('Hello world, Hello universe', '/Hello/'),
114+
);
115+
116+
$this->assertSame(
117+
[
118+
[
119+
'match' => "<href='https://bsky.app'>Bluesky</href>",
120+
'quote' => "'",
121+
'href' => 'https://bsky.app',
122+
],
123+
[
124+
'match' => "<href='https://x.com.com'>X</href>",
125+
'quote' => "'",
126+
'href' => 'https://x.com.com',
127+
],
103128
],
104-
'noun' => [
105-
'brown',
106-
'dog',
129+
get_all_matches(
130+
"<href='https://bsky.app'>Bluesky</href><href='https://x.com.com'>X</href>",
131+
'/(?<match>\<href=(?<quote>[\"\'])(?<href>.+?)\k<quote>\>(?:(?!\<href).)*?\<\/href\>)/g',
132+
matches: [
133+
'match',
134+
'quote',
135+
'href',
136+
],
137+
),
138+
);
139+
}
140+
141+
public function test_get_matches(): void
142+
{
143+
$this->assertSame([], get_matches('The quick brown fox, then the lazy dog', '/cat/', global: true));
144+
145+
$this->assertSame(
146+
[
147+
0 => '10-',
148+
'id' => '10-',
149+
1 => '10-',
107150
],
108-
2 => [
109-
'brown',
110-
'dog',
151+
get_matches('10-abc', '/(?<id>\d+-)/'),
152+
);
153+
154+
$this->assertSame(
155+
[
156+
[['foobar', 0]],
157+
[['foo', 0]],
158+
[['bar', 3]],
111159
],
112-
], $matches);
160+
get_matches('foobarbaz', '/(foo)(bar)/', global: true, flags: PREG_OFFSET_CAPTURE),
161+
);
113162

114-
// No matches
115-
$this->assertSame([], get_all_matches('The quick brown fox, then the lazy dog', '/cat/'));
163+
$this->assertSame([], get_matches('abcdef', '/^def/', global: true, offset: 3));
116164

117-
// Mixed captures
118165
$this->assertSame(
119166
[
120167
[
@@ -146,26 +193,33 @@ public function test_get_all_matches(): void
146193
'eats',
147194
],
148195
],
149-
get_all_matches('The quick brown fox, then the lazy dog eats', '/(?<adjective>quick|lazy) (?<noun>brown|dog) (?<action>jumps|eats)?/'),
196+
get_matches('The quick brown fox, then the lazy dog eats', '/(?<adjective>quick|lazy) (?<noun>brown|dog) (?<action>jumps|eats)?/', global: true),
150197
);
151198

152-
// Test flags
153199
$this->assertSame(
154200
[
155-
[['foobar', 0]],
156-
[['foo', 0]],
157-
[['bar', 3]],
201+
[
202+
'quick brown',
203+
'lazy dog',
204+
],
205+
'adjective' => [
206+
'quick',
207+
'lazy',
208+
],
209+
1 => [
210+
'quick',
211+
'lazy',
212+
],
213+
'noun' => [
214+
'brown',
215+
'dog',
216+
],
217+
2 => [
218+
'brown',
219+
'dog',
220+
],
158221
],
159-
get_all_matches('foobarbaz', '/(foo)(bar)/', PREG_OFFSET_CAPTURE),
222+
get_matches('The quick brown fox, then the lazy dog', '/(?<adjective>quick|lazy) (?<noun>brown|dog)/', global: true),
160223
);
161-
162-
$this->assertSame([], get_all_matches('abcdef', '/^def/', offset: 3));
163-
}
164-
165-
public function test_match(): void
166-
{
167-
$match = get_first_match('10-abc', '/(?<id>\d+-)/')['id'];
168-
169-
$this->assertSame('10-', $match);
170224
}
171225
}

0 commit comments

Comments
 (0)