Skip to content

Commit fd1f649

Browse files
committed
detect regexp errors and throw them as exceptions
1 parent f2abb86 commit fd1f649

17 files changed

+146
-38
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?php
2+
3+
namespace Aternos\Licensee\Exception;
4+
5+
use Exception;
6+
7+
/**
 * Base exception type of the library.
 *
 * Adds no behaviour of its own; it only gives the library's exceptions a
 * common parent so they can be caught with a single catch clause.
 */
class LicenseeException extends Exception
{
}

src/Exception/RegExpException.php

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
namespace Aternos\Licensee\Exception;
4+
5+
/**
 * Exception describing a failed PCRE (preg_*) call.
 *
 * The static helpers take the return value of a call, compare it against the
 * value that signals failure and either hand the value back untouched or throw
 * an exception built from the current PCRE error state.
 */
class RegExpException extends LicenseeException
{
    /**
     * Create an exception from the most recent PCRE error.
     *
     * The message is taken from preg_last_error_msg() and the code from
     * preg_last_error().
     *
     * @return static
     */
    public static function last(): static
    {
        return new static(preg_last_error_msg(), preg_last_error());
    }

    /**
     * Return $result unchanged unless it is identical to the error sentinel.
     *
     * @template T
     * @param T $result value returned by the wrapped call
     * @param mixed $errorResult value that signals failure (compared with ===)
     * @return T
     * @throws RegExpException if $result is identical to $errorResult
     */
    public static function handle(mixed $result, mixed $errorResult): mixed
    {
        // Strict comparison: a legitimate 0 or "" must not be mistaken for false/null.
        if ($result !== $errorResult) {
            return $result;
        }

        throw static::last();
    }

    /**
     * Shortcut for handle() with null as the error sentinel.
     *
     * @template T
     * @param T $result value returned by the wrapped call
     * @return T
     * @throws RegExpException if $result is null
     */
    public static function handleNull(mixed $result): mixed
    {
        return static::handle($result, null);
    }

    /**
     * Shortcut for handle() with false as the error sentinel.
     *
     * @template T
     * @param T $result value returned by the wrapped call
     * @return T
     * @throws RegExpException if $result is false
     */
    public static function handleFalse(mixed $result): mixed
    {
        return static::handle($result, false);
    }
}

src/License/License.php

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Aternos\Licensee\License;
44

5+
use Aternos\Licensee\Exception\RegExpException;
56
use Aternos\Licensee\Generated\Condition;
67
use Aternos\Licensee\Generated\Limitation;
78
use Aternos\Licensee\Generated\Permission;
@@ -11,6 +12,7 @@
1112
use DOMDocument;
1213
use DOMNode;
1314
use InvalidArgumentException;
15+
use RuntimeException;
1416

1517
class License
1618
{
@@ -98,7 +100,11 @@ public function __construct(
98100
protected string $key
99101
)
100102
{
101-
$this->parseLicenseFile(file_get_contents(static::LICENSE_DIR . $key . ".txt"));
103+
try {
104+
$this->parseLicenseFile(file_get_contents(static::LICENSE_DIR . $key . ".txt"));
105+
} catch (RegExpException $e) {
106+
throw new RuntimeException("Built-in license file \"" . $key . "\" could not be loaded.", previous: $e);
107+
}
102108
}
103109

104110
/**
@@ -112,6 +118,7 @@ protected function readSpdxXmlContent(): string
112118
/**
113119
* @param string $content
114120
* @return void
121+
* @throws RegExpException
115122
*/
116123
protected function parseLicenseFile(string $content): void
117124
{
@@ -120,7 +127,7 @@ protected function parseLicenseFile(string $content): void
120127
throw new InvalidArgumentException("Invalid license format");
121128
}
122129

123-
$parts = preg_split('/^---\s*$/m', $content, 3);
130+
$parts = RegExpException::handleFalse(preg_split('/^---\s*$/m', $content, 3));
124131
if (count($parts) !== 3) {
125132
throw new InvalidArgumentException("Invalid license format");
126133
}
@@ -158,6 +165,7 @@ protected function parseLicenseFile(string $content): void
158165
/**
159166
* I used the regex to regex the regex
160167
* @return string
168+
* @throws RegExpException
161169
*/
162170
public function getTitleRegex(): string
163171
{
@@ -168,29 +176,29 @@ public function getTitleRegex(): string
168176
$simpleTitleRegex = strtolower($this->title);
169177
$simpleTitleRegex = str_replace('*', 'u', $simpleTitleRegex);
170178
$simpleTitleRegex = preg_quote($simpleTitleRegex, '/');
171-
$titleRegex = preg_replace('/^the /i', '', $simpleTitleRegex);
172-
$titleRegex = preg_replace('/,? version /', ' ', $titleRegex);
173-
$titleRegex = preg_replace('/v(\d+\.\d+)/', '$1', $titleRegex);
174-
$titleRegex = preg_quote($titleRegex, '/');
175-
$titleRegex = preg_replace('/\\\ licen[sc]e/i', '(?:\ licen[sc]e)?', $titleRegex);
176-
preg_match('/\d+\\\+\.(\d+)/', $titleRegex, $versionMatch);
179+
$titleRegex = RegExpException::handleNull(preg_replace('/^the /i', '', $simpleTitleRegex));
180+
$titleRegex = RegExpException::handleNull(preg_replace('/,? version /', ' ', $titleRegex));
181+
$titleRegex = RegExpException::handleNull(preg_replace('/v(\d+\.\d+)/', '$1', $titleRegex));
182+
$titleRegex = RegExpException::handleNull(preg_quote($titleRegex, '/'));
183+
$titleRegex = RegExpException::handleNull(preg_replace('/\\\ licen[sc]e/i', '(?:\ licen[sc]e)?', $titleRegex));
184+
RegExpException::handleFalse(preg_match('/\d+\\\+\.(\d+)/', $titleRegex, $versionMatch));
177185
if ($versionMatch) {
178186
if ($versionMatch[1] === '0') {
179187
$sub = ',?\s+(?:version\ |v(?:\. )?)?$1($2)?';
180188
} else {
181189
$sub = ',?\s+(?:version\ |v(?:\. )?)?$1$2';
182190
}
183-
$titleRegex = preg_replace('/\s*(\d+)\\\+(\.\d+)/', $sub, $titleRegex);
191+
$titleRegex = RegExpException::handleNull(preg_replace('/\s*(\d+)\\\+(\.\d+)/', $sub, $titleRegex));
184192
}
185-
$titleRegex = preg_replace('/\bgnu\\\ /i', '(?:GNU )?', $titleRegex);
193+
$titleRegex = RegExpException::handleNull(preg_replace('/\bgnu\\\ /i', '(?:GNU )?', $titleRegex));
186194

187-
$keyRegex = str_replace('-', '[- ]', $this->getSpdxId()->value);
188-
$keyRegex = str_replace('.', '\.', $keyRegex);
195+
$keyRegex = RegExpException::handleNull(str_replace('-', '[- ]', $this->getSpdxId()->value));
196+
$keyRegex = RegExpException::handleNull(str_replace('.', '\.', $keyRegex));
189197
$keyRegex .= '(?:\ licen[sc]e)?';
190198

191199
$parts = [$simpleTitleRegex, $titleRegex, $keyRegex];
192200
if ($this->nickname) {
193-
$parts[] = preg_replace('/\bGNU /i', '(?:GNU )?', preg_quote($this->nickname, '/'));
201+
$parts[] = RegExpException::handleNull(preg_replace('/\bGNU /i', '(?:GNU )?', preg_quote($this->nickname, '/')));
194202
}
195203

196204
return $this->titleRegexp = implode('|', $parts);

src/License/Text/LicenseText.php

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Aternos\Licensee\License\Text;
44

5+
use Aternos\Licensee\Exception\RegExpException;
56
use Aternos\Licensee\Generated\Field;
67
use Aternos\Licensee\TextTransformer\AmpersandsTransformer;
78
use Aternos\Licensee\TextTransformer\BordersTransformer;
@@ -95,14 +96,15 @@ public function __construct(
9596

9697
/**
9798
* @return string
99+
* @throws RegExpException
98100
*/
99101
public function getNormalizedContent(): string
100102
{
101103
if ($this->normalizedContent === null) {
102104
$this->normalizedContent = $this->content;
103105
foreach ($this->transformers as $transformer) {
104106
$this->normalizedContent = $transformer->transform($this->normalizedContent);
105-
$this->normalizedContent = preg_replace("# +#", " ", $this->normalizedContent);
107+
$this->normalizedContent = RegExpException::handleNull(preg_replace("# +#", " ", $this->normalizedContent));
106108
$this->normalizedContent = trim($this->normalizedContent);
107109
}
108110
}
@@ -111,6 +113,7 @@ public function getNormalizedContent(): string
111113

112114
/**
113115
* @return int
116+
* @throws RegExpException
114117
*/
115118
public function getNormalizedLength(): int
116119
{
@@ -119,12 +122,13 @@ public function getNormalizedLength(): int
119122

120123
/**
121124
* @return string[]
125+
* @throws RegExpException
122126
*/
123127
public function getWordSet(): array
124128
{
125129
if (!isset($this->wordSet)) {
126130
$matches = [];
127-
if (preg_match_all('/(?:[\w\/-](?:\'s|(?<=s)\')?)+/', $this->getNormalizedContent(), $matches)) {
131+
if (RegExpException::handleFalse(preg_match_all('/(?:[\w\/-](?:\'s|(?<=s)\')?)+/', $this->getNormalizedContent(), $matches))) {
128132
$this->wordSet = array_unique($matches[0]);
129133
} else {
130134
$this->wordSet = [];
@@ -135,6 +139,7 @@ public function getWordSet(): array
135139

136140
/**
137141
* @return string[]
142+
* @throws RegExpException
138143
*/
139144
public function getFieldlessWordSet(): array
140145
{
@@ -144,6 +149,7 @@ public function getFieldlessWordSet(): array
144149
/**
145150
* @param LicenseText $other
146151
* @return float
152+
* @throws RegExpException
147153
*/
148154
public function getSimilarity(LicenseText $other): float
149155
{
@@ -156,6 +162,7 @@ public function getSimilarity(LicenseText $other): float
156162
/**
157163
* @param LicenseText $other
158164
* @return int
165+
* @throws RegExpException
159166
*/
160167
protected function getLengthDelta(LicenseText $other): int
161168
{
@@ -165,6 +172,7 @@ protected function getLengthDelta(LicenseText $other): int
165172
/**
166173
* @param LicenseText $other
167174
* @return int
175+
* @throws RegExpException
168176
*/
169177
protected function getVariationAdjustedLengthDelta(LicenseText $other): int
170178
{
@@ -173,18 +181,20 @@ protected function getVariationAdjustedLengthDelta(LicenseText $other): int
173181

174182
/**
175183
* @return string[]
184+
* @throws RegExpException
176185
*/
177186
public function getNormalizedFields(): array
178187
{
179188
if ($this->normalizedFields !== null) {
180189
return $this->normalizedFields;
181190
}
182-
preg_match_all(Field::getKeyRegex(), $this->getNormalizedContent(), $matches);
191+
RegExpException::handleFalse(preg_match_all(Field::getKeyRegex(), $this->getNormalizedContent(), $matches));
183192
return $this->normalizedFields = array_values($matches[1]);
184193
}
185194

186195
/**
187196
* @return string[]
197+
* @throws RegExpException
188198
*/
189199
public function getUniqueNormalizedFields(): array
190200
{
@@ -196,13 +206,15 @@ public function getUniqueNormalizedFields(): array
196206

197207
/**
198208
* @return bool
209+
* @throws RegExpException
199210
*/
200211
public function hasPotentialCCFalsePositives(): bool
201212
{
202213
if ($this->potentialCCFalsePositives !== null) {
203214
return $this->potentialCCFalsePositives;
204215
}
205-
return $this->potentialCCFalsePositives = !!preg_match('/^(creative commons )?Attribution-(NonCommercial|NoDerivatives)/i', $this->content) > 0;
216+
return $this->potentialCCFalsePositives =
217+
RegExpException::handleFalse(preg_match('/^(creative commons )?Attribution-(NonCommercial|NoDerivatives)/i', $this->content)) > 0;
206218
}
207219

208220
/**

src/Licensee.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Aternos\Licensee;
44

5+
use Aternos\Licensee\Exception\RegExpException;
56
use Aternos\Licensee\License\License;
67
use Aternos\Licensee\License\Text\LicenseText;
78
use Aternos\Licensee\Matcher\DiceMatcher;
@@ -34,6 +35,7 @@ public function findLicenseById(string $id): ?License
3435
* @param string $title
3536
* @param bool $allowMatchWithoutVersion - If true, the title can match without the version part of the license title
3637
* @return License|null
38+
* @throws RegExpException
3739
*/
3840
public function findLicenseByTitle(string $title, bool $allowMatchWithoutVersion = false): ?License
3941
{
@@ -42,14 +44,14 @@ public function findLicenseByTitle(string $title, bool $allowMatchWithoutVersion
4244
return $license;
4345
}
4446

45-
if (preg_match('/' . $license->getTitleRegex() . '/i', $title)) {
47+
if (RegExpException::handleFalse(preg_match('/' . $license->getTitleRegex() . '/i', $title))) {
4648
return $license;
4749
}
4850
}
4951

5052
if ($allowMatchWithoutVersion) {
5153
foreach (License::getAll() as $license) {
52-
if (preg_match('/' . preg_quote($license->getNameWithoutVersion(), "/") . '/i', $title)) {
54+
if (RegExpException::handleFalse(preg_match('/' . preg_quote($license->getNameWithoutVersion(), "/") . '/i', $title))) {
5355
return $license;
5456
}
5557
}

src/TextTransformer/BulletTransformer.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
namespace Aternos\Licensee\TextTransformer;
44

5+
use Aternos\Licensee\Exception\RegExpException;
6+
57
class BulletTransformer extends TextTransformer
68
{
79

@@ -10,7 +12,7 @@ class BulletTransformer extends TextTransformer
1012
*/
1113
public function transform(string $text): string
1214
{
13-
$text = preg_replace('/\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[).])\s+/i', "\n\n- ", $text);
14-
return preg_replace('/\)\s+\(/', ')(', $text);
15+
$text = RegExpException::handleNull(preg_replace('/\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[).])\s+/i', "\n\n- ", $text));
16+
return RegExpException::handleNull(preg_replace('/\)\s+\(/', ')(', $text));
1517
}
1618
}

src/TextTransformer/GenericStripTitleTransformer.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Aternos\Licensee\TextTransformer;
44

5+
use Aternos\Licensee\Exception\RegExpException;
56
use Aternos\Licensee\License\License;
67

78
class GenericStripTitleTransformer extends TextTransformer
@@ -29,8 +30,8 @@ public function transform(string $text): string
2930
$match = false;
3031
foreach ($titles as $title) {
3132
$pattern = '/\A\s*\(?(?:the )?(' . $title . ').*?$/imu';
32-
if (preg_match($pattern, $text)) {
33-
$text = preg_replace($pattern, " ", $text);
33+
if (RegExpException::handleFalse(preg_match($pattern, $text))) {
34+
$text = RegExpException::handleNull(preg_replace($pattern, " ", $text));
3435
$match = true;
3536
}
3637
}

src/TextTransformer/HtmlTransformer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Aternos\Licensee\TextTransformer;
44

5+
use Aternos\Licensee\Exception\RegExpException;
56
use League\HTMLToMarkdown\HtmlConverter;
67

78
class HtmlTransformer extends TextTransformer
@@ -23,7 +24,6 @@ public function __construct()
2324
public function transform(string $text): string
2425
{
2526
$md = $this->converter->convert($text);
26-
$md = preg_replace('/<\?xml .*?\?>/', '', $md);
27-
return $md;
27+
return RegExpException::handleNull(preg_replace('/<\?xml .*?\?>/', '', $md));
2828
}
2929
}

src/TextTransformer/RegexReplaceTransformer.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
namespace Aternos\Licensee\TextTransformer;
44

5+
use Aternos\Licensee\Exception\RegExpException;
6+
57
class RegexReplaceTransformer extends TextTransformer
68
{
79
/**
@@ -20,6 +22,6 @@ public function __construct(
2022
*/
2123
public function transform(string $text): string
2224
{
23-
return preg_replace($this->regex, $this->replacement, $text);
25+
return RegExpException::handleNull(preg_replace($this->regex, $this->replacement, $text));
2426
}
2527
}

0 commit comments

Comments
 (0)