Skip to content

Commit bd32621

Browse files
committed
Add tokenizer
1 parent f889335 commit bd32621

File tree

9 files changed

+308
-1
lines changed

9 files changed

+308
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
.idea/
2+
.phpunit.cache/
23
vendor/
34
composer.lock
45
phpstan.neon

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
"phpstan/phpstan-deprecation-rules": "^1.1",
2020
"phpstan/phpstan-phpunit": "^1.3",
2121
"phpstan/phpstan-strict-rules": "^1.5",
22-
"phpunit/phpunit": "^11.1"
22+
"phpunit/phpunit": "^11.1",
23+
"symfony/var-dumper": "^7.0"
2324
},
2425
"autoload": {
2526
"psr-4": {

src/Tokenizer.php

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
<?php
2+
3+
namespace Recoded\WordPressBlockParser;
4+
5+
use Generator;
6+
use IteratorAggregate;
7+
use Recoded\WordPressBlockParser\Value\BlockClosing;
8+
use Recoded\WordPressBlockParser\Value\BlockOpening;
9+
use Recoded\WordPressBlockParser\Value\SelfClosingBlock;
10+
11+
/**
12+
* @implements \IteratorAggregate<int, \Recoded\WordPressBlockParser\Value\Token>
13+
*/
14+
final class Tokenizer implements IteratorAggregate
{
    /**
     * Regular expression matching one block comment delimiter.
     *
     * Named groups: `closer` (leading slash), `namespace` (including its
     * trailing slash), `name`, `attrs` (the JSON object literal) and `void`
     * (trailing slash of a self-closing block).
     */
    public const PATTERN = '/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s';

    /**
     * Create a new Tokenizer instance.
     *
     * @param string $content The document to scan for block delimiters.
     * @return void
     */
    public function __construct(
        public readonly string $content,
    ) {
        //
    }

    /**
     * Create a new tokenizer from content string.
     *
     * @param string $content The document to scan for block delimiters.
     * @return self
     */
    public static function create(string $content): self
    {
        return new self($content);
    }

    /**
     * Iterate over the tokens, in the order they appear in the content.
     *
     * The scan position is local to each call, so the tokenizer can be
     * iterated any number of times and always starts from the beginning.
     * Iteration stops at the first position from which no further delimiter
     * is found (or when the regex engine reports an error).
     *
     * @return \Generator<int, \Recoded\WordPressBlockParser\Value\Token>
     */
    public function getIterator(): Generator
    {
        $cursor = 0;

        while (preg_match(self::PATTERN, $this->content, $matches, PREG_OFFSET_CAPTURE, $cursor) === 1) {
            [$match, $offset] = $matches[0];

            // Offsets reported by PCRE are byte offsets, so the length is
            // measured in bytes as well.
            $length = strlen($match);
            $attributes = self::decodeAttributes(self::captured($matches, 'attrs'));
            $namespace = self::captured($matches, 'namespace') ?? 'core/';
            $name = $matches['name'][0];

            if (self::captured($matches, 'void') !== null) {
                yield new SelfClosingBlock(
                    namespace: $namespace,
                    name: $name,
                    attributes: $attributes,
                    startsAt: $offset,
                    length: $length,
                );
            } elseif (self::captured($matches, 'closer') !== null) {
                yield new BlockClosing(
                    namespace: $namespace,
                    name: $name,
                    startsAt: $offset,
                    length: $length,
                );
            } else {
                yield new BlockOpening(
                    namespace: $namespace,
                    name: $name,
                    attributes: $attributes,
                    startsAt: $offset,
                    length: $length,
                );
            }

            // Resume scanning right after the delimiter that was just emitted.
            $cursor = $offset + $length;
        }
    }

    /**
     * Return the text captured by a named group, or null when the group did
     * not take part in the match.
     *
     * With PREG_OFFSET_CAPTURE an unmatched group is either missing from the
     * result (trailing groups) or reported with an offset of -1.
     *
     * @param array<int|string, array{string, int}> $matches
     * @param string $group
     * @return string|null
     */
    private static function captured(array $matches, string $group): ?string
    {
        if (!isset($matches[$group]) || $matches[$group][1] === -1) {
            return null;
        }

        return $matches[$group][0];
    }

    /**
     * Decode the JSON attribute literal of a delimiter.
     *
     * A missing literal, malformed JSON, or JSON that does not decode to an
     * array yields an empty attribute list instead of propagating null into
     * the array-typed token constructors.
     *
     * @param string|null $json
     * @return array<string, mixed>
     */
    private static function decodeAttributes(?string $json): array
    {
        if ($json === null) {
            return [];
        }

        $decoded = json_decode($json, associative: true);

        /** @var array<string, mixed> $attributes */
        $attributes = is_array($decoded) ? $decoded : [];

        return $attributes;
    }
}

src/Value/BlockClosing.php

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
<?php
2+
3+
namespace Recoded\WordPressBlockParser\Value;
4+
5+
final class BlockClosing extends Token
{
    /**
     * Build the token describing a block's closing delimiter.
     *
     * @param string $namespace Namespace part of the block name.
     * @param string $name      Block name without its namespace.
     * @param int    $startsAt  Position at which the delimiter starts.
     * @param int    $length    Length of the delimiter.
     */
    public function __construct(
        public readonly string $namespace,
        public readonly string $name,
        int $startsAt,
        int $length,
    ) {
        // Position and length are stored by the base Token.
        parent::__construct($startsAt, $length);
    }
}

src/Value/BlockOpening.php

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?php
2+
3+
namespace Recoded\WordPressBlockParser\Value;
4+
5+
final class BlockOpening extends Token
{
    /**
     * Build the token describing a block's opening delimiter.
     *
     * @param string               $namespace  Namespace part of the block name.
     * @param string               $name       Block name without its namespace.
     * @param array<string, mixed> $attributes Attributes attached to the block.
     * @param int                  $startsAt   Position at which the delimiter starts.
     * @param int                  $length     Length of the delimiter.
     */
    public function __construct(
        public readonly string $namespace,
        public readonly string $name,
        public readonly array $attributes,
        int $startsAt,
        int $length,
    ) {
        // Position and length are stored by the base Token.
        parent::__construct($startsAt, $length);
    }
}

src/Value/SelfClosingBlock.php

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?php
2+
3+
namespace Recoded\WordPressBlockParser\Value;
4+
5+
final class SelfClosingBlock extends Token
{
    /**
     * Build the token describing a self-closing (void) block delimiter.
     *
     * @param string               $namespace  Namespace part of the block name.
     * @param string               $name       Block name without its namespace.
     * @param array<string, mixed> $attributes Attributes attached to the block.
     * @param int                  $startsAt   Position at which the delimiter starts.
     * @param int                  $length     Length of the delimiter.
     */
    public function __construct(
        public readonly string $namespace,
        public readonly string $name,
        public readonly array $attributes,
        int $startsAt,
        int $length,
    ) {
        // Position and length are stored by the base Token.
        parent::__construct($startsAt, $length);
    }
}

src/Value/Token.php

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?php
2+
3+
namespace Recoded\WordPressBlockParser\Value;
4+
5+
abstract class Token
{
    /**
     * Position at which the token starts.
     */
    public readonly int $startsAt;

    /**
     * Length of the token.
     */
    public readonly int $length;

    /**
     * Record where the token sits and how long it is.
     *
     * @param int $startsAt
     * @param int $length
     */
    public function __construct(int $startsAt, int $length)
    {
        $this->startsAt = $startsAt;
        $this->length = $length;
    }
}

tests/TestCase.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?php
2+
3+
namespace Tests;
4+
5+
/**
 * Shared base class for the test cases in this namespace.
 */
abstract class TestCase extends \PHPUnit\Framework\TestCase
{
}

tests/TokenizerTest.php

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
<?php
2+
3+
namespace Tests;
4+
5+
use Recoded\WordPressBlockParser\Tokenizer;
6+
use Recoded\WordPressBlockParser\Value\BlockClosing;
7+
use Recoded\WordPressBlockParser\Value\BlockOpening;
8+
use Recoded\WordPressBlockParser\Value\SelfClosingBlock;
9+
10+
final class TokenizerTest extends TestCase
{
    /**
     * A bare opening delimiter becomes one BlockOpening in the default namespace.
     */
    public function test_it_tokenizes_opening_tags(): void
    {
        $tokens = iterator_to_array(Tokenizer::create('<!-- wp:paragraph -->'));

        $expected = new BlockOpening(
            namespace: 'core/',
            name: 'paragraph',
            attributes: [],
            startsAt: 0,
            length: 21,
        );

        self::assertEquals([$expected], $tokens);
    }

    /**
     * The JSON object of an opening delimiter is decoded into the attributes.
     */
    public function test_it_tokenizes_opening_tags_with_attributes(): void
    {
        $tokens = iterator_to_array(Tokenizer::create('<!-- wp:paragraph {"foo": "bar"} -->'));

        $expected = new BlockOpening(
            namespace: 'core/',
            name: 'paragraph',
            attributes: ['foo' => 'bar'],
            startsAt: 0,
            length: 36,
        );

        self::assertEquals([$expected], $tokens);
    }

    /**
     * A delimiter with a leading slash becomes one BlockClosing.
     */
    public function test_it_tokenizes_closing_tags(): void
    {
        $tokens = iterator_to_array(Tokenizer::create('<!-- /wp:paragraph -->'));

        $expected = new BlockClosing(
            namespace: 'core/',
            name: 'paragraph',
            startsAt: 0,
            length: 22,
        );

        self::assertEquals([$expected], $tokens);
    }

    /**
     * A delimiter with a trailing slash becomes one SelfClosingBlock.
     */
    public function test_it_tokenizes_self_closing_blocks(): void
    {
        $tokens = iterator_to_array(Tokenizer::create('<!-- wp:paragraph /-->'));

        $expected = new SelfClosingBlock(
            namespace: 'core/',
            name: 'paragraph',
            attributes: [],
            startsAt: 0,
            length: 22,
        );

        self::assertEquals([$expected], $tokens);
    }

    /**
     * The JSON object of a self-closing delimiter is decoded into the attributes.
     */
    public function test_it_tokenizes_self_closing_blocks_with_attributes(): void
    {
        $tokens = iterator_to_array(Tokenizer::create('<!-- wp:paragraph {"foo": "bar"} /-->'));

        $expected = new SelfClosingBlock(
            namespace: 'core/',
            name: 'paragraph',
            attributes: ['foo' => 'bar'],
            startsAt: 0,
            length: 37,
        );

        self::assertEquals([$expected], $tokens);
    }
}

0 commit comments

Comments
 (0)