|
30 | 30 |
|
31 | 31 | final class TextParser |
32 | 32 | { |
| 33 | + private string $value; // Text accumulated so far; each whitespace run contributes a single ' '.
| 34 | +
| 35 | + private ?Token $startingToken; // First token visited by parse(); supplies the start of the node's source range.
| 36 | + private ?Token $finalToken; // Most recently captured token; supplies the end of the node's source range.
| 37 | +
| 38 | + private bool $trimLeadingSpace; // When true, build() applies ltrim() to the value.
| 39 | + private bool $trimTrailingSpace; // When true, build() applies rtrim() to the value.
| 40 | + private bool $currentlyCapturingSpace; // True while inside a run of SPACE/END_OF_LINE tokens.
| 41 | + private bool $trailingSpaceContainsLineBreak; // True when an END_OF_LINE was captured since the last text token.
| 42 | + private bool $terminated; // Set by the terminate*() handlers to stop the loop in parse().
| 43 | + |
33 | 44 | /** Consumes tokens until a terminating token or the end of input, then builds a TextNode (or null) from them.
34 | 45 | * @param \Iterator<mixed,Token> $tokens
35 | 46 | * @param boolean $preserveLeadingSpace
36 | 47 | * @return null|TextNode
37 | 48 | */
38 | 49 | public function parse(\Iterator $tokens, bool $preserveLeadingSpace = false): ?TextNode
39 | 50 | {
40 | | - $value = '';
41 | | - $startingToken = null;
42 | | - $finalToken = null;
43 | | - $ignoreSpace = false;
44 | | - $keepTrailingSpace = false;
45 | | - $forceTrimTrailingSpace = false;
46 | | - while (!Scanner::isEnd($tokens)) {
47 | | - $startingToken ??= $tokens->current();
48 | | - switch (Scanner::type($tokens)) {
49 | | - case TokenType::BRACKET_CURLY_OPEN:
50 | | - case TokenType::TAG_START_OPENING:
51 | | - $keepTrailingSpace = true;
52 | | - break 2;
53 | | - case TokenType::TAG_START_CLOSING:
54 | | - $value = rtrim($value);
55 | | - break 2;
56 | | - case TokenType::SPACE:
57 | | - case TokenType::END_OF_LINE:
58 | | - if (!$ignoreSpace) {
59 | | - $value .= ' ';
60 | | - }
61 | | - $ignoreSpace = true;
62 | | - if (Scanner::type($tokens) === TokenType::END_OF_LINE) {
63 | | - $forceTrimTrailingSpace = true;
64 | | - }
65 | | - $finalToken = $tokens->current();
66 | | - Scanner::skipOne($tokens);
67 | | - break;
68 | | - default:
69 | | - $value .= Scanner::value($tokens);
70 | | - $ignoreSpace = false;
71 | | - $forceTrimTrailingSpace = false;
72 | | - $finalToken = $tokens->current();
73 | | - Scanner::skipOne($tokens);
74 | | - break;
| 51 | + $this->reset($preserveLeadingSpace); // Clear state left over from any previous parse() call on this instance.
| 52 | +
| 53 | + while (!Scanner::isEnd($tokens) && !$this->terminated) {
| 54 | + $this->startingToken ??= $tokens->current(); // Assigned once, before dispatch, so even a terminating first token is recorded.
| 55 | +
| 56 | + match (Scanner::type($tokens)) { // Dispatch on the current token's type; each handler mutates parser state.
| 57 | + TokenType::BRACKET_CURLY_OPEN,
| 58 | + TokenType::TAG_START_OPENING =>
| 59 | + $this->terminateAtAdjacentChildNode(),
| 60 | + TokenType::TAG_START_CLOSING =>
| 61 | + $this->terminateAtClosingTag(),
| 62 | + TokenType::SPACE =>
| 63 | + $this->captureSpace($tokens->current()),
| 64 | + TokenType::END_OF_LINE =>
| 65 | + $this->captureLineBreak($tokens->current()),
| 66 | + default =>
| 67 | + $this->captureText($tokens->current()),
| 68 | + };
| 69 | +
| 70 | + if (!$this->terminated) { // Scanner::skipOne() is not called for a terminating token.
| 71 | + Scanner::skipOne($tokens);
75 | 72 | }
76 | 73 | }
77 | 74 |
|
78 | | - if (is_null($startingToken) || is_null($finalToken)) {
| 75 | + return $this->build();
| 76 | + }
| 77 | + |
| 78 | + private function reset(bool $preserveLeadingSpace): void // Restores every piece of parse state to its initial value.
| 79 | + {
| 80 | + $this->value = '';
| 81 | +
| 82 | + $this->startingToken = null;
| 83 | + $this->finalToken = null;
| 84 | +
| 85 | + $this->trimLeadingSpace = !$preserveLeadingSpace; // Leading space is trimmed unless the caller asked to keep it.
| 86 | + $this->trimTrailingSpace = true; // Default; only terminateAtAdjacentChildNode() assigns a different value.
| 87 | + $this->currentlyCapturingSpace = false;
| 88 | + $this->trailingSpaceContainsLineBreak = false;
| 89 | + $this->terminated = false;
| 90 | + }
| 91 | + |
| 92 | + private function terminateAtAdjacentChildNode(): void // Handler for BRACKET_CURLY_OPEN and TAG_START_OPENING tokens.
| 93 | + {
| 94 | + $this->terminated = true;
| 95 | + $this->trimTrailingSpace = $this->trailingSpaceContainsLineBreak; // Trailing space is kept unless it included a line break.
| 96 | + }
| 97 | + |
| 98 | + private function terminateAtClosingTag(): void // Handler for TAG_START_CLOSING; trimTrailingSpace is left untouched here.
| 99 | + {
| 100 | + $this->terminated = true;
| 101 | + }
| 102 | + |
| 103 | + private function captureSpace(Token $token): void // Records a whitespace token, appending at most one ' ' per whitespace run.
| 104 | + {
| 105 | + $this->finalToken = $token; // Updated for every whitespace token, including ones that add nothing to the value.
| 106 | +
| 107 | + if ($this->currentlyCapturingSpace) { // Already inside a run: its single ' ' has been appended.
| 108 | + return;
| 109 | + }
| 110 | +
| 111 | + $this->currentlyCapturingSpace = true;
| 112 | + $this->value .= ' ';
| 113 | + }
| 114 | + |
| 115 | + private function captureLineBreak(Token $token): void // Treats the token like a space and remembers that a line break was seen.
| 116 | + {
| 117 | + $this->captureSpace($token);
| 118 | + $this->trailingSpaceContainsLineBreak = true; // Stays set until captureText() or reset() clears it.
| 119 | + }
| 120 | + |
| 121 | + private function captureText(Token $token): void // Appends the token's value verbatim and ends the current whitespace run.
| 122 | + {
| 123 | + $this->finalToken = $token;
| 124 | + $this->currentlyCapturingSpace = false;
| 125 | + $this->trailingSpaceContainsLineBreak = false; // Any earlier line break is no longer trailing.
| 126 | +
| 127 | + $this->value .= $token->value;
| 128 | + }
| 129 | + |
| 130 | + private function build(): ?TextNode // Applies the trimming flags and returns the TextNode, or null when no text remains.
| 131 | + {
| 132 | + if (is_null($this->startingToken) || is_null($this->finalToken)) { // No token was captured.
79 | 133 | return null;
80 | 134 | }
81 | 135 |
|
82 | | - if (!$preserveLeadingSpace) {
83 | | - $value = ltrim($value);
| 136 | + if ($this->trimLeadingSpace) {
| 137 | + $this->value = ltrim($this->value);
84 | 138 | }
85 | 139 |
|
86 | | - if (!$keepTrailingSpace || $forceTrimTrailingSpace) {
87 | | - $value = rtrim($value);
| 140 | + if ($this->trimTrailingSpace) {
| 141 | + $this->value = rtrim($this->value);
88 | 142 | }
89 | 143 |
|
90 | | - if ($value === '' || $value === ' ') {
| 144 | + if ($this->value === '' || $this->value === ' ') { // An empty value or a lone space yields no node.
91 | 145 | return null;
92 | 146 | }
93 | 147 |
|
94 | 148 | return new TextNode(
95 | 149 | rangeInSource: Range::from(
96 | | - $startingToken->boundaries->start,
97 | | - $finalToken->boundaries->end
| 150 | + $this->startingToken->boundaries->start,
| 151 | + $this->finalToken->boundaries->end // NOTE(review): finalToken may be a whitespace token whose text was trimmed from the value.
98 | 152 | ),
99 | | - value: $value
| 153 | + value: $this->value
100 | 154 | );
101 | 155 | }
102 | 156 | } |
0 commit comments