|
1 | 1 | from abc import ABC, abstractmethod
|
| 2 | +from enum import IntEnum, auto |
2 | 3 | from types import SimpleNamespace
|
3 | 4 | from typing import Union, List, Dict
|
4 | 5 | import re
|
@@ -188,6 +189,95 @@ def finish_consumption(self, final: bool) -> str:
|
188 | 189 | follower: Union['IParser', None] = None
|
189 | 190 |
|
190 | 191 |
|
class TableParser(IParser):
    """Convert a reStructuredText simple table into a Markdown-style pipe table.

    A table starts at a border line made of runs of ``=`` separated by
    spaces. The runs define the columns: every following line is cut at the
    positions where the runs of the first border begin. The expected layout
    is::

        =====  =====     <- opening border (handled by `initiate_parsing`)
        A      B         <- header row
        =====  =====     <- border below the header
        1      2         <- any number of body rows
        =====  =====     <- closing border

    `finish_consumption` renders the collected cells as ``| a | b |`` rows
    with a dashed delimiter row below the header.
    """

    class State(IntEnum):
        """Position of the parser within the table layout."""
        AWAITS = auto()          # no table has been started
        PARSING_HEADER = auto()  # opening border seen; header row is next
        PARSED_HEADER = auto()   # header row read; border below it is next
        PARSING_ROWS = auto()    # reading body rows until the closing border
        FINISHED = auto()        # closing border seen

    outer_border_pattern = r'^=+( +=+)+$'

    # A pipe that is not already preceded by a backslash.
    _unescaped_pipe = re.compile(r'(?<!\\)\|')
    # The delimiter row needs at least one hyphen per column to be a valid
    # Markdown table, so no column may be narrower than this.
    _min_column_width = 1

    _state: State
    _column_starts: List[int]
    _columns: List[str]
    _rows: List[List[str]]
    _max_sizes: List[int]

    def _reset_state(self):
        """Forget everything collected from a previously parsed table."""
        self._state = TableParser.State.AWAITS
        self._column_starts = []
        self._columns = []
        self._rows = []
        self._max_sizes = []

    def can_parse(self, line: str) -> bool:
        """Return True if `line` is a table border and can open a table."""
        return bool(re.match(self.outer_border_pattern, line))

    def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning:
        """Start a new table from its opening border `line`.

        Records the offset at which each run of ``=`` begins; these offsets
        are the column boundaries used for all following lines.

        `current_language` is not used by this parser.

        Raises:
            ValueError: if `line` is not a table border.
        """
        self._reset_state()
        # Raise instead of assert: asserts are stripped under ``python -O``,
        # which would let a non-border line produce bogus column offsets.
        if not re.match(self.outer_border_pattern, line):
            raise ValueError(f'Not a table border line: {line!r}')
        # A border consists only of '=' and ' ', so each run of '=' is
        # exactly one column.
        self._column_starts = [run.start() for run in re.finditer(r'=+', line)]
        self._max_sizes = [self._min_column_width] * len(self._column_starts)
        self._state = TableParser.State.PARSING_HEADER
        return IBlockBeginning(remainder='')

    def can_consume(self, line: str) -> bool:
        """Return True until the closing border of the table has been seen."""
        return self._state != TableParser.State.FINISHED

    def consume(self, line: str) -> None:
        """Feed the next line of the table and advance the state machine."""
        states = TableParser.State
        # States are assigned explicitly rather than incremented: adding 1
        # to an IntEnum member yields a plain int and relies on the
        # declaration order of the members.
        if self._state == states.PARSING_HEADER:
            self._columns = self._split(line)
            self._state = states.PARSED_HEADER
        elif self._state == states.PARSED_HEADER:
            # The line below the header is taken to be a border and skipped.
            # TODO: check integrity?
            self._state = states.PARSING_ROWS
        elif self._state == states.PARSING_ROWS:
            if re.match(self.outer_border_pattern, line):
                self._state = states.FINISHED
            else:
                self._rows.append(self._split(line))

    def _split(self, line: str) -> List[str]:
        """Cut `line` into stripped cells at the recorded column offsets.

        Pipes in the cell text are backslash-escaped so that they are not
        read as column separators in the rendered table. The widest cell
        seen so far in each column is tracked in `_max_sizes`.
        """
        assert self._column_starts, 'initiate_parsing() must be called first'
        # Each column ends where the next one starts; the last column
        # extends to the end of the line.
        column_ends = self._column_starts[1:] + [len(line)]
        fragments = []
        for i, (start, end) in enumerate(zip(self._column_starts, column_ends)):
            fragment = self._unescaped_pipe.sub(r'\\|', line[start:end].strip())
            self._max_sizes[i] = max(self._max_sizes[i], len(fragment))
            fragments.append(fragment)
        return fragments

    def _wrap(self, row: List[str], align=str.ljust) -> str:
        """Render `row` as one pipe-delimited line ending with a newline.

        Each cell is padded to the width of its column using `align`
        (a ``str`` method such as ``str.ljust`` or ``str.center``).
        """
        padded_row = [
            align(cell, width)
            for cell, width in zip(row, self._max_sizes)
        ]
        return '| ' + ' | '.join(padded_row) + ' |\n'

    def finish_consumption(self, final: bool) -> str:
        """Return the collected table rendered as a pipe table.

        The header row is centred, followed by a delimiter row of hyphens
        and the left-aligned body rows. `final` is not used by this parser.
        """
        # If the input ended before the header row was read, emit empty
        # header cells so the header still matches the delimiter row.
        header = self._columns or [''] * len(self._max_sizes)
        delimiter = ['-' * size for size in self._max_sizes]

        lines = [
            self._wrap(header, align=str.center),
            self._wrap(delimiter),
        ]
        lines.extend(self._wrap(row) for row in self._rows)
        return ''.join(lines)
| 279 | + |
| 280 | + |
191 | 281 | class BlockParser(IParser):
|
192 | 282 | enclosure = '```'
|
193 | 283 | follower: Union['IParser', None] = None
|
@@ -351,7 +441,8 @@ def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning:
|
351 | 441 | NoteBlockParser(),
|
352 | 442 | MathBlockParser(),
|
353 | 443 | ExplicitCodeBlockParser(),
|
354 |
| - DoubleColonBlockParser() |
| 444 | + DoubleColonBlockParser(), |
| 445 | + TableParser() |
355 | 446 | ]
|
356 | 447 |
|
357 | 448 | RST_SECTIONS = {
|
|
0 commit comments