|
27 | 27 | import codecs |
28 | 28 |
|
29 | 29 |
|
30 | | -__all__ = ('AdvancedHTMLFormatter', 'AdvancedHTMLMiniFormatter') |
| 30 | +__all__ = ('AdvancedHTMLFormatter', 'AdvancedHTMLMiniFormatter', 'AdvancedHTMLSlimTagFormatter', 'AdvancedHTMLSlimTagMiniFormatter') |
31 | 31 |
|
32 | 32 | class AdvancedHTMLFormatter(HTMLParser): |
33 | 33 | ''' |
@@ -204,7 +204,7 @@ def handle_endtag(self, tagName): |
204 | 204 | if not foundIt: |
205 | 205 | sys.stderr.write('WARNING: found close tag with no matching start.\n') |
206 | 206 | return |
207 | | - |
| 207 | + |
208 | 208 | while inTag[-1].tagName != tagName: |
209 | 209 | oldTag = inTag.pop() |
210 | 210 | if oldTag.tagName in PREFORMATTED_TAGS: |
@@ -284,7 +284,7 @@ def unknown_decl(self, decl): |
284 | 284 | def parseFile(self, filename): |
285 | 285 | ''' |
286 | 286 | parseFile - Parses a file and creates the DOM tree and indexes |
287 | | - |
| 287 | +
|
288 | 288 | @param filename <str/file> - A string to a filename or a file object. If file object, it will not be closed, you must close. |
289 | 289 | ''' |
290 | 290 | self.reset() |
@@ -326,4 +326,146 @@ def __init__(self, encoding='utf-8'): |
326 | 326 | def _getIndent(self): |
327 | 327 | return '' |
328 | 328 |
|
| 329 | + |
class AdvancedTagSlim(AdvancedTag):
    '''
        AdvancedTagSlim - An AdvancedTag subclass which renders "slim" start tags,
            i.e. without the space before the closing bracket
            (this may cause parsing issues on some old/strange parsers).

            Example: instead of <span id="blah" > you get <span id="blah">

        Self-closing tags keep their space by default ( <br /> ) because of
            xhtml attribute rules. Pass slimSelfClosing=True to __init__
            to slim those down as well.

        @NOTE: You should NOT use this class directly, it exists for use by the SlimTagFormatter classes.
    '''

    def __init__(self, *args, **kwargs):
        '''
            __init__ - Create an AdvancedTagSlim object.

                All arguments other than those listed below are passed
                through unchanged to AdvancedTag.__init__

            @see AdvancedTag

            Extra arguments:

                slimSelfClosing <bool> default False - If True, will use slim-endings on self-closing tags,

                    i.e. <br/> instead of <br />

                    This may break xhtml compatibility but modern browsers are okay with it.
        '''
        # Remove our own keyword so the parent constructor never sees it.
        useSlimSelfClosing = kwargs.pop('slimSelfClosing', False)

        AdvancedTag.__init__(self, *args, **kwargs)

        # Stored via object.__setattr__ rather than plain assignment.
        # NOTE(review): presumably AdvancedTag customizes attribute assignment - confirm.
        object.__setattr__(self, 'slimSelfClosing', useSlimSelfClosing)

    def getStartTag(self, *args, **kwargs):
        '''
            getStartTag - Get the start tag as rendered by AdvancedTag,
                with the slim end-spacing rules applied on top.

            @see AdvancedTag.getStartTag

            @return - The start tag with a trailing " >" collapsed to ">", and,
                when slimSelfClosing is set, a trailing " />" collapsed to "/>"
        '''
        startTag = AdvancedTag.getStartTag(self, *args, **kwargs)

        if startTag.endswith(' >'):
            # Regular tag: drop the space before the bracket
            return startTag[:-2] + '>'

        if object.__getattribute__(self, 'slimSelfClosing') and startTag.endswith(' />'):
            # Self-closing tag: only slimmed when explicitly requested
            return startTag[:-3] + '/>'

        return startTag
| 383 | + |
| 384 | + |
class AdvancedHTMLSlimTagFormatter(AdvancedHTMLFormatter):
    '''
        AdvancedHTMLSlimTagFormatter - Formats HTML with slim start tags,
            which may break some xhtml-compatible parsers.

            For example <span id="abc" > will become <span id="abc">.

        Remainder will be pretty-printed. For mini-printing, @see AdvancedHTMLSlimTagMiniFormatter

        If slimSelfClosing=True on __init__, <br /> will become <br/> as well
    '''

    def __init__(self, indent=' ', encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Construct an AdvancedHTMLSlimTagFormatter

            @see AdvancedHTMLFormatter

            @param indent - Passed through to AdvancedHTMLFormatter.__init__

            @param encoding - Passed through to AdvancedHTMLFormatter.__init__

            @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                e.x. <br /> becomes <br/>
        '''
        AdvancedHTMLFormatter.__init__(self, indent=indent, encoding=encoding)

        # Forwarded to every AdvancedTagSlim created in handle_starttag
        self.slimSelfClosing = slimSelfClosing

    def handle_starttag(self, tagName, attributeList, isSelfClosing=False):
        '''
            handle_starttag - Handles parsing a start tag. Creates an AdvancedTagSlim
                for the tag and attaches it to the tree being built.

            @see AdvancedHTMLFormatter.handle_starttag

            @param tagName <str> - Name of the tag, lowercased before use

            @param attributeList - Attributes, passed through to AdvancedTagSlim

            @param isSelfClosing <bool> Default False - Whether the tag was written as self-closing.
                Tags in IMPLICIT_SELF_CLOSING_TAGS are treated as self-closing regardless.

            @raises MultipleRootNodeException - If a root already exists and no tag is currently open
        '''
        tagName = tagName.lower()
        inTag = self._inTag

        if isSelfClosing is False and tagName in IMPLICIT_SELF_CLOSING_TAGS:
            isSelfClosing = True

        newTag = AdvancedTagSlim(tagName, attributeList, isSelfClosing, slimSelfClosing=self.slimSelfClosing)
        if self.root is None:
            self.root = newTag
        elif inTag:
            # Attach to the innermost currently-open tag
            inTag[-1].appendChild(newTag)
        else:
            raise MultipleRootNodeException()

        # Compare the counter by value. Identity comparison against an int literal
        #  only works by interpreter accident and warns on newer Python versions.
        if self.inPreformatted == 0:
            newTag._indent = self._getIndent()

        if tagName in PREFORMATTED_TAGS:
            self.inPreformatted += 1

        if isSelfClosing is False:
            # Tag stays open until its matching end tag is handled
            inTag.append(newTag)
            if tagName != INVISIBLE_ROOT_TAG:
                self.currentIndentLevel += 1
| 443 | + |
| 444 | + |
class AdvancedHTMLSlimTagMiniFormatter(AdvancedHTMLMiniFormatter):
    '''
        AdvancedHTMLSlimTagMiniFormatter - A "mini" formatter that
            removes all non-functional whitespace (including all indentations)

        Also uses "slim" start tags, @see AdvancedHTMLSlimTagFormatter for more info
    '''

    def __init__(self, encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Create an AdvancedHTMLSlimTagMiniFormatter

            @see AdvancedHTMLMiniFormatter

            @param encoding - Passed through to AdvancedHTMLMiniFormatter.__init__

            @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                e.x. <br /> becomes <br/>
        '''
        AdvancedHTMLMiniFormatter.__init__(self, encoding=encoding)

        # Read by the shared handle_starttag below when it creates tags
        self.slimSelfClosing = slimSelfClosing

    # Reuse the slim-tag start handler from the pretty-printing formatter as-is.
    # NOTE(review): on Python 2 this attribute would be an unbound method tied to
    #  AdvancedHTMLSlimTagFormatter - confirm Python 2 is not a target.
    handle_starttag = AdvancedHTMLSlimTagFormatter.handle_starttag
| 469 | + |
| 470 | + |
329 | 471 | #vim: set ts=4 sw=4 expandtab |
0 commit comments