11"""Define adapter / helper classes to hide unrelated functionality in."""
2-
2+ import io
33import logging
44import re
55from functools import partial
1212from ruyaml .tokens import CommentToken
1313
1414from yamlfix .model import YamlfixConfig , YamlNodeStyle
15+ from yamlfix .util import walk_object
1516
1617log = logging .getLogger (__name__ )
1718
@@ -334,18 +335,12 @@ def __init__(self, yaml: Yaml, config: Optional[YamlfixConfig]) -> None:
334335 self .yaml = yaml .yaml
335336 self .config = config or YamlfixConfig ()
336337
337- def fix (self , source_code : str ) -> str :
338- """Run all yaml source code fixers.
339-
340- Args:
341- source_code: Source code to be corrected.
342-
343- Returns:
344- Corrected source code.
345- """
346- log .debug ("Running source code fixers..." )
347-
348- fixers = [
338+ # The list of fixers to run, and the index of the currently called fixer.
339+ # This allows fixers which might need to reinvoke other fixers to reinvoke the
340+ # previous fixers (currently fix comments does this as it needs to re-emit the
341+ # YAML).
342+ self ._fixer_idx = - 1
343+ self ._fixers = [
349344 self ._fix_truthy_strings ,
350345 self ._fix_jinja_variables ,
351346 self ._ruamel_yaml_fixer ,
@@ -359,8 +354,40 @@ def fix(self, source_code: str) -> str:
359354 self ._add_newline_at_end_of_file ,
360355 ]
361356
362- for fixer in fixers :
357+ def _rerun_previous_fixers (self , source_code : str ) -> str :
358+ """Run all the previous source code fixers except the currently running one.
359+
360+ This is re-entrant safe and will correctly restore the fixer idx once it's
361+ complete.
362+
363+ Args:
364+ source_code: Source code to be corrected.
365+
366+ Returns:
367+ Corrected source code.
368+ """
369+ cur_fixer_idx = self ._fixer_idx
370+ for fixer_idx , fixer in enumerate (self ._fixers [:cur_fixer_idx ]):
371+ self ._fixer_idx = fixer_idx
363372 source_code = fixer (source_code )
373+ # Restore fixer idx
374+ self ._fixer_idx = cur_fixer_idx
375+ return source_code
376+
377+ def fix (self , source_code : str ) -> str :
378+ """Run all yaml source code fixers.
379+
380+ Args:
381+ source_code: Source code to be corrected.
382+
383+ Returns:
384+ Corrected source code.
385+ """
386+ log .debug ("Running source code fixers..." )
387+
388+ # Just use the re-run system do it.
389+ self ._fixer_idx = len (self ._fixers )
390+ source_code = self ._rerun_previous_fixers (source_code )
364391
365392 return source_code
366393
@@ -573,24 +600,102 @@ def _restore_truthy_strings(source_code: str) -> str:
573600 def _fix_comments (self , source_code : str ) -> str :
574601 log .debug ("Fixing comments..." )
575602 config = self .config
576- comment_start = " " * config .comments_min_spaces_from_content + "#"
577603
578- fixed_source_lines = []
604+ # We need the source lines for the comment fixers to analyze whitespace easily
605+ source_lines = source_code .splitlines ()
579606
580- for line in source_code .splitlines ():
581- # Comment at the start of the line
582- if config .comments_require_starting_space and re .search (r"(^|\s)#\w" , line ):
583- line = line .replace ("#" , "# " )
584- # Comment in the middle of the line, but it's not part of a string
585- if (
586- config .comments_min_spaces_from_content > 1
587- and " #" in line
588- and line [- 1 ] not in ["'" , '"' ]
589- ):
590- line = re .sub (r"(.+\S)(\s+?)#" , rf"\1{ comment_start } " , line )
591- fixed_source_lines .append (line )
607+ yaml = YAML (typ = "rt" )
608+ # Hijack config options from the regular fixer
609+ yaml .explicit_start = self .yaml .explicit_start
610+ yaml .width = self .yaml .width
611+ # preserve_quotes however must always be true, otherwise we change output
612+ # unexpectedly.
613+ yaml .preserve_quotes = True # type: ignore
614+ yaml_documents = list (yaml .load_all (source_code ))
592615
593- return "\n " .join (fixed_source_lines )
616+ handled_comments : List [CommentToken ] = []
617+
618+ def _comment_token_cb (target : Any ) -> None : # noqa:W0613,ANN401
619+ """This function is a callback for walk_object.
620+
621+ This implements the actual comment fixing, and is used because comment
622+ objects are structured in nested-lists and can repeat.
623+ """
624+ if not isinstance (target , CommentToken ):
625+ return
626+ if any (target is e for e in handled_comments ):
627+ # This comment was handled at a higher level already.
628+ return
629+ if target .value is None :
630+ return
631+ comment_lines = target .value .split ("\n " )
632+ fixed_comment_lines = []
633+ for line in comment_lines :
634+ if config .comments_require_starting_space and re .search (
635+ r"(^|\s)#\w" , line
636+ ):
637+ line = line .replace ("#" , "# " )
638+ fixed_comment_lines .append (line )
639+
640+ # Update the comment with the fixed lines
641+ target .value = "\n " .join (fixed_comment_lines )
642+
643+ if config .comments_min_spaces_from_content > 1 :
644+ # It's hard to reconstruct exactly where the content is, but since we
645+ # have the line numbers what we do is lookup the literal source line
646+ # here and check where the whitespace is compared to where we know the
647+ # comment starts.
648+ source_line = source_lines [target .start_mark .line ]
649+ content_part = source_line [0 : target .start_mark .column ] # noqa: E203
650+ # Find the non-whitespace position in the content part
651+ rx_match = re .match (r"^.*\S" , content_part )
652+ if (
653+ rx_match is not None
654+ ): # If no match then nothing to do - no content to be away from
655+ _ , content_end = rx_match .span ()
656+ # If there's less than min-spaces from content, we're going to add
657+ # some.
658+ if (
659+ target .start_mark .column - content_end
660+ < config .comments_min_spaces_from_content
661+ ):
662+ # Handled
663+ target .start_mark .column = (
664+ content_end + config .comments_min_spaces_from_content
665+ )
666+ # Some ruyaml objects will return attached comments at multiple levels (but
667+ # not all). Keep track of which comments we've already processed to avoid
668+ # double processing them (important because we use raw source lines to
669+ # determine content position above).
670+ handled_comments .append (target )
671+
672+ def _comment_fixer (target : Any ) -> None : # noqa:W0613,ANN401
673+ """This function is a callback for walk_object.
674+
675+ walk_object calls it for every object it finds, and then will walk the
676+ mapping/sequence subvalues and call this function on those too. This gives
677+ us direct access to all round tripped comments.
678+ """
679+ if not hasattr (target , "ca" ):
680+ # Scalar or other object with no comment parameter.
681+ return
682+ # Find all comment tokens and fix them
683+ walk_object (target .ca .comment , _comment_token_cb )
684+ walk_object (target .ca .end , _comment_token_cb )
685+ walk_object (target .ca .items , _comment_token_cb )
686+ walk_object (target .ca .pre , _comment_token_cb )
687+
688+ # Walk the object and invoke the comment fixer
689+ walk_object (yaml_documents , _comment_fixer )
690+
691+ # Dump out the YAML documents
692+ stream = io .StringIO ()
693+ yaml .dump_all (yaml_documents , stream = stream )
694+
695+ fixed_source_code = stream .getvalue ()
696+ # Reinvoke the previous fixers to ensure we fix the new output we just created.
697+ fixed_source_code = self ._rerun_previous_fixers (fixed_source_code )
698+ return fixed_source_code
594699
595700 def _fix_whitelines (self , source_code : str ) -> str :
596701 """Fixes number of consecutive whitelines.
0 commit comments