@@ -112,20 +112,81 @@ def transform_text_files(tmp_dir: Path, replacements: List[Dict], binary_pattern
112112 path .write_text (content , encoding = "utf-8" )
113113
114114
def rsync_into_repo(tmp_dir: Path, exclude_paths: List[str], include_patterns: List[str] | None = None,
                    normalize_opts: Dict | None = None) -> None:
    """Copy the transformed tree under ``tmp_dir`` into ``ROOT``.

    Files whose content is "effectively equal" to the existing destination
    file are left untouched, so cosmetic differences (line endings, BOM,
    Unicode normalization form, trailing whitespace) do not show up as
    changes in the working tree.

    Args:
        tmp_dir: Root of the transformed upstream tree to copy from.
        exclude_paths: ``fnmatch`` patterns (matched against the POSIX-style
            relative path) for entries that must never be copied.
        include_patterns: Optional patterns limiting which entries are
            considered; evaluated by the file-level ``_matches`` helper
            (NOTE(review): its exact matching semantics are defined elsewhere
            in this file). ``None`` or empty means "everything".
        normalize_opts: Optional mapping controlling the equality check.
            ``None`` means a strict byte-for-byte comparison. Recognised keys:
            ``"unicode_norm"`` (``NFC``/``NFD``/``NFKC``/``NFKD``/``NONE``,
            defaulting to ``NFKC``; any other value disables normalization)
            and ``"ignore_ws"`` (truthy: ignore trailing whitespace per line).
    """
    import unicodedata
    from fnmatch import fnmatch

    text_exts = {
        ".py", ".md", ".rst", ".toml", ".yml", ".yaml", ".ini", ".cfg", ".txt", ".json",
        ".sh", ".ps1", ".bat", ".ipynb", ".in", ".pyi", ".pyx", ".pxd", ".pxi",
    }
    special_text_names = {".gitignore", ".gitattributes", "Makefile", "LICENSE", "COPYING", "pre-commit"}
    unicode_forms = {"NFC", "NFD", "NFKC", "NFKD"}
    skipped_roots = {".git", ".github", ".sync_upstream", ".sync_diff"}

    def looks_like_text(p: Path, raw: bytes) -> bool:
        # Known text suffix/name, or no NUL byte within the first 4 KiB.
        if p.suffix.lower() in text_exts or p.name in special_text_names:
            return True
        return b"\x00" not in raw[:4096]

    def normalized(p: Path, raw: bytes) -> bytes:
        # Canonical byte form used only for comparison; binary files are compared as-is.
        if not looks_like_text(p, raw):
            return raw
        data = raw.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
        if data.startswith(b"\xef\xbb\xbf"):
            data = data[3:]
        try:
            # Strict decode: with errors="replace" every undecodable byte would
            # collapse to U+FFFD, making genuinely different non-UTF-8 files
            # compare equal (and the latin-1 fallback unreachable).
            text = data.decode("utf-8")
        except UnicodeDecodeError:
            # latin-1 maps every byte to a distinct code point, so it never
            # fails and never hides a difference.
            text = data.decode("latin-1")
        form = str(normalize_opts.get("unicode_norm") or "NFKC").upper()
        if form in unicode_forms:
            text = unicodedata.normalize(form, text)
        if normalize_opts.get("ignore_ws"):
            text = "\n".join(line.rstrip() for line in text.splitlines())
        return text.encode("utf-8")

    def equal_file(a: Path, b: Path) -> bool:
        # Best effort: any I/O failure means "not equal", so the file gets copied.
        try:
            if not (a.exists() and b.exists()):
                return False
            raw_a = a.read_bytes()
            raw_b = b.read_bytes()
        except OSError:
            return False
        if raw_a == raw_b:
            return True
        if normalize_opts is None:
            return False
        return normalized(a, raw_a) == normalized(b, raw_b)

    for src in tmp_dir.rglob("*"):
        rel = src.relative_to(tmp_dir)
        if rel.parts and rel.parts[0] in skipped_roots:
            continue
        if include_patterns and not _matches(rel, include_patterns):
            continue
        # Normalise Windows separators so exclude patterns are written POSIX-style.
        rel_posix = str(rel).replace("\\", "/")
        if any(fnmatch(rel_posix, pat) for pat in exclude_paths):
            continue
        dest = ROOT / rel
        if src.is_dir():
            dest.mkdir(parents=True, exist_ok=True)
            continue
        dest.parent.mkdir(parents=True, exist_ok=True)
        if dest.exists() and equal_file(src, dest):
            # Leave the existing file alone to avoid a no-op diff.
            continue
        shutil.copy2(src, dest)
130191
131192
@@ -221,7 +282,9 @@ def main():
221282 parser .add_argument ("--clean" , action = "store_true" , help = "Delete files removed upstream (excluding excluded paths)" )
222283 parser .add_argument ("--dry-run" , action = "store_true" , help = "Run transforms without copying into repo" )
223284 parser .add_argument ("--paths" , nargs = "+" , help = "Glob patterns relative to repo root to limit the diff/sync scope (e.g., pyballistic/** docs/**)" )
224- parser .add_argument ("--show-diff" , action = "store_true" , help = "Print diff of changes without modifying working tree" )
285+ parser .add_argument ("--show-diff" , action = "store_true" , help = "Print diff of changes; does not modify working tree unless --apply/--stage is set" )
286+ parser .add_argument ("--apply" , action = "store_true" , help = "Apply transformed upstream into working tree (respects --paths, --clean)" )
287+ parser .add_argument ("--stage" , action = "store_true" , help = "Stage changes after applying (implies --apply)" )
225288 parser .add_argument ("--keep-temp" , action = "store_true" , help = "Keep .sync_diff folder for manual inspection" )
226289 parser .add_argument ("--check" , nargs = "*" , help = "Specific files to verify (relative to repo root); prints if changed and why" )
227290 parser .add_argument ("--sample-diff" , type = int , default = 0 , help = "When using --check, print up to N lines of unified diff for each checked file" )
@@ -432,14 +495,25 @@ def sha256(p: Path) -> str:
432495 print ("[dry-run] Transforms applied in" , WORKTREE )
433496 return
434497
435- # Sync files into current repo, respecting exclusions
436- rsync_into_repo ( WORKTREE , exclude_paths )
437- if args . clean :
438- clean_deleted_files ( WORKTREE , exclude_paths )
498+ # If only showing diff, avoid modifying working tree unless apply/stage is set
499+ if args . show_diff and not ( args . apply or args . stage ):
500+ print ( "[info] show-diff only; no changes applied." )
501+ return
439502
440- # Stage and leave to workflow to open PR
441- run (["git" , "add" , "-A" ], cwd = ROOT )
442- # Don't commit here; create-pull-request action will commit
503+ # Backward compatibility default: if neither show-diff nor apply/stage provided,
504+ # proceed to apply+stage (keeps CI behavior unchanged)
505+ do_apply = args .apply or args .stage or (not args .show_diff and not args .dry_run )
506+ if do_apply :
507+ rsync_into_repo (
508+ WORKTREE ,
509+ exclude_paths ,
510+ include_patterns = args .paths ,
511+ normalize_opts = {"ignore_ws" : args .ignore_ws , "unicode_norm" : args .unicode_norm },
512+ )
513+ if args .clean :
514+ clean_deleted_files (WORKTREE , exclude_paths , dest_root = ROOT , include_patterns = args .paths )
515+ if args .stage or (not args .show_diff and not args .dry_run ):
516+ run (["git" , "add" , "-A" ], cwd = ROOT )
443517
444518
445519if __name__ == "__main__" :
0 commit comments