1111class  DiffGenerator :
1212    """ 
1313    A class for generating custom diffs between two pieces of content. 
14+     It enhances the standard unified diff by adding function/class context to hunk headers, 
15+     similar to `git diff`, in a fail-safe manner. 
1416    """ 
1517
18+     # A pre-compiled list of regex patterns to find function/class definitions. 
19+     # This is the core mechanism that mimics Git's `xfuncname` feature. 
20+     # It covers a wide range of common languages to provide broad, out-of-the-box support. 
21+     _FUNC_CONTEXT_PATTERNS  =  [
22+         re .compile (r'^\s*(def|class)\s+.*' , re .IGNORECASE ),  # Python 
23+         re .compile (r'^\s*(public|private|protected|static|final|native|synchronized|abstract|transient|volatile|strictfp|async|function|class|interface|enum|@|implements|extends)' ),  # Java, JS, TS, PHP, C# 
24+         re .compile (r'^\s*(func|fn|impl|trait|struct|enum|mod)\s+.*' , re .IGNORECASE ), # Go, Rust 
25+         re .compile (r'^\s*(def|class|module)\s+.*' , re .IGNORECASE ), # Ruby 
26+         re .compile (r'^\s*([a-zA-Z_][a-zA-Z0-9_]*\s+)*[a-zA-Z_][a-zA-Z0-9_]*\s*\(.*\)\s*\{' ), # C, C++ style function definitions 
27+         re .compile (r'^sub\s+.*' ), # Perl 
28+     ]
29+ 
30+     @staticmethod  
31+     def  _find_context (line_index : int , lines : List [str ]) ->  str :
32+         """ 
33+         Search upwards from a given line index to find the nearest function/class context. 
34+ 
35+         Args: 
36+             line_index (int): The 0-based index to start searching upwards from. 
37+             lines (List[str]): The content of the file, as a list of lines. 
38+ 
39+         Returns: 
40+             str: The found context line, stripped of whitespace, or an empty string if not found. 
41+         """ 
42+         # Search from the target line upwards to the beginning of the file. 
43+         for  i  in  range (line_index , - 1 , - 1 ):
44+             line  =  lines [i ]
45+             # Check the line against all our predefined patterns. 
46+             for  pattern  in  DiffGenerator ._FUNC_CONTEXT_PATTERNS :
47+                 if  pattern .search (line ):
48+                     return  line .strip ()
49+         return  ""  # Return empty string if no context is found. 
50+ 
1651    @staticmethod  
1752    def  generate_custom_diff (base_content : str , head_content : str , context_lines : int ) ->  str :
1853        """ 
19-         Generate a custom diff between two pieces of content with specified context lines. 
54+         Generate a custom diff between two pieces of content with specified context lines, 
55+         and automatically add function/class context to hunk headers, similar to `git diff`. 
56+         This method is designed to be fail-safe; if context addition fails, it returns the standard diff. 
2057
2158        Args: 
2259            base_content (str): The original content. 
2360            head_content (str): The new content to compare against the base. 
2461            context_lines (int): The number of context lines to include in the diff. 
2562
2663        Returns: 
27-             str: A string representation of the unified diff. 
64+             str: A string representation of the unified diff, preferably with hunk headers . 
2865
2966        Raises: 
3067            ValueError: If context_lines is negative. 
@@ -40,15 +77,69 @@ def generate_custom_diff(base_content: str, head_content: str, context_lines: in
4077            # File is deleted 
4178            return  "" .join (f"- { line } \n "  for  line  in  base_content .splitlines ())
4279
80+         # Use empty strings for None content to ensure difflib handles them correctly 
81+         # as file additions or deletions. This is more robust and aligns with difflib's expectations. 
82+         base_content  =  base_content  or  "" 
83+         head_content  =  head_content  or  "" 
84+ 
4385        base_lines : List [str ] =  base_content .splitlines ()
4486        head_lines : List [str ] =  head_content .splitlines ()
4587
88+         # Generate the standard unified diff. This part is considered stable. 
89+         diff : List [str ] =  list (difflib .unified_diff (
90+             base_lines ,
91+             head_lines ,
92+             n = context_lines ,
93+             lineterm = '' 
94+         ))
95+ 
96+         if  not  diff :
97+             return  ""  # No differences found, return early. 
98+ 
99+         # --- Start of the fail-safe enhancement logic --- 
100+         # This entire block attempts to add context to hunk headers. 
101+         # If any exception occurs here, we catch it and return the original, un-enhanced diff. 
102+         # This ensures the function is always reliable (Pareto improvement). 
46103        try :
47-             diff : List [str ] =  list (difflib .unified_diff (base_lines , head_lines , n = context_lines , lineterm = '' ))
48-             return  '\n ' .join (diff )
104+             enhanced_diff  =  []
105+             # Regex to parse the original line number from a hunk header. 
106+             # e.g., from "@@ -35,7 +35,7 @@" it captures "35". 
107+             hunk_header_re  =  re .compile (r'^@@ -(\d+)(?:,\d+)? .*' )
108+ 
109+             for  line  in  diff :
110+                 match  =  hunk_header_re .match (line )
111+                 if  match :
112+                     # This is a hunk header line. 
113+                     # The line number from the regex is 1-based. 
114+                     start_line_num  =  int (match .group (1 ))
115+ 
116+                     # The index is 0-based, so we subtract 1. 
117+                     # We search from the line where the change starts, or the line before it. 
118+                     context_line_index  =  max (0 , start_line_num  -  1 )
119+                     context  =  DiffGenerator ._find_context (context_line_index , base_lines )
120+ 
121+                     if  context :
122+                         # If context was found, append it to the hunk header. 
123+                         enhanced_diff .append (f"{ line }   { context }  " )
124+                     else :
125+                         # Otherwise, use the original hunk header. 
126+                         enhanced_diff .append (line )
127+                 else :
128+                     # This is not a hunk header, just a regular diff line (+, -, ' '). 
129+                     enhanced_diff .append (line )
130+             
131+             # If the enhancement process completes successfully, return the result. 
132+             return  '\n ' .join (enhanced_diff )
133+ 
49134        except  Exception  as  e :
50-             logger .exception (f"Error generating diff: { str (e )}  " )
51-             return  "" 
135+             # If any error occurred during the enhancement, log a warning and fall back. 
136+             logger .warning (
137+                 f"Could not add hunk header context due to an unexpected error: { str (e )}  . " 
138+                 "Falling back to standard diff output." 
139+             )
140+             # --- Fallback mechanism --- 
141+             # Return the original, unmodified diff generated by difflib. 
142+             return  '\n ' .join (diff )
52143
53144
54145class  DataAnonymizer :
0 commit comments