|
| 1 | +#!/usr/bin/perl |
| 2 | +# ====================================================================== |
| 3 | +# NAME: fettle.pl (Dynamic via $me) |
| 4 | +# PURPOSE: A high-integrity utility for making exact, precise, and |
| 5 | +# repeatable changes to files using only perl-base. |
| 6 | +# ====================================================================== |
| 7 | +use strict; |
| 8 | +use warnings; |
| 9 | +use Getopt::Long; |
| 10 | +use File::Basename; |
| 11 | + |
# Identify the script name dynamically for usage and error reporting.
my $me = basename($0);

# --- 1. Global Constants & Indices ---
# Indices into the 13-element list returned by stat(); see perldoc -f stat.
my $ST_SIZE = 7;   # total size of file, in bytes
my $ST_MTIME = 9;  # last-modified time, seconds since the epoch
| 19 | + |
| 20 | +# --- 2. Minimal Native Copy --- |
| 21 | +# Performs a binary-safe copy without external dependencies. |
| 22 | +# Used for creating temporary work files and original backups. |
# Binary-safe file copy with no external dependencies, used for creating
# temporary work files and .orig backups. Dies (message includes $me) on
# any read, write, or finalization failure.
sub native_copy {
    my ($source_path, $destination_path) = @_;
    open(my $in, '<', $source_path) or die "$me: Could not read $source_path: $!";
    open(my $out, '>', $destination_path) or die "$me: Could not write $destination_path: $!";
    binmode($in);
    binmode($out);
    # Copy in fixed-size chunks rather than line-by-line: binary input may
    # contain no newlines at all, which would force the entire file into a
    # single "line" under the readline-based approach.
    my $chunk;
    while (1) {
        my $bytes = read($in, $chunk, 65536);
        die "$me: Read from $source_path failed: $!" unless defined $bytes;
        last if $bytes == 0;    # clean EOF
        print {$out} $chunk or die "$me: Write to $destination_path failed: $!";
    }
    close($in);
    # Buffered write errors only surface at close; ignoring them here would
    # let a short/corrupt copy slip past and fail the later integrity check
    # with a misleading message.
    close($out) or die "$me: Could not finalize $destination_path: $!";
}
| 33 | + |
| 34 | +# --- 3. Hashing Helpers --- |
| 35 | +# Internal helper to get a file's fingerprint using the system 'cksum' utility. |
# Internal helper: fingerprint a file via the system 'cksum' utility.
# Returns the lowercase/uppercase hex digest, or "" when the file is
# missing, cksum cannot be run, or its output is unparseable.
sub _get_digest {
    my ($file_path, $algo) = @_;
    return "" unless -f $file_path;
    # List-form pipe open: no shell is involved, so metacharacters in the
    # path cannot be interpreted.
    open(my $pipe, "-|", "/usr/bin/cksum", "-a", $algo, $file_path) or return "";
    my $cksum_output = <$pipe>;
    # close() on a pipe is false when the child exits non-zero; treat any
    # such failure as "no digest available".
    my $child_ok = close($pipe);
    # Guard before chomp: chomp(undef) raises an uninitialized-value
    # warning when the pipe produced no output at all.
    return "" unless $child_ok && defined $cksum_output;
    chomp($cksum_output);
    return "" unless $cksum_output;
    # cksum prints e.g. "SHA512 (path) = <hex>"; keep only the hex part.
    my ($fingerprint) = $cksum_output =~ /=\s+([a-f0-9]+)/i;
    return $fingerprint || "";
}
| 50 | + |
# Thin readability wrappers over _get_digest: sha1 identifies the patch
# itself; sha512 fingerprints target files for integrity checking.
sub get_patch_id        { _get_digest($_[0], "sha1") }
sub get_file_fingerprint { _get_digest($_[0], "sha512") }
| 54 | + |
# --- 4. Configuration & Globals ---
# Default fuzz_range lets the matcher look 25 lines up/down for a match.
my ($dry_run, $revert, $clean, $fuzz_range) = (0, 0, 0, 25);
# Check the GetOptions return value: otherwise a mistyped flag (e.g.
# --dryrun) is silently ignored and the script applies for real.
GetOptions(
    "dry-run" => \$dry_run,     # Pre-calculates offsets and validates files
    "fuzz=i"  => \$fuzz_range,  # User-adjustable search range for line drifts
    "revert"  => \$revert,      # Restore files from .orig backups
    "clean"   => \$clean        # Delete .orig backups
) or die "Usage: $me [--dry-run|--revert|--clean] [--fuzz=N] <patch_file>\n";

# Test defined-ness, not truth: the original "= $ARGV[0] or die" rejected
# a patch file literally named "0".
my $patch_file = $ARGV[0];
die "Usage: $me [--dry-run|--revert|--clean] <patch_file>\n" unless defined $patch_file;
my $patch_hash = get_patch_id($patch_file);
die "$me: Could not generate ID for patch file.\n" unless $patch_hash;
| 68 | + |
# Locate a writable directory for state tracking, preferring /cache if on a tmpfs mount.
my $state_directory;
if (open(my $mount_fh, '<', '/proc/mounts')) {
    while (my $mount_line = <$mount_fh>) {
        # /proc/mounts fields: device mountpoint fstype options dump pass.
        if ($mount_line =~ /^\S+\s+\/cache\s+tmpfs\s+/) {
            $state_directory = "/cache" if -w "/cache";
            last;
        }
    }
    close($mount_fh);
}
# Fall back to $TMPDIR when it is a writable directory (the bare "_"
# reuses the stat buffer from the preceding -d test), else the cwd.
$state_directory ||= ($ENV{TMPDIR} && -d $ENV{TMPDIR} && -w _) ? $ENV{TMPDIR} : ".";

# The state file is keyed by the patch's own hash, so state from different
# patch files can never collide.
my $state_file = "$state_directory/.${me}_state_${patch_hash}";
# Short hash prefix keeps per-patch temp-file names unique yet readable.
my $temp_suffix = substr($patch_hash, 0, 11);
| 84 | + |
# --- 5. Patch Parsing ---
# Scans the patch file to build a map of files to be modified and their hunks.
open(my $patch_fh, '<', $patch_file) or die "$me: Cannot open patch: $!\n";
binmode($patch_fh);
# Sniff the first two bytes for the gzip magic number.
read($patch_fh, my $buffer, 2);
close($patch_fh);
if (defined $buffer && $buffer eq "\x1f\x8b") {
    # Compressed patch: stream through gzip -dc (list form, no shell).
    open($patch_fh, "-|", "/usr/bin/gzip", "-dc", "--", $patch_file) or die "$me: Cannot open patch using zcat: $!";
} else {
    open($patch_fh, '<', $patch_file) or die "$me: Cannot open patch: $!\n";
}
# %patches maps filename => { deleted => 0|1, hunks => [ { old_start,
# old_count, new_start, new_count, lines => [...], no_eof_newline } ] }.
my %patches;
my $current_file;       # file the subsequent hunk lines belong to
my $is_git_format = 0;  # set once a "diff --git" header has been seen

while (my $line = <$patch_fh>) {
    # Detects git-style diff headers to properly handle additions/deletions.
    if ($line =~ /^diff --git\s+a\/.+?\s+b\/.+$/) {
        undef $current_file; # Reset context for high-integrity parsing
        $is_git_format = 1; next;
    }
    elsif ($is_git_format && $line =~ /^deleted file mode/) {
        # NOTE(review): $current_file was just reset by the "diff --git"
        # branch above, so this flag is effectively never set here;
        # deletions are still caught by the "+++ /dev/null" path below.
        $patches{$current_file}{deleted} = 1 if $current_file; next;
    }
    # Parse source ('---') and destination ('+++') file paths.
    elsif ($line =~ /^--- (?:a\/)?(.+)$/) {
        my $path = $1; $path =~ s/(?:\t.*|\s+)$//;  # strip timestamp / trailing space
        $current_file = $path unless $path eq '/dev/null';
    }
    elsif ($line =~ /^\+\+\+ (?:b\/)?(.+)$/) {
        my $path = $1; $path =~ s/(?:\t.*|\s+)$//;
        # "+++ /dev/null" marks $current_file (from the '---' line) as deleted.
        if ($path eq '/dev/null') { $patches{$current_file}{deleted} = 1; }
        else { $current_file = $path; $patches{$current_file}{deleted} = 0; }
    }
    # Capture hunk headers: @@ -old_start,len +new_start,len @@
    # Handle hunk header with optional counts (default to 1)
    elsif ($current_file && $line =~ /^@@ \-(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/) {
        push @{$patches{$current_file}{hunks}}, {
            old_start => $1,
            old_count => $2 // 1, # Default to 1 if count is missing
            new_start => $3,
            new_count => $4 // 1, # Default to 1 if count is missing
            lines => [],
            no_eof_newline => 0
        };
    }
    # Accumulate hunk content (context lines, additions, or deletions).
    elsif ($current_file && @{$patches{$current_file}{hunks}}) {
        if ($line =~ /^\\ No newline at end of file/) {
            $patches{$current_file}{hunks}[-1]{no_eof_newline} = 1;
        }
        else {
            # Keep only true hunk body lines: context ' ', add '+', del '-'.
            push @{$patches{$current_file}{hunks}[-1]{lines}}, $line if $line =~ /^[ \+\-]/;

        }
    }
}
close($patch_fh);
| 143 | + |
# --- 6. Clean and Revert ---
# Load per-file status recorded by a previous --dry-run so revert knows
# whether a file pre-existed ("EXISTING") or was created by the patch ("NEW").
my %state_metadata;
if (-e $state_file) {
    # Check the open explicitly: reading from an unopened handle would only
    # emit warnings and silently leave the metadata empty, making revert
    # treat patch-created files as pre-existing.
    open(my $sf_fh, '<', $state_file) or die "$me: Cannot read state file $state_file: $!\n";
    <$sf_fh>; # Skip the CKSUM header line
    while (<$sf_fh>) {
        chomp;
        # Record format: name|mtime|size|offsets|status|hash
        my ($filename, $mtime, $size, $offsets, $status, $source_hash) = split(/\|/);
        $state_metadata{$filename} = { status => $status, hash => $source_hash };
    }
    close($sf_fh);
}
| 156 | + |
if ($clean || $revert) {
    print(($clean ? "Cleaning backups...\n" : "Reverting to original state...\n"));
    my $errors = 0;
    foreach my $target (keys %patches) {
        my $backup = "$target.orig";
        if ($clean && -e $backup) {
            unlink($backup) or (warn("$me: Skip delete $backup: $!\n"), $errors++);
        }
        elsif ($revert && -e $backup) {
            # If the file was created by the patch, remove it entirely.
            if ($state_metadata{$target} && $state_metadata{$target}{status} eq "NEW") {
                my $removal_failed = 0;
                if (-e $target && !unlink($target)) {
                    warn "$me: Failed to remove $target: $!\n";
                    $removal_failed = 1;
                    $errors++;
                }
                unlink($backup) or $errors++;
                # Report success only when the created file is actually gone
                # (the original printed this unconditionally).
                print " Removed created file: $target\n" unless $removal_failed;
            } else {
                # Restore existing files from their .orig backup. Keep going
                # on individual failures so the rest can still be restored,
                # but only announce the files that really were restored.
                if (rename($backup, $target)) {
                    print " Restored: $target\n";
                } else {
                    warn "$me: Failed to restore $target: $!\n";
                    $errors++;
                }
            }
        }
    }

    # Keep the state file if anything failed so a re-run can retry cleanly.
    unlink($state_file) if -e $state_file && $errors == 0;

    # Exit with non-zero status if any part of the operation failed.
    exit($errors > 0 ? 1 : 0);
}
| 186 | + |
# --- 7. Hunk Matching Engine ---
# Locate the zero-based line index at which a hunk applies, tolerating a
# drift of up to $fuzz_range lines in either direction. Returns the index,
# or undef when no position within range matches.
sub find_hunk_index {
    my ($file_content, $hunk_lines, $start_pos) = @_;
    # Only lines the hunk expects to already exist take part in matching:
    # context (' ') and removals ('-'); additions are ignored.
    my @expected = grep { /^[ -]/ } @$hunk_lines;

    # Widen the search one line at a time, starting from the exact
    # position and preferring the candidate above the original line.
    foreach my $delta (0 .. $fuzz_range) {
        if ($delta == 0) {
            return $start_pos if verify_context($file_content, \@expected, $start_pos);
            next;
        }
        return $start_pos - $delta
            if verify_context($file_content, \@expected, $start_pos - $delta);
        return $start_pos + $delta
            if verify_context($file_content, \@expected, $start_pos + $delta);
    }
    return undef; # Hunk does not apply anywhere within the fuzz window.
}
| 204 | + |
# Verify that the hunk's expected lines match the file content starting at
# index $idx. $lines is the file as an array of lines (usually with
# trailing newlines); $search holds raw hunk lines whose first character
# is the ' '/'-' indicator. Comparison ignores trailing CR/LF so CRLF
# files — and a final file line with no newline — still match.
sub verify_context {
    my ($lines, $search, $idx) = @_;
    my $search_size = scalar @$search;
    # A window that starts before the file or runs past its end cannot match.
    return 0 if $idx < 0 || ($idx + $search_size) > scalar @$lines;
    for (my $i = 0; $i < $search_size; $i++) {
        my $f_text = $lines->[$idx + $i];
        my $h_text = substr($search->[$i], 1);  # drop the indicator column
        # Strip trailing "\r", "\n", or "\r\n" from BOTH sides. The original
        # removed only "\r", so a file whose last line lacked a trailing
        # newline could never match its hunk line (which still carried "\n").
        $f_text =~ s/\r?\n?\z//;
        $h_text =~ s/\r?\n?\z//;
        return 0 if $f_text ne $h_text;
    }
    return 1;
}
| 217 | + |
# --- 8. Dry Run ---
# Pre-validation phase: checks that every hunk can be matched and records
# each file's per-hunk offsets, size/mtime, and sha512 for the apply phase.
if ($dry_run) {
    open(my $sf_out, '>', $state_file) or die "$me: Cannot create state file: $!\n";
    print $sf_out "CKSUM:$patch_hash\n";
    foreach my $f (sort keys %patches) {
        if ($patches{$f}{deleted}) { print "DELETE: $f\n"; next; }
        if (-f $f) {
            my @stats = stat($f);
            my $file_hash = get_file_fingerprint($f);
            open(my $fh, '<', $f) or die "$me: Cannot read $f: $!\n";  # was unchecked
            my @content = <$fh>;
            close($fh);
            # BUG FIX: the original "my (@offsets, $failed) = ((), 0);"
            # flattens its right side into one list, so @offsets swallowed
            # the 0 as a spurious leading element (and $failed stayed
            # undef). That shifted every recorded hunk offset by one slot
            # when the apply phase later indexed $offsets[$i].
            my @offsets;
            my $failed = 0;
            foreach my $h (@{$patches{$f}{hunks}}) {
                my $idx = find_hunk_index(\@content, $h->{lines}, $h->{old_start} - 1);
                if (defined $idx) {
                    push @offsets, ($idx - ($h->{old_start} - 1));
                } else { $failed = 1; }
            }
            # Save metadata to ensure the file hasn't changed between dry-run and apply.
            print $sf_out "$f|$stats[$ST_MTIME]|$stats[$ST_SIZE]|" . join(",", @offsets) . "|EXISTING|$file_hash\n" unless $failed;
            print(($failed ? "FAIL: " : "READY: ") . "$f\n");
        } elsif (!-e $f) {
            # Target absent: the patch will create it; mark it NEW.
            print $sf_out "$f|0|0||NEW|\n"; print "CREATE: $f\n";
        }
    }
    # A truncated state file would poison the apply phase; fail loudly.
    close($sf_out) or die "$me: Error writing state file: $!\n";
    exit 0;
}
| 245 | + |
# --- 9. Execution ---
# Load offsets/status/hash generated during the dry run. Each file's record
# is stored flat as (offset, offset, ..., status, hash).
my %stabilized_data;
if (-e $state_file) {
    # Fail loudly on an unreadable state file: proceeding with empty data
    # would make the drift check below compare against "" and abort anyway,
    # but with a confusing "State Conflict" message.
    open(my $sf_in, '<', $state_file) or die "$me: Cannot read state file $state_file: $!\n";
    <$sf_in>; # Skip the CKSUM header line
    while (<$sf_in>) {
        chomp; my ($f, $m, $s, $o, $st, $shash) = split(/\|/);
        @{$stabilized_data{$f}} = (split(",", $o), $st, $shash);
    }
    close($sf_in);
}
| 257 | + |
my @processed_files;   # files backed up this run — the rollback set
my @deferred_unlinks;  # deletions postponed until every file succeeds

# Use eval to handle errors gracefully and trigger a rollback if any file fails.
eval {
    foreach my $target (keys %patches) {
        my $temp_work_file = "${target}.tmp_${temp_suffix}";
        unlink($temp_work_file) if -e $temp_work_file;
        my $backup_file = "$target.orig";
        # Record layout is (offsets..., status, hash): hash is the last element.
        my $expected_hash = defined $stabilized_data{$target} ? $stabilized_data{$target}[-1] : "";

        # Step A: Safe Backup Sequence
        # Creates a backup before any modification. rename() is used to ensure atomicity.
        if (-e $target && !-e $backup_file) {
            my $current_disk_hash = get_file_fingerprint($target);
            die "State Conflict: $target drift detected!\n" if $expected_hash ne $current_disk_hash;

            native_copy($target, $temp_work_file);
            rename($target, $backup_file) or die "Renaming backup failed: $target\n";
            rename($temp_work_file, $target) or die "Activating working copy failed: $target\n";

            # Validate that the file hasn't been corrupted during the copy/move process.
            my $work_hash = get_file_fingerprint($target);
            die "Integrity Check Failed: $target corruption!\n" if $expected_hash ne $work_hash;

            push @processed_files, $target;
            push @deferred_unlinks, $target if $patches{$target}{deleted};
        }
        elsif (!-e $target && !-e $backup_file) {
            # For new files, create an empty marker backup so rollback knows
            # to delete them rather than restore them.
            open(my $marker_fh, '>', $backup_file) or die "Marker backup failed: $backup_file\n";
            close($marker_fh);
            push @processed_files, $target;
        }

        next if $patches{$target}{deleted};

        # Step B: Application Logic
        # Builds the directory structure if it doesn't exist.
        my $target_dir = dirname($target);
        if (!-d $target_dir) {
            # BUG FIX: the original seeded $path_acc with "" and appended
            # "/$seg", which turned a RELATIVE directory like "sub/dir"
            # into the ABSOLUTE path "/sub/dir". Seed "." for relative
            # paths so segments accumulate under the current directory.
            my $path_acc = ($target_dir =~ m{^/}) ? "" : ".";
            foreach my $seg (split(/\//, $target_dir)) {
                next if $seg eq "";
                $path_acc .= "/$seg";
                mkdir($path_acc, 0755) if !-d $path_acc;
            }
        }

        # Slurp the current contents line-wise; a brand-new file starts empty.
        my @file_lines;
        if (-e $target) {
            open(my $fh, '<', $target) or die "Read failed: $target\n";  # was unchecked
            @file_lines = <$fh>;
            close($fh);
        }
        my @hunks = @{$patches{$target}{hunks} // []};
        # Offsets are everything before the trailing (status, hash) pair.
        my @offsets = defined $stabilized_data{$target} ? @{$stabilized_data{$target}}[0..$#{$stabilized_data{$target}}-2] : ();
        my $suppress_final_newline = 0;

        # Apply hunks in reverse order to keep line indices stable for earlier hunks.
        for (my $i = $#hunks; $i >= 0; $i--) {
            my $h = $hunks[$i];
            my $zi_ln = $h->{old_start} - 1;  # zero-based expected index
            # Prefer the dry-run offset when one was recorded; otherwise
            # search now. An empty file trivially matches at index 0.
            my $match_idx = (@file_lines) ? (defined $offsets[$i] ? ($zi_ln+$offsets[$i]) : find_hunk_index(\@file_lines, $h->{lines}, $zi_ln)) : 0;
            die "Match failed during apply: $target\n" unless defined $match_idx;

            # Only the final hunk may legitimately drop the trailing newline.
            $suppress_final_newline = 1 if $i == $#hunks && $h->{no_eof_newline};

            my ($removed_count, @transformed) = (0, ());
            foreach my $line (@{$h->{lines}}) {
                my $ind = substr($line, 0, 1);
                my $text = (length($line) > 1) ? substr($line, 1) : "";
                # NOTE(review): this normalizes line endings, so CRLF input
                # files are rewritten with LF endings on commit.
                $text =~ s/\r?[\n]+$//;
                $text = ($text . "\n");
                # ' ' and '-' consume existing lines; ' ' and '+' are kept.
                if ('-' eq $ind || ' ' eq $ind) {
                    $removed_count++;
                    push @transformed, $text if ' ' eq $ind;
                }
                elsif ('+' eq $ind) { push @transformed, $text; }
            }
            # Use splice to replace the matched block with the new transformed lines.
            splice(@file_lines, $match_idx, $removed_count, @transformed);
        }

        # Step C: Final Atomic Commit
        # Writes the fully patched result to a temp file, then swaps it with the target.
        open(my $out_fh, '>', $temp_work_file) or die "Write temp failed: $target\n";
        for (my $i = 0; $i <= $#file_lines; $i++) {
            my $l = $file_lines[$i]; $l =~ s/\r?[\n]+$//;
            print $out_fh ($i == $#file_lines && $suppress_final_newline) ? $l : $l . "\n";
        }
        # Buffered write failures surface at close; dying here triggers
        # rollback instead of committing a truncated file.
        close($out_fh) or die "Finalizing temp file failed: $target\n";
        rename($temp_work_file, $target) or die "Commit failed: $target\n";
    }

    # Clean up files marked for deletion after successful application.
    foreach my $f_to_del (@deferred_unlinks) {
        unlink($f_to_del) or warn "$me: Unlink failed: $f_to_del: $!\n";
    }
};
| 354 | + |
# Rollback Logic: Restores files to their state before the script began if an error occurred.
if ($@) {
    warn "$me: Application Error: $@. Rolling back changes...\n";
    foreach my $f (@processed_files) {
        my $orig = "$f.orig";
        if (-e $orig) {
            # Status is the second-to-last element of the stored record
            # (offsets..., status, hash); default to EXISTING when no
            # dry-run record exists for this file.
            my $status = defined $stabilized_data{$f} ? $stabilized_data{$f}[-2] : "EXISTING";
            # NEW files were created by this patch: delete the file and its
            # empty marker backup. EXISTING files get their backup renamed back.
            if ($status eq "NEW") { unlink($f) if -e $f; unlink($orig); }
            else { rename($orig, $f); }
        }
    }
    exit 1;
}

# Success: the state file is single-use; remove it so stale offsets can
# never be applied to a later run.
unlink($state_file) if -e $state_file;
print "Success.\n";
| 372 | + |
0 commit comments