|
1 | | -#!/bin/bash |
2 | | -# |
3 | | -# This is the diff-logs utility for diff'ing log files. |
4 | | -# |
5 | | -# See usage instructions below. |
6 | | -# |
7 | | -set -eu |
8 | | -set -o pipefail |
| 1 | +#!/usr/bin/env perl |
| 2 | +use strict; |
| 3 | +use warnings; |
| 4 | +use utf8; |
| 5 | +use File::Temp (); |
9 | 6 |
|
10 | | -lib="$(dirname "$(command -v "$0" || true)")" |
11 | | -pyscript="$lib/diff-logs.py" |
12 | | -difftool="${DIFFTOOL:-diff}" |
# Normalization rules, applied to every input line in the order listed here.
#
# Each entry is a two-element array ref: [ compiled regex => replacement ].
# A replacement that contains `$+` is not literal text but a Perl expression
# over the named captures in %+; normalize() evaluates it via s///gee. Every
# other replacement is inserted verbatim.
my @PATTERNS = (

    # --- Dates and times ---------------------------------------------------
    [ qr/\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?Z?/
        => '2111-11-11 11:11:11' ],
    [ qr/\w{3,}, \d{1,2} \w{3,} \d{4,4} \d{1,2}(?::\d{1,2}){2} [A-Z]{3}/
        => 'Thu, 11 Nov 2111 11:11:11 GMT' ],
    [ qr/\d{2}-\d{2}-\d{4} \d{2}(?::\d{2}){2}\.\d+/
        => '11-11-2111 11:11:11.111111' ],
    # Month/day/time as printed by `ls -l`
    [ qr/[A-Z][a-z]{2} [ \d]\d \d{2}:\d{2}/
        => 'Nov 11 11:11' ],

    # --- Durations and relative times --------------------------------------
    [ qr/\b\d+(?:\.\d+)?s(?:ec)?\b/
        => '1.1s' ],
    [ qr/\b(in|since) \d+\.\d+/
        => 'in 1.1' ],

    # --- File / download sizes (the unit suffix is kept as matched) ---------
    [ qr/\d+(?:\.\d+)?(?:\/\d)? ?(?P<suffix>[kmg](?:i?b)?)\b/i
        => q["1 " . $+{suffix}] ],

    # --- TCP / HTTP ----------------------------------------------------------
    # IPv4 address
    [ qr/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/
        => '11.1.1.1' ],
    # Five-digit (remote) port after a colon
    [ qr/:\d{5,5}\b/
        => ':11111' ],
    # Five-digit (remote) port spelled out as "port NNNNN"
    [ qr/\bport \d{5,5}\b/
        => 'port 11111' ],
    # Weak ETag header value, with plain or backslash-escaped quotes
    [ qr{\bW/(?<quote>\\?")[^"]*\k<quote>}
        => 'W/"ETag"' ],

    # --- Temporary paths -----------------------------------------------------
    [ qr{/tmp/[^\s/:"']{6,}(?:/[^\s/:"']+)*/?}
        => '/tmp/d1ff1065' ],

    # --- Tool-specific output ------------------------------------------------
    # Docker build steps: keep the "#N" step number, flatten the timing
    [ qr/(?<step_no>(?:\s|\A)#\d+) \d+\.\d+/
        => q[$+{step_no} . " 1"] ],
    # strace process PIDs: keep the prefix, flatten the number
    [ qr/(?<prefix>(?:\b|_?)pid[ =])\d{4,}\b/
        => q[$+{prefix} . "11111"] ],
    [ qr/(?<prefix>strace: Process )\d+/
        => q[$+{prefix} . "111111"] ],

    # --- Opaque identifiers --------------------------------------------------
    # Dash-separated hex groups (UUID-like)
    [ qr/(?:[\da-fA-F]{4,}-){4,}[\da-fA-F]{4,}/
        => 'd1ff1065-d1ff-1065-1007-d1ff1065' ],
    # Long alphanumeric payload.
    # NOTE(review): this placeholder consists only of the hex letter "A", so
    # the hash-digest rule right below matches it and rewrites it again to
    # 'd1ff1065' -- confirm whether that is intended.
    [ qr/[a-zA-Z0-9]{18,}/
        => 'AAAAAAAAAAAAAAAAAA' ],
    # Hash digest
    [ qr/[a-fA-F0-9]{7,}/
        => 'd1ff1065' ],

    # --- Progress bars (e.g. pip, tqdm): keep only the leading indent -------
    [ qr{(?<indent>[ \t]*)(?: *(?:\[ *)?\d+%(?:])? *)?[[|]?[\x{2500}-\x{259F}=.\-]{5,} *[\]|]?(?: *(?:\[ *)?\d+%(?:])? *)?[(]?[\d.KMGB ]+/.*}
        => q[$+{indent} . "......."] ],
);
| 37 | + |
# Load-time sanity check: every literal replacement must itself be matched in
# full by its own pattern, so that running a rule over already-normalized
# text reproduces the same text. Expression replacements (those containing
# `$+`) are not literal text and are therefore skipped.
foreach my $entry (@PATTERNS) {
    my $pattern     = $entry->[0];
    my $replacement = $entry->[1];

    next if index($replacement, '$+') >= 0;
    next if $replacement =~ m/\A(?:$pattern)\z/s;

    die "Assertion failed: Pattern-replacement pair '$pattern' => '$replacement' not idempotent!";
}
| 47 | + |
# normalize($in_fh, $out_fh)
#
# Copies $in_fh to $out_fh one line at a time, running every rule from
# @PATTERNS (in table order) over each line before it is written.
#
#   $in_fh  - readable filehandle supplying the raw log
#   $out_fh - writable filehandle receiving the normalized log
sub normalize {
    my ($in_fh, $out_fh) = @_;

    while (defined(my $text = readline $in_fh)) {
        foreach my $entry (@PATTERNS) {
            my $regex    = $entry->[0];
            my $template = $entry->[1];

            if (index($template, '$+') < 0) {
                # Literal replacement text.
                $text =~ s/$regex/$template/g;
            }
            else {
                # The template is a Perl expression over %+: the first /e
                # yields the expression string, the second /e evaluates it.
                $text =~ s/$regex/$template/gee;
            }
        }
        print {$out_fh} $text;
    }

    return;
}
| 65 | + |
# Main
#
# No arguments:  filter STDIN to STDOUT.
# Two arguments: normalize both files into temporary files and run $DIFFTOOL
#                (default: diff) on the results, exiting with its status.
# Anything else: print usage to STDERR and exit 1.
my $argc = @ARGV;
if ($argc == 0) {
    binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); ## no critic
    normalize(\*STDIN, \*STDOUT);
} elsif ($argc == 2) {
    my ($file1, $file2) = @ARGV;
    my $temp1 = File::Temp->new(UNLINK => 1);
    my $temp2 = File::Temp->new(UNLINK => 1);
    binmode($temp1, ':utf8'); binmode($temp2, ':utf8'); ## no critic
    open my $fh1_in, '<:utf8', $file1 or die "Error: Cannot read '$file1': $!"; ## no critic
    open my $fh2_in, '<:utf8', $file2 or die "Error: Cannot read '$file2': $!"; ## no critic
    normalize($fh1_in, $temp1);
    normalize($fh2_in, $temp2);
    close $fh1_in; close $fh2_in;

    # Close the temp files so everything is flushed to disk (and any write
    # error surfaces) before the diff tool reads them. The File::Temp objects
    # stay alive, so the files still exist and are removed on exit.
    close $temp1 or die "Error: Cannot write '" . $temp1->filename . "': $!";
    close $temp2 or die "Error: Cannot write '" . $temp2->filename . "': $!";

    my $difftool = $ENV{DIFFTOOL} || 'diff';
    my @command  = (
        $difftool,
        ($difftool eq 'diff' ? '--color=auto' : ()),
        $temp1->filename,
        $temp2->filename,
    );

    # Run the tool as a child instead of exec'ing it: exec replaces this
    # process without running DESTROY/END, so the UNLINK => 1 cleanup would
    # never happen and both temp files would be left behind.
    system(@command);
    if ($? == -1) {
        die "Error: Cannot run '$difftool': $!";
    }
    if ($? & 127) {
        printf STDERR "Error: '%s' terminated by signal %d\n", $difftool, $? & 127;
        exit(128 + ($? & 127));
    }
    # Propagate the tool's own exit status (for diff: 0 same, 1 different).
    exit($? >> 8);
} else {
    print STDERR "Usage: $0 < FILE # Print log file diff-friendly\n";
    print STDERR " $0 FILE1 FILE2 # Invoke \$DIFFTOOL (e.g. diff)\n";
    exit 1;
}
0 commit comments