Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: shellcheck -o all -e SC2250,SC2292 diff-logs
- run: pip install flake8
- run: flake8 --max-line-length=120 *.py
- run: sudo apt-get install man-db- libperl-critic-perl
- run: perlcritic diff-logs
- run: echo '2000-01-01T00:00:00' > file.log
- run: time ./diff-logs < file.log
- run: time ./diff-logs file.log file.log
- run: time tests/test.sh
- run: podman run -v.:/src ubuntu:latest /src/tests/test.sh # Runs on base container

workflow-keepalive:
if: github.event_name == 'schedule'
Expand Down
32 changes: 17 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
`diff-logs`
===========
[![Build Status](https://img.shields.io/github/actions/workflow/status/kernc/diff-logs/ci.yml?branch=master&style=for-the-badge)](https://github.com/kernc/diff-logs/actions)
[![Language: shell](https://img.shields.io/badge/lang-Shell-peachpuff?style=for-the-badge)](https://github.com/kernc/diff-logs)
[![Language: Python](https://img.shields.io/badge/lang-Python-skyblue?style=for-the-badge)](https://github.com/kernc/diff-logs)
[![Source lines of code](https://img.shields.io/endpoint?url=https://ghloc.vercel.app/api/kernc/diff-logs/badge?filter=diff-logs.py,diff-logs$&style=for-the-badge&color=greenyellow&label=SLOC)](https://github.com/kernc/diff-logs)
[![Language: Perl](https://img.shields.io/badge/lang-Perl-056?style=for-the-badge)](https://github.com/kernc/diff-logs)
[![Source lines of code](https://img.shields.io/endpoint?url=https://ghloc.vercel.app/api/kernc/diff-logs/badge?filter=diff-logs$&style=for-the-badge&color=greenyellow&label=SLOC)](https://github.com/kernc/diff-logs)
[![Script size](https://img.shields.io/github/languages/code-size/kernc/diff-logs?style=for-the-badge&color=greenyellow)](https://github.com/kernc/diff-logs)
[![](https://img.shields.io/github/issues/kernc/diff-logs?style=for-the-badge)](https://github.com/kernc/diff-logs/issues)
[![Bug tracker](https://img.shields.io/github/issues/kernc/diff-logs?style=for-the-badge)](https://github.com/kernc/diff-logs/issues)

A command-line utility for diff'ing log files.

Quickly find **difference lines** in **all kinds of logs**,
namely build/CI logs, server/container logs, or any similar such.
Figure out quickly **what changed** and _why exactly_ the shit is failing.
Figure out quickly **what changed** and _why exactly_ your shit is failing.

The script works by simply replacing common stochastic string [patterns],
such as datetime timestamps, download speeds, temporary files,
HTTP header values, UUIDs, hash digests etc. with known fixed
such as datetime timestamps, download speeds, temporary filenames,
HTTP header values, UUIDs, hash digests etc. etc. with known fixed
values that a tool such as `diff` can then easily skip.

[patterns]: https://github.com/kernc/diff-logs/blob/master/diff-logs.py
[patterns]: https://github.com/kernc/diff-logs/blob/master/diff-logs


Installation
Expand All @@ -27,13 +26,12 @@ First, check if your OS distro already provides an installable `diff-logs` packa

Otherwise:
1. Star, [download](https://github.com/kernc/diff-logs/archive/refs/heads/master.zip)
or clone repo.
2. (Optional) Create a symlink in your bin-dir pointing to `diff-logs` shell script:
or clone repo. 🫶
2. Put `diff-logs` script into your bin-dir or elsewhere on `$PATH`:
```shell
mkdir -p ~/.local/bin
export PATH="$HOME/.local/bin:$PATH" # Also put in .bashrc or similar
# Link script into your bin
ln -s ~/path/to/diff-logs/diff-logs ~/.local/bin/diff-logs
curl -vL https://github.com/kernc/diff-logs/raw/refs/heads/master/diff-logs |
sudo tee /usr/local/bin/diff-logs
sudo chmod +x /usr/local/bin/diff-logs
```


Expand Down Expand Up @@ -63,7 +61,11 @@ diff-logs FILE1 FILE2 # Invokes `meld`
diff-logs < FILE1 > FILE1.clean
```

Notes
-----
This once was Python, but Perl is even more ubiquitous.

-----
Finally, we can diff our logs with ease! 🥳

Improvements welcome!
Improvements and additions welcome!
107 changes: 85 additions & 22 deletions diff-logs
Original file line number Diff line number Diff line change
@@ -1,24 +1,87 @@
#!/bin/bash
#
# This is the diff-logs utility for diff'ing log files.
#
# See usage instructions below.
#
set -eu
set -o pipefail
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use File::Temp ();

lib="$(dirname "$(command -v "$0" || true)")"
pyscript="$lib/diff-logs.py"
difftool="${DIFFTOOL:-diff}"
# Ordered normalization rules, each a [ compiled regex, replacement ] pair.
# Rules are applied to every input line in the order listed here, so later
# rules operate on text already rewritten by earlier ones.
# A replacement containing '$+' is Perl source that is evaluated (s///ee) to
# build the result from named captures; any other replacement is used
# literally and must itself fully match its own pattern (checked by the
# self-test that follows this list).
my @PATTERNS = (
# Date/time
[ qr/\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?Z?/, '2111-11-11 11:11:11' ],
[ qr/\w{3,}, \d{1,2} \w{3,} \d{4,4} \d{1,2}(?::\d{1,2}){2} [A-Z]{3}/, 'Thu, 11 Nov 2111 11:11:11 GMT' ],
[ qr/\d{2}-\d{2}-\d{4} \d{2}(?::\d{2}){2}\.\d+/, '11-11-2111 11:11:11.111111' ],
[ qr/[A-Z][a-z]{2} [ \d]\d \d{2}:\d{2}/, 'Nov 11 11:11' ], # `ls -l` format
# Other timestamp
[ qr/\b\d+(?:\.\d+)?s(?:ec)?\b/, '1.1s' ],
[ qr/\b(in|since) \d+\.\d+/, 'in 1.1' ],
# File/download sizes
[ qr/\d+(?:\.\d+)?(?:\/\d)? ?(?P<suffix>[kmg](?:i?b)?)\b/i, q["1 " . $+{suffix}] ],
# TCP / HTTP
[ qr/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/, '11.1.1.1' ], # IPv4
[ qr/:\d{5,5}\b/, ':11111' ], # Remote port
[ qr/\bport \d{5,5}\b/, 'port 11111' ], # Remote port
[ qr{\bW/(?<quote>\\?")[^"]*\k<quote>}, 'W/"ETag"' ], # ETag header
# Common files
[ qr{/tmp/[^\s/:"']{6,}(?:/[^\s/:"']+)*/?}, '/tmp/d1ff1065' ],
# Common tools
[ qr/(?<step_no>(?:\s|\A)#\d+) \d+\.\d+/, q[$+{step_no} . " 1"] ], # Docker build steps
# strace process PIDs
[ qr/(?<prefix>(?:\b|_?)pid[ =])\d{4,}\b/, q[$+{prefix} . "11111"] ],
[ qr/(?<prefix>strace: Process )\d+/, q[$+{prefix} . "111111"] ],

if [ $# -eq 0 ]; then
"$pyscript" <&0;
elif [ $# -eq 2 ]; then
case $difftool in diff) args='--color=auto' ;; *) args= ;; esac
# shellcheck disable=SC2086,SC2248,SC2312
$difftool $args <("$pyscript" < "$1") <("$pyscript" < "$2");
else
echo "Usage: $0 < FILE.log # Print log file diff-friendly" >&2
echo " $0 FILE1.log FILE2.log # Invoke \$DIFFTOOL (e.g. diff)" >&2
exit 1
fi
[ qr/(?:[\da-fA-F]{4,}-){4,}[\da-fA-F]{4,}/, 'd1ff1065-d1ff-1065-1007-d1ff1065' ],
[ qr/[a-zA-Z0-9]{18,}/, 'AAAAAAAAAAAAAAAAAA' ], # Long payload
[ qr/[a-fA-F0-9]{7,}/, 'd1ff1065' ], # Hash digest
# Progress bar, e.g. in pip, tqdm
[ qr{(?<indent>[ \t]*)(?: *(?:\[ *)?\d+%(?:])? *)?[[|]?[\x{2500}-\x{259F}=.\-]{5,} *[\]|]?(?: *(?:\[ *)?\d+%(?:])? *)?[(]?[\d.KMGB ]+/.*}, q[$+{indent} . "......."] ],
);

# Startup sanity check: every literal replacement (one that is not a '$+'
# expression) has to be matched in full by its own pattern; otherwise the
# rule would not be idempotent and the script refuses to run.
foreach my $rule (@PATTERNS) {
    my $pattern     = $rule->[0];
    my $replacement = $rule->[1];
    # Computed replacements are built from captures and are not checked here.
    next if index($replacement, '$+') != -1;
    next if $replacement =~ m/\A(?:$pattern)\z/s;
    die "Assertion failed: Pattern-replacement pair '$pattern' => '$replacement' not idempotent!";
}

# Copy $in_fh to $out_fh line by line, rewriting each line with every
# rule in @PATTERNS (applied in list order) before it is written out.
#   $in_fh  - readable filehandle supplying the raw log lines
#   $out_fh - writable filehandle receiving the normalized lines
sub normalize {
    my ($in_fh, $out_fh) = @_;
    while (defined(my $text = readline($in_fh))) {
        foreach my $entry (@PATTERNS) {
            my $pattern     = $entry->[0];
            my $replacement = $entry->[1];
            if (index($replacement, '$+') == -1) {
                # Plain replacement text: substitute it verbatim.
                $text =~ s/$pattern/$replacement/g;
                next;
            }
            # The replacement is an expression over named captures (%+);
            # /ee evaluates it twice to obtain the final string.
            $text =~ s/$pattern/$replacement/gee;
        }
        print {$out_fh} $text;
    }
}

# Main
#
# With no arguments: normalize STDIN and print the result to STDOUT.
# With two arguments: normalize both files into temporary files, run
# $DIFFTOOL on them (default: `diff --color=auto`) and exit with the
# tool's exit status.
# Any other argument count prints usage to STDERR and exits with status 1.
my $argc = @ARGV;
if ($argc == 0) {
    binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8");  ## no critic
    normalize(\*STDIN, \*STDOUT);
} elsif ($argc == 2) {
    my ($file1, $file2) = @ARGV;
    my $temp1 = File::Temp->new(UNLINK => 1);
    my $temp2 = File::Temp->new(UNLINK => 1);
    binmode($temp1, ':utf8'); binmode($temp2, ':utf8');  ## no critic
    open my $fh1_in, '<:utf8', $file1 or die "Error: Cannot read '$file1': $!";  ## no critic
    open my $fh2_in, '<:utf8', $file2 or die "Error: Cannot read '$file2': $!";  ## no critic
    normalize($fh1_in, $temp1);
    normalize($fh2_in, $temp2);
    close $fh1_in; close $fh2_in;
    # Make sure the normalized text is on disk before the diff tool reads it.
    $temp1->flush or die "Error: Cannot write '" . $temp1->filename . "': $!";
    $temp2->flush or die "Error: Cannot write '" . $temp2->filename . "': $!";
    my $difftool = $ENV{DIFFTOOL} || 'diff';
    my @command = (
        $difftool,
        ($difftool eq 'diff' ? '--color=auto' : ()),
        $temp1->filename,
        $temp2->filename,
    );
    # Run the tool as a child process rather than exec()ing it: exec replaces
    # this process without running object destructors, so File::Temp would
    # never unlink the two temporary files.
    my $status = system @command;
    die "Error: Cannot run '$difftool': $!\n" if $status == -1;
    # Mirror the tool's outcome: 128+N if it was killed by signal N,
    # otherwise its own exit code (e.g. 1 from `diff` when the files differ).
    exit 128 + ($? & 127) if $? & 127;
    exit $? >> 8;
} else {
    print STDERR "Usage: $0 < FILE # Print log file diff-friendly\n";
    print STDERR " $0 FILE1 FILE2 # Invoke \$DIFFTOOL (e.g. diff)\n";
    exit 1;
}
58 changes: 0 additions & 58 deletions diff-logs.py

This file was deleted.