Skip to content

Commit ca68544

Browse files
committed
ENH: Transcribe into simple, widely-adopted Perl
1 parent 7ba6468 commit ca68544

File tree

4 files changed

+107
-98
lines changed

4 files changed

+107
-98
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@ jobs:
1212
runs-on: ubuntu-latest
1313
steps:
1414
- uses: actions/checkout@v3
15-
- run: shellcheck -o all -e SC2250,SC2292 diff-logs
16-
- run: pip install flake8
17-
- run: flake8 --max-line-length=120 *.py
15+
- run: sudo apt-get install man-db- libperl-critic-perl
16+
- run: perlcritic diff-logs
1817
- run: echo '2000-01-01T00:00:00' > file.log
1918
- run: time ./diff-logs < file.log
2019
- run: time ./diff-logs file.log file.log
2120
- run: time tests/test.sh
21+
- run: podman run -v.:/src ubuntu:latest /src/tests/test.sh # Runs on base container
2222

2323
workflow-keepalive:
2424
if: github.event_name == 'schedule'

README.md

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,23 @@
11
`diff-logs`
22
===========
33
[![Build Status](https://img.shields.io/github/actions/workflow/status/kernc/diff-logs/ci.yml?branch=master&style=for-the-badge)](https://github.com/kernc/diff-logs/actions)
4-
[![Language: shell](https://img.shields.io/badge/lang-Shell-peachpuff?style=for-the-badge)](https://github.com/kernc/diff-logs)
5-
[![Language: Python](https://img.shields.io/badge/lang-Python-skyblue?style=for-the-badge)](https://github.com/kernc/diff-logs)
6-
[![Source lines of code](https://img.shields.io/endpoint?url=https://ghloc.vercel.app/api/kernc/diff-logs/badge?filter=diff-logs.py,diff-logs$&style=for-the-badge&color=greenyellow&label=SLOC)](https://github.com/kernc/diff-logs)
4+
[![Language: Perl](https://img.shields.io/badge/lang-Perl-056?style=for-the-badge)](https://github.com/kernc/diff-logs)
5+
[![Source lines of code](https://img.shields.io/endpoint?url=https://ghloc.vercel.app/api/kernc/diff-logs/badge?filter=diff-logs$&style=for-the-badge&color=greenyellow&label=SLOC)](https://github.com/kernc/diff-logs)
76
[![Script size](https://img.shields.io/github/languages/code-size/kernc/diff-logs?style=for-the-badge&color=greenyellow)](https://github.com/kernc/diff-logs)
8-
[![](https://img.shields.io/github/issues/kernc/diff-logs?style=for-the-badge)](https://github.com/kernc/diff-logs/issues)
7+
[![Bug tracker](https://img.shields.io/github/issues/kernc/diff-logs?style=for-the-badge)](https://github.com/kernc/diff-logs/issues)
98

109
A command-line utility for diff'ing log files.
1110

1211
Quickly find **difference lines** in **all kinds of logs**,
1312
namely build/CI logs, server/container logs, or any similar such.
14-
Figure out quickly **what changed** and _why exactly_ the shit is failing.
13+
Figure out quickly **what changed** and _why exactly_ your shit is failing.
1514

1615
The script works by simply replacing common stochastic string [patterns],
17-
such as datetime timestamps, download speeds, temporary files,
18-
HTTP header values, UUIDs, hash digests etc. with known fixed
16+
such as datetime timestamps, download speeds, temporary filenames,
17+
HTTP header values, UUIDs, hash digests etc. etc. with known fixed
1918
values that a tool such as `diff` can then easily skip.
2019

21-
[patterns]: https://github.com/kernc/diff-logs/blob/master/diff-logs.py
20+
[patterns]: https://github.com/kernc/diff-logs/blob/master/diff-logs
2221

2322

2423
Installation
@@ -27,13 +26,12 @@ First, check if your OS distro already provides an installable `diff-logs` packa
2726

2827
Otherwise:
2928
1. Star, [download](https://github.com/kernc/diff-logs/archive/refs/heads/master.zip)
30-
or clone repo.
31-
2. (Optional) Create a symlink in your bin-dir pointing to `diff-logs` shell script:
29+
or clone repo. 🫶
30+
2. Put `diff-logs` script into your bin-dir or elsewhere on `$PATH`:
3231
```shell
33-
mkdir -p ~/.local/bin
34-
export PATH="~/.local/bin:$PATH" # Also put in .bashrc or similar
35-
# Link script into your bin
36-
ln -s ~/path/to/diff-logs/diff-logs ~/.local/bin/diff-logs
32+
curl -vL https://github.com/kernc/diff-logs/raw/refs/heads/master/diff-logs |
33+
sudo tee /usr/local/bin/diff-logs
34+
sudo chmod +x /usr/local/bin/diff-logs
3735
```
3836

3937

@@ -63,7 +61,11 @@ diff-logs FILE1 FILE2 # Invokes `meld`
6361
diff-logs < FILE1 > FILE1.clean
6462
```
6563

64+
Notes
65+
-----
66+
This once was Python, but Perl is even more ubiquitous.
67+
6668
-----
6769
Finally, we can diff our logs with ease! 🥳
6870

69-
Improvements welcome!
71+
Improvements and additions welcome!

diff-logs

Lines changed: 87 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,89 @@
1-
#!/bin/bash
2-
#
3-
# This is the diff-logs utility for diff'ing log files.
4-
#
5-
# See usage instructions below.
6-
#
7-
set -eu
8-
set -o pipefail
1+
#!/usr/bin/env perl
2+
use strict;
3+
use warnings;
4+
use utf8;
5+
use File::Temp ();
96

10-
lib="$(dirname "$(command -v "$0" || true)")"
11-
pyscript="$lib/diff-logs.py"
12-
difftool="${DIFFTOOL:-diff}"
7+
my @PATTERNS = (
8+
# Date/time
9+
[ qr/\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?Z?/, '2111-11-11 11:11:11' ],
10+
[ qr/\w{3,}, \d{1,2} \w{3,} \d{4,4} \d{1,2}(?::\d{1,2}){2} [A-Z]{3}/, 'Thu, 11 Nov 2111 11:11:11 GMT' ],
11+
[ qr/\d{2}-\d{2}-\d{4} \d{2}(?::\d{2}){2}\.\d+/, '11-11-2111 11:11:11.111111' ],
12+
[ qr/[A-Z][a-z]{2} [ \d]\d \d{2}:\d{2}/, 'Nov 11 11:11' ], # `ls -l` format
13+
# Other timestamp
14+
[ qr/\b\d+(?:\.\d+)?s(?:ec)?\b/, '1.1s' ],
15+
[ qr/\b(in|since) \d+\.\d+/, 'in 1.1' ],
16+
# File/download sizes
17+
[ qr/\d+(?:\.\d+)?(?:\/\d)? ?(?P<suffix>[kmg](?:i?b)?)\b/i, q["1 " . $+{suffix}] ],
18+
# TCP / HTTP
19+
[ qr/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/, '11.1.1.1' ], # IPv4
20+
[ qr/:\d{5,5}\b/, ':11111' ], # Remote port
21+
[ qr/\bport \d{5,5}\b/, 'port 11111' ], # Remote port
22+
[ qr{\bW/(?<quote>\\?")[^"]*\k<quote>}, 'W/"ETag"' ], # ETag header
23+
# Common files
24+
[ qr{/tmp/[^\s/:"']{6,}(?:/[^\s/:"']+)*/?}, '/tmp/d1ff1065' ],
25+
# Common tools
26+
[ qr/(?<step_no>(?:\s|\A)#\d+) \d+\.\d+/, q[$+{step_no} . " 1"] ], # Docker build steps
27+
# strace process PIDs
28+
[ qr/(?<prefix>(?:\b|_?)pid[ =])\d{4,}\b/, q[$+{prefix} . "11111"] ],
29+
[ qr/(?<prefix>strace: Process )\d+/, q[$+{prefix} . "111111"] ],
1330

14-
if [ $# -eq 0 ]; then
15-
"$pyscript" <&0;
16-
elif [ $# -eq 2 ]; then
17-
case $difftool in diff) args='--color=auto' ;; *) args= ;; esac
18-
# shellcheck disable=SC2086,SC2248,SC2312
19-
$difftool $args <("$pyscript" < "$1") <("$pyscript" < "$2");
20-
else
21-
echo "Usage: $0 < FILE.log # Print log file diff-friendly" >&2
22-
echo " $0 FILE1.log FILE2.log # Invoke \$DIFFTOOL (e.g. diff)" >&2
23-
exit 1
24-
fi
31+
[ qr/(?:[\da-fA-F]{4,}-){4,}[\da-fA-F]{4,}/, 'd1ff1065-d1ff-1065-1007-d1ff1065' ],
32+
[ qr/[a-zA-Z0-9]{18,}/, 'AAAAAAAAAAAAAAAAAA' ], # Long payload
33+
[ qr/[a-fA-F0-9]{7,}/, 'd1ff1065' ], # Hash digest
34+
# Progress bar, e.g. in pip, tqdm
35+
[ qr{(?<indent>[ \t]*)(?: *(?:\[ *)?\d+%(?:])? *)?[[|]?[\x{2500}-\x{259F}=.\-]{5,} *[\]|]?(?: *(?:\[ *)?\d+%(?:])? *)?[(]?[\d.KMGB ]+/.*}, q[$+{indent} . "......."] ],
36+
);
37+
38+
# Start-up sanity check over the rule table.
# For every rule whose replacement is literal text (i.e. it contains no `$+`
# named-capture reference), the replacement must be matched in its entirety
# by the rule's own pattern; otherwise the script aborts before reading input.
RULE:
foreach my $rule (@PATTERNS) {
    my $regex      = $rule->[0];
    my $substitute = $rule->[1];

    # Replacements mentioning `$+` are Perl expressions rather than output
    # text, so they cannot be matched against the pattern directly.
    next RULE if index($substitute, '$+') != -1;

    # The literal replacement is accepted when the whole string matches.
    next RULE if $substitute =~ m/\A(?:$regex)\z/s;

    die "Assertion failed: Pattern-replacement pair '$regex' => '$substitute' not idempotent!";
}
47+
48+
# normalize($in_fh, $out_fh)
#
# Copies $in_fh to $out_fh one line at a time. Every rule in @PATTERNS is
# applied to each line, in table order, before the line is printed.
#
#   $in_fh   readable filehandle supplying the raw text
#   $out_fh  writable filehandle receiving the rewritten text
#
# A replacement that contains `$+` is treated as a Perl expression (it refers
# to named captures through %+) and is applied with s///gee; every other
# replacement is substituted as plain text with s///g.
sub normalize {
    my ($in_fh, $out_fh) = @_;

    # Whether a replacement is an expression does not depend on the input,
    # so classify each rule once instead of once per line.
    my @rules = map {
        my ($regex, $substitute) = @{$_};
        [ $regex, $substitute, index($substitute, '$+') != -1 ];
    } @PATTERNS;

    while (my $text = <$in_fh>) {
        foreach my $rule (@rules) {
            my ($regex, $substitute, $is_expression) = @{$rule};
            if ($is_expression) {
                # /ee: first evaluation yields the expression source,
                # second evaluation runs it to produce the replacement text
                $text =~ s/$regex/$substitute/gee;
            } else {
                $text =~ s/$regex/$substitute/g;
            }
        }
        print {$out_fh} $text;
    }
}
65+
66+
# Main
#
# Usage:
#   diff-logs < FILE        normalize STDIN and print the result to STDOUT
#   diff-logs FILE1 FILE2   normalize both files into temporary files, run
#                           $DIFFTOOL (default: diff) on the two results and
#                           exit with that tool's exit status
# Any other argument count prints the usage text to STDERR and exits 1.
my $argc = @ARGV;
if ($argc == 0) {
    binmode(STDIN, ":utf8");
    binmode(STDOUT, ":utf8");
    normalize(\*STDIN, \*STDOUT);
} elsif ($argc == 2) {
    my ($file1, $file2) = @ARGV;
    my $temp1 = File::Temp->new(UNLINK => 1);
    my $temp2 = File::Temp->new(UNLINK => 1);
    binmode($temp1, ':utf8');
    binmode($temp2, ':utf8');
    open my $fh1_in, '<:utf8', $file1 or die "Error: Cannot read '$file1': $!";
    open my $fh2_in, '<:utf8', $file2 or die "Error: Cannot read '$file2': $!";
    normalize($fh1_in, $temp1);
    normalize($fh2_in, $temp2);
    close $fh1_in; close $fh2_in;

    # Push buffered output to disk, and surface write errors, before another
    # process is asked to read the temporary files.
    $temp1->flush or die "Error: Cannot write '" . $temp1->filename . "': $!";
    $temp2->flush or die "Error: Cannot write '" . $temp2->filename . "': $!";

    my $difftool = $ENV{DIFFTOOL} || 'diff';
    my @command = (
        $difftool,
        ($difftool eq 'diff' ? '--color=auto' : ()),
        $temp1->filename,
        $temp2->filename,
    );

    # Run the tool as a child instead of exec'ing it: exec replaces this
    # process without invoking DESTROY, so the UNLINK => 1 requested above
    # would never happen and both temporary files would be left behind.
    # NOTE(review): a tool that detaches or hands off to an already-running
    # instance may return before it has read the files -- confirm for the
    # GUI diff tools you care about.
    my $status     = system @command;
    my $exec_error = $!;    # capture before cleanup can overwrite $!

    # Remove the temporary files now rather than relying on global destruction.
    undef $temp1;
    undef $temp2;

    if ($status == -1) {
        die "Error: Cannot run '$difftool': $exec_error";
    }

    # Propagate the tool's result: 128+N when it was killed by signal N
    # (shell convention), otherwise its own exit code.
    my $signal = $status & 127;
    exit($signal ? 128 + $signal : $status >> 8);
} else {
    print STDERR "Usage: $0 < FILE         # Print log file diff-friendly\n";
    print STDERR "       $0 FILE1 FILE2    # Invoke \$DIFFTOOL (e.g. diff)\n";
    exit 1;
}

diff-logs.py

Lines changed: 0 additions & 58 deletions
This file was deleted.

0 commit comments

Comments
 (0)