Skip to content

Commit 8972b36

Browse files
committed
bytecount working better
1 parent fbee321 commit 8972b36

File tree

6 files changed

+103
-21
lines changed

6 files changed

+103
-21
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
- [ ] `ghash`: calculate various [hashes available in the Go standard library](https://pkg.go.dev/crypto#Hash)
3131
- [ ] `body`: prints specific lines of a file (in between `head` and `tail`)
3232
- [ ] `trilobyte`: translates bytes according to a map
33+
- [ ] `trune`: translates Unicode codepoints (runes) according to a map
3334
- [ ] `ustrings`: like the standard [`strings`](https://man7.org/linux/man-pages/man1/strings.1.html) utility, but with Unicode support
3435
- [ ] `utf8ify`: convert to UTF-8
3536
- [ ] `unhexdump`: convert the (edited) output of [`hexdump -C`](https://man7.org/linux/man-pages/man1/hexdump.1.html) back to binary

TODO.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
## Uniwhat and Unicount
2020

21+
- [ ] Flag for whitespace characters (i.e., handled separately from control characters)
2122
- [ ] Flag to include/exclude specific scripts (not just ASCII)
2223
- [ ] Flag to print script
2324
- [ ] Flag to print block

cmd/bytecount/bytecount.go

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@ import (
44
"bufio"
55
"fmt"
66
"io"
7+
"log"
78
"os"
89

10+
"github.com/FileFormatInfo/fftools/internal"
911
"github.com/olekukonko/tablewriter"
1012
"github.com/olekukonko/tablewriter/renderer"
1113
"github.com/olekukonko/tablewriter/tw"
14+
"github.com/spf13/pflag"
1215
"golang.org/x/text/language"
1316
"golang.org/x/text/message"
1417
)
@@ -32,43 +35,91 @@ func outputPretty(out io.Writer, counts map[byte]int) {
3235
)
3336

3437
header := []string{""}
35-
for i := 0; i <= 0xf0; i += 0x10 {
38+
for i := 0; i <= 0x0f; i += 1 {
3639
header = append(header, fmt.Sprintf("0x%02X", i))
3740
}
41+
header = append(header, "")
3842
table.Header(header)
3943

40-
for row := 0; row <= 0x0F; row += 0x01 {
44+
for row := 0; row <= 0xF0; row += 0x10 {
4145
data := []string{fmt.Sprintf("0x%02X", row)}
42-
for col := 0; col <= 0xF0; col += 0x10 {
46+
for col := 0; col <= 0x0F; col += 0x01 {
4347
i := row + col
4448
data = append(data, prettyPrinter.Sprintf("%d", counts[byte(i)]))
4549
}
50+
data = append(data, fmt.Sprintf("0x%02X", row))
4651
table.Append(data)
4752
}
4853

4954
table.Render()
5055

5156
}
5257

53-
func main() {
58+
func processFile(fileName string) error {
5459
counts := make(map[byte]int)
5560
for i := 0; i < 256; i++ {
5661
counts[byte(i)] = 0
5762
}
5863

59-
reader := bufio.NewReaderSize(os.Stdin, 1024*1024)
64+
file, err := os.Open(fileName)
65+
if err != nil {
66+
return err
67+
}
68+
defer file.Close()
69+
70+
reader := bufio.NewReaderSize(file, 1024*1024)
6071
for {
6172
b, err := reader.ReadByte()
6273
if err == io.EOF {
6374
break
6475
}
6576
if err != nil {
66-
fmt.Fprintf(os.Stderr, "ERROR: unable to read input: %v\n", err)
67-
os.Exit(1)
77+
return err
6878
}
6979
counts[b]++
7080
}
7181

7282
//LATER: other output formats: plain, JSON, CSV
7383
outputPretty(os.Stdout, counts)
84+
85+
return nil
86+
}
87+
88+
func main() {
89+
90+
var help = pflag.BoolP("help", "h", false, "Show help message")
91+
var version = pflag.Bool("version", false, "Print version information")
92+
93+
pflag.Parse()
94+
95+
if *version {
96+
internal.PrintVersion("bytecount")
97+
return
98+
}
99+
100+
if *help {
101+
// LATER: print man page
102+
fmt.Printf("Usage: bytecount [options] [file...]\n\n")
103+
fmt.Printf("Options:\n")
104+
pflag.PrintDefaults()
105+
return
106+
}
107+
108+
args := pflag.Args()
109+
if len(args) == 0 {
110+
fmt.Printf("Usage: bytecount [options] file ...\n\n")
111+
return
112+
}
113+
114+
for _, arg := range args {
115+
if arg == "-" {
116+
arg = "/dev/stdin"
117+
}
118+
119+
if len(args) > 1 {
120+
fmt.Printf("Processing file: %s\n", arg)
121+
}
122+
123+
processFile(arg)
124+
}
74125
}

cmd/uniwhat/uniwhat.go

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"bufio"
5+
_ "embed"
56
"fmt"
67
"io"
78
"log"
@@ -12,14 +13,21 @@ import (
1213
"golang.org/x/text/unicode/runenames"
1314
)
1415

16+
//go:embed README.md
17+
var helpText string
18+
1519
func main() {
1620

17-
var control = pflag.Bool("control", false, "Include control characters")
1821
var ascii = pflag.Bool("ascii", false, "Include ASCII characters")
1922
var codepoint = pflag.Bool("codepoint", true, "Print the U+XXXX codepoint")
23+
var line = pflag.Bool("line", true, "Print the line number")
2024
var offset = pflag.Bool("offset", true, "Print the offset")
2125
var char = pflag.Bool("char", false, "Print the character itself")
22-
var version = pflag.Bool("version", false, "Print version information")
26+
27+
var first = pflag.Bool("first", false, "Only print the first occurrence of each character")
28+
29+
var help = pflag.Bool("help", false, "Detailed help")
30+
var version = pflag.Bool("version", false, "Version info")
2331

2432
pflag.Parse()
2533

@@ -28,11 +36,20 @@ func main() {
2836
return
2937
}
3038

39+
if *help {
40+
fmt.Printf("%s\n", helpText)
41+
return
42+
}
43+
3144
args := pflag.Args()
3245
if len(args) == 0 {
33-
args = []string{"-"}
46+
fmt.Printf("Usage: uniwhat [options] file ...\n\n")
47+
pflag.PrintDefaults()
48+
return
3449
}
3550

51+
firstMap := make(map[rune]bool)
52+
3653
for _, arg := range args {
3754
if arg == "-" {
3855
arg = "/dev/stdin"
@@ -46,9 +63,10 @@ func main() {
4663
}
4764
defer file.Close()
4865

49-
reader := bufio.NewReader(file)
66+
reader := bufio.NewReaderSize(file, 1024*1024)
5067

5168
var pos int = 0
69+
var lineNum int = 1
5270

5371
// Loop to read runes one by one
5472
for {
@@ -59,21 +77,32 @@ func main() {
5977
}
6078
log.Fatalf("Error reading rune: %v", err)
6179
}
62-
if r < 0x1F && !*control {
63-
pos += rsize
64-
continue // Skip control characters if --control is not set
80+
pos += rsize
81+
82+
if r == '\n' {
83+
lineNum++
6584
}
66-
if r <= 0x7E && !*ascii {
67-
pos += rsize
85+
86+
if !*ascii && ((r >= 0x20 && r <= 0x7E) || r == 0x09 || r == 0x0A || r == 0x0D) {
6887
continue // Skip ASCII characters if --ascii is not set
6988
}
89+
90+
if *first {
91+
if _, exists := firstMap[r]; exists {
92+
continue
93+
}
94+
firstMap[r] = true
95+
}
96+
7097
name := runenames.Name(r)
7198
if name == "" {
7299
name = "<unknown>"
73100
}
74101
if *offset {
75-
// Note: Getting the exact byte offset of the rune is complex; this is a placeholder
76-
fmt.Printf("%08x ", pos)
102+
fmt.Printf("%08x ", pos-rsize)
103+
}
104+
if *line {
105+
fmt.Printf("%6d ", lineNum)
77106
}
78107
if *codepoint {
79108
fmt.Printf("U+%04X ", r)
@@ -82,8 +111,6 @@ func main() {
82111
fmt.Printf("%c ", r)
83112
}
84113
fmt.Printf("%s\n", name)
85-
86-
pos += rsize
87114
}
88115
}
89116
}

internal/Logger.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010

1111
var Logger = initLogger()
1212
var LogLevel = slog.LevelInfo
13+
const LevelTrace = slog.Level(-8)
14+
1315

1416
// level=Trace is for stuff that should not be logged in production, either because it's too verbose or because it contains sensitive information.
1517

internal/Version.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ var (
1313
)
1414

1515
func PrintVersion(name string) {
16-
if LogLevel >= slog.LevelInfo {
16+
if LogLevel <= slog.LevelInfo {
1717
slog.Info("Version information", "name", name, "version", VERSION, "lastmod", LASTMOD, "commit", COMMIT, "builder", BUILDER)
1818
} else {
1919
fmt.Printf("%s version %s (built on %s from %s by %s)\n", name, VERSION, LASTMOD, COMMIT, BUILDER)

0 commit comments

Comments
 (0)