Skip to content

Natural sorting option for file_server file browse file listings #7226

@ChandlerSwift

Description

@ChandlerSwift

Issue Details

For files with regions of numbers and other characters, it's intuitive to have the numbers sorted by increasing numeric value rather than by ASCII code. (Jeff Atwood has a nice article here: https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/)

Current behavior:

[chandler@oscar:~/projects/caddy/cmd/caddy]$ ls test
foo1  foo10  foo100  foo101  foo199  foo2  foo200  foo201  foo3  foo99
[chandler@oscar:~/projects/caddy/cmd/caddy]$ go run . file-server --root test --browse --listen :8000
Image

Desired behavior:

[chandler@oscar:~/projects/caddy/cmd/caddy]$ ls -v test
foo1  foo2  foo3  foo10  foo99  foo100  foo101  foo199  foo200  foo201
[chandler@oscar:~/projects/caddy/cmd/caddy]$ go run . file-server --root test --browse --listen :8000
Image

With the idea that code can be more precise than description, here's a likely-not-production-ready patch that accomplishes natural sorting. I'd be happy to PR in any variant of this if that's desirable.

I stole the code without modification from https://github.com/fvbommel/sortorder/blob/main/natsort.go (MIT licensed), as a quick skim suggested it would do what we wanted and be more performant than other implementations (e.g. https://github.com/facette/natsort/blob/master/natsort.go, https://github.com/skarademir/naturalsort/blob/master/naturalsort.go) that used regular expressions for chunk splitting. https://github.com/maruel/natural/blob/main/natsort.go may also be comparable in performance though I haven't reviewed it for correctness, and I haven't done any performance testing on any of these options, though a benchmark claims the specific one I included may be within a factor of two on performance vs strings.Sort.

Potential patch:
diff --git a/modules/caddyhttp/fileserver/browsetplcontext.go b/modules/caddyhttp/fileserver/browsetplcontext.go
index b9489c6a..339eea21 100644
--- a/modules/caddyhttp/fileserver/browsetplcontext.go
+++ b/modules/caddyhttp/fileserver/browsetplcontext.go
@@ -325,11 +325,72 @@ type (
 	byTime         browseTemplateContext
 )
 
+func isDigit(b byte) bool { return '0' <= b && b <= '9' }
+
+// naturalLess compares two strings using natural ordering. This means that e.g.
+// "abc2" < "abc12".
+//
+// Non-digit sequences and numbers are compared separately. The former are
+// compared bytewise, while digits are compared numerically (except that
+// the number of leading zeros is used as a tie-breaker, so e.g. "2" < "02")
+//
+// Limitation: only ASCII digits (0-9) are considered.
+func naturalLess(str1, str2 string) bool {
+	idx1, idx2 := 0, 0
+	for idx1 < len(str1) && idx2 < len(str2) {
+		c1, c2 := str1[idx1], str2[idx2]
+		dig1, dig2 := isDigit(c1), isDigit(c2)
+		switch {
+		case dig1 != dig2: // Digits before other characters.
+			return dig1 // True if LHS is a digit, false if the RHS is one.
+		case !dig1: // && !dig2, because dig1 == dig2
+			// UTF-8 compares bytewise-lexicographically, no need to decode
+			// codepoints.
+			if c1 != c2 {
+				return c1 < c2
+			}
+			idx1++
+			idx2++
+		default: // Digits
+			// Eat zeros.
+			for ; idx1 < len(str1) && str1[idx1] == '0'; idx1++ {
+			}
+			for ; idx2 < len(str2) && str2[idx2] == '0'; idx2++ {
+			}
+			// Eat all digits.
+			nonZero1, nonZero2 := idx1, idx2
+			for ; idx1 < len(str1) && isDigit(str1[idx1]); idx1++ {
+			}
+			for ; idx2 < len(str2) && isDigit(str2[idx2]); idx2++ {
+			}
+			// If lengths of numbers with non-zero prefix differ, the shorter
+			// one is less.
+			if len1, len2 := idx1-nonZero1, idx2-nonZero2; len1 != len2 {
+				return len1 < len2
+			}
+			// If they're equally long, string comparison is correct.
+			if nr1, nr2 := str1[nonZero1:idx1], str2[nonZero2:idx2]; nr1 != nr2 {
+				return nr1 < nr2
+			}
+			// Otherwise, the one with less zeros is less.
+			// Because everything up to the number is equal, comparing the index
+			// after the zeros is sufficient.
+			if nonZero1 != nonZero2 {
+				return nonZero1 < nonZero2
+			}
+		}
+		// They're identical so far, so continue comparing.
+	}
+	// So far they are identical. At least one is ended. If the other continues,
+	// it sorts last.
+	return len(str1) < len(str2)
+}
+
 func (l byName) Len() int      { return len(l.Items) }
 func (l byName) Swap(i, j int) { l.Items[i], l.Items[j] = l.Items[j], l.Items[i] }
 
 func (l byName) Less(i, j int) bool {
-	return strings.ToLower(l.Items[i].Name) < strings.ToLower(l.Items[j].Name)
+	return naturalLess(strings.ToLower(l.Items[i].Name), strings.ToLower(l.Items[j].Name))
 }
 
 func (l byNameDirFirst) Len() int      { return len(l.Items) }
@@ -338,7 +399,7 @@ func (l byNameDirFirst) Swap(i, j int) { l.Items[i], l.Items[j] = l.Items[j], l.
 func (l byNameDirFirst) Less(i, j int) bool {
 	// sort by name if both are dir or file
 	if l.Items[i].IsDir == l.Items[j].IsDir {
-		return strings.ToLower(l.Items[i].Name) < strings.ToLower(l.Items[j].Name)
+		return naturalLess(strings.ToLower(l.Items[i].Name), strings.ToLower(l.Items[j].Name))
 	}
 	// sort dir ahead of file
 	return l.Items[i].IsDir

Assistance Disclosure

AI not used

If AI was used, describe the extent to which it was used.

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions