Skip to content

Commit 1af30ca

Browse files
committed
docs: clarify hash() limitations in BucketSort; add tests demonstrating
distribution behavior
1 parent 27ada8a commit 1af30ca

File tree

2 files changed

+89
-0
lines changed

2 files changed

+89
-0
lines changed

src/main/java/com/thealgorithms/sorts/BucketSort.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,22 @@ private <T extends Comparable<T>> T[] concatenateBuckets(Iterable<List<T>> bucke
9595
* This is done by "normalizing" the element within the range of the array's minimum (min) and maximum (max) values,
9696
* and then mapping this normalized value to a specific bucket index.
9797
*
98+
*<p><b>Important limitations:</b>
99+
*<ul>
100+
* <li>This method uses {@code compareTo} as if it provided a numeric difference.
101+
* For most boxed numeric types (for example {@code Integer}, {@code Long} and {@code Double}), {@code compareTo} only reports order (-1, 0, 1), not the actual distance.
102+
* This often collapses distribution into one or two buckets.</li>
103+
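* <li>For non-numeric {@code Comparable} types (for example {@code String}), bucket indices depend on the raw {@code compareTo} result
104+
* (for {@code String}: the difference between the first mismatching {@code char} values, or between the lengths when one string is a prefix of the other), which is not a proportional measure of spacing. Distribution is therefore arbitrary and uneven.</li>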
105+
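* <li>If {@code min.equals(max)}, the computed "range" is 0. Because the range is held in a {@code double}, {@code element.compareTo(min) / range}
106+
* evaluates {@code 0 / 0.0}, which is {@code NaN} (no {@code ArithmeticException} is thrown); Java converts {@code NaN} to 0 when it is cast to {@code int}.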
107+
* Practically, all elements collapse into bucket 0 in this case.</li>
108+
* </ul>
109+
*
110+
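* <p>Despite these limitations, the sort still produces correctly ordered output whenever the bucket index never decreases as elements increase (as with {@code Integer}), because each bucket is sorted internally and the buckets are concatenated in order. For types whose {@code compareTo} magnitude is not monotonic in element order (for example {@code String}), that guarantee does not follow from this reasoning.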
111+
* This method should be regarded as a simplified demonstration rather than a
112+
* general-purpose bucketing strategy for arbitrary {@code Comparable<T>} values.</p>
113+
*
98114
* @param element the element of the array
99115
* @param min the minimum value in the array
100116
* @param max the maximum value in the array
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package com.thealgorithms.sorts;
2+
3+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
4+
5+
import java.util.Arrays;
6+
import java.util.List;
7+
import org.junit.jupiter.api.Test;
8+
9+
public class BucketSortHashBehaviorTest {
10+
11+
private static <T extends Comparable<T>> int pseudoHash(final T element, final T min, final T max, final int numberOfBuckets) {
12+
// Reproduces the production hash() logic so the tests can observe which bucket index an element maps to
13+
double range = max.compareTo(min);
14+
double normalizedValue = element.compareTo(min) / range; // int / double: the division is done in double, so a zero range yields NaN (0 / 0.0) instead of throwing
15+
return (int) (normalizedValue * (numberOfBuckets - 1));
16+
}
17+
18+
@Test // All elements equal: min == max, so the range used by the hash is 0
19+
void sort_stillCorrect_whenAllEqual() {
20+
Integer[] arr = {1, 1, 1, 1, 1};
21+
Integer[] expected = arr.clone();
22+
23+
new BucketSort().sort(arr);
24+
assertArrayEquals(expected, arr);
25+
26+
// Observe the bucket mapping (all elements collapse to index 0); the index is only printed, not asserted
27+
Integer min = 1, max = 1;
28+
int numberOfBuckets = Math.max(arr.length / 10, 1); // intended to match the production BUCKET_DIVISOR rule; evaluates to 1 for this 5-element array
29+
int idx = pseudoHash(1, min, max, numberOfBuckets);
30+
// idx is 0: 0 / 0.0 is NaN, NaN * 0 stays NaN, and (int) NaN is 0 in Java
31+
System.out.println("All-equal case -> bucket index: " + idx);
32+
}
33+
34+
@Test // Distinct integers: the sort result is asserted, the bucket distribution is only printed
35+
void sort_stillCorrect_nonEqualIntegers() {
36+
Integer[] arr = {20, 40, 30, 10};
37+
Integer[] expected = {10, 20, 30, 40};
38+
39+
new BucketSort().sort(arr);
40+
assertArrayEquals(expected, arr);
41+
42+
Integer min = Arrays.stream(arr).min(Integer::compareTo).get();
43+
Integer max = Arrays.stream(arr).max(Integer::compareTo).get();
44+
int numberOfBuckets = Math.max(arr.length / 10, 1); // evaluates to 1 for this 4-element array; NOTE(review): dead store, overwritten on the next line
45+
numberOfBuckets = 4; // forced to 4 so that the spread across buckets is visible in the output
46+
47+
for (Integer x : arr) {
48+
int idx = pseudoHash(x, min, max, numberOfBuckets);
49+
System.out.println("Value " + x + " -> bucket " + idx);
50+
}
51+
// Integer.compareTo yields only -1/0/1, so 10 maps to bucket 0 and 20, 30, 40 all map to bucket 3 (printed, not asserted)
52+
}
53+
54+
@Test // String input: the sort result is asserted, the bucket distribution is only printed
55+
void sort_stillCorrect_whenStrings() {
56+
String[] arr = {"apple", "banana", "carrot"};
57+
String[] expected = arr.clone(); // the input is already in ascending order, so the expected result is a plain copy
58+
59+
new BucketSort().sort(arr);
60+
assertArrayEquals(expected, arr);
61+
62+
String min = Arrays.stream(arr).min(String::compareTo).get();
63+
String max = Arrays.stream(arr).max(String::compareTo).get();
64+
int numberOfBuckets = 4;
65+
66+
for (String s : arr) {
67+
int idx = pseudoHash(s, min, max, numberOfBuckets);
68+
System.out.println("Value \"" + s + "\" -> bucket " + idx);
69+
}
70+
// String.compareTo returns the difference of the first mismatching chars, so here the range is 'c' - 'a' = 2
70+
}
72+
}
73+

0 commit comments

Comments
 (0)