Skip to content

Commit 7a860df

Browse files
committed
Adding options to generate only validly formatted ZIP codes and VINs.
1 parent 93a55a0 commit 7a860df

File tree

4 files changed

+278
-7
lines changed

4 files changed

+278
-7
lines changed

src/main/java/ai/philterd/phileas/data/generators/VINGenerator.java

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,92 @@
2424
*/
2525
public class VINGenerator implements DataGenerator.Generator<String> {
2626
private final Random random;
27+
private final boolean onlyValid;
2728
private final String chars = "0123456789ABCDEFGHJKLMNPRSTUVWXYZ";
2829

2930
/**
3031
* Creates a new VIN generator.
3132
* @param random The {@link Random} to use.
3233
*/
3334
public VINGenerator(final Random random) {
35+
this(random, false); // Delegates to the two-argument constructor with onlyValid set to false.
36+
}
37+
38+
/**
39+
* Creates a new VIN generator.
40+
* @param random The {@link Random} to use.
41+
* @param onlyValid If <code>true</code>, only valid VINs will be generated.
42+
*/
43+
public VINGenerator(final Random random, final boolean onlyValid) {
    // The two assignments are independent; the validity mode is recorded first.
    this.onlyValid = onlyValid;
    this.random = random;
}
3647

3748
@Override
3849
public String random() {
39-
final StringBuilder sb = new StringBuilder();
40-
for (int i = 0; i < 17; i++) {
41-
sb.append(chars.charAt(random.nextInt(chars.length())));
50+
if (onlyValid) {
51+
return generateValidVin();
52+
} else {
53+
final StringBuilder sb = new StringBuilder();
54+
for (int i = 0; i < 17; i++) {
55+
sb.append(chars.charAt(random.nextInt(chars.length())));
56+
}
57+
return sb.toString();
58+
}
59+
}
60+
61+
/**
62+
* This generates a VIN that matches the general VIN pattern. These may or may not be actually valid VINs.
63+
* @return A VIN that matches the general pattern.
64+
*/
65+
private String generateValidVin() {
66+
67+
final int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 0, 7, 0, 9, 2, 3, 4, 5, 6, 7, 8, 9 };
68+
final int[] weights = { 8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2 };
69+
70+
String vin = "";
71+
boolean valid = false;
72+
73+
while (!valid) {
74+
final StringBuilder sb = new StringBuilder();
75+
int sum = 0;
76+
77+
for (int i = 0; i < 17; i++) {
78+
char c;
79+
if (i == 8) {
80+
// Check digit placeholder
81+
c = '0';
82+
} else {
83+
c = chars.charAt(random.nextInt(chars.length()));
84+
}
85+
86+
sb.append(c);
87+
88+
int value;
89+
if (c >= 'A' && c <= 'Z') {
90+
value = values[c - 'A'];
91+
} else {
92+
value = c - '0';
93+
}
94+
95+
sum += value * weights[i];
96+
}
97+
98+
final int checkDigitValue = sum % 11;
99+
char checkDigit;
100+
if (checkDigitValue == 10) {
101+
checkDigit = 'X';
102+
} else {
103+
checkDigit = (char) (checkDigitValue + '0');
104+
}
105+
106+
sb.setCharAt(8, checkDigit);
107+
vin = sb.toString();
108+
valid = true;
42109
}
43-
return sb.toString();
110+
111+
return vin;
112+
44113
}
45114

46115
@Override

src/main/java/ai/philterd/phileas/data/generators/ZipCodeGenerator.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,30 +17,68 @@
1717

1818
import ai.philterd.phileas.data.DataGenerator;
1919

20+
import java.io.IOException;
21+
import java.util.ArrayList;
22+
import java.util.List;
2023
import java.util.Random;
2124

2225
/**
2326
* Generates random zip codes.
2427
*/
25-
public class ZipCodeGenerator implements DataGenerator.Generator<String> {
28+
public class ZipCodeGenerator extends AbstractGenerator<String> implements DataGenerator.Generator<String> {
2629
private final Random random;
30+
private final boolean onlyValid;
31+
private List<String> validZipCodes;
2732

2833
/**
2934
* Creates a new zip code generator.
3035
* @param random The {@link Random} to use.
3136
*/
3237
public ZipCodeGenerator(final Random random) {
38+
this(random, false);
39+
}
40+
41+
/**
42+
* Creates a new zip code generator.
43+
* @param random The {@link Random} to use.
44+
* @param onlyValid If <code>true</code>, only valid zip codes from the census will be used.
45+
*/
46+
public ZipCodeGenerator(final Random random, final boolean onlyValid) {
3347
this.random = random;
48+
this.onlyValid = onlyValid;
49+
50+
if (onlyValid) {
51+
try {
52+
this.validZipCodes = new ArrayList<>();
53+
final List<String> lines = loadNames("/zip-code-population.csv");
54+
for (final String line : lines) {
55+
if (!line.startsWith("#")) {
56+
final String[] parts = line.split(",");
57+
validZipCodes.add(parts[0]);
58+
}
59+
}
60+
} catch (IOException e) {
61+
throw new RuntimeException("Unable to load zip code data file.", e);
62+
}
63+
}
3464
}
3565

3666
@Override
3767
public String random() {
38-
return String.format("%05d", random.nextInt(100000));
68+
if (onlyValid) {
69+
return validZipCodes.get(random.nextInt(validZipCodes.size()));
70+
} else {
71+
return String.format("%05d", random.nextInt(100000));
72+
}
3973
}
4074

4175
@Override
4276
public long poolSize() {
43-
return 100000L;
77+
if (onlyValid) {
78+
return validZipCodes.size();
79+
} else {
80+
return 100000L;
81+
}
4482
}
4583

4684
}

src/test/java/ai/philterd/phileas/data/generators/VINGeneratorTest.java

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,118 @@
1818
import org.junit.jupiter.api.Test;
1919

2020
import java.security.SecureRandom;
21+
import java.util.regex.Pattern;
2122

2223
import static org.junit.jupiter.api.Assertions.assertEquals;
2324
import static org.junit.jupiter.api.Assertions.assertNotNull;
25+
import static org.junit.jupiter.api.Assertions.assertTrue;
2426

2527
public class VINGeneratorTest {
2628

29+
private static final String VIN_PATTERN = "^[A-HJ-NPR-Z0-9]{17}$";
30+
2731
@Test
2832
public void testGenerateVIN() {
2933
final VINGenerator generator = new VINGenerator(new SecureRandom());
3034
final String vin = generator.random();
3135
assertNotNull(vin);
3236
assertEquals(17, vin.length());
37+
assertTrue(Pattern.matches(VIN_PATTERN, vin));
38+
}
39+
40+
@Test
41+
public void testGenerateValidVIN() {
42+
final VINGenerator generator = new VINGenerator(new SecureRandom(), true);
43+
final String vin = generator.random();
44+
assertNotNull(vin);
45+
assertEquals(17, vin.length());
46+
assertTrue(Pattern.matches(VIN_PATTERN, vin));
47+
}
48+
49+
@Test
50+
public void testGenerateMultipleValidVINs() throws Exception {
51+
final VINGenerator generator = new VINGenerator(new SecureRandom(), true);
52+
53+
// We need a VinFilter to verify the VINs.
54+
// It requires a FilterConfiguration which we can't easily create here without more setup,
55+
// but we can look at how VinFilter handles validation.
56+
// Actually, let's just use the same logic as in VinFilter.
57+
for (int i = 0; i < 100; i++) {
58+
final String vin = generator.random();
59+
assertNotNull(vin);
60+
assertEquals(17, vin.length());
61+
assertTrue(Pattern.matches(VIN_PATTERN, vin));
62+
assertTrue(isVinValid(vin), "VIN should be valid: " + vin);
63+
}
64+
}
65+
66+
private boolean isVinValid(String vin) {
67+
int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 0, 7, 0, 9, 2, 3, 4, 5, 6, 7, 8, 9 };
68+
int[] weights = { 8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2 };
69+
70+
String s = vin.toUpperCase();
71+
if (s.length() != 17) {
72+
return false;
73+
}
74+
75+
int sum = 0;
76+
for (int i = 0; i < 17; i++) {
77+
char c = s.charAt(i);
78+
int value;
79+
int weight = weights[i];
80+
81+
if (c >= 'A' && c <= 'Z') {
82+
value = values[c - 'A'];
83+
if (value == 0 && c != 'I' && c != 'O' && c != 'Q' && i != 8) {
84+
// This is a bit complex because some letters have value 0 in the values array but are not illegal.
85+
// However, I, O, Q are illegal and should have value 0.
86+
// The 9th character (index 8) is the check digit and shouldn't be used in the sum calculation for validation?
87+
// Wait, VinFilter.java line 82 uses weights[i]. weights[8] is 0.
88+
// So it doesn't matter what value is at index 8.
89+
}
90+
} else if (c >= '0' && c <= '9') {
91+
value = c - '0';
92+
} else {
93+
return false;
94+
}
95+
96+
sum = sum + weight * value;
97+
}
98+
99+
sum = sum % 11;
100+
char check = s.charAt(8);
101+
if (sum == 10 && check == 'X') {
102+
return true;
103+
} else if (sum == (check >= '0' && check <= '9' ? check - '0' : transliterate(check))) {
104+
return true;
105+
} else {
106+
return false;
107+
}
108+
}
109+
110+
private int transliterate(char check) {
111+
112+
if(check == 'A' || check == 'J'){
113+
return 1;
114+
} else if(check == 'B' || check == 'K' || check == 'S'){
115+
return 2;
116+
} else if(check == 'C' || check == 'L' || check == 'T'){
117+
return 3;
118+
} else if(check == 'D' || check == 'M' || check == 'U'){
119+
return 4;
120+
} else if(check == 'E' || check == 'N' || check == 'V'){
121+
return 5;
122+
} else if(check == 'F' || check == 'W'){
123+
return 6;
124+
} else if(check == 'G' || check == 'P' || check == 'X'){
125+
return 7;
126+
} else if(check == 'H' || check == 'Y'){
127+
return 8;
128+
} else if(check == 'R' || check == 'Z'){
129+
return 9;
130+
}
131+
return -1;
132+
33133
}
34134

35135
@Test

src/test/java/ai/philterd/phileas/data/generators/ZipCodeGeneratorTest.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717

1818
import org.junit.jupiter.api.Test;
1919

20+
import java.io.BufferedReader;
21+
import java.io.IOException;
22+
import java.io.InputStream;
23+
import java.io.InputStreamReader;
2024
import java.security.SecureRandom;
25+
import java.util.ArrayList;
26+
import java.util.List;
2127

2228
import static org.junit.jupiter.api.Assertions.assertEquals;
2329
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -33,10 +39,68 @@ public void testGenerateZipCode() {
3339
assertTrue(zip.matches("\\d{5}"));
3440
}
3541

42+
@Test
43+
public void testGenerateMultipleZipCodes() {
44+
final ZipCodeGenerator generator = new ZipCodeGenerator(new SecureRandom());
45+
for (int i = 0; i < 100; i++) {
46+
final String zip = generator.random();
47+
assertNotNull(zip);
48+
assertTrue(zip.matches("\\d{5}"));
49+
}
50+
}
51+
52+
@Test
53+
public void testGenerateMultipleValidZipCodes() {
54+
final ZipCodeGenerator generator = new ZipCodeGenerator(new SecureRandom(), true);
55+
for (int i = 0; i < 100; i++) {
56+
final String zip = generator.random();
57+
assertNotNull(zip);
58+
assertTrue(zip.matches("\\d{5}"));
59+
}
60+
}
61+
62+
@Test
63+
public void testGenerateValidZipCode() throws IOException {
64+
final ZipCodeGenerator generator = new ZipCodeGenerator(new SecureRandom(), true);
65+
final String zip = generator.random();
66+
assertNotNull(zip);
67+
assertTrue(zip.matches("\\d{5}"));
68+
69+
// Verify it is in the list of valid zip codes.
70+
final List<String> validZipCodes = new ArrayList<>();
71+
try (final InputStream is = getClass().getResourceAsStream("/zip-code-population.csv")) {
72+
try (final BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
73+
String line;
74+
while ((line = reader.readLine()) != null) {
75+
if (!line.startsWith("#")) {
76+
final String[] parts = line.split(",");
77+
validZipCodes.add(parts[0]);
78+
}
79+
}
80+
}
81+
}
82+
83+
assertTrue(validZipCodes.contains(zip));
84+
assertEquals(validZipCodes.size(), generator.poolSize());
85+
}
86+
3687
@Test
3788
public void testPoolSize() {
3889
final ZipCodeGenerator generator = new ZipCodeGenerator(new SecureRandom());
3990
assertEquals(100000L, generator.poolSize());
4091
}
4192

93+
@Test
94+
public void testVariety() {
95+
final ZipCodeGenerator generator = new ZipCodeGenerator(new SecureRandom());
96+
final List<String> zips = new ArrayList<>();
97+
98+
for(int i = 0; i < 100; i++) {
99+
zips.add(generator.random());
100+
}
101+
102+
// We should have some variety in the generated zip codes.
103+
assertTrue(zips.stream().distinct().count() > 1);
104+
}
105+
42106
}

0 commit comments

Comments
 (0)