Skip to content

Commit ae71e8b

Browse files
authored
Using possibleLengths info for length calculations in Java (google#1355)
Changing PhoneNumberUtil to use the possibleLengths information, not the regular expressions.

Note the API is not changing, but the metadata is now somewhat stricter for many countries: before, we applied only a minimum and maximum length for most countries, and now we specify exactly which lengths are possible.

This has a flow-on effect when parsing, since we decide whether to do certain operations, like stripping a national prefix, based on whether the number is a possible length before/after. When parsing, if the number is shorter than the *national* pattern, we no longer strip the national prefix.

Affected countries:
- AD (7 digits now invalid)
- AM (7 digits now invalid)
- AR (9 digits now invalid)
- AZ (8 digits now invalid)
- BG (4 digits now valid for local-only numbers)
- BJ (5-7 digits now invalid)
- CC/CX (5 digit numbers now possible: this should always have been the case, but the generalDesc was wrong and didn't reflect its child elements. We now calculate it based on them, which allows 5 digit numbers.)
- CO (9 digits now invalid)
- CR (9 digits now invalid)
- ET (8 digits now invalid)
- GE (7 and 8 digits now invalid)
- GH (8 digits now invalid)
- IL (5 and 6 digits now invalid)
- IM/JE/GG (7, 8 and 9 digits now invalid, shortest national number length now 10, so parsing affected for numbers shorter than this)
- IS (8 digits now invalid)
- KG (7 and 8 digits now invalid)
- KR (11 digits now invalid)
- LA (7 digits now invalid)
- LI (8 digits now invalid)
- LY (8 digits now invalid)
- MV (8 and 9 digits now invalid)
- MW (8 digits now invalid)
- MX (9 digits now invalid)
- NP (9 digits now invalid)
- SE (11 digits now invalid)
- SG (9 digits now invalid)
- SL (7 digits now invalid)
- SM (7-9 digits now invalid)
- UA (8 digits now invalid)
- UG (8 digits now invalid)
- UZ (8 digits now invalid)
1 parent 62714f5 commit ae71e8b

17 files changed

+118
-46
lines changed
Binary file not shown.
Binary file not shown.

java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java

Lines changed: 39 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1306,8 +1306,8 @@ public String formatNumberForMobileDialing(PhoneNumber number, String regionCall
13061306
// short numbers, which are always dialled in national format.
13071307
PhoneMetadata regionMetadata = getMetadataForRegion(regionCallingFrom);
13081308
if (canBeInternationallyDialled(numberNoExt)
1309-
&& !isShorterThanPossibleNormalNumber(regionMetadata,
1310-
getNationalSignificantNumber(numberNoExt))) {
1309+
&& testNumberLength(getNationalSignificantNumber(numberNoExt),
1310+
regionMetadata.getGeneralDesc()) != ValidationResult.TOO_SHORT) {
13111311
formattedNumber = format(numberNoExt, PhoneNumberFormat.INTERNATIONAL);
13121312
} else {
13131313
formattedNumber = format(numberNoExt, PhoneNumberFormat.NATIONAL);
@@ -2113,19 +2113,19 @@ PhoneMetadata getMetadataForNonGeographicalRegion(int countryCallingCode) {
21132113
return metadataSource.getMetadataForNonGeographicalRegion(countryCallingCode);
21142114
}
21152115

2116-
boolean isNumberPossibleForDesc(String nationalNumber, PhoneNumberDesc numberDesc) {
2117-
Matcher possibleNumberPatternMatcher =
2118-
regexCache.getPatternForRegex(numberDesc.getPossibleNumberPattern())
2119-
.matcher(nationalNumber);
2120-
return possibleNumberPatternMatcher.matches();
2121-
}
2122-
21232116
boolean isNumberMatchingDesc(String nationalNumber, PhoneNumberDesc numberDesc) {
2117+
// Check if any possible number lengths are present; if so, we use them to avoid checking the
2118+
// validation pattern if they don't match. If they are absent, this means they match the general
2119+
// description, which we have already checked before checking a specific number type.
2120+
int actualLength = nationalNumber.length();
2121+
List<Integer> possibleLengths = numberDesc.getPossibleLengthList();
2122+
if (possibleLengths.size() > 0 && !possibleLengths.contains(actualLength)) {
2123+
return false;
2124+
}
21242125
Matcher nationalNumberPatternMatcher =
21252126
regexCache.getPatternForRegex(numberDesc.getNationalNumberPattern())
21262127
.matcher(nationalNumber);
2127-
return isNumberPossibleForDesc(nationalNumber, numberDesc)
2128-
&& nationalNumberPatternMatcher.matches();
2128+
return nationalNumberPatternMatcher.matches();
21292129
}
21302130

21312131
/**
@@ -2362,32 +2362,35 @@ public boolean isPossibleNumber(PhoneNumber number) {
23622362
}
23632363

23642364
/**
2365-
* Helper method to check a number against a particular pattern and determine whether it matches,
2366-
* or is too short or too long. Currently, if a number pattern suggests that numbers of length 7
2367-
* and 10 are possible, and a number in between these possible lengths is entered, such as of
2368-
* length 8, this will return TOO_LONG.
2365+
* Helper method to check a number against possible lengths for this number, and determine whether
2366+
* it matches, or is too short or too long. Currently, if a number pattern suggests that numbers
2367+
* of length 7 and 10 are possible, and a number in between these possible lengths is entered,
2368+
* such as of length 8, this will return TOO_LONG.
23692369
*/
2370-
private ValidationResult testNumberLengthAgainstPattern(Pattern numberPattern, String number) {
2371-
Matcher numberMatcher = numberPattern.matcher(number);
2372-
if (numberMatcher.matches()) {
2370+
private ValidationResult testNumberLength(String number, PhoneNumberDesc phoneNumberDesc) {
2371+
List<Integer> possibleLengths = phoneNumberDesc.getPossibleLengthList();
2372+
List<Integer> localLengths = phoneNumberDesc.getPossibleLengthLocalOnlyList();
2373+
int actualLength = number.length();
2374+
if (localLengths.contains(actualLength)) {
23732375
return ValidationResult.IS_POSSIBLE;
23742376
}
2375-
if (numberMatcher.lookingAt()) {
2376-
return ValidationResult.TOO_LONG;
2377-
} else {
2377+
// There should always be "possibleLengths" set for every element. This will be a build-time
2378+
// check once ShortNumberMetadata.xml is migrated to contain this information as well.
2379+
int minimumLength = possibleLengths.get(0);
2380+
if (minimumLength == actualLength) {
2381+
return ValidationResult.IS_POSSIBLE;
2382+
} else if (minimumLength > actualLength) {
23782383
return ValidationResult.TOO_SHORT;
2384+
} else if (possibleLengths.get(possibleLengths.size() - 1) < actualLength) {
2385+
return ValidationResult.TOO_LONG;
23792386
}
2380-
}
2381-
2382-
/**
2383-
* Helper method to check whether a number is too short to be a regular length phone number in a
2384-
* region.
2385-
*/
2386-
private boolean isShorterThanPossibleNormalNumber(PhoneMetadata regionMetadata, String number) {
2387-
Pattern possibleNumberPattern = regexCache.getPatternForRegex(
2388-
regionMetadata.getGeneralDesc().getPossibleNumberPattern());
2389-
return testNumberLengthAgainstPattern(possibleNumberPattern, number) ==
2390-
ValidationResult.TOO_SHORT;
2387+
// Note that actually the number is not too long if possibleLengths does not contain the length:
2388+
// we know it is less than the highest possible number length, and higher than the lowest
2389+
// possible number length. However, we don't currently have an enum to express this, so we
2390+
// return TOO_LONG in the short-term.
2391+
// We skip the first element; we've already checked it.
2392+
return possibleLengths.subList(1, possibleLengths.size()).contains(actualLength)
2393+
? ValidationResult.IS_POSSIBLE : ValidationResult.TOO_LONG;
23912394
}
23922395

23932396
/**
@@ -2424,9 +2427,7 @@ public ValidationResult isPossibleNumberWithReason(PhoneNumber number) {
24242427
String regionCode = getRegionCodeForCountryCode(countryCode);
24252428
// Metadata cannot be null because the country calling code is valid.
24262429
PhoneMetadata metadata = getMetadataForRegionOrCallingCode(countryCode, regionCode);
2427-
Pattern possibleNumberPattern =
2428-
regexCache.getPatternForRegex(metadata.getGeneralDesc().getPossibleNumberPattern());
2429-
return testNumberLengthAgainstPattern(possibleNumberPattern, nationalNumber);
2430+
return testNumberLength(nationalNumber, metadata.getGeneralDesc());
24302431
}
24312432

24322433
/**
@@ -2596,15 +2597,12 @@ int maybeExtractCountryCode(String number, PhoneMetadata defaultRegionMetadata,
25962597
regexCache.getPatternForRegex(generalDesc.getNationalNumberPattern());
25972598
maybeStripNationalPrefixAndCarrierCode(
25982599
potentialNationalNumber, defaultRegionMetadata, null /* Don't need the carrier code */);
2599-
Pattern possibleNumberPattern =
2600-
regexCache.getPatternForRegex(generalDesc.getPossibleNumberPattern());
26012600
// If the number was not valid before but is valid now, or if it was too long before, we
26022601
// consider the number with the country calling code stripped to be a better result and
26032602
// keep that instead.
26042603
if ((!validNumberPattern.matcher(fullNumber).matches()
26052604
&& validNumberPattern.matcher(potentialNationalNumber).matches())
2606-
|| testNumberLengthAgainstPattern(possibleNumberPattern, fullNumber.toString())
2607-
== ValidationResult.TOO_LONG) {
2605+
|| testNumberLength(fullNumber.toString(), generalDesc) == ValidationResult.TOO_LONG) {
26082606
nationalNumber.append(potentialNationalNumber);
26092607
if (keepRawInput) {
26102608
phoneNumber.setCountryCodeSource(CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
@@ -3013,7 +3011,8 @@ private void parseHelper(String numberToParse, String defaultRegion, boolean kee
30133011
// We require that the NSN remaining after stripping the national prefix and carrier code be
30143012
// long enough to be a possible length for the region. Otherwise, we don't do the stripping,
30153013
// since the original number could be a valid short number.
3016-
if (!isShorterThanPossibleNormalNumber(regionMetadata, potentialNationalNumber.toString())) {
3014+
if (testNumberLength(potentialNationalNumber.toString(), regionMetadata.getGeneralDesc())
3015+
!= ValidationResult.TOO_SHORT) {
30173016
normalizedNationalNumber = potentialNationalNumber;
30183017
if (keepRawInput) {
30193018
phoneNumber.setPreferredDomesticCarrierCode(carrierCode.toString());

java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,12 @@ public void testGetInstanceLoadUSMetadata() {
145145
assertEquals("[13-689]\\d{9}|2[0-35-9]\\d{8}",
146146
metadata.getGeneralDesc().getNationalNumberPattern());
147147
assertEquals("\\d{7}(?:\\d{3})?", metadata.getGeneralDesc().getPossibleNumberPattern());
148-
assertTrue(metadata.getGeneralDesc().exactlySameAs(metadata.getFixedLine()));
148+
// Fixed-line data should be inherited from the general desc for the national number pattern,
149+
// since it wasn't overridden.
150+
assertEquals(metadata.getGeneralDesc().getNationalNumberPattern(),
151+
metadata.getFixedLine().getNationalNumberPattern());
149152
assertEquals("\\d{10}", metadata.getTollFree().getPossibleNumberPattern());
153+
assertEquals(1, metadata.getGeneralDesc().getPossibleLengthCount());
150154
assertEquals(10, metadata.getGeneralDesc().getPossibleLength(0));
151155
// Possible lengths are the same as the general description, so aren't stored separately in the
152156
// toll free element as well.
@@ -1315,7 +1319,7 @@ public void testIsPossibleNumber() {
13151319
assertTrue(phoneUtil.isPossibleNumber("253-0000", RegionCode.US));
13161320
assertTrue(phoneUtil.isPossibleNumber("+1 650 253 0000", RegionCode.GB));
13171321
assertTrue(phoneUtil.isPossibleNumber("+44 20 7031 3000", RegionCode.GB));
1318-
assertTrue(phoneUtil.isPossibleNumber("(020) 7031 3000", RegionCode.GB));
1322+
assertTrue(phoneUtil.isPossibleNumber("(020) 7031 300", RegionCode.GB));
13191323
assertTrue(phoneUtil.isPossibleNumber("7031 3000", RegionCode.GB));
13201324
assertTrue(phoneUtil.isPossibleNumber("3331 6005", RegionCode.NZ));
13211325
assertTrue(phoneUtil.isPossibleNumber("+800 1234 5678", RegionCode.UN001));
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)