Skip to content

Commit 99fd0ac

Browse files
committed
Porting usage of possibleLengths from Java to C++ (google#1358)
* Porting code to use the new possible length information for calculating whether a phone number is possible, too short etc.
1 parent ae71e8b commit 99fd0ac

File tree

2 files changed

+1010
-956
lines changed

2 files changed

+1010
-956
lines changed

cpp/src/phonenumbers/phonenumberutil.cc

Lines changed: 63 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@
5252
namespace i18n {
5353
namespace phonenumbers {
5454

55+
using google::protobuf::RepeatedField;
5556
using google::protobuf::RepeatedPtrField;
57+
using std::find;
5658

5759
// static constants
5860
const size_t PhoneNumberUtil::kMinLengthForNsn;
@@ -263,17 +265,41 @@ void NormalizeHelper(const map<char32, char>& normalization_replacements,
263265
number->assign(normalized_number);
264266
}
265267

266-
PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern(
267-
const RegExp& number_pattern, const string& number) {
268-
string extracted_number;
269-
if (number_pattern.FullMatch(number, &extracted_number)) {
268+
// Helper method to check a number against possible lengths for this number, and
269+
// determine whether it matches, or is too short or too long. Currently, if a
270+
// number pattern suggests that numbers of length 7 and 10 are possible, and a
271+
// number in between these possible lengths is entered, such as of length 8,
272+
// this will return TOO_LONG.
273+
PhoneNumberUtil::ValidationResult TestNumberLength(
274+
const string& number, const PhoneNumberDesc& phone_number_desc) {
275+
RepeatedField<int> possible_lengths = phone_number_desc.possible_length();
276+
RepeatedField<int> local_lengths =
277+
phone_number_desc.possible_length_local_only();
278+
int actual_length = number.length();
279+
if (find(local_lengths.begin(), local_lengths.end(), actual_length) !=
280+
local_lengths.end()) {
270281
return PhoneNumberUtil::IS_POSSIBLE;
271282
}
272-
if (number_pattern.PartialMatch(number, &extracted_number)) {
273-
return PhoneNumberUtil::TOO_LONG;
274-
} else {
283+
// There should always be "possibleLengths" set for every element. This will
284+
// be a build-time check once ShortNumberMetadata.xml is migrated to contain
285+
// this information as well.
286+
int minimum_length = possible_lengths.Get(0);
287+
if (minimum_length == actual_length) {
288+
return PhoneNumberUtil::IS_POSSIBLE;
289+
} else if (minimum_length > actual_length) {
275290
return PhoneNumberUtil::TOO_SHORT;
291+
} else if (*(possible_lengths.end() - 1) < actual_length) {
292+
return PhoneNumberUtil::TOO_LONG;
276293
}
294+
// Note that actually the number is not too long if possible_lengths does not
295+
// contain the length: we know it is less than the highest possible number
296+
// length, and higher than the lowest possible number length. However, we
297+
// don't currently have an enum to express this, so we return TOO_LONG in the
298+
// short-term.
299+
// We skip the first element; we've already checked it.
300+
return find(possible_lengths.begin() + 1, possible_lengths.end(),
301+
actual_length) != possible_lengths.end()
302+
? PhoneNumberUtil::IS_POSSIBLE : PhoneNumberUtil::TOO_LONG;
277303
}
278304

279305
} // namespace
@@ -661,8 +687,7 @@ PhoneNumberUtil::PhoneNumberUtil()
661687
country_calling_code_to_region_map.end());
662688
// Sort all the pairs in ascending order according to country calling code.
663689
std::sort(country_calling_code_to_region_code_map_->begin(),
664-
country_calling_code_to_region_code_map_->end(),
665-
OrderByFirst());
690+
country_calling_code_to_region_code_map_->end(), OrderByFirst());
666691
}
667692

668693
PhoneNumberUtil::~PhoneNumberUtil() {
@@ -786,9 +811,9 @@ bool PhoneNumberUtil::HasValidCountryCallingCode(
786811
// locate the pair with the same country_code in the sorted vector.
787812
IntRegionsPair target_pair;
788813
target_pair.first = country_calling_code;
789-
return (binary_search(country_calling_code_to_region_code_map_->begin(),
790-
country_calling_code_to_region_code_map_->end(),
791-
target_pair, OrderByFirst()));
814+
return (std::binary_search(country_calling_code_to_region_code_map_->begin(),
815+
country_calling_code_to_region_code_map_->end(),
816+
target_pair, OrderByFirst()));
792817
}
793818

794819
// Returns a pointer to the phone metadata for the appropriate region or NULL
@@ -1033,8 +1058,8 @@ void PhoneNumberUtil::FormatNumberForMobileDialing(
10331058
string national_number;
10341059
GetNationalSignificantNumber(number_no_extension, &national_number);
10351060
if (CanBeInternationallyDialled(number_no_extension) &&
1036-
!IsShorterThanPossibleNormalNumber(region_metadata,
1037-
national_number)) {
1061+
TestNumberLength(national_number, region_metadata->general_desc()) !=
1062+
TOO_SHORT) {
10381063
Format(number_no_extension, INTERNATIONAL, formatted_number);
10391064
} else {
10401065
Format(number_no_extension, NATIONAL, formatted_number);
@@ -1055,8 +1080,9 @@ void PhoneNumberUtil::FormatNumberForMobileDialing(
10551080
// national format, but don't have it when used for display. The
10561081
// reverse is true for mobile numbers. As a result, we output them in
10571082
// the international format to make it work.
1058-
((region_code == "MX" || region_code == "CL") &&
1059-
is_fixed_line_or_mobile)) &&
1083+
((region_code == "MX" ||
1084+
region_code == "CL") &&
1085+
is_fixed_line_or_mobile)) &&
10601086
CanBeInternationallyDialled(number_no_extension)) {
10611087
Format(number_no_extension, INTERNATIONAL, formatted_number);
10621088
} else {
@@ -1985,11 +2011,11 @@ PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper(
19852011
&potential_national_number,
19862012
&carrier_code);
19872013
// We require that the NSN remaining after stripping the national prefix
1988-
// and carrier code be of a possible length for the region. Otherwise, we
1989-
// don't do the stripping, since the original number could be a valid short
1990-
// number.
1991-
if (!IsShorterThanPossibleNormalNumber(country_metadata,
1992-
potential_national_number)) {
2014+
// and carrier code be long enough to be a possible length for the region.
2015+
// Otherwise, we don't do the stripping, since the original number could be
2016+
// a valid short number.
2017+
if (TestNumberLength(potential_national_number,
2018+
country_metadata->general_desc()) != TOO_SHORT) {
19932019
normalized_national_number.assign(potential_national_number);
19942020
if (keep_raw_input) {
19952021
temp_number.set_preferred_domestic_carrier_code(carrier_code);
@@ -2098,10 +2124,7 @@ PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason(
20982124
// Metadata cannot be NULL because the country calling code is valid.
20992125
const PhoneMetadata* metadata =
21002126
GetMetadataForRegionOrCallingCode(country_code, region_code);
2101-
const RegExp& possible_number_pattern = reg_exps_->regexp_cache_->GetRegExp(
2102-
StrCat("(", metadata->general_desc().possible_number_pattern(), ")"));
2103-
return TestNumberLengthAgainstPattern(possible_number_pattern,
2104-
national_number);
2127+
return TestNumberLength(national_number, metadata->general_desc());
21052128
}
21062129

21072130
bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const {
@@ -2203,19 +2226,22 @@ void PhoneNumberUtil::SetItalianLeadingZerosForPhoneNumber(
22032226
}
22042227
}
22052228

2206-
bool PhoneNumberUtil::IsNumberPossibleForDesc(
2207-
const string& national_number, const PhoneNumberDesc& number_desc) const {
2208-
return reg_exps_->regexp_cache_.get()->
2209-
GetRegExp(number_desc.possible_number_pattern())
2210-
.FullMatch(national_number);
2211-
}
2212-
22132229
bool PhoneNumberUtil::IsNumberMatchingDesc(
22142230
const string& national_number, const PhoneNumberDesc& number_desc) const {
2215-
return IsNumberPossibleForDesc(national_number, number_desc) &&
2216-
reg_exps_->regexp_cache_.get()->
2217-
GetRegExp(number_desc.national_number_pattern())
2218-
.FullMatch(national_number);
2231+
// Check if any possible number lengths are present; if so, we use them to
2232+
// avoid checking the validation pattern if they don't match. If they are
2233+
// absent, this means they match the general description, which we have
2234+
// already checked before checking a specific number type.
2235+
int actual_length = national_number.length();
2236+
if (number_desc.possible_length_size() > 0 &&
2237+
std::find(number_desc.possible_length().begin(),
2238+
number_desc.possible_length().end(),
2239+
actual_length) == number_desc.possible_length().end()) {
2240+
return false;
2241+
}
2242+
return reg_exps_->regexp_cache_
2243+
->GetRegExp(number_desc.national_number_pattern())
2244+
.FullMatch(national_number);
22192245
}
22202246

22212247
PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberTypeHelper(
@@ -2732,16 +2758,12 @@ PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode(
27322758
NULL);
27332759
VLOG(4) << "Number without country calling code prefix: "
27342760
<< potential_national_number;
2735-
const RegExp& possible_number_pattern =
2736-
reg_exps_->regexp_cache_->GetRegExp(
2737-
StrCat("(", general_num_desc.possible_number_pattern(), ")"));
27382761
// If the number was not valid before but is valid now, or if it was too
27392762
// long before, we consider the number with the country code stripped to
27402763
// be a better result and keep that instead.
27412764
if ((!valid_number_pattern.FullMatch(*national_number) &&
27422765
valid_number_pattern.FullMatch(potential_national_number)) ||
2743-
TestNumberLengthAgainstPattern(possible_number_pattern,
2744-
*national_number) == TOO_LONG) {
2766+
TestNumberLength(*national_number, general_num_desc) == TOO_LONG) {
27452767
national_number->assign(potential_national_number);
27462768
if (keep_raw_input) {
27472769
phone_number->set_country_code_source(
@@ -2897,15 +2919,6 @@ AsYouTypeFormatter* PhoneNumberUtil::GetAsYouTypeFormatter(
28972919
return new AsYouTypeFormatter(region_code);
28982920
}
28992921

2900-
bool PhoneNumberUtil::IsShorterThanPossibleNormalNumber(
2901-
const PhoneMetadata* country_metadata, const string& number) const {
2902-
const RegExp& possible_number_pattern =
2903-
reg_exps_->regexp_cache_->GetRegExp(StrCat("(",
2904-
country_metadata->general_desc().possible_number_pattern(), ")"));
2905-
return TestNumberLengthAgainstPattern(possible_number_pattern, number) ==
2906-
PhoneNumberUtil::TOO_SHORT;
2907-
}
2908-
29092922
bool PhoneNumberUtil::CanBeInternationallyDialled(
29102923
const PhoneNumber& number) const {
29112924
string region_code;

0 commit comments

Comments
 (0)