Skip to content

Commit e772042

Browse files
committed
Refactor phone context parsing for RFC3966 numbers.
1 parent 0bd1332 commit e772042

File tree

9 files changed

+577
-281
lines changed

9 files changed

+577
-281
lines changed

java/libphonenumber/src/com/google/i18n/phonenumbers/AsYouTypeFormatter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,7 @@ private String removeNationalPrefixFromNationalNumber() {
573573
*/
574574
private boolean attemptToExtractIdd() {
575575
Pattern internationalPrefix =
576-
regexCache.getPatternForRegex("\\" + PhoneNumberUtil.PLUS_SIGN + "|"
576+
regexCache.getPatternForRegex("\\" + Constants.PLUS_SIGN + "|"
577577
+ currentMetadata.getInternationalPrefix());
578578
Matcher iddMatcher = internationalPrefix.matcher(accruedInputWithoutFormatting);
579579
if (iddMatcher.lookingAt()) {
@@ -584,7 +584,7 @@ private boolean attemptToExtractIdd() {
584584
prefixBeforeNationalNumber.setLength(0);
585585
prefixBeforeNationalNumber.append(
586586
accruedInputWithoutFormatting.substring(0, startOfCountryCallingCode));
587-
if (accruedInputWithoutFormatting.charAt(0) != PhoneNumberUtil.PLUS_SIGN) {
587+
if (accruedInputWithoutFormatting.charAt(0) != Constants.PLUS_SIGN) {
588588
prefixBeforeNationalNumber.append(SEPARATOR_BEFORE_NATIONAL_NUMBER);
589589
}
590590
return true;
@@ -631,7 +631,7 @@ private boolean attemptToExtractCountryCallingCode() {
631631
// digit or the plus sign.
632632
private char normalizeAndAccrueDigitsAndPlusSign(char nextChar, boolean rememberPosition) {
633633
char normalizedChar;
634-
if (nextChar == PhoneNumberUtil.PLUS_SIGN) {
634+
if (nextChar == Constants.PLUS_SIGN) {
635635
normalizedChar = nextChar;
636636
accruedInputWithoutFormatting.append(nextChar);
637637
} else {
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/*
2+
* Copyright (C) 2025 The Libphonenumber Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.i18n.phonenumbers;
18+
19+
import java.util.Arrays;
20+
import java.util.Collections;
21+
import java.util.HashMap;
22+
import java.util.HashSet;
23+
import java.util.Map;
24+
import java.util.Set;
25+
import java.util.regex.Pattern;
26+
27+
/**
 * Constants used by the PhoneNumberUtil.
 *
 * <p>All collections exposed here are unmodifiable views built once in the static initialiser. The
 * regular-expression fragments declared after the initialiser depend on {@link #ALPHA_MAPPINGS}
 * having been populated, so the textual order of the members in this class matters.
 */
final class Constants {
  // The maximum length of the country calling code.
  static final int MAX_LENGTH_COUNTRY_CODE = 3;

  // Map of country calling codes that use a mobile token before the area code. One example of when
  // this is relevant is when determining the length of the national destination code, which should
  // be the length of the area code plus the length of the mobile token.
  static final Map<Integer, String> MOBILE_TOKEN_MAPPINGS;

  // Set of country codes that have geographically assigned mobile numbers (see GEO_MOBILE_COUNTRIES
  // below) which are not based on *area codes*. For example, in China mobile numbers start with a
  // carrier indicator, and beyond that are geographically assigned: this carrier indicator is not
  // considered to be an area code.
  static final Set<Integer> GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES;

  // Set of country codes that do not have a national prefix but do have area codes.
  static final Set<Integer> COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES;

  // Set of country calling codes that have geographically assigned mobile numbers. This may not be
  // complete; we add calling codes case by case, as we find geographical mobile numbers or hear
  // from user reports. Note that countries like the US, where we can't distinguish between
  // fixed-line or mobile numbers, are not listed here, since we consider FIXED_LINE_OR_MOBILE to be
  // a possibly geographically-related type anyway (like FIXED_LINE).
  static final Set<Integer> GEO_MOBILE_COUNTRIES;

  // The PLUS_SIGN signifies the international prefix.
  static final char PLUS_SIGN = '+';

  // The prefix that introduces the phone-context parameter in an RFC3966 number.
  static final String RFC3966_PHONE_CONTEXT = ";phone-context=";

  // A map that contains characters that are essential when dialling. That means any of the
  // characters in this map must not be removed from a number when dialling, otherwise the call
  // will not reach the intended destination.
  static final Map<Character, Character> DIALLABLE_CHAR_MAPPINGS;

  // Only upper-case variants of alpha characters are stored.
  static final Map<Character, Character> ALPHA_MAPPINGS;

  // For performance reasons, amalgamate both into one map.
  static final Map<Character, Character> ALPHA_PHONE_MAPPINGS;

  // Separate map of all symbols that we wish to retain when formatting alpha numbers. This
  // includes digits, ASCII letters and number grouping symbols such as "-" and " ".
  static final Map<Character, Character> ALL_PLUS_NUMBER_GROUPING_SYMBOLS;

  static {
    Map<Integer, String> mobileTokenMap = new HashMap<>();
    mobileTokenMap.put(54, "9");
    MOBILE_TOKEN_MAPPINGS = Collections.unmodifiableMap(mobileTokenMap);

    Set<Integer> geoMobileCountriesWithoutMobileAreaCodes = new HashSet<>();
    geoMobileCountriesWithoutMobileAreaCodes.add(86); // China
    GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES =
        Collections.unmodifiableSet(geoMobileCountriesWithoutMobileAreaCodes);

    Set<Integer> countriesWithoutNationalPrefixWithAreaCodes = new HashSet<>();
    countriesWithoutNationalPrefixWithAreaCodes.add(52); // Mexico
    COUNTRIES_WITHOUT_NATIONAL_PREFIX_WITH_AREA_CODES =
        Collections.unmodifiableSet(countriesWithoutNationalPrefixWithAreaCodes);

    Set<Integer> geoMobileCountries = new HashSet<>();
    geoMobileCountries.add(52); // Mexico
    geoMobileCountries.add(54); // Argentina
    geoMobileCountries.add(55); // Brazil
    geoMobileCountries.add(62); // Indonesia: some prefixes only (fixed CDMA wireless)
    geoMobileCountries.addAll(geoMobileCountriesWithoutMobileAreaCodes);
    GEO_MOBILE_COUNTRIES = Collections.unmodifiableSet(geoMobileCountries);

    // Simple ASCII digits map used to populate ALPHA_PHONE_MAPPINGS and
    // ALL_PLUS_NUMBER_GROUPING_SYMBOLS.
    Map<Character, Character> asciiDigitMappings = new HashMap<>();
    asciiDigitMappings.put('0', '0');
    asciiDigitMappings.put('1', '1');
    asciiDigitMappings.put('2', '2');
    asciiDigitMappings.put('3', '3');
    asciiDigitMappings.put('4', '4');
    asciiDigitMappings.put('5', '5');
    asciiDigitMappings.put('6', '6');
    asciiDigitMappings.put('7', '7');
    asciiDigitMappings.put('8', '8');
    asciiDigitMappings.put('9', '9');

    Map<Character, Character> alphaMap = new HashMap<>(40);
    alphaMap.put('A', '2');
    alphaMap.put('B', '2');
    alphaMap.put('C', '2');
    alphaMap.put('D', '3');
    alphaMap.put('E', '3');
    alphaMap.put('F', '3');
    alphaMap.put('G', '4');
    alphaMap.put('H', '4');
    alphaMap.put('I', '4');
    alphaMap.put('J', '5');
    alphaMap.put('K', '5');
    alphaMap.put('L', '5');
    alphaMap.put('M', '6');
    alphaMap.put('N', '6');
    alphaMap.put('O', '6');
    alphaMap.put('P', '7');
    alphaMap.put('Q', '7');
    alphaMap.put('R', '7');
    alphaMap.put('S', '7');
    alphaMap.put('T', '8');
    alphaMap.put('U', '8');
    alphaMap.put('V', '8');
    alphaMap.put('W', '9');
    alphaMap.put('X', '9');
    alphaMap.put('Y', '9');
    alphaMap.put('Z', '9');
    ALPHA_MAPPINGS = Collections.unmodifiableMap(alphaMap);

    Map<Character, Character> combinedMap = new HashMap<>(100);
    combinedMap.putAll(ALPHA_MAPPINGS);
    combinedMap.putAll(asciiDigitMappings);
    ALPHA_PHONE_MAPPINGS = Collections.unmodifiableMap(combinedMap);

    Map<Character, Character> diallableCharMap = new HashMap<>();
    diallableCharMap.putAll(asciiDigitMappings);
    diallableCharMap.put(PLUS_SIGN, PLUS_SIGN);
    diallableCharMap.put('*', '*');
    diallableCharMap.put('#', '#');
    DIALLABLE_CHAR_MAPPINGS = Collections.unmodifiableMap(diallableCharMap);

    Map<Character, Character> allPlusNumberGroupings = new HashMap<>();
    // Put (lower letter -> upper letter) and (upper letter -> upper letter) mappings.
    // Character.toLowerCase(char) is locale-independent, so this is safe under any default locale.
    for (char c : ALPHA_MAPPINGS.keySet()) {
      allPlusNumberGroupings.put(Character.toLowerCase(c), c);
      allPlusNumberGroupings.put(c, c);
    }
    allPlusNumberGroupings.putAll(asciiDigitMappings);
    // Put grouping symbols.
    allPlusNumberGroupings.put('-', '-');
    allPlusNumberGroupings.put('\uFF0D', '-');
    allPlusNumberGroupings.put('\u2010', '-');
    allPlusNumberGroupings.put('\u2011', '-');
    allPlusNumberGroupings.put('\u2012', '-');
    allPlusNumberGroupings.put('\u2013', '-');
    allPlusNumberGroupings.put('\u2014', '-');
    allPlusNumberGroupings.put('\u2015', '-');
    allPlusNumberGroupings.put('\u2212', '-');
    allPlusNumberGroupings.put('/', '/');
    allPlusNumberGroupings.put('\uFF0F', '/');
    allPlusNumberGroupings.put(' ', ' ');
    allPlusNumberGroupings.put('\u3000', ' ');
    allPlusNumberGroupings.put('\u2060', ' ');
    allPlusNumberGroupings.put('.', '.');
    allPlusNumberGroupings.put('\uFF0E', '.');
    ALL_PLUS_NUMBER_GROUPING_SYMBOLS = Collections.unmodifiableMap(allPlusNumberGroupings);
  }

  // Regular-expression fragment matching a single Unicode decimal digit.
  static final String DIGITS = "\\p{Nd}";
  // We accept alpha characters in phone numbers, ASCII only, upper and lower case.
  // The value is the keys of ALPHA_MAPPINGS followed by their lower-case forms, and is only ever
  // spliced into regex character classes. The lower-casing must use Locale.ROOT: with the
  // locale-sensitive String.toLowerCase(), a Turkish or Azeri default locale maps 'I' to the
  // dotless 'ı' (U+0131), which would silently drop ASCII 'i' from every pattern built from this.
  static final String VALID_ALPHA =
      Arrays.toString(ALPHA_MAPPINGS.keySet().toArray()).replaceAll("[, \\[\\]]", "")
      + Arrays.toString(ALPHA_MAPPINGS.keySet().toArray())
          .toLowerCase(java.util.Locale.ROOT).replaceAll("[, \\[\\]]", "");

  // We use this pattern to check if the phone number has at least three letters in it - if so, then
  // we treat it as a number where some phone-number digits are represented by letters.
  static final Pattern VALID_ALPHA_PHONE_PATTERN = Pattern.compile("(?:.*?[A-Za-z]){3}.*");

  // Regular expression of valid global-number-digits for the phone-context parameter, following the
  // syntax defined in RFC3966.
  static final String RFC3966_VISUAL_SEPARATOR = "[\\-\\.\\(\\)]?";
  static final String RFC3966_PHONE_DIGIT =
      "(" + DIGITS + "|" + RFC3966_VISUAL_SEPARATOR + ")";
  // A plus sign followed by phone digits, at least one of which must be an actual digit.
  static final String RFC3966_GLOBAL_NUMBER_DIGITS =
      "^\\" + PLUS_SIGN + RFC3966_PHONE_DIGIT + "*" + DIGITS + RFC3966_PHONE_DIGIT + "*$";
  static final Pattern RFC3966_GLOBAL_NUMBER_DIGITS_PATTERN =
      Pattern.compile(RFC3966_GLOBAL_NUMBER_DIGITS);

  // Regular expression of valid domainname for the phone-context parameter, following the syntax
  // defined in RFC3966.
  // ALPHANUM is the content of a character class (no surrounding brackets).
  static final String ALPHANUM = VALID_ALPHA + DIGITS;
  static final String RFC3966_DOMAINLABEL =
      "[" + ALPHANUM + "]+((\\-)*[" + ALPHANUM + "])*";
  // Unlike a domain label, the top label must start with a letter.
  static final String RFC3966_TOPLABEL =
      "[" + VALID_ALPHA + "]+((\\-)*[" + ALPHANUM + "])*";
  static final String RFC3966_DOMAINNAME =
      "^(" + RFC3966_DOMAINLABEL + "\\.)*" + RFC3966_TOPLABEL + "\\.?$";
  static final Pattern RFC3966_DOMAINNAME_PATTERN = Pattern.compile(RFC3966_DOMAINNAME);

  // Not instantiable: this class only holds constants.
  private Constants() {}
}

0 commit comments

Comments
 (0)