Skip to content

Commit 14f1825

Browse files
authored
PTHMINT-77: Add AddressParser (#28)
1 parent 7159696 commit 14f1825

File tree

2 files changed

+296
-0
lines changed

2 files changed

+296
-0
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright (c) MultiSafepay, Inc. All rights reserved.
2+
3+
# This file is licensed under the Open Software License (OSL) version 3.0.
4+
# For a copy of the license, see the LICENSE.txt file in the project root.
5+
6+
# See the DISCLAIMER.md file for disclaimer details.
7+
8+
import re
9+
from typing import List
10+
11+
12+
class AddressParser:
    """
    Class AddressParser.

    Parses and splits up an address in street and house number
    """

    # Three captured groups, anchored at the end of the string:
    #   1. leading text (as short as possible),
    #   2. a token that starts with digits and runs to the next whitespace,
    #   3. an optional trailing run of letters, each optionally preceded by
    #      a single whitespace character.
    # The letter class is spelled out as [A-Za-z]: the range A-z would also
    # accept the punctuation that sits between "Z" and "a" in ASCII
    # ([ \ ] ^ _ `), which is not a house-number extension.
    _ADDRESS_PATTERN = re.compile(r"(.+?)\s?(\d+\S*)((?:\s?[A-Za-z])*?)$")

    def parse(
        self: "AddressParser",
        address1: str,
        address2: str = "",
    ) -> List[str]:
        """
        Parses and splits up an address in street and house number.

        Both address lines are joined with a space, surrounding whitespace
        is stripped and every run of whitespace is collapsed to one space
        before the address pattern is applied.

        Args:
        ----
            address1 (str): Primary address line
            address2 (str): Secondary address line (optional)

        Returns:
        -------
            List[str]: [street, house_number] where street is the street name
                      and house_number is the house number with any
                      extensions. When the pattern does not match, the
                      normalised full address is returned as the street and
                      the house number is an empty string.

        """
        # Remove whitespaces from the beginning and end
        full_address = f"{address1} {address2}".strip()

        # Turn multiple whitespaces into one single whitespace
        full_address = re.sub(r"\s+", " ", full_address)

        matches = self._ADDRESS_PATTERN.match(full_address)
        if matches is None:
            return [full_address, ""]

        # All three groups always take part in a successful match (the third
        # may be empty), so none of them can be None here.
        group1, group2, group3 = matches.groups()
        return self.extract_street_and_apartment(group1, group2, group3)

    def extract_street_and_apartment(
        self: "AddressParser",
        group1: str,
        group2: str,
        group3: str,
    ) -> List[str]:
        """
        Extract the street and apartment from the matched RegEx results.

        When the address starts with a number, it is most likely that group1
        and group2 together form the house number. We therefore check if
        group1 and group2 are both numeric; if so, group3 is taken as the
        street and group1 and group2 are joined as the apartment.
        If group1 or group2 contains more than just digits, group1 is taken
        as the street and group2 and group3 are joined as the apartment.

        Args:
        ----
            group1 (str): First captured group from regex
            group2 (str): Second captured group from regex
            group3 (str): Third captured group from regex

        Returns:
        -------
            List[str]: [street, apartment] where street is the street name
                      and apartment is the house number with extensions

        """
        if group1.isdigit() and group2.isdigit():
            # Number-first address, e.g. "10 Downing Street": the lazy first
            # group took the first digit and the second group the rest.
            return [group3.strip(), f"{group1}{group2}".strip()]

        return [group1.strip(), f"{group2}{group3}".strip()]
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
# Copyright (c) MultiSafepay, Inc. All rights reserved.
2+
3+
# This file is licensed under the Open Software License (OSL) version 3.0.
4+
# For a copy of the license, see the LICENSE.txt file in the project root.
5+
6+
# See the DISCLAIMER.md file for disclaimer details.
7+
8+
import pytest
9+
from multisafepay.util.address_parser import AddressParser
10+
11+
12+
class TestAddressParser:
    """Data-driven checks for AddressParser.parse."""

    @pytest.mark.parametrize(
        ("address1", "address2", "expected_street", "expected_apartment"),
        [
            # Street first, house number on either address line
            ("Kraanspoor", "39", "Kraanspoor", "39"),
            ("Kraanspoor ", "39", "Kraanspoor", "39"),
            ("Kraanspoor 39", "", "Kraanspoor", "39"),
            ("Kraanspoor 39 ", "", "Kraanspoor", "39"),
            ("Kraanspoor", "39 ", "Kraanspoor", "39"),
            ("Kraanspoor39", "", "Kraanspoor", "39"),
            ("Kraanspoor39c", "", "Kraanspoor", "39c"),
            # Street names that themselves contain numbers
            ("laan 1933 2", "", "laan 1933", "2"),
            ("laan 1933", "2", "laan 1933", "2"),
            ("18 septemberplein 12", "", "18 septemberplein", "12"),
            ("18 septemberplein", "12", "18 septemberplein", "12"),
            # House numbers with extensions
            ("kerkstraat 42-f3", "", "kerkstraat", "42-f3"),
            ("kerkstraat", "42-f3", "kerkstraat", "42-f3"),
            ("Kerk straat 2b", "", "Kerk straat", "2b"),
            ("Kerk straat", "2b", "Kerk straat", "2b"),
            (
                "1e constantijn huigensstraat 1b",
                "",
                "1e constantijn huigensstraat",
                "1b",
            ),
            (
                "1e constantijn huigensstraat",
                "1b",
                "1e constantijn huigensstraat",
                "1b",
            ),
            ("Heuvel, 2a", "", "Heuvel,", "2a"),
            ("1e Jan van Kraanspoor", "2", "1e Jan van Kraanspoor", "2"),
            ("Neherkade 1 XI", "", "Neherkade", "1 XI"),
            ("Kamp 20 38", "", "Kamp 20", "38"),
            # House number first
            ("2065 Rue de la Gare", "", "Rue de la Gare", "2065"),
            ("10 Downing Street", "", "Downing Street", "10"),
            ("27", "Alexander Road", "Alexander Road", "27"),
            ("15 Sullivan", "", "Sullivan", "15"),
            ("110 Kraanspoor", "", "Kraanspoor", "110"),
            # No house number at all
            ("Plaza Callao s/n", "", "Plaza Callao s/n", ""),
        ],
    )
    def test_parse_addresses_from_data_provider(
        self: "TestAddressParser",
        address1: str,
        address2: str,
        expected_street: str,
        expected_apartment: str,
    ) -> None:
        """
        Run parse on one parametrized address and compare both parts.

        Args:
        ----
            address1: Primary address line
            address2: Secondary address line
            expected_street: Street name the parser should return
            expected_apartment: House number (with extension) the parser
                should return

        """
        result = AddressParser().parse(address1, address2)

        # Street is checked first so a street failure is reported before the
        # apartment element is compared.
        street_matches = result[0] == expected_street
        assert street_matches, (
            f"Street mismatch: expected '{expected_street}', got '{result[0]}'"
        )

        apartment_matches = result[1] == expected_apartment
        assert apartment_matches, (
            f"Apartment mismatch: expected '{expected_apartment}', got '{result[1]}'"
        )

0 commit comments

Comments
 (0)