Skip to content

Commit ecb5589

Browse files
authored
Merge pull request #106 from rosette-api/rcb-620-name-and-address-similarity-parameters-overrides
RCB-620: Name and Address match parameter overrides.
2 parents fa48b56 + 86b2952 commit ecb5589

File tree

7 files changed

+130
-31
lines changed

7 files changed

+130
-31
lines changed

CI.Jenkinsfile

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22

3-
def versions = [3.11, 3.10, 3.9, 3.8, 3.7]
3+
def versions = [3.7, 3.8, 3.9, 3.10, 3.11]
44

55
def runSonnarForPythonVersion(sourceDir, ver){
66
mySonarOpts="-Dsonar.sources=/source -Dsonar.host.url=${env.SONAR_HOST_URL} -Dsonar.login=${env.SONAR_AUTH_TOKEN}"
@@ -13,21 +13,27 @@ def runSonnarForPythonVersion(sourceDir, ver){
1313
mySonarOpts="$mySonarOpts -Dsonar.pullrequest.base=${env.CHANGE_TARGET} -Dsonar.pullrequest.branch=${env.CHANGE_BRANCH}"
1414
}
1515

16-
// TODO: find a way to skip the Sonar scan for all those version, but one (maybe the latest?).
16+
// Only run Sonar once.
17+
if(ver == 3.11) {
18+
sonarExec="cd /root/ && \
19+
wget -q https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-4.8.0.2856-linux.zip && \
20+
unzip -q sonar-scanner-cli-4.8.0.2856-linux.zip && \
21+
cd /source && \
22+
/root/sonar-scanner-4.8.0.2856-linux/bin/sonar-scanner ${mySonarOpts}"
23+
} else {
24+
sonarExec="echo Skipping Sonar for this version."
25+
}
26+
1727
sh "docker run \
1828
--pull always \
1929
--rm --volume ${sourceDir}:/source \
2030
python:${ver}-slim \
2131
bash -c \"apt-get update && \
2232
apt-get install -y wget unzip && \
2333
pip3 install tox && \
24-
cd /root/ && \
25-
wget -q https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-4.8.0.2856-linux.zip && \
26-
unzip -q sonar-scanner-cli-4.8.0.2856-linux.zip && \
2734
cd /source && \
2835
tox && \
29-
/root/sonar-scanner-4.8.0.2856-linux/bin/sonar-scanner \
30-
${mySonarOpts}\""
36+
${sonarExec}\""
3137
}
3238

3339
node ("docker-light") {

examples/address_similarity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'):
1818
params = AddressSimilarityParameters()
1919
params["address1"] = {"houseNumber": "1600", "road": "Pennsylvania Ave NW", "city": "Washington", "state": "DC", "postCode": "20500"}
2020
params["address2"] = "160 Pennsilvana Avenue, Washington, D.C., 20500"
21+
#params["parameters"] = {"houseNumberAddressFieldWeight": "0.9"}
2122

2223
try:
2324
return api.address_similarity(params)

examples/name_similarity.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'):
2020
params = NameSimilarityParameters()
2121
params["name1"] = {"text": matched_name_data1, "language": "eng", "entityType": "PERSON"}
2222
params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"}
23+
#params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.2"}
24+
2325
try:
2426
return api.name_similarity(params)
2527
except RosetteException as exception:

rosette/api.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def serialize(self, options):
113113
"""serialize keys with values"""
114114
self.validate()
115115
values = {}
116-
for (key, val) in self.__params.items():
116+
for key, val in self.__params.items():
117117
if val is None:
118118
continue
119119
else:
@@ -212,7 +212,7 @@ class NameTranslationParameters(_DocumentParamSetBase):
212212
213213
C{targetLanguage} The language into which the name is to be translated.
214214
215-
C{entityType} The entity type (TBD) of the name.
215+
C{entityType} The entity type of the name. PERSON (default), LOCATION, or ORGANIZATION
216216
217217
C{sourceLanguageOfOrigin} The language of origin of the name.
218218
@@ -240,7 +240,7 @@ def __init__(self):
240240

241241
def validate(self):
242242
"""Internal. Do not use."""
243-
for option in ("name", "targetLanguage"): # required
243+
for option in "name", "targetLanguage": # required
244244
if self[option] is None:
245245
raise RosetteException(
246246
"missingParameter",
@@ -250,23 +250,32 @@ def validate(self):
250250

251251
class AddressSimilarityParameters(_DocumentParamSetBase):
252252
"""Parameter object for C{address-similarity} endpoint.
253-
All are required.
253+
254+
C{address1} and C{address2} are required.
255+
256+
C{parameters} is optional.
254257
255258
C{address1} The address to be matched, a C{address} object or address string.
256259
257260
C{address2} The address to be matched, a C{address} object or address string.
258261
259262
The C{address} object contains these optional fields:
260263
city, island, district, stateDistrict, state, countryRegion, country, worldRegion, postCode, poBox
264+
265+
C{parameters} is a dictionary listing any parameter overrides to include. For example, C{postCodeAddressFieldWeight}.
266+
Setting `parameters` is not cumulative. Define all overrides at once. If defined multiple times, only the
267+
final declaration is used.
268+
269+
See `examples/address_similarity.py`
261270
"""
262271

263272
def __init__(self):
264273
self.use_multipart = False
265-
_DocumentParamSetBase.__init__(self, ("address1", "address2"))
274+
_DocumentParamSetBase.__init__(self, ("address1", "address2", "parameters"))
266275

267276
def validate(self):
268277
"""Internal. Do not use."""
269-
for option in ("address1", "address2"): # required
278+
for option in "address1", "address2": # required
270279
if self[option] is None:
271280
raise RosetteException(
272281
"missingParameter",
@@ -276,30 +285,39 @@ def validate(self):
276285

277286
class NameSimilarityParameters(_DocumentParamSetBase):
278287
"""Parameter object for C{name-similarity} endpoint.
279-
All are required.
288+
289+
C{name1} and C{name2} are required.
290+
291+
C{parameters} is optional.
280292
281293
C{name1} The name to be matched, a C{name} object.
282294
283295
C{name2} The name to be matched, a C{name} object.
284296
285297
The C{name} object contains these fields:
286298
287-
C{text} Text of the name, required.
299+
C{text} Text of the name, required.
300+
301+
C{language} Language of the name in ISO639 three-letter code, optional.
302+
303+
C{script} The ISO15924 code of the name, optional.
288304
289-
C{language} Language of the name in ISO639 three-letter code, optional.
305+
C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
290306
291-
C{script} The ISO15924 code of the name, optional.
307+
C{parameters} is a dictionary listing any parameter overrides to include. For example, C{deletionScore}.
308+
Setting `parameters` is not cumulative. Define all overrides at once. If defined multiple times, only the
309+
final declaration is used.
292310
293-
C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
311+
See `examples/name_similarity.py`
294312
"""
295313

296314
def __init__(self):
297315
self.use_multipart = False
298-
_DocumentParamSetBase.__init__(self, ("name1", "name2"))
316+
_DocumentParamSetBase.__init__(self, ("name1", "name2", "parameters"))
299317

300318
def validate(self):
301319
"""Internal. Do not use."""
302-
for option in ("name1", "name2"): # required
320+
for option in "name1", "name2": # required
303321
if self[option] is None:
304322
raise RosetteException(
305323
"missingParameter",

sonar-project.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
sonar.projectKey=rosette-api-python-binding
22
sonar.sources=rosette
3+
sonar.exclusions=**/tests/**,**/docs/**,**/examples/**
34
sonar.python.coverage.reportPaths=coverage.xml
45
#sonar.branch.name=RCB-596-pool-size

tests/test_rosette_api.py

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,48 @@ def test_the_name_translation_endpoint(api, json_response):
427427
# Test the name similarity endpoint
428428

429429

430+
def test_the_name_similarity_single_parameters(api, json_response):
431+
"""Test the name similarity parameters"""
432+
httpretty.enable()
433+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info",
434+
body=json_response, status=200, content_type="application/json")
435+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity",
436+
body=json_response, status=200, content_type="application/json")
437+
438+
matched_name_data1 = "John Mike Smith"
439+
matched_name_data2 = "John Joe Smith"
440+
params = NameSimilarityParameters()
441+
params["name1"] = {"text": matched_name_data1}
442+
params["name2"] = {"text": matched_name_data2}
443+
params["parameters"] = {"conflictScore": "0.9"}
444+
445+
result = api.name_similarity(params)
446+
assert result["name"] == "Rosette"
447+
httpretty.disable()
448+
httpretty.reset()
449+
450+
451+
def test_the_name_similarity_multiple_parameters(api, json_response):
452+
"""Test the name similarity parameters"""
453+
httpretty.enable()
454+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info",
455+
body=json_response, status=200, content_type="application/json")
456+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity",
457+
body=json_response, status=200, content_type="application/json")
458+
459+
matched_name_data1 = "John Mike Smith"
460+
matched_name_data2 = "John Joe Smith"
461+
params = NameSimilarityParameters()
462+
params["name1"] = {"text": matched_name_data1}
463+
params["name2"] = {"text": matched_name_data2}
464+
params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.5"}
465+
466+
result = api.name_similarity(params)
467+
assert result["name"] == "Rosette"
468+
httpretty.disable()
469+
httpretty.reset()
470+
471+
430472
def test_the_name_similarity_endpoint(api, json_response):
431473
"""Test the name similarity endpoint"""
432474
httpretty.enable()
@@ -449,10 +491,11 @@ def test_the_name_similarity_endpoint(api, json_response):
449491
httpretty.disable()
450492
httpretty.reset()
451493

494+
452495
# Test the name deduplication endpoint
453496

454497

455-
def test_name_deduplicatation_parameters(api, json_response):
498+
def test_name_deduplication_parameters(api, json_response):
456499
"""Test the Name Deduplication Parameters"""
457500
httpretty.enable()
458501
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info",
@@ -546,7 +589,7 @@ def test_for_content_and_contentUri(api, json_response, doc_params):
546589
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities",
547590
body=json_response, status=200, content_type="application/json")
548591

549-
doc_params['contentUri'] = 'http://google.com'
592+
doc_params['contentUri'] = 'https://example.com'
550593
with pytest.raises(RosetteException) as e_rosette:
551594
api.entities(doc_params)
552595

@@ -612,6 +655,32 @@ def test_for_address_similarity_required_parameters(api, json_response):
612655
httpretty.reset()
613656

614657

658+
def test_for_address_similarity_optional_parameters(api, json_response):
659+
"""Test address similarity parameters"""
660+
httpretty.enable()
661+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info",
662+
body=json_response, status=200, content_type="application/json")
663+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/address-similarity",
664+
body=json_response, status=200, content_type="application/json")
665+
666+
params = AddressSimilarityParameters()
667+
668+
params["address1"] = {"houseNumber": "1600",
669+
"road": "Pennsylvania Ave NW",
670+
"city": "Washington",
671+
"state": "DC",
672+
"postCode": "20500"}
673+
674+
params["address2"] = {"text": "160 Pennsilvana Avenue, Washington, D.C., 20500"}
675+
676+
params["parameters"] = {"houseNumberAddressFieldWeight": "0.9"}
677+
678+
result = api.address_similarity(params)
679+
assert result["name"] == "Rosette"
680+
httpretty.disable()
681+
httpretty.reset()
682+
683+
615684
# Test for required Name Similarity parameters
616685

617686

tox.ini

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
1-
# Tox (http://tox.testrun.org/) is a tool for running tests
2-
# in multiple virtualenvs. This configuration file will run the
3-
# test suite on all supported python versions. To use it, "pip install tox"
4-
# and then run "tox" from this directory.
5-
61
[tox]
7-
skipsdist = True
82
envlist = py3
3+
skipsdist = True
94

105
[testenv]
11-
commands =
12-
{envpython} setup.py install
13-
{envbindir}/py.test
146
deps =
157
pytest
168
pep8
179
httpretty
1810
epydoc
1911
requests
12+
coverage
13+
build
14+
15+
commands =
16+
python -m build
17+
coverage run -m pytest
18+
coverage xml
19+
20+
[coverage:run]
21+
relative_files = True

0 commit comments

Comments
 (0)