Skip to content

Commit 310767a

Browse files
authored
Merge pull request #4 from dedupeio/jfc/attempted-match
Define indicator variable for attempted match and configure deployments
2 parents 658139a + be5c0ab commit 310767a

File tree

5 files changed

+97
-29
lines changed

5 files changed

+97
-29
lines changed

.github/workflows/main.yml

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ on:
44
push:
55
branches:
66
- master
7+
tags:
8+
- v*
79
pull_request:
810
branches:
911
- master
@@ -25,6 +27,31 @@ jobs:
2527
run: |
2628
pip install --upgrade pip
2729
pip install -e .[tests]
28-
pip uninstall -y ilcs-parser
29-
pip install -i https://test.pypi.org/simple/ ilcs-parser
3030
pytest
31+
deploy:
32+
if: github.event_name == 'push' && (startsWith(github.ref, 'refs/tags') || endsWith(github.ref, 'master'))
33+
needs: test
34+
runs-on: ubuntu-latest
35+
steps:
36+
- uses: actions/checkout@v1
37+
- uses: actions/setup-python@v1
38+
with:
39+
python-version: 3.8
40+
- name: Build distribution files
41+
run: |
42+
pip install --upgrade pip setuptools wheel
43+
pip install -e .[tests]
44+
python setup.py sdist bdist_wheel
45+
- name: Publish to test PyPI
46+
if: endsWith(github.ref, 'master')
47+
uses: pypa/gh-action-pypi-publish@<version>
48+
with:
49+
user: __token__
50+
password: ${{ secrets.test_pypi_password }}
51+
repository_url: https://test.pypi.org/legacy/
52+
- name: Publish to PyPI
53+
if: startsWith(github.ref, 'refs/tags')
54+
uses: pypa/gh-action-pypi-publish@<version>
55+
with:
56+
user: __token__
57+
password: ${{ secrets.pypi_password }}

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
README.md
2+
LICENSE

dedupe/variables/ilcs.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
1-
import string
2-
import math
3-
import functools
4-
51
import numpy
62
import ilcs_parser
7-
from parseratorvariable import ParseratorType, consolidate
3+
from parseratorvariable import ParseratorType
84

95
CITATION = (
106
('chapter', ('Chapter',)),
117
('act prefix', ('ActPrefix',)),
128
('section', ('Section',)),
139
('subsection', ('SubSection',)),
14-
('attempted', ('Attempted',))
1510
)
1611

1712

@@ -25,29 +20,44 @@ def __init__(self, definition):
2520
self.components = (('Citation', self.compareFields, CITATION),)
2621
block_parts = ('Citation',)
2722
super().__init__(definition, ilcs_parser.tag, block_parts)
28-
# Add exact match to the distance vector
29-
self.expanded_size += 1
23+
# Add exact match and attempted indicators to the distance vector
24+
self.num_additional_indicators = 2
25+
self.expanded_size += self.num_additional_indicators
3026

3127
def fields(self, field):
3228
"""
3329
Override the parent method to append an exact match field.
3430
"""
3531
fields = super().fields(field)
36-
fields += [('exact match', 'Exact')]
32+
fields += [('attempted match', 'Dummy'), ('exact match', 'Exact')]
3733
return fields
3834

3935
def comparator(self, field_1, field_2):
4036
"""
4137
Override the parent method to append an exact match field.
4238
"""
43-
# Temporarily subtract the exact match indicator from expanded_size,
44-
# since the parent method assumes that the last element of the distance
45-
# vector is the full-string comparison.
46-
self.expanded_size -= 1
39+
# Temporarily subtract the exact and attempted match indicators from
40+
# expanded_size, since the parent method assumes that the last element
41+
# of the distance vector is the full-string comparison
42+
self.expanded_size -= self.num_additional_indicators
4743
distances = super().comparator(field_1, field_2)
48-
self.expanded_size += 1
44+
self.expanded_size += self.num_additional_indicators
45+
46+
# Set the attempted match indicator variable
47+
try:
48+
parsed_variable_1 = self.tagger(field_1)
49+
parsed_variable_2 = self.tagger(field_2)
50+
except TypeError:
51+
attempted = 0
52+
else:
53+
variable_type_1, variable_type_2 = parsed_variable_1[1], parsed_variable_2[1]
54+
if 'Ambiguous' in (variable_type_1, variable_type_2):
55+
attempted = 0
56+
else:
57+
attempted = int(parsed_variable_1.is_attempted == parsed_variable_2.is_attempted)
58+
distances = numpy.append(distances, attempted)
4959

50-
# Set the exact match indicator variable.
60+
# Set the exact match indicator variable
5161
exact_match = 1 if field_1 == field_2 else 0
5262
distances = numpy.append(distances, exact_match)
5363

setup.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,24 @@
77
"for installing setuptools"
88
)
99

10+
11+
def readme():
12+
with open('README.md') as f:
13+
return f.read()
14+
15+
1016
setup(
1117
version='0.0.0',
18+
author='DataMade',
1219
url='https://github.com/dedupeio/dedupe-variable-ilcs',
1320
description='Dedupe variable for Illinois Compiled Statute (ILCS) codes',
21+
long_description=readme(),
22+
long_description_content_type='text/markdown',
1423
name='dedupe-variable-ilcs',
1524
packages=['dedupe.variables'],
1625
license='The MIT License: http://www.opensource.org/licenses/mit-license.php',
1726
install_requires=[
18-
'ilcs-parser @ https://github.com/datamade/ilcs-parser/archive/master.zip#egg=ilcs-parser-0.0.0',
27+
'ilcs-parser',
1928
'parseratorvariable'
2029
],
2130
extras_require={'tests': ['pytest', 'parserator']},

tests/test_ilcs.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,30 @@
33

44
def test_all_distances(ilcs):
55
numpy.testing.assert_almost_equal(
6-
ilcs.comparator('125 55/21-a (att)', '126 55/21-b (atttt)'),
6+
ilcs.comparator('125 55/21-a (att)', '126 55/21-b (att)'),
77
numpy.array([
8-
1, 0, 1,
9-
ilcs.compareString('126', '125'),
10-
ilcs.compareString('55', '55'),
11-
ilcs.compareString('21', '21'),
12-
ilcs.compareString('a', 'b'),
13-
ilcs.compareString('att', 'atttt'),
14-
1, 1, 1, 1, 1, 0, 0
8+
1, # citation: Not Missing
9+
0, # ambiguous: Dummy
10+
1, # same name type?: Dummy
11+
ilcs.compareString('126', '125'), # chapter: Derived
12+
ilcs.compareString('55', '55'), # act prefix: Derived
13+
ilcs.compareString('21', '21'), # section: Derived
14+
ilcs.compareString('a', 'b'), # subsection: Derived
15+
1, # chapter: Not Missing
16+
1, # act prefix: Not Missing
17+
1, # section: Not Missing
18+
1, # subsection: Not Missing
19+
0, # full string: String
20+
1, # attempted match: Dummy
21+
0 # exact match: Exact
1522
])
1623
)
1724

1825

1926
def test_exact_match(ilcs):
2027
numpy.testing.assert_almost_equal(
2128
ilcs.comparator('125 55/21 (att)', '125 55/21 (att)'),
22-
numpy.array([1, 0, 1, 0.5, 0.5, 0.5, 0, 0.5, 1, 1, 1, 0, 1, 0, 1])
29+
numpy.array([1, 0, 1, 0.5, 0.5, 0.5, 0, 1, 1, 1, 0, 0, 1, 1])
2330
)
2431

2532

@@ -29,8 +36,21 @@ def test_mismatched_elements(ilcs):
2936
numpy.array([
3037
1, 0, 1,
3138
ilcs.compareString('125', '125'),
32-
ilcs.compareString('56', '55'),
39+
ilcs.compareString('55', '56'),
40+
ilcs.compareString('21', '21'),
41+
0, 1, 1, 1, 0, 0, 0, 0
42+
])
43+
)
44+
45+
46+
def test_attempted_match(ilcs):
47+
numpy.testing.assert_almost_equal(
48+
ilcs.comparator('725 5/21-a (att)', '720-5/8-4 725 6/21'),
49+
numpy.array([
50+
1, 0, 1,
51+
ilcs.compareString('125', '125'),
52+
ilcs.compareString('5', '6'),
3353
ilcs.compareString('21', '21'),
34-
0, 0, 1, 1, 1, 0, 0, 0, 0
54+
0, 1, 1, 1, 0, 0, 1, 0
3555
])
3656
)

0 commit comments

Comments
 (0)