Skip to content

Commit c95c4c5

Browse files
committed
Merge branch 'ConsiderClassifyingProperties' of https://github.com/Wikidata-lib/PropertySuggester-Python into ConsiderClassifyingProperties
Conflicts: propertysuggester/analyzer/impl/MainAnalyzer.py
2 parents 693d0dc + cff1f46 commit c95c4c5

File tree

4 files changed

+16
-8
lines changed

4 files changed

+16
-8
lines changed

propertysuggester/analyzer/RuleGenerator.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
1+
import ConfigParser
2+
import os
13
from collections import defaultdict
24
import itertools
35
from propertysuggester.analyzer.impl.MainAnalyzer import ItemAnalyzer
46
from propertysuggester.analyzer.impl.QualifierReferenceAnalyzer import QualifierAnalyzer, ReferenceAnalyzer
57
from propertysuggester.analyzer.rule import Rule
68
from propertysuggester.utils.datamodel import Entity
79

10+
config = ConfigParser.ConfigParser()
11+
config.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'analyzer.ini'))
12+
classifying_pids = config.get("mainAnalyzer","classifying_properties").split(",")
13+
classifying_pids = map(int, classifying_pids)
814

915
def compute_rules(entities, min_probability=0.01):
1016
"""
1117
@type entities: collections.Iterable[Entity]
1218
@return: list[Rule]
1319
"""
1420

15-
analyzers = [ItemAnalyzer(), QualifierAnalyzer(), ReferenceAnalyzer()]
21+
analyzers = [ItemAnalyzer(classifying_pids), QualifierAnalyzer(), ReferenceAnalyzer()]
1622

1723
for i, entity in enumerate(entities):
1824
if i % 100000 == 0 and i > 0:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[mainAnalyzer]
2+
# Classifying property IDs (comma-separated), currently limited to 'instance of' (31) and 'subclass of' (279).
3+
classifying_properties = 31,279

propertysuggester/analyzer/impl/MainAnalyzer.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@
22
from propertysuggester.analyzer.impl.Analyzer import Analyzer
33
from propertysuggester.analyzer.rule import Rule
44

5-
classiying_property_ids = [31,279]
6-
75
class ItemAnalyzer(Analyzer):
8-
def __init__(self):
6+
def __init__(self, classiying_property_ids = [31,279]):
97
Analyzer.__init__(self)
8+
self.classiying_pids = classiying_property_ids
109
self.tuple_occurrences = defaultdict(int)
1110
self.pair_occurrences = defaultdict(lambda: defaultdict(int))
1211

@@ -17,7 +16,7 @@ def process(self, item):
1716

1817
def _count_occurrences(self, distinct_ids, property_value_pairs):
1918
for pid1 in distinct_ids:
20-
if pid1 in classiying_property_ids:
19+
if pid1 in self.classiying_pids:
2120
continue
2221
currentTuple = (pid1, None)
2322
self.tuple_occurrences[currentTuple] += 1
@@ -26,8 +25,7 @@ def _count_occurrences(self, distinct_ids, property_value_pairs):
2625
self.pair_occurrences[currentTuple][pid2] += 1
2726

2827
for pid1, value in property_value_pairs:
29-
30-
if pid1 in classiying_property_ids:
28+
if pid1 in self.classiying_pids:
3129
self.tuple_occurrences[pid1, int(value[1:])] += 1
3230
for pid2 in distinct_ids:
3331
if pid1 != pid2:

propertysuggester/test/analyzer/test_rule_generator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010

1111
test_data1 = [Entity('Q15', [Claim(Snak(31, 'wikibase-entityid', 'Q5107')),
1212
Claim(Snak(373, 'string', 'Africa'))]),
13-
Entity('Q16', [Claim(Snak(31, 'wikibase-entityid', 'Q384'))])]
13+
Entity('Q16', [Claim(Snak(31, 'wikibase-entityid', 'Q5107'))]),
14+
Entity('Q17', [Claim(Snak(31, 'wikibase-entityid', 'Q1337'))])]
1415

1516
test_data2 = [Entity('Q15', [Claim(Snak(31, 'wikibase-entityid', 'Q5107')),
1617
Claim(Snak(373, 'string', 'Africa')),

0 commit comments

Comments
 (0)