Skip to content

Commit 6fc0e4b

Browse files
committed
move analyzers in separate files
1 parent 232ee93 commit 6fc0e4b

File tree

5 files changed

+98
-71
lines changed

5 files changed

+98
-71
lines changed
Lines changed: 2 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from collections import defaultdict
22
import itertools
3+
from propertysuggester.analyzer.impl.MainAnalyzer import ItemAnalyzer
4+
from propertysuggester.analyzer.impl.QualifierReferenceAnalyzer import QualifierAnalyzer, ReferenceAnalyzer
35
from propertysuggester.analyzer.rule import Rule
46
from propertysuggester.utils.datamodel import Entity
57

@@ -21,74 +23,3 @@ def compute_rules(entities, min_probability=0.01):
2123
rules = filter(lambda rule: rule.probability > min_probability, itertools.chain(*(a.get_rules() for a in analyzers)))
2224
return rules
2325

24-
25-
class Analyzer(object):
    """Base class holding occurrence counters and turning them into rules.

    Subclasses fill ``propertyOccurances`` and ``coOccurances`` inside
    ``process``; ``get_rules`` converts those counters into ``Rule`` objects.
    """

    def __init__(self, context):
        """
        @type context: string
        """
        # propertyOccurances[pid]: how many times pid was counted.
        self.propertyOccurances = defaultdict(int)
        # coOccurances[pid1][pid2]: how many times pid2 was counted with pid1.
        self.coOccurances = defaultdict(lambda: defaultdict(int))
        # Handed unchanged to every Rule built by get_rules().
        self.context = context

    def process(self, entity):
        """
        @type entity: Entity
        @raise NotImplementedError: always; subclasses have to override this.
        """
        # NotImplemented is a comparison sentinel and is not callable, so
        # "raise NotImplemented(...)" produced a TypeError instead of the
        # intended exception.
        raise NotImplementedError("Please implement this method")

    def get_rules(self):
        """
        @return: list[Rule]
        """
        rules = []
        # items() is available on Python 2 and 3; iteritems() is Python 2 only.
        for pid1, row in self.coOccurances.items():
            pid1count = self.propertyOccurances[pid1]
            for pid2, value in row.items():
                if value > 0:
                    # Share of the counted pid1 occurrences that also had pid2.
                    probability = value / float(pid1count)
                    rules.append(Rule(pid1, None, pid2, value, probability, self.context))
        return rules
52-
53-
54-
class ItemAnalyzer(Analyzer):
    """Counts which main-snak property ids show up together on one entity."""

    def __init__(self):
        Analyzer.__init__(self, "item")

    def process(self, entity):
        """
        @type entity: Entity
        """
        # Collapse the claims to the distinct property ids of their main snaks.
        seen = set()
        for claim in entity.claims:
            seen.add(claim.mainsnak.property_id)
        self._count_occurances(seen)

    def _count_occurances(self, distinct_ids):
        """
        Increment the single counter of every id and the pair counter of
        every ordered pair of different ids.
        """
        for current in distinct_ids:
            self.propertyOccurances[current] += 1
            partners = [other for other in distinct_ids if current != other]
            for other in partners:
                self.coOccurances[current][other] += 1
68-
69-
70-
class QualifierAnalyzer(Analyzer):
    """Counts, per main-snak property id, the property ids of its qualifiers."""

    def __init__(self):
        Analyzer.__init__(self, "qualifier")

    def process(self, entity):
        """
        @type entity: Entity
        """
        for claim in entity.claims:
            special_pids = set()
            for snak in self.get_special(claim):
                special_pids.add(snak.property_id)
            # Claims without any qualifying snak are not counted at all.
            if not special_pids:
                continue
            main_pid = claim.mainsnak.property_id
            self.propertyOccurances[main_pid] += 1
            self._count_special_appearances(main_pid, special_pids)

    def _count_special_appearances(self, mainsnak_id, distinct_ids):
        """Increment the pair counter (mainsnak_id, pid) for every given pid."""
        row = self.coOccurances[mainsnak_id]
        for special_pid in distinct_ids:
            row[special_pid] += 1

    def get_special(self, claim):
        """Return the snaks of the claim this analyzer looks at; hook for subclasses."""
        return claim.qualifiers
87-
88-
89-
class ReferenceAnalyzer(QualifierAnalyzer):
    """Same counting as QualifierAnalyzer, applied to ``claim.references``."""

    def __init__(self):
        # Run the direct parent's initializer instead of skipping over it to
        # Analyzer.__init__, so state added there is never missed; then
        # replace the context it set with the one for references.
        QualifierAnalyzer.__init__(self)
        self.context = "reference"

    def get_special(self, claim):
        """Return the references of the claim instead of its qualifiers."""
        return claim.references
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from collections import defaultdict
2+
from propertysuggester.analyzer.rule import Rule
3+
from propertysuggester.utils.datamodel import Entity
4+
5+
class Analyzer(object):
    """Abstract base of all analyzers.

    Inherits from ``object`` so that it is a new-style class on Python 2 as
    well. Both ``process`` and ``get_rules`` must be overridden.
    """

    def __init__(self):
        pass

    def process(self, entity):
        """
        @type entity: Entity
        @raise NotImplementedError: always; subclasses have to override this.
        """
        raise NotImplementedError("Please implement this method")

    def get_rules(self):
        """
        @return: list[Rule]
        @raise NotImplementedError: always; subclasses have to override this.
        """
        raise NotImplementedError("Please implement this method")
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from collections import defaultdict
2+
from propertysuggester.analyzer.impl.Analyzer import Analyzer
3+
from propertysuggester.analyzer.rule import Rule
4+
5+
6+
class ItemAnalyzer(Analyzer):
    """Counts which main-snak property ids show up together on one entity."""

    def __init__(self):
        Analyzer.__init__(self)
        # propertyOccurances[pid]: number of processed entities containing pid.
        self.propertyOccurances = defaultdict(int)
        # coOccurances[pid1][pid2]: number of processed entities containing both.
        self.coOccurances = defaultdict(lambda: defaultdict(int))

    def process(self, entity):
        """
        @type entity: Entity
        """
        # A set, so a property used by several claims counts once per entity.
        distinct_ids = set(claim.mainsnak.property_id for claim in entity.claims)
        self._count_occurances(distinct_ids)

    def _count_occurances(self, distinct_ids):
        """
        Increment the single counter of every id and the pair counter of
        every ordered pair of different ids.
        """
        for pid1 in distinct_ids:
            self.propertyOccurances[pid1] += 1
            for pid2 in distinct_ids:
                if pid1 != pid2:
                    self.coOccurances[pid1][pid2] += 1

    def get_rules(self):
        """
        @return: list[Rule]
        """
        rules = []
        # items() is available on Python 2 and 3; iteritems() is Python 2 only.
        for pid1, row in self.coOccurances.items():
            pid1count = self.propertyOccurances[pid1]
            for pid2, paircount in row.items():
                if paircount > 0:
                    # Share of the entities with pid1 that also had pid2.
                    probability = paircount / float(pid1count)
                    rules.append(Rule(pid1, None, pid2, paircount, probability, "item"))
        return rules
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from collections import defaultdict
2+
from propertysuggester.analyzer.impl.Analyzer import Analyzer
3+
from propertysuggester.analyzer.rule import Rule
4+
5+
6+
class QualifierAnalyzer(Analyzer):
    """Counts, per main-snak property id, the property ids of its qualifiers.

    Subclasses can analyze other snaks of a claim by overriding
    ``get_special`` and setting ``context``.
    """

    def __init__(self):
        Analyzer.__init__(self)
        # mainOccurances[main_pid]: claims of main_pid with at least one
        # snak returned by get_special().
        self.mainOccurances = defaultdict(int)
        # qualifierOccurances[main_pid][pid]: claims of main_pid carrying pid.
        self.qualifierOccurances = defaultdict(lambda: defaultdict(int))
        # Handed unchanged to every Rule built by get_rules().
        self.context = "qualifier"

    def process(self, entity):
        """
        @type entity: Entity
        """
        for claim in entity.claims:
            distinct_pids = set(q.property_id for q in self.get_special(claim))
            # Claims without any such snak are not counted at all.
            if distinct_pids:
                main_pid = claim.mainsnak.property_id
                self.mainOccurances[main_pid] += 1
                self._count_special_appearances(main_pid, distinct_pids)

    def _count_special_appearances(self, mainsnak_id, distinct_ids):
        """Increment the pair counter (mainsnak_id, pid) for every given pid."""
        for pid in distinct_ids:
            self.qualifierOccurances[mainsnak_id][pid] += 1

    def get_special(self, claim):
        """Return the snaks of the claim this analyzer looks at."""
        return claim.qualifiers

    def get_rules(self):
        """
        @return: list[Rule]
        """
        rules = []
        # items() is available on Python 2 and 3; iteritems() is Python 2 only.
        for main_pid, row in self.qualifierOccurances.items():
            maincount = self.mainOccurances[main_pid]
            for qualifier_pid, paircount in row.items():
                if paircount > 0:
                    # Share of the counted main_pid claims carrying qualifier_pid.
                    probability = paircount / float(maincount)
                    rules.append(Rule(main_pid, None, qualifier_pid, paircount, probability, self.context))
        return rules
37+
38+
39+
class ReferenceAnalyzer(QualifierAnalyzer):
    """QualifierAnalyzer variant that reads ``claim.references``."""

    def __init__(self):
        # Reuse the parent's counters, then swap in this analyzer's context.
        QualifierAnalyzer.__init__(self)
        self.context = "reference"

    def get_special(self, claim):
        """Return the references of the claim instead of its qualifiers."""
        references = claim.references
        return references
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'Christian'

0 commit comments

Comments
 (0)