Skip to content

Commit ea23779

Browse files
committed
Merge branch 'master' of github.com:Wikidata-lib/PropertySuggester-Python into use_idf_to_rank_common_properties_lower
Conflicts: propertysuggester/analyzer/impl/MainAnalyzer.py
2 parents 7711920 + 31cd047 commit ea23779

File tree

4 files changed

+18
-35
lines changed

4 files changed

+18
-35
lines changed

propertysuggester/analyzer/impl/MainAnalyzer.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,29 @@
77
class ItemAnalyzer(Analyzer):
88
def __init__(self):
99
Analyzer.__init__(self)
10-
self.propertyOccurances = defaultdict(int)
11-
self.coOccurances = defaultdict(lambda: defaultdict(int))
10+
self.property_occurances = defaultdict(int)
11+
self.pair_occurances = defaultdict(lambda: defaultdict(int))
1212

1313
def process(self, entity):
1414
distinct_ids = set(claim.mainsnak.property_id for claim in entity.claims)
1515
self._count_occurances(distinct_ids)
1616

1717
def _count_occurances(self, distinct_ids):
1818
for pid1 in distinct_ids:
19-
self.propertyOccurances[pid1] += 1
19+
self.property_occurances[pid1] += 1
2020
for pid2 in distinct_ids:
2121
if pid1 != pid2:
22-
self.coOccurances[pid1][pid2] += 1
22+
self.pair_occurances[pid1][pid2] += 1
2323

2424
def get_rules(self):
2525
rules = []
26-
totalpropertycount = len(self.propertyOccurances)
27-
for pid1, row in self.coOccurances.iteritems():
26+
totalpropertycount = len(self.property_occurances)
27+
for pid1, row in self.pair_occurances.iteritems():
2828
sharedpids = len(row)
2929
idf = math.log(totalpropertycount/float(sharedpids))
30-
pid1count = self.propertyOccurances[pid1]
30+
pid1count = self.property_occurances[pid1]
3131
for pid2, paircount in row.iteritems():
3232
if paircount > 0:
3333
probability = (paircount/float(pid1count)) * idf
3434
rules.append(Rule(pid1, None, pid2, paircount, probability, "item"))
35-
return rules
35+
return rules

propertysuggester/analyzer/impl/QualifierReferenceAnalyzer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,29 @@
66
class QualifierAnalyzer(Analyzer):
77
def __init__(self):
88
Analyzer.__init__(self)
9-
self.mainOccurances = defaultdict(int)
10-
self.qualifierOccurances = defaultdict(lambda: defaultdict(int))
9+
self.main_occurances = defaultdict(int)
10+
self.qualifier_occurances = defaultdict(lambda: defaultdict(int))
1111
self.context = "qualifier"
1212

1313
def process(self, entity):
1414
for claim in entity.claims:
1515
distinct_pids = set(q.property_id for q in self.get_special(claim))
1616
if len(distinct_pids) > 0:
1717
main_pid = claim.mainsnak.property_id
18-
self.mainOccurances[main_pid] += 1
18+
self.main_occurances[main_pid] += 1
1919
self._count_special_appearances(main_pid, distinct_pids)
2020

2121
def _count_special_appearances(self, mainsnak_id, distinct_ids):
2222
for pid in distinct_ids:
23-
self.qualifierOccurances[mainsnak_id][pid] += 1
23+
self.qualifier_occurances[mainsnak_id][pid] += 1
2424

2525
def get_special(self, claim):
2626
return claim.qualifiers
2727

2828
def get_rules(self):
2929
rules = []
30-
for main_pid, row in self.qualifierOccurances.iteritems():
31-
maincount = self.mainOccurances[main_pid]
30+
for main_pid, row in self.qualifier_occurances.iteritems():
31+
maincount = self.main_occurances[main_pid]
3232
for qualifier_pid, paircount in row.iteritems():
3333
if paircount > 0:
3434
probability = paircount/float(maincount)

propertysuggester/parser/XmlReader.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ def _parse_json_snak(claim_json):
114114
if datatype == "string":
115115
value = claim_json[3]
116116
elif datatype == "wikibase-entityid":
117-
value = "Q" + str(claim_json[3]["numeric-id"])
117+
if claim_json[3]["entity-type"] == "item":
118+
value = "Q" + str(claim_json[3]["numeric-id"])
119+
else:
120+
print "WARNING unknown entitytype: {0}".format(claim_json[3]["entity-type"])
118121
elif datatype == "time":
119122
value = claim_json[3]["time"]
120123
elif datatype == "quantity":

propertysuggester/utils/itemstats.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)