-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathorn.py
More file actions
executable file
·59 lines (44 loc) · 1.52 KB
/
orn.py
File metadata and controls
executable file
·59 lines (44 loc) · 1.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python
"""
Fetch all Icelandic placenames (örnefni) from the iceaddr database
and try to look them up using GreynirEngine's bindb module.
Print any single-word placenames that could not be found either in
BÍN proper or by using the word combinator (samsetjari).
"""
import sys
import sqlite3
from reynir.bindb import GreynirBin
def fetch_placenames(db_path):
    """Return the distinct, non-NULL ``nafn`` values of the ``ornefni`` table.

    Args:
        db_path: Filesystem path of the SQLite database to read.

    Returns:
        A list of placename strings, in the order SQLite returned them.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute("SELECT DISTINCT nafn FROM ornefni;").fetchall()
    finally:
        # The connection's own context manager only handles transactions,
        # so close it explicitly to release the file handle.
        conn.close()
    # A NULL name cannot be looked up (and has no .strip()), so drop it.
    return [name for (name,) in rows if name is not None]


def is_single_word(name):
    """Return True if *name* is non-empty and has no space or hyphen.

    NOTE(review): the original condition tested "-" twice; the second
    test may have been meant for a different dash character - confirm.
    """
    return bool(name) and " " not in name and "-" not in name


if __name__ == "__main__":
    # Invocation via command line: the first argument is the database path.
    db_path = sys.argv[1] if len(sys.argv) > 1 else None
    if not db_path:
        print("No db path")
        sys.exit(1)

    placenames = fetch_placenames(db_path)

    num_bin = 0
    num_comb = 0
    num_fail = 0

    with GreynirBin.get_db() as db:
        for m in placenames:
            w = m.strip()
            # Only non-empty, single-word names are checked.
            if not is_single_word(w):
                continue

            # Direct BÍN lookup
            if db.meanings(w):
                num_bin += 1
                continue

            # Lookup using BÍN and combinator
            _, meanings = db.lookup_g(w, auto_uppercase=True)
            if meanings:
                num_comb += 1
                continue

            # Not found by either method: report the name.
            print(w)
            num_fail += 1

    print("Num BÍN: {0}".format(num_bin))
    print("Num comb: {0}".format(num_comb))
    print("Num fail: {0}".format(num_fail))