# graph.py
# Projects the full knowledge base into a simple graph that keeps only
# the connections between items.
# Requires the knowledge base file kb.txt.gz in the current directory.
import gzip
def extract_edge(line):
    """Return the projected edge line for one statement line, or None.

    ``line`` is one raw line (bytes, trailing newline included) of the
    knowledge base.  A line is kept only if it splits on single spaces into
    exactly four fields and the first three start with ``Q``, ``P`` and
    ``Q`` respectively (presumably item / property / item ids -- confirm
    against the kb format).  The result is ``b"<s> <p> <o>\\n"``; the fourth
    field is dropped.
    """
    # Lines that begin with a space are never statements; skip them early.
    if line.startswith(b' '):
        return None
    parts = line.split(b' ')
    if len(parts) != 4:
        return None
    subject, prop, obj = parts[:3]
    if not (subject.startswith(b'Q')
            and prop.startswith(b'P')
            and obj.startswith(b'Q')):
        return None
    return b' '.join((subject, prop, obj)) + b'\n'


def project_graph(lines, output, progress_every=1000000):
    """Copy comment lines and projected edges from ``lines`` to ``output``.

    ``lines`` is any iterable of bytes lines and ``output`` any object with
    a ``write(bytes)`` method.  Lines starting with ``#`` are written through
    unchanged; every other line is passed to ``extract_edge`` and written
    only if it yields an edge.  After every ``progress_every`` input lines
    the number of completed batches is printed as a progress indicator.

    Returns a ``(linecount, count)`` tuple: the number of lines read and the
    number of edges written (comment lines are not counted as edges).
    """
    linecount = 0
    count = 0
    for linecount, line in enumerate(lines, 1):
        if linecount % progress_every == 0:
            # Floor division keeps the progress figure an integer on
            # Python 3 as well as Python 2.
            print(linecount // progress_every)
        if line.startswith(b'#'):
            output.write(line)
            continue
        edge = extract_edge(line)
        if edge is not None:
            output.write(edge)
            count += 1
    return linecount, count


def main():
    """Project kb.txt.gz into graph.txt.gz and print summary counts."""
    # The input is opened first so that a missing kb.txt.gz fails before an
    # existing graph.txt.gz is truncated.  Both files are handled as bytes,
    # which behaves the same on Python 2 and Python 3, and the ``with``
    # blocks close both handles even if processing raises.
    with gzip.open('kb.txt.gz', 'rb') as kb:
        with gzip.open('graph.txt.gz', 'wb') as output:
            linecount, count = project_graph(kb, output)
            print('%d lines' % linecount)
            print('%d results' % count)


if __name__ == '__main__':
    main()