-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_formatting.py
More file actions
101 lines (78 loc) · 3.2 KB
/
data_formatting.py
File metadata and controls
101 lines (78 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pandas as pd
import json
# Marker used in the input table for "no value in this field".
MISSING = '0'


def build_links(df, missing=MISSING):
    """Turn a table of per-row field values into numbered nodes and links.

    Every row of ``df`` is walked from its first to its last field. Each
    distinct (field name, value) pair whose value is not ``missing`` becomes
    a node, numbered in order of first appearance. Within a row, every
    non-missing field is linked to the closest earlier non-missing field of
    the same row; missing fields in between are skipped over.

    Args:
        df: pandas DataFrame; the column labels are the field names.
        missing: cell content that means "no value" (compared with ``==``).

    Returns:
        A tuple ``(node_ids, node_labels, sources, targets)`` where
        ``node_ids`` maps ``'<field>_<value>'`` to the node number,
        ``node_labels[n]`` is the value text of node ``n``, and
        ``sources[i]`` / ``targets[i]`` are the node numbers joined by link
        ``i``.
    """
    node_ids = {}
    node_labels = []
    sources = []
    targets = []

    for _, row in df.iterrows():
        # Node number of the closest earlier non-missing field in this row.
        # The first field of the row counts as a valid source as well.
        previous = None
        for field, value in row.items():
            if value == missing:
                continue
            key = '{}_{}'.format(field, value)
            if key not in node_ids:
                node_ids[key] = len(node_ids)
                # Remember the label now instead of splitting the key later:
                # field names or values may themselves contain '_'.
                node_labels.append('{}'.format(value))
            current = node_ids[key]
            if previous is not None:
                sources.append(previous)
                targets.append(current)
            previous = current

    return node_ids, node_labels, sources, targets


def build_payload(node_labels, sources, targets, color='black'):
    """Assemble the node/link dictionary that is exported as JSON.

    Args:
        node_labels: one label per node, in node-number order.
        sources: source node number of every link.
        targets: target node number of every link.
        color: color name assigned to every node and every link.

    Returns:
        ``{'data': [{'node': {...}, 'link': {...}}]}`` where every link has
        value 1 and the list of link labels is empty.
    """
    return {
        'data': [{
            'node': {
                'label': list(node_labels),
                'color': [color] * len(node_labels),
            },
            'link': {
                'source': sources,
                'target': targets,
                'color': [color] * len(sources),
                'value': [1] * len(sources),
                'label': [],
            },
        }]
    }


def main():
    """Read ``original_data.csv`` and write the two JSON exports."""
    # Reading and filtering.
    # dtype=str: read every cell as text so the '0' missing-marker comparison
    # also holds for columns that contain only digits (pandas would otherwise
    # parse those as integers, and 0 != '0').
    df = pd.read_csv(
        'original_data.csv', header=None, sep='\t', dtype=str
    ).iloc[:, 1:]
    # The first remaining column holds the field names; after transposing,
    # each row of the frame is one sequence of field values.
    df.index = df[1]
    df = df.iloc[:, 1:].T

    _, node_labels, sources, targets = build_links(df)

    # Exporting to JSON (with node labels)
    with open('formatted_data.json', 'w') as outfile:
        json.dump(build_payload(node_labels, sources, targets), outfile)

    # Exporting to JSON (same graph, every node label blank)
    blank_labels = [''] * len(node_labels)
    with open('formatted_data_nolabels.json', 'w') as outfile:
        json.dump(build_payload(blank_labels, sources, targets), outfile)


if __name__ == '__main__':
    main()