-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_dico.py
More file actions
95 lines (76 loc) · 4.08 KB
/
get_dico.py
File metadata and controls
95 lines (76 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os, sys, json
import numpy as np
import pickle
from tqdm import tqdm, trange
from bicm import BipartiteGraph as BG
import igraph
def get_dico(rtn_el, memb_dict, rounds=100):
    """
    Propagate the discursive-community labels of verified users over the retweet network.

    Starting from the labels in ``memb_dict``, the igraph label propagation algorithm is run
    ``rounds`` times on the undirected version of the retweet network, keeping the labels of the
    already-labelled users fixed. Each user finally receives the label it obtained most often across
    the runs; ties are broken uniformly at random. Users that end up in a community containing no
    labelled user get the label -1.

    :param rtn_el: the retweet network edge list, as generated by get_rt_network
    :type rtn_el: numpy structured array with (at least) the fields 'source' and 'target'
    :param memb_dict: the dictionary encoding the discursive communities for verified users,
        as generated by get_vu_dico. Labels are expected to be non-negative integers.
    :type memb_dict: dict
    :param rounds: the number of label propagations. Since the algorithm has a random step,
        rerunning the procedure several times lets us properly account for its contribution.
    :type rounds: int
    :return final_labels: a dictionary mapping each user name to its label (-1 if unassigned)
    :type final_labels: dict
    :raises ValueError: if ``rounds`` is smaller than 1
    :raises AssertionError: if a propagation run merges two differently labelled fixed users
        into the same community
    """
    if rounds < 1:
        raise ValueError('rounds must be at least 1, got {!r}'.format(rounds))

    # define the graph. first, extract the topology (unique source/target pairs):
    topo_rt_list = np.unique(rtn_el[['source', 'target']])
    # define the graph...
    rtn = igraph.Graph(directed=True)
    # ...then the node list...
    rtn_nl = np.unique(np.concatenate((topo_rt_list['source'], topo_rt_list['target'])))
    rtn.add_vertices(rtn_nl)
    # ... and finally add all edges.
    rtn.add_edges(topo_rt_list)

    names = rtn.vs['name']
    n_nodes = len(names)
    # initial labels: the known community for annotated users, -1 (unassigned) for everybody else
    labels = [memb_dict.get(name, -1) for name in names]
    # fixed labels are the ones of the annotated (verified) users: they must not change
    fixed_labels = [label >= 0 for label in labels]
    where_fixed_labels = [idx for idx, is_fixed in enumerate(fixed_labels) if is_fixed]

    # one row per propagation round, one column per node
    _membership = np.zeros((rounds, n_nodes), dtype='i4')

    # the conversion to undirected is intended to overcome the issue due to giving labels to
    # unverified accounts that nevertheless tweet a lot, without retweeting.
    # Morally, it is justified by the fact that if my audience is quite homogeneous,
    # I will get their label, if I don't have one.
    ug = rtn.copy()
    ug.to_undirected()

    for i in trange(rounds, desc='Multiple label propagations'):
        label_propagated = ug.community_label_propagation(initial=labels, fixed=fixed_labels)
        memba = label_propagated.membership
        # The community ids in `memba` are not assumed to coincide with the initial labels:
        # build a translator from community id to original label using the fixed users,
        # checking that every fixed user kept a label coherent with its community.
        _translator = {}
        for wfl in where_fixed_labels:
            known_label = _translator.setdefault(memba[wfl], labels[wfl])
            if known_label != labels[wfl]:
                # explicit raise instead of `assert`, so the check survives `python -O`
                raise AssertionError(
                    'Incoherent label propagation: community {} contains fixed users '
                    'with labels {} and {}'.format(memba[wfl], known_label, labels[wfl])
                )
        # communities without any fixed user have no original label: mark them as -1
        _membership[i] = [_translator.get(community, -1) for community in memba]

    final_labels = np.zeros(n_nodes, dtype='i4')
    for i in trange(n_nodes):
        # majority vote over the rounds
        values, counts = np.unique(_membership[:, i], return_counts=True)
        candidates = values[counts == counts.max()]
        if candidates.size == 1:
            # a single most frequent label
            final_labels[i] = candidates[0]
        else:
            # more than one maximum: pick one of the tied labels at random
            final_labels[i] = np.random.choice(candidates)

    final_labels = dict(zip(names, final_labels))
    return final_labels