1- import networkx as nx
21import csv
32from itertools import combinations
43import matplotlib .pyplot as plt
4+ import networkx as nx
5+ from networkx .algorithms import bipartite
6+ from sknetwork .data import from_edge_list
7+ from sknetwork .clustering import Louvain
8+ from nxviz import CircosPlot , BasePlot
9+ import nxviz as nv
510import math
11+ import json
612
713G = nx .Graph ()
814
9- papers = []
10- dbs = []
11-
12- with open ('./data/merged_records.csv' , 'r' , encoding = 'UTF-8' ) as terms :
13- reader = csv .reader (terms )
14- for i in reader :
15- dbs .append ([j .strip () for j in i ])
16-
17- with open ('./data/repohits.csv' , 'r' , encoding = 'UTF-8' ) as file :
18- reader = csv .reader (file )
19- for i in reader :
20- db = [j [0 ] for j in dbs ].index (i [3 ])
21- papers .append ({'doi' : i [0 ],
22- 'snippet' : i [1 ],
23- 'title' : i [2 ],
24- 'database' : dbs [db ][1 ]})
25-
26- # Remove all papers without a DOI:
27- papers = [i for i in papers if i .get ('doi' ) or '' != '' ]
28- doi_list = [i .get ('doi' ) for i in papers ]
29- doi_set = set (doi_list )
30- doi_count = {i : doi_list .count (i ) for i in doi_set if doi_list .count (i ) > 1 }
31- clean_dois = set (doi_count .keys ())
32-
33- clean_papers = [i for i in papers if i .get ('doi' ) in clean_dois ]
34-
35- dataresources = set ([i .get ('database' ) for i in papers ])
36-
37- G .add_nodes_from (dataresources )
38- dois = []
39-
40- j = 0
41-
42- for i in clean_papers :
43- j = j + 1
44- if i .get ('doi' ) not in [i .get ('doi' ) or '' for i in dois ]:
45- dois .append ({'doi' : i .get ('doi' ), 'resource' : set (i .get ('database' ))})
46- else :
47- doi_loc = [j .get ('doi' ) for j in dois ].index (i .get ('doi' ))
48- dois [doi_loc ]['resource' ].add (i .get ('database' ))
49- if j % 1000 == 0 :
50- print (j )
51-
52- for i in dois :
53- if len (i .get ('resource' )) > 1 :
54- combs = list (combinations (i .get ('resource' ), 2 ))
55- for j in combs :
56- if j [0 ] != j [1 ]:
57- if j in G .edges :
58- G .edges [j [0 ], j [1 ]]['weight' ] = G .edges [j ]['weight' ] + 1
59- else :
60- G .add_edge (j [0 ], j [1 ], weight = 1 )
61-
62- weights = [math .sqrt (G [u ][v ]['weight' ]) for u ,v in G .edges ()]
63-
64- subax1 = plt .subplot (111 )
65- nx .draw (G , nx .kamada_kawai_layout (G ), with_labels = True , edge_color = "tab:red" , font_weight = 'bold' )
# --- Load the DOI -> resources records --------------------------------------
# Each record is read as a mapping with a 'doi' string and an iterable of
# resource names under 'resources' (inferred from the keys used below --
# confirm against the producer of doi_joined.json).
with open('./data/doi_joined.json', encoding='UTF-8') as terms:
    graph = json.load(terms)

# Drop records with a missing or empty DOI: they cannot serve as node keys.
graph = [record for record in graph if record.get('doi')]

dois = [record['doi'] for record in graph]
resources = {resource for record in graph for resource in record['resources']}

# --- Build the bipartite paper/resource graph --------------------------------
# bipartite=0 marks paper (DOI) nodes, bipartite=1 marks data-resource nodes.
G.add_nodes_from(dois, bipartite=0)
G.add_nodes_from(resources, bipartite=1)
G.add_edges_from(
    (record['doi'], resource)
    for record in graph
    for resource in record['resources']
)

# We have a bipartite graph.
# Keep the connectivity check result instead of discarding it.
graph_is_connected = nx.is_connected(G)

# --- Community detection on the biadjacency matrix ---------------------------
# Every edge gets unit weight.
edge_list = [(u, v, 1) for u, v in G.edges()]
bgraph = from_edge_list(edge_list, bipartite=True)

names = bgraph.names
names_row = bgraph.names_row
names_col = bgraph.names_col
biadjacency = bgraph.biadjacency

# Louvain with Barber modularity
louvain = Louvain()
louvain.fit(biadjacency, force_bipartite=True)
labels_row = louvain.labels_row_
labels_col = louvain.labels_col_

# Add the label to the graph: row names first, then column names (a name
# present on both sides ends up with its column label, as before).
partition = dict(zip(names_row, labels_row))
partition.update(zip(names_col, labels_col))

nx.set_node_attributes(G, partition, 'community_louvain')

# --- Split the node sets and rank the resources -------------------------------
# Use the public node-data view rather than the private G._node mapping.
resource_nodes = [node for node, side in G.nodes(data='bipartite') if side == 1]
paper_nodes = [node for node, side in G.nodes(data='bipartite') if side == 0]

# degree_centrality() reports scores for BOTH node sets.  Keep only the
# resource side by membership; the previous test on a leading "1" also
# discarded any resource whose name starts with "1".
resource_node_set = set(resource_nodes)
resource_centrality = [
    (node, score)
    for node, score in nx.bipartite.degree_centrality(G, resource_nodes).items()
    if node in resource_node_set
]

# Five resources with the highest degree centrality.
top_resources = sorted(resource_centrality, key=lambda item: item[1], reverse=True)[:5]

# --- Project onto the resources and draw --------------------------------------
resource_graph = nx.bipartite.projection.projected_graph(G, resource_nodes)

# Store each resource's number of neighbours in the projection on the node.
for n in resource_graph.nodes():
    resource_graph.nodes[n]['neighbors_count'] = len(resource_graph[n])

options = {"edgecolors": "tab:gray", "node_size": 700, "alpha": 0.7}
label_options = {"ec": "k", "fc": "white", "alpha": 0.7}

pos = nx.spring_layout(resource_graph, seed=3113794652)  # positions for all nodes

fig = plt.figure(figsize=(6, 9))

nx.draw_networkx_edges(resource_graph, pos, alpha=0.1)
nx.draw_networkx_nodes(resource_graph, pos, **options)
nx.draw_networkx_labels(resource_graph, pos, font_size=14, bbox=label_options)
plt.show()
6784
68- # Betweenness
69- # remove randomly selected nodes (to make example fast)
70- # largest connected component
71- components = nx .connected_components (G )
72- largest_component = max (components , key = len )
73- H = G .subgraph (largest_component )
74-
75- # compute centrality
76- centrality = nx .betweenness_centrality (G , endpoints = True , weight = 'weight' )
77-
78- # compute community structure
79- lpc = nx .community .label_propagation_communities (G )
80- community_index = {n : i for i , com in enumerate (lpc ) for n in com }
81-
82- #### draw graph ####
83- fig , ax = plt .subplots (figsize = (20 , 15 ))
84- pos = nx .spring_layout (G , k = 0.15 , seed = 4572321 )
85- node_color = [community_index [n ] for n in G ]
86- node_size = [v * 20000 for v in centrality .values ()]
87- nx .draw_networkx (
88- G ,
89- pos = pos ,
90- with_labels = False ,
91- node_color = node_color ,
92- node_size = node_size ,
93- edge_color = "gainsboro" ,
94- alpha = 0.4 ,
95- )
96-
97- # Title/legend
98- font = {"color" : "k" , "fontweight" : "bold" , "fontsize" : 20 }
99- ax .set_title ("Gene functional association network (C. elegans)" , font )
100- # Change font color for legend
101- font ["color" ] = "r"
102-
103- ax .text (
104- 0.80 ,
105- 0.10 ,
106- "node color = community structure" ,
107- horizontalalignment = "center" ,
108- transform = ax .transAxes ,
109- fontdict = font ,
110- )
111- ax .text (
112- 0.80 ,
113- 0.06 ,
114- "node size = betweenness centrality" ,
115- horizontalalignment = "center" ,
116- transform = ax .transAxes ,
117- fontdict = font ,
118- )
119-
120- # Resize figure for label readability
121- ax .margins (0.1 , 0.05 )
122- fig .tight_layout ()
123- plt .axis ("off" )
85+
def create_community_node_colors(graph, communities):
    """Return one colour per node of *graph*, chosen by community membership.

    Args:
        graph: Iterable of hashable nodes (e.g. a networkx graph); the
            result follows its iteration order.
        communities: Iterable of node collections. A node listed in several
            communities takes the colour of the first one that contains it.

    Returns:
        A list of hex colour strings with exactly one entry per node.
        Communities beyond the fifth reuse the palette from the start, and
        a node that belongs to no community is coloured white so the list
        stays aligned with the node order.
    """
    palette = ["#D4FCB1", "#CDC5FC", "#FFC2C4", "#F2D140", "#BCC6C8"]
    unassigned_color = "#FFFFFF"

    # Build the node -> community-index table once; setdefault keeps the
    # first community a node appears in.
    community_of = {}
    for index, community in enumerate(communities):
        for member in community:
            community_of.setdefault(member, index)

    node_colors = []
    for node in graph:
        index = community_of.get(node)
        if index is None:
            node_colors.append(unassigned_color)
        else:
            # Wrap around so more communities than colours cannot overflow.
            node_colors.append(palette[index % len(palette)])
    return node_colors
99+
100+
def visualize_communities(graph, communities, i, rows=3):
    """Draw *graph* in one subplot with nodes coloured by community.

    Args:
        graph: networkx graph to draw.
        communities: Partition of the graph's nodes (collection of node
            collections); also used to compute the modularity in the title.
        i: 1-based index of the subplot to draw into.
        rows: Number of rows in the single-column subplot grid. Defaults to
            3, the layout that was previously hard-coded.

    The plot is drawn on the current matplotlib figure; the caller is
    responsible for calling ``plt.show()``.
    """
    node_colors = create_community_node_colors(graph, communities)
    modularity = round(nx.community.modularity(graph, communities), 6)
    title = (
        f"Community Visualization of {len(communities)} communities "
        f"with modularity of {modularity}"
    )
    # Fixed seed keeps the layout reproducible between runs.
    pos = nx.spring_layout(graph, k=0.3, iterations=50, seed=2)
    plt.subplot(rows, 1, i)
    plt.title(title)
    nx.draw(
        graph,
        pos=pos,
        node_size=1000,
        node_color=node_colors,
        with_labels=True,
        font_size=20,
        font_color="black",
    )
118+
119+
# Run Girvan-Newman on the resource projection and keep every level of the
# resulting hierarchy (each item is one partition of the nodes).
dendrogram = nx.algorithms.community.girvan_newman(resource_graph)
communities = list(dendrogram)

# Plot graph with colouring based on communities: the first level goes in
# subplot 1 and the fourth level in subplot 2.
for panel, level in enumerate((0, 3), start=1):
    visualize_communities(resource_graph, communities[level], panel)
plt.show()
0 commit comments