Skip to content

Commit 115eadd

Browse files
committed
add tutorial to find articles
1 parent eb91a88 commit 115eadd

File tree

3 files changed

+386
-1
lines changed

3 files changed

+386
-1
lines changed

notebook/EDA_graph.ipynb

Lines changed: 217 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 3,
5+
"execution_count": null,
66
"id": "8926eba4-6572-47dd-8125-b87ab731bcad",
77
"metadata": {
88
"execution": {
@@ -133,6 +133,222 @@
133133
"metrics_df = pd.read_csv(\"D:Users/Paschalis/phd/data/graph_metrics_lang.csv\")"
134134
]
135135
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": 2,
139+
"id": "c5b70dd9",
140+
"metadata": {},
141+
"outputs": [
142+
{
143+
"data": {
144+
"text/html": [
145+
"<div>\n",
146+
"<style scoped>\n",
147+
" .dataframe tbody tr th:only-of-type {\n",
148+
" vertical-align: middle;\n",
149+
" }\n",
150+
"\n",
151+
" .dataframe tbody tr th {\n",
152+
" vertical-align: top;\n",
153+
" }\n",
154+
"\n",
155+
" .dataframe thead th {\n",
156+
" text-align: right;\n",
157+
" }\n",
158+
"</style>\n",
159+
"<table border=\"1\" class=\"dataframe\">\n",
160+
" <thead>\n",
161+
" <tr style=\"text-align: right;\">\n",
162+
" <th></th>\n",
163+
" <th>language_code</th>\n",
164+
" <th>num_nodes</th>\n",
165+
" <th>num_edges</th>\n",
166+
" <th>average_in_degree</th>\n",
167+
" <th>average_out_degree</th>\n",
168+
" <th>max_in_degree</th>\n",
169+
" <th>max_out_degree</th>\n",
170+
" <th>in_degree_distribution</th>\n",
171+
" <th>out_degree_distribution</th>\n",
172+
" <th>density</th>\n",
173+
" </tr>\n",
174+
" </thead>\n",
175+
" <tbody>\n",
176+
" <tr>\n",
177+
" <th>0</th>\n",
178+
" <td>en</td>\n",
179+
" <td>6720886</td>\n",
180+
" <td>140233009</td>\n",
181+
" <td>20.865256</td>\n",
182+
" <td>20.865256</td>\n",
183+
" <td>188311</td>\n",
184+
" <td>4588</td>\n",
185+
" <td>{0: 1169, 45580: 89, 352716: 204, 854409: 27, ...</td>\n",
186+
" <td>{0: 221, 45580: 23, 352716: 86, 854409: 18, 28...</td>\n",
187+
" <td>0.000003</td>\n",
188+
" </tr>\n",
189+
" <tr>\n",
190+
" <th>1</th>\n",
191+
" <td>es</td>\n",
192+
" <td>3355884</td>\n",
193+
" <td>33344361</td>\n",
194+
" <td>9.936089</td>\n",
195+
" <td>9.936089</td>\n",
196+
" <td>183293</td>\n",
197+
" <td>3652</td>\n",
198+
" <td>{0: 1680, 3283175: 10, 159275: 100, 564: 37622...</td>\n",
199+
" <td>{0: 359, 3283175: 1, 159275: 124, 564: 431, 19...</td>\n",
200+
" <td>0.000003</td>\n",
201+
" </tr>\n",
202+
" <tr>\n",
203+
" <th>2</th>\n",
204+
" <td>de</td>\n",
205+
" <td>2933122</td>\n",
206+
" <td>76792004</td>\n",
207+
" <td>26.180978</td>\n",
208+
" <td>26.180978</td>\n",
209+
" <td>204835</td>\n",
210+
" <td>10676</td>\n",
211+
" <td>{'alan smithee': 60, 'pseudonym': 8356, 'regis...</td>\n",
212+
" <td>{'alan smithee': 38, 'pseudonym': 187, 'regiss...</td>\n",
213+
" <td>0.000009</td>\n",
214+
" </tr>\n",
215+
" <tr>\n",
216+
" <th>3</th>\n",
217+
" <td>nl</td>\n",
218+
" <td>2168204</td>\n",
219+
" <td>27553227</td>\n",
220+
" <td>12.707857</td>\n",
221+
" <td>12.707857</td>\n",
222+
" <td>163511</td>\n",
223+
" <td>12760</td>\n",
224+
" <td>{'albert speer': 164, 'mannheim': 777, '19 maa...</td>\n",
225+
" <td>{'albert speer': 99, 'mannheim': 102, '19 maar...</td>\n",
226+
" <td>0.000006</td>\n",
227+
" </tr>\n",
228+
" <tr>\n",
229+
" <th>4</th>\n",
230+
" <td>it</td>\n",
231+
" <td>1817866</td>\n",
232+
" <td>42906457</td>\n",
233+
" <td>23.602651</td>\n",
234+
" <td>23.602651</td>\n",
235+
" <td>153415</td>\n",
236+
" <td>5179</td>\n",
237+
" <td>{'armonium': 382, 'lingua francese': 15697, 'o...</td>\n",
238+
" <td>{'armonium': 100, 'lingua francese': 354, 'org...</td>\n",
239+
" <td>0.000013</td>\n",
240+
" </tr>\n",
241+
" <tr>\n",
242+
" <th>5</th>\n",
243+
" <td>pl</td>\n",
244+
" <td>1622506</td>\n",
245+
" <td>30137502</td>\n",
246+
" <td>18.574663</td>\n",
247+
" <td>18.574663</td>\n",
248+
" <td>138618</td>\n",
249+
" <td>3798</td>\n",
250+
" <td>{'awk': 29, 'język programowania': 890, 'plik ...</td>\n",
251+
" <td>{'awk': 25, 'język programowania': 73, 'plik d...</td>\n",
252+
" <td>0.000011</td>\n",
253+
" </tr>\n",
254+
" <tr>\n",
255+
" <th>6</th>\n",
256+
" <td>eu</td>\n",
257+
" <td>435231</td>\n",
258+
" <td>4440116</td>\n",
259+
" <td>10.201746</td>\n",
260+
" <td>10.201746</td>\n",
261+
" <td>65339</td>\n",
262+
" <td>1573</td>\n",
263+
" <td>{0: 563, 107294: 8, 2212: 1654, 20504: 103, 24...</td>\n",
264+
" <td>{0: 197, 107294: 9, 2212: 75, 20504: 59, 2415:...</td>\n",
265+
" <td>0.000023</td>\n",
266+
" </tr>\n",
267+
" <tr>\n",
268+
" <th>7</th>\n",
269+
" <td>el</td>\n",
270+
" <td>235927</td>\n",
271+
" <td>3794715</td>\n",
272+
" <td>16.084276</td>\n",
273+
" <td>16.084276</td>\n",
274+
" <td>16312</td>\n",
275+
" <td>1464</td>\n",
276+
" <td>{0: 1, 40408: 13246, 502: 10469, 9337: 60, 8: ...</td>\n",
277+
" <td>{0: 10, 40408: 745, 502: 426, 9337: 226, 8: 23...</td>\n",
278+
" <td>0.000068</td>\n",
279+
" </tr>\n",
280+
" <tr>\n",
281+
" <th>8</th>\n",
282+
" <td>hi</td>\n",
283+
" <td>159255</td>\n",
284+
" <td>1126513</td>\n",
285+
" <td>7.073643</td>\n",
286+
" <td>7.073643</td>\n",
287+
" <td>42123</td>\n",
288+
" <td>4018</td>\n",
289+
" <td>{'हम होंगे कामयाब': 3, 'गिरिजा कुमार माथुर': 1...</td>\n",
290+
" <td>{'हम होंगे कामयाब': 2, 'गिरिजा कुमार माथुर': 4...</td>\n",
291+
" <td>0.000044</td>\n",
292+
" </tr>\n",
293+
" </tbody>\n",
294+
"</table>\n",
295+
"</div>"
296+
],
297+
"text/plain": [
298+
" language_code num_nodes num_edges average_in_degree average_out_degree \\\n",
299+
"0 en 6720886 140233009 20.865256 20.865256 \n",
300+
"1 es 3355884 33344361 9.936089 9.936089 \n",
301+
"2 de 2933122 76792004 26.180978 26.180978 \n",
302+
"3 nl 2168204 27553227 12.707857 12.707857 \n",
303+
"4 it 1817866 42906457 23.602651 23.602651 \n",
304+
"5 pl 1622506 30137502 18.574663 18.574663 \n",
305+
"6 eu 435231 4440116 10.201746 10.201746 \n",
306+
"7 el 235927 3794715 16.084276 16.084276 \n",
307+
"8 hi 159255 1126513 7.073643 7.073643 \n",
308+
"\n",
309+
" max_in_degree max_out_degree \\\n",
310+
"0 188311 4588 \n",
311+
"1 183293 3652 \n",
312+
"2 204835 10676 \n",
313+
"3 163511 12760 \n",
314+
"4 153415 5179 \n",
315+
"5 138618 3798 \n",
316+
"6 65339 1573 \n",
317+
"7 16312 1464 \n",
318+
"8 42123 4018 \n",
319+
"\n",
320+
" in_degree_distribution \\\n",
321+
"0 {0: 1169, 45580: 89, 352716: 204, 854409: 27, ... \n",
322+
"1 {0: 1680, 3283175: 10, 159275: 100, 564: 37622... \n",
323+
"2 {'alan smithee': 60, 'pseudonym': 8356, 'regis... \n",
324+
"3 {'albert speer': 164, 'mannheim': 777, '19 maa... \n",
325+
"4 {'armonium': 382, 'lingua francese': 15697, 'o... \n",
326+
"5 {'awk': 29, 'język programowania': 890, 'plik ... \n",
327+
"6 {0: 563, 107294: 8, 2212: 1654, 20504: 103, 24... \n",
328+
"7 {0: 1, 40408: 13246, 502: 10469, 9337: 60, 8: ... \n",
329+
"8 {'हम होंगे कामयाब': 3, 'गिरिजा कुमार माथुर': 1... \n",
330+
"\n",
331+
" out_degree_distribution density \n",
332+
"0 {0: 221, 45580: 23, 352716: 86, 854409: 18, 28... 0.000003 \n",
333+
"1 {0: 359, 3283175: 1, 159275: 124, 564: 431, 19... 0.000003 \n",
334+
"2 {'alan smithee': 38, 'pseudonym': 187, 'regiss... 0.000009 \n",
335+
"3 {'albert speer': 99, 'mannheim': 102, '19 maar... 0.000006 \n",
336+
"4 {'armonium': 100, 'lingua francese': 354, 'org... 0.000013 \n",
337+
"5 {'awk': 25, 'język programowania': 73, 'plik d... 0.000011 \n",
338+
"6 {0: 197, 107294: 9, 2212: 75, 20504: 59, 2415:... 0.000023 \n",
339+
"7 {0: 10, 40408: 745, 502: 426, 9337: 226, 8: 23... 0.000068 \n",
340+
"8 {'हम होंगे कामयाब': 2, 'गिरिजा कुमार माथुर': 4... 0.000044 "
341+
]
342+
},
343+
"execution_count": 2,
344+
"metadata": {},
345+
"output_type": "execute_result"
346+
}
347+
],
348+
"source": [
349+
"metrics_df"
350+
]
351+
},
136352
{
137353
"cell_type": "markdown",
138354
"id": "94a0203e-c1e0-4375-b895-b0b6fd8cc15e",

0 commit comments

Comments
 (0)