Skip to content

Commit c7e802b

Browse files
move normalised conditional entropy to utils
1 parent 2658c63 commit c7e802b

File tree

2 files changed

+13
-49
lines changed

2 files changed

+13
-49
lines changed

notebooks/02_baseline_network_analysis.ipynb

Lines changed: 1 addition & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
" sys.path.append(module_path)\n",
4949
"\n",
5050
"from network_analysis import remove_self_loops, visualse_largest_components\n",
51-
"from utils import R2_score\n",
51+
"from utils import R2_score, normalised_conditional_entropy\n",
5252
"\n",
5353
"root_figure = path+\"/figures/\"\n",
5454
"root_map = path+'/data/geo_shapefiles//NUTS_Level_3__January_2018__Boundaries-shp/NUTS_Level_3__January_2018__Boundaries.shp'\n",
@@ -3798,54 +3798,6 @@
37983798
"see Lambiotte et al. 2009: belongs to the interval [0, 1], but is now an asymmetric quantity that vanishes only if each community of Pt is the union of communities of Pt'"
37993799
]
38003800
},
3801-
{
3802-
"cell_type": "code",
3803-
"execution_count": 37,
3804-
"metadata": {},
3805-
"outputs": [],
3806-
"source": [
3807-
"def entropy(labels):\n",
3808-
" \"\"\"Calculates the entropy for a labeling.\n",
3809-
" Parameters\n",
3810-
" ----------\n",
3811-
" labels : int array, shape = [n_samples]\n",
3812-
" The labels\n",
3813-
" Notes\n",
3814-
" -----\n",
3815-
" The logarithm used is the natural logarithm (base-e).\n",
3816-
" \"\"\"\n",
3817-
" if len(labels) == 0:\n",
3818-
" return 1.0\n",
3819-
" label_idx = np.unique(labels, return_inverse=True)[1]\n",
3820-
" pi = np.bincount(label_idx).astype(np.float64)\n",
3821-
" pi = pi[pi > 0]\n",
3822-
" pi_sum = np.sum(pi)\n",
3823-
" # log(a / b) should be calculated as log(a) - log(b) for\n",
3824-
" # possible loss of precision\n",
3825-
" return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))\n",
3826-
"\n",
3827-
"def variation_of_information(x,y, normalised = True):\n",
3828-
" Ex = entropy(x)\n",
3829-
" Ey = entropy(y)\n",
3830-
" I = metrics.mutual_info_score(x,y)\n",
3831-
" \n",
3832-
" if normalised:\n",
3833-
" return (Ex + Ey - 2*I) / (Ex + Ey - I)\n",
3834-
" else: \n",
3835-
" return Ex + Ey - 2*I\n",
3836-
"\n",
3837-
"def normalised_conditional_entropy(x,y):\n",
3838-
" \"\"\"\n",
3839-
" H(X|Y) = H(X) - I(X,Y) and we normalise with log(N)\n",
3840-
" \"\"\"\n",
3841-
" \n",
3842-
" N = len(x)\n",
3843-
" Ex = entropy(x)\n",
3844-
" I = metrics.mutual_info_score(x,y)\n",
3845-
"\n",
3846-
" return (Ex - I) / np.log(N)"
3847-
]
3848-
},
38493801
{
38503802
"cell_type": "code",
38513803
"execution_count": 38,

src/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,15 @@ def variation_of_information(x, y, normalised=True):
4848
return (Ex + Ey - 2 * I) / (Ex + Ey - I)
4949
else:
5050
return Ex + Ey - 2 * I
51+
52+
53+
def normalised_conditional_entropy(x, y):
    """Return the conditional entropy H(X|Y) normalised by log(N).

    The conditional entropy is obtained as H(X|Y) = H(X) - I(X, Y) and is
    then divided by log(N), where N = len(x). The result is asymmetric in
    its arguments: swapping ``x`` and ``y`` generally changes the value.

    Parameters
    ----------
    x : array-like of labels
        Labeling whose remaining uncertainty is measured.
    y : array-like of labels
        Labeling that is conditioned on; assumed to label the same samples
        as ``x`` (same length) -- TODO confirm against callers.

    Returns
    -------
    float
        ``(H(X) - I(X, Y)) / log(N)``, or ``0.0`` when fewer than two
        samples are given.

    Notes
    -----
    ``np.log`` is the natural logarithm; ``entropy`` and
    ``metrics.mutual_info_score`` are presumably in the same base (verify
    against the module's imports), otherwise the normalisation is off.
    """
    N = len(x)

    # log(1) == 0 and log(0) == -inf, so the normalisation is undefined for
    # fewer than two samples (the original produced nan / warnings here).
    # A labeling of at most one sample carries no uncertainty, so report 0.
    if N <= 1:
        return 0.0

    Ex = entropy(x)
    I = metrics.mutual_info_score(x, y)

    return (Ex - I) / np.log(N)

0 commit comments

Comments
 (0)