|
48 | 48 | " sys.path.append(module_path)\n", |
49 | 49 | "\n", |
50 | 50 | "from network_analysis import remove_self_loops, visualse_largest_components\n", |
51 | | - "from utils import R2_score\n", |
| 51 | + "from utils import R2_score, normalised_conditional_entropy\n", |
52 | 52 | "\n", |
53 | 53 | "root_figure = path+\"/figures/\"\n", |
54 | 54 | "root_map = path+'/data/geo_shapefiles//NUTS_Level_3__January_2018__Boundaries-shp/NUTS_Level_3__January_2018__Boundaries.shp'\n", |
|
3798 | 3798 | "see Lambiotte et al. 2009: belongs to the interval [0, 1], but is now an asymmetric quantity that vanishes only if each community of Pt is the union of communities of Pt'" |
3799 | 3799 | ] |
3800 | 3800 | }, |
3801 | | - { |
3802 | | - "cell_type": "code", |
3803 | | - "execution_count": 37, |
3804 | | - "metadata": {}, |
3805 | | - "outputs": [], |
3806 | | - "source": [ |
3807 | | - "def entropy(labels):\n", |
3808 | | - " \"\"\"Calculates the entropy for a labeling.\n", |
3809 | | - " Parameters\n", |
3810 | | - " ----------\n", |
3811 | | - " labels : int array, shape = [n_samples]\n", |
3812 | | - " The labels\n", |
3813 | | - " Notes\n", |
3814 | | - " -----\n", |
3815 | | - " The logarithm used is the natural logarithm (base-e).\n", |
3816 | | - " \"\"\"\n", |
3817 | | - " if len(labels) == 0:\n", |
3818 | | - " return 1.0\n", |
3819 | | - " label_idx = np.unique(labels, return_inverse=True)[1]\n", |
3820 | | - " pi = np.bincount(label_idx).astype(np.float64)\n", |
3821 | | - " pi = pi[pi > 0]\n", |
3822 | | - " pi_sum = np.sum(pi)\n", |
3823 | | - " # log(a / b) should be calculated as log(a) - log(b) for\n", |
3824 | | - " # possible loss of precision\n", |
3825 | | - " return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))\n", |
3826 | | - "\n", |
3827 | | - "def variation_of_information(x,y, normalised = True):\n", |
3828 | | - " Ex = entropy(x)\n", |
3829 | | - " Ey = entropy(y)\n", |
3830 | | - " I = metrics.mutual_info_score(x,y)\n", |
3831 | | - " \n", |
3832 | | - " if normalised:\n", |
3833 | | - " return (Ex + Ey - 2*I) / (Ex + Ey - I)\n", |
3834 | | - " else: \n", |
3835 | | - " return Ex + Ey - 2*I\n", |
3836 | | - "\n", |
3837 | | - "def normalised_conditional_entropy(x,y):\n", |
3838 | | - " \"\"\"\n", |
3839 | | - " H(X|Y) = H(X) - I(X,Y) and we normalise with log(N)\n", |
3840 | | - " \"\"\"\n", |
3841 | | - " \n", |
3842 | | - " N = len(x)\n", |
3843 | | - " Ex = entropy(x)\n", |
3844 | | - " I = metrics.mutual_info_score(x,y)\n", |
3845 | | - "\n", |
3846 | | - " return (Ex - I) / np.log(N)" |
3847 | | - ] |
3848 | | - }, |
3849 | 3801 | { |
3850 | 3802 | "cell_type": "code", |
3851 | 3803 | "execution_count": 38, |
|
0 commit comments