Skip to content

Commit e6dbf7c

Browse files
committed
14_evaluation.ipynb: False positives for graph intervals
Calculate the number of false positives for inexact graph interval labels based on DFS post-order numbering (working as a negative-cut filter). There are slightly more of them than with topological order used as a reachability label. Add the result to the summary of findings in a Markdown cell.
1 parent faa1bd3 commit e6dbf7c

File tree

1 file changed

+214
-1
lines changed

1 file changed

+214
-1
lines changed

14_evaluation.ipynb

Lines changed: 214 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,218 @@
14891489
"conn_sample_df['fn_intervals'].describe()"
14901490
]
14911491
},
1492+
{
1493+
"cell_type": "markdown",
1494+
"metadata": {},
1495+
"source": [
1496+
"### False positives for DFS inexact graph interval labels (negative cut)"
1497+
]
1498+
},
1499+
{
1500+
"cell_type": "code",
1501+
"execution_count": null,
1502+
"metadata": {},
1503+
"outputs": [
1504+
{
1505+
"name": "stdout",
1506+
"output_type": "stream",
1507+
"text": [
1508+
"graph intervals u-~->v: 4640 of 10000 (0.464)\n"
1509+
]
1510+
},
1511+
{
1512+
"data": {
1513+
"text/html": [
1514+
"<div>\n",
1515+
"<style scoped>\n",
1516+
" .dataframe tbody tr th:only-of-type {\n",
1517+
" vertical-align: middle;\n",
1518+
" }\n",
1519+
"\n",
1520+
" .dataframe tbody tr th {\n",
1521+
" vertical-align: top;\n",
1522+
" }\n",
1523+
"\n",
1524+
" .dataframe thead th {\n",
1525+
" text-align: right;\n",
1526+
" }\n",
1527+
"</style>\n",
1528+
"<table border=\"1\" class=\"dataframe\">\n",
1529+
" <thead>\n",
1530+
" <tr style=\"text-align: right;\">\n",
1531+
" <th></th>\n",
1532+
" <th>u</th>\n",
1533+
" <th>v</th>\n",
1534+
" <th>f_min(u)</th>\n",
1535+
" <th>f_min(v)</th>\n",
1536+
" <th>min(u)</th>\n",
1537+
" <th>min(v)</th>\n",
1538+
" <th>post(u)</th>\n",
1539+
" <th>post(v)</th>\n",
1540+
" <th>u-&gt;v</th>\n",
1541+
" <th>v in [f_min(u),post(u)]</th>\n",
1542+
" </tr>\n",
1543+
" </thead>\n",
1544+
" <tbody>\n",
1545+
" <tr>\n",
1546+
" <th>0</th>\n",
1547+
" <td>c7f34c180</td>\n",
1548+
" <td>23c204455</td>\n",
1549+
" <td>1</td>\n",
1550+
" <td>1</td>\n",
1551+
" <td>9555</td>\n",
1552+
" <td>55543</td>\n",
1553+
" <td>9555</td>\n",
1554+
" <td>55545</td>\n",
1555+
" <td>False</td>\n",
1556+
" <td>False</td>\n",
1557+
" </tr>\n",
1558+
" <tr>\n",
1559+
" <th>1</th>\n",
1560+
" <td>9dc527adb</td>\n",
1561+
" <td>53ec551c8</td>\n",
1562+
" <td>1</td>\n",
1563+
" <td>1</td>\n",
1564+
" <td>1</td>\n",
1565+
" <td>35785</td>\n",
1566+
" <td>459</td>\n",
1567+
" <td>35785</td>\n",
1568+
" <td>False</td>\n",
1569+
" <td>False</td>\n",
1570+
" </tr>\n",
1571+
" <tr>\n",
1572+
" <th>2</th>\n",
1573+
" <td>5e3ce663b</td>\n",
1574+
" <td>9affecbc8</td>\n",
1575+
" <td>1</td>\n",
1576+
" <td>1</td>\n",
1577+
" <td>23750</td>\n",
1578+
" <td>1</td>\n",
1579+
" <td>23770</td>\n",
1580+
" <td>18771</td>\n",
1581+
" <td>True</td>\n",
1582+
" <td>True</td>\n",
1583+
" </tr>\n",
1584+
" <tr>\n",
1585+
" <th>3</th>\n",
1586+
" <td>6440fdbab</td>\n",
1587+
" <td>c8c35f6a0</td>\n",
1588+
" <td>1</td>\n",
1589+
" <td>1</td>\n",
1590+
" <td>30197</td>\n",
1591+
" <td>61062</td>\n",
1592+
" <td>30202</td>\n",
1593+
" <td>61067</td>\n",
1594+
" <td>False</td>\n",
1595+
" <td>False</td>\n",
1596+
" </tr>\n",
1597+
" <tr>\n",
1598+
" <th>4</th>\n",
1599+
" <td>f1a7082f2</td>\n",
1600+
" <td>caac7a3ab</td>\n",
1601+
" <td>1</td>\n",
1602+
" <td>1</td>\n",
1603+
" <td>31397</td>\n",
1604+
" <td>40137</td>\n",
1605+
" <td>31397</td>\n",
1606+
" <td>40150</td>\n",
1607+
" <td>False</td>\n",
1608+
" <td>False</td>\n",
1609+
" </tr>\n",
1610+
" </tbody>\n",
1611+
"</table>\n",
1612+
"</div>"
1613+
],
1614+
"text/plain": [
1615+
" u v f_min(u) f_min(v) min(u) min(v) post(u) post(v) \\\n",
1616+
"0 c7f34c180 23c204455 1 1 9555 55543 9555 55545 \n",
1617+
"1 9dc527adb 53ec551c8 1 1 1 35785 459 35785 \n",
1618+
"2 5e3ce663b 9affecbc8 1 1 23750 1 23770 18771 \n",
1619+
"3 6440fdbab c8c35f6a0 1 1 30197 61062 30202 61067 \n",
1620+
"4 f1a7082f2 caac7a3ab 1 1 31397 40137 31397 40150 \n",
1621+
"\n",
1622+
" u->v v in [f_min(u),post(u)] \n",
1623+
"0 False False \n",
1624+
"1 False False \n",
1625+
"2 True True \n",
1626+
"3 False False \n",
1627+
"4 False False "
1628+
]
1629+
},
1630+
"execution_count": null,
1631+
"metadata": {},
1632+
"output_type": "execute_result"
1633+
}
1634+
],
1635+
"source": [
1636+
"conn_sample_df['v in [f_min(u),post(u)]'] = \\\n",
1637+
" (conn_sample_df['f_min(u)'] <= conn_sample_df['post(v)']) & \\\n",
1638+
" (conn_sample_df['post(v)'] <= conn_sample_df['post(u)'])\n",
1639+
"print('graph intervals u-~->v: %d of %d (%g)' %\n",
1640+
" (conn_sample_df['v in [f_min(u),post(u)]'].sum(),\n",
1641+
" conn_sample_df['v in [f_min(u),post(u)]'].count(),\n",
1642+
" conn_sample_df['v in [f_min(u),post(u)]'].mean()))\n",
1643+
"conn_sample_df[['u','v',\n",
1644+
" 'f_min(u)','f_min(v)',\n",
1645+
" 'min(u)','min(v)',\n",
1646+
" 'post(u)','post(v)',\n",
1647+
" 'u->v',\n",
1648+
" 'v in [f_min(u),post(u)]']].head()"
1649+
]
1650+
},
1651+
{
1652+
"cell_type": "code",
1653+
"execution_count": null,
1654+
"metadata": {},
1655+
"outputs": [
1656+
{
1657+
"name": "stdout",
1658+
"output_type": "stream",
1659+
"text": [
1660+
"10000 total queries\n",
1661+
"graph intervals: true negatives 5360 out of 5485 negative queries (97.7211 %)\n",
1662+
"graph intervals: true negatives 421 not covered by level filter\n",
1663+
"levels: true negatives 101 not covered by graph intervals\n",
1664+
"graph intervals: false positives 125 out of 5485 negative queries (2.27894 %)\n"
1665+
]
1666+
},
1667+
{
1668+
"data": {
1669+
"text/plain": [
1670+
"count 10000\n",
1671+
"unique 2\n",
1672+
"top False\n",
1673+
"freq 9875\n",
1674+
"Name: fp_intervals, dtype: object"
1675+
]
1676+
},
1677+
"execution_count": null,
1678+
"metadata": {},
1679+
"output_type": "execute_result"
1680+
}
1681+
],
1682+
"source": [
1683+
"conn_sample_df['fp_intervals'] = conn_sample_df['!u->v'] & conn_sample_df['v in [f_min(u),post(u)]']\n",
1684+
"\n",
1685+
"print('%d total queries' % conn_sample_df['u->v'].count())\n",
1686+
"print('graph intervals: true negatives %4d out of %4d negative queries (%g %%)' %\n",
1687+
" ((~conn_sample_df['v in [f_min(u),post(u)]']).sum(),\n",
1688+
" conn_sample_df['!u->v'].sum(),\n",
1689+
" 100.0*(~conn_sample_df['v in [f_min(u),post(u)]']).sum()/conn_sample_df['!u->v'].sum()))\n",
1690+
"\n",
1691+
"print('graph intervals: true negatives %4d not covered by level filter' %\n",
1692+
" (~conn_sample_df['v in [f_min(u),post(u)]'] & conn_sample_df['l_v<l_u'] & conn_sample_df['!u->v']).sum())\n",
1693+
"print('levels: true negatives %4d not covered by graph intervals' %\n",
1694+
" (conn_sample_df['l_v>l_u'] & conn_sample_df['v in [f_min(u),post(u)]'] & conn_sample_df['!u->v']).sum())\n",
1695+
"\n",
1696+
"print('graph intervals: false positives %4d out of %4d negative queries (%g %%)' %\n",
1697+
" (conn_sample_df['fp_intervals'].sum(),\n",
1698+
" conn_sample_df['!u->v'].sum(),\n",
1699+
" 100.0*conn_sample_df['fp_intervals'].sum()/conn_sample_df['!u->v'].sum()))\n",
1700+
"\n",
1701+
"conn_sample_df['fp_intervals'].describe()"
1702+
]
1703+
},
14921704
{
14931705
"cell_type": "markdown",
14941706
"metadata": {},
@@ -1504,7 +1716,8 @@
15041716
"- number of connected nodes: 4515 out of 10000 (45.15 % = 0.4515 +/- 0.00497667)\n",
15051717
"- there were 918 out of 10000 ( 9.18 %) pairs of nodes (u, v) with neither u->v nor v->u\n",
15061718
"- level: false positives 445 out of 5485 negative queries (8.11304 %)\n",
1507-
"- intervals: false negatives 3298 out of 4515 positive queries (73.0454 %)"
1719+
"- inexact graph intervals (or, alternatively, topological sort order): false positives 125 out of 5485 negative queries (2.27894 %)\n",
1720+
"- exact tree intervals: false negatives 3298 out of 4515 positive queries (73.0454 %)"
15081721
]
15091722
},
15101723
{

0 commit comments

Comments
 (0)