|
1489 | 1489 | "conn_sample_df['fn_intervals'].describe()"
|
1490 | 1490 | ]
|
1491 | 1491 | },
|
| 1492 | + { |
| 1493 | + "cell_type": "markdown", |
| 1494 | + "metadata": {}, |
| 1495 | + "source": [ |
| 1496 | + "### False positives for DFS inexact graph interval labels (negative cut)" |
| 1497 | + ] |
| 1498 | + }, |
| 1499 | + { |
| 1500 | + "cell_type": "code", |
| 1501 | + "execution_count": null, |
| 1502 | + "metadata": {}, |
| 1503 | + "outputs": [ |
| 1504 | + { |
| 1505 | + "name": "stdout", |
| 1506 | + "output_type": "stream", |
| 1507 | + "text": [ |
| 1508 | + "graph intervals u-~->v: 4640 of 10000 (0.464)\n" |
| 1509 | + ] |
| 1510 | + }, |
| 1511 | + { |
| 1512 | + "data": { |
| 1513 | + "text/html": [ |
| 1514 | + "<div>\n", |
| 1515 | + "<style scoped>\n", |
| 1516 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1517 | + " vertical-align: middle;\n", |
| 1518 | + " }\n", |
| 1519 | + "\n", |
| 1520 | + " .dataframe tbody tr th {\n", |
| 1521 | + " vertical-align: top;\n", |
| 1522 | + " }\n", |
| 1523 | + "\n", |
| 1524 | + " .dataframe thead th {\n", |
| 1525 | + " text-align: right;\n", |
| 1526 | + " }\n", |
| 1527 | + "</style>\n", |
| 1528 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1529 | + " <thead>\n", |
| 1530 | + " <tr style=\"text-align: right;\">\n", |
| 1531 | + " <th></th>\n", |
| 1532 | + " <th>u</th>\n", |
| 1533 | + " <th>v</th>\n", |
| 1534 | + " <th>f_min(u)</th>\n", |
| 1535 | + " <th>f_min(v)</th>\n", |
| 1536 | + " <th>min(u)</th>\n", |
| 1537 | + " <th>min(v)</th>\n", |
| 1538 | + " <th>post(u)</th>\n", |
| 1539 | + " <th>post(v)</th>\n", |
| 1540 | + " <th>u->v</th>\n", |
| 1541 | + " <th>v in [f_min(u),post(u)]</th>\n", |
| 1542 | + " </tr>\n", |
| 1543 | + " </thead>\n", |
| 1544 | + " <tbody>\n", |
| 1545 | + " <tr>\n", |
| 1546 | + " <th>0</th>\n", |
| 1547 | + " <td>c7f34c180</td>\n", |
| 1548 | + " <td>23c204455</td>\n", |
| 1549 | + " <td>1</td>\n", |
| 1550 | + " <td>1</td>\n", |
| 1551 | + " <td>9555</td>\n", |
| 1552 | + " <td>55543</td>\n", |
| 1553 | + " <td>9555</td>\n", |
| 1554 | + " <td>55545</td>\n", |
| 1555 | + " <td>False</td>\n", |
| 1556 | + " <td>False</td>\n", |
| 1557 | + " </tr>\n", |
| 1558 | + " <tr>\n", |
| 1559 | + " <th>1</th>\n", |
| 1560 | + " <td>9dc527adb</td>\n", |
| 1561 | + " <td>53ec551c8</td>\n", |
| 1562 | + " <td>1</td>\n", |
| 1563 | + " <td>1</td>\n", |
| 1564 | + " <td>1</td>\n", |
| 1565 | + " <td>35785</td>\n", |
| 1566 | + " <td>459</td>\n", |
| 1567 | + " <td>35785</td>\n", |
| 1568 | + " <td>False</td>\n", |
| 1569 | + " <td>False</td>\n", |
| 1570 | + " </tr>\n", |
| 1571 | + " <tr>\n", |
| 1572 | + " <th>2</th>\n", |
| 1573 | + " <td>5e3ce663b</td>\n", |
| 1574 | + " <td>9affecbc8</td>\n", |
| 1575 | + " <td>1</td>\n", |
| 1576 | + " <td>1</td>\n", |
| 1577 | + " <td>23750</td>\n", |
| 1578 | + " <td>1</td>\n", |
| 1579 | + " <td>23770</td>\n", |
| 1580 | + " <td>18771</td>\n", |
| 1581 | + " <td>True</td>\n", |
| 1582 | + " <td>True</td>\n", |
| 1583 | + " </tr>\n", |
| 1584 | + " <tr>\n", |
| 1585 | + " <th>3</th>\n", |
| 1586 | + " <td>6440fdbab</td>\n", |
| 1587 | + " <td>c8c35f6a0</td>\n", |
| 1588 | + " <td>1</td>\n", |
| 1589 | + " <td>1</td>\n", |
| 1590 | + " <td>30197</td>\n", |
| 1591 | + " <td>61062</td>\n", |
| 1592 | + " <td>30202</td>\n", |
| 1593 | + " <td>61067</td>\n", |
| 1594 | + " <td>False</td>\n", |
| 1595 | + " <td>False</td>\n", |
| 1596 | + " </tr>\n", |
| 1597 | + " <tr>\n", |
| 1598 | + " <th>4</th>\n", |
| 1599 | + " <td>f1a7082f2</td>\n", |
| 1600 | + " <td>caac7a3ab</td>\n", |
| 1601 | + " <td>1</td>\n", |
| 1602 | + " <td>1</td>\n", |
| 1603 | + " <td>31397</td>\n", |
| 1604 | + " <td>40137</td>\n", |
| 1605 | + " <td>31397</td>\n", |
| 1606 | + " <td>40150</td>\n", |
| 1607 | + " <td>False</td>\n", |
| 1608 | + " <td>False</td>\n", |
| 1609 | + " </tr>\n", |
| 1610 | + " </tbody>\n", |
| 1611 | + "</table>\n", |
| 1612 | + "</div>" |
| 1613 | + ], |
| 1614 | + "text/plain": [ |
| 1615 | + " u v f_min(u) f_min(v) min(u) min(v) post(u) post(v) \\\n", |
| 1616 | + "0 c7f34c180 23c204455 1 1 9555 55543 9555 55545 \n", |
| 1617 | + "1 9dc527adb 53ec551c8 1 1 1 35785 459 35785 \n", |
| 1618 | + "2 5e3ce663b 9affecbc8 1 1 23750 1 23770 18771 \n", |
| 1619 | + "3 6440fdbab c8c35f6a0 1 1 30197 61062 30202 61067 \n", |
| 1620 | + "4 f1a7082f2 caac7a3ab 1 1 31397 40137 31397 40150 \n", |
| 1621 | + "\n", |
| 1622 | + " u->v v in [f_min(u),post(u)] \n", |
| 1623 | + "0 False False \n", |
| 1624 | + "1 False False \n", |
| 1625 | + "2 True True \n", |
| 1626 | + "3 False False \n", |
| 1627 | + "4 False False " |
| 1628 | + ] |
| 1629 | + }, |
| 1630 | + "execution_count": null, |
| 1631 | + "metadata": {}, |
| 1632 | + "output_type": "execute_result" |
| 1633 | + } |
| 1634 | + ], |
| 1635 | + "source": [ |
| 1636 | + "conn_sample_df['v in [f_min(u),post(u)]'] = \\\n", |
| 1637 | + " (conn_sample_df['f_min(u)'] <= conn_sample_df['post(v)']) & \\\n", |
| 1638 | + " (conn_sample_df['post(v)'] <= conn_sample_df['post(u)'])\n", |
| 1639 | + "print('graph intervals u-~->v: %d of %d (%g)' %\n", |
| 1640 | + " (conn_sample_df['v in [f_min(u),post(u)]'].sum(),\n", |
| 1641 | + " conn_sample_df['v in [f_min(u),post(u)]'].count(),\n", |
| 1642 | + " conn_sample_df['v in [f_min(u),post(u)]'].mean()))\n", |
| 1643 | + "conn_sample_df[['u','v',\n", |
| 1644 | + " 'f_min(u)','f_min(v)',\n", |
| 1645 | + " 'min(u)','min(v)',\n", |
| 1646 | + " 'post(u)','post(v)',\n", |
| 1647 | + " 'u->v',\n", |
| 1648 | + " 'v in [f_min(u),post(u)]']].head()" |
| 1649 | + ] |
| 1650 | + }, |
| 1651 | + { |
| 1652 | + "cell_type": "code", |
| 1653 | + "execution_count": null, |
| 1654 | + "metadata": {}, |
| 1655 | + "outputs": [ |
| 1656 | + { |
| 1657 | + "name": "stdout", |
| 1658 | + "output_type": "stream", |
| 1659 | + "text": [ |
| 1660 | + "10000 total queries\n", |
| 1661 | + "graph intervals: true negatives 5360 out of 5485 negative queries (97.7211 %)\n", |
| 1662 | + "graph intervals: true negatives 421 not covered by level filter\n", |
| 1663 | + "levels: true negatives 101 not covered by graph intervals\n", |
| 1664 | + "graph intervals: false positives 125 out of 5485 negative queries (2.27894 %)\n" |
| 1665 | + ] |
| 1666 | + }, |
| 1667 | + { |
| 1668 | + "data": { |
| 1669 | + "text/plain": [ |
| 1670 | + "count 10000\n", |
| 1671 | + "unique 2\n", |
| 1672 | + "top False\n", |
| 1673 | + "freq 9875\n", |
| 1674 | + "Name: fp_intervals, dtype: object" |
| 1675 | + ] |
| 1676 | + }, |
| 1677 | + "execution_count": null, |
| 1678 | + "metadata": {}, |
| 1679 | + "output_type": "execute_result" |
| 1680 | + } |
| 1681 | + ], |
| 1682 | + "source": [ |
| 1683 | + "conn_sample_df['fp_intervals'] = conn_sample_df['!u->v'] & conn_sample_df['v in [f_min(u),post(u)]']\n", |
| 1684 | + "\n", |
| 1685 | + "print('%d total queries' % conn_sample_df['u->v'].count())\n", |
| 1686 | + "print('graph intervals: true negatives %4d out of %4d negative queries (%g %%)' %\n", |
| 1687 | + " ((~conn_sample_df['v in [f_min(u),post(u)]']).sum(),\n", |
| 1688 | + " conn_sample_df['!u->v'].sum(),\n", |
| 1689 | + " 100.0*(~conn_sample_df['v in [f_min(u),post(u)]']).sum()/conn_sample_df['!u->v'].sum()))\n", |
| 1690 | + "\n", |
| 1691 | + "print('graph intervals: true negatives %4d not covered by level filter' %\n", |
| 1692 | + " (~conn_sample_df['v in [f_min(u),post(u)]'] & conn_sample_df['l_v<l_u'] & conn_sample_df['!u->v']).sum())\n", |
| 1693 | + "print('levels: true negatives %4d not covered by graph intervals' %\n", |
| 1694 | + " (conn_sample_df['l_v>l_u'] & conn_sample_df['v in [f_min(u),post(u)]'] & conn_sample_df['!u->v']).sum())\n", |
| 1695 | + "\n", |
| 1696 | + "print('graph intervals: false positives %4d out of %4d negative queries (%g %%)' %\n", |
| 1697 | + " (conn_sample_df['fp_intervals'].sum(),\n", |
| 1698 | + " conn_sample_df['!u->v'].sum(),\n", |
| 1699 | + " 100.0*conn_sample_df['fp_intervals'].sum()/conn_sample_df['!u->v'].sum()))\n", |
| 1700 | + "\n", |
| 1701 | + "conn_sample_df['fp_intervals'].describe()" |
| 1702 | + ] |
| 1703 | + }, |
1492 | 1704 | {
|
1493 | 1705 | "cell_type": "markdown",
|
1494 | 1706 | "metadata": {},
|
|
1504 | 1716 | "- number of connected nodes: 4515 out of 10000 (45.15 % = 0.4515 +/- 0.00497667)\n",
|
1505 | 1717 | "- there were 918 out of 10000 ( 9.18 %) node pairs for which neither u->v nor v->u holds\n",
|
1506 | 1718 | "- level: false positives 445 out of 5485 negative queries (8.11304 %)\n",
|
1507 |
| - "- intervals: false negatives 3298 out of 4515 positive queries (73.0454 %)" |
| 1719 | + "- inexact graph intervals (or topological sort): false positives 125 out of 5485 negative queries (2.27894 %)\n", |
| 1720 | + "- exact tree intervals: false negatives 3298 out of 4515 positive queries (73.0454 %)" |
1508 | 1721 | ]
|
1509 | 1722 | },
|
1510 | 1723 | {
|
|
0 commit comments