@@ -10,13 +10,17 @@ use crate::metadata::SiteMetadata;
1010use crate :: sys:: bindings as ll_bindings;
1111use crate :: sys:: TableCollection as LLTableCollection ;
1212use crate :: types:: Bookmark ;
13+ use crate :: EdgeTable ;
1314use crate :: IndividualTableSortOptions ;
1415use crate :: MigrationId ;
16+ use crate :: MigrationTable ;
1517use crate :: MutationId ;
18+ use crate :: MutationTable ;
1619use crate :: PopulationId ;
1720use crate :: Position ;
1821use crate :: SimplificationOptions ;
1922use crate :: SiteId ;
23+ use crate :: SiteTable ;
2024use crate :: TableClearOptions ;
2125use crate :: TableEqualityOptions ;
2226use crate :: TableIntegrityCheckFlags ;
@@ -1372,4 +1376,206 @@ impl TableCollection {
13721376 pub fn as_mut_ptr ( & mut self ) -> * mut ll_bindings:: tsk_table_collection_t {
13731377 self . inner . as_mut_ptr ( )
13741378 }
1379+
1380+ /// Truncate the [TableCollection] to specified genome intervals.
1381+ ///
1382+ /// # Return
1383+ /// - `Ok(None)`: when truncation leads to empty edge table.
1384+ /// - `Ok(Some(TableCollection))`: when trunction is successfully performed
1385+ /// and results in non-empty edge table.
1386+ /// - `Error(TskitError)`: Any errors from the C API propagate. An
1387+ /// [TskitError::RangeError] will occur when `intervals` are not
1388+ /// sorted. Note that as `tskit` currently does not support `simplify`
1389+ /// on [TableCollection] with a non-empty migration table, calling
1390+ /// `keep_intervals` on those [TableCollection] with `simplify` set to
1391+ /// `true` will return an error.
1392+ ///
1393+ /// # Example
1394+ /// ```rust
1395+ /// # use tskit::*;
1396+ /// # let snode = NodeFlags::new_sample();
1397+ /// # let anode = NodeFlags::default();
1398+ /// # let pop = PopulationId::NULL;
1399+ /// # let ind = IndividualId::NULL;
1400+ /// # let seqlen = 100.0;
1401+ /// # let (t0, t10) = (0.0, 10.0);
1402+ /// # let (left, right) = (0.0, 100.0);
1403+ /// # let sim_opts = SimplificationOptions::default();
1404+ /// #
1405+ /// # let mut tables = TableCollection::new(seqlen).unwrap();
1406+ /// # let child1 = tables.add_node(snode, t0, pop, ind).unwrap();
1407+ /// # let child2 = tables.add_node(snode, t0, pop, ind).unwrap();
1408+ /// # let parent = tables.add_node(anode, t10, pop, ind).unwrap();
1409+ /// #
1410+ /// # tables.add_edge(left, right, parent, child1).unwrap();
1411+ /// # tables.add_edge(left, right, parent, child2).unwrap();
1412+ /// # tables.full_sort(TableSortOptions::all()).unwrap();
1413+ /// # tables.simplify(&[child1, child2], sim_opts, false).unwrap();
1414+ /// # tables.build_index().unwrap();
1415+ /// #
1416+ /// let intervals = [(0.0, 10.0), (90.0, 100.0)].into_iter();
1417+ /// tables.keep_intervals(intervals, true).unwrap().unwrap();
1418+ /// ```
1419+ ///
1420+ /// Note that no new provenance will be appended.
1421+ pub fn keep_intervals < P > (
1422+ self ,
1423+ intervals : impl Iterator < Item = ( P , P ) > ,
1424+ simplify : bool ,
1425+ ) -> Result < Option < Self > , TskitError >
1426+ where
1427+ P : Into < Position > ,
1428+ {
1429+ use streaming_iterator:: StreamingIterator ;
1430+ let mut tables = self ;
1431+ // use tables from sys to allow easier process with metadata
1432+ let options = 0 ;
1433+ let mut new_edges = crate :: sys:: EdgeTable :: new ( options) ?;
1434+ let mut new_migrations = crate :: sys:: MigrationTable :: new ( options) ?;
1435+ let mut new_sites = crate :: sys:: SiteTable :: new ( options) ?;
1436+ let mut new_mutations = crate :: sys:: MutationTable :: new ( options) ?;
1437+
1438+ // for old site id to new site id mapping
1439+ let mut site_map = vec ! [ -1i32 ; tables. sites( ) . num_rows( ) . as_usize( ) ] ;
1440+
1441+ // logicals to indicate whether a site (old) will be kept in new site table
1442+ let mut keep_sites = vec ! [ false ; tables. sites( ) . num_rows( ) . try_into( ) ?] ;
1443+
1444+ let mut last_interval = ( Position :: from ( 0.0 ) , Position :: from ( 0.0 ) ) ;
1445+ for ( s, e) in intervals {
1446+ let ( s, e) = ( s. into ( ) , e. into ( ) ) ;
1447+ // make sure intervals are sorted
1448+ if ( s > e) || ( s < last_interval. 1 ) {
1449+ return Err ( TskitError :: RangeError (
1450+ "intervals not valid or sorted" . into ( ) ,
1451+ ) ) ;
1452+ }
1453+ keep_sites
1454+ . iter_mut ( )
1455+ . zip ( tables. sites_iter ( ) )
1456+ . for_each ( |( k, site_row) | {
1457+ * k = * k || ( ( site_row. position >= s) && ( site_row. position < e) ) ;
1458+ } ) ;
1459+
1460+ // use stream_iter and while-let pattern for easier ? operator within a loop
1461+ let mut edge_iter = tables
1462+ . edges ( )
1463+ . lending_iter ( )
1464+ . filter ( |edge_row| !( ( edge_row. right <= s) || ( edge_row. left >= e) ) ) ;
1465+
1466+ while let Some ( edge_row) = edge_iter. next ( ) {
1467+ new_edges. add_row_with_metadata (
1468+ if edge_row. left < s { s } else { edge_row. left } . into ( ) ,
1469+ if edge_row. right > e {
1470+ e
1471+ } else {
1472+ edge_row. right
1473+ }
1474+ . into ( ) ,
1475+ edge_row. parent . into ( ) ,
1476+ edge_row. child . into ( ) ,
1477+ edge_row. metadata . unwrap_or ( & [ 0u8 ; 0 ] ) ,
1478+ ) ?;
1479+ }
1480+
1481+ let mut migration_iter = tables
1482+ . migrations ( )
1483+ . lending_iter ( )
1484+ . filter ( |mrow| !( ( mrow. right <= s) || ( mrow. left >= e) ) ) ;
1485+
1486+ while let Some ( migration_row) = migration_iter. next ( ) {
1487+ new_migrations. add_row_with_metadata (
1488+ ( migration_row. left . into ( ) , migration_row. right . into ( ) ) ,
1489+ migration_row. node . into ( ) ,
1490+ migration_row. source . into ( ) ,
1491+ migration_row. dest . into ( ) ,
1492+ migration_row. time . into ( ) ,
1493+ migration_row. metadata . unwrap_or ( & [ 0u8 ; 0 ] ) ,
1494+ ) ?;
1495+ }
1496+ last_interval = ( s, e) ;
1497+ }
1498+
1499+ let mut running_site_id = 0 ;
1500+ let mut site_iter = tables. sites ( ) . lending_iter ( ) ;
1501+ while let Some ( site_row) = site_iter. next ( ) {
1502+ let old_id = site_row. id . to_usize ( ) . unwrap ( ) ;
1503+ if keep_sites[ old_id] {
1504+ new_sites. add_row_with_metadata (
1505+ site_row. position . into ( ) ,
1506+ site_row. ancestral_state ,
1507+ site_row. metadata . unwrap_or ( & [ 0u8 ; 0 ] ) ,
1508+ ) ?;
1509+ site_map[ old_id] = running_site_id;
1510+ running_site_id += 1 ;
1511+ }
1512+ }
1513+
1514+ // build mutation_map
1515+ let mutation_map: Vec < _ > = {
1516+ let mut n = 0 ;
1517+ tables
1518+ . mutations ( )
1519+ . site_slice ( )
1520+ . iter ( )
1521+ . map ( |site| {
1522+ if keep_sites[ site. as_usize ( ) ] {
1523+ n += 1
1524+ } ;
1525+ n - 1
1526+ } )
1527+ . collect ( )
1528+ } ;
1529+
1530+ let mut mutations_iter = tables. mutations ( ) . lending_iter ( ) ;
1531+ while let Some ( mutation_row) = mutations_iter. next ( ) {
1532+ let old_id = mutation_row. site . to_usize ( ) . unwrap ( ) ;
1533+ if keep_sites[ old_id] {
1534+ let new_site = site_map[ old_id] ;
1535+ let new_parent = {
1536+ if mutation_row. parent . is_null ( ) {
1537+ mutation_row. parent . into ( )
1538+ } else {
1539+ mutation_map[ mutation_row. parent . as_usize ( ) ]
1540+ }
1541+ } ;
1542+ new_mutations. add_row_with_metadata (
1543+ new_site,
1544+ mutation_row. node . into ( ) ,
1545+ new_parent,
1546+ mutation_row. time . into ( ) ,
1547+ mutation_row. derived_state ,
1548+ mutation_row. metadata . unwrap_or ( & [ 0u8 ; 0 ] ) ,
1549+ ) ?;
1550+ }
1551+ }
1552+
1553+ // convert sys version of tables to non-sys version of tables
1554+ let new_edges = EdgeTable :: new_from_table ( new_edges. as_mut ( ) ) ?;
1555+ let new_migrations = MigrationTable :: new_from_table ( new_migrations. as_mut ( ) ) ?;
1556+ let new_mutations = MutationTable :: new_from_table ( new_mutations. as_mut ( ) ) ?;
1557+ let new_sites = SiteTable :: new_from_table ( new_sites. as_mut ( ) ) ?;
1558+
1559+ // replace old tables with new tables
1560+ tables. set_edges ( & new_edges) . map ( |_| ( ) ) ?;
1561+ tables. set_migrations ( & new_migrations) . map ( |_| ( ) ) ?;
1562+ tables. set_mutations ( & new_mutations) . map ( |_| ( ) ) ?;
1563+ tables. set_sites ( & new_sites) ?;
1564+
1565+ // sort tables
1566+ tables. full_sort ( TableSortOptions :: default ( ) ) ?;
1567+
1568+ // simplify tables
1569+ if simplify {
1570+ let samples = tables. samples_as_vector ( ) ;
1571+ tables. simplify ( samples. as_slice ( ) , SimplificationOptions :: default ( ) , false ) ?;
1572+ }
1573+
1574+ // return None when edge table is empty
1575+ if tables. edges ( ) . num_rows ( ) == 0 {
1576+ Ok ( None )
1577+ } else {
1578+ Ok ( Some ( tables) )
1579+ }
1580+ }
13751581}
0 commit comments