@@ -1539,6 +1539,8 @@ static int pack_offset_sort(const void *_a, const void *_b)
1539
1539
* 2. Updating our size/type to the non-delta representation. These were
1540
1540
* either not recorded initially (size) or overwritten with the delta type
1541
1541
* (type) when check_object() decided to reuse the delta.
1542
+ *
1543
+ * 3. Resetting our delta depth, as we are now a base object.
1542
1544
*/
1543
1545
static void drop_reused_delta (struct object_entry * entry )
1544
1546
{
@@ -1552,6 +1554,7 @@ static void drop_reused_delta(struct object_entry *entry)
1552
1554
p = & (* p )-> delta_sibling ;
1553
1555
}
1554
1556
entry -> delta = NULL ;
1557
+ entry -> depth = 0 ;
1555
1558
1556
1559
oi .sizep = & entry -> size ;
1557
1560
oi .typep = & entry -> type ;
@@ -1570,39 +1573,123 @@ static void drop_reused_delta(struct object_entry *entry)
1570
1573
* Follow the chain of deltas from this entry onward, throwing away any links
1571
1574
* that cause us to hit a cycle (as determined by the DFS state flags in
1572
1575
* the entries).
1576
+ *
1577
+ * We also detect too-long reused chains that would violate our --depth
1578
+ * limit.
1573
1579
*/
1574
1580
static void break_delta_chains (struct object_entry * entry )
1575
1581
{
1576
- /* If it's not a delta, it can't be part of a cycle. */
1577
- if (!entry -> delta ) {
1578
- entry -> dfs_state = DFS_DONE ;
1579
- return ;
1580
- }
1582
+ /*
1583
+ * The actual depth of each object we will write is stored as an int,
1584
+ * as it cannot exceed our int "depth" limit. But before we break
1585
+ * changes based no that limit, we may potentially go as deep as the
1586
+ * number of objects, which is elsewhere bounded to a uint32_t.
1587
+ */
1588
+ uint32_t total_depth ;
1589
+ struct object_entry * cur , * next ;
1590
+
1591
+ for (cur = entry , total_depth = 0 ;
1592
+ cur ;
1593
+ cur = cur -> delta , total_depth ++ ) {
1594
+ if (cur -> dfs_state == DFS_DONE ) {
1595
+ /*
1596
+ * We've already seen this object and know it isn't
1597
+ * part of a cycle. We do need to append its depth
1598
+ * to our count.
1599
+ */
1600
+ total_depth += cur -> depth ;
1601
+ break ;
1602
+ }
1581
1603
1582
- switch (entry -> dfs_state ) {
1583
- case DFS_NONE :
1584
1604
/*
1585
- * This is the first time we've seen the object. We mark it as
1586
- * part of the active potential cycle and recurse.
1605
+ * We break cycles before looping, so an ACTIVE state (or any
1606
+ * other cruft which made its way into the state variable)
1607
+ * is a bug.
1587
1608
*/
1588
- entry -> dfs_state = DFS_ACTIVE ;
1589
- break_delta_chains (entry -> delta );
1590
- entry -> dfs_state = DFS_DONE ;
1591
- break ;
1609
+ if (cur -> dfs_state != DFS_NONE )
1610
+ die ("BUG: confusing delta dfs state in first pass: %d" ,
1611
+ cur -> dfs_state );
1592
1612
1593
- case DFS_DONE :
1594
- /* object already examined, and not part of a cycle */
1595
- break ;
1613
+ /*
1614
+ * Now we know this is the first time we've seen the object. If
1615
+ * it's not a delta, we're done traversing, but we'll mark it
1616
+ * done to save time on future traversals.
1617
+ */
1618
+ if (!cur -> delta ) {
1619
+ cur -> dfs_state = DFS_DONE ;
1620
+ break ;
1621
+ }
1596
1622
1597
- case DFS_ACTIVE :
1598
1623
/*
1599
- * We found a cycle that needs broken. It would be correct to
1600
- * break any link in the chain, but it's convenient to
1601
- * break this one.
1624
+ * Mark ourselves as active and see if the next step causes
1625
+ * us to cycle to another active object. It's important to do
1626
+ * this _before_ we loop, because it impacts where we make the
1627
+ * cut, and thus how our total_depth counter works.
1628
+ * E.g., We may see a partial loop like:
1629
+ *
1630
+ * A -> B -> C -> D -> B
1631
+ *
1632
+ * Cutting B->C breaks the cycle. But now the depth of A is
1633
+ * only 1, and our total_depth counter is at 3. The size of the
1634
+ * error is always one less than the size of the cycle we
1635
+ * broke. Commits C and D were "lost" from A's chain.
1636
+ *
1637
+ * If we instead cut D->B, then the depth of A is correct at 3.
1638
+ * We keep all commits in the chain that we examined.
1602
1639
*/
1603
- drop_reused_delta (entry );
1604
- entry -> dfs_state = DFS_DONE ;
1605
- break ;
1640
+ cur -> dfs_state = DFS_ACTIVE ;
1641
+ if (cur -> delta -> dfs_state == DFS_ACTIVE ) {
1642
+ drop_reused_delta (cur );
1643
+ cur -> dfs_state = DFS_DONE ;
1644
+ break ;
1645
+ }
1646
+ }
1647
+
1648
+ /*
1649
+ * And now that we've gone all the way to the bottom of the chain, we
1650
+ * need to clear the active flags and set the depth fields as
1651
+ * appropriate. Unlike the loop above, which can quit when it drops a
1652
+ * delta, we need to keep going to look for more depth cuts. So we need
1653
+ * an extra "next" pointer to keep going after we reset cur->delta.
1654
+ */
1655
+ for (cur = entry ; cur ; cur = next ) {
1656
+ next = cur -> delta ;
1657
+
1658
+ /*
1659
+ * We should have a chain of zero or more ACTIVE states down to
1660
+ * a final DONE. We can quit after the DONE, because either it
1661
+ * has no bases, or we've already handled them in a previous
1662
+ * call.
1663
+ */
1664
+ if (cur -> dfs_state == DFS_DONE )
1665
+ break ;
1666
+ else if (cur -> dfs_state != DFS_ACTIVE )
1667
+ die ("BUG: confusing delta dfs state in second pass: %d" ,
1668
+ cur -> dfs_state );
1669
+
1670
+ /*
1671
+ * If the total_depth is more than depth, then we need to snip
1672
+ * the chain into two or more smaller chains that don't exceed
1673
+ * the maximum depth. Most of the resulting chains will contain
1674
+ * (depth + 1) entries (i.e., depth deltas plus one base), and
1675
+ * the last chain (i.e., the one containing entry) will contain
1676
+ * whatever entries are left over, namely
1677
+ * (total_depth % (depth + 1)) of them.
1678
+ *
1679
+ * Since we are iterating towards decreasing depth, we need to
1680
+ * decrement total_depth as we go, and we need to write to the
1681
+ * entry what its final depth will be after all of the
1682
+ * snipping. Since we're snipping into chains of length (depth
1683
+ * + 1) entries, the final depth of an entry will be its
1684
+ * original depth modulo (depth + 1). Any time we encounter an
1685
+ * entry whose final depth is supposed to be zero, we snip it
1686
+ * from its delta base, thereby making it so.
1687
+ */
1688
+ cur -> depth = (total_depth -- ) % (depth + 1 );
1689
+ if (!cur -> depth )
1690
+ drop_reused_delta (cur );
1691
+
1692
+ cur -> dfs_state = DFS_DONE ;
1606
1693
}
1607
1694
}
1608
1695
0 commit comments