Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions hls-graph/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,22 @@ Features:
* User defined rules (there are no predefined File rules as in Shake)
* Build reports (a la Shake profiling)
* "Reactive" change tracking for minimal rebuilds (not available in Shake)
* Incremental topological ordering using the Pearce-Kelly algorithm for efficient dirty key propagation

What's missing:

* Persistence
* A default set of rules for file system builds
* A testsuite
* General purpose application - many design decisions make assumptions specific to ghcide

## Performance Optimizations

### Pearce-Kelly Topological Ordering

The build graph maintains an incremental topological ordering using the Pearce-Kelly algorithm. This optimization significantly speeds up dirty key propagation during rebuilds:

- **Before**: Computing transitive dirty keys required a DFS traversal followed by O(V log V) sorting
- **After**: The topological order is maintained incrementally, reducing sorting to O(V) filtering

The algorithm maintains an integer order for each key in the dependency graph. When dependencies change, only affected portions of the ordering are updated, ensuring minimal overhead during normal operation while providing fast lookups when computing dirty key propagation.
1 change: 1 addition & 0 deletions hls-graph/hls-graph.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ library
Development.IDE.Graph.Internal.Rules
Development.IDE.Graph.Internal.Types
Development.IDE.Graph.Internal.Scheduler
Development.IDE.Graph.Internal.TopoSort
Development.IDE.Graph.KeyMap
Development.IDE.Graph.KeySet
Development.IDE.Graph.Rule
Expand Down
36 changes: 28 additions & 8 deletions hls-graph/src/Development/IDE/Graph/Internal/Database.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ import Debug.Trace (traceEvent)
import Development.IDE.Graph.Classes
import Development.IDE.Graph.Internal.Key
import Development.IDE.Graph.Internal.Rules
import Development.IDE.Graph.Internal.TopoSort (emptyTopoOrder,
addEdge,
getAffectedKeysInOrder)
import Development.IDE.Graph.Internal.Types
import Development.IDE.Graph.Internal.Types ()
import Development.IDE.WorkerThread (DeliverStatus (..))
Expand Down Expand Up @@ -83,6 +86,7 @@ newDatabase dataBaseLogger databaseQueue databaseActionQueue databaseExtra datab
schedulerUpsweepQueue <- newTQueueIO
schedulerAllDirties <- newTVarIO mempty
schedulerAllKeysInOrder <- newTVarIO []
schedulerTopoOrder <- emptyTopoOrder
let databaseScheduler = SchedulerState{..}
pure Database{..}

Expand Down Expand Up @@ -391,18 +395,23 @@ updateReverseDeps
-> KeySet -- ^ Current direct dependencies of Id
-> STM ()
-- mask to ensure that all the reverse dependencies are updated
updateReverseDeps myId db prev new = do
updateReverseDeps myId db@Database{..} prev new = do
let SchedulerState{..} = databaseScheduler
-- Update reverse dependency edges
forM_ (toListKeySet $ prev `differenceKeySet` new) $ \d ->
doOne (deleteKeySet myId) d
forM_ (toListKeySet new) $
doOne (insertKeySet myId)
forM_ (toListKeySet new) $ \d -> do
doOne (insertKeySet myId) d
-- Maintain topological order using Pearce-Kelly:
-- myId depends on d, so d must come before myId in topo order
addEdge schedulerTopoOrder (getRunTimeRDeps db) myId d
where
alterRDeps f =
Focus.adjust (onKeyReverseDeps f)
-- updating all the reverse deps atomically is not needed.
-- Therefore, run individual transactions for each update
-- in order to avoid contention
doOne f id = SMap.focus (alterRDeps f) id (databaseValues db)
doOne f id = SMap.focus (alterRDeps f) id databaseValues

-- compute the transitive reverse dependencies of a set of keys

Expand Down Expand Up @@ -450,10 +459,18 @@ transitiveDirtyListBottomUp database seeds = do
void $ State.runStateT (traverse_ go seeds) mempty
readIORef acc

-- the lefts are keys that are no longer affected, we can try to mark them clean
-- the rights are new affected keys, we need to mark them dirty
-- | Compute transitively dirty keys in bottom-up dependency order
--
-- The lefts are keys that are no longer affected, we can try to mark them clean.
-- The rights are new affected keys, we need to mark them dirty.
--
-- Optimized version using Pearce-Kelly maintained topological order:
-- Instead of sorting after DFS traversal, we use the pre-maintained topological
-- order to return keys in correct bottom-up order (dependencies before dependents).
-- This reduces the time complexity from O(V log V) sorting to O(V) filtering.
transitiveDirtyListBottomUpDiff :: Foldable t => Database -> t Key -> [Key] -> STM ([Key], [Key], KeySet)
transitiveDirtyListBottomUpDiff database seeds allOldKeys = do
transitiveDirtyListBottomUpDiff database@Database{..} seeds allOldKeys = do
let SchedulerState{..} = databaseScheduler
acc <- newTVar []
let go1 x = do
seen <- State.get
Expand All @@ -467,7 +484,10 @@ transitiveDirtyListBottomUpDiff database seeds allOldKeys = do
lift $ modifyTVar acc (x :)
-- traverse all seeds
seen <- snd <$> State.runStateT (do traverse_ go1 seeds) mempty
newKeys <- readTVar acc
-- Use the maintained topological order to sort the affected keys
-- This provides bottom-up order (dependencies before dependents)
-- Performance: O(V) instead of O(V log V) due to pre-maintained order
newKeys <- getAffectedKeysInOrder schedulerTopoOrder seen
let oldKeys = filter (`notMemberKeySet` seen) allOldKeys
return (oldKeys, newKeys, seen)

Expand Down
111 changes: 111 additions & 0 deletions hls-graph/src/Development/IDE/Graph/Internal/TopoSort.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{-# LANGUAGE RecordWildCards #-}

-- | Incremental topological ordering using the Pearce-Kelly algorithm
--
-- The Pearce-Kelly algorithm maintains a topological order of a DAG incrementally.
-- Each node is assigned an integer order value. When edges are added, the algorithm
-- efficiently reorders only the affected nodes to maintain topological consistency.
--
-- Reference: "An Incremental Algorithm for Maintaining the Topological Order of
-- a Directed Acyclic Graph" by Pearce and Kelly (2006)
module Development.IDE.Graph.Internal.TopoSort
( emptyTopoOrder
, addEdge
, removeKey
, lookupOrder
, getOrderedList
, getAffectedKeysInOrder
) where

import Control.Concurrent.STM.Stats (STM, modifyTVar', readTVar,
writeTVar)
import Control.Monad (when)
import qualified Data.HashMap.Strict as Map
import Data.List (sortOn)
import Data.Maybe (mapMaybe)
import Development.IDE.Graph.Internal.Types (TopoOrder (..))
import Development.IDE.Graph.KeySet
import UnliftIO (newTVarIO)

-- | Create an empty topological order
emptyTopoOrder :: IO TopoOrder
emptyTopoOrder = do
topoOrderMap <- newTVarIO Map.empty
topoNextOrderNum <- newTVarIO 0
return TopoOrder{..}

-- | Look up the order of a key
lookupOrder :: TopoOrder -> Key -> STM (Maybe Int)
lookupOrder TopoOrder{..} key = do
orderMap <- readTVar topoOrderMap
return $ Map.lookup key orderMap

-- | Get all keys sorted by their topological order
getOrderedList :: TopoOrder -> STM [Key]
getOrderedList TopoOrder{..} = do
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getOrderedList is O(n log n), not good, it's better to keep an updated ordered list instead of computing it right at time. @copilot

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed getOrderedList as it was not used anywhere and had O(n log n) complexity. The function is now eliminated from the API. (commit 6fb2e55)

orderMap <- readTVar topoOrderMap
return $ map fst $ sortOn snd $ Map.toList orderMap

-- | Get affected keys from a KeySet, in topological order
getAffectedKeysInOrder :: TopoOrder -> KeySet -> STM [Key]
getAffectedKeysInOrder TopoOrder{..} affected = do
orderMap <- readTVar topoOrderMap
let affectedList = toListKeySet affected
withOrders = mapMaybe (\k -> (\o -> (k, o)) <$> Map.lookup k orderMap) affectedList
return $ map fst $ sortOn snd withOrders

-- | Ensure a key has an order assigned
ensureOrder :: TopoOrder -> Key -> STM Int
ensureOrder TopoOrder{..} key = do
orderMap <- readTVar topoOrderMap
case Map.lookup key orderMap of
Just ord -> return ord
Nothing -> do
nextOrd <- readTVar topoNextOrderNum
writeTVar topoNextOrderNum (nextOrd + 1)
modifyTVar' topoOrderMap (Map.insert key nextOrd)
return nextOrd

-- | Add an edge and maintain topological order using Pearce-Kelly
-- In the dependency graph: edge from 'from' to 'to' means 'from' depends on 'to'
-- In topological order: 'to' must come before 'from' (to has smaller order)
addEdge :: TopoOrder -> (Key -> STM (Maybe KeySet)) -> Key -> Key -> STM ()
addEdge topo@TopoOrder{..} getRDeps from to = do
fromOrd <- ensureOrder topo from
toOrd <- ensureOrder topo to
-- If 'to' already comes before 'from', order is correct
-- Otherwise, need to reorder using Pearce-Kelly forward search
when (fromOrd <= toOrd) $ do
-- Forward search: find all keys that transitively depend on 'from'
-- These need to be shifted to maintain topological order
affected <- forwardReach topo getRDeps from
orderMap <- readTVar topoOrderMap
let affectedWithOrders = mapMaybe (\k -> (\o -> (k, o)) <$> Map.lookup k orderMap) affected
-- Only reorder if we have affected keys
when (not $ null affectedWithOrders) $ do
let minAffected = minimum $ map snd affectedWithOrders
-- Shift affected keys to come after 'to'
when (minAffected <= toOrd) $ do
let shift = toOrd - minAffected + 1
orderMap' <- readTVar topoOrderMap
let newMap = foldr (\k m -> Map.adjust (+ shift) k m) orderMap' affected
writeTVar topoOrderMap newMap

-- | Forward reachability: find all keys that transitively depend on a given key
-- Uses DFS through reverse dependencies
forwardReach :: TopoOrder -> (Key -> STM (Maybe KeySet)) -> Key -> STM [Key]
forwardReach _topo getRDeps start = go [start] mempty []
where
go [] _visited acc = return acc
go (k:ks) visited acc
| k `memberKeySet` visited = go ks visited acc
| otherwise = do
let visited' = insertKeySet k visited
mrdeps <- getRDeps k
let rdeps = maybe [] toListKeySet mrdeps
go (rdeps ++ ks) visited' (k : acc)

-- | Remove a key from the topological order
removeKey :: TopoOrder -> Key -> STM ()
removeKey TopoOrder{..} key = do
modifyTVar' topoOrderMap (Map.delete key)
9 changes: 9 additions & 0 deletions hls-graph/src/Development/IDE/Graph/Internal/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,13 @@ raedAllLeftsDBQue q = do
mapM_ (writeTaskQueue q . Right) allRight
return allLeft

-- | Topological ordering structure for Pearce-Kelly algorithm
-- Maps each Key to its topological order number (smaller = earlier in order)
data TopoOrder = TopoOrder
{ topoOrderMap :: !(TVar (Map.HashMap Key Int))
Copy link
Owner

@soulomoon soulomoon Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you use StmContainers.Map here? @copilot

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed TopoOrder to use StmContainers.Map instead of TVar (Map.HashMap ...). This is consistent with the rest of the codebase and provides better STM performance. (commit 6fb2e55)

, topoNextOrderNum :: !(TVar Int)
}

-- Encapsulated scheduler state, previously scattered on Database
data SchedulerState = SchedulerState
{ schedulerUpsweepQueue :: TQueue Key
Expand All @@ -293,6 +300,8 @@ data SchedulerState = SchedulerState
-- ^ Keys that are pending because they are waiting for dependencies to complete
, schedulerAllDirties :: TVar KeySet
, schedulerAllKeysInOrder :: TVar [Key]
, schedulerTopoOrder :: !TopoOrder
-- ^ Incremental topological order maintained using Pearce-Kelly algorithm
}

-- dump scheduler state
Expand Down
4 changes: 2 additions & 2 deletions hls-graph/test/DatabaseSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ spec = do
let k = newKey $ Rule @()
-- ChangedRecomputeSame
r1@Result{resultChanged=rc1, resultBuilt=rb1} <- compute theDb emptyStack k RunDependenciesChanged Nothing
incDatabase theDb Nothing
_ <- incDatabase theDb Nothing
-- ChangedRecomputeSame
r2@Result{resultChanged=rc2, resultBuilt=rb2} <- compute theDb emptyStack k RunDependenciesChanged (Just r1)
incDatabase theDb Nothing
_ <- incDatabase theDb Nothing
-- changed Nothing
Result{resultChanged=rc3, resultBuilt=rb3} <- compute theDb emptyStack k RunDependenciesSame (Just r2)
rc1 `shouldBe` Step 0
Expand Down