From 5d1ff9d52e4def109898657ff8554124a7e30f29 Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Thu, 3 Apr 2025 20:25:47 +0100 Subject: [PATCH 01/20] add test that demonstrates the issue --- tests/Makefile | 1 + tests/issue_269.x | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/issue_269.x diff --git a/tests/Makefile b/tests/Makefile index eede942..9585911 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -59,6 +59,7 @@ TESTS = \ issue_141.x \ issue_197.x \ issue_262.x \ + issue_269.x \ monad_typeclass.x \ monad_typeclass_bytestring.x \ monadUserState_typeclass.x \ diff --git a/tests/issue_269.x b/tests/issue_269.x new file mode 100644 index 0000000..15573e8 --- /dev/null +++ b/tests/issue_269.x @@ -0,0 +1,37 @@ +{ + +-- Issue #269 +-- reported 2025-04-02 by https://github.com/nicuveo +-- fixed 2025-04-03 by Antoine Leblanc +-- +-- Problem was: +-- The minimizer was not initialized with the proper subsets of +-- states, and could result in different states being erroneously +-- considered equivalent, which in turn could resulting in the wrong +-- rule being selected at runtime. + +import Control.Monad (when) +import System.Exit + +} + +%wrapper "basic" + +tokens :- + [abc] { Left } + "abc" { const $ Right "abc" } + +{ + +test :: String -> [Either String String] -> IO () +test input expected = + when (expected /= alexScanTokens input) + exitFailure + +main :: IO () +main = do + test "abc" [Right "abc"] + test "bbb" [Left "b", Left "b", Left "b"] + test "bbc" [Left "b", Left "b", Left "c"] + +} From ad1dd62e8fdc3c64d6b01f8b01b87ff6319b7d8a Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Thu, 3 Apr 2025 20:26:07 +0100 Subject: [PATCH 02/20] remove incorrect extra-source-files entries --- alex.cabal | 2 -- 1 file changed, 2 deletions(-) diff --git a/alex.cabal b/alex.cabal index 4e5f174..823d632 100644 --- a/alex.cabal +++ b/alex.cabal @@ -58,8 +58,6 @@ extra-source-files: examples/words.x examples/words_monad.x examples/words_posn.x - src/Parser.y.boot - src/Scan.x.boot src/ghc_hooks.c tests/Makefile tests/simple.x From 4db93cdec29a839b3241a7bc531e103ddf0b6eef Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Thu, 3 Apr 2025 22:58:57 +0100 Subject: [PATCH 03/20] rewrite DFAMin --- src/DFAMin.hs | 439 +++++++++++++++++++++++++++----------------------- 1 file changed, 238 insertions(+), 201 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 5a0a781..5570114 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -1,218 +1,255 @@ -{-# OPTIONS_GHC -fno-warn-name-shadowing #-} - {-# LANGUAGE CPP #-} module DFAMin (minimizeDFA) where -import AbsSyn +import AbsSyn + +import Control.Monad (guard) +import Data.IntMap (IntMap) +import Data.IntSet (IntSet) +import Data.Map (Map) + +import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet +import qualified Data.List as List +import qualified Data.Map as Map + +{- Note [Hopcroft's Algorithm] + +DFA minimization is implemented using Hopcroft's algorithm. It is defined on +Wikipedia as follows. + +We assume the following definitions: + - Q is the set of all states in our DFA + - F is the subset of Q that contains all final (or "accepting") states + - ∑ is the set of input symbols (for us, [0..255]) + +We use the phrase "X refines Y into Y1 and Y2" to mean the following: + - Y1 := Y ∩ X + - Y2 := Y \ X + - |Y1| > 0 + - |Y2| > 0 + +The algorithm itself is defined thusly: + + P := {F, Q \ F} + W := {F, Q \ F} + while (W is not empty) do + choose and remove a set A from W + for each c in Σ do + let X be the set of states for which a transition on c leads to a state in A + for each set Y in P that is refined by X into Y1 and Y2 do + replace Y in P by the two sets Y1 and Y2 + if Y is in W + replace Y in W by Y1 and Y2 + else + if |Y1| <= |Y2| + add Y1 to W + else + add Y2 to W + + +Our implementation differs slightly, as we perform several optimizations. + +In the Wikipedia implementation, P and W are initialized to subsets of all sets +of Q, specifically F and Q \ F. The exact subsets do not matter; what matters is +the following: + - all states in Q should be in W + - equivalent states should all be in the same subset + +As per the first requirement, it would be fine for P and W to be initialized +with a set that only contains Q. The second requirement stems from the fact that +our partition "refining" can divide subsets, but we do not have a way to +re-merge subsets. + +Our first optimization is that we use a more granular division of states in the +initial set. Specifically, we group all states by their list of "accepts", since +we know that for two states to be equivalent their list of "accepts" must be the +same: the resulting subsets therefore meet our two stated criteria. + + +Our second optimization relies on the observation that given that all states are +in W, then all states will appear in A; as a result, instead of starting with a +set P that contains all subsets, that we refine in parallel to W, we can instead +start with an empty set R, and add each A to R before iterating over P. This +makes updating R and W easier, and removes the need for the expensive "is Y in +W" check. + + +With those two optimizations, our implementation is therefore: + + R := {} + W := {all "accept" subsets of Q} + while (W is not empty) do + choose and remove a set A from W + add A to R + for each c in Σ do + let X be the set of states for which a transition on c leads to a state in A + for each set Y in R that is refined by X into Y1 and Y2 do + replace Y in R by the two sets Y1 and Y2 + if |Y1| <= |Y2| + add Y1 to Q + else + add Y2 to Q + for each set Y in Q that is refined by X into Y1 and Y2 do + replace Y in Q by the two sets Y1 and Y2 + + +-} + +-- | Reduce the number of states in the given DFA by grouping indistinguishable +-- states. +minimizeDFA :: forall a. Ord a => DFA Int a -> DFA Int a +minimizeDFA dfa@(DFA starts statemap) = DFA starts (Map.fromList states) + where + equiv_classes :: [EquivalenceClass] + equiv_classes = groupEquivalentStates dfa + + numbered_states :: [(Int, EquivalenceClass)] + numbered_states = number (length starts) equiv_classes + + -- assign each state in the minimized DFA a number, making + -- sure that we assign the numbers [0..] to the start states. + number :: Int -> [EquivalenceClass] -> [(Int, EquivalenceClass)] + number _ [] = [] + number n (ss:sss) = + case filter (`IntSet.member` ss) starts of + [] -> (n,ss) : number (n+1) sss + starts' -> map (,ss) starts' ++ number n sss + -- if one of the states of the minimized DFA corresponds + -- to multiple starts states, we just have to duplicate + -- that state. + + states :: [(Int, State Int a)] + states = do + (n, equiv) <- numbered_states + let old_states = map (lookupOrPanic statemap) (IntSet.toList equiv) + accepts = map fix_acc $ state_acc $ headOrPanic old_states + transitions = IntMap.fromList $ do + State _ out <- old_states + (b, old) <- IntMap.toList out + pure (b, get_new old) + pure (n, State accepts transitions) + + fix_acc :: Accept a -> Accept a + fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) } + + fix_rctxt :: RightContext SNum -> RightContext SNum + fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s) + fix_rctxt other = other + + get_new :: Int -> Int + get_new = lookupOrPanic old_to_new + + old_to_new :: Map Int Int + old_to_new = Map.fromList $ do + (n,ss) <- numbered_states + s <- IntSet.toList ss + pure (s,n) + + headOrPanic :: forall x. [x] -> x + headOrPanic [] = error "minimizeDFA: empty equivalence class" + headOrPanic (x:_) = x + + lookupOrPanic :: forall x. Map Int x -> Int -> x + lookupOrPanic m k = case Map.lookup k m of + Nothing -> error "minimizeDFA: state not found" + Just x -> x -import Data.IntMap ( IntMap ) -import Data.IntSet ( IntSet ) -import Data.Map ( Map ) -#if !MIN_VERSION_containers(0,6,0) -import Data.Maybe ( mapMaybe ) -#endif -import qualified Data.Map as Map -import qualified Data.IntSet as IntSet -import qualified Data.IntMap as IntMap -import qualified Data.List as List +type EquivalenceClass = IntSet + --- % Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia): --- % X refines Y into Y1 and Y2 means --- % Y1 := Y ∩ X --- % Y2 := Y \ X --- % where both Y1 and Y2 are nonempty +-- | Creates the subset of Q that are used to initialize W. -- --- P := {{all accepting states}, {all nonaccepting states}}; --- Q := {{all accepting states}}; --- while (Q is not empty) do --- choose and remove a set A from Q --- for each c in ∑ do --- let X be the set of states for which a transition on c leads to a state in A --- for each set Y in P for which X refines Y into Y1 and Y2 do --- replace Y in P by the two sets Y1 and Y2 --- if Y is in Q --- replace Y in Q by the same two sets --- else --- add the smaller of the two sets to Q --- end; --- end; --- end; +-- As per the two conditions listed in Note [Hopcroft's Algorithm], we have two +-- requirements: the union of all resulting sets must be equivalent to Q the set +-- of all states, and all equivalent states must be in the same subsets. -- --- % X is a preimage of A under transition function. +-- We group states by their list of 'Accept'. +initialSubsets :: forall a. Ord a => DFA Int a -> [EquivalenceClass] +initialSubsets dfa = Map.elems $ Map.fromListWith IntSet.union $ do + (stateIndex, State accepts _transitions) <- Map.toList $ dfa_states dfa + pure (accepts, IntSet.singleton stateIndex) + --- % observation : Q is always subset of P --- % let R = P \ Q. then following algorithm is the equivalent of the Hopcroft's Algorithm +-- | Creates a cache of all reverse transitions for a given DFA. +-- +-- To each token c in Σ, this map contains a reverse map of transitions. +-- That is, for each c, we have a map that, to a state s, associate the set +-- of states that can reach s via c. -- --- R := {{all nonaccepting states}}; --- Q := {{all accepting states}}; --- while (Q is not empty) do --- choose a set A from Q --- remove A from Q and add it to R --- for each c in ∑ do --- let X be the set of states for which a transition on c leads to a state in A --- for each set Y in R for which X refines Y into Y1 and Y2 do --- replace Y in R by the greater of the two sets Y1 and Y2 --- add the smaller of the two sets to Q --- end; --- for each set Y in Q for which X refines Y into Y1 and Y2 do --- replace Y in Q by the two sets Y1 and Y2 --- end; --- end; --- end; +-- Given that the actual value of c is never actually required, we flatten the +-- result into a list. +generateReverseTransitionCache :: forall a. Ord a => DFA Int a -> [IntMap EquivalenceClass] +generateReverseTransitionCache dfa = IntMap.elems $ + IntMap.fromListWith (IntMap.unionWith IntSet.union) $ do + (startingState, stateInfo) <- Map.toList $ dfa_states dfa + (token, targetState) <- IntMap.toList $ state_out stateInfo + pure (token, IntMap.singleton targetState (IntSet.singleton startingState)) + + +-- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are +-- within the IntSet. -- --- % The second for loop that iterates over R mutates Q, --- % but it does not affect the third for loop that iterates over Q. --- % Because once X refines Y into Y1 and Y2, Y1 and Y2 can't be more refined by X. +-- This function is equivalent to 'IntMap.restrictKeys', but provided for +-- compatibility with older versions of containers. +restrictKeys :: forall a. IntMap a -> IntSet -> IntMap a +restrictKeys m s = +#if MIN_VERSION_containers(0,6,0) + IntMap.restrictKeys m s +#else + IntMap.filterWithKey (\k _ -> k `IntSet.member` s) m +#endif -minimizeDFA :: forall a. Ord a => DFA Int a -> DFA Int a -minimizeDFA dfa@(DFA { dfa_start_states = starts, - dfa_states = statemap - }) - = DFA { dfa_start_states = starts, - dfa_states = Map.fromList states } + +-- | Given two sets X and Y, compute their intersection and difference. +-- Only returns both if both are non-empty, otherwise return neither. +refine + :: EquivalenceClass + -> EquivalenceClass + -> Maybe (EquivalenceClass, EquivalenceClass) +refine x y = + if IntSet.null intersection || IntSet.null difference + then Nothing + else Just (intersection, difference) where - equiv_classes :: [EquivalenceClass] - equiv_classes = groupEquivStates dfa - - numbered_states :: [(Int, EquivalenceClass)] - numbered_states = number (length starts) equiv_classes - - -- assign each state in the minimized DFA a number, making - -- sure that we assign the numbers [0..] to the start states. - number :: Int -> [EquivalenceClass] -> [(Int, EquivalenceClass)] - number _ [] = [] - number n (ss:sss) = - case filter (`IntSet.member` ss) starts of - [] -> (n,ss) : number (n+1) sss - starts' -> map (,ss) starts' ++ number n sss - -- if one of the states of the minimized DFA corresponds - -- to multiple starts states, we just have to duplicate - -- that state. - - states :: [(Int, State Int a)] - states = [ - let old_states = map (lookup statemap) (IntSet.toList equiv) - accs = map fix_acc (state_acc (headWithDefault undefined old_states)) - -- accepts should all be the same - out = IntMap.fromList [ (b, get_new old) - | State _ out <- old_states, - (b,old) <- IntMap.toList out ] - in (n, State accs out) - | (n, equiv) <- numbered_states - ] - - fix_acc :: Accept a -> Accept a - fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) } - - fix_rctxt :: RightContext SNum -> RightContext SNum - fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s) - fix_rctxt other = other - - lookup :: Ord k => Map k v -> k -> v - lookup m k = Map.findWithDefault (error "minimizeDFA") k m - - get_new :: Int -> Int - get_new = lookup old_to_new - - old_to_new :: Map Int Int - old_to_new = Map.fromList [ (s,n) | (n,ss) <- numbered_states, - s <- IntSet.toList ss ] + intersection = IntSet.intersection y x + difference = IntSet.difference y x -type EquivalenceClass = IntSet -groupEquivStates :: forall a. Ord a => DFA Int a -> [EquivalenceClass] -groupEquivStates DFA { dfa_states = statemap } - = go init_r init_q +-- | Given a DFA, compute all sets of equivalent states. +-- +-- See Note [Hopcroft's Algorithm] +groupEquivalentStates :: forall a. Ord a => DFA Int a -> [EquivalenceClass] +groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) where - accepting, nonaccepting :: Map Int (State Int a) - (accepting, nonaccepting) = Map.partition acc statemap - where acc (State as _) = not (List.null as) - - nonaccepting_states :: EquivalenceClass - nonaccepting_states = IntSet.fromList (Map.keys nonaccepting) - - -- group the accepting states into equivalence classes - accept_map :: Map [Accept a] [Int] - accept_map = {-# SCC "accept_map" #-} - List.foldl' (\m (n,s) -> Map.insertWith (++) (state_acc s) [n] m) - Map.empty - (Map.toList accepting) - - accept_groups :: [EquivalenceClass] - accept_groups = map IntSet.fromList (Map.elems accept_map) - - init_r, init_q :: [EquivalenceClass] - init_r -- Issue #71: each EquivalenceClass needs to be a non-empty set - | IntSet.null nonaccepting_states = [] - | otherwise = [nonaccepting_states] - init_q = accept_groups - - -- a map from token T to - -- a map from state S to the set of states that transition to - -- S on token T - -- bigmap is an inversed transition function classified by each input token. - -- the codomain of each inversed function is a set of states rather than single state - -- since a transition function might not be an injective. - -- This is a cache of the information needed to compute xs below - bigmap :: IntMap (IntMap EquivalenceClass) - bigmap = IntMap.fromListWith (IntMap.unionWith IntSet.union) - [ (i, IntMap.singleton to (IntSet.singleton from)) - | (from, state) <- Map.toList statemap, - (i,to) <- IntMap.toList (state_out state) ] - - -- The outer loop: recurse on each set in R and Q - go :: [EquivalenceClass] -> [EquivalenceClass] -> [EquivalenceClass] - go r [] = r - go r (a:q) = uncurry go $ List.foldl' go0 (a:r,q) xs - where - preimage :: IntMap EquivalenceClass -- inversed transition function - -> EquivalenceClass -- subset of codomain of original transition function - -> EquivalenceClass -- preimage of given subset -#if MIN_VERSION_containers(0,6,0) - preimage invMap = IntSet.unions . IntMap.restrictKeys invMap -#else - preimage invMap = IntSet.unions . mapMaybe (`IntMap.lookup` invMap) . IntSet.toList -#endif - - xs :: [EquivalenceClass] - xs = - [ x - | invMap <- IntMap.elems bigmap - , let x = preimage invMap a - , not (IntSet.null x) - ] - - refineWith - :: EquivalenceClass -- preimage set that bisects the input equivalence class - -> EquivalenceClass -- input equivalence class - -> Maybe (EquivalenceClass, EquivalenceClass) -- refined equivalence class - refineWith x y = - if IntSet.null y1 || IntSet.null y2 - then Nothing - else Just (y1, y2) - where - y1 = IntSet.intersection y x - y2 = IntSet.difference y x - - go0 (r,q) x = go1 r [] [] - where - -- iterates over R - go1 [] r' q' = (r', go2 q q') - go1 (y:r) r' q' = case refineWith x y of - Nothing -> go1 r (y:r') q' - Just (y1, y2) - | IntSet.size y1 <= IntSet.size y2 -> go1 r (y2:r') (y1:q') - | otherwise -> go1 r (y1:r') (y2:q') - - -- iterates over Q - go2 [] q' = q' - go2 (y:q) q' = case refineWith x y of - Nothing -> go2 q (y:q') - Just (y1, y2) -> go2 q (y1:y2:q') - --- To pacify GHC 9.8's warning about 'head' -headWithDefault :: a -> [a] -> a -headWithDefault a [] = a -headWithDefault _ (a:_) = a + reverseTransitionCache :: [IntMap EquivalenceClass] + reverseTransitionCache = generateReverseTransitionCache dfa + + -- while W isn't empty, pick an A from W, add it to R + -- and iterate on X for each c in ∑ + outerLoop :: ([EquivalenceClass], [EquivalenceClass]) -> [EquivalenceClass] + outerLoop (r, []) = r + outerLoop (r, a:w) = outerLoop $ List.foldl' refineWithX (a:r,w) $ do + allPreviousStates <- reverseTransitionCache + let x = IntSet.unions $ restrictKeys allPreviousStates a + guard $ not $ IntSet.null x + pure x + + -- given X, refine values in R, refine values in W, and finally combine the + -- results to obtain the new values of R an W + refineWithX (r, w) x = + let (r', w') = unzip $ map (processR x) r + w'' = concatMap (processW x) w + in (concat r', concat w' ++ w'') + + processR x y = case refine x y of + Nothing -> ([y], []) + Just (y1, y2) + | IntSet.size y1 <= IntSet.size y2 -> ([y2], [y1]) + | otherwise -> ([y1], [y2]) + + processW x y = case refine x y of + Nothing -> [y] + Just (y1, y2) -> [y1, y2] From bfba77076994ffa157980a05892f731a68df2d74 Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Thu, 3 Apr 2025 23:24:44 +0100 Subject: [PATCH 04/20] add test for older versions of the code --- tests/Makefile | 3 ++- tests/issue_269-1.x | 40 ++++++++++++++++++++++++++++ tests/{issue_269.x => issue_269-2.x} | 5 +++- 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 tests/issue_269-1.x rename tests/{issue_269.x => issue_269-2.x} (86%) diff --git a/tests/Makefile b/tests/Makefile index 9585911..c28c71c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -59,7 +59,8 @@ TESTS = \ issue_141.x \ issue_197.x \ issue_262.x \ - issue_269.x \ + issue_269-1.x \ + issue_269-2.x \ monad_typeclass.x \ monad_typeclass_bytestring.x \ monadUserState_typeclass.x \ diff --git a/tests/issue_269-1.x b/tests/issue_269-1.x new file mode 100644 index 0000000..938e053 --- /dev/null +++ b/tests/issue_269-1.x @@ -0,0 +1,40 @@ +{ + +-- Issue #269 +-- reported 2025-04-02 by https://github.com/nicuveo +-- fixed 2025-04-03 by Antoine Leblanc +-- +-- Problem was: +-- The minimizer was not initialized with the proper subsets of +-- states, and could result in different states being erroneously +-- considered equivalent, which in turn could resulting in the wrong +-- rule being selected at runtime. +-- +-- This version of the test fails with the minimizer as implemented +-- after the changes in 4f0b51b8c370d0dd0f9c65af98282789f1cb035f. + +import Control.Monad (when) +import System.Exit + +} + +%wrapper "basic" + +tokens :- + [abc] { Left } + "abc" { const $ Right "abc" } + +{ + +test :: String -> [Either String String] -> IO () +test input expected = + when (expected /= alexScanTokens input) + exitFailure + +main :: IO () +main = do + test "abc" [Right "abc"] + test "bbb" [Left "b", Left "b", Left "b"] + test "bbc" [Left "b", Left "b", Left "c"] + +} diff --git a/tests/issue_269.x b/tests/issue_269-2.x similarity index 86% rename from tests/issue_269.x rename to tests/issue_269-2.x index 15573e8..2bdb081 100644 --- a/tests/issue_269.x +++ b/tests/issue_269-2.x @@ -9,6 +9,9 @@ -- states, and could result in different states being erroneously -- considered equivalent, which in turn could resulting in the wrong -- rule being selected at runtime. +-- +-- This version of the test fails with the minimizer as implemented +-- pre 4f0b51b8c370d0dd0f9c65af98282789f1cb035f. import Control.Monad (when) import System.Exit @@ -18,8 +21,8 @@ import System.Exit %wrapper "basic" tokens :- - [abc] { Left } "abc" { const $ Right "abc" } + [abc] { Left } { From aabfad27af42905deb7e99581fdb7a15e572d07c Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Thu, 3 Apr 2025 23:30:17 +0100 Subject: [PATCH 05/20] improve comments --- src/DFAMin.hs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 5570114..10898d4 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -175,9 +175,9 @@ initialSubsets dfa = Map.elems $ Map.fromListWith IntSet.union $ do -- | Creates a cache of all reverse transitions for a given DFA. -- --- To each token c in Σ, this map contains a reverse map of transitions. --- That is, for each c, we have a map that, to a state s, associate the set --- of states that can reach s via c. +-- To each token c in Σ, the resulting map contains a reverse map of +-- transitions. That is, for each c, we have a map that, to a state +-- s, associate the set of states that can reach s via c. -- -- Given that the actual value of c is never actually required, we flatten the -- result into a list. @@ -192,8 +192,8 @@ generateReverseTransitionCache dfa = IntMap.elems $ -- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are -- within the IntSet. -- --- This function is equivalent to 'IntMap.restrictKeys', but provided for --- compatibility with older versions of containers. +-- This function is a simple wrapper around 'IntMap.restrictKeys', +-- provided for compatibility with older versions of containers. restrictKeys :: forall a. IntMap a -> IntSet -> IntMap a restrictKeys m s = #if MIN_VERSION_containers(0,6,0) @@ -220,15 +220,15 @@ refine x y = -- | Given a DFA, compute all sets of equivalent states. -- --- See Note [Hopcroft's Algorithm] +-- See Note [Hopcroft's Algorithm] for details. groupEquivalentStates :: forall a. Ord a => DFA Int a -> [EquivalenceClass] groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) where reverseTransitionCache :: [IntMap EquivalenceClass] reverseTransitionCache = generateReverseTransitionCache dfa - -- while W isn't empty, pick an A from W, add it to R - -- and iterate on X for each c in ∑ + -- While W isn't empty, pick an A from W, add it to R + -- and iterate on X for each c in ∑. outerLoop :: ([EquivalenceClass], [EquivalenceClass]) -> [EquivalenceClass] outerLoop (r, []) = r outerLoop (r, a:w) = outerLoop $ List.foldl' refineWithX (a:r,w) $ do @@ -237,8 +237,11 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) guard $ not $ IntSet.null x pure x - -- given X, refine values in R, refine values in W, and finally combine the - -- results to obtain the new values of R an W + -- Given X, refine values in R, refine values in W, and finally combine the + -- results to obtain the new values of R an W. + -- We can do both steps in parallel, since the new values to add in W while + -- we process R are already defined and don't need to be processed when + -- iterating over the original value of W. refineWithX (r, w) x = let (r', w') = unzip $ map (processR x) r w'' = concatMap (processW x) w From 529e2c6b57a16a574b11bf61392e2d199b3ba249 Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Thu, 3 Apr 2025 23:59:09 +0100 Subject: [PATCH 06/20] fix call of `restrictKeys` for older GHC versions --- src/DFAMin.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 10898d4..547b5c3 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -233,7 +233,7 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) outerLoop (r, []) = r outerLoop (r, a:w) = outerLoop $ List.foldl' refineWithX (a:r,w) $ do allPreviousStates <- reverseTransitionCache - let x = IntSet.unions $ restrictKeys allPreviousStates a + let x = IntSet.unions $ IntMap.elems $ restrictKeys allPreviousStates a guard $ not $ IntSet.null x pure x From e82122772e5442e45b529bf5f2e1c9c16b91fc7f Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 00:01:38 +0100 Subject: [PATCH 07/20] Revert "remove incorrect extra-source-files entries" This reverts commit e0126a846e00e4982541f4eebe842b7638e5fe2d. --- alex.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alex.cabal b/alex.cabal index 823d632..4e5f174 100644 --- a/alex.cabal +++ b/alex.cabal @@ -58,6 +58,8 @@ extra-source-files: examples/words.x examples/words_monad.x examples/words_posn.x + src/Parser.y.boot + src/Scan.x.boot src/ghc_hooks.c tests/Makefile tests/simple.x From d8b594b8d5bb254a1664d9131ff46ce3cd81c48e Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 00:04:57 +0100 Subject: [PATCH 08/20] rename tests --- tests/Makefile | 4 ++-- tests/{issue_269-1.x => issue_269_part1.x} | 0 tests/{issue_269-2.x => issue_269_part2.x} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename tests/{issue_269-1.x => issue_269_part1.x} (100%) rename tests/{issue_269-2.x => issue_269_part2.x} (100%) diff --git a/tests/Makefile b/tests/Makefile index c28c71c..97ecd22 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -59,8 +59,8 @@ TESTS = \ issue_141.x \ issue_197.x \ issue_262.x \ - issue_269-1.x \ - issue_269-2.x \ + issue_269_part1.x \ + issue_269_part2.x \ monad_typeclass.x \ monad_typeclass_bytestring.x \ monadUserState_typeclass.x \ diff --git a/tests/issue_269-1.x b/tests/issue_269_part1.x similarity index 100% rename from tests/issue_269-1.x rename to tests/issue_269_part1.x diff --git a/tests/issue_269-2.x b/tests/issue_269_part2.x similarity index 100% rename from tests/issue_269-2.x rename to tests/issue_269_part2.x From ce8fb612d502d04d74f6ed1b1b4f75e9c1c8411c Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 00:15:14 +0100 Subject: [PATCH 09/20] add new tests to extra-source-files --- alex.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alex.cabal b/alex.cabal index 4e5f174..8fb99ce 100644 --- a/alex.cabal +++ b/alex.cabal @@ -91,6 +91,8 @@ extra-source-files: tests/issue_141.x tests/issue_197.x tests/issue_262.x + tests/issue_269_part1.x + tests/issue_269_part2.x tests/strict_text_typeclass.x tests/posn_typeclass_strict_text.x tests/tokens_monadUserState_strict_text.x From 60e796e5c95b296a1c6771384db37b06e3bfcd7f Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 00:35:58 +0100 Subject: [PATCH 10/20] make use of `fold` --- src/DFAMin.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 547b5c3..7a01f4d 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -5,6 +5,7 @@ module DFAMin (minimizeDFA) where import AbsSyn import Control.Monad (guard) +import Data.Foldable (fold) import Data.IntMap (IntMap) import Data.IntSet (IntSet) import Data.Map (Map) @@ -233,7 +234,7 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) outerLoop (r, []) = r outerLoop (r, a:w) = outerLoop $ List.foldl' refineWithX (a:r,w) $ do allPreviousStates <- reverseTransitionCache - let x = IntSet.unions $ IntMap.elems $ restrictKeys allPreviousStates a + let x = fold $ restrictKeys allPreviousStates a guard $ not $ IntSet.null x pure x From ca3d85e9983087e18bf6449cb528ed1e00e6f743 Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 00:36:12 +0100 Subject: [PATCH 11/20] revert formatter changes --- src/DFAMin.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 7a01f4d..29ba771 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -2,13 +2,13 @@ module DFAMin (minimizeDFA) where -import AbsSyn +import AbsSyn -import Control.Monad (guard) -import Data.Foldable (fold) -import Data.IntMap (IntMap) -import Data.IntSet (IntSet) -import Data.Map (Map) +import Control.Monad (guard) +import Data.Foldable (fold) +import Data.IntMap (IntMap) +import Data.IntSet (IntSet) +import Data.Map (Map) import qualified Data.IntMap as IntMap import qualified Data.IntSet as IntSet From c3877e94821d5013ce7c0205886eba67df936f5e Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 12:47:40 +0100 Subject: [PATCH 12/20] improve comments, fix typos --- src/DFAMin.hs | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 29ba771..eb2cf28 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -17,8 +17,8 @@ import qualified Data.Map as Map {- Note [Hopcroft's Algorithm] -DFA minimization is implemented using Hopcroft's algorithm. It is defined on -Wikipedia as follows. +DFA minimization is implemented using Hopcroft's algorithm. The following +definition is mostly copied from Wikipedia. We assume the following definitions: - Q is the set of all states in our DFA @@ -52,14 +52,15 @@ The algorithm itself is defined thusly: Our implementation differs slightly, as we perform several optimizations. -In the Wikipedia implementation, P and W are initialized to subsets of all sets -of Q, specifically F and Q \ F. The exact subsets do not matter; what matters is -the following: - - all states in Q should be in W +In the Wikipedia implementation, P and W are initialized to two subsets of Q, +specifically F and Q \ F. The exact subsets do not matter; what matters is the +following: + - P and W should contain all Q states - equivalent states should all be in the same subset -As per the first requirement, it would be fine for P and W to be initialized -with a set that only contains Q. The second requirement stems from the fact that +As per the first requirement, it would be fine for P and W to be initialized as +a set that only contains Q. Using more fine-grained subsets reduces the amount +of work that needs to be done. The second requirement stems from the fact that our partition "refining" can divide subsets, but we do not have a way to re-merge subsets. @@ -89,12 +90,11 @@ With those two optimizations, our implementation is therefore: for each set Y in R that is refined by X into Y1 and Y2 do replace Y in R by the two sets Y1 and Y2 if |Y1| <= |Y2| - add Y1 to Q + add Y1 to W else - add Y2 to Q - for each set Y in Q that is refined by X into Y1 and Y2 do - replace Y in Q by the two sets Y1 and Y2 - + add Y2 to W + for each set Y in W that is refined by X into Y1 and Y2 do + replace Y in W by the two sets Y1 and Y2 -} @@ -161,7 +161,7 @@ minimizeDFA dfa@(DFA starts statemap) = DFA starts (Map.fromList states) type EquivalenceClass = IntSet --- | Creates the subset of Q that are used to initialize W. +-- | Creates the subsets of Q that are used to initialize W. -- -- As per the two conditions listed in Note [Hopcroft's Algorithm], we have two -- requirements: the union of all resulting sets must be equivalent to Q the set @@ -176,9 +176,9 @@ initialSubsets dfa = Map.elems $ Map.fromListWith IntSet.union $ do -- | Creates a cache of all reverse transitions for a given DFA. -- --- To each token c in Σ, the resulting map contains a reverse map of --- transitions. That is, for each c, we have a map that, to a state --- s, associate the set of states that can reach s via c. +-- To each token c in Σ, the resulting map associates a reverse map of +-- transitions. That is: for each c, we have a map that, to a state s, +-- associates the set of states that can reach s via c. -- -- Given that the actual value of c is never actually required, we flatten the -- result into a list. @@ -193,8 +193,8 @@ generateReverseTransitionCache dfa = IntMap.elems $ -- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are -- within the IntSet. -- --- This function is a simple wrapper around 'IntMap.restrictKeys', --- provided for compatibility with older versions of containers. +-- This function is a simple wrapper around 'IntMap.restrictKeys', provided for +-- compatibility with older versions of @containers@. restrictKeys :: forall a. IntMap a -> IntSet -> IntMap a restrictKeys m s = #if MIN_VERSION_containers(0,6,0) @@ -240,9 +240,9 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) -- Given X, refine values in R, refine values in W, and finally combine the -- results to obtain the new values of R an W. - -- We can do both steps in parallel, since the new values to add in W while - -- we process R are already defined and don't need to be processed when - -- iterating over the original value of W. + -- We can do both steps in parallel, since the new sets to add to W we find + -- while processing R have already been refined by X; it is therefore fine + -- to only refine the original states in W. refineWithX (r, w) x = let (r', w') = unzip $ map (processR x) r w'' = concatMap (processW x) w From 4e38c0aa1988f1b4c05b2918a6617fa8feb6ee20 Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 12:53:13 +0100 Subject: [PATCH 13/20] fix typos in tests --- tests/issue_269_part1.x | 6 +++--- tests/issue_269_part2.x | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/issue_269_part1.x b/tests/issue_269_part1.x index 938e053..84ea491 100644 --- a/tests/issue_269_part1.x +++ b/tests/issue_269_part1.x @@ -1,13 +1,13 @@ { -- Issue #269 --- reported 2025-04-02 by https://github.com/nicuveo +-- reported 2025-04-02 by Antoine Leblanc (https://github.com/nicuveo) -- fixed 2025-04-03 by Antoine Leblanc -- -- Problem was: -- The minimizer was not initialized with the proper subsets of --- states, and could result in different states being erroneously --- considered equivalent, which in turn could resulting in the wrong +-- states, which could result in different states being erroneously +-- considered equivalent, which in turn could result in the wrong -- rule being selected at runtime. -- -- This version of the test fails with the minimizer as implemented diff --git a/tests/issue_269_part2.x b/tests/issue_269_part2.x index 2bdb081..0d1994f 100644 --- a/tests/issue_269_part2.x +++ b/tests/issue_269_part2.x @@ -1,13 +1,13 @@ { -- Issue #269 --- reported 2025-04-02 by https://github.com/nicuveo +-- reported 2025-04-02 by Antoine Leblanc (https://github.com/nicuveo) -- fixed 2025-04-03 by Antoine Leblanc -- -- Problem was: -- The minimizer was not initialized with the proper subsets of --- states, and could result in different states being erroneously --- considered equivalent, which in turn could resulting in the wrong +-- states, which could result in different states being erroneously +-- considered equivalent, which in turn could result in the wrong -- rule being selected at runtime. -- -- This version of the test fails with the minimizer as implemented From baf71bdade3b3f6aebda6a2f6070225373baf019 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Fri, 4 Apr 2025 21:13:02 +0200 Subject: [PATCH 14/20] Optimize 'number' by removing already assigned start states A start state cannot belong to several equivalence classes (they are disjoint), so we can remove already assigned start states from the list of start states we want to assign. --- src/DFAMin.hs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index eb2cf28..973e435 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -98,25 +98,29 @@ With those two optimizations, our implementation is therefore: -} +type OldSNum = Int -- ^ Old state number +type NewSNum = Int -- ^ New state number + -- | Reduce the number of states in the given DFA by grouping indistinguishable -- states. -minimizeDFA :: forall a. Ord a => DFA Int a -> DFA Int a -minimizeDFA dfa@(DFA starts statemap) = DFA starts (Map.fromList states) +minimizeDFA :: forall a. Ord a => DFA OldSNum a -> DFA NewSNum a +minimizeDFA dfa@(DFA starts statemap) = DFA starts $ Map.fromList new_states where + -- Group the states into classes according to the language they accept. equiv_classes :: [EquivalenceClass] equiv_classes = groupEquivalentStates dfa - numbered_states :: [(Int, EquivalenceClass)] - numbered_states = number (length starts) equiv_classes + -- A map from new state numbers to a class of equivalent old states. + numbered_states :: [(NewSNum, EquivalenceClass)] + numbered_states = number (length starts) starts equiv_classes - -- assign each state in the minimized DFA a number, making + -- Assign each state in the minimized DFA a number, making -- sure that we assign the numbers [0..] to the start states. - number :: Int -> [EquivalenceClass] -> [(Int, EquivalenceClass)] - number _ [] = [] - number n (ss:sss) = - case filter (`IntSet.member` ss) starts of - [] -> (n,ss) : number (n+1) sss - starts' -> map (,ss) starts' ++ number n sss + number :: NewSNum -> [NewSNum] -> [EquivalenceClass] -> [(NewSNum, EquivalenceClass)] + number _ _ [] = [] + number n unassigned_starts (ss:sss) + | null starts_ss = (n,ss) : continue (n+1) + | otherwise = map (,ss) starts_ss ++ continue n -- if one of the states of the minimized DFA corresponds -- to multiple starts states, we just have to duplicate -- that state. From afe58f5da44b9e88eceaaef31d50233d4ff58bf1 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Fri, 4 Apr 2025 21:15:34 +0200 Subject: [PATCH 15/20] Optimize the construction of new states from old states Since all old states in an equivalence class are equivalent, we only need the data from one of them to construct the new state. --- alex.cabal | 1 + src/AbsSyn.hs | 2 +- src/DFAMin.hs | 65 ++++++++++++++++++++++++++++----------------------- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git a/alex.cabal b/alex.cabal index 8fb99ce..f49fa58 100644 --- a/alex.cabal +++ b/alex.cabal @@ -115,6 +115,7 @@ executable alex default-language: Haskell2010 default-extensions: + DeriveFunctor PatternSynonyms ScopedTypeVariables TupleSections diff --git a/src/AbsSyn.hs b/src/AbsSyn.hs index 8db9871..56b2be0 100644 --- a/src/AbsSyn.hs +++ b/src/AbsSyn.hs @@ -123,7 +123,7 @@ data RightContext r = NoRightContext | RightContextRExp r | RightContextCode Code - deriving (Eq,Ord) + deriving (Eq, Ord, Functor) instance Show RECtx where showsPrec _ (RECtx scs _ r rctx code) = diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 973e435..5058936 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -124,43 +124,50 @@ minimizeDFA dfa@(DFA starts statemap) = DFA starts $ Map.fromList new_states -- if one of the states of the minimized DFA corresponds -- to multiple starts states, we just have to duplicate -- that state. - - states :: [(Int, State Int a)] - states = do - (n, equiv) <- numbered_states - let old_states = map (lookupOrPanic statemap) (IntSet.toList equiv) - accepts = map fix_acc $ state_acc $ headOrPanic old_states - transitions = IntMap.fromList $ do - State _ out <- old_states - (b, old) <- IntMap.toList out - pure (b, get_new old) - pure (n, State accepts transitions) + where + -- All the start states in ss (starts_ss) are assigned this equivalence class. + -- The remaining ones are passed to the recursive call. + (starts_ss, starts_other) = List.partition (`IntSet.member` ss) unassigned_starts + continue n' = number n' starts_other sss + + -- Mapping new state numbers to their state description. + new_states :: [(NewSNum, State NewSNum a)] + new_states = map (second class_to_new_state) numbered_states + + -- Translate an equivalence class of old states into a new state description. + class_to_new_state :: EquivalenceClass -> State NewSNum a + class_to_new_state = + -- A new state is constructed from any of the old states in the equivalence class. + -- It does not matter which old state we pick since by construction of the classes + -- they have the same behavior, both in their output (accepts) and their transitions. + -- Since IntSet does not have a method to give an arbitrary element + -- (ideally the one that is fastest to retrieve) + -- we use findMin (always succeeds because the IntSet is non-empty). + old_state_to_new_state . lookupOrPanic statemap . IntSet.findMin + where + lookupOrPanic = flip $ Map.findWithDefault panic + panic = error "alex::DFAMin.minimizeDFA: panic: state not found" + + -- Convert all state numbers in the State structure to new ones. + old_state_to_new_state :: State OldSNum a -> State NewSNum a + old_state_to_new_state (State old_accepts old_transitions) = + State (map fix_acc old_accepts) (fmap get_new old_transitions) fix_acc :: Accept a -> Accept a - fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) } - - fix_rctxt :: RightContext SNum -> RightContext SNum - fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s) - fix_rctxt other = other + fix_acc acc = acc { accRightCtx = fmap get_new $ accRightCtx acc } - get_new :: Int -> Int - get_new = lookupOrPanic old_to_new + get_new :: OldSNum -> NewSNum + get_new k = IntMap.findWithDefault panic k old_to_new + where + panic = error "alex::DFAMin.minimizeDFA: panic: state not found" - old_to_new :: Map Int Int - old_to_new = Map.fromList $ do + -- Memoized translation of old state numbers to new state numbers. + old_to_new :: IntMap NewSNum + old_to_new = IntMap.fromList $ do (n,ss) <- numbered_states s <- IntSet.toList ss pure (s,n) - headOrPanic :: forall x. [x] -> x - headOrPanic [] = error "minimizeDFA: empty equivalence class" - headOrPanic (x:_) = x - - lookupOrPanic :: forall x. Map Int x -> Int -> x - lookupOrPanic m k = case Map.lookup k m of - Nothing -> error "minimizeDFA: state not found" - Just x -> x - type EquivalenceClass = IntSet From e090a9a91fe187d31a2ec3a018abc262580b1244 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Fri, 4 Apr 2025 21:16:56 +0200 Subject: [PATCH 16/20] Cosmetics: use OldSNum / NewSNum instead of Int; etc. --- src/DFAMin.hs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 5058936..f8e0556 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -169,17 +169,18 @@ minimizeDFA dfa@(DFA starts statemap) = DFA starts $ Map.fromList new_states pure (s,n) +-- | An equivalence class is a /non-empty/ set of states. type EquivalenceClass = IntSet -- | Creates the subsets of Q that are used to initialize W. -- -- As per the two conditions listed in Note [Hopcroft's Algorithm], we have two --- requirements: the union of all resulting sets must be equivalent to Q the set --- of all states, and all equivalent states must be in the same subsets. +-- requirements: the union of all resulting sets must be equivalent to Q (the set +-- of all states), and all equivalent states must be in the same subsets. -- -- We group states by their list of 'Accept'. -initialSubsets :: forall a. Ord a => DFA Int a -> [EquivalenceClass] +initialSubsets :: forall a. Ord a => DFA OldSNum a -> [EquivalenceClass] initialSubsets dfa = Map.elems $ Map.fromListWith IntSet.union $ do (stateIndex, State accepts _transitions) <- Map.toList $ dfa_states dfa pure (accepts, IntSet.singleton stateIndex) @@ -193,12 +194,12 @@ initialSubsets dfa = Map.elems $ Map.fromListWith IntSet.union $ do -- -- Given that the actual value of c is never actually required, we flatten the -- result into a list. -generateReverseTransitionCache :: forall a. Ord a => DFA Int a -> [IntMap EquivalenceClass] +generateReverseTransitionCache :: forall a. Ord a => DFA OldSNum a -> [IntMap EquivalenceClass] generateReverseTransitionCache dfa = IntMap.elems $ IntMap.fromListWith (IntMap.unionWith IntSet.union) $ do - (startingState, stateInfo) <- Map.toList $ dfa_states dfa - (token, targetState) <- IntMap.toList $ state_out stateInfo - pure (token, IntMap.singleton targetState (IntSet.singleton startingState)) + (sourceState, State _accepts transitions) <- Map.toList $ dfa_states dfa + (token, targetState) <- IntMap.toList transitions + pure (token, IntMap.singleton targetState (IntSet.singleton sourceState)) -- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are @@ -233,7 +234,7 @@ refine x y = -- | Given a DFA, compute all sets of equivalent states. -- -- See Note [Hopcroft's Algorithm] for details. -groupEquivalentStates :: forall a. Ord a => DFA Int a -> [EquivalenceClass] +groupEquivalentStates :: forall a. Ord a => DFA OldSNum a -> [EquivalenceClass] groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) where reverseTransitionCache :: [IntMap EquivalenceClass] From e88bd3c5baa2084422b7c840bcb5b3d4b3ca3657 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Fri, 4 Apr 2025 21:17:34 +0200 Subject: [PATCH 17/20] Suggestion: Use a plain filter instead of restrictKeys --- src/DFAMin.hs | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index f8e0556..46db9b3 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -5,10 +5,11 @@ module DFAMin (minimizeDFA) where import AbsSyn import Control.Monad (guard) -import Data.Foldable (fold) +import Data.Bifunctor (second) +-- import Data.Foldable (fold) import Data.IntMap (IntMap) import Data.IntSet (IntSet) -import Data.Map (Map) +-- import Data.Map (Map) import qualified Data.IntMap as IntMap import qualified Data.IntSet as IntSet @@ -202,18 +203,18 @@ generateReverseTransitionCache dfa = IntMap.elems $ pure (token, IntMap.singleton targetState (IntSet.singleton sourceState)) --- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are --- within the IntSet. --- --- This function is a simple wrapper around 'IntMap.restrictKeys', provided for --- compatibility with older versions of @containers@. -restrictKeys :: forall a. IntMap a -> IntSet -> IntMap a -restrictKeys m s = -#if MIN_VERSION_containers(0,6,0) - IntMap.restrictKeys m s -#else - IntMap.filterWithKey (\k _ -> k `IntSet.member` s) m -#endif +-- -- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are +-- -- within the IntSet. +-- -- +-- -- This function is a simple wrapper around 'IntMap.restrictKeys', provided for +-- -- compatibility with older versions of @containers@. +-- restrictKeys :: forall a. IntMap a -> IntSet -> IntMap a +-- restrictKeys m s = +-- #if MIN_VERSION_containers(0,6,0) +-- IntMap.restrictKeys m s +-- #else +-- IntMap.filterWithKey (\k _ -> k `IntSet.member` s) m +-- #endif -- | Given two sets X and Y, compute their intersection and difference. @@ -245,8 +246,12 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) outerLoop :: ([EquivalenceClass], [EquivalenceClass]) -> [EquivalenceClass] outerLoop (r, []) = r outerLoop (r, a:w) = outerLoop $ List.foldl' refineWithX (a:r,w) $ do - allPreviousStates <- reverseTransitionCache - let x = fold $ restrictKeys allPreviousStates a + allPreviousStates :: IntMap EquivalenceClass <- reverseTransitionCache + -- let x = fold $ restrictKeys allPreviousStates a + -- Is 'restrictKeys' here really better than the following simpler filter? + -- The given asymptotics does look better, but aren't there overheads to create a balanced 'IntMap' + -- just to destroy it immediately with a 'fold'? + let x = IntSet.unions $ map (\ (tgt, srcs) -> if tgt `IntSet.member` a then srcs else IntSet.empty) $ IntMap.toList allPreviousStates guard $ not $ IntSet.null x pure x From e7090e2d996a09c82440f26813427883b962331a Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 22:43:17 +0100 Subject: [PATCH 18/20] remove `restrictKeys` in favour of filter --- src/DFAMin.hs | 43 ++++++++++++------------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index 46db9b3..a859296 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -1,20 +1,16 @@ -{-# LANGUAGE CPP #-} - module DFAMin (minimizeDFA) where import AbsSyn -import Control.Monad (guard) +import Control.Monad (guard) import Data.Bifunctor (second) --- import Data.Foldable (fold) -import Data.IntMap (IntMap) -import Data.IntSet (IntSet) --- import Data.Map (Map) +import Data.IntMap (IntMap) +import Data.IntSet (IntSet) -import qualified Data.IntMap as IntMap -import qualified Data.IntSet as IntSet -import qualified Data.List as List -import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet +import qualified Data.List as List +import qualified Data.Map as Map {- Note [Hopcroft's Algorithm] @@ -203,20 +199,6 @@ generateReverseTransitionCache dfa = IntMap.elems $ pure (token, IntMap.singleton targetState (IntSet.singleton sourceState)) --- -- | Given an IntMap and an IntSet, restrict the IntMap to the keys that are --- -- within the IntSet. --- -- --- -- This function is a simple wrapper around 'IntMap.restrictKeys', provided for --- -- compatibility with older versions of @containers@. --- restrictKeys :: forall a. IntMap a -> IntSet -> IntMap a --- restrictKeys m s = --- #if MIN_VERSION_containers(0,6,0) --- IntMap.restrictKeys m s --- #else --- IntMap.filterWithKey (\k _ -> k `IntSet.member` s) m --- #endif - - -- | Given two sets X and Y, compute their intersection and difference. -- Only returns both if both are non-empty, otherwise return neither. refine @@ -246,12 +228,11 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) outerLoop :: ([EquivalenceClass], [EquivalenceClass]) -> [EquivalenceClass] outerLoop (r, []) = r outerLoop (r, a:w) = outerLoop $ List.foldl' refineWithX (a:r,w) $ do - allPreviousStates :: IntMap EquivalenceClass <- reverseTransitionCache - -- let x = fold $ restrictKeys allPreviousStates a - -- Is 'restrictKeys' here really better than the following simpler filter? - -- The given asymptotics does look better, but aren't there overheads to create a balanced 'IntMap' - -- just to destroy it immediately with a 'fold'? - let x = IntSet.unions $ map (\ (tgt, srcs) -> if tgt `IntSet.member` a then srcs else IntSet.empty) $ IntMap.toList allPreviousStates + allPreviousStates <- reverseTransitionCache + let x = IntSet.unions $ do + (target, sources) <- IntMap.toList allPreviousStates + guard $ target `IntSet.member` a + pure sources guard $ not $ IntSet.null x pure x From 240397c4d99383e777c4f6cf35cf276de0d3c465 Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Fri, 4 Apr 2025 22:57:25 +0100 Subject: [PATCH 19/20] use folds to avoid concat --- src/DFAMin.hs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/DFAMin.hs b/src/DFAMin.hs index a859296..deceeb0 100644 --- a/src/DFAMin.hs +++ b/src/DFAMin.hs @@ -236,22 +236,18 @@ groupEquivalentStates dfa = outerLoop ([], initialSubsets dfa) guard $ not $ IntSet.null x pure x - -- Given X, refine values in R, refine values in W, and finally combine the - -- results to obtain the new values of R an W. - -- We can do both steps in parallel, since the new sets to add to W we find - -- while processing R have already been refined by X; it is therefore fine - -- to only refine the original states in W. + -- Given X, refine values in R, then refine values in W, building + -- the new values of R and W along the way. refineWithX (r, w) x = - let (r', w') = unzip $ map (processR x) r - w'' = concatMap (processW x) w - in (concat r', concat w' ++ w'') + let (r', w') = List.foldl' (processR x) ([], []) r + in (r', List.foldl' (processW x) w' w) - processR x y = case refine x y of - Nothing -> ([y], []) + processR x (r', w') y = case refine x y of + Nothing -> (y:r', w') Just (y1, y2) - | IntSet.size y1 <= IntSet.size y2 -> ([y2], [y1]) - | otherwise -> ([y1], [y2]) + | IntSet.size y1 <= IntSet.size y2 -> (y2:r', y1:w') + | otherwise -> (y1:r', y2:w') - processW x y = case refine x y of - Nothing -> [y] - Just (y1, y2) -> [y1, y2] + processW x w' y = case refine x y of + Nothing -> y:w' + Just (y1, y2) -> y1:y2:w' From d2b6a9e818349e4733ef63e63a70429f856bdf1c Mon Sep 17 00:00:00 2001 From: Antoine Leblanc Date: Sat, 5 Apr 2025 11:38:56 +0100 Subject: [PATCH 20/20] add `-XDeriveFunctor` to emulated workflow --- .github/workflows/emulated.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/emulated.yml b/.github/workflows/emulated.yml index 1c48b4f..e753a51 100644 --- a/.github/workflows/emulated.yml +++ b/.github/workflows/emulated.yml @@ -44,7 +44,7 @@ jobs: # interpret the forward slashes. You're welcome. sed -i "s/getDataDir/\(return \"$(pwd | sed 's/\//\\\//g')\\/data\"\)/g" src/Main.hs sed -i "s/version/undefined/g" src/Main.hs - ghc -XHaskell2010 -XPatternSynonyms -XFlexibleContexts -XMagicHash -XCPP -XNondecreasingIndentation -XScopedTypeVariables -XTupleSections \ + ghc -XHaskell2010 -XPatternSynonyms -XFlexibleContexts -XMagicHash -XCPP -XNondecreasingIndentation -XScopedTypeVariables -XTupleSections -XDeriveFunctor \ -package array -package containers -package directory \ -isrc src/Main.hs \ -o alex