Merge pull request #176 from damhiya/improve-dfa-minimization

Ericson2314 · web-flow · commit cbfe73086b0f · 2021-01-31T10:28:56.000-05:00
renew groupEquivStates
diff --git a/src/DFAMin.hs b/src/DFAMin.hs
@@ -16,24 +16,53 @@ import Data.IntMap (IntMap)
 import qualified Data.IntMap as IM
 import qualified Data.List as List
 
-
--- Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia):
-
+-- % Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia):
+-- % X refines Y into Y1 and Y2 means
+-- %  Y1 := Y ∩ X
+-- %  Y2 := Y \ X
+-- %  where both Y1 and Y2 are nonempty
+--
 -- P := {{all accepting states}, {all nonaccepting states}};
 -- Q := {{all accepting states}};
 -- while (Q is not empty) do
 --      choose and remove a set A from Q
 --      for each c in ∑ do
 --           let X be the set of states for which a transition on c leads to a state in A
---           for each set Y in P for which X ∩ Y is nonempty do
---                replace Y in P by the two sets X ∩ Y and Y \ X
+--           for each set Y in P for which X refines Y into Y1 and Y2 do
+--                replace Y in P by the two sets Y1 and Y2
 --                if Y is in Q
 --                     replace Y in Q by the same two sets
 --                else
 --                     add the smaller of the two sets to Q
 --           end;
 --      end;
 -- end;
+--
+-- % X is a preimage of A under transition function.
+
+-- % observation : Q is always subset of P
+-- % let R = P \ Q. then following algorithm is the equivalent of the Hopcroft's Algorithm
+--
+-- R := {{all nonaccepting states}};
+-- Q := {{all accepting states}};
+-- while (Q is not empty) do
+--      choose a set A from Q
+--      remove A from Q and add it to R
+--      for each c in ∑ do
+--           let X be the set of states for which a transition on c leads to a state in A
+--           for each set Y in R for which X refines Y into Y1 and Y2 do
+--                replace Y in R by the greater of the two sets Y1 and Y2
+--                add the smaller of the two sets to Q
+--           end;
+--           for each set Y in Q for which X refines Y into Y1 and Y2 do
+--                replace Y in Q by the two sets Y1 and Y2
+--           end;
+--      end;
+-- end;
+--
+-- % The second for loop that iterates over R mutates Q,
+-- % but it does not affect the third for loop that iterates over Q.
+-- % Because once X refines Y into Y1 and Y2, Y1 and Y2 can't be more refined by X. 
 
 minimizeDFA :: forall a. Ord a => DFA Int a -> DFA Int a
 minimizeDFA  dfa@(DFA { dfa_start_states = starts,
@@ -93,7 +122,7 @@ type EquivalenceClass = IntSet
 
 groupEquivStates :: forall a. Ord a => DFA Int a -> [EquivalenceClass]
 groupEquivStates DFA { dfa_states = statemap }
-  = go init_p init_q
+  = go init_r init_q
   where
     accepting, nonaccepting :: Map Int (State Int a)
     (accepting, nonaccepting) = Map.partition acc statemap
@@ -112,57 +141,61 @@ groupEquivStates DFA { dfa_states = statemap }
     accept_groups :: [EquivalenceClass]
     accept_groups = map IS.fromList (Map.elems accept_map)
 
-    init_p, init_q :: [EquivalenceClass]
-    init_p  -- Issue #71: each EquivalenceClass needs to be a non-empty set
-      | IS.null nonaccepting_states = accept_groups
-      | otherwise                   = nonaccepting_states : accept_groups
+    init_r, init_q :: [EquivalenceClass]
+    init_r  -- Issue #71: each EquivalenceClass needs to be a non-empty set
+      | IS.null nonaccepting_states = []
+      | otherwise                   = [nonaccepting_states]
     init_q = accept_groups
 
-    -- map token T to
-    --   a map from state S to the list of states that transition to
+    -- a map from token T to
+    --   a map from state S to the set of states that transition to
     --   S on token T
-    -- This is a cache of the information needed to compute x below
-    bigmap :: IntMap (IntMap [SNum])
-    bigmap = IM.fromListWith (IM.unionWith (++))
-                [ (i, IM.singleton to [from])
+    -- This is a cache of the information needed to compute xs below
+    bigmap :: IntMap (IntMap EquivalenceClass)
+    bigmap = IM.fromListWith (IM.unionWith IS.union)
+                [ (i, IM.singleton to (IS.singleton from))
                 | (from, state) <- Map.toList statemap,
                   (i,to) <- IM.toList (state_out state) ]
 
-    -- incoming I A = the set of states that transition to a state in
-    -- A on token I.
-    incoming :: Int -> IntSet -> IntSet
-    incoming i a = IS.fromList (concat ss)
-       where
-         map1 = IM.findWithDefault IM.empty i bigmap
-         ss = [ IM.findWithDefault [] s map1
-              | s <- IS.toList a ]
-
-    -- The outer loop: recurse on each set in Q
-    go ::  [EquivalenceClass] -> [EquivalenceClass] -> [EquivalenceClass]
-    go p [] = p
-    go p (a:q) = go1 0 p q
-     where
-       -- recurse on each token (0..255)
-       go1 256 p q = go p q
-       go1 i   p q = go1 (i+1) p' q'
+    -- The outer loop: recurse on each set in R and Q
+    go :: [EquivalenceClass] -> [EquivalenceClass] -> [EquivalenceClass]
+    go r [] = r
+    go r (a:q) = uncurry go $ List.foldl' go0 (a:r,q) xs
+      where
+        xs :: [EquivalenceClass]
+        xs =
+          [ x
+          | preimageMap <- IM.elems bigmap
+#if MIN_VERSION_containers(0, 6, 0)
+          , let x = IS.unions (IM.restrictKeys preimageMap a)
+#else
+          , let x = IS.unions [IM.findWithDefault IS.empty s preimageMap | s <- IS.toList a]
+#endif
+          , not (IS.null x)
+          ]
+
+        refineWith
+          :: IntSet -- preimage set that bisects the input equivalence class
+          -> IntSet -- initial equivalence class
+          -> Maybe (IntSet, IntSet)
+        refineWith x y =
+          if IS.null y1 || IS.null y2
+            then Nothing
+            else Just (y1, y2)
+          where
+            y1 = IS.intersection y x
+            y2 = IS.difference   y x
+
+        go0 (r,q) x = go1 r [] []
           where
-            (p',q') = go2 p [] q
-
-            x = incoming i a
-
-            -- recurse on each set in P
-            go2 []    p' q = (p',q)
-            go2 (y:p) p' q
-              | IS.null i || IS.null d = go2 p (y:p') q
-              | otherwise              = go2 p (i:d:p') q1
-              where
-                    i = IS.intersection x y
-                    d = IS.difference y x
-
-                    q1 = replaceyin q
-                           where
-                             replaceyin [] =
-                                if IS.size i < IS.size d then [i] else [d]
-                             replaceyin (z:zs)
-                                | z == y    = i : d : zs
-                                | otherwise = z : replaceyin zs
+            go1 []    r' q' = (r', go2 q q')
+            go1 (y:r) r' q' = case refineWith x y of
+              Nothing                       -> go1 r (y:r') q'
+              Just (y1, y2)
+                | IS.size y1 <= IS.size y2  -> go1 r (y2:r') (y1:q')
+                | otherwise                 -> go1 r (y1:r') (y2:q')
+
+            go2 []    q' = q'
+            go2 (y:q) q' = case refineWith x y of
+              Nothing       -> go2 q (y:q')
+              Just (y1, y2) -> go2 q (y1:y2:q')