Skip to content

Commit 0d93232

Browse files
authored
Merge pull request #12 from morphismtech/stringRegEx
RegEx combinators
2 parents c763548 + 816614a commit 0d93232

File tree

5 files changed

+69
-30
lines changed

5 files changed

+69
-30
lines changed

CHANGELOG.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
# Changelog for `distributors`
22

3-
All notable changes to this project will be documented in this file.
3+
## 0.2.0.0 - 2025-07-08
44

5-
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6-
and this project adheres to the
7-
[Haskell Package Versioning Policy](https://pvp.haskell.org/).
5+
Added some combinators for `RegEx`es. Updated documentation.
86

9-
## Unreleased
7+
## 0.1.0.0
8+
9+
First version with profunctorial interpretation of invertible syntax.
1010

11-
## 0.1.0.0 - YYYY-MM-DD

distributors.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ cabal-version: 2.2
55
-- see: https://github.com/sol/hpack
66

77
name: distributors
8-
version: 0.1.0.3
8+
version: 0.2.0.0
99
synopsis: Unifying Parsers, Printers & Grammars
1010
description: Distributors provides mathematically inspired abstractions for coders to write parsers that can also be inverted to printers.
1111
category: Profunctors, Optics, Parsing

package.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: distributors
2-
version: 0.1.0.3
2+
version: 0.2.0.0
33
github: "morphismtech/distributors"
44
license: BSD-3-Clause
55
author: "Eitan Chatav"

src/Text/Grammar/Distributor.hs

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ module Text.Grammar.Distributor
2525
, genGrammar
2626
, printGrammar
2727
-- * RegEx
28-
, RegEx (..), regexString, regexGrammar
28+
, RegEx (..)
29+
, regexNorm
30+
, regexParse
31+
, regexString
32+
, regexGrammar
2933
) where
3034

3135
import Control.Applicative
@@ -79,6 +83,10 @@ and `IsString` with the property:
7983
8084
prop> fromString = tokens
8185
86+
`Grammatical` has defaults for methods
87+
`inClass`, `notInClass`, `inCategory`, `notInCategory`
88+
in terms of `satisfy`;
89+
and `rule` & `ruleRec` in terms of `id` & `fix`.
8290
-}
8391
class
8492
( Alternator p
@@ -104,11 +112,11 @@ class
104112
notInCategory cat = satisfy $ \ch -> cat /= generalCategory ch
105113

106114
{- | A nonterminal rule. -}
107-
rule :: String -> p a b -> p a b
115+
rule :: String -> p a a -> p a a
108116
rule _ = id
109117

110118
{- | A recursive, nonterminal rule. -}
111-
ruleRec :: String -> (p a b -> p a b) -> p a b
119+
ruleRec :: String -> (p a a -> p a a) -> p a a
112120
ruleRec name = rule name . fix
113121

114122
instance (Alternative f, Cons s s Char Char)
@@ -137,8 +145,6 @@ data RegEx
137145
makeNestedPrisms ''RegEx
138146
makeNestedPrisms ''GeneralCategory
139147

140-
-- Kleene Star Algebra Operators
141-
142148
(-*-), (|||) :: RegEx -> RegEx -> RegEx
143149

144150
Terminal "" -*- rex = rex
@@ -174,7 +180,47 @@ plusK Fail = Fail
174180
plusK (Terminal "") = Terminal ""
175181
plusK rex = KleenePlus rex
176182

177-
-- RegEx generator
183+
{- | Normalize a `RegEx`.
184+
185+
>>> regexNorm (Sequence (Terminal "abc") (Terminal "xyz"))
186+
Terminal "abcxyz"
187+
-}
188+
regexNorm :: RegEx -> RegEx
189+
regexNorm = \case
190+
Sequence rex0 rex1 -> regexNorm rex0 -*- regexNorm rex1
191+
Alternate rex0 rex1 -> regexNorm rex0 ||| regexNorm rex1
192+
KleeneOpt rex -> optK (regexNorm rex)
193+
KleeneStar rex -> starK (regexNorm rex)
194+
KleenePlus rex -> plusK (regexNorm rex)
195+
otherRegEx -> otherRegEx
196+
197+
{- | Parse a `RegEx` from a `String`.
198+
199+
>>> let str = "xy|z+"
200+
>>> regexParse str
201+
Alternate (Terminal "xy") (KleenePlus (Terminal "z"))
202+
203+
`Fail` if the `String` is not a valid regular expression.
204+
205+
>>> let bad = ")("
206+
>>> regexParse bad
207+
Fail
208+
-}
209+
regexParse :: String -> RegEx
210+
regexParse str = case readGrammar regexGrammar str of
211+
[] -> Fail
212+
rex:_ -> regexNorm rex
213+
214+
{- | The `RegEx` `String`.
215+
216+
>>> let rex = Alternate (Terminal "xy") (KleenePlus (Terminal "z"))
217+
>>> putStrLn (regexString rex)
218+
xy|z+
219+
-}
220+
regexString :: RegEx -> String
221+
regexString rex = maybe "\\q" id (showGrammar regexGrammar rex)
222+
223+
-- RegEx Generator --
178224

179225
newtype DiRegEx a b = DiRegEx RegEx
180226
instance Functor (DiRegEx a) where fmap = rmap
@@ -214,7 +260,7 @@ instance Grammatical DiRegEx where
214260
inCategory cat = DiRegEx (InCategory cat)
215261
notInCategory cat = DiRegEx (NotInCategory cat)
216262

217-
-- Grammar generator
263+
-- Grammar Generator --
218264

219265
data DiGrammar a b = DiGrammar
220266
{ grammarStart :: DiRegEx a b
@@ -281,7 +327,7 @@ instance Grammatical DiGrammar where
281327

282328
-- Generators --
283329

284-
{- | Generate a `ReadS` from a `Grammar`. -}
330+
{- | Generate a `ReadS` parser from a `Grammar`. -}
285331
genReadS :: Grammar a -> ReadS a
286332
genReadS = runParsor
287333

@@ -293,7 +339,7 @@ readGrammar grammar str =
293339
, remaining == []
294340
]
295341

296-
{- | Generate a `ShowS` from a `Grammar`. -}
342+
{- | Generate `ShowS` printers from a `Grammar`. -}
297343
genShowS :: Alternative f => Grammar a -> a -> f ShowS
298344
genShowS = runPrintor
299345

@@ -309,7 +355,7 @@ genRegEx :: Grammar a -> RegEx
309355
genRegEx (DiRegEx rex) = rex
310356

311357
{- | Generate a context free grammar,
312-
consisting of a @"start"@ and named `RegEx`es, from a `Grammar`.
358+
consisting of @"start"@ & named `RegEx` rules, from a `Grammar`.
313359
-}
314360
genGrammar :: Grammar a -> [(String, RegEx)]
315361
genGrammar (DiGrammar (DiRegEx start) rules) =
@@ -322,15 +368,6 @@ printGrammar gram = for_ (genGrammar gram) $ \(name_i, rule_i) -> do
322368
putStr " = "
323369
putStrLn (regexString rule_i)
324370

325-
{- | The `RegEx` `String`.
326-
327-
>>> let rex = Terminal "xy" `Alternate` KleenePlus (Terminal "z")
328-
>>> putStrLn (regexString rex)
329-
xy|z+
330-
-}
331-
regexString :: RegEx -> String
332-
regexString rex = maybe "\\q" id (showGrammar regexGrammar rex)
333-
334371
-- RegEx Grammar --
335372

336373
{- | `regexGrammar` provides an important example of a `Grammar`.
@@ -459,7 +496,7 @@ nonterminalG :: Grammar RegEx
459496
nonterminalG = rule "nonterminal" $
460497
_NonTerminal >?< "\\q{" >* manyP charG *< "}"
461498

462-
parenG :: Grammarr RegEx RegEx
499+
parenG :: Grammarr a a
463500
parenG rex = rule "parenthesized" $
464501
"(" >* rex *< ")"
465502

test/Spec.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module Main (main) where
22

33
import Data.Char
44
import Data.Foldable
5+
import Data.List (nub)
56
import Text.Grammar.Distributor
67
import Test.Hspec
78

@@ -57,5 +58,7 @@ main = hspec $ do
5758
for_ regexExamples $ \(rex, str) -> do
5859
it ("should print " <> show rex <> " correctly") $
5960
showGrammar regexGrammar rex `shouldBe` Just str
60-
it ("should parse " <> str <> " correctly") $
61-
readGrammar regexGrammar str `shouldSatisfy` elem rex
61+
it ("should parse " <> str <> " correctly") $ do
62+
let parses = readGrammar regexGrammar str
63+
parses `shouldSatisfy` elem rex
64+
length (nub (map regexNorm parses)) `shouldBe` 1

0 commit comments

Comments
 (0)