forked from vshabanov/fast-tagsoup
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathTest.hs
More file actions
143 lines (117 loc) · 6.19 KB
/
Test.hs
File metadata and controls
143 lines (117 loc) · 6.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{-# LANGUAGE OverloadedStrings, StandaloneDeriving #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
module Main where
import Text.HTML.TagSoup hiding (parseTags, renderTags)
import Text.HTML.TagSoup.Fast.Utf8Only
import Text.HTML.TagSoup.Entity
import Text.HTML.TagSoup.Match
import Control.Monad
import Data.List
import Data.String
import Test.QuickCheck
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Text (unpack, pack)
import Text.XML.Light
deriving instance Eq Content
deriving instance Eq CData
deriving instance Eq Element
-- * The Test Monad
type Test a = IO a
pass :: Test ()
pass = return ()
runTest :: Test () -> IO ()
runTest x = x >> putStrLn "All tests passed"
(===) :: (Show a, Eq a) => a -> a -> IO ()
a === b = if a == b then pass else putStrLn $ "Does not equal: " ++ show a ++ " =/= " ++ show b
check :: Testable prop => prop -> IO ()
check prop = do
res <- quickCheckWithResult stdArgs{maxSuccess=1000, chatty=False} prop
case res of
Success{} -> pass
_ -> fail "Property failed"
newtype HTML = HTML ByteString deriving Show
instance Arbitrary HTML where
arbitrary = fmap (HTML . BS.concat) $ listOf $ elements frags
where frags = map fromString $ map (:[]) " \n!-</>#&;xy01[]?'\"" ++ ["CDATA","amp","gt","lt"]
shrink (HTML x) = map HTML $ zipWith BS.append (BS.inits x) (tail $ BS.tails x)
-- * The Main section
main :: IO ()
main = runTest $ do
parseTests
renderTests
entityTests
parseTests :: Test ()
parseTests = do
parseTags "<!DOCTYPE TEST>" === [TagOpen "!doctype" [("test","")]]
parseTags "<test \"foo bar\">" === [TagOpen "test" [("\"foo",""),("bar\"","")]]
parseTags "<test baz \"foo\">" === [TagOpen "test" [("baz",""),("\"foo\"","")]]
parseTags "<test 'foo bar'>" === [TagOpen "test" [("'foo",""),("bar'","")]]
parseTags "<test bar=''' />" === [TagOpen "test" [("bar",""),("'","")], TagClose "test"]
parseTags "<test2 a b>" === [TagOpen "test2" [("a",""),("b","")]]
parseTags "<test2 ''>" === [TagOpen "test2" [("''","")]]
parseTags "</test foo>" === [TagClose "test"]
parseTags "<test/>" === [TagOpen "test" [], TagClose "test"]
parseTags "<test1 a = b>" === [TagOpen "test1" [("a","b")]]
parseTags "hello & world" === [TagText "hello & world"]
parseTags "hello @ world" === [TagText "hello @ world"]
parseTags "hello @ world" === [TagText "hello @ world"]
parseTags "hello &haskell; world" === [TagText "hello &haskell; world"]
parseTags "hello \n\t world" === [TagText "hello \n\t world"]
parseTags "<a href=http://www.google.com>" === [TagOpen "a" [("href","http://www.google.com")]]
parseTags "<foo bar=\"bar6baz\">" === [TagOpen "foo" [("bar","bar6baz")]]
parseTags "<foo bar=\"bar&baz\">" === [TagOpen "foo" [("bar","bar&baz")]]
parseTags "hey &how are you" === [TagText "hey &how are you"]
parseTags "hey &how; are you" === [TagText "hey &how; are you"]
-- parseTags "hey & are you" === [TagText "hey & are you"] -- This is not valid XML!
parseTags "hey & are you" === [TagText "hey & are you"]
parseTags "hey ' are you" === [TagText "hey ' are you"]
parseTags "<a test=\"'\">" === [TagOpen "a" [("test", "'")]]
parseTags "<bla:hoi att:bla>" === [TagOpen "bla:hoi" [("att:bla","")]]
-- real cases reported by users
parseTags "test � test" === [TagText "test ? test"] -- HTML edge-case
parseTags "<a href=\"series.php?view=single&ID=72710\">" === [TagOpen "a" [("href","series.php?view=single&ID=72710")]]
parseTags "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" ===
[TagOpen "!DOCTYPE" [("HTML",""),("PUBLIC",""),("","-//W3C//DTD HTML 4.01//EN"),("","http://www.w3.org/TR/html4/strict.dtd")]]
parseTags "<script src=\"http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot\">" ===
[TagOpen "script" [("src","http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot")]]
parseTags "<a title='foo'bar' href=correct>text" === [TagOpen "a" [("title","foo"),("bar'",""),("href", "correct")],TagText "text"]
parseTags "<test><![CDATA[Anything goes, <em>even hidden markup</em> & entities]]> but this is outside</test>" ===
[TagOpen "test" [],TagText "Anything goes, <em>even hidden markup</em> & entities", TagText " but this is outside",TagClose "test"]
parseTags "<a \r\n href=\"url\">" === [TagOpen "a" [("href","url")]]
parseTags "<a href='random.php'><img src='strips/130307.jpg' alt='nukular bish'' title='' /></a>" ===
[TagOpen "a" [("href","random.php")],TagOpen "img" [("src","strips/130307.jpg"),("alt","nukular bish"),("'",""),("title","")],TagClose "img",TagClose "a"]
parseTags "<p>some text</p\n<img alt='< &yyy; >' src=\"abc.gif\">" ===
[TagOpen "p" [],TagText "some text",TagClose "p"]
renderTests :: Test ()
renderTests = do
let rp = renderTags . parseTags
rp "<test>" === "<test>"
rp "<br></br>" === "<br/>"
rp "<script></script>" === "<script></script>"
rp "hello & world" === "hello & world"
rp "<a href=test>" === "<a href=\"test\">"
rp "<a href>" === "<a href=\"\">"
-- rp "<a href?>" === "<a href?>" Not valid XML
rp "<?xml foo?>" === "<?xml foo ?>"
rp "<?xml foo?>" === "<?xml foo ?>"
rp "<!-- neil -->" === "<!-- neil -->"
rp "<a test=\"a'b\">" === "<a test=\"a'b\">"
rp "<a test=\"a&b\">" === "<a test=\"a&b\">"
check $ \(HTML x) -> let y = rp x in rp y == (y :: ByteString)
testF <- readFile "testfile.xml"
parseXML testF === parseXML (unpack $ decodeUtf8 $ rp $ encodeUtf8 $ pack testF)
entityTests :: Test ()
entityTests = do
lookupNumericEntity "65" === Just 'A'
lookupNumericEntity "x41" === Just 'A'
lookupNumericEntity "x4E" === Just 'N'
lookupNumericEntity "x4e" === Just 'N'
lookupNumericEntity "Haskell" === Nothing
lookupNumericEntity "" === Nothing
lookupNumericEntity "89439085908539082" === Nothing
lookupNamedEntity "amp" === Just '&'
lookupNamedEntity "haskell" === Nothing
escapeXMLChar 'a' === Nothing
escapeXMLChar '&' === Just "amp"