@@ -5,7 +5,9 @@ import PGF
55import qualified Data.Map as M
66import Data.Char
77import Data.List
8+ import Safe
89import System.Environment (getArgs )
10+ import Debug.Trace
911
1012-- AR 2020-02-28
1113
@@ -28,29 +30,33 @@ usage = "runghc MkMorphodict (raw|pgf) <configfile> <datafile> <outfile>"
2830main = do
2931 xx <- getArgs
3032 if length xx /= 4
31- then putStrLn usage
33+ then do
34+ putStrLn " Usage:"
35+ putStrLn usage
36+ putStrLn $ " Got instead: " ++ show xx
3237 else do
33- let mode: configfile: datafile: outfile: _ = xx
38+ let mode: configfile: datafile: outfile: _ = xx
3439 config <- readFile configfile >>= return . mkConfig
35-
40+
3641 rawdata <- case mode of
3742 " pgf" -> pgfFile2rawData config datafile
38- " raw" -> readFile datafile >>= return . map getRawData . filter (not . null ) . lines
43+ " raw" -> readFile datafile >>= return . map getRawData . filter (not . null ) . lines
44+ _ -> error $ " Expected mode (pgf|raw), got " ++ mode
3945 rawdata2gf config rawdata outfile
4046
4147
4248rawdata2gf config rawdata outfile = do
43-
49+
4450 let env = MDEnv rawdata config
4551 let (absrules,cncrules) = mkMorphoDict env
46-
52+
4753 absheader <- readFile (outfile ++ " Abs.header" )
4854 cncheader <- readFile (outfile ++ " .header" )
49-
55+
5056 writeFile (outfile ++ " Abs.gf" ) absheader
5157 appendFile (outfile ++ " Abs.gf" ) $ unlines $ sort absrules
5258 appendFile (outfile ++ " Abs.gf" ) " }"
53-
59+
5460 writeFile (outfile ++ " .gf" ) cncheader
5561 appendFile (outfile ++ " .gf" ) $ unlines $ sort cncrules
5662 appendFile (outfile ++ " .gf" ) " }"
@@ -68,7 +74,7 @@ pgfFile2rawData config pgffile = do
6874 cat <- cats,
6975 f <- functionsByCat pgf (mkCId cat),
7076 lin <- tabularLinearizes pgf lang (mkApp f [] )
71- ]
77+ ]
7278
7379type Cat = String
7480type Fun = String
@@ -84,12 +90,13 @@ mkConfig :: String -> Config -- N : N mkN 0 2 4 6 # 9
8490mkConfig ls = M. fromList [(c,i) | Left (c,i) <- map mkOne (lines ls)]
8591 where
8692 mkOne s = case words s of
87- " --" : _ -> Right s
93+ " --" : _ -> Right s
8894 cat: " :" : tcat: oper: ints -> Left (cat,(tcat,oper,mkArgs ints))
8995 _ -> Right s
9096 mkArgs ints = case break (== " #" ) ints of
91- (ss,[] ) -> (map read ss, [] )
92- (ss,_: fs) -> (map read ss, map read fs)
97+ (ss,[] ) -> (map read' ss, [] )
98+ (ss,_: fs) -> (map read' ss, map read' fs)
99+ read' a = readNote [] a -- Safe.readNote provides better error message
93100
94101getRawData s = case words s of
95102 c: cs -> (c,cs)
@@ -118,10 +125,13 @@ mkMorphoDict env =
118125 (([lemma],newcat),(oper, appSig sig args)) |
119126 (oldcat,args) <- raws,
120127 Just (newcat, oper, sig) <- [M. lookup oldcat (config env)],
121- let lemma = args !! head (fst sig)
122- ]
128+ let lemma = args `at` head (fst sig)
129+ ]
123130
124- appSig (ints,feats) args = ([args !! i | i <- ints], [args !! i | i <- feats])
131+ appSig (ints,feats) args =
132+ -- If there's wrong number in config file, uncomment the line below to see which number it should be
133+ -- trace (intercalate "\n" $ map show (zip [0..] args)) $
134+ ([args `at` i | i <- ints], [args `at` i | i <- feats])
125135
126136 mergeRules :: [RawRule ] -> [RawRule ]
127137 mergeRules = map head . groupBy (\ x y -> snd x == snd y) . sortOn snd
@@ -153,16 +163,20 @@ mkMorphoDict env =
153163 _ | length (nub (map tail fls)) == length fls -> shrinkMore (map tail fls)
154164 _ -> fls
155165
156-
157- mkFun = showCId . mkCId . concat . intersperse " _"
166+ -- >>> mkFun ["hello", "world", "hello friends", "hello-all"]
167+ -- "hello_world_hello_friends_hello_all"
168+ mkFun :: [String ] -> String -- if word contains space or hyphen, replace with underscore
169+ mkFun = showCId . mkCId . concat . intersperse " _" . concatMap (words . removeHyphen)
170+ where
171+ removeHyphen [] = []
172+ removeHyphen [' -' ] = [' -' ] -- If hyphen is the last character, it's usually meaningful, leave it
173+ removeHyphen (' -' : cs) = ' ' : removeHyphen cs
174+ removeHyphen (c: cs) = c : removeHyphen cs
158175
159176quote s = " \" " ++ s ++ " \" "
160177
161-
162-
163-
164178{- ---- let us ignore this
165- findCompounds :: [RuleData] -> [RuleData]
179+ findCompounds :: [RuleData] -> [RuleData]
166180 findCompounds = getCompounds . sortOn cat_orthrevforms
167181
168182 cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
@@ -171,9 +185,9 @@ quote s = "\"" ++ s ++ "\""
171185 revstem = head . snd . cat_revforms
172186 wforms (_,(_,_:forms)) = forms
173187
174- getCompounds :: [RuleData] -> [RuleData]
188+ getCompounds :: [RuleData] -> [RuleData]
175189 getCompounds fls = case fls of
176- fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
190+ fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
177191 fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
178192 ([],_:_) -> markWith fl [] : getCompounds fls2
179193 (fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
@@ -188,7 +202,7 @@ quote s = "\"" ++ s ++ "\""
188202
189203 isPrefixWord x xy =
190204 length suff > 1 && ---- compound first part must be at least two letters long
191- any (\c -> elem c "-0123456789aeiouyåäö") suff && ---- must contain a vowel or a digit
205+ any (\c -> elem c "-0123456789aeiouyåäö") suff && ---- must contain a vowel or a digit
192206 isPrefixOf x xy ---- and of course be a prefix
193207 where
194208 suff = drop (length x) xy
0 commit comments