@@ -49,13 +49,15 @@ import edu.stanford.nlp.util.logging.Redwood;
4949 /* * A list of verb stems whose final consonant is doubled when the verb is inflected.
5050 * This list can be rebuilt with the main method in process.Morphology.
5151 * The source verb stem list lives in "/u/nlp/data/morph/verbstem.list".
52+ *<br>
53+ * Removed "appal" and "enrol" so that the American spellings, rather than the British ones, are used as the lemmas for those words.
5254 */
5355 private static final String [] verbStems = { " abat" ,
5456 " abet" , " abhor" , " abut" , " accur" , " acquit" ,
5557 " adlib" , " admit" , " aerobat" , " aerosol" , " agendaset" ,
56- " allot" , " alot" , " anagram" , " annul" , " appal " ,
58+ " allot" , " alot" , " anagram" , " annul" ,
5759 " apparel" , " armbar" , " aver" , " babysit" , " airdrop" ,
58- " appal " , " blackleg" , " bobsled" , " bur" , " chum" ,
60+ " blackleg" , " bobsled" , " bur" , " chum" ,
5961 " confab" , " counterplot" , " curet" , " dib" , " backdrop" ,
6062 " backfil" , " backflip" , " backlog" , " backpedal" , " backslap" ,
6163 " backstab" , " bag" , " balfun" , " ballot" , " ban" ,
@@ -67,7 +69,7 @@ import edu.stanford.nlp.util.logging.Redwood;
6769 " bevel" , " bewig" , " bib" , " bid" , " billet" ,
6870 " bin" , " bip" , " bit" , " bitmap" , " blab" ,
6971 " blag" , " blam" , " blan" , " blat" , " bles" ,
70- " blim" , " blip" , " blob" , " bloodlet" , " blot" ,
72+ " blim" , " blip" , " blob" , " blog " , " bloodlet" , " blot" ,
7173 " blub" , " blur" , " bob" , " bodypop" , " bog" ,
7274 " booby-trap" , " boobytrap" , " booksel" , " bootleg" , " bop" ,
7375 " bot" , " bowel" , " bracket" , " brag" , " brig" ,
@@ -102,13 +104,13 @@ import edu.stanford.nlp.util.logging.Redwood;
102104 " disembowel" , " dishevel" , " disinter" , " dispel" , " disprefer" ,
103105 " distil" , " dog" , " dognap" , " don" , " doorstep" ,
104106 " dot" , " dowel" , " drag" , " drat" , " driftnet" ,
105- " distil" , " egotrip" , " enrol " , " enthral" , " extol" ,
107+ " distil" , " egotrip" , " enthral" , " extol" ,
106108 " fulfil" , " gaffe" , " golliwog" , " idyl" , " inspan" ,
107109 " drip" , " drivel" , " drop" , " drub" , " drug" ,
108110 " drum" , " dub" , " duel" , " dun" , " dybbuk" ,
109111 " earwig" , " eavesdrop" , " ecolabel" , " eitherspigot" , " electroblot" ,
110112 " embed" , " emit" , " empanel" , " enamel" , " endlabel" ,
111- " endtrim" , " enrol " , " enthral" , " entrammel" , " entrap" ,
113+ " endtrim" , " enthral" , " entrammel" , " entrap" ,
112114 " enwrap" , " equal" , " equip" , " estop" , " exaggerat" ,
113115 " excel" , " expel" , " extol" , " fag" , " fan" ,
114116 " farewel" , " fat" , " featherbed" , " feget" , " fet" ,
@@ -277,7 +279,7 @@ import edu.stanford.nlp.util.logging.Redwood;
277279 " wet" , " wham" , " whet" , " whip" , " whir" ,
278280 " whiteskin" , " whiz" , " whup" , " wildcat" , " win" ,
279281 " windmil" , " wit" , " woodchop" , " woodcut" , " wor" ,
280- " worship" , " wrap" , " will" , " wiretap" , " yen" ,
282+ " worship" , " wrap" , " will" , " wiretap" , " xfer " , " yen" ,
281283 " yak" , " yap" , " yarnspin" , " yip" , " yodel" ,
282284 " zag" , " zap" , " zig" , " zig-zag" , " zigzag" ,
283285 " zip" , " ztrip" };
@@ -456,7 +458,7 @@ CXY = [bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ]
456458CXY2 = "bb" | "cc" | "dd" | "ff" | "gg" | "hh" | "jj" | "kk" | "ll" | "mm" | "nn" | "pp" | "qq" | "rr" | "ss" | "tt" | "vv" | "ww" | "xx" | "zz"
457459S2 = "ss" | "zz"
458460S = [ sxzSXZ] |( [ csCS] "h" )
459- PRE = "be" | "ex" | "in" | "mis" | "pre" | "pro" | "re"
461+ PRE = "be" | "de" | " ex"| "in" | "mis" | "pre" | "pro" | "re"
460462EDING = "ed" | "ing"
461463ESEDING = "es" | "ed" | "ing"
462464G = [^ \t\r\n \u2028\u2029\u000B\u000C\u0085 _]
0 commit comments