Skip to content

Commit 1dac325

Browse files
committed
Add Synonyms and Antonyms
1 parent 6c0e0d8 commit 1dac325

File tree

2 files changed

+121
-14
lines changed

2 files changed

+121
-14
lines changed

pymathics/natlang/__init__.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@
3939
"""
4040

4141
from pymathics.natlang.linguistic_data import (
42+
Antonyms,
4243
DictionaryLookup,
4344
DictionaryWordQ,
4445
RandomWord,
46+
Synonyms,
4547
WordData,
4648
WordDefinition,
4749
WordList,
@@ -64,14 +66,6 @@
6466
WordStem,
6567
)
6668

67-
from pymathics.natlang.linguistic_data import (
68-
DictionaryLookup,
69-
DictionaryWordQ,
70-
RandomWord,
71-
WordData,
72-
WordDefinition,
73-
WordList,
74-
)
7569
from pymathics.natlang.linguistic_data.translation import LanguageIdentify
7670
from pymathics.natlang.version import __version__
7771

@@ -83,6 +77,7 @@
8377
}
8478

8579
__all__ = [
80+
"Antonyms",
8681
"Containing",
8782
"DeleteStopwords",
8883
"DictionaryLookup",
@@ -91,6 +86,7 @@
9186
"Pluralize",
9287
"RandomWord",
9388
"SpellingCorrectionList",
89+
"Synonyms",
9490
"TextCases",
9591
"TextPosition",
9692
"TextSentences",

pymathics/natlang/linguistic_data/__init__.py

Lines changed: 117 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
# TODO: Complete me
1212

1313
# WordFrequencyData — data on typical current and historical word frequencies
14-
# Synonyms — synonyms for a word
15-
# Antonyms — antonyms for a word
1614
# PartOfSpeech — possible parts of speech for a word
1715

1816

@@ -23,7 +21,7 @@
2321
from mathics.builtin.atomic.strings import anchor_pattern
2422
from mathics.builtin.numbers.randomnumbers import RandomEnv
2523
from mathics.core.atoms import String
26-
from mathics.core.builtin import MessageException
24+
from mathics.core.builtin import Builtin, MessageException
2725
from mathics.core.convert.expression import Expression, to_expression
2826
from mathics.core.convert.regex import to_regex
2927
from mathics.core.element import ElementsProperties
@@ -45,6 +43,63 @@
4543

4644
SymbolDictionaryLookup = Symbol("Pymathics`Natlang`DictionaryLookup")
4745
StringNotAvailable = String("NotAvailable")
46+
StringUnkownWord = String("UnknownWord")
47+
48+
49+
class Antonyms(_WordListBuiltin):
    """
    <url>:Antonyms:
    https://www.merriam-webster.com/dictionary/antonym</url>

    <url>:WMA link:
    https://reference.wolfram.com/language/ref/Antonyms.html</url>

    <dl>
      <dt>'Antonyms["word"]'
      <dd>returns a list of the antonyms associated with string "word".
    </dl>

    >> Antonyms["big"]
     = {little, small}

    >> Antonyms["forget"]
     = ...

    >> Antonyms["peccary"]
     = {}

    >> Antonyms["fdasfdsafdsa"]
     = Missing[UnknownWord]

    """

    # Set checking that the number of arguments required is one.
    eval_error = Builtin.generic_argument_error
    expected_args = 1

    summary_text = "list antonyms for a word"

    def eval(self, word, evaluation: Evaluation, options: dict):
        "Antonyms[word_String, OptionsPattern[Antonyms]]"

        language_name = self._language_name(evaluation, options)
        # _load_wordnet returns a pair; only its first element is needed here.
        wordnet, _ = self._load_wordnet(evaluation, language_name)
        if not wordnet:
            return Expression(SymbolMissing, StringNotAvailable)

        synsets = wordnet.synsets(word.value)
        if len(synsets) == 0:
            # No synset at all: the word is reported as unknown.
            return Expression(SymbolMissing, StringUnkownWord)

        # Gather the antonyms of every lemma of every synset. A set removes
        # duplicates, and underscores in lemma names are shown as spaces.
        antonym_names = {
            antonym.name().replace("_", " ")
            for synset in synsets
            for lemma in synset.lemmas()
            for antonym in lemma.antonyms()
        }

        return ListExpression(*(String(name) for name in sorted(antonym_names)))
48103

49104

50105
class DictionaryLookup(_WordListBuiltin):
@@ -96,7 +151,7 @@ def lookup(self, language_name, word, n, evaluation):
96151
matches = self.search(dictionary_words, pattern)
97152
if n is not None:
98153
matches = islice(matches, 0, n)
99-
return ListExpression(*(String(match) for match in sorted(matches)))
154+
return ListExpression(*(String(word) for word in sorted(matches)))
100155

101156
def eval_english(self, word, evaluation):
102157
"DictionaryLookup[word_]"
@@ -201,6 +256,62 @@ def eval_type_n(self, type, n, evaluation: Evaluation, options: dict):
201256
return ListExpression(*words)
202257

203258

259+
class Synonyms(_WordListBuiltin):
    """
    <url>:Synonyms:
    https://www.merriam-webster.com/dictionary/synonym</url>

    <url>:WMA link:
    https://reference.wolfram.com/language/ref/Synonyms.html</url>

    <dl>
      <dt>'Synonyms["word"]'
      <dd>returns a list of the synonyms associated with string "word".
    </dl>

    >> Synonyms["forget"]
     = ...

    >> Synonyms["plot"]
     = ...

    >> Synonyms["fdasfdsafdsa"]
     = Missing[UnknownWord]

    """

    # Set checking that the number of arguments required is one.
    eval_error = Builtin.generic_argument_error
    expected_args = 1

    summary_text = "list synonyms for a word"

    def eval(self, word, evaluation: Evaluation, options: dict):
        "Synonyms[word_String, OptionsPattern[Synonyms]]"

        # _load_wordnet returns a pair; only its first element is needed here.
        wordnet, _ = self._load_wordnet(
            evaluation, self._language_name(evaluation, options)
        )
        if not wordnet:
            return Expression(SymbolMissing, StringNotAvailable)

        wordnet_synsets = wordnet.synsets(word.value)
        if not wordnet_synsets:
            # No synset at all: the word is reported as unknown.
            return Expression(SymbolMissing, StringUnkownWord)

        # Lemma names are compared case-insensitively against the query so
        # that the word itself is not listed as its own synonym.
        canonic_word = word.value.lower()

        # A set removes duplicates contributed by different synsets;
        # underscores in lemma names are shown as spaces.
        synonyms = set()
        for synset in wordnet_synsets:
            for lemma in synset.lemmas():
                lemma_name = lemma.name()
                if lemma_name.lower() != canonic_word:
                    synonyms.add(lemma_name.replace("_", " "))

        return ListExpression(*(String(synonym) for synonym in sorted(synonyms)))
313+
314+
204315
class WordData(_WordListBuiltin):
205316
"""
206317
@@ -427,7 +538,7 @@ def eval(self, evaluation: Evaluation, options: dict):
427538
words_mathics = (String(word) for word in words)
428539
result = ListExpression(
429540
*words_mathics,
430-
elements_properties=ElementsProperties(False, False, True)
541+
elements_properties=ElementsProperties(False, False, True),
431542
)
432543
return result
433544

@@ -441,5 +552,5 @@ def eval_type(self, wordtype, evaluation: Evaluation, options: dict):
441552
if words is not None:
442553
return ListExpression(
443554
*(String(word) for word in words),
444-
elements_properties=ElementsProperties(False, False, True)
555+
elements_properties=ElementsProperties(False, False, True),
445556
)

0 commit comments

Comments
 (0)