Skip to content

Commit 16334a4

Browse files
committed
Minor fixes
1 parent e4e5a85 commit 16334a4

File tree

2 files changed

+64
-38
lines changed

2 files changed

+64
-38
lines changed

Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ PIP ?= pip3
1010
RM ?= rm
1111
LANG ?= en
1212

13+
# Size of wordlist used
14+
# sm=small, lg=large, md=medium.
15+
WORDLIST_SIZE ?=md
16+
1317
.PHONY: all build \
1418
check clean \
1519
develop dist doc doc-data \
@@ -20,10 +24,11 @@ LANG ?= en
2024
#: Default target - same as "develop"
2125
all: develop
2226

23-
#
27+
#: Word-list data. Customize with LANG (and eventually WORDLIST_SIZE) variables
2428
wordlist:
2529
$(PYTHON) -m nltk.downloader wordnet omw
2630
$(PYTHON) -m spacy download $(LANG)
31+
# # $(PYTHON) -m spacy download $(LANG)_core_web_$(WORDLIST_SIZE)
2732

2833
#: build everything needed to install
2934
build:

pymathics/natlang/__main__.py

Lines changed: 58 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def is_stop(word):
285285
class WordFrequencyData(_SpacyBuiltin):
286286
"""
287287
<dl>
288-
<dt>'WordFrequencyData[$word$]'
288+
<dt>'WordFrequencyData[$word$]'
289289
<dd>returns the frequency of $word$ in common English texts.
290290
</dl>
291291
"""
@@ -306,7 +306,7 @@ def apply(self, word, evaluation, options):
306306
class WordCount(_SpacyBuiltin):
307307
"""
308308
<dl>
309-
<dt>'WordCount[$string$]'
309+
<dt>'WordCount[$string$]'
310310
<dd>returns the number of words in $string$.
311311
</dl>
312312
@@ -325,9 +325,10 @@ def apply(self, text, evaluation, options):
325325
class TextWords(_SpacyBuiltin):
326326
"""
327327
<dl>
328-
<dt>'TextWords[$string$]'
328+
<dt>'TextWords[$string$]'
329329
<dd>returns the words in $string$.
330-
<dt>'TextWords[$string$, $n$]'
330+
331+
<dt>'TextWords[$string$, $n$]'
331332
<dd>returns the first $n$ words in $string$
332333
</dl>
333334
@@ -364,9 +365,10 @@ def apply_n(self, text, n, evaluation, options):
364365
class TextSentences(_SpacyBuiltin):
365366
"""
366367
<dl>
367-
<dt>'TextSentences[$string$]'
368+
<dt>'TextSentences[$string$]'
368369
<dd>returns the sentences in $string$.
369-
<dt>'TextSentences[$string$, $n$]'
370+
371+
<dt>'TextSentences[$string$, $n$]'
370372
<dd>returns the first $n$ sentences in $string$
371373
</dl>
372374
@@ -380,8 +382,6 @@ class TextSentences(_SpacyBuiltin):
380382
= {Mr. Jones met Mrs. Jones.}
381383
"""
382384

383-
context = "PyMathics`"
384-
385385
def apply(self, text, evaluation, options):
386386
"TextSentences[text_String, OptionsPattern[%(name)s]]"
387387
doc = self._nlp(text.get_string_value(), evaluation, options)
@@ -406,9 +406,10 @@ def apply_n(self, text, n, evaluation, options):
406406
class DeleteStopwords(_SpacyBuiltin):
407407
"""
408408
<dl>
409-
<dt>'DeleteStopwords[$list$]'
409+
<dt>'DeleteStopwords[$list$]'
410410
<dd>returns the words in $list$ without stopwords.
411-
<dt>'DeleteStopwords[$string$]'
411+
412+
<dt>'DeleteStopwords[$string$]'
412413
<dd>returns $string$ without stopwords.
413414
</dl>
414415
@@ -453,7 +454,7 @@ def tokens():
453454
class WordFrequency(_SpacyBuiltin):
454455
"""
455456
<dl>
456-
<dt>'WordFrequency[$text$, $word$]'
457+
<dt>'WordFrequency[$text$, $word$]'
457458
<dd>returns the relative frequency of $word$ in $text$.
458459
</dl>
459460
@@ -506,7 +507,7 @@ def _cases(doc, form):
506507
if not all(isinstance(f, String) for f in form.leaves):
507508
return # error
508509
generators = [_forms.get(f.get_string_value()) for f in form.leaves]
509-
elif form.get_head_name() == "System`Containing":
510+
elif form.get_head_name() == "PyMathics`Containing":
510511
if len(form.leaves) == 2:
511512
for t in _containing(doc, *form.leaves):
512513
yield t
@@ -564,7 +565,7 @@ def _containing(doc, outer, inner):
564565
class TextCases(_SpacyBuiltin):
565566
"""
566567
<dl>
567-
<dt>'TextCases[$text$, $form$]'
568+
<dt>'TextCases[$text$, $form$]'
568569
<dd>returns all elements of type $form$ in $text$ in order of their appearance.
569570
</dl>
570571
@@ -599,7 +600,7 @@ def apply_n(self, text, form, n, evaluation, options):
599600
class TextPosition(_SpacyBuiltin):
600601
"""
601602
<dl>
602-
<dt>'TextPosition[$text$, $form$]'
603+
<dt>'TextPosition[$text$, $form$]'
603604
<dd>returns the positions of elements of type $form$ in $text$ in order of their appearance.
604605
</dl>
605606
@@ -628,7 +629,7 @@ def apply_n(self, text, form, n, evaluation, options):
628629
class TextStructure(_SpacyBuiltin):
629630
"""
630631
<dl>
631-
<dt>'TextStructure[$text$, $form$]'
632+
<dt>'TextStructure[$text$, $form$]'
632633
<dd>returns the grammatical structure of $text$ as $form$.
633634
</dl>
634635
@@ -686,11 +687,13 @@ def apply(self, text, evaluation, options):
686687
class WordSimilarity(_SpacyBuiltin):
687688
"""
688689
<dl>
689-
<dt>'WordSimilarity[$text1$, $text2]'
690+
<dt>'WordSimilarity[$text1$, $text2$]'
690691
<dd>returns a real-valued measure of semantic similarity of two texts or words.
691-
<dt>'WordSimilarity[{$text1$, $i1}, {$text2, $j1$}]'
692+
693+
<dt>'WordSimilarity[{$text1$, $i1$}, {$text2$, $j1$}]'
692694
<dd>returns a measure of similarity of two words within two texts.
693-
<dt>'WordSimilarity[{$text1$, {$i1, $i2, ...}}, {$text2, {$j1$, $j2$, ...}}]'
695+
696+
<dt>'WordSimilarity[{$text1$, {$i1$, $i2$, ...}}, {$text2$, {$j1$, $j2$, ...}}]'
694697
<dd>returns a measure of similarity of multiple words within two texts.
695698
</dl>
696699
@@ -775,15 +778,18 @@ def apply_pair(self, text1, i1, text2, i2, evaluation, options):
775778
class WordStem(Builtin):
776779
"""
777780
<dl>
778-
<dt>'WordStem[$text$]'
781+
<dt>'WordStem[$word$]'
779782
<dd>returns a stemmed form of $word$, thereby reducing an inflected form to its root.
783+
784+
<dt>'WordStem[{$word1$, $word2$, ...}]'
785+
<dd>returns a list of the stemmed forms of $word1$, $word2$, ..., thereby reducing each inflected form to its root.
780786
</dl>
781787
782788
>> WordStem["towers"]
783789
= tower
784790
785-
>> WordStem[{"towers", "knights", "queens"}]
786-
= {tower, knight, queen}
791+
>> WordStem[{"heroes", "roses", "knights", "queens"}]
792+
= {hero, rose, knight, queen}
787793
"""
788794

789795
requires = ("nltk",)
@@ -801,12 +807,12 @@ def porter(w):
801807
return WordStem._get_porter_stemmer().stem(w)
802808

803809
def apply(self, word, evaluation):
804-
"WordStem[word_System`String]"
810+
"WordStem[word_String]"
805811
stemmer = self._get_porter_stemmer()
806812
return String(stemmer.stem(word.get_string_value()))
807813

808814
def apply_list(self, words, evaluation):
809-
"WordStem[words_System`List]"
815+
"WordStem[words_List]"
810816
if all(isinstance(w, String) for w in words.leaves):
811817
stemmer = self._get_porter_stemmer()
812818
return Expression(
@@ -969,7 +975,7 @@ def _senses(self, word, wordnet, language_code):
969975
class WordDefinition(_WordNetBuiltin):
970976
"""
971977
<dl>
972-
<dt>'WordDefinition[$word$]'
978+
<dt>'WordDefinition[$word$]'
973979
<dd>returns a definition of $word$ or Missing["Available"] if $word$ is not known.
974980
</dl>
975981
@@ -1081,20 +1087,30 @@ def inflected_forms(self, syn, desc):
10811087
class _WordListBuiltin(_WordNetBuiltin):
10821088
_dictionary = {}
10831089

1084-
def _words(self, language_name, type, evaluation):
1090+
def _words(self, language_name, ilk, evaluation):
10851091
wordnet, language_code = self._load_wordnet(evaluation, language_name)
10861092

10871093
if not wordnet:
10881094
return
10891095

1090-
key = "%s.%s" % (language_code, type)
1096+
key = "%s.%s" % (language_code, ilk)
10911097
words = self._dictionary.get(key)
10921098
if not words:
10931099
try:
1094-
if type == "All":
1100+
if ilk == "All":
10951101
filtered_pos = [None]
10961102
else:
1097-
filtered_pos = _wordnet_type_to_pos[type]
1103+
try:
1104+
filtered_pos = _wordnet_type_to_pos[ilk]
1105+
except KeyError:
1106+
evaluation.message(
1107+
self.get_name(),
1108+
"wordnet",
1109+
"type: %s is should be in %s"
1110+
% (ilk._wordnet_type_to_pos.keys()),
1111+
)
1112+
return
1113+
10981114
words = []
10991115
for pos in filtered_pos:
11001116
words.extend(list(wordnet.all_lemma_names(pos, language_code)))
@@ -1110,9 +1126,10 @@ def _words(self, language_name, type, evaluation):
11101126
class WordData(_WordListBuiltin):
11111127
"""
11121128
<dl>
1113-
<dt>'WordData[$word$]'
1129+
<dt>'WordData[$word$]'
11141130
<dd>returns a list of possible senses of a word.
1115-
<dt>'WordData[$word$, $property$]'
1131+
1132+
<dt>'WordData[$word$, $property$]'
11161133
<dd>returns detailed information about a word regarding $property$, e.g. "Definitions" or "Examples".
11171134
</dl>
11181135
@@ -1267,7 +1284,7 @@ def apply_property_form(self, word, property, form, evaluation, options):
12671284
class DictionaryWordQ(_WordNetBuiltin):
12681285
"""
12691286
<dl>
1270-
<dt>'DictionaryWordQ[$word$]'
1287+
<dt>'DictionaryWordQ[$word$]'
12711288
<dd>returns True if $word$ is a word usually found in dictionaries, and False otherwise.
12721289
</dl>
12731290
@@ -1297,7 +1314,7 @@ def apply(self, word, evaluation, options):
12971314
class DictionaryLookup(_WordListBuiltin):
12981315
"""
12991316
<dl>
1300-
<dt>'DictionaryLookup[$word$]'
1317+
<dt>'DictionaryLookup[$word$]'
13011318
<dd>lookup words that match the given $word$ or pattern.
13021319
13031320
<dt>'DictionaryLookup[$word$, $n$]'
@@ -1357,9 +1374,10 @@ def apply_language_n(self, language, word, n, evaluation):
13571374
class WordList(_WordListBuiltin):
13581375
"""
13591376
<dl>
1360-
<dt>'WordList[]'
1377+
<dt>'WordList[]'
13611378
<dd>returns a list of common words.
1362-
<dt>'WordList[$type]'
1379+
1380+
<dt>'WordList[$type$]'
13631381
<dd>returns a list of common words of type $type$.
13641382
</dl>
13651383
@@ -1387,11 +1405,13 @@ def apply_type(self, wordtype, evaluation, options):
13871405
class RandomWord(_WordListBuiltin):
13881406
"""
13891407
<dl>
1390-
<dt>'RandomWord[]'
1408+
<dt>'RandomWord[]'
13911409
<dd>returns a random word.
1392-
<dt>'RandomWord[$type$]'
1410+
1411+
<dt>'RandomWord[$type$]'
13931412
<dd>returns a random word of the given $type$, e.g. of type "Noun" or "Adverb".
1394-
<dt>'RandomWord[$type$, $n$]'
1413+
1414+
<dt>'RandomWord[$type$, $n$]'
13951415
<dd>returns $n$ random words of the given $type$.
13961416
</dl>
13971417
"""
@@ -1474,6 +1494,7 @@ def apply(self, word, evaluation):
14741494

14751495
return String(pluralize(word.get_string_value()))
14761496

1497+
14771498
class SpellingCorrectionList(Builtin):
14781499
"""
14791500
<dl>

0 commit comments

Comments
 (0)