
Commit 9746a47

All the things we need to do for Mathics 5.0
1 parent 844a8d2 commit 9746a47

6 files changed (+77, -83 lines)

.github/workflows/osx.yml

Lines changed: 10 additions & 4 deletions
@@ -8,23 +8,29 @@ on:
 
 jobs:
   build:
+    env:
+      LDFLAGS: "-L/usr/local/opt/llvm@11/lib"
+      CPPFLAGS: "-I/usr/local/opt/llvm@11/include"
     runs-on: macos-latest
     strategy:
       matrix:
         os: [macOS]
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8, 3.9]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          LLVM_CONFIG=/usr/local/Cellar/llvm@9/9.0.1_2/bin/llvm-config pip install llvmlite
-          brew install mariadb
+          brew install llvm@11
           python -m pip install --upgrade pip
+          LLVM_CONFIG=/usr/local/Cellar/llvm@11/11.1.0/bin/llvm-config pip install llvmlite
+          brew install mariadb
           pip install pytest
+          # Can remove after next Mathics-core release
+          python -m pip install -e git+https://github.com/Mathics3/mathics-core#egg=Mathics3[full]
       - name: Install pymathics.natlang
         run: |
           pip install -e .

.github/workflows/ubuntu.yml

Lines changed: 3 additions & 1 deletion
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8, 3.9]
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -23,6 +23,8 @@ jobs:
           sudo apt-get update -qq && sudo apt-get install -qq liblapack-dev llvm-dev maria libmysqlclient-dev
           python -m pip install --upgrade pip
           pip install pytest
+          # Can remove after next Mathics-core release
+          python -m pip install -e git+https://github.com/Mathics3/mathics-core#egg=Mathics3[full]
       - name: Install pymathics.natlang
         run: |
           pip install -e .

pymathics/natlang/__main__.py

Lines changed: 57 additions & 69 deletions
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 """
@@ -41,27 +40,22 @@
 # for nltk, use the environment variable NLTK_DATA to specify a custom data path (instead of $HOME/.nltk).
 # for spacy, use SPACY_DATA; the latter is a custom Mathics variable.
 
-from mathics.builtin.base import Builtin, MessageException
-from mathics.builtin.numbers.randomnumbers import RandomEnv
-from mathics.builtin.codetables import iso639_3
-from mathics.builtin.strings import to_regex, anchor_pattern
-from mathics.core.atoms import Integer, String, Real
-from mathics.core.expression import (
-    Expression,
-    Symbol,
-    strip_context,
-    string_list,
-)
-from mathics.core.listg import to_list_expression
-from mathics.core.symbols import SymbolDivide
-from mathics.core.systemsymbols import SymbolN
-
+import heapq
+import itertools
+import math
 import os
 import re
-import itertools
 from itertools import chain
-import heapq
-import math
+
+from mathics.builtin.atomic.strings import anchor_pattern, to_regex
+from mathics.builtin.base import Builtin, MessageException
+from mathics.builtin.codetables import iso639_3
+from mathics.builtin.numbers.randomnumbers import RandomEnv
+from mathics.core.atoms import Integer, Real, String
+from mathics.core.convert.expression import ListExpression, to_mathics_list
+from mathics.core.expression import Expression
+from mathics.core.symbols import Symbol, SymbolList, SymbolTrue, strip_context
+from mathics.core.systemsymbols import SymbolN
 
 
 def _parse_nltk_lookup_error(e):
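As a quick orientation to the reorganized imports above (module paths taken from the new import block; this is a sketch, not part of the commit): `ListExpression` and `to_mathics_list` now come from `mathics.core.convert.expression`, and string/number atoms from `mathics.core.atoms`.

    # Sketch only: builds a Mathics list {"a", "b"} using the 5.0 import paths above.
    from mathics.core.atoms import String
    from mathics.core.convert.expression import ListExpression

    expr = ListExpression(String("a"), String("b"))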
@@ -138,7 +132,6 @@ def _init_nltk_maps():
             }
         )
 
-
     except ImportError:
         pass
 
@@ -240,7 +233,7 @@ def _load_spacy(self, evaluation, options):
             language_name = String("Undefined")
         if isinstance(language_name, String):
             language_code = _SpacyBuiltin._language_codes.get(
-                language_name.get_string_value()
+                language_name.value
             )
         if not language_code:
             evaluation.message(
@@ -256,7 +249,7 @@ def _load_spacy(self, evaluation, options):
             if "SPACY_DATA" in os.environ:
                 instance = spacy.load(language_code, via=os.environ["SPACY_DATA"])
             else:
-                instance = spacy.load(language_code)
+                instance = spacy.load(f"{language_code}_core_web_sm")
 
             _SpacyBuiltin._spacy_instances[language_code] = instance
             return instance
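The `spacy.load` change tracks spaCy 3's pipeline naming: pipelines are loaded by their full package name (for example `en_core_web_sm`) rather than a bare language code. A minimal sketch, assuming the pipeline has been downloaded with `python -m spacy download en_core_web_sm`:

    import spacy

    # spaCy 3 loads pipelines by full package name; spacy.load("en") no longer works.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp("Mathics is a general-purpose computer algebra system.")
    print([token.text for token in doc])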
@@ -296,7 +289,7 @@ class WordFrequencyData(_SpacyBuiltin):
 
     def apply(self, word, evaluation, options):
         "WordFrequencyData[word_String, OptionsPattern[%(name)s]]"
-        doc = self._nlp(word.get_string_value(), evaluation, options)
+        doc = self._nlp(word.value, evaluation, options)
         frequency = 0.0
         if doc:
             if len(doc) == 1:
@@ -317,7 +310,7 @@ class WordCount(_SpacyBuiltin):
 
     def apply(self, text, evaluation, options):
         "WordCount[text_String, OptionsPattern[%(name)s]]"
-        doc = self._nlp(text.get_string_value(), evaluation, options)
+        doc = self._nlp(text.value, evaluation, options)
         if doc:
             punctuation = spacy.parts_of_speech.PUNCT
             return Integer(sum(1 for word in doc if word.pos != punctuation))
@@ -339,27 +332,24 @@ class TextWords(_SpacyBuiltin):
 
     def apply(self, text, evaluation, options):
         "TextWords[text_String, OptionsPattern[%(name)s]]"
-        doc = self._nlp(text.get_string_value(), evaluation, options)
+        doc = self._nlp(text.value, evaluation, options)
         if doc:
             punctuation = spacy.parts_of_speech.PUNCT
-            return string_list(
-                "List",
-                [String(word.text) for word in doc if word.pos != punctuation],
-                evaluation,
+            return ListExpression(
+                *[String(word.text) for word in doc if word.pos != punctuation],
             )
 
     def apply_n(self, text, n, evaluation, options):
         "TextWords[text_String, n_Integer, OptionsPattern[%(name)s]]"
+        from trepan.api import debug; debug()
         doc = self._nlp(text.get_string_value(), evaluation, options)
         if doc:
             punctuation = spacy.parts_of_speech.PUNCT
-            return string_list(
-                "List",
-                itertools.islice(
+            return ListExpression(
+                *itertools.islice(
                     (String(word.text) for word in doc if word.pos != punctuation),
                     n.get_int_value(),
                 ),
-                evaluation,
             )
 
 
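The hunk above is the recurring `string_list` → `ListExpression` migration: instead of passing a head name, a Python list, and the evaluation object to a helper, the list expression is built directly from `String` atoms. A hedged before/after sketch using only names imported in this commit:

    from mathics.core.atoms import String
    from mathics.core.convert.expression import ListExpression

    words = ["natural", "language", "processing"]

    # Old (pre-5.0): string_list("List", [String(w) for w in words], evaluation)
    # New: construct the list expression directly.
    result = ListExpression(*[String(w) for w in words])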

@@ -387,20 +377,18 @@ def apply(self, text, evaluation, options):
         "TextSentences[text_String, OptionsPattern[%(name)s]]"
         doc = self._nlp(text.get_string_value(), evaluation, options)
         if doc:
-            return string_list(
-                "List", [String(sent.text) for sent in doc.sents], evaluation
+            return ListExpression(
+                *[String(sent.text) for sent in doc.sents]
             )
 
     def apply_n(self, text, n, evaluation, options):
         "TextSentences[text_String, n_Integer, OptionsPattern[%(name)s]]"
         doc = self._nlp(text.get_string_value(), evaluation, options)
         if doc:
-            return string_list(
-                "List",
+            return ListExpression(
                 itertools.islice(
                     (String(sent.text) for sent in doc.sents), n.get_int_value()
                 ),
-                evaluation,
             )
 
 
@@ -433,11 +421,11 @@ def filter_words(words):
                 elif not is_stop(s):
                     yield String(s)
 
-        return string_list("List", filter_words(l.leaves), evaluation)
+        return ListExpression(*list(filter_words(l.elements)))
 
     def apply_string(self, s, evaluation, options):
         "DeleteStopwords[s_String, OptionsPattern[%(name)s]]"
-        doc = self._nlp(s.get_string_value(), evaluation, options)
+        doc = self._nlp(s.value, evaluation, options)
         if doc:
             is_stop = self._is_stop_lambda(evaluation, options)
             if is_stop:
@@ -473,18 +461,18 @@ class WordFrequency(_SpacyBuiltin):
 
     def apply(self, text, word, evaluation, options):
         "WordFrequency[text_String, word_, OptionsPattern[%(name)s]]"
-        doc = self._nlp(text.get_string_value(), evaluation, options)
+        doc = self._nlp(text.value, evaluation, options)
         if not doc:
             return
         if isinstance(word, String):
-            words = set((word.get_string_value(),))
+            words = set([word.value])
         elif word.get_head_name() == "System`Alternatives":
-            if not all(isinstance(a, String) for a in word.leaves):
+            if not all(isinstance(a, String) for a in word.elements):
                 return  # error
-            words = set(a.get_string_value() for a in word.leaves)
+            words = set(a.value for a in word.elements)
         else:
             return  # error
-        ignore_case = self.get_option(options, "IgnoreCase", evaluation).is_true()
+        ignore_case = self.get_option(options, "IgnoreCase", evaluation) is SymbolTrue
         if ignore_case:
             words = [w.lower() for w in words]
         n = 0
@@ -494,7 +482,9 @@ def apply(self, text, word, evaluation, options):
                 text = text.lower()
             if text in words:
                 n += 1
-        return Expression(SymbolN, Expression(SymbolDivide, Integer(n), Integer(len(doc))))
+        return Expression(
+            SymbolN, Integer(n) / Integer(len(doc))
+        )
 
 
 class Containing(Builtin):
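Two smaller API shifts appear in `WordFrequency` above: option values are compared against `SymbolTrue` by identity instead of calling `.is_true()`, and the ratio is written with `Integer` division rather than an explicit `Expression(SymbolDivide, ...)`. A sketch of both patterns, assuming `Integer.__truediv__` builds the quotient expression the way this hunk relies on:

    from mathics.core.atoms import Integer
    from mathics.core.expression import Expression
    from mathics.core.symbols import SymbolTrue
    from mathics.core.systemsymbols import SymbolN

    ignore_case = SymbolTrue              # stands in for self.get_option(...) here
    if ignore_case is SymbolTrue:         # identity check replaces .is_true()
        pass

    n, total = 3, 7
    # Integer / Integer yields the quotient, wrapped in N[...] as in the hunk above.
    result = Expression(SymbolN, Integer(n) / Integer(total))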
@@ -505,12 +495,12 @@ def _cases(doc, form):
     if isinstance(form, String):
         generators = [_forms.get(form.get_string_value())]
     elif form.get_head_name() == "System`Alternatives":
-        if not all(isinstance(f, String) for f in form.leaves):
+        if not all(isinstance(f, String) for f in form.elements):
             return  # error
-        generators = [_forms.get(f.get_string_value()) for f in form.leaves]
+        generators = [_forms.get(f.get_string_value()) for f in form.elements]
     elif form.get_head_name() == "PyMathics`Containing":
-        if len(form.leaves) == 2:
-            for t in _containing(doc, *form.leaves):
+        if len(form.elements) == 2:
+            for t in _containing(doc, *form.elements):
                 yield t
             return
     else:
@@ -582,18 +572,17 @@ class TextCases(_SpacyBuiltin):
 
     def apply(self, text, form, evaluation, options):
         "TextCases[text_String, form_, OptionsPattern[%(name)s]]"
-        doc = self._nlp(text.get_string_value(), evaluation, options)
+        doc = self._nlp(text.value, evaluation, options)
         if doc:
-            return to_list_expression(*[t.text for t in _cases(doc, form)])
+            return to_mathics_list(*[t.text for t in _cases(doc, form)])
 
     def apply_n(self, text, form, n, evaluation, options):
         "TextCases[text_String, form_, n_Integer, OptionsPattern[%(name)s]]"
-        doc = self._nlp(text.get_string_value(), evaluation, options)
+        doc = self._nlp(text.value, evaluation, options)
         if doc:
-            return Expression(
-                "List",
+            return to_mathics_list(
                 *itertools.islice(
-                    (t.text for t in _cases(doc, form)), n.get_int_value()
+                    (t.text for t in _cases(doc, form)), n.value
                 )
             )
 
@@ -611,9 +600,9 @@ class TextPosition(_SpacyBuiltin):
 
     def apply(self, text, form, evaluation, options):
         "TextPosition[text_String, form_, OptionsPattern[%(name)s]]"
-        doc = self._nlp(text.get_string_value(), evaluation, options)
+        doc = self._nlp(text.value, evaluation, options)
         if doc:
-            return Expression("List", *[_position(t) for t in _cases(doc, form)])
+            return ListExpression(*[_position(t) for t in _cases(doc, form)])
 
     def apply_n(self, text, form, n, evaluation, options):
         "TextPosition[text_String, form_, n_Integer, OptionsPattern[%(name)s]]"
@@ -682,7 +671,7 @@ def apply(self, text, evaluation, options):
         if doc:
             tree = self._to_tree(list(doc))
             sents = ["(Sentence, (%s))" % self._to_constituent_string(x) for x in tree]
-            return Expression("List", *[String(s) for s in sents])
+            return to_mathics_list(*sents, elements_conversion_fn = String)
 
 
 class WordSimilarity(_SpacyBuiltin):
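`to_mathics_list` also accepts an `elements_conversion_fn`, which the hunk above uses to wrap each plain Python string in a `String` atom. A small sketch of the same call shape:

    from mathics.core.atoms import String
    from mathics.core.convert.expression import to_mathics_list

    sents = ["(Sentence, (NP))", "(Sentence, (VP))"]
    # Each element is passed through String() before landing in the Mathics list.
    result = to_mathics_list(*sents, elements_conversion_fn=String)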
@@ -737,17 +726,17 @@ def apply_pair(self, text1, i1, text2, i2, evaluation, options):
             i1.get_head_name() == "System`List"
             and i2.get_head_name() == "System`List"
         ):
-            if len(i1.leaves) != len(i2.leaves):
+            if len(i1.elements) != len(i2.elements):
                 evaluation.message("TextSimilarity", "idxfmt")
                 return
             if any(
-                not all(isinstance(i, Integer) for i in l.leaves)
+                not all(isinstance(i, Integer) for i in l.elements)
                 for l in (i1, i2)
             ):
                 evaluation.message("TextSimilarity", "idxfmt")
                 return
-            indices1 = [i.get_int_value() for i in i1.leaves]
-            indices2 = [i.get_int_value() for i in i2.leaves]
+            indices1 = [i.get_int_value() for i in i1.elements]
+            indices2 = [i.get_int_value() for i in i2.elements]
             multiple = True
         elif isinstance(i1, Integer) and isinstance(i2, Integer):
             indices1 = [i1.get_int_value()]
@@ -814,11 +803,11 @@ def apply(self, word, evaluation):
 
     def apply_list(self, words, evaluation):
         "WordStem[words_List]"
-        if all(isinstance(w, String) for w in words.leaves):
+        if all(isinstance(w, String) for w in words.elements):
             stemmer = self._get_porter_stemmer()
             return Expression(
                 "List",
-                *[String(stemmer.stem(w.get_string_value())) for w in words.leaves]
+                *[String(stemmer.stem(w.get_string_value())) for w in words.elements]
             )
 
 
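The `leaves` → `elements` rename recurs through the rest of this file: wherever the old code walked `expr.leaves`, the 5.0 API exposes the same children as `expr.elements`. A brief sketch of the idiom:

    from mathics.core.atoms import String
    from mathics.core.convert.expression import ListExpression

    expr = ListExpression(String("walk"), String("walking"), String("walked"))
    # 5.0 style: iterate expr.elements (formerly expr.leaves).
    texts = [e.value for e in expr.elements if isinstance(e, String)]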

@@ -1163,10 +1152,10 @@ def _parse_word(self, word):
         if isinstance(word, String):
             return word.get_string_value().lower()
         elif word.get_head_name() == "System`List":
-            if len(word.leaves) == 3 and all(
-                isinstance(s, String) for s in word.leaves
+            if len(word.elements) == 3 and all(
+                isinstance(s, String) for s in word.elements
             ):
-                return tuple(s.get_string_value() for s in word.leaves)
+                return tuple(s.get_string_value() for s in word.elements)
 
     def _standard_property(
         self, py_word, py_form, py_property, wordnet, language_code, evaluation
@@ -1465,7 +1454,6 @@ class LanguageIdentify(Builtin):
     def apply(self, text, evaluation):
         "LanguageIdentify[text_String]"
         import langid  # see https://github.com/saffsd/langid.py
-
         # an alternative: https://github.com/Mimino666/langdetect
         import pycountry
 
pymathics/natlang/version.py

Lines changed: 1 addition & 2 deletions
@@ -1,8 +1,7 @@
-#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 
 # This file is suitable for sourcing inside POSIX shell as
 # well as importing into Python. That's why there is no
 # space around "=" below.
-__version__="2.2.1.dev0"
+__version__="5.0.0.dev0"

setup.py

Lines changed: 1 addition & 2 deletions
@@ -48,12 +48,11 @@ def read(*rnames):
     packages=find_namespace_packages(include=["pymathics.*"]),
     install_requires=[
         "Mathics3 >= 5.0.0.dev0,<5.0.1",
-        "click>=7.1",
+        "click>=8.0",
         "joblib>=1.0.1",
         "llvmlite>=0.36",
         "nltk>=3.6.1",
         "pattern>=3.6.0",
-        "six>=1.11.0",
         "spacy>=3.4",
         "wasabi<1.1.0,>=0.8.2",
     ],
