Skip to content

Commit 73f63d8

Browse files
authored
Merge branch 'nltk:develop' into hotfix-3072
2 parents 7250f93 + bc32be0 commit 73f63d8

File tree

123 files changed

+487
-366
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+487
-366
lines changed

.github/labeler.yml

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,64 @@
11
# https://github.com/actions/labeler
22
CI:
3-
- .github/workflows/*
4-
- .github/labeler.yml
3+
- changed-files:
4+
- any-glob-to-any-file: [".github/workflows/*", ".github/labeler.yml"]
55

66
classifier:
7-
- nltk/classify/**/*
7+
- changed-files:
8+
- any-glob-to-any-file: "nltk/classify/**/*"
89

910
cli:
10-
- nltk/cli.py
11+
- changed-files:
12+
- any-glob-to-any-file: "nltk/cli.py"
1113

1214
cluster:
13-
- nltk/cluster/**/*
15+
- changed-files:
16+
- any-glob-to-any-file: "nltk/cluster/**/*"
1417

1518
corpus:
16-
- nltk/corpus/**/*
19+
- changed-files:
20+
- any-glob-to-any-file: "nltk/corpus/**/*"
1721

1822
GUI:
19-
- nltk/app/**/*
23+
- changed-files:
24+
- any-glob-to-any-file: "nltk/app/**/*"
2025

2126
internals:
22-
- nltk/internals.py
27+
- changed-files:
28+
- any-glob-to-any-file: "nltk/internals.py"
2329

2430
language-model:
25-
- nltk/lm/**/*
31+
- changed-files:
32+
- any-glob-to-any-file: "nltk/lm/**/*"
2633

2734
metrics:
28-
- nltk/metrics/**/*
35+
- changed-files:
36+
- any-glob-to-any-file: "nltk/metrics/**/*"
2937

3038
parsing:
31-
- nltk/parse/**/*
39+
- changed-files:
40+
- any-glob-to-any-file: "nltk/parse/**/*"
3241

3342
sentiment:
34-
- nltk/sentiment/**/*
43+
- changed-files:
44+
- any-glob-to-any-file: "nltk/sentiment/**/*"
3545

3646
stem/lemma:
37-
- nltk/stem/**/*
47+
- changed-files:
48+
- any-glob-to-any-file: "nltk/stem/**/*"
3849

3950
tagger:
40-
- nltk/tag/**/*
51+
- changed-files:
52+
- any-glob-to-any-file: "nltk/tag/**/*"
4153

4254
tokenizer:
43-
- nltk/tokenize/**/*
55+
- changed-files:
56+
- any-glob-to-any-file: "nltk/tokenize/**/*"
4457

4558
twitter:
46-
- nltk/twitter/**/*
59+
- changed-files:
60+
- any-glob-to-any-file: "nltk/twitter/**/*"
4761

4862
wordnet:
49-
- nltk/corpus/reader/wordnet.py
63+
- changed-files:
64+
- any-glob-to-any-file: "nltk/corpus/reader/wordnet.py"

.github/workflows/ci.yaml

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,13 @@ jobs:
2121
name: Run pre-commit
2222
runs-on: ubuntu-latest
2323
steps:
24-
- uses: actions/checkout@v3
25-
- uses: actions/setup-python@v3
26-
- uses: pre-commit/[email protected]
24+
- uses: actions/checkout@v4
25+
- uses: actions/setup-python@v5
26+
with:
27+
python-version: '3.x' # run with latest python version
28+
- run: |
29+
pip install pre-commit
30+
pre-commit run --all-files
2731
2832
cache_nltk_data:
2933
name: Cache nltk_data
@@ -34,7 +38,7 @@ jobs:
3438
runs-on: ${{ matrix.os }}
3539
steps:
3640
- name: Checkout code
37-
uses: actions/checkout@v3
41+
uses: actions/checkout@v4
3842

3943
- name: Cache nltk data
4044
uses: actions/cache@v3
@@ -56,7 +60,7 @@ jobs:
5660
runs-on: ubuntu-latest
5761
steps:
5862
- name: Checkout code
59-
uses: actions/checkout@v3
63+
uses: actions/checkout@v4
6064

6165
- name: Cache third party tools
6266
uses: actions/cache@v3
@@ -76,21 +80,21 @@ jobs:
7680
needs: [cache_nltk_data, cache_third_party]
7781
strategy:
7882
matrix:
79-
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
83+
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
8084
os: [ubuntu-latest, macos-latest, windows-latest]
8185
fail-fast: false
8286
runs-on: ${{ matrix.os }}
8387
steps:
8488
- name: Checkout code
85-
uses: actions/checkout@v3
89+
uses: actions/checkout@v4
8690

8791
- name: Setup python
88-
uses: actions/setup-python@v3
92+
uses: actions/setup-python@v5
8993
with:
9094
python-version: ${{ matrix.python-version }}
9195

9296
- name: Set up JDK 16
93-
uses: actions/setup-java@v3
97+
uses: actions/setup-java@v4
9498
with:
9599
distribution: 'zulu'
96100
java-version: '16'

.github/workflows/labeler.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
pull-requests: write
1010
runs-on: ubuntu-latest
1111
steps:
12-
- uses: actions/labeler@v4
12+
- uses: actions/labeler@v5
1313
with:
1414
repo-token: "${{ secrets.GITHUB_TOKEN }}"
1515
sync-labels: true

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v4.0.1
3+
rev: v4.5.0
44
hooks:
55
- id: fix-byte-order-marker
66
- id: trailing-whitespace
77
- id: end-of-file-fixer
88
- id: requirements-txt-fixer
99
- id: check-yaml
1010
- repo: https://github.com/asottile/pyupgrade
11-
rev: v3.1.0
11+
rev: v3.15.0
1212
hooks:
1313
- id: pyupgrade
14-
args: ["--py37-plus"]
14+
args: ["--py38-plus"]
1515
- repo: https://github.com/ambv/black
16-
rev: 22.3.0
16+
rev: 23.12.0
1717
hooks:
1818
- id: black
1919
- repo: local
@@ -27,4 +27,4 @@ repos:
2727
types_or: [cython, pyi, python]
2828
args: ['--filter-files']
2929
minimum_pre_commit_version: '2.9.2'
30-
additional_dependencies: ['isort==5.10.1']
30+
additional_dependencies: ['isort==5.13.2']

AUTHORS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
- Tom Lippincott
111111
- Peter Ljunglöf
112112
- Alex Louden
113+
- David Lukeš
113114
- Joseph Lynch
114115
- Nitin Madnani
115116
- Felipe Madrigal
@@ -127,6 +128,7 @@
127128
- David McClosky
128129
- Xinfan Meng
129130
- Dmitrijs Milajevs
131+
- Matt Miller
130132
- Margaret Mitchell
131133
- Tomonori Nagano
132134
- Jason Narad
@@ -296,6 +298,10 @@
296298
- M.K. Pawelkiewicz <https://github.com/hamiltonianflow>
297299
- Steven Thomas Smith <https://github.com/essandess>
298300
- Jan Lennartz <https://github.com/Madnex>
301+
- Tim Sockel <https://github.com/TiMauzi>
302+
- Akihiro Yamazaki <https://github.com/zakkie>
303+
- Ron Urbach <https://github.com/sharpblade4>
304+
- Vivek Kalyan <https://github.com/vivekkalyan>
299305

300306
## Others whose work we've taken and included in NLTK, but who didn't directly contribute it:
301307

CITATION.cff

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,9 @@ preferred-citation:
3939
- given-names: Edward
4040
family-names: Loper
4141
year: 2009
42+
month: 6
43+
url: "https://www.nltk.org/book/"
44+
isbn: "9780596516499"
4245
publisher:
4346
name: "O'Reilly Media, Inc."
47+
website: "https://www.oreilly.com/"

CONTRIBUTING.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ Summary of our git branching model:
7777
- Do many small commits on that branch locally (`git add files-changed`,
7878
`git commit -m "Add some change"`);
7979
- Run the tests to make sure nothing breaks
80-
(`tox -e py37` if you are on Python 3.7);
80+
(`tox -e py312` if you are on Python 3.12);
8181
- Add your name to the `AUTHORS.md` file as a contributor;
8282
- Push to your fork on GitHub (with the name as your local branch:
8383
`git push origin branch-name`);
@@ -109,7 +109,7 @@ Summary of our git branching model:
109109
- Use [PEP8](https://www.python.org/dev/peps/pep-0008/);
110110
- Write tests for your new features (please see "Tests" topic below);
111111
- Always remember that [commented code is dead
112-
code](https://www.codinghorror.com/blog/2008/07/coding-without-comments.html);
112+
code](https://blog.codinghorror.com/coding-without-comments/);
113113
- Name identifiers (variables, classes, functions, module names) with readable
114114
names (`x` is always wrong);
115115
- When manipulating strings, we prefer either [f-string
@@ -169,7 +169,7 @@ The [`.github/workflows/ci.yaml`](https://github.com/nltk/nltk/blob/develop/.git
169169
- Otherwise, download all the data packages through `nltk.download('all')`.
170170

171171
- The `test` job
172-
- tests against supported Python versions (`3.7`, `3.8`, `3.9`).
172+
- tests against supported Python versions (`3.8`, `3.9`, `3.10`, `3.11`, `3.12`).
173173
- tests on `ubuntu-latest`, `macos-latest` and `windows-latest`.
174174
- relies on the `cache_nltk_data` job to ensure that `nltk_data` is available.
175175
- performs these steps:
@@ -189,7 +189,7 @@ The [`.github/workflows/ci.yaml`](https://github.com/nltk/nltk/blob/develop/.git
189189
#### To test with `tox` locally
190190

191191
First setup a new virtual environment, see https://docs.python-guide.org/dev/virtualenvs/
192-
Then run `tox -e py37`.
192+
Then run `tox -e py312`.
193193

194194
For example, using `pipenv`:
195195

@@ -198,7 +198,7 @@ git clone https://github.com/nltk/nltk.git
198198
cd nltk
199199
pipenv install -r pip-req.txt
200200
pipenv install tox
201-
tox -e py37
201+
tox -e py312
202202
```
203203

204204

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
NLTK -- the Natural Language Toolkit -- is a suite of open source Python
66
modules, data sets, and tutorials supporting research and development in Natural
7-
Language Processing. NLTK requires Python version 3.7, 3.8, 3.9, 3.10 or 3.11.
7+
Language Processing. NLTK requires Python version 3.8, 3.9, 3.10, 3.11 or 3.12.
88

99
For documentation, please visit [nltk.org](https://www.nltk.org/).
1010

nltk/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
# Description of the toolkit, keywords, and the project's primary URL.
5353
__longdescr__ = """\
5454
The Natural Language Toolkit (NLTK) is a Python package for
55-
natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11."""
55+
natural language processing. NLTK requires Python 3.8, 3.9, 3.10, 3.11 or 3.12."""
5656
__keywords__ = [
5757
"NLP",
5858
"CL",
@@ -84,11 +84,11 @@
8484
"Intended Audience :: Science/Research",
8585
"License :: OSI Approved :: Apache Software License",
8686
"Operating System :: OS Independent",
87-
"Programming Language :: Python :: 3.7",
8887
"Programming Language :: Python :: 3.8",
8988
"Programming Language :: Python :: 3.9",
9089
"Programming Language :: Python :: 3.10",
9190
"Programming Language :: Python :: 3.11",
91+
"Programming Language :: Python :: 3.12",
9292
"Topic :: Scientific/Engineering",
9393
"Topic :: Scientific/Engineering :: Artificial Intelligence",
9494
"Topic :: Scientific/Engineering :: Human Machine Interfaces",

nltk/app/chunkparser_app.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class RegexpChunkApp:
7878
"VBD": "Verb, past tense",
7979
"MD": "Modal",
8080
"NNS": "Noun, plural",
81-
"NN": "Noun, singular or masps",
81+
"NN": "Noun, singular or mass",
8282
"VBN": "Verb, past participle",
8383
"VBZ": "Verb,3rd ps. sing. present",
8484
"NNP": "Proper noun, singular",
@@ -713,7 +713,7 @@ def _eval_demon(self):
713713
if self.normalized_grammar != self._eval_normalized_grammar:
714714
# Check if we've seen this grammar already. If so, then
715715
# just use the old evaluation values.
716-
for (g, p, r, f) in self._history:
716+
for g, p, r, f in self._history:
717717
if self.normalized_grammar == self.normalize_grammar(g):
718718
self._history.append((g, p, r, f))
719719
self._history_index = len(self._history) - 1
@@ -850,7 +850,7 @@ def _init_widgets(self, top):
850850
).grid(column=i * 2 + 1, row=0)
851851
self.helptabs[self.HELP[0][0]].configure(font=self._font)
852852
self.helpbox.tag_config("elide", elide=True)
853-
for (tag, params) in self.HELP_AUTOTAG:
853+
for tag, params in self.HELP_AUTOTAG:
854854
self.helpbox.tag_config("tag-%s" % tag, **params)
855855
self.show_help(self.HELP[0][0])
856856

@@ -1047,7 +1047,7 @@ def show_trace(self, *e):
10471047
def show_help(self, tab):
10481048
self.helpbox["state"] = "normal"
10491049
self.helpbox.delete("1.0", "end")
1050-
for (name, tabstops, text) in self.HELP:
1050+
for name, tabstops, text in self.HELP:
10511051
if name == tab:
10521052
text = text.replace(
10531053
"<<TAGSET>>",
@@ -1066,7 +1066,7 @@ def show_help(self, tab):
10661066
self.helpbox.config(tabs=tabstops)
10671067
self.helpbox.insert("1.0", text + "\n" * 20)
10681068
C = "1.0 + %d chars"
1069-
for (tag, params) in self.HELP_AUTOTAG:
1069+
for tag, params in self.HELP_AUTOTAG:
10701070
pattern = f"(?s)(<{tag}>)(.*?)(</{tag}>)"
10711071
for m in re.finditer(pattern, text):
10721072
self.helpbox.tag_add("elide", C % m.start(1), C % m.end(1))

0 commit comments

Comments
 (0)