Skip to content

Commit 34fc0d1

Browse files
authored
Revert "nltk-data: make searchable, add all downloadables" (#409843)
2 parents 4049976 + 51a5e70 commit 34fc0d1

File tree

10 files changed

+45
-224
lines changed

10 files changed

+45
-224
lines changed

nixos/modules/services/web-apps/mealie.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ in
7676
API_PORT = toString cfg.port;
7777
BASE_URL = "http://localhost:${toString cfg.port}";
7878
DATA_DIR = "/var/lib/mealie";
79-
NLTK_DATA = pkgs.nltk-data.averaged-perceptron-tagger-eng;
79+
NLTK_DATA = pkgs.nltk-data.averaged_perceptron_tagger_eng;
8080
} // (builtins.mapAttrs (_: val: toString val) cfg.settings);
8181

8282
serviceConfig = {

pkgs/by-name/me/mealie/package.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ pythonpkgs.buildPythonApplication rec {
109109

110110
# Needed for tests
111111
preCheck = ''
112-
export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng}
112+
export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng}
113113
'';
114114

115115
disabledTestPaths = [

pkgs/by-name/pa/paperless-ngx/package.nix

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -305,8 +305,8 @@ python.pkgs.buildPythonApplication rec {
305305
tesseract5
306306
;
307307
nltkData = with nltk-data; [
308-
punkt-tab
309-
snowball-data
308+
punkt_tab
309+
snowball_data
310310
stopwords
311311
];
312312
tests = { inherit (nixosTests) paperless; };

pkgs/by-name/un/unstructured-api/package.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ let
152152

153153
paths = [
154154
nltk-data.punkt
155-
nltk-data.averaged-perceptron-tagger
155+
nltk-data.averaged_perceptron_tagger
156156
];
157157
};
158158
in

pkgs/development/python-modules/aider-chat/default.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ let
125125
aider-nltk-data = symlinkJoin {
126126
name = "aider-nltk-data";
127127
paths = [
128-
nltk-data.punkt-tab
128+
nltk-data.punkt_tab
129129
nltk-data.stopwords
130130
];
131131
};

pkgs/development/python-modules/ingredient-parser-nlp/default.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ buildPythonPackage rec {
4444

4545
# Needed for tests
4646
preCheck = ''
47-
export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng}
47+
export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng}
4848
'';
4949

5050
meta = {

pkgs/development/python-modules/type-infer/default.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ let
2424
name = "nltk-test-data";
2525
paths = [
2626
nltk-data.punkt
27-
nltk-data.punkt-tab
27+
nltk-data.punkt_tab
2828
nltk-data.stopwords
2929
];
3030
};

pkgs/tools/text/nltk-data/default.nix

Lines changed: 36 additions & 211 deletions
Original file line number | Diff line number | Diff line change
@@ -10,16 +10,12 @@ let
1010
version = "0-unstable-2024-07-29";
1111
nativeBuildInputs = [ unzip ];
1212
dontBuild = true;
13-
dontFixup = true;
1413
meta = with lib; {
1514
description = "NLTK Data";
1615
homepage = "https://github.com/nltk/nltk_data";
1716
license = licenses.asl20;
1817
platforms = platforms.all;
19-
maintainers = with maintainers; [
20-
bengsparks
21-
happysalada
22-
];
18+
maintainers = with maintainers; [ happysalada ];
2319
};
2420
};
2521
makeNltkDataPackage =
@@ -54,212 +50,41 @@ let
5450
'';
5551
}
5652
);
57-
58-
makeChunker =
59-
pname:
60-
makeNltkDataPackage {
61-
inherit pname;
62-
location = "chunkers";
63-
hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc=";
64-
};
65-
66-
makeCorpus =
67-
pname:
68-
makeNltkDataPackage {
69-
inherit pname;
70-
location = "corpora";
71-
hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
72-
};
73-
74-
makeGrammar =
75-
pname:
76-
makeNltkDataPackage {
77-
inherit pname;
78-
location = "grammars";
79-
hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM=";
80-
};
81-
82-
makeHelp =
83-
pname:
84-
makeNltkDataPackage {
85-
inherit pname;
86-
location = "help";
87-
hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE=";
88-
};
89-
90-
makeMisc =
91-
pname:
92-
makeNltkDataPackage {
93-
inherit pname;
94-
location = "misc";
95-
hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4=";
96-
};
97-
98-
makeModel =
99-
pname:
100-
makeNltkDataPackage {
101-
inherit pname;
102-
location = "models";
103-
hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4=";
104-
};
105-
106-
makeTagger =
107-
pname:
108-
makeNltkDataPackage {
109-
inherit pname;
110-
location = "taggers";
111-
hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
112-
};
113-
114-
makeTokenizer =
115-
pname:
116-
makeNltkDataPackage {
117-
inherit pname;
118-
location = "tokenizers";
119-
hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
120-
};
121-
122-
makeStemmer =
123-
pname:
124-
makeNltkDataPackage {
125-
inherit pname;
126-
location = "stemmers";
127-
hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
128-
};
12953
in
13054
lib.makeScope newScope (self: {
131-
## Chunkers
132-
maxent-ne-chunker = makeChunker "maxent_ne_chunker";
133-
maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab";
134-
135-
## Corpora
136-
abc = makeCorpus "abc";
137-
alpino = makeCorpus "alpino";
138-
bcp47 = makeCorpus "bcp47";
139-
biocreative-ppi = makeCorpus "biocreative_ppi";
140-
brown = makeCorpus "brown";
141-
brown-tei = makeCorpus "brown_tei";
142-
cess-cat = makeCorpus "cess_cat";
143-
cess-esp = makeCorpus "cess_esp";
144-
chat80 = makeCorpus "chat80";
145-
city-database = makeCorpus "city_database";
146-
cmudict = makeCorpus "cmudict";
147-
comparative-sentences = makeCorpus "comparative_sentences";
148-
comtrans = makeCorpus "comtrans";
149-
conll2000 = makeCorpus "conll2000";
150-
conll2002 = makeCorpus "conll2002";
151-
conll2007 = makeCorpus "conll2007";
152-
crubadan = makeCorpus "crubadan";
153-
dependency-treebank = makeCorpus "dependency_treebank";
154-
dolch = makeCorpus "dolch";
155-
europarl-raw = makeCorpus "europarl_raw";
156-
extended-omw = makeCorpus "extended_omw";
157-
floresta = makeCorpus "floresta";
158-
framenet-v15 = makeCorpus "framenet_v15";
159-
framenet-v17 = makeCorpus "framenet_v17";
160-
gazetteers = makeCorpus "gazetteers";
161-
genesis = makeCorpus "genesis";
162-
gutenberg = makeCorpus "gutenberg";
163-
ieer = makeCorpus "ieer";
164-
inaugural = makeCorpus "inaugural";
165-
indian = makeCorpus "indian";
166-
jeita = makeCorpus "jeita";
167-
kimmo = makeCorpus "kimmo";
168-
knbc = makeCorpus "knbc";
169-
lin-thesaurus = makeCorpus "lin_thesaurus";
170-
mac-morpho = makeCorpus "mac_morpho";
171-
machado = makeCorpus "machado";
172-
masc-tagged = makeCorpus "masc_tagged";
173-
movie-reviews = makeCorpus "movie_reviews";
174-
mte-teip5 = makeCorpus "mte_teip5";
175-
names = makeCorpus "names";
176-
nombank-1-0 = makeCorpus "nombank.1.0";
177-
nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes";
178-
nps-chat = makeCorpus "nps_chat";
179-
omw = makeCorpus "omw";
180-
omw-1-4 = makeCorpus "omw-1.4";
181-
opinion-lexicon = makeCorpus "opinion_lexicon";
182-
panlex-swadesh = makeCorpus "panlex_swadesh";
183-
paradigms = makeCorpus "paradigms";
184-
pe08 = makeCorpus "pe08";
185-
pil = makeCorpus "pil";
186-
pl196x = makeCorpus "pl196x";
187-
ppattach = makeCorpus "ppattach";
188-
problem-reports = makeCorpus "problem_reports";
189-
product-reviews-1 = makeCorpus "product_reviews_1";
190-
product-reviews-2 = makeCorpus "product_reviews_2";
191-
propbank = makeCorpus "propbank";
192-
pros-cons = makeCorpus "pros_cons";
193-
ptb = makeCorpus "ptb";
194-
qc = makeCorpus "qc";
195-
reuters = makeCorpus "reuters";
196-
rte = makeCorpus "rte";
197-
semcor = makeCorpus "semcor";
198-
senseval = makeCorpus "senseval";
199-
sentence-polarity = makeCorpus "sentence_polarity";
200-
sentiwordnet = makeCorpus "sentiwordnet";
201-
shakespeare = makeCorpus "shakespeare";
202-
sinica-treebank = makeCorpus "sinica_treebank";
203-
smultron = makeCorpus "smultron";
204-
state-union = makeCorpus "state_union";
205-
stopwords = makeCorpus "stopwords";
206-
subjectivity = makeCorpus "subjectivity";
207-
swadesh = makeCorpus "swadesh";
208-
switchboard = makeCorpus "switchboard";
209-
timit = makeCorpus "timit";
210-
toolbox = makeCorpus "toolbox";
211-
treebank = makeCorpus "treebank";
212-
twitter-samples = makeCorpus "twitter_samples";
213-
udhr = makeCorpus "udhr";
214-
udhr2 = makeCorpus "udhr2";
215-
unicode-samples = makeCorpus "unicode_samples";
216-
universal-treebanks-v20 = makeCorpus "universal_treebanks_v20";
217-
verbnet = makeCorpus "verbnet";
218-
verbnet3 = makeCorpus "verbnet3";
219-
webtext = makeCorpus "webtext";
220-
wordnet = makeCorpus "wordnet";
221-
wordnet-ic = makeCorpus "wordnet_ic";
222-
wordnet2021 = makeCorpus "wordnet2021";
223-
wordnet2022 = makeCorpus "wordnet2022";
224-
wordnet31 = makeCorpus "wordnet31";
225-
words = makeCorpus "words";
226-
ycoe = makeCorpus "ycoe";
227-
228-
## Grammars
229-
basque-grammars = makeGrammar "basque_grammars";
230-
book-grammars = makeGrammar "book_grammars";
231-
large-grammars = makeGrammar "large_grammars";
232-
sample-grammars = makeGrammar "sample_grammars";
233-
spanish-grammars = makeGrammar "spanish_grammars";
234-
235-
## Help
236-
tagsets-json = makeHelp "tagsets_json";
237-
238-
## Misc
239-
mwa-ppdb = makeMisc "mwa_ppdb";
240-
perluniprops = makeMisc "perluniprops";
241-
242-
## Models
243-
bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux";
244-
moses-sample = makeModel "moses_sample";
245-
wmt15-eval = makeModel "wmt15_eval";
246-
word2vec-sample = makeModel "word2vec_sample";
247-
248-
## Taggers
249-
averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger";
250-
averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng";
251-
averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru";
252-
averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus";
253-
maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger";
254-
maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab";
255-
universal-tagset = makeTagger "universal_tagset";
256-
257-
## Tokenizers
258-
punkt = makeTokenizer "punkt";
259-
punkt-tab = makeTokenizer "punkt_tab";
260-
261-
## Stemmers
262-
porter-test = makeStemmer "porter_test";
263-
rslp = makeStemmer "rslp";
264-
snowball-data = makeStemmer "snowball_data";
55+
punkt = makeNltkDataPackage {
56+
pname = "punkt";
57+
location = "tokenizers";
58+
hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
59+
};
60+
punkt_tab = makeNltkDataPackage {
61+
pname = "punkt_tab";
62+
location = "tokenizers";
63+
hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
64+
};
65+
averaged_perceptron_tagger = makeNltkDataPackage {
66+
pname = "averaged_perceptron_tagger";
67+
location = "taggers";
68+
hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
69+
};
70+
averaged_perceptron_tagger_eng = makeNltkDataPackage {
71+
pname = "averaged_perceptron_tagger_eng";
72+
location = "taggers";
73+
hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
74+
};
75+
snowball_data = makeNltkDataPackage {
76+
pname = "snowball_data";
77+
location = "stemmers";
78+
hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
79+
};
80+
stopwords = makeNltkDataPackage {
81+
pname = "stopwords";
82+
location = "corpora";
83+
hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
84+
};
85+
wordnet = makeNltkDataPackage {
86+
pname = "wordnet";
87+
location = "corpora";
88+
hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
89+
};
26590
})

pkgs/top-level/aliases.nix

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1354,10 +1354,6 @@ mapAliases {
13541354
# When the nixops_unstable alias is removed, nixops_unstable_minimal can be renamed to nixops_unstable.
13551355

13561356
nixosTest = testers.nixosTest; # Added 2022-05-05
1357-
nltk-data.averaged_perceptron_tagger = nltk-data.averaged-perceptron-tagger; # Added 2025-05-21
1358-
nltk-data.averaged_perceptron_tagger_eng = nltk-data.averaged-perceptron-tagger-eng; # Added 2025-05-21
1359-
nltk-data.punkt_tab = nltk-data.punkt-tab; # Added 2025-05-21
1360-
nltk-data.snowball_data = nltk-data.snowball-data; # Added 2025-05-21
13611357
nmap-unfree = throw "'nmap-unfree' has been renamed to/replaced by 'nmap'"; # Converted to throw 2024-10-17
13621358
noah = throw "'noah' has been removed because it was broken and its upstream archived"; # Added 2025-05-10
13631359
nodejs_18 = throw "Node.js 18.x has reached End-Of-Life and has been removed"; # Added 2025-04-23

pkgs/top-level/all-packages.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2344,7 +2344,7 @@ with pkgs;
23442344

23452345
mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { };
23462346

2347-
nltk-data = lib.recurseIntoAttrs (callPackage ../tools/text/nltk-data { });
2347+
nltk-data = callPackage ../tools/text/nltk-data { };
23482348

23492349
seabios-coreboot = seabios.override { ___build-type = "coreboot"; };
23502350
seabios-csm = seabios.override { ___build-type = "csm"; };

0 commit comments

Comments
 (0)