|
10 | 10 | version = "0-unstable-2024-07-29"; |
11 | 11 | nativeBuildInputs = [ unzip ]; |
12 | 12 | dontBuild = true; |
13 | | - dontFixup = true; |
14 | 13 | meta = with lib; { |
15 | 14 | description = "NLTK Data"; |
16 | 15 | homepage = "https://github.com/nltk/nltk_data"; |
17 | 16 | license = licenses.asl20; |
18 | 17 | platforms = platforms.all; |
19 | | - maintainers = with maintainers; [ |
20 | | - bengsparks |
21 | | - happysalada |
22 | | - ]; |
| 18 | + maintainers = with maintainers; [ happysalada ]; |
23 | 19 | }; |
24 | 20 | }; |
25 | 21 | makeNltkDataPackage = |
|
54 | 50 | ''; |
55 | 51 | } |
56 | 52 | ); |
57 | | - |
58 | | - makeChunker = |
59 | | - pname: |
60 | | - makeNltkDataPackage { |
61 | | - inherit pname; |
62 | | - location = "chunkers"; |
63 | | - hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc="; |
64 | | - }; |
65 | | - |
66 | | - makeCorpus = |
67 | | - pname: |
68 | | - makeNltkDataPackage { |
69 | | - inherit pname; |
70 | | - location = "corpora"; |
71 | | - hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; |
72 | | - }; |
73 | | - |
74 | | - makeGrammar = |
75 | | - pname: |
76 | | - makeNltkDataPackage { |
77 | | - inherit pname; |
78 | | - location = "grammars"; |
79 | | - hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM="; |
80 | | - }; |
81 | | - |
82 | | - makeHelp = |
83 | | - pname: |
84 | | - makeNltkDataPackage { |
85 | | - inherit pname; |
86 | | - location = "help"; |
87 | | - hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE="; |
88 | | - }; |
89 | | - |
90 | | - makeMisc = |
91 | | - pname: |
92 | | - makeNltkDataPackage { |
93 | | - inherit pname; |
94 | | - location = "misc"; |
95 | | - hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4="; |
96 | | - }; |
97 | | - |
98 | | - makeModel = |
99 | | - pname: |
100 | | - makeNltkDataPackage { |
101 | | - inherit pname; |
102 | | - location = "models"; |
103 | | - hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4="; |
104 | | - }; |
105 | | - |
106 | | - makeTagger = |
107 | | - pname: |
108 | | - makeNltkDataPackage { |
109 | | - inherit pname; |
110 | | - location = "taggers"; |
111 | | - hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; |
112 | | - }; |
113 | | - |
114 | | - makeTokenizer = |
115 | | - pname: |
116 | | - makeNltkDataPackage { |
117 | | - inherit pname; |
118 | | - location = "tokenizers"; |
119 | | - hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; |
120 | | - }; |
121 | | - |
122 | | - makeStemmer = |
123 | | - pname: |
124 | | - makeNltkDataPackage { |
125 | | - inherit pname; |
126 | | - location = "stemmers"; |
127 | | - hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; |
128 | | - }; |
129 | 53 | in |
130 | 54 | lib.makeScope newScope (self: { |
131 | | - ## Chunkers |
132 | | - maxent-ne-chunker = makeChunker "maxent_ne_chunker"; |
133 | | - maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab"; |
134 | | - |
135 | | - ## Corpora |
136 | | - abc = makeCorpus "abc"; |
137 | | - alpino = makeCorpus "alpino"; |
138 | | - bcp47 = makeCorpus "bcp47"; |
139 | | - biocreative-ppi = makeCorpus "biocreative_ppi"; |
140 | | - brown = makeCorpus "brown"; |
141 | | - brown-tei = makeCorpus "brown_tei"; |
142 | | - cess-cat = makeCorpus "cess_cat"; |
143 | | - cess-esp = makeCorpus "cess_esp"; |
144 | | - chat80 = makeCorpus "chat80"; |
145 | | - city-database = makeCorpus "city_database"; |
146 | | - cmudict = makeCorpus "cmudict"; |
147 | | - comparative-sentences = makeCorpus "comparative_sentences"; |
148 | | - comtrans = makeCorpus "comtrans"; |
149 | | - conll2000 = makeCorpus "conll2000"; |
150 | | - conll2002 = makeCorpus "conll2002"; |
151 | | - conll2007 = makeCorpus "conll2007"; |
152 | | - crubadan = makeCorpus "crubadan"; |
153 | | - dependency-treebank = makeCorpus "dependency_treebank"; |
154 | | - dolch = makeCorpus "dolch"; |
155 | | - europarl-raw = makeCorpus "europarl_raw"; |
156 | | - extended-omw = makeCorpus "extended_omw"; |
157 | | - floresta = makeCorpus "floresta"; |
158 | | - framenet-v15 = makeCorpus "framenet_v15"; |
159 | | - framenet-v17 = makeCorpus "framenet_v17"; |
160 | | - gazetteers = makeCorpus "gazetteers"; |
161 | | - genesis = makeCorpus "genesis"; |
162 | | - gutenberg = makeCorpus "gutenberg"; |
163 | | - ieer = makeCorpus "ieer"; |
164 | | - inaugural = makeCorpus "inaugural"; |
165 | | - indian = makeCorpus "indian"; |
166 | | - jeita = makeCorpus "jeita"; |
167 | | - kimmo = makeCorpus "kimmo"; |
168 | | - knbc = makeCorpus "knbc"; |
169 | | - lin-thesaurus = makeCorpus "lin_thesaurus"; |
170 | | - mac-morpho = makeCorpus "mac_morpho"; |
171 | | - machado = makeCorpus "machado"; |
172 | | - masc-tagged = makeCorpus "masc_tagged"; |
173 | | - movie-reviews = makeCorpus "movie_reviews"; |
174 | | - mte-teip5 = makeCorpus "mte_teip5"; |
175 | | - names = makeCorpus "names"; |
176 | | - nombank-1-0 = makeCorpus "nombank.1.0"; |
177 | | - nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes"; |
178 | | - nps-chat = makeCorpus "nps_chat"; |
179 | | - omw = makeCorpus "omw"; |
180 | | - omw-1-4 = makeCorpus "omw-1.4"; |
181 | | - opinion-lexicon = makeCorpus "opinion_lexicon"; |
182 | | - panlex-swadesh = makeCorpus "panlex_swadesh"; |
183 | | - paradigms = makeCorpus "paradigms"; |
184 | | - pe08 = makeCorpus "pe08"; |
185 | | - pil = makeCorpus "pil"; |
186 | | - pl196x = makeCorpus "pl196x"; |
187 | | - ppattach = makeCorpus "ppattach"; |
188 | | - problem-reports = makeCorpus "problem_reports"; |
189 | | - product-reviews-1 = makeCorpus "product_reviews_1"; |
190 | | - product-reviews-2 = makeCorpus "product_reviews_2"; |
191 | | - propbank = makeCorpus "propbank"; |
192 | | - pros-cons = makeCorpus "pros_cons"; |
193 | | - ptb = makeCorpus "ptb"; |
194 | | - qc = makeCorpus "qc"; |
195 | | - reuters = makeCorpus "reuters"; |
196 | | - rte = makeCorpus "rte"; |
197 | | - semcor = makeCorpus "semcor"; |
198 | | - senseval = makeCorpus "senseval"; |
199 | | - sentence-polarity = makeCorpus "sentence_polarity"; |
200 | | - sentiwordnet = makeCorpus "sentiwordnet"; |
201 | | - shakespeare = makeCorpus "shakespeare"; |
202 | | - sinica-treebank = makeCorpus "sinica_treebank"; |
203 | | - smultron = makeCorpus "smultron"; |
204 | | - state-union = makeCorpus "state_union"; |
205 | | - stopwords = makeCorpus "stopwords"; |
206 | | - subjectivity = makeCorpus "subjectivity"; |
207 | | - swadesh = makeCorpus "swadesh"; |
208 | | - switchboard = makeCorpus "switchboard"; |
209 | | - timit = makeCorpus "timit"; |
210 | | - toolbox = makeCorpus "toolbox"; |
211 | | - treebank = makeCorpus "treebank"; |
212 | | - twitter-samples = makeCorpus "twitter_samples"; |
213 | | - udhr = makeCorpus "udhr"; |
214 | | - udhr2 = makeCorpus "udhr2"; |
215 | | - unicode-samples = makeCorpus "unicode_samples"; |
216 | | - universal-treebanks-v20 = makeCorpus "universal_treebanks_v20"; |
217 | | - verbnet = makeCorpus "verbnet"; |
218 | | - verbnet3 = makeCorpus "verbnet3"; |
219 | | - webtext = makeCorpus "webtext"; |
220 | | - wordnet = makeCorpus "wordnet"; |
221 | | - wordnet-ic = makeCorpus "wordnet_ic"; |
222 | | - wordnet2021 = makeCorpus "wordnet2021"; |
223 | | - wordnet2022 = makeCorpus "wordnet2022"; |
224 | | - wordnet31 = makeCorpus "wordnet31"; |
225 | | - words = makeCorpus "words"; |
226 | | - ycoe = makeCorpus "ycoe"; |
227 | | - |
228 | | - ## Grammars |
229 | | - basque-grammars = makeGrammar "basque_grammars"; |
230 | | - book-grammars = makeGrammar "book_grammars"; |
231 | | - large-grammars = makeGrammar "large_grammars"; |
232 | | - sample-grammars = makeGrammar "sample_grammars"; |
233 | | - spanish-grammars = makeGrammar "spanish_grammars"; |
234 | | - |
235 | | - ## Help |
236 | | - tagsets-json = makeHelp "tagsets_json"; |
237 | | - |
238 | | - ## Misc |
239 | | - mwa-ppdb = makeMisc "mwa_ppdb"; |
240 | | - perluniprops = makeMisc "perluniprops"; |
241 | | - |
242 | | - ## Models |
243 | | - bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux"; |
244 | | - moses-sample = makeModel "moses_sample"; |
245 | | - wmt15-eval = makeModel "wmt15_eval"; |
246 | | - word2vec-sample = makeModel "word2vec_sample"; |
247 | | - |
248 | | - ## Taggers |
249 | | - averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger"; |
250 | | - averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng"; |
251 | | - averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru"; |
252 | | - averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus"; |
253 | | - maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger"; |
254 | | - maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab"; |
255 | | - universal-tagset = makeTagger "universal_tagset"; |
256 | | - |
257 | | - ## Tokenizers |
258 | | - punkt = makeTokenizer "punkt"; |
259 | | - punkt-tab = makeTokenizer "punkt_tab"; |
260 | | - |
261 | | - ## Stemmers |
262 | | - porter-test = makeStemmer "porter_test"; |
263 | | - rslp = makeStemmer "rslp"; |
264 | | - snowball-data = makeStemmer "snowball_data"; |
| 55 | + punkt = makeNltkDataPackage { |
| 56 | + pname = "punkt"; |
| 57 | + location = "tokenizers"; |
| 58 | + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; |
| 59 | + }; |
| 60 | + punkt_tab = makeNltkDataPackage { |
| 61 | + pname = "punkt_tab"; |
| 62 | + location = "tokenizers"; |
| 63 | + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; |
| 64 | + }; |
| 65 | + averaged_perceptron_tagger = makeNltkDataPackage { |
| 66 | + pname = "averaged_perceptron_tagger"; |
| 67 | + location = "taggers"; |
| 68 | + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; |
| 69 | + }; |
| 70 | + averaged_perceptron_tagger_eng = makeNltkDataPackage { |
| 71 | + pname = "averaged_perceptron_tagger_eng"; |
| 72 | + location = "taggers"; |
| 73 | + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; |
| 74 | + }; |
| 75 | + snowball_data = makeNltkDataPackage { |
| 76 | + pname = "snowball_data"; |
| 77 | + location = "stemmers"; |
| 78 | + hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; |
| 79 | + }; |
| 80 | + stopwords = makeNltkDataPackage { |
| 81 | + pname = "stopwords"; |
| 82 | + location = "corpora"; |
| 83 | + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; |
| 84 | + }; |
| 85 | + wordnet = makeNltkDataPackage { |
| 86 | + pname = "wordnet"; |
| 87 | + location = "corpora"; |
| 88 | + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; |
| 89 | + }; |
265 | 90 | }) |
0 commit comments