Skip to content

Commit 4a2c6cc

Browse files
authored
python3Packages.nltk: add data(Dir) passthru, run tests (#409680)
2 parents aad53c5 + 020c1da commit 4a2c6cc

File tree

5 files changed

+98
-38
lines changed

5 files changed

+98
-38
lines changed

pkgs/by-name/un/unstructured-api/package.nix

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
python3,
66
makeWrapper,
77
nix-update-script,
8-
symlinkJoin,
9-
nltk-data,
108
}:
119
let
1210
pythonEnv = python3.withPackages (
@@ -147,14 +145,10 @@ let
147145
++ unstructured.optional-dependencies.all-docs
148146
);
149147
version = "0.0.82";
150-
unstructured_api_nltk_data = symlinkJoin {
151-
name = "unstructured_api_nltk_data";
152-
153-
paths = [
154-
nltk-data.punkt
155-
nltk-data.averaged-perceptron-tagger
156-
];
157-
};
148+
unstructured_api_nltk_data = python3.pkgs.nltk.dataDir (d: [
149+
d.punkt
150+
d.averaged-perceptron-tagger
151+
]);
158152
in
159153
stdenvNoCC.mkDerivation {
160154
pname = "unstructured-api";

pkgs/development/python-modules/aider-chat/default.nix

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
gitMinimal,
77
portaudio,
88
playwright-driver,
9-
symlinkJoin,
10-
nltk-data,
119
pythonOlder,
1210
pythonAtLeast,
1311
setuptools-scm,
@@ -122,13 +120,10 @@
122120
}:
123121

124122
let
125-
aider-nltk-data = symlinkJoin {
126-
name = "aider-nltk-data";
127-
paths = [
128-
nltk-data.punkt-tab
129-
nltk-data.stopwords
130-
];
131-
};
123+
aider-nltk-data = nltk.dataDir (d: [
124+
d.punkt-tab
125+
d.stopwords
126+
]);
132127

133128
version = "0.83.1";
134129
aider-chat = buildPythonPackage {
pkgs/development/python-modules/nltk/data-dir.nix

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
lib,
3+
pkgs,
4+
python3Packages,
5+
}:
6+
lib.makeOverridable (
7+
{ ... }@nltkDataPkgs:
8+
f:
9+
pkgs.symlinkJoin {
10+
inherit (python3Packages.nltk) meta;
11+
name = "nltk-data-dir";
12+
13+
paths = f nltkDataPkgs;
14+
}
15+
) python3Packages.nltk.data
pkgs/development/python-modules/nltk/default.nix

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
11
{
22
lib,
3+
pkgs,
34
fetchPypi,
45
buildPythonPackage,
56
pythonOlder,
67
click,
78
joblib,
89
regex,
910
tqdm,
11+
12+
# preInstallCheck
13+
nltk,
14+
15+
# nativeCheckInputs
16+
matplotlib,
17+
numpy,
18+
pyparsing,
19+
pytestCheckHook,
20+
pytest-mock,
1021
}:
1122

1223
buildPythonPackage rec {
@@ -21,28 +32,78 @@ buildPythonPackage rec {
2132
hash = "sha256-h9EnvT3kvYmk+BJl5fpZyxsZmydEAXU3D3QX0rx66Gg=";
2233
};
2334

24-
propagatedBuildInputs = [
35+
dependencies = [
2536
click
2637
joblib
2738
regex
2839
tqdm
2940
];
3041

31-
# Tests require some data, the downloading of which is impure. It would
32-
# probably make sense to make the data another derivation, but then feeding
33-
# that into the tests (given that we need nltk itself to download the data,
34-
# unless there's an easy way to download it without nltk's downloader) might
35-
# be complicated. For now let's just disable the tests and hope for the
36-
# best.
37-
doCheck = false;
42+
# Use new passthru function to pass dependencies required for testing
43+
preInstallCheck = ''
44+
export NLTK_DATA=${
45+
nltk.dataDir (
46+
d: with d; [
47+
averaged-perceptron-tagger-eng
48+
averaged-perceptron-tagger-rus
49+
brown
50+
cess-cat
51+
cess-esp
52+
conll2007
53+
floresta
54+
gutenberg
55+
inaugural
56+
indian
57+
large-grammars
58+
nombank-1-0
59+
omw-1-4
60+
pl196x
61+
porter-test
62+
ptb
63+
punkt-tab
64+
rte
65+
sinica-treebank
66+
stopwords
67+
tagsets-json
68+
treebank
69+
twitter-samples
70+
udhr
71+
universal-tagset
72+
wmt15-eval
73+
wordnet
74+
wordnet-ic
75+
words
76+
]
77+
)
78+
}
79+
'';
80+
81+
nativeCheckInputs = [
82+
pytestCheckHook
83+
matplotlib
84+
numpy
85+
pyparsing
86+
pytest-mock
87+
88+
pkgs.which
89+
];
90+
91+
disabledTestPaths = [
92+
"nltk/test/unit/test_downloader.py" # Touches network
93+
];
3894

3995
pythonImportsCheck = [ "nltk" ];
4096

97+
passthru = {
98+
data = pkgs.nltk-data;
99+
dataDir = pkgs.callPackage ./data-dir.nix { };
100+
};
101+
41102
meta = with lib; {
42103
description = "Natural Language Processing ToolKit";
43104
mainProgram = "nltk";
44105
homepage = "http://nltk.org/";
45106
license = licenses.asl20;
46-
maintainers = [ ];
107+
maintainers = [ lib.maintainers.bengsparks ];
47108
};
48109
}

pkgs/development/python-modules/type-infer/default.nix

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,13 @@
1616
python-dateutil,
1717
scipy,
1818
toml,
19-
nltk-data,
20-
symlinkJoin,
2119
}:
2220
let
23-
testNltkData = symlinkJoin {
24-
name = "nltk-test-data";
25-
paths = [
26-
nltk-data.punkt
27-
nltk-data.punkt-tab
28-
nltk-data.stopwords
29-
];
30-
};
21+
testNltkData = nltk.dataDir (d: [
22+
d.punkt
23+
d.punkt-tab
24+
d.stopwords
25+
]);
3126

3227
version = "0.0.21";
3328
tag = "v${version}";

0 commit comments

Comments
 (0)