File tree Expand file tree Collapse file tree 5 files changed +98
-38
lines changed
by-name/un/unstructured-api
development/python-modules Expand file tree Collapse file tree 5 files changed +98
-38
lines changed Original file line number Diff line number Diff line change 55 python3 ,
66 makeWrapper ,
77 nix-update-script ,
8- symlinkJoin ,
9- nltk-data ,
108} :
119let
1210 pythonEnv = python3 . withPackages (
@@ -147,14 +145,10 @@ let
147145 ++ unstructured . optional-dependencies . all-docs
148146 ) ;
149147 version = "0.0.82" ;
150- unstructured_api_nltk_data = symlinkJoin {
151- name = "unstructured_api_nltk_data" ;
152-
153- paths = [
154- nltk-data . punkt
155- nltk-data . averaged-perceptron-tagger
156- ] ;
157- } ;
148+ unstructured_api_nltk_data = python3 . pkgs . nltk . dataDir ( d : [
149+ d . punkt
150+ d . averaged-perceptron-tagger
151+ ] ) ;
158152in
159153stdenvNoCC . mkDerivation {
160154 pname = "unstructured-api" ;
Original file line number Diff line number Diff line change 66 gitMinimal ,
77 portaudio ,
88 playwright-driver ,
9- symlinkJoin ,
10- nltk-data ,
119 pythonOlder ,
1210 pythonAtLeast ,
1311 setuptools-scm ,
122120} :
123121
124122let
125- aider-nltk-data = symlinkJoin {
126- name = "aider-nltk-data" ;
127- paths = [
128- nltk-data . punkt-tab
129- nltk-data . stopwords
130- ] ;
131- } ;
123+ aider-nltk-data = nltk . dataDir ( d : [
124+ d . punkt-tab
125+ d . stopwords
126+ ] ) ;
132127
133128 version = "0.83.1" ;
134129 aider-chat = buildPythonPackage {
Original file line number Diff line number Diff line change 1+ {
2+ lib ,
3+ pkgs ,
4+ python3Packages ,
5+ } :
6+ lib . makeOverridable (
7+ { ... } @nltkDataPkgs :
8+ f :
9+ pkgs . symlinkJoin {
10+ inherit ( python3Packages . nltk ) meta ;
11+ name = "nltk-data-dir" ;
12+
13+ paths = f nltkDataPkgs ;
14+ }
15+ ) python3Packages . nltk . data
Original file line number Diff line number Diff line change 11{
22 lib ,
3+ pkgs ,
34 fetchPypi ,
45 buildPythonPackage ,
56 pythonOlder ,
67 click ,
78 joblib ,
89 regex ,
910 tqdm ,
11+
12+ # preInstallCheck
13+ nltk ,
14+
15+ # nativeCheckInputs
16+ matplotlib ,
17+ numpy ,
18+ pyparsing ,
19+ pytestCheckHook ,
20+ pytest-mock ,
1021} :
1122
1223buildPythonPackage rec {
@@ -21,28 +32,78 @@ buildPythonPackage rec {
2132 hash = "sha256-h9EnvT3kvYmk+BJl5fpZyxsZmydEAXU3D3QX0rx66Gg=" ;
2233 } ;
2334
24- propagatedBuildInputs = [
35+ dependencies = [
2536 click
2637 joblib
2738 regex
2839 tqdm
2940 ] ;
3041
31- # Tests require some data, the downloading of which is impure. It would
32- # probably make sense to make the data another derivation, but then feeding
33- # that into the tests (given that we need nltk itself to download the data,
34- # unless there's an easy way to download it without nltk's downloader) might
35- # be complicated. For now let's just disable the tests and hope for the
36- # best.
37- doCheck = false ;
42+ # Build the corpora needed by the test suite with the `dataDir` passthru and expose them to the tests via NLTK_DATA
43+ preInstallCheck = ''
44+ export NLTK_DATA=${
45+ nltk . dataDir (
46+ d : with d ; [
47+ averaged-perceptron-tagger-eng
48+ averaged-perceptron-tagger-rus
49+ brown
50+ cess-cat
51+ cess-esp
52+ conll2007
53+ floresta
54+ gutenberg
55+ inaugural
56+ indian
57+ large-grammars
58+ nombank-1-0
59+ omw-1-4
60+ pl196x
61+ porter-test
62+ ptb
63+ punkt-tab
64+ rte
65+ sinica-treebank
66+ stopwords
67+ tagsets-json
68+ treebank
69+ twitter-samples
70+ udhr
71+ universal-tagset
72+ wmt15-eval
73+ wordnet
74+ wordnet-ic
75+ words
76+ ]
77+ )
78+ }
79+ '' ;
80+
81+ nativeCheckInputs = [
82+ pytestCheckHook
83+ matplotlib
84+ numpy
85+ pyparsing
86+ pytest-mock
87+
88+ pkgs . which
89+ ] ;
90+
91+ disabledTestPaths = [
92+ "nltk/test/unit/test_downloader.py" # Touches network
93+ ] ;
3894
3995 pythonImportsCheck = [ "nltk" ] ;
4096
97+ passthru = {
98+ data = pkgs . nltk-data ;
99+ dataDir = pkgs . callPackage ./data-dir.nix { } ;
100+ } ;
101+
41102 meta = with lib ; {
42103 description = "Natural Language Processing ToolKit" ;
43104 mainProgram = "nltk" ;
44105 homepage = "http://nltk.org/" ;
45106 license = licenses . asl20 ;
46- maintainers = [ ] ;
107+ maintainers = [ lib . maintainers . bengsparks ] ;
47108 } ;
48109}
Original file line number Diff line number Diff line change 1616 python-dateutil ,
1717 scipy ,
1818 toml ,
19- nltk-data ,
20- symlinkJoin ,
2119} :
2220let
23- testNltkData = symlinkJoin {
24- name = "nltk-test-data" ;
25- paths = [
26- nltk-data . punkt
27- nltk-data . punkt-tab
28- nltk-data . stopwords
29- ] ;
30- } ;
21+ testNltkData = nltk . dataDir ( d : [
22+ d . punkt
23+ d . punkt-tab
24+ d . stopwords
25+ ] ) ;
3126
3227 version = "0.0.21" ;
3328 tag = "v${ version } " ;
You can’t perform that action at this time.
0 commit comments