Fix path issues: resolve default tokenizer paths relative to the package directory

theblackcat102 · theblackcat102 · commit 358ce8580991 · 2020-12-18T03:55:26.000+08:00
diff --git a/extractnet/extractor.py b/extractnet/extractor.py
@@ -3,18 +3,20 @@
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.ensemble import ExtraTreesClassifier
+import os
+import dateparser
+import joblib
 
 from .compat import string_, str_cast, unicode_
 from .util import get_and_union_features, convert_segmentation_to_text
 from .blocks import TagCountReadabilityBlockifier
 from .features.author import AuthorFeatures
 from .sequence_tagger.models import word2features
 
-import dateparser
-import joblib
-
 from sklearn.base import clone
 
+BASE_EXTRACTOR_DIR = __file__.replace('/extractor.py','')
+
 class MultiExtractor(BaseEstimator, ClassifierMixin):
     """
         An sklearn-style classifier that extracts the main content (and/or comments)
@@ -62,10 +64,15 @@ def state_dict(self):
     def __init__(self, blockifier=TagCountReadabilityBlockifier,
                  features=('kohlschuetter', 'weninger', 'readability'),
                  model=None,
-                 css_tokenizer_path='extractnet/models/css_tokenizer.pkl.gz',
-                 text_tokenizer_path='extractnet/models/text_tokenizer.pkl.gz',
+                 css_tokenizer_path=None,
+                 text_tokenizer_path=None,
                  num_labels=2, prob_threshold=0.5, max_block_weight=200,
                  features_type=None, author_feature_transforms=None):
+        if css_tokenizer_path is None:
+            css_tokenizer_path = os.path.join(BASE_EXTRACTOR_DIR, 'models/css_tokenizer.pkl.gz')
+        if text_tokenizer_path is None:
+            text_tokenizer_path = os.path.join(BASE_EXTRACTOR_DIR, 'models/text_tokenizer.pkl.gz')
+
         self.params = {
             'features': features,
             'num_labels': num_labels,
diff --git a/setup.py b/setup.py
@@ -117,7 +117,7 @@ def find_libxml2_include():
         'Cython>=0.21.1',
         'ftfy>=4.1.0,<5.0.0',
         'lxml',
-        'numpy>=1.11.0',
+        'numpy>=1.19.0',
         'scikit-learn>=0.22.0',
         'scipy>=0.17.0',
         'sklearn-crfsuite==0.3.6',