Skip to content

Commit 358ce85

Browse files
fix path issues
1 parent fd1718f commit 358ce85

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

extractnet/extractor.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,20 @@
33
import numpy as np
44
from sklearn.base import BaseEstimator, ClassifierMixin
55
from sklearn.ensemble import ExtraTreesClassifier
6+
import os
7+
import dateparser
8+
import joblib
69

710
from .compat import string_, str_cast, unicode_
811
from .util import get_and_union_features, convert_segmentation_to_text
912
from .blocks import TagCountReadabilityBlockifier
1013
from .features.author import AuthorFeatures
1114
from .sequence_tagger.models import word2features
1215

13-
import dateparser
14-
import joblib
15-
1616
from sklearn.base import clone
1717

18+
# Absolute path of the directory containing this module. Default model
# artifacts (e.g. the tokenizer pickles under ``models/``) are resolved
# relative to it, so lookups do not depend on the current working directory.
# ``os.path.dirname`` is used instead of string replacement so the result is
# correct regardless of the platform path separator or the module's filename.
BASE_EXTRACTOR_DIR = os.path.dirname(os.path.abspath(__file__))
19+
1820
class MultiExtractor(BaseEstimator, ClassifierMixin):
1921
"""
2022
An sklearn-style classifier that extracts the main content (and/or comments)
@@ -62,10 +64,15 @@ def state_dict(self):
6264
def __init__(self, blockifier=TagCountReadabilityBlockifier,
6365
features=('kohlschuetter', 'weninger', 'readability'),
6466
model=None,
65-
css_tokenizer_path='extractnet/models/css_tokenizer.pkl.gz',
66-
text_tokenizer_path='extractnet/models/text_tokenizer.pkl.gz',
67+
css_tokenizer_path=None,
68+
text_tokenizer_path=None,
6769
num_labels=2, prob_threshold=0.5, max_block_weight=200,
6870
features_type=None, author_feature_transforms=None):
71+
if css_tokenizer_path is None:
72+
css_tokenizer_path = os.path.join(BASE_EXTRACTOR_DIR, 'models/css_tokenizer.pkl.gz')
73+
if text_tokenizer_path is None:
74+
text_tokenizer_path = os.path.join(BASE_EXTRACTOR_DIR, 'models/text_tokenizer.pkl.gz')
75+
6976
self.params = {
7077
'features': features,
7178
'num_labels': num_labels,

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def find_libxml2_include():
117117
'Cython>=0.21.1',
118118
'ftfy>=4.1.0,<5.0.0',
119119
'lxml',
120-
'numpy>=1.11.0',
120+
'numpy>=1.19.0',
121121
'scikit-learn>=0.22.0',
122122
'scipy>=0.17.0',
123123
'sklearn-crfsuite==0.3.6',

0 commit comments

Comments
 (0)