|
1 | 1 | import gzip |
2 | 2 | import importlib.resources |
| 3 | +import sys |
3 | 4 | import typing |
4 | 5 |
|
5 | 6 | from . import pywordsegment |
6 | 7 |
|
| 8 | +PY_VERSION_MAJOR = sys.version_info.major |
| 9 | +PY_VERSION_MINOR = sys.version_info.minor |
7 | 10 |
|
8 | 11 | class WordSegmenter: |
9 | 12 | word_segmenter: pywordsegment.WordSegmenter = None |
10 | 13 |
|
11 | 14 | @staticmethod |
12 | 15 | def load() -> None: |
13 | 16 | if WordSegmenter.word_segmenter is None: |
14 | | - unigrams_serialized = gzip.decompress( |
15 | | - data=importlib.resources.read_binary( |
16 | | - package=__package__, |
17 | | - resource='unigrams.msgpack.gz', |
18 | | - ), |
19 | | - ) |
20 | | - bigrams_serialized = gzip.decompress( |
21 | | - data=importlib.resources.read_binary( |
22 | | - package=__package__, |
23 | | - resource='bigrams.msgpack.gz', |
24 | | - ), |
25 | | - ) |
| 17 | + if PY_VERSION_MAJOR >= 3 and PY_VERSION_MINOR >= 11: |
| 18 | + with importlib.resources.files( |
| 19 | + __package__, |
| 20 | + ).joinpath( |
| 21 | + 'unigrams.msgpack.gz', |
| 22 | + ).open( |
| 23 | + 'rb', |
| 24 | + ) as unigrams_msgpack, importlib.resources.files( |
| 25 | + __package__, |
| 26 | + ).joinpath( |
| 27 | + 'bigrams.msgpack.gz', |
| 28 | + ).open( |
| 29 | + 'rb', |
| 30 | + ) as bigrams_msgpack: |
| 31 | + unigrams_serialized = gzip.decompress( |
| 32 | + data=unigrams_msgpack.read(), |
| 33 | + ) |
| 34 | + bigrams_serialized = gzip.decompress( |
| 35 | + data=bigrams_msgpack.read(), |
| 36 | + ) |
| 37 | + |
| 38 | + else: |
| 39 | + unigrams_serialized = gzip.decompress( |
| 40 | + data=importlib.resources.read_binary( |
| 41 | + package=__package__, |
| 42 | + resource='unigrams.msgpack.gz', |
| 43 | + ), |
| 44 | + ) |
| 45 | + |
| 46 | + bigrams_serialized = gzip.decompress( |
| 47 | + data=importlib.resources.read_binary( |
| 48 | + package=__package__, |
| 49 | + resource='bigrams.msgpack.gz', |
| 50 | + ), |
| 51 | + ) |
26 | 52 |
|
27 | 53 | WordSegmenter.word_segmenter = pywordsegment.WordSegmenter( |
28 | 54 | unigrams_serialized=unigrams_serialized, |
|
0 commit comments