Skip to content

UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 360: character maps to <undefined> #34

@narasimha1805

Description

@narasimha1805

I get the error "UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 360: character maps to <undefined>" when importing word_topic_vectors from nlpia.book.examples.ch04_catdog_lsa_3x6x16.

Below is the error:

UnicodeDecodeError Traceback (most recent call last)
in <module>
----> 1 from nlpia.book.examples.ch04_catdog_lsa_3x6x16 import word_topic_vectors

d:\python\lib\site-packages\nlpia\book\examples\ch04_catdog_lsa_3x6x16.py in <module>
68 tfidfer = TfidfVectorizer(min_df=2, max_df=.6, stop_words=None, token_pattern=r'(?u)\b\w+\b')
69
---> 70 corpus = get_data('cats_and_dogs')[:NUM_DOCS]
71 docs = normalize_corpus_words(corpus, stemmer=None)
72 tfidf_dense = pd.DataFrame(tfidfer.fit_transform(docs).todense())

d:\python\lib\site-packages\nlpia\loaders.py in get_data(name, nrows, limit)
1111 return filepaths[name]
1112 elif name in DATASET_NAME2FILENAME:
-> 1113 return read_named_csv(name, nrows=nrows)
1114 elif name in DATA_NAMES:
1115 return read_named_csv(DATA_NAMES[name], nrows=nrows)

d:\python\lib\site-packages\nlpia\loaders.py in read_named_csv(name, data_path, nrows, verbose)
1003 name = DATASET_NAME2FILENAME[name]
1004 if name.lower().endswith('.txt') or name.lower().endswith('.txt.gz'):
-> 1005 return read_text(os.path.join(data_path, name), nrows=nrows)
1006 else:
1007 return read_csv(os.path.join(data_path, name), nrows=nrows)

d:\python\lib\site-packages\nlpia\futil.py in read_text(forfn, nrows, verbose)
416 """
417 tqdm_prog = tqdm if verbose else no_tqdm
--> 418 nrows = wc(forfn, nrows=nrows) # not necessary when nrows==None
419 lines = np.empty(dtype=object, shape=nrows)
420 with ensure_open(forfn) as f:

d:\python\lib\site-packages\nlpia\futil.py in wc(f, verbose, nrows)
48 tqdm_prog = tqdm if verbose else no_tqdm
49 with ensure_open(f, mode='r') as fin:
---> 50 for i, line in tqdm_prog(enumerate(fin)):
51 if nrows is not None and i >= nrows - 1:
52 break

d:\python\lib\encodings\cp1252.py in decode(self, input, final)
21 class IncrementalDecoder(codecs.IncrementalDecoder):
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
24
25 class StreamWriter(Codec,codecs.StreamWriter):

UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 1592: character maps to <undefined>

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions