Issues accessing lexicon .json file

Thanks so much for this project! I've encountered an issue in Jupyter Notebook when trying to access the lexicon file nrc_en.json. Here is the sample I tried to run, followed by its output:

from nrclex import NRCLex

text = "hateful"
emotion = NRCLex(text)
print("hateful", emotion.top_emotions)

FileNotFoundError                         Traceback (most recent call last)
Cell In[18], line 2
      1 text = "hateful"
----> 2 emotion = NRCLex(text)
      3 print("hateful", emotion.top_emotions)


File ~\Downloads\NRCLex-master\NRCLex-master\nrclex.py:60, in NRCLex.__init__(self, lexicon_file)
     59 def __init__(self, lexicon_file='nrc_en.json'):
---> 60     with open(lexicon_file, 'r') as json_file:
     61         self.__lexicon__ = load(json_file)

FileNotFoundError: [Errno 2] No such file or directory: 'hateful'

I'm not quite sure what's causing the error. Would you have any suggestions? To check whether the file itself was the problem, I tried loading nrc_en.json with pandas and got the following:

ValueError                                Traceback (most recent call last)
Cell In[4], line 1
----> 1 df = pd.read_json('nrc_en.json')

File ~\Anaconda3\lib\site-packages\pandas\util\_decorators.py:207, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
    205     else:
    206         kwargs[new_arg_name] = new_arg_value
--> 207 return func(*args, **kwargs)

File ~\Anaconda3\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File ~\Anaconda3\lib\site-packages\pandas\io\json\_json.py:612, in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options)
    609     return json_reader
    611 with json_reader:
--> 612     return json_reader.read()

File ~\Anaconda3\lib\site-packages\pandas\io\json\_json.py:746, in JsonReader.read(self)
    744         obj = self._get_object_parser(self._combine_lines(data_lines))
    745 else:
--> 746     obj = self._get_object_parser(self.data)
    747 self.close()
    748 return obj

File ~\Anaconda3\lib\site-packages\pandas\io\json\_json.py:768, in JsonReader._get_object_parser(self, json)
    766 obj = None
    767 if typ == "frame":
--> 768     obj = FrameParser(json, **kwargs).parse()
    770 if typ == "series" or obj is None:
    771     if not isinstance(dtype, bool):

File ~\Anaconda3\lib\site-packages\pandas\io\json\_json.py:880, in Parser.parse(self)
    878     self._parse_numpy()
    879 else:
--> 880     self._parse_no_numpy()
    882 if self.obj is None:
    883     return None

File ~\Anaconda3\lib\site-packages\pandas\io\json\_json.py:1132, in FrameParser._parse_no_numpy(self)
   1129 orient = self.orient
   1131 if orient == "columns":
-> 1132     self.obj = DataFrame(
   1133         loads(json, precise_float=self.precise_float), dtype=None
   1134     )
   1135 elif orient == "split":
   1136     decoded = {
   1137         str(k): v
   1138         for k, v in loads(json, precise_float=self.precise_float).items()
   1139     }

File ~\Anaconda3\lib\site-packages\pandas\core\frame.py:636, in DataFrame.__init__(self, data, index, columns, dtype, copy)
    630     mgr = self._init_mgr(
    631         data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
    632     )
    634 elif isinstance(data, dict):
    635     # GH#38939 de facto copy defaults to False only in non-dict cases
--> 636     mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
    637 elif isinstance(data, ma.MaskedArray):
    638     import numpy.ma.mrecords as mrecords

File ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py:502, in dict_to_mgr(data, index, columns, dtype, typ, copy)
    494     arrays = [
    495         x
    496         if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype)
    497         else x.copy()
    498         for x in arrays
    499     ]
    500     # TODO: can we get rid of the dt64tz special case above?
--> 502 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)

File ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py:120, in arrays_to_mgr(arrays, columns, index, dtype, verify_integrity, typ, consolidate)
    117 if verify_integrity:
    118     # figure out the index, if necessary
    119     if index is None:
--> 120         index = _extract_index(arrays)
    121     else:
    122         index = ensure_index(index)

File ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py:674, in _extract_index(data)
    672 lengths = list(set(raw_lengths))
    673 if len(lengths) > 1:
--> 674     raise ValueError("All arrays must be of the same length")
    676 if have_dicts:
    677     raise ValueError(
    678         "Mixing dicts with non-Series may lead to ambiguous ordering."
    679     )

ValueError: All arrays must be of the same length

Any help you could provide would be useful. Thank you!

Best, 
Frankie


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Issues accessing lexicon .json file #21

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issues accessing lexicon .json file #21

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions