Skip to content
This repository was archived by the owner on Sep 3, 2023. It is now read-only.

Some notebooks do not run successfully #16

@teoguso

Description

@teoguso

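The notebooks `/notebooks/hatespeech/rulesbased_hatespeech.ipynb` and `/notebooks/hatespeech/transferlearning_hatespeech.ipynb` are currently failing. The first raises a `TypeError` because `normalize` indexes its argument as a DataFrame (`dataframe["text"]`) while the notebook calls it on each comment string via `data['text'].apply(...)`; the second raises a `ValueError` inside `create_model` because the embedding layer is declared with `input_length=140` but receives an input of shape `(None, None, 40)`. See the full tracebacks below.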

rulesbased_hatespeech.ipynb

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-8bfbf01c1139> in <module>
     10 
     11 data = pd.read_csv("../../data/external/hatespeech/hs_data.csv")
---> 12 data['normalized'] = data['text'].apply(lambda comment: normalize(comment))
     13 
     14 # Generate vectors

~/anaconda3/envs/find-out/lib/python3.7/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
   4040             else:
   4041                 values = self.astype(object).values
-> 4042                 mapped = lib.map_infer(values, f, convert=convert_dtype)
   4043 
   4044         if len(mapped) and isinstance(mapped[0], Series):

pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

<ipython-input-1-8bfbf01c1139> in <lambda>(comment)
     10 
     11 data = pd.read_csv("../../data/external/hatespeech/hs_data.csv")
---> 12 data['normalized'] = data['text'].apply(lambda comment: normalize(comment))
     13 
     14 # Generate vectors

~/Code/find-out/src/utils/preprocess_text_pipelines.py in normalize(dataframe)
     62     pipeline.register_processor(remove_stopwords)
     63 
---> 64     dataframe["cleaned"] = dataframe["text"].apply(pipeline.process_text)
     65     dataframe["normalized"] = normalizer(dataframe["cleaned"])
     66     return dataframe

TypeError: string indices must be integers

transferlearning_hatespeech.ipynb

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-bd19b623da49> in <module>
     82 
     83 ### EDA of PREDICTIONS
---> 84 df_original = returns_predictions(path_to_original_model)
     85 df_all = returns_predictions(path_to_fine_tuned_model)
     86 df_last = returns_predictions(path_to_fine_tuned_model_last)

<ipython-input-1-bd19b623da49> in returns_predictions(path_to_model, differences)
     41     """
     42     data = pd.read_csv(path_to_target_data)
---> 43     best_model = create_model(datasets[6], vocab_size)
     44     best_model.load_weights(path_to_model)
     45     predictions = get_predictions(best_model, datasets[4])

~/Code/find-out/src/models/hatespeech/model_translearn_hatespeech.py in create_model(word_embedding_matrix, vocab_size)
     15             weights=[word_embedding_matrix],
     16             input_length=140,
---> 17             trainable=False,
     18         )
     19     )

~/anaconda3/envs/find-out/lib/python3.7/site-packages/keras/engine/sequential.py in add(self, layer)
    179                 self.inputs = network.get_source_inputs(self.outputs[0])
    180         elif self.outputs:
--> 181             output_tensor = layer(self.outputs[0])
    182             if isinstance(output_tensor, list):
    183                 raise TypeError('All layers in a Sequential model '

~/anaconda3/envs/find-out/lib/python3.7/site-packages/keras/engine/base_layer.py in __call__(self, inputs, **kwargs)
    472             if all([s is not None
    473                     for s in to_list(input_shape)]):
--> 474                 output_shape = self.compute_output_shape(input_shape)
    475             else:
    476                 if isinstance(input_shape, list):

~/anaconda3/envs/find-out/lib/python3.7/site-packages/keras/layers/embeddings.py in compute_output_shape(self, input_shape)
    125                 raise ValueError(
    126                     '"input_length" is %s, but received input has shape %s' %
--> 127                     (str(self.input_length), str(input_shape)))
    128             else:
    129                 for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):

ValueError: "input_length" is 140, but received input has shape (None, None, 40)

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions