Skip to content

Commit c8eec1e

Browse files
authored
add huggingface/electra (#1482)
* add huggingface/electra

Signed-off-by: Guenther Schmuelling <[email protected]>

* pylint

Signed-off-by: Guenther Schmuelling <[email protected]>
1 parent c682ae6 commit c8eec1e

File tree

1 file changed

+79
-0
lines changed

1 file changed

+79
-0
lines changed

tests/huggingface.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,85 @@ def test_TFBartLarge(self):
407407
def test_TFBartLargeCnn(self):
    """Exercise the BART conversion test against the bart-large-cnn checkpoint."""
    self._test_TFBart("facebook/bart-large-cnn", large=True)
409409

410+
# ELECTRA
411+
412+
def _test_Electra(self, size, large=False):
    """Convert a TFElectraModel checkpoint and compare its last_hidden_state output.

    Args:
        size: model name or path handed to ``from_pretrained``.
        large: forwarded to ``run_test``.
    """
    # Imported lazily so the module loads even without transformers installed.
    from transformers import ElectraTokenizer, TFElectraModel
    model = TFElectraModel.from_pretrained(size)
    tok = ElectraTokenizer.from_pretrained(size)
    feed = tok("Hello, my dog is cute", return_tensors="tf")
    spec, feed = self.spec_and_pad(feed)
    self.run_test(model, feed, input_signature=spec, outputs=["last_hidden_state"], large=large)
420+
421+
def test_TFElectraSmall(self):
    """Run the base Electra conversion test on the small discriminator checkpoint."""
    # NOTE(review): large=True despite the "small" checkpoint — presumably selects
    # the external-tensor conversion path in run_test; confirm against run_test.
    self._test_Electra("google/electra-small-discriminator", large=True)
423+
424+
def _test_ElectraForPreTraining(self, size, large=False):
    """Convert a TFElectraForPreTraining checkpoint and compare its logits output.

    Args:
        size: model name or path handed to ``from_pretrained``.
        large: forwarded to ``run_test``.
    """
    # Imported lazily so the module loads even without transformers installed.
    from transformers import ElectraTokenizer, TFElectraForPreTraining
    model = TFElectraForPreTraining.from_pretrained(size)
    tok = ElectraTokenizer.from_pretrained(size)
    feed = tok("Hello, my dog is cute", return_tensors="tf")
    spec, feed = self.spec_and_pad(feed)
    self.run_test(model, feed, input_signature=spec, outputs=["logits"], large=large)
432+
433+
def test_TFElectraForPreTrainingSmall(self):
    """Run the Electra pre-training conversion test on the small discriminator checkpoint."""
    self._test_ElectraForPreTraining("google/electra-small-discriminator", large=True)
435+
436+
def _test_ElectraForMaskedLM(self, size, large=False):
    """Convert a TFElectraForMaskedLM checkpoint and compare its logits output.

    Args:
        size: model name or path handed to ``from_pretrained``.
        large: forwarded to ``run_test``.
    """
    # Imported lazily so the module loads even without transformers installed.
    from transformers import ElectraTokenizer, TFElectraForMaskedLM
    tok = ElectraTokenizer.from_pretrained(size)
    model = TFElectraForMaskedLM.from_pretrained(size)
    feed = tok("The capital of France is [MASK].", return_tensors="tf")
    # Labels are the token ids of the fully spelled-out sentence.
    feed["labels"] = tok("The capital of France is Paris.", return_tensors="tf")["input_ids"]
    spec, feed = self.spec_and_pad(feed)
    self.run_test(model, feed, input_signature=spec, outputs=["logits"], large=large)
445+
446+
def test_TFElectraForMaskedLMSmall(self):
    """Run the Electra masked-LM conversion test on the small discriminator checkpoint."""
    self._test_ElectraForMaskedLM("google/electra-small-discriminator", large=True)
448+
449+
def _test_ElectraForSequenceClassification(self, size, large=False):
    """Convert a TFElectraForSequenceClassification checkpoint and compare its logits output.

    Args:
        size: model name or path handed to ``from_pretrained``.
        large: forwarded to ``run_test``.
    """
    # Imported lazily so the module loads even without transformers installed.
    from transformers import ElectraTokenizer, TFElectraForSequenceClassification
    tok = ElectraTokenizer.from_pretrained(size)
    model = TFElectraForSequenceClassification.from_pretrained(size)
    feed = tok("Hello, my dog is cute", return_tensors="tf")
    feed["labels"] = tf.reshape(tf.constant(1), (-1, 1))  # Batch size 1
    spec, feed = self.spec_and_pad(feed)
    self.run_test(model, feed, input_signature=spec, outputs=["logits"], large=large)
458+
459+
def test_TFElectraForSequenceClassificationSmall(self):
    """Run the Electra sequence-classification conversion test on the small checkpoint."""
    self._test_ElectraForSequenceClassification("google/electra-small-discriminator", large=True)
461+
462+
def _test_ElectraForTokenClassification(self, size, large=False):
    """Convert a TFElectraForTokenClassification checkpoint and compare its logits output.

    Args:
        size: model name or path handed to ``from_pretrained``.
        large: forwarded to ``run_test``.
    """
    # Imported lazily so the module loads even without transformers installed.
    from transformers import ElectraTokenizer, TFElectraForTokenClassification
    tokenizer = ElectraTokenizer.from_pretrained(size)
    model = TFElectraForTokenClassification.from_pretrained(size)
    input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
    # NOTE(review): max_length=128 presumably pads inputs to a fixed sequence
    # length for the exported signature — confirm against spec_and_pad.
    spec, input_dict = self.spec_and_pad(input_dict, max_length=128)
    outputs = ["logits"]
    self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
472+
473+
def test_TFElectraForTokenClassificationSmall(self):
    """Run the Electra token-classification conversion test on the small checkpoint."""
    self._test_ElectraForTokenClassification("google/electra-small-discriminator", large=True)
475+
476+
def _test_ElectraForQuestionAnswering(self, size, large=False):
    """Convert a TFElectraForQuestionAnswering checkpoint and compare its span logits.

    Args:
        size: model name or path handed to ``from_pretrained``.
        large: forwarded to ``run_test``.
    """
    # Imported lazily so the module loads even without transformers installed.
    from transformers import ElectraTokenizer, TFElectraForQuestionAnswering
    tokenizer = ElectraTokenizer.from_pretrained(size)
    model = TFElectraForQuestionAnswering.from_pretrained(size)
    question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
    # The tokenizer encodes the (question, context) pair as one sequence.
    input_dict = tokenizer(question, text, return_tensors="tf")
    spec, input_dict = self.spec_and_pad(input_dict, max_length=128)
    outputs = ["start_logits", "end_logits"]
    self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
485+
486+
def test_TFElectraForQuestionAnsweringSmall(self):
    """Run the Electra question-answering conversion test on the small checkpoint."""
    self._test_ElectraForQuestionAnswering("google/electra-small-discriminator", large=True)
488+
410489

411490
# Allow running this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)