-
Notifications
You must be signed in to change notification settings - Fork 194
Description
#tokenization define
from bert.tokenization.albert_tokenization import FullTokenizer
def createTokenizer():
    """Build the ALBERT tokenizer from the 30k-clean vocab and spm model files.

    Returns:
        A ``FullTokenizer`` configured with ``do_lower_case=True``.
    """
    vocab_path = "../albert_base/assets/30k-clean.vocab"
    spm_path = "../albert_base/assets/30k-clean.model"
    return FullTokenizer(vocab_path, spm_model_file=spm_path, do_lower_case=True)
def get_masks(tokens, max_seq_length):
    """Return the padding mask for ``tokens``.

    Args:
        tokens: Sequence of tokens for one example.
        max_seq_length: Target length of the mask.

    Returns:
        A list holding ``1`` for every kept token followed by ``0`` for every
        padding position. Sequences longer than ``max_seq_length`` are cut
        down first, so their mask contains only ones.
    """
    # Only slice when the sequence is actually too long.
    kept = tokens[0:max_seq_length] if len(tokens) > max_seq_length else tokens
    real = len(kept)
    return [1] * real + [0] * (max_seq_length - real)
def get_segments(tokens, max_seq_length):
    """Return the segment ids for ``tokens``, padded with zeros.

    Every token gets segment ``0`` until a ``"[SEP]"`` token has been passed;
    all tokens after the first ``"[SEP]"`` get segment ``1``. The ``"[SEP]"``
    token itself keeps the id that was current when it was reached.

    Args:
        tokens: Sequence of tokens for one example.
        max_seq_length: Target length of the result.

    Returns:
        A list of segment ids. Sequences longer than ``max_seq_length`` are
        cut down first; shorter ones are right-padded with ``0``.
    """
    if len(tokens) > max_seq_length:
        # Cut down the excess length before assigning ids.
        tokens = tokens[:max_seq_length]

    segment_ids = []
    segment = 0
    for tok in tokens:
        segment_ids.append(segment)
        if tok == "[SEP]":
            segment = 1

    # After truncation there is nothing left to pad, so this adds no zeros.
    return segment_ids + [0] * (max_seq_length - len(tokens))
def get_ids(tokens, tokenizer, max_seq_length):
    """Convert ``tokens`` to vocabulary ids, padded with zeros.

    Args:
        tokens: Sequence of tokens for one example.
        tokenizer: Object exposing ``convert_tokens_to_ids(tokens)``.
        max_seq_length: Target length of the result.

    Returns:
        The ids returned by the tokenizer. Sequences longer than
        ``max_seq_length`` are cut down before conversion and returned
        as-is; shorter ones are right-padded with ``0``.
    """
    overflow = len(tokens) > max_seq_length
    if overflow:
        tokens = tokens[:max_seq_length]

    ids = tokenizer.convert_tokens_to_ids(tokens)
    if overflow:
        return ids
    return ids + [0] * (max_seq_length - len(ids))
# Length (in tokens) that every encoded sentence is cut or padded to.
max_seq_length = 64
# Module-level tokenizer instance used by prep().
tokenizer = createTokenizer()
def prep(s, get='id'):
    """Tokenize ``s`` and return one of its model input features.

    The sentence is tokenized with the module-level ``tokenizer`` and wrapped
    in ``"[CLS]"`` / ``"[SEP]"`` before being encoded to ``max_seq_length``.

    Args:
        s: The sentence to encode.
        get: ``'id'`` for token ids, ``'mask'`` for the padding mask; any
            other value returns the segment ids.

    Returns:
        The requested feature as a list.
    """
    pieces = ["[CLS]"] + tokenizer.tokenize(s) + ["[SEP]"]
    if get == 'id':
        return get_ids(pieces, tokenizer, max_seq_length)
    if get == 'mask':
        return get_masks(pieces, max_seq_length)
    # Every other selector falls through to segment ids.
    return get_segments(pieces, max_seq_length)
#load the train and test CSV splits
import pandas as pd

train_set = pd.read_csv("../goemotion/train_set.csv")
test_set = pd.read_csv("../goemotion/test_set.csv")

#encode every sentence to token ids and cast every label to int
train_X = list(map(prep, train_set["text"]))
train_Y = [int(label) for label in train_set["emotion"].tolist()]
test_X = list(map(prep, test_set["text"]))
test_Y = [int(label) for label in test_set["emotion"].tolist()]
print("data preprocess finished")
#albert model calling
import os

import bert
import numpy as np
import tensorflow as tf

#GPU config
# Enable memory growth on every visible GPU. Iterating instead of indexing [0]
# avoids an IndexError when no GPU is detected.
for gpu in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)

#parameters
model_name = "albert_base_v2"
model_ckpt = os.path.join("../albert_base", "model.ckpt-best")
model_params = bert.albert_params("../albert_base/")
# sparse_categorical_crossentropy needs one output unit per label id, so the
# head size is derived from the largest label present in the data (the
# original comment indicates labels 0~27, i.e. 28 classes).
num_classes = max(max(train_Y), max(test_Y)) + 1

#call and define model layers
albert_layer = bert.BertModelLayer.from_params(model_params, name="albert")
model_layer = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(max_seq_length,), dtype="int32", name="input_ids"),
    albert_layer,
    # The ALBERT layer emits one vector per token; keep only the first
    # ([CLS]) position so the classifier yields one prediction per sentence.
    tf.keras.layers.Lambda(lambda seq_output: seq_output[:, 0, :]),
    tf.keras.layers.Dense(112, activation=tf.nn.relu),
    # Single softmax output over all classes. The original stacked a 1-unit
    # softmax on top, which always outputs 1.0 and cannot represent the labels.
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax),
])
model_layer.build(input_shape=(None, max_seq_length))
bert.load_albert_weights(albert_layer, model_ckpt)

#compile
model_layer.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.optimizers.Adam(learning_rate=0.00001),
    metrics=["sparse_categorical_accuracy"],
)
# summary() prints by itself and returns None, so it is not wrapped in print().
model_layer.summary()

#checkpoint callback
checkpointName = os.path.join("../albert_base/models/", "albert_faq.ckpt")
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpointName,
    save_weights_only=True,
    verbose=1,
)

#train_start
# Keras expects array inputs; a nested Python list may be interpreted as a
# list of separate model inputs.
train_inputs = np.asarray(train_X, dtype="int32")
train_labels = np.asarray(train_Y, dtype="int32")
test_inputs = np.asarray(test_X, dtype="int32")
test_labels = np.asarray(test_Y, dtype="int32")

# Fit on the training split and validate on the test split (the original
# call had the two swapped).
history = model_layer.fit(
    train_inputs,
    train_labels,
    epochs=300,
    validation_data=(test_inputs, test_labels),
    verbose=1,
    callbacks=[cp_callback],
    batch_size=2,
)
Above is my code, and below is my GPU status.
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.04 Driver Version: 455.23.04 CUDA Version: 11.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce RTX 3090 On | 00000000:09:00.0 On | N/A |
| 33% 53C P2 111W / 350W | 1016MiB / 24265MiB | 1% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
The above is the output of nvidia-smi.
I use tensorflow-gpu 2.2 with CUDA Toolkit 10.1 and cuDNN 7.6.
My computer has a 3900X CPU, 128 GB of RAM, an RTX 3090, and a 500 GB SSD.
When I run the code above, I get the error message below.
File "/home/sentiment/anaconda3/envs/mybert/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 6606, in raise_from_not_ok_status
six.raise_from(core._status_to_exception(e.code, message), None)
File "", line 3, in raise_from
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:GPU:0 to /job:localhost/replica:0/task:0/device:CPU:0 in order to run Identity: GPU sync failed [Op:Identity]
I want to fine-tune ALBERT.
If I use the CPU build of TensorFlow, it works fine, but training takes 6 hours per epoch,
so I would like to use the GPU.
I have tried hard to find a fix for this error, but failed.
Does anyone know how to fix it?