Description
My train/val/test jsonlines files are converted from the OntoNotes 5.0 Chinese corpus and look something like this:
{"doc_key": "bc_cctv_00_cctv_0000", "sentences": [["EMPTY"], ["二零零五年", "的", "夏天", ",", "一", "个", "被", "人们", "期待", "已", "久", "的", "画面", "开始", "在", "香港", "的", ">各", "大", "媒体", "频繁", "出现", ","], ["这些", "被", "人们", "所", "熟知", "的", "卡通", "形象", "以", "其", "独有", "的", "魅力", "再", "一", "次", "让", "世人", "的", "目光", "聚集", ">到", "香港", ","], ["全球", "第五", "个", "迪斯尼", "乐园", "即将", "在", "这里", "向", "公众", "开放", "。"], ["迪斯尼", "呢", "最", "重要", "的", "是", ",", "它", "是", "世界", "的", "品>牌", "嘛", "。"], ["那么", "这", "几", "年", "呐", ",", "虽然", "它", "建造", "的", "时候", ",", "呃", "还", ",", "还", "没有", "开幕", "呢", ",", "已经", "有", "很多", "的", "人", "对", "香港", "啊", ",", "可以", "说", "是", "另眼相看", "呐", "。"], ["<", "English", ">", "Then", "welcome", "to", "the", "official", "writing", "cere
mony", "of", "Hongkong", "DiskneyLand", "<", "English", ">", "."], ["香港", "迪斯尼", "乐园", "的", "建设", "开始", "于", "两", "年", "前", "的", "二零零三>年", ","], ["这", "年", "一月", ",", "香港", "政府", "将", "大屿山", "下", "的", "这", "片", "近年", "来", "最", "大", "的", "填海", "工程", "所", "得到", "的", "二百", "公顷", "土地", ",", "交给", "了", "迪斯尼", "公司", "。"], ["<", "English", ">", "One", "<", "English", ">", "."], ["EMPTY"], ["从", "那时", "开始", "这里", "就", "成", "了", "香港", "的", "一", "个", "禁区", "。"], ["同", "在", "大屿山", "与", "之", "相邻", "的", "香港", "国际", "机场", ",", "调整", "了", "航线", ","], ["使", "这里", "成为", "一", "个", "禁飞区", "。"], ["第一", "次", "在", "中国", "土地", "上", "落户", "的", "米奇", "老鼠", "的", "新", "家", ",", "引起", "了", "全球", "的", "关注", "。"], ["EMPTY"], ["现在", "距离", "香港", "迪斯尼
", "乐园", "九月", "十二号", "的", "开业", "只", "有", "一", "个", "月", "的", "时间", "了", ","], ["通往", "迪斯尼", "的", "地铁", "也", "已经", "建好", "。"], ["地铁站", "里", "不时", "会", "有", "乘客", "在", "售票机", "上", "点击", "迪斯尼", "一", "站", ","], ["试图", "买", "票", "去", "先睹为快", "。"], ["但是", "迪斯尼", "地铁站", "的", "开通日", "却", "被", "定在", "开业
", "当天", "。"], ["两", "年", "来", ",", "迪斯尼", "一直", "保持", "着", "它", "的", "神秘", ","], ["没有", "任何", "一", "家", "媒体", "被", "允许", "进入", "拍摄", "。"], ["我们", "乘坐", "出租车", "沿着", "通往", "迪斯尼", "方向", "的", "公路", "一路", "向前", ","], ["试图", "近距离", "去", "开始", "于", "两", "年", "前", "的", "二零零三年", ","], ["但是", "在", "迪斯尼", "的", "任何", "标志", "都", "还", "没有", "进入", "我们", "的", "视线", "时", ",", "车子", "就", "在", "去往", "迪斯尼", "的", "岔路口", "被", "保安", "人员", "拦", "了", "下来", ","], [">回来", "的", "路", "上", ",", "出租车", "司机", "在", "了解", "我们", "的", "意图", "后", ",", "给", "我们", "做", "了", "这样", "的", "解释", "。"], ["<", "Cantonese", ">", "呃", "据", "保安", "说", "是", "全部", "暂时", "未", "正式", "开", "<", "Cantonese", ">"], ["<", "Cantonese", ">", "开放", "之前", "呢", "就", "全部", ",", "任何", "车辆", ",", "除了", "特别", "有", "批准", "之外", "呢", "才", "可以", "进入", "<", "/", "Cantonese", ">"], ["<", "Cantonese", ">", "如果", "不", "是", "的话", "全部", "都", "不", "可以", "进入", "<", "/", "Cantonese", ">"], ["<", "Cantonese", ">", "尤其是", "不", "可以", "摄录机", "拍摄", "<", "/", "Cantonese", ">"], ["<", "C
antonese", ">", "啊", ",", "全部", "是", "高度", "机密", "<", "/", "Cantonese", ">"], ["<", "Cantonese", ">", "如果", "是", "啊", ",", "未", "批准", "拍摄", "呢", "<", "/", "Cantonese", ">"], ["<", "Cantonese", ">", "可", "分", ",", "就", "是", "随时", "呢", ",", "就", ",", "就", "接受", "法律", "追究", "的", ",", "很", "麻烦", "的", "<", "/", "Cantonese", ">"], ["迪斯尼", "公司", "虽然", "把", "中国", "的", "迪斯尼", "公园", "选址", "在", "香港", ",", "但是", "最", "让", "他们", "心动", "的", "却", "是", "中国", "内地", "的", "游客", "市场", "。"], ["自从", "香港", "和", "内地", "开通", "自由", "行", "后", ",", "来", "香港", "旅游", "的", "内地", "旅客", "越来越", "多", "。"], ["开始", "到", "现在", "啊", ",", "已经", "有", ",", "已经", "有", "七百多万", "的", "个人游", "的", "旅客", "来", "香港", "了", ","], ["那么", "现在", "呢", "我们", "呃", "相信", "哪", "这>个", "是", "会", "越来越", "多", "啦", ","], ["现在", "差不多", "两", "年", "了", "嘛", ","], ["还有", "现在", "三十四", "个", "城市", "呢", "会", "增加", "的", "。"], ["香港", "是", "由", "百", "年", "前", "的", "一", "个", "渔港", "发展", "成", "今天", "的", "国际", "大", "都会", ","], ["这里", "东", "西", "方", "文化", "荟萃", ",", "新", "旧", "事物", "交织", "共", "融", "。"], ["来到", "香港", ",", "你", "可以", "在", "高楼", "大厦", "间", "穿梭", "闲逛", ","], ["于", "商场", "名店", "中", "尽情", "地", "搜购", "来自", "各", "国", "的", "商品", "。"], ["在", "茶餐厅", "或者", "旺角", "的", "街头", "品尝", "来自", "世界", "各", "地", "的", "美食", "小吃", "。"], ["EMPTY"], ["来到", "汇聚", "了", "各", "路", "神仙", "的", "浅水湾", "烧香", "许愿", ","], ["感受", "香港", "最", "有", "魅力", "的", "阳光", "沙滩", "。"], ["EMPTY"], ["登上", "太平山", "顶", ",", "将", "香港岛", "和", "维多利亚湾", "的", "美丽", "风光", "尽收>眼底", "。"], ["EMPTY"]], "mention_clusters": [[[1, 15, 16], [2, 22, 23], [3, 7, 8], [5, 26, 27], [6, 11, 12], [7, 0, 1], [8, 4, 5], [11, 7, 8], [16, 2, 3], [34, 10, 11], [35, 1, 2], [35, 10, 11], [36, 16, 17], [40, 0, 1], [41, 0, 1], [42, 1, 2], [47, 1, 2]], [[2, 9, 10]], [[4, 0, 1], [4, 7, 8]], [[5, 7, 8], [6, 11, 13], [7, 0, 3], [14, 12, 13], [16, 2, 5], [17, 1, 2], [18, 10, 11], [20, 1, 2], [21, 4, 5], [21, 8, 9], [23, 5, 6], [25, 2, 3], [25, 19, 20], [34, 4, 8]], [[8, 7, 8], [12, 2, 3]], [[8, 7, 25], [11, 3, 4], [12, 4, 5], [13, 1, 2]], [[11, 1, 2]], [[14, 3, 4], [34, 4, 5], [34, 20, 21]], [[18, 10, 13], [20, 1, 3]], [[20, 1, 5]], [[23, 0, 1], [25, 10, 11], [26, 9, 10], [26, 15, 16]], [[23, 2, 3], [25, 15, 16]], [[27, 5, 6]], [[34, 15, 16]], [[36, 2, 3], [37, 1, 2], [38, 0, 1], [39, 1, 2]], [[37, 7, 8]]]}
My config is as follows:
vi conf/model/cross/mdeberta.yaml
module:
  _target_: models.pl_modules.CrossPLModule
  RAdam:
    _target_: torch.optim.RAdam
    lr: 2e-5
  Adafactor:
    _target_: transformers.Adafactor
    lr: 3e-5
    weight_decay: 0.01
    scale_parameter: False
    relative_step: False
  lr_scheduler:
    num_warmup_steps: 10
    num_training_steps: 10000
  opt: "Adafactor" #RAdam
model:
  _target_: models.model_cross.xCoRe_system
  language_model: "mdeberta-v3-base"
  huggingface_model_name: "microsoft/mdeberta-v3-base"
  freeze_encoder: False
  span_representation: "concat_start_end"
  cluster_representation: "transformer" #average, transformer
  t: "mes"

But my training output is as below:
...
...
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
[15:10:15] Instantiating the Trainer
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/lightning_fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
You are using a CUDA device ('NVIDIA RTX PRO 6000 Blackwell Server Edition') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/transformers/convert_slow_tokenizer.py:566: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
  warnings.warn(
The tokenizer you are loading from '/home/mike/.cache/huggingface/hub/models--microsoft--mdeberta-v3-base/snapshots/a0484667b22365f84929a935b5e50a51f71f159d/' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/pytorch_lightning/utilities/model_summary/model_summary.py:242: Precision 16-mixed is not supported by the model summary. Estimated model size in MB will not be accurate. Using 32 bits instead.
┏━━━┳━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
┃   ┃ Name  ┃ Type         ┃ Params ┃ Mode  ┃ FLOPs ┃
┡━━━╇━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
│ 0 │ model │ xCoRe_system │ 327 M  │ train │ 0     │
└───┴───────┴──────────────┴────────┴───────┴───────┘
Trainable params: 327 M
Non-trainable params: 0
Total params: 327 M
Total estimated model params size (MB): 1.3 K
Modules in train mode: 121
Modules in eval mode: 237
Total FLOPs: 0
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=383` in the `DataLoader` to improve performance.
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:434: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=383` in the `DataLoader` to improve performance.
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:534: Found 237 module(s) in eval mode at the start of training. This may lead to unexpected behavior during training. If this is intentional, you can ignore this warning.
Epoch 0/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0/1753 0:00:00 • -:--:-- 0.00it/s
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Epoch 0/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:06:17 • 0:06:50 2.14it/s v_num: o1i2
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
/home/mike/miniconda3/envs/xcore/lib/python3.12/site-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
Epoch 0/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:19 • 0:06:50 2.14it/s v_num: o1i2
Epoch 0, global step 219: 'val/full_conll2012_f1_score' reached 0.00000 (best 0.00000), saving model to '/home/mike/xcore/experiments/xcore/microsoft/mdeberta-v3-base/2026-01-07/15-10-02/wandb/run-20260107_151014-5892o1i2/files/checkpoints/checkpoint-val_f1_0.0000-epoch_00.ckpt' as top 1
Epoch 0/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:36 • 0:00:02 0.81it/s v_num: o1i2
Epoch 0, global step 438: 'val/full_conll2012_f1_score' was not in top 1
Epoch 1/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:28 • 0:06:48 2.15it/s v_num: o1i2
Epoch 1, global step 658: 'val/full_conll2012_f1_score' was not in top 1
Epoch 1/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:48 • 0:00:02 0.80it/s v_num: o1i2
Epoch 1, global step 877: 'val/full_conll2012_f1_score' was not in top 1
Epoch 2/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:20 • 0:06:39 2.20it/s v_num: o1i2
Epoch 2, global step 1097: 'val/full_conll2012_f1_score' was not in top 1
Epoch 2/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:46 • 0:00:02 0.97it/s v_num: o1i2
Epoch 2, global step 1316: 'val/full_conll2012_f1_score' was not in top 1
Epoch 3/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:28 • 0:06:11 2.37it/s v_num: o1i2
Epoch 3, global step 1536: 'val/full_conll2012_f1_score' was not in top 1
Epoch 3/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:46 • 0:00:02 0.86it/s v_num: o1i2
Epoch 3, global step 1755: 'val/full_conll2012_f1_score' was not in top 1
Epoch 4/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:21 • 0:05:51 2.50it/s v_num: o1i2
Epoch 4, global step 1975: 'val/full_conll2012_f1_score' was not in top 1
Epoch 4/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:44 • 0:00:02 0.88it/s v_num: o1i2
Epoch 4, global step 2194: 'val/full_conll2012_f1_score' was not in top 1
Epoch 5/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:21 • 0:07:41 1.91it/s v_num: o1i2
Epoch 5, global step 2414: 'val/full_conll2012_f1_score' was not in top 1
Epoch 5/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:50 • 0:00:02 0.84it/s v_num: o1i2
Epoch 5, global step 2633: 'val/full_conll2012_f1_score' was not in top 1
Epoch 6/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:09 • 0:06:14 2.35it/s v_num: o1i2
Epoch 6, global step 2853: 'val/full_conll2012_f1_score' was not in top 1
Epoch 6/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:30 • 0:00:02 0.87it/s v_num: o1i2
Epoch 6, global step 3072: 'val/full_conll2012_f1_score' was not in top 1
Epoch 7/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:19 • 0:07:01 2.08it/s v_num: o1i2
Epoch 7, global step 3292: 'val/full_conll2012_f1_score' was not in top 1
Epoch 7/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:36 • 0:00:02 0.96it/s v_num: o1i2
Epoch 7, global step 3511: 'val/full_conll2012_f1_score' was not in top 1
Epoch 8/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:21 • 0:07:50 1.87it/s v_num: o1i2
Epoch 8, global step 3731: 'val/full_conll2012_f1_score' was not in top 1
Epoch 8/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:51 • 0:00:02 0.81it/s v_num: o1i2
Epoch 8, global step 3950: 'val/full_conll2012_f1_score' was not in top 1
Epoch 9/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:28 • 0:06:43 2.18it/s v_num: o1i2
Epoch 9, global step 4170: 'val/full_conll2012_f1_score' was not in top 1
Epoch 9/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:31 • 0:00:02 0.97it/s v_num: o1i2
Epoch 9, global step 4389: 'val/full_conll2012_f1_score' was not in top 1
Epoch 10/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:29 • 0:06:45 2.17it/s v_num: o1i2
Epoch 10, global step 4609: 'val/full_conll2012_f1_score' was not in top 1
Epoch 10/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:58 • 0:00:02 0.84it/s v_num: o1i2
Epoch 10, global step 4828: 'val/full_conll2012_f1_score' was not in top 1
Epoch 11/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:33 • 0:06:06 2.40it/s v_num: o1i2
Epoch 11, global step 5048: 'val/full_conll2012_f1_score' was not in top 1
Epoch 11/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:49 • 0:00:02 0.93it/s v_num: o1i2
Epoch 11, global step 5267: 'val/full_conll2012_f1_score' was not in top 1
Epoch 12/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:32 • 0:07:18 2.01it/s v_num: o1i2
Epoch 12, global step 5487: 'val/full_conll2012_f1_score' was not in top 1
Epoch 12/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:53 • 0:00:02 0.88it/s v_num: o1i2
Epoch 12, global step 5706: 'val/full_conll2012_f1_score' was not in top 1
Epoch 13/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:21 • 0:06:43 2.18it/s v_num: o1i2
Epoch 13, global step 5926: 'val/full_conll2012_f1_score' was not in top 1
Epoch 13/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 1752/1753 0:16:48 • 0:00:02 0.91it/s v_num: o1i2
Epoch 13, global step 6145: 'val/full_conll2012_f1_score' was not in top 1
Epoch 14/299 ━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━━━━━━━━━━━ 876/1753 0:07:21 • 0:07:32 1.94it/s v_num: o1i2
Epoch 14, global step 6365: 'val/full_conll2012_f1_score' was not in top 1
Epoch 14/299 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━━━━━━━ 1296/1753 0:10:53 • 0:03:00 2.54it/s v_num: o1i2
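One more check I can run on my side, to rule out a malformed optimizer block in the config, is instantiating it in isolation. Below is a rough sketch of that check; it assumes the YAML is read with OmegaConf/Hydra and that the module picks the optimizer sub-config via the `opt` key (that part is just my guess at how the repo works), and the `torch.nn.Linear` is only a throwaway parameter source, so this exercises the config values rather than the training loop.

# quick config check, independent of the xCoRe training code
import torch
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load("conf/model/cross/mdeberta.yaml")
params = torch.nn.Linear(8, 8).parameters()  # throwaway params, just to build the optimizer

opt_name = cfg.module.opt                      # "Adafactor" in my config
optimizer = instantiate(cfg.module[opt_name], params)
print(type(optimizer).__name__, optimizer.defaults)  # should show Adafactor and its defaults

Assuming this comes back clean, my main question remains why 'val/full_conll2012_f1_score' stays at 0.00000 on the converted Chinese data. Is the jsonlines format above what xCoRe expects, or is something in my config wrong for Chinese/mDeBERTa?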