Extremely low scores of spancat #11835
-
Hello! Config file content:
[paths]
train = null
dev = null
vectors = null
init_tok2vec = null
[system]
gpu_allocator = null
seed = 0
[nlp]
lang = "pl"
pipeline = ["tok2vec","spancat"]
batch_size = 1000
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
[components]
[components.spancat]
factory = "spancat"
max_positive = null
scorer = {"@scorers":"spacy.spancat_scorer.v1"}
spans_key = "sc"
threshold = 0.5
[components.spancat.model]
@architectures = "spacy.SpanCategorizer.v1"
[components.spancat.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
hidden_size = 128
[components.spancat.model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = null
nI = null
[components.spancat.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
upstream = "*"
[components.spancat.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1,2,3]
[components.tok2vec]
factory = "tok2vec"
[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,1000,2500,2500]
include_static_vectors = false
[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3
[corpora]
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[training]
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
accumulate_gradient = 1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null
[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0
[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false
[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
learn_rate = 0.001
[training.score_weights]
spans_sc_f = 1.0
spans_sc_p = 0.0
spans_sc_r = 0.0
[pretraining]
[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer]
... and the training logs:
>> python -m spacy train --verbose config/spacy/config_sm.cfg \
--output ./models/cli/cli-trained-spancat-kpwr-full.spacy \
--system.seed 42 \
--paths.train ./data/inzynierka-kpwr-train-3-full.spacy \
--paths.dev ./data/inzynierka-kpwr-test-3-full.spacy
/home/jjamnicki/miniconda3/envs/bachelor_thesis_project/lib/python3.8/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML
warnings.warn("Can't initialize NVML")
/home/jjamnicki/miniconda3/envs/bachelor_thesis_project/lib/python3.8/site-packages/torch/cuda/__init__.py:529: UserWarning: CUDA initialization: Unexpected error from cudaGetDeviceCount(). Did you run some cuda functions before calling NumCudaDevices() that might have already set an error? Error 803: system has unsupported display driver / cuda driver combination (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:109.)
return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
[2022-11-16 14:26:46,809] [DEBUG] Config overrides from CLI: ['system.seed', 'paths.train', 'paths.dev']
✔ Created output directory:
models/cli/cli-trained-spancat-kpwr-full.spacy
ℹ Saving to output directory:
models/cli/cli-trained-spancat-kpwr-full.spacy
ℹ Using CPU
=========================== Initializing pipeline ===========================
[2022-11-16 14:26:47,216] [INFO] Set up nlp object from config
[2022-11-16 14:26:47,236] [DEBUG] Loading corpus from path: data/inzynierka-kpwr-test-3-full.spacy
[2022-11-16 14:26:47,238] [DEBUG] Loading corpus from path: data/inzynierka-kpwr-train-3-full.spacy
[2022-11-16 14:26:47,238] [INFO] Pipeline: ['tok2vec', 'spancat']
[2022-11-16 14:26:47,245] [INFO] Created vocabulary
[2022-11-16 14:26:47,246] [INFO] Finished initializing nlp object
[2022-11-16 14:26:59,081] [INFO] Initialized pipeline components: ['tok2vec', 'spancat']
✔ Initialized pipeline
============================= Training pipeline =============================
[2022-11-16 14:26:59,112] [DEBUG] Loading corpus from path: data/inzynierka-kpwr-test-3-full.spacy
[2022-11-16 14:26:59,114] [DEBUG] Loading corpus from path: data/inzynierka-kpwr-train-3-full.spacy
ℹ Pipeline: ['tok2vec', 'spancat']
ℹ Initial learn rate: 0.001
E # LOSS TOK2VEC LOSS SPANCAT SPANS_SC_F SPANS_SC_P SPANS_SC_R SCORE
--- ------ ------------ ------------ ---------- ---------- ---------- ------
0 0 10.44 246.39 1.45 0.75 26.74 0.01
0 200 23.15 1734.84 48.75 55.18 43.66 0.49
0 400 2.98 1151.66 50.87 72.30 39.24 0.51
1 600 2.24 1249.81 57.71 71.15 48.54 0.58
1 800 3.51 1030.63 63.53 82.70 51.58 0.64
2 1000 3.66 916.07 70.36 77.50 64.43 0.70
3 1200 4.12 797.97 70.81 77.67 65.06 0.71
4 1400 3.48 664.85 71.25 81.52 63.28 0.71
5 1600 3.43 612.71 71.55 80.44 64.43 0.72
6 1800 3.83 624.41 71.59 78.01 66.15 0.72
8 2000 3.20 601.48 72.79 76.93 69.08 0.73
11 2200 3.46 667.97 73.30 82.54 65.92 0.73
13 2400 2.99 717.81 73.24 78.23 68.85 0.73
16 2600 2.27 670.45 73.29 79.34 68.10 0.73
19 2800 1.82 622.43 73.28 79.70 67.81 0.73
22 3000 1.68 617.95 72.58 76.89 68.73 0.73
25 3200 3.24 575.17 71.99 77.37 67.30 0.72
27 3400 2.42 479.11 73.92 80.11 68.62 0.74
30 3600 2.36 416.87 74.14 82.70 67.18 0.74
33 3800 1.80 396.29 73.81 77.16 70.74 0.74
36 4000 1.66 366.62 73.78 76.37 71.37 0.74
39 4200 1.86 397.35 73.59 81.52 67.07 0.74
41 4400 1.37 384.79 72.77 82.38 65.17 0.73
44 4600 1.41 377.31 75.16 81.92 69.42 0.75
47 4800 1.48 378.25 73.63 80.95 67.53 0.74
50 5000 1.37 357.54 73.50 79.06 68.67 0.74
53 5200 1.53 368.40 73.82 77.32 70.63 0.74
56 5400 1.77 355.94 73.29 80.22 67.47 0.73
58 5600 1.43 351.07 72.72 75.60 70.05 0.73
61 5800 1.20 341.93 72.59 80.17 66.32 0.73
64 6000 1.25 331.24 73.50 79.60 68.27 0.74
67 6200 0.49 321.52 72.51 79.48 66.67 0.73
✔ Saved pipeline to output directory
models/cli/cli-trained-spancat-kpwr-full.spacy/model-last
It works well, but I really need to use the Python API because of my Active Learning experiments.
config = Config().from_disk("./config/spacy/config_sm.cfg")
nlp = Polish.from_config(config)
spancat = nlp.get_pipe("spancat")
for label in LABELS:
spancat.add_label(label)
train_corpus, test_corpus = Corpus(TRAIN_DB_PATH), Corpus(TEST_DB_PATH)
train_data, test_data = tuple(train_corpus(nlp)), tuple(test_corpus(nlp))
pprint(train_data[0].to_dict(), compact=True)
losses = {}
optimizer = nlp.initialize()
for batch in minibatch(train_data, size=1000):
losses = nlp.update(batch, losses=losses, sgd=optimizer)
eval_metrics = nlp.evaluate(test_data)
pprint(eval_metrics, compact=True)
I would be extremely grateful for help! |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 1 reply
-
I found that this works, but I don't know why 😠 Any ideas?
losses = {}
optimizer = nlp.initialize()
for batch in minibatch(train_data, size=compounding(4.0, 32.0, 1.001)):
losses = nlp.update(batch, losses=losses, sgd=optimizer)
eval_metrics = nlp.evaluate(test_data)
|
Beta Was this translation helpful? Give feedback.
-
Hi @jamnicki , from your latest message it seems that changing how the batch was generated works? These things can definitely affect the direction of how your model will be trained. If you want a 1:1 parity between the CLI (assuming you have a config file) and your Python code, you can use the |
Beta Was this translation helpful? Give feedback.
Hi @jamnicki , from your latest message it seems that changing how the batch was generated works? These things can definitely affect the direction of how your model will be trained. If you want a 1:1 parity between the CLI (assuming you have a config file) and your Python code, you can use the
spacy.cli.train:train()
function to do so. Under the hood, it calls spacy.training.loop:train_while_improving()
which contains the actual training loop. You can also use the latter or pattern your training loop from that function.