wandb FileNotFoundError(2, 'No such file or directory') #10097
-
Hi team, I am trying to replicate the https://github.com/explosion/projects/tree/v3/integrations/wandb approach with my own config, using the "Python API" variant of the sweep, and I am getting the FileNotFoundError shown in the title. Here is my config:
[paths]
# Corpus locations. [corpora.train] and [corpora.dev] read these through
# ${paths.train} and ${paths.dev}.
# NOTE(review): both are relative paths — presumably resolved against the
# working directory of the process that loads the config; confirm when the
# script is launched from another directory.
train = "spacy/train.spacy"
dev = "spacy/dev.spacy"
vectors = null
# Forwarded to [initialize] as ${paths.init_tok2vec}.
init_tok2vec = null
# Values shared with [training], which reads them as ${system:seed} and
# ${system:gpu_allocator}.
[system]
gpu_allocator = null
seed = 0
# Pipeline definition. Every name in `pipeline` has a matching
# [components.<name>] section further down.
[nlp]
lang = "en"
pipeline = ["tok2vec","tagger","parser","attribute_ruler","lemmatizer","ner"]
# NOTE(review): "senter" is listed as disabled but is neither in `pipeline`
# nor defined under [components] in this config — confirm this is intended.
disabled = ["senter"]
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 256
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
# All six pipeline components are taken from the en_core_web_lg package via
# `source` rather than being defined from scratch here.
[components]
[components.attribute_ruler]
source = "en_core_web_lg"
[components.lemmatizer]
source = "en_core_web_lg"
[components.ner]
source = "en_core_web_lg"
# parser and tagger additionally set replace_listeners for "model.tok2vec";
# ner does not.
# NOTE(review): presumably this decouples parser/tagger from the shared
# tok2vec component so that further training of tok2vec does not affect
# them — confirm against the spaCy documentation.
[components.parser]
source = "en_core_web_lg"
replace_listeners = ["model.tok2vec"]
[components.tagger]
source = "en_core_web_lg"
replace_listeners = ["model.tok2vec"]
[components.tok2vec]
source = "en_core_web_lg"
# Two corpus readers with identical settings apart from the path.
# [training] refers to them by name as "corpora.train" and "corpora.dev".
[corpora]
[corpora.dev]
@readers = "spacy.Corpus.v1"
# Resolved from [paths] dev.
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[corpora.train]
@readers = "spacy.Corpus.v1"
# Resolved from [paths] train.
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
# Main training settings.
[training]
train_corpus = "corpora.train"
dev_corpus = "corpora.dev"
# NOTE(review): these two use the colon form ${section:key}, while [corpora]
# and [initialize] use the dot form ${section.key} — confirm both resolve
# identically in the spaCy/thinc version in use.
seed = ${system:seed}
gpu_allocator = ${system:gpu_allocator}
# Overridden per trial by the sweep in sweep.py ("training.dropout",
# sampled uniformly between 0.05 and 0.2).
dropout = 0.1
accumulate_gradient = 1
patience = 5000
max_epochs = 0
max_steps = 0
eval_frequency = 1000
# Four of the six pipeline components are frozen; tok2vec and ner are the
# only ones not listed here.
frozen_components = ["tagger","parser","attribute_ruler","lemmatizer"]
before_to_disk = null
annotating_components = []
# Batching by word count. The batch size is not a fixed number: it is
# supplied by the schedule defined in [training.batcher.size] below.
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null
# compounding.v1 schedule parameterised with start 100, stop 1000 and a
# compound factor of 1.001.
[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0
# Adam optimizer settings.
[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = true
eps = 0.00000001
# Overridden per trial by the sweep in sweep.py
# ("training.optimizer.learn_rate", sampled uniformly between 0.0005 and 0.002).
learn_rate = 0.001
# Weights for combining evaluation metrics. ents_f is the only metric with a
# non-zero weight; it is also the metric the sweep in sweep.py maximises.
# NOTE(review): the non-null weights sum to 0.16, not 1.0 — confirm whether
# spaCy normalises them or whether this value is intentional.
[training.score_weights]
tag_acc = null
dep_uas = null
dep_las = null
dep_las_per_type = null
sents_p = null
sents_r = null
sents_f = null
lemma_acc = null
ents_f = 0.16
ents_p = 0.0
ents_r = 0.0
ents_per_type = null
speed = 0.0
# Weights & Biases logger configuration.
[training.logger]
@loggers = "spacy.WandbLogger.v2"
# W&B project name; sweep.py passes the same name to wandb.sweep(project=...)
project_name = "registered-nurse"
# Config values that must not be logged to W&B (here: all data paths)
remove_config_values = ["paths.train", "paths.dev", "corpora.train.path", "corpora.dev.path"]
# Optional: log the model to W&B Artifacts every N steps.
# NOTE(review): this interval (100) is smaller than [training] eval_frequency
# (1000) — confirm the logger can find a saved model directory at the steps
# where it tries to upload one.
model_log_interval = 100
# No pretraining settings are provided; the section is intentionally empty.
[pretraining]
# Initialization settings.
[initialize]
# Vectors are taken from the en_core_web_lg package here, even though
# [paths] vectors is null.
vectors = "en_core_web_lg"
# Resolves to null via [paths] init_tok2vec.
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
# Label sets for ner, parser and tagger are read from JSON files under
# spacy/labels/ by the spacy.read_labels.v1 reader.
# NOTE(review): these are relative paths — presumably resolved against the
# current working directory; confirm the files exist relative to the
# directory from which sweep.py is started.
[initialize.components]
[initialize.components.ner]
[initialize.components.ner.labels]
@readers = "spacy.read_labels.v1"
path = "spacy/labels/ner.json"
[initialize.components.parser]
[initialize.components.parser.labels]
@readers = "spacy.read_labels.v1"
path = "spacy/labels/parser.json"
[initialize.components.tagger]
[initialize.components.tagger.labels]
@readers = "spacy.read_labels.v1"
path = "spacy/labels/tagger.json"
[initialize.tokenizer]
The config above was generated by Prodigy.
sweep.py:
import typer
from pathlib import Path
from spacy.training.loop import train
from spacy.training.initialize import init_nlp
from spacy import util
from thinc.api import Config
import wandb
import os
os.environ["WANDB_CONSOLE"] = "off"
def main(default_config: Path, output_path: Path):
    """Create a W&B Bayesian sweep and run 20 spaCy training trials for it.

    Args:
        default_config: Path to the base spaCy config each trial starts from.
        output_path: Directory passed to spaCy's ``train`` as output location.
    """
    print(default_config, output_path)

    def train_spacy():
        """Run one trial: overlay the sweep's values on the base config, then train."""
        loaded_local_config = util.load_config(default_config)
        with wandb.init(dir=os.getcwd()) as run:
            # Sweep parameter names are dotted config paths (for example
            # "training.dropout"); expand them into a nested dict so they can
            # be merged over the base config.
            sweeps_config = Config(util.dot_to_dict(run.config))
            merged_config = Config(loaded_local_config).merge(sweeps_config)
            nlp = init_nlp(merged_config)
            # Make sure the output directory (including parents) exists before
            # training writes into it; a missing directory would otherwise
            # surface as FileNotFoundError.
            output_path.mkdir(parents=True, exist_ok=True)
            # use_gpu is a device id: -1 selects the CPU. Passing False would
            # be interpreted as 0, i.e. the first GPU.
            train(nlp, output_path, use_gpu=-1)

    sweep_config = {
        "method": "bayes",
        # Maximise the NER F-score reported by spaCy.
        "metric": {
            "name": "ents_f",
            "goal": "maximize",
        },
        # Keys are dotted paths into the spaCy config.
        "parameters": {
            "training.optimizer.learn_rate": {
                "max": 0.002,
                "min": 0.0005,
                "distribution": "uniform",
            },
            "training.dropout": {
                "max": 0.2,
                "min": 0.05,
                "distribution": "uniform",
            },
        },
    }
    sweep_id = wandb.sweep(sweep_config, project="registered-nurse")
    wandb.agent(sweep_id, train_spacy, count=20)
if __name__ == "__main__":
    typer.run(main)
The folder structure: and I am running the sweep like this: Any ideas what exactly triggers the error?
Btw, I've validated that |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 3 replies
-
I generated a new config with |
Beta Was this translation helpful? Give feedback.
-
Hello,
and running the same command |
Beta Was this translation helpful? Give feedback.
Hello,
At the moment I can only guess. Have you tried changing the paths to:
and running the same command
python sweep.py spacy/config.cfg training/
?