diff --git a/LearningMachine.py b/LearningMachine.py
index 3dbd4af..86393ec 100644
--- a/LearningMachine.py
+++ b/LearningMachine.py
@@ -260,6 +260,10 @@ def train(self, optimizer, loss_fn):
                     optimizer.zero_grad()
                     loss.backward()
                     torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.conf.clip_grad_norm_max_norm)
+                    if isinstance(self.model, nn.DataParallel):
+                        torch.nn.utils.clip_grad_norm_(self.model.module.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
+                    else:
+                        torch.nn.utils.clip_grad_norm_(self.model.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
                     optimizer.step()

                     del loss, logits, logits_softmax, logits_flat
diff --git a/README.md b/README.md
index fb443dc..bc53c5e 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+test
 # ***NeuronBlocks*** - Building Your NLP DNN Models Like Playing Lego

 [![language](https://img.shields.io/badge/language-en%20%7C%20中文-brightgreen.svg)](#language-supported)
diff --git a/block_zoo/Conv2d.py b/block_zoo/Conv2D.py
similarity index 100%
rename from block_zoo/Conv2d.py
rename to block_zoo/Conv2D.py
diff --git a/block_zoo/Embedding.py b/block_zoo/Embedding.py
index e483dc1..d0b1de6 100644
--- a/block_zoo/Embedding.py
+++ b/block_zoo/Embedding.py
@@ -11,6 +11,7 @@ from block_zoo.embedding import *
 import copy
 import logging
+import itertools


 class EmbeddingConf(BaseConf):
     """ Configuration for Embedding
@@ -171,6 +172,11 @@ def forward(self, inputs, use_gpu=False):
         else:
             return features[0]

+    def get_parameters(self):
+        for sub_emb in self.embeddings:
+            for param in self.embeddings[sub_emb].parameters():
+                yield param
+


diff --git a/block_zoo/Pooling2d.py b/block_zoo/Pooling2D.py
similarity index 100%
rename from block_zoo/Pooling2d.py
rename to block_zoo/Pooling2D.py
diff --git a/train.py b/train.py
index 5173914..4151b75 100644
--- a/train.py
+++ b/train.py
@@ -12,12 +12,14 @@ import copy

 import torch
+import torch.nn as nn
 from ModelConf import ModelConf
 from problem import Problem
 from utils.common_utils import dump_to_pkl, load_from_pkl, prepare_dir
 from utils.philly_utils import HDFSDirectTransferer
 from losses import *
 from optimizers import *
+import itertools
 from LearningMachine import LearningMachine


@@ -231,7 +233,10 @@ def main(params):

         loss_fn.cuda()

     ### optimizer
-    optimizer = eval(conf.optimizer_name)(lm.model.parameters(), **conf.optimizer_params)
+    if isinstance(lm.model, nn.DataParallel):
+        optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()) + list(lm.model.module.layers['embedding'].get_parameters()), **conf.optimizer_params)
+    else:
+        optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()) + list(lm.model.layers['embedding'].get_parameters()), **conf.optimizer_params)

     ## train
     lm.train(optimizer, loss_fn)