Skip to content

Commit 8b65e3e

Browse files
committed
Merge branch 'r2.0.0rc0' of github.com:NVIDIA/NeMo into r2.0.0rc0
2 parents 38fcd5f + d8afaba commit 8b65e3e

File tree

20 files changed

+644
-339
lines changed

20 files changed

+644
-339
lines changed

examples/nlp/language_modeling/conf/megatron_quantization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export:
3131
decoder_type: llama # gptnext, gpt2, llama
3232
inference_tensor_parallel: 1 # Default using 1 TP for inference
3333
inference_pipeline_parallel: 1 # Default using 1 PP for inference
34-
dtype: 16 # Default precision data type
34+
dtype: bf16 # Default precision data type
3535

3636
model_file: llama2-7b-fp16.nemo # Nemo file path
3737
model_save: llama2-7b-fp8.qnemo # Path where the quantized model will be saved

nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ def _states_to_device(dec_state, device='cpu'):
5858
return dec_state
5959

6060

61+
_DECODER_LENGTHS_NONE_WARNING = "Passing in decoder_lengths=None for CTC decoding is likely to be an error, since it is unlikely that each element of your batch has exactly the same length. decoder_lengths will default to decoder_output.shape[0]."
62+
63+
6164
class GreedyCTCInfer(Typing, ConfidenceMethodMixin):
6265
"""A greedy CTC decoder.
6366
@@ -148,7 +151,7 @@ def __init__(
148151
def forward(
149152
self,
150153
decoder_output: torch.Tensor,
151-
decoder_lengths: torch.Tensor,
154+
decoder_lengths: Optional[torch.Tensor],
152155
):
153156
"""Returns a list of hypotheses given an input batch of the encoder hidden embedding.
154157
Output token is generated auto-regressively.
@@ -167,6 +170,9 @@ def forward(
167170
mode=logging_mode.ONCE,
168171
)
169172

173+
if decoder_lengths is None:
174+
logging.warning(_DECODER_LENGTHS_NONE_WARNING, mode=logging_mode.ONCE)
175+
170176
with torch.inference_mode():
171177
hypotheses = []
172178
# Process each sequence independently
@@ -213,7 +219,7 @@ def forward(
213219
return (packed_result,)
214220

215221
@torch.no_grad()
216-
def _greedy_decode_logprobs(self, x: torch.Tensor, out_len: torch.Tensor):
222+
def _greedy_decode_logprobs(self, x: torch.Tensor, out_len: Optional[torch.Tensor]):
217223
# x: [T, D]
218224
# out_len: [seq_len]
219225

@@ -243,7 +249,7 @@ def _greedy_decode_logprobs(self, x: torch.Tensor, out_len: torch.Tensor):
243249
return hypothesis
244250

245251
@torch.no_grad()
246-
def _greedy_decode_labels(self, x: torch.Tensor, out_len: torch.Tensor):
252+
def _greedy_decode_labels(self, x: torch.Tensor, out_len: Optional[torch.Tensor]):
247253
# x: [T]
248254
# out_len: [seq_len]
249255

@@ -370,7 +376,7 @@ def __init__(
370376
def forward(
371377
self,
372378
decoder_output: torch.Tensor,
373-
decoder_lengths: torch.Tensor,
379+
decoder_lengths: Optional[torch.Tensor],
374380
):
375381
"""Returns a list of hypotheses given an input batch of the encoder hidden embedding.
376382
Output token is generated auto-regressively.
@@ -383,11 +389,18 @@ def forward(
383389
Returns:
384390
packed list containing batch number of sentences (Hypotheses).
385391
"""
392+
393+
input_decoder_lengths = decoder_lengths
394+
395+
if decoder_lengths is None:
396+
logging.warning(_DECODER_LENGTHS_NONE_WARNING, mode=logging_mode.ONCE)
397+
decoder_lengths = torch.tensor([decoder_output.shape[1]], dtype=torch.long).expand(decoder_output.shape[0])
398+
386399
if decoder_output.ndim == 2:
387400
hypotheses = self._greedy_decode_labels_batched(decoder_output, decoder_lengths)
388401
else:
389402
hypotheses = self._greedy_decode_logprobs_batched(decoder_output, decoder_lengths)
390-
packed_result = pack_hypotheses(hypotheses, decoder_lengths)
403+
packed_result = pack_hypotheses(hypotheses, input_decoder_lengths)
391404
return (packed_result,)
392405

393406
@torch.no_grad()

nemo/collections/nlp/modules/common/megatron/clip_grads.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,28 +142,30 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2, use_fsdp=False):
142142
grad_norm = torch.zeros(1, device='cuda', dtype=torch.float32).squeeze()
143143
# Since we will be summing across data parallel groups,
144144
# we need the pow(norm-type).
145-
total_norm = grad_norm ** norm_type
145+
total_norm = grad_norm**norm_type
146146
if use_fsdp:
147147
if len(sharded_grads_for_norm) > 0:
148148
sharded_grad_norm, _ = multi_tensor_applier(
149149
amp_C.multi_tensor_l2norm, dummy_overflow_buf.fill_(0), [sharded_grads_for_norm], False
150150
)
151151
else:
152152
sharded_grad_norm = torch.zeros(1, device='cuda', dtype=torch.float32).squeeze()
153-
total_sharded_norm = sharded_grad_norm ** norm_type
153+
total_sharded_norm = sharded_grad_norm**norm_type
154154
else:
155155
for grad in grads_for_norm:
156156
grad_norm = torch.norm(grad, norm_type)
157-
total_norm += grad_norm ** norm_type
157+
total_norm += grad_norm**norm_type
158158
if use_fsdp:
159159
for grad in sharded_grads_for_norm:
160160
grad_norm = torch.norm(grad, norm_type)
161-
total_sharded_norm += grad_norm ** norm_type
161+
total_sharded_norm += grad_norm**norm_type
162162

163163
if use_fsdp:
164164
# Sum norm of grad shards across data-parallel GPUs.
165165
torch.distributed.all_reduce(
166-
total_sharded_norm, op=torch.distributed.ReduceOp.SUM, group=parallel_state.get_data_parallel_group(),
166+
total_sharded_norm,
167+
op=torch.distributed.ReduceOp.SUM,
168+
group=parallel_state.get_data_parallel_group(with_context_parallel=True),
167169
)
168170
total_norm += total_sharded_norm.squeeze()
169171

nemo/collections/tts/g2p/models/t5.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,23 @@ class T5G2PModel(G2PModel, Exportable):
4646

4747
@property
4848
def input_types(self) -> Optional[Dict[str, NeuralType]]:
49-
return {
50-
"input_ids": NeuralType(('B', 'T'), TokenIndex()),
51-
"attention_mask": NeuralType(('B', 'T'), MaskType(), optional=True),
52-
"labels": NeuralType(('B', 'T'), LabelsType()),
53-
}
49+
if self._input_types is None:
50+
return {
51+
"input_ids": NeuralType(('B', 'T'), TokenIndex()),
52+
"attention_mask": NeuralType(('B', 'T'), MaskType(), optional=True),
53+
"labels": NeuralType(('B', 'T'), LabelsType()),
54+
}
55+
return self._input_types
5456

5557
@property
5658
def output_types(self) -> Optional[Dict[str, NeuralType]]:
57-
return {"loss": NeuralType((), LossType())}
59+
if self._output_types is None:
60+
return {"loss": NeuralType((), LossType())}
61+
return self._output_types
5862

5963
def __init__(self, cfg: DictConfig, trainer: Trainer = None):
64+
self._input_types = None
65+
self._output_types = None
6066
self.world_size = 1
6167
if trainer is not None:
6268
self.world_size = trainer.num_nodes * trainer.num_devices
@@ -91,7 +97,11 @@ def forward(self, input_ids, attention_mask, labels):
9197
# ===== Training Functions ===== #
9298
def training_step(self, batch, batch_idx):
9399
input_ids, attention_mask, labels = batch
94-
train_loss = self.forward(input_ids=input_ids, attention_mask=attention_mask, labels=labels,)
100+
train_loss = self.forward(
101+
input_ids=input_ids,
102+
attention_mask=attention_mask,
103+
labels=labels,
104+
)
95105

96106
self.log('train_loss', train_loss)
97107
return train_loss
@@ -126,7 +136,10 @@ def _setup_infer_dataloader(self, cfg) -> 'torch.utils.data.DataLoader':
126136

127137
# Functions for inference
128138
@torch.no_grad()
129-
def _infer(self, config: DictConfig,) -> List[int]:
139+
def _infer(
140+
self,
141+
config: DictConfig,
142+
) -> List[int]:
130143
"""
131144
Runs model inference.
132145
@@ -161,7 +174,11 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0, split="val"):
161174
input_ids, attention_mask, labels = batch
162175

163176
# Get loss from forward step
164-
val_loss = self.forward(input_ids=input_ids, attention_mask=attention_mask, labels=labels,)
177+
val_loss = self.forward(
178+
input_ids=input_ids,
179+
attention_mask=attention_mask,
180+
labels=labels,
181+
)
165182

166183
# Get preds from generate function and calculate PER
167184
labels_str = self._tokenizer.batch_decode(
@@ -287,15 +304,8 @@ def _prepare_for_export(self, **kwargs):
287304
}
288305

289306
def _export_teardown(self):
290-
self._input_types = self._output_types = None
291-
292-
@property
293-
def input_types(self):
294-
return self._input_types
295-
296-
@property
297-
def output_types(self):
298-
return self._output_types
307+
self._input_types = None
308+
self._output_types = None
299309

300310
def input_example(self, max_batch=1, max_dim=44):
301311
"""
@@ -307,7 +317,11 @@ def input_example(self, max_batch=1, max_dim=44):
307317
sentence = "Kupil sem si bicikel in mu zamenjal stol."
308318
input_ids = [sentence]
309319
input_encoding = self._tokenizer(
310-
input_ids, padding='longest', max_length=self.max_source_len, truncation=True, return_tensors='pt',
320+
input_ids,
321+
padding='longest',
322+
max_length=self.max_source_len,
323+
truncation=True,
324+
return_tensors='pt',
311325
)
312326
return (input_encoding.input_ids,)
313327

0 commit comments

Comments
 (0)