
Commit 475256f

Merge pull request #731 from NVIDIA/gh/release
[FastPitch/PyT] updated checkpoints, multispeaker and text processing
2 parents: 03c5a9f + bec8259

36 files changed: +1471 -815 lines
PyTorch/SpeechSynthesis/FastPitch/.gitignore

Lines changed: 12 additions & 5 deletions
@@ -1,8 +1,15 @@
-*.swp
-*.swo
-*.pyc
-__pycache__
-scripts_joc/
 runs*/
 LJSpeech-1.1/
 output*
+scripts_joc/
+tests/
+
+*.pyc
+__pycache__
+
+.idea/
+.DS_Store
+
+*.swp
+*.swo
+*.swn

PyTorch/SpeechSynthesis/FastPitch/README.md

Lines changed: 16 additions & 12 deletions
@@ -488,11 +488,11 @@ The `scripts/train.sh` script is configured for 8x GPU with at least 16GB of memory
 ```
 In a single accumulated step, there are `batch_size x gradient_accumulation_steps x GPUs = 256` examples being processed in parallel. With a smaller number of GPUs, increase `--gradient_accumulation_steps` to keep this relation satisfied, e.g., through env variables
 ```bash
-NGPU=4 GRAD_ACC=2 bash scripts/train.sh
+NUM_GPUS=4 GRAD_ACCUMULATION=2 bash scripts/train.sh
 ```
 With automatic mixed precision (AMP), a larger batch size fits in 16GB of memory:
 ```bash
-NGPU=4 GRAD_ACC=1 BS=64 AMP=true bash scripts/train.sh
+NUM_GPUS=4 GRAD_ACCUMULATION=1 BS=64 AMP=true bash scripts/train.sh
 ```

 ### Inference process
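The hunk above only renames the environment variables; the underlying relation `batch_size x gradient_accumulation_steps x GPUs = 256` stays fixed. A minimal sketch of that arithmetic (the helper name and defaults are ours, not part of the repo):

```python
# Hypothetical helper: solve GRAD_ACCUMULATION for a fixed global batch of 256.
def grad_accumulation_steps(global_batch=256, batch_size=32, num_gpus=8):
    per_step = batch_size * num_gpus  # examples processed per optimizer sub-step
    assert global_batch % per_step == 0, 'global batch must divide evenly'
    return global_batch // per_step

print(grad_accumulation_steps(num_gpus=4))                 # 2, i.e. NUM_GPUS=4 GRAD_ACCUMULATION=2
print(grad_accumulation_steps(num_gpus=4, batch_size=64))  # 1, matching the AMP example above
```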
@@ -545,18 +545,18 @@ To benchmark the training performance on a specific batch size, run:

 * NVIDIA DGX A100 (8x A100 40GB)
 ```bash
-AMP=true NGPU=1 BS=128 GRAD_ACC=2 EPOCHS=10 bash scripts/train.sh
-AMP=true NGPU=8 BS=32 GRAD_ACC=1 EPOCHS=10 bash scripts/train.sh
-NGPU=1 BS=128 GRAD_ACC=2 EPOCHS=10 bash scripts/train.sh
-NGPU=8 BS=32 GRAD_ACC=1 EPOCHS=10 bash scripts/train.sh
+AMP=true NUM_GPUS=1 BS=128 GRAD_ACCUMULATION=2 EPOCHS=10 bash scripts/train.sh
+AMP=true NUM_GPUS=8 BS=32 GRAD_ACCUMULATION=1 EPOCHS=10 bash scripts/train.sh
+NUM_GPUS=1 BS=128 GRAD_ACCUMULATION=2 EPOCHS=10 bash scripts/train.sh
+NUM_GPUS=8 BS=32 GRAD_ACCUMULATION=1 EPOCHS=10 bash scripts/train.sh
 ```

 * NVIDIA DGX-1 (8x V100 16GB)
 ```bash
-AMP=true NGPU=1 BS=64 GRAD_ACC=4 EPOCHS=10 bash scripts/train.sh
-AMP=true NGPU=8 BS=32 GRAD_ACC=1 EPOCHS=10 bash scripts/train.sh
-NGPU=1 BS=32 GRAD_ACC=8 EPOCHS=10 bash scripts/train.sh
-NGPU=8 BS=32 GRAD_ACC=1 EPOCHS=10 bash scripts/train.sh
+AMP=true NUM_GPUS=1 BS=64 GRAD_ACCUMULATION=4 EPOCHS=10 bash scripts/train.sh
+AMP=true NUM_GPUS=8 BS=32 GRAD_ACCUMULATION=1 EPOCHS=10 bash scripts/train.sh
+NUM_GPUS=1 BS=32 GRAD_ACCUMULATION=8 EPOCHS=10 bash scripts/train.sh
+NUM_GPUS=8 BS=32 GRAD_ACCUMULATION=1 EPOCHS=10 bash scripts/train.sh
 ```

 Each of these scripts runs for 10 epochs and for each epoch measures the
@@ -569,12 +569,12 @@ To benchmark the inference performance on a specific batch size, run:

 * For FP16
 ```bash
-AMP=true BS_SEQ="1 4 8" REPEATS=100 bash scripts/inference_benchmark.sh
+AMP=true BS_SEQUENCE="1 4 8" REPEATS=100 bash scripts/inference_benchmark.sh
 ```

 * For FP32 or TF32
 ```bash
-BS_SEQ="1 4 8" REPEATS=100 bash scripts/inference_benchmark.sh
+BS_SEQUENCE="1 4 8" REPEATS=100 bash scripts/inference_benchmark.sh
 ```

 The output log files will contain performance numbers for the FastPitch model
@@ -726,6 +726,10 @@ The input utterance has 128 characters, synthesized audio has 8.05 s.

 ### Changelog

+October 2020
+- Added multispeaker capabilities
+- Updated text processing module
+
 June 2020
 - Updated performance tables to include A100 results
PyTorch/SpeechSynthesis/FastPitch/common/text/__init__.py

Lines changed: 2 additions & 73 deletions
@@ -1,74 +1,3 @@
-""" from https://github.com/keithito/tacotron """
-import re
-from common.text import cleaners
-from common.text.symbols import symbols
+from .cmudict import CMUDict
 
-
-# Mappings from symbol to numeric ID and vice versa:
-_symbol_to_id = {s: i for i, s in enumerate(symbols)}
-_id_to_symbol = {i: s for i, s in enumerate(symbols)}
-
-# Regular expression matching text enclosed in curly braces:
-_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
-
-
-def text_to_sequence(text, cleaner_names):
-    '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
-
-    The text can optionally have ARPAbet sequences enclosed in curly braces embedded
-    in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street."
-
-    Args:
-      text: string to convert to a sequence
-      cleaner_names: names of the cleaner functions to run the text through
-
-    Returns:
-      List of integers corresponding to the symbols in the text
-    '''
-    sequence = []
-
-    # Check for curly braces and treat their contents as ARPAbet:
-    while len(text):
-        m = _curly_re.match(text)
-        if not m:
-            sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
-            break
-        sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
-        sequence += _arpabet_to_sequence(m.group(2))
-        text = m.group(3)
-
-    return sequence
-
-
-def sequence_to_text(sequence):
-    '''Converts a sequence of IDs back to a string'''
-    result = ''
-    for symbol_id in sequence:
-        if symbol_id in _id_to_symbol:
-            s = _id_to_symbol[symbol_id]
-            # Enclose ARPAbet back in curly braces:
-            if len(s) > 1 and s[0] == '@':
-                s = '{%s}' % s[1:]
-            result += s
-    return result.replace('}{', ' ')
-
-
-def _clean_text(text, cleaner_names):
-    for name in cleaner_names:
-        cleaner = getattr(cleaners, name)
-        if not cleaner:
-            raise Exception('Unknown cleaner: %s' % name)
-        text = cleaner(text)
-    return text
-
-
-def _symbols_to_sequence(symbols):
-    return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
-
-
-def _arpabet_to_sequence(text):
-    return _symbols_to_sequence(['@' + s for s in text.split()])
-
-
-def _should_keep_symbol(s):
-    return s in _symbol_to_id and s is not '_' and s is not '~'
+cmudict = CMUDict()
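With this change, `common.text` exposes a package-level `CMUDict` instance in place of the removed sequence-conversion helpers. A minimal usage sketch; we assume `lookup` returns a list of ARPAbet pronunciation strings, or `None` on a miss, which is how `acronyms.py` later in this commit consumes it:

```python
# Sketch only: uses the package-level singleton created in __init__.py above.
from common.text import cmudict

pron = cmudict.lookup('FASTPITCH')  # hypothetical query word
if pron is None:
    print('not in CMUdict; acronym handling falls back to letter-by-letter ARPAbet')
else:
    print(pron[0])  # first pronunciation variant
```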
PyTorch/SpeechSynthesis/FastPitch/common/text/abbreviations.py

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+import re
+
+_no_period_re = re.compile(r'(No[.])(?=[ ]?[0-9])')
+_percent_re = re.compile(r'([ ]?[%])')
+_half_re = re.compile('([0-9]½)|(½)')
+
+
+# List of (regular expression, replacement) pairs for abbreviations:
+_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
+    ('mrs', 'misess'),
+    ('ms', 'miss'),
+    ('mr', 'mister'),
+    ('dr', 'doctor'),
+    ('st', 'saint'),
+    ('co', 'company'),
+    ('jr', 'junior'),
+    ('maj', 'major'),
+    ('gen', 'general'),
+    ('drs', 'doctors'),
+    ('rev', 'reverend'),
+    ('lt', 'lieutenant'),
+    ('hon', 'honorable'),
+    ('sgt', 'sergeant'),
+    ('capt', 'captain'),
+    ('esq', 'esquire'),
+    ('ltd', 'limited'),
+    ('col', 'colonel'),
+    ('ft', 'fort'),
+    ('sen', 'senator'),
+]]
+
+
+def _expand_no_period(m):
+    word = m.group(0)
+    if word[0] == 'N':
+        return 'Number'
+    return 'number'
+
+
+def _expand_percent(m):
+    return ' percent'
+
+
+def _expand_half(m):
+    word = m.group(1)
+    if word is None:
+        return 'half'
+    return word[0] + ' and a half'
+
+
+def normalize_abbreviations(text):
+    text = re.sub(_no_period_re, _expand_no_period, text)
+    text = re.sub(_percent_re, _expand_percent, text)
+    text = re.sub(_half_re, _expand_half, text)
+
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
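A quick, hypothetical round-trip through `normalize_abbreviations`; the input sentences are ours, and the outputs follow mechanically from the regexes above:

```python
# Assumes the new module lands at common/text/abbreviations.py, as in this commit.
from common.text.abbreviations import normalize_abbreviations

print(normalize_abbreviations('Dr. Smith paid 50% for lot No. 5.'))
# -> 'doctor Smith paid 50 percent for lot Number 5.'
print(normalize_abbreviations('add ½ cup'))
# -> 'add half cup'
```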
PyTorch/SpeechSynthesis/FastPitch/common/text/acronyms.py

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+import re
+from . import cmudict
+
+_letter_to_arpabet = {
+    'A': 'EY1',
+    'B': 'B IY1',
+    'C': 'S IY1',
+    'D': 'D IY1',
+    'E': 'IY1',
+    'F': 'EH1 F',
+    'G': 'JH IY1',
+    'H': 'EY1 CH',
+    'I': 'AY1',
+    'J': 'JH EY1',
+    'K': 'K EY1',
+    'L': 'EH1 L',
+    'M': 'EH1 M',
+    'N': 'EH1 N',
+    'O': 'OW1',
+    'P': 'P IY1',
+    'Q': 'K Y UW1',
+    'R': 'AA1 R',
+    'S': 'EH1 S',
+    'T': 'T IY1',
+    'U': 'Y UW1',
+    'V': 'V IY1',
+    'X': 'EH1 K S',
+    'Y': 'W AY1',
+    'W': 'D AH1 B AH0 L Y UW0',
+    'Z': 'Z IY1',
+    's': 'Z'
+}
+
+# must ignore roman numerals
+# _acronym_re = re.compile(r'([A-Z][A-Z]+)s?|([A-Z]\.([A-Z]\.)+s?)')
+_acronym_re = re.compile(r'([A-Z][A-Z]+)s?')
+
+
+def _expand_acronyms(m, add_spaces=True):
+    acronym = m.group(0)
+
+    # remove dots if they exist
+    acronym = re.sub('\.', '', acronym)
+
+    acronym = "".join(acronym.split())
+    arpabet = cmudict.lookup(acronym)
+
+    if arpabet is None:
+        acronym = list(acronym)
+        arpabet = ["{" + _letter_to_arpabet[letter] + "}" for letter in acronym]
+        # temporary fix
+        if arpabet[-1] == '{Z}' and len(arpabet) > 1:
+            arpabet[-2] = arpabet[-2][:-1] + ' ' + arpabet[-1][1:]
+            del arpabet[-1]
+
+        arpabet = ' '.join(arpabet)
+    elif len(arpabet) == 1:
+        arpabet = "{" + arpabet[0] + "}"
+    else:
+        arpabet = acronym
+
+    return arpabet
+
+
+def normalize_acronyms(text):
+    text = re.sub(_acronym_re, _expand_acronyms, text)
+    return text
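To see the letter-spelling fallback end to end, here is a hypothetical call; we assume both acronyms miss the CMUdict lookup, so `_expand_acronyms` spells them letter by letter and folds a plural "s" into the last letter's phones as Z:

```python
# Sketch only: assumes common/text/acronyms.py from this commit is importable.
from common.text.acronyms import normalize_acronyms

print(normalize_acronyms('TTS on GPUs'))
# With both dictionary lookups missing:
# -> '{T IY1} {T IY1} {EH1 S} on {JH IY1} {P IY1} {Y UW1 Z}'
```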
