Commit fe29a23

Update Naming
1 parent fd063b4 commit fe29a23

File tree

15 files changed (+86 −81 lines)


bin/inference.py

Lines changed: 3 additions & 1 deletion
@@ -18,13 +18,15 @@
 import numpy as np
 import torchaudio
 from torch import Tensor
+
 from kospeech.vocabs.ksponspeech import KsponSpeechVocabulary
 from kospeech.data.audio.core import load_audio
 from kospeech.models import (
     SpeechTransformer,
     Jasper,
     DeepSpeech2,
     ListenAttendSpell,
+    Conformer,
 )

@@ -67,7 +69,7 @@ def parse_audio(audio_path: str, del_silence: bool = False, audio_extension: str
     elif isinstance(model, DeepSpeech2):
         model.device = opt.device
         y_hats = model.greedy_search(feature.unsqueeze(0), input_length, opt.device)
-    elif isinstance(model, SpeechTransformer) or isinstance(model, Jasper):
+    elif isinstance(model, SpeechTransformer) or isinstance(model, Jasper) or isinstance(model, Conformer):
         y_hats = model.greedy_search(feature.unsqueeze(0), input_length, opt.device)

     sentence = vocab.label_to_string(y_hats.cpu().detach().numpy())
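
Side note on the new dispatch line: all three model classes expose the same greedy_search signature, and Python's isinstance accepts a tuple of types, so the growing or-chain could be collapsed. A minimal sketch, not part of the commit:

    # isinstance() accepts a tuple of types, so the three checks fold into one.
    if isinstance(model, (SpeechTransformer, Jasper, Conformer)):
        y_hats = model.greedy_search(feature.unsqueeze(0), input_length, opt.device)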

kospeech/checkpoint/checkpoint.py

Lines changed: 2 additions & 1 deletion
@@ -16,6 +16,7 @@
 import time
 import torch
 import torch.nn as nn
+
 from kospeech.utils import logger
 from kospeech.data import SpectrogramDataset
 from kospeech.models import ListenAttendSpell

@@ -54,7 +55,7 @@ def __init__(
         optimizer: Optimizer = None,             # stores the state of the optimizer
         trainset_list: list = None,              # list of trainset
         validset: SpectrogramDataset = None,     # validation dataset
-        epoch: int = None                        # current epoch is a loop through the full training data
+        epoch: int = None,                       # current epoch is a loop through the full training data
 ) -> None:
     self.model = model
     self.optimizer = optimizer

kospeech/criterion/label_smoothed_cross_entropy.py

Lines changed: 10 additions & 10 deletions
@@ -30,8 +30,8 @@ class LabelSmoothedCrossEntropyLoss(nn.Module):
         reduction (str): reduction method [sum, mean] (default: sum)
         architecture (str): speech model`s model [las, transformer] (default: las)

-    Inputs: logit, target
-        logit (torch.Tensor): probability distribution value from model and it has a logarithm shape
+    Inputs: logits, target
+        logits (torch.Tensor): probability distribution value from model and it has a logarithm shape
         target (torch.Tensor): ground-thruth encoded to integers which directly point a word in label

     Returns: label_smoothed

@@ -44,7 +44,7 @@ def __init__(
             smoothing: float = 0.1,     # ratio of smoothing (confidence = 1.0 - smoothing)
             dim: int = -1,              # dimension of caculation loss
             reduction='sum',            # reduction method [sum, mean]
-            architecture='las'          # speech model`s model [las, transformer]
+            architecture='las',         # speech model`s model [las, transformer]
     ) -> None:
         super(LabelSmoothedCrossEntropyLoss, self).__init__()
         self.confidence = 1.0 - smoothing

@@ -62,16 +62,16 @@ def __init__(
         else:
             raise ValueError("Unsupported reduction method {0}".format(reduction))

-    def forward(self, logit: Tensor, target: Tensor):
+    def forward(self, logits: Tensor, targets: Tensor):
         if self.architecture == 'transformer':
-            logit = F.log_softmax(logit, dim=-1)
+            logits = F.log_softmax(logits, dim=-1)

         if self.smoothing > 0.0:
             with torch.no_grad():
-                label_smoothed = torch.zeros_like(logit)
+                label_smoothed = torch.zeros_like(logits)
                 label_smoothed.fill_(self.smoothing / (self.num_classes - 1))
-                label_smoothed.scatter_(1, target.data.unsqueeze(1), self.confidence)
-                label_smoothed[target == self.ignore_index, :] = 0
-                return self.reduction_method(-label_smoothed * logit)
+                label_smoothed.scatter_(1, targets.data.unsqueeze(1), self.confidence)
+                label_smoothed[targets == self.ignore_index, :] = 0
+                return self.reduction_method(-label_smoothed * logits)

-        return F.cross_entropy(logit, target, ignore_index=self.ignore_index, reduction=self.reduction)
+        return F.cross_entropy(logits, targets, ignore_index=self.ignore_index, reduction=self.reduction)
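
To make the smoothing arithmetic concrete, here is a self-contained sketch of the target distribution that forward() builds; the values of num_classes, smoothing, and pad_id are illustrative, not taken from the commit:

    import torch

    num_classes, smoothing, pad_id = 5, 0.1, 0          # illustrative numbers
    confidence = 1.0 - smoothing                        # 0.9 kept on the true class
    targets = torch.tensor([2, 4, pad_id])              # last row is padding

    label_smoothed = torch.zeros(len(targets), num_classes)
    label_smoothed.fill_(smoothing / (num_classes - 1))           # 0.025 on wrong classes
    label_smoothed.scatter_(1, targets.unsqueeze(1), confidence)  # 0.9 on each target index
    label_smoothed[targets == pad_id, :] = 0                      # padding rows contribute no loss
    # Every non-padding row sums to 1.0: 0.9 + 4 * 0.025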

kospeech/models/conformer/model.py

Lines changed: 3 additions & 1 deletion
@@ -14,6 +14,7 @@

 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from torch import Tensor
 from typing import Tuple

@@ -87,7 +88,8 @@ def __init__(
     def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor]:
         outputs, output_lengths = self.encoder(inputs, input_lengths)
-        outputs = self.fc(outputs).log_softmax(dim=-1)
+        outputs = self.fc(outputs)
+        outputs = F.log_softmax(outputs, dim=-1)
         return outputs, output_lengths

     def greedy_search(self, inputs: Tensor, input_lengths: Tensor, device: str):
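
The forward() change is purely stylistic: the Tensor method and the functional form of log-softmax are the same operation; the commit just splits the fused line and routes it through torch.nn.functional for consistency with the other models. A quick illustrative check:

    import torch
    import torch.nn.functional as F

    x = torch.randn(2, 10, 5)  # (batch, time, classes), arbitrary sizes
    # Tensor method and functional form produce identical results.
    assert torch.allclose(x.log_softmax(dim=-1), F.log_softmax(x, dim=-1))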

kospeech/models/conv.py

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor


 class MaskConv1d(nn.Conv1d):
-    """1D convolution with sequence masking """
+    """ 1D convolution with sequence masking """
     def __init__(
             self,
             in_channels: int,

kospeech/models/deepspeech2/model.py

Lines changed: 10 additions & 10 deletions
@@ -90,22 +90,22 @@ def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor
             input_lengths (torch.LongTensor): (batch_size)
         """
         inputs = inputs.unsqueeze(1).permute(0, 1, 3, 2)
-        output, output_lengths = self.conv(inputs, input_lengths)
+        outputs, output_lengths = self.conv(inputs, input_lengths)

-        batch_size, num_channels, hidden_dim, seq_length = output.size()
-        output = output.view(batch_size, num_channels * hidden_dim, seq_length).permute(2, 0, 1).contiguous()
+        batch_size, num_channels, hidden_dim, seq_length = outputs.size()
+        outputs = outputs.view(batch_size, num_channels * hidden_dim, seq_length).permute(2, 0, 1).contiguous()

         for rnn_layer in self.rnn_layers:
             rnn_layer.to(self.device)
-            output = rnn_layer(output, output_lengths)
+            outputs = rnn_layer(outputs, output_lengths)

-        output = output.transpose(0, 1)
-        output = self.fc(output)
-        output = F.log_softmax(output, dim=-1)
+        outputs = outputs.transpose(0, 1)
+        outputs = self.fc(outputs)
+        outputs = F.log_softmax(outputs, dim=-1)

-        return output, output_lengths
+        return outputs, output_lengths

     def greedy_search(self, inputs: Tensor, input_lengths: Tensor, device: str):
         with torch.no_grad():
-            output, output_lengths = self.forward(inputs, input_lengths)
-            return output.max(-1)[1]
+            outputs, output_lengths = self.forward(inputs, input_lengths)
+            return outputs.max(-1)[1]
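
greedy_search simply picks the most likely class at every timestep: outputs.max(-1)[1] is the indices half of torch.max, i.e. an argmax over the class dimension. A sketch of what happens downstream (the dummy tensor is a stand-in for forward(); the label_to_string call mirrors bin/inference.py above):

    import torch

    outputs = torch.randn(1, 7, 10).log_softmax(dim=-1)  # (batch, time, classes) stand-in
    y_hats = outputs.max(-1)[1]                          # equivalent to outputs.argmax(dim=-1)
    # bin/inference.py then converts the index sequence back to text:
    # sentence = vocab.label_to_string(y_hats.cpu().detach().numpy())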

kospeech/models/jasper/decoder.py

Lines changed: 4 additions & 4 deletions
@@ -60,12 +60,12 @@ def forward(self, encoder_outputs: Tensor, encoder_output_lengths: Tensor) -> Tu
             encoder_outputs (torch.FloatTensor): (batch_size, dimension, sequence_length)
             encoder_output_lengths (torch.LongTensor): (batch_size)
         """
-        output, output_lengths = encoder_outputs, encoder_output_lengths
+        outputs, output_lengths = encoder_outputs, encoder_output_lengths

         for i, layer in enumerate(self.layers):
-            output, output_lengths = layer(output, output_lengths)
+            outputs, output_lengths = layer(outputs, output_lengths)

-        output = F.log_softmax(output.transpose(1, 2), dim=-1)
+        outputs = F.log_softmax(outputs.transpose(1, 2), dim=-1)
         del encoder_outputs, encoder_output_lengths

-        return output, output_lengths
+        return outputs, output_lengths

kospeech/models/jasper/encoder.py

Lines changed: 2 additions & 2 deletions
@@ -84,10 +84,10 @@ def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor
             prev_output_lengths.append(input_lengths)
             residual = self._get_jasper_dencse_residual(prev_outputs, prev_output_lengths, i)

-        output, output_lengths = self.layers[-1](inputs, input_lengths, residual)
+        outputs, output_lengths = self.layers[-1](inputs, input_lengths, residual)
         del prev_outputs, prev_output_lengths, residual, inputs, input_lengths

-        return output, output_lengths
+        return outputs, output_lengths

     def _get_jasper_dencse_residual(self, prev_outputs: list, prev_output_lengths: list, index: int):
         residual = None

kospeech/models/jasper/model.py

Lines changed: 4 additions & 4 deletions
@@ -72,10 +72,10 @@ def forward(self, inputs: Tensor, input_lengths: Tensor) -> Tuple[Tensor, Tensor
             input_lengths (torch.LongTensor): (batch_size)
         """
         encoder_outputs, output_lengths = self.encoder(inputs.transpose(1, 2), input_lengths)
-        output, output_lengths = self.decoder(encoder_outputs, output_lengths)
-        return output, output_lengths
+        outputs, output_lengths = self.decoder(encoder_outputs, output_lengths)
+        return outputs, output_lengths

     def greedy_search(self, inputs: Tensor, input_lengths: Tensor, device: str):
         with torch.no_grad():
-            output, output_lengths = self.forward(inputs, input_lengths)
-            return output.max(-1)[1]
+            outputs, output_lengths = self.forward(inputs, input_lengths)
+            return outputs.max(-1)[1]

kospeech/models/jasper/sublayers.py

Lines changed: 7 additions & 7 deletions
@@ -80,9 +80,9 @@ def forward(self, inputs: Tensor, input_lengths: Tensor, residual: Tensor) -> Tu
         for layer in self.layers[:-1]:
             inputs, input_lengths = layer(inputs, input_lengths)

-        output, output_lengths = self.layers[-1](inputs, input_lengths, residual)
+        outputs, output_lengths = self.layers[-1](inputs, input_lengths, residual)

-        return output, output_lengths
+        return outputs, output_lengths


 class JasperSubBlock(nn.Module):

@@ -145,13 +145,13 @@ def __init__(
         self.dropout = nn.Dropout(p=dropout_p)

     def forward(self, inputs: Tensor, input_lengths: Tensor, residual: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
-        output, output_lengths = self.conv(inputs, input_lengths)
-        output = self.batch_norm(output)
+        outputs, output_lengths = self.conv(inputs, input_lengths)
+        outputs = self.batch_norm(outputs)

         if residual is not None:
-            output += residual
+            outputs += residual

-        output = self.dropout(self.activation(output))
+        outputs = self.dropout(self.activation(outputs))
         del inputs, input_lengths, residual

-        return output, output_lengths
+        return outputs, output_lengths
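
For context, JasperSubBlock.forward follows the usual conv → batch-norm → optional residual add → activation → dropout ordering. A minimal self-contained sketch of that pattern, using a plain nn.Conv1d in place of the repository's masked convolution and illustrative sizes:

    import torch
    import torch.nn as nn

    conv = nn.Conv1d(64, 64, kernel_size=11, padding=5)  # length-preserving conv
    batch_norm = nn.BatchNorm1d(64)
    activation = nn.ReLU()
    dropout = nn.Dropout(p=0.2)

    inputs = torch.randn(8, 64, 100)          # (batch, channels, time)
    residual = inputs                         # e.g. the block's own input
    outputs = batch_norm(conv(inputs))
    outputs = outputs + residual              # residual add before the nonlinearity
    outputs = dropout(activation(outputs))    # shape stays (8, 64, 100)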
