Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions speechscore/basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,17 @@ def scoring(self, data, window=None, score_rate=None):
audios[index] = audio

if window is not None:
maxlen = len(audios[0])
framer = Framing(window * score_rate, window * score_rate, maxlen)
nwin = framer.nwin
result = {}
for (t, win) in enumerate(framer):
result_t = self.windowed_scoring([audio[win] for audio in audios], score_rate)
result[t] = result_t
if win and maxlen > win.stop:
last_win = slice(win.stop, maxlen)
result_t = self.windowed_scoring([audio[last_win] for audio in audios], score_rate)
result[nwin] = result_t
else:
result = self.windowed_scoring(audios, score_rate)
return result
6 changes: 4 additions & 2 deletions speechscore/scores/distill_mos/distill_mos.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ def __init__(self):
super(DISTILL_MOS, self).__init__(name='DISTILL_MOS')
self.intrusive = False
self.score_rate = 16000
self.model = ConvTransformerSQAModel()
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model = ConvTransformerSQAModel().to(self.device)
self.model.eval()

def windowed_scoring(self, audios, score_rate):

score = self.model(torch.from_numpy(np.expand_dims(audios[0], axis=0)).float())
score = self.model(
torch.from_numpy(np.expand_dims(audios[0], axis=0)).float().to(self.device))
score_np = score.detach().cpu().numpy()
return score_np[0][0]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

It's a good practice to end files with a single newline character. This is recommended by style guides like PEP 8 and can prevent issues with some command-line tools and diff viewers that expect a final newline.

2 changes: 1 addition & 1 deletion speechscore/scores/distill_mos/sqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
SEQ_LEN = 122880
MAX_HOP_LEN = 16000

DEFAULT_WEIGHTS_CHKPT = os.path.join("scores/distill_mos/weights", "distill_mos_v7.pt")
DEFAULT_WEIGHTS_CHKPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "weights", "distill_mos_v7.pt")


def _complex_compressed(x, hop_length, win_length):
Expand Down
10 changes: 6 additions & 4 deletions speechscore/scores/dnsmos/dnsmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,25 @@

from basis import ScoreBasis

module_dir = os.path.dirname(os.path.abspath(__file__))

class DNSMOS(ScoreBasis):
def __init__(self):
super(DNSMOS, self).__init__(name='DNSMOS')
self.intrusive = True
self.score_rate = 16000
self.p808_model_path = os.path.join('scores/dnsmos/DNSMOS', 'model_v8.onnx')
self.primary_model_path = os.path.join('scores/dnsmos/DNSMOS', 'sig_bak_ovr.onnx')
self.p808_model_path = os.path.join(module_dir, 'DNSMOS', 'model_v8.onnx')
self.primary_model_path = os.path.join(module_dir, 'DNSMOS', 'sig_bak_ovr.onnx')
self.compute_score = ComputeScore(self.primary_model_path, self.p808_model_path)

def windowed_scoring(self, audios, rate):
return self.compute_score.cal_mos(audios[0], rate)

class ComputeScore:
def __init__(self, primary_model_path, p808_model_path) -> None:
self.onnx_sess = ort.InferenceSession(primary_model_path)
self.p808_onnx_sess = ort.InferenceSession(p808_model_path)
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
self.onnx_sess = ort.InferenceSession(primary_model_path, providers=providers)
self.p808_onnx_sess = ort.InferenceSession(p808_model_path, providers=providers)

def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, sr=16000, to_db=True):
mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=frame_size+1, hop_length=hop_length, n_mels=n_mels)
Expand Down
6 changes: 4 additions & 2 deletions speechscore/scores/nisqa/nisqa.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from basis import ScoreBasis
from scores.nisqa.cal_nisqa import load_nisqa_model

import os
import torch
class NISQA(ScoreBasis):
def __init__(self):
super(NISQA, self).__init__(name='NISQA')
self.intrusive = False
self.score_rate = 48000
self.model = load_nisqa_model("scores/nisqa/weights/nisqa.tar", device='cpu')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model = load_nisqa_model(os.path.join(os.path.dirname(os.path.abspath(__file__)), "weights/nisqa.tar"), device=device)

def windowed_scoring(self, audios, score_rate):
from scores.nisqa.cal_nisqa import cal_NISQA
Expand Down