-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathshowing.py
More file actions
157 lines (131 loc) · 4.91 KB
/
showing.py
File metadata and controls
157 lines (131 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
import pandas as pd
import webrtcvad
import torchaudio
class csample:
    """Load one audio file and run WebRTC voice-activity detection on it.

    The waveform is split into ``chunk_num`` equal frames (20 ms frames for
    a 1 s clip at 16 kHz with chunk_num=50) and each frame is classified as
    speech / non-speech.
    """

    def __init__(self, path, vad_mode, chunk_num, vad):
        # `vad` is accepted for interface compatibility, but a fresh detector
        # is created so every sample gets an independent VAD state.
        self.path = path
        self.vad = webrtcvad.Vad()
        self.vad.set_mode(vad_mode)
        self.chunk_num = chunk_num
        # NOTE(review): assumes `path` points at 1 s of 16 kHz audio — the
        # chunking below requires the length to divide evenly by chunk_num.
        self.sample, self.fs = torchaudio.load(self.path)

    def wave_f32toint16(self, sample):
        """Convert a float32 waveform in [-1, 1] to int16 PCM.

        Scales by 32767 (2**15 - 1), not 32768: multiplying a full-scale
        1.0 sample by 32768 wraps around to -32768 on the int16 cast.
        """
        return (sample * (2**15 - 1)).numpy().astype(np.int16)

    def reset_vad(self):
        # Placeholder kept for interface compatibility; no per-sample VAD
        # state is held that needs resetting.
        return

    def sample_vad(self):
        """Return a list of booleans, one per chunk: True where speech."""
        sample = self.wave_f32toint16(self.sample)
        mono = sample[:1, :].T  # keep only the first channel
        # Derive the frame length from the data instead of hard-coding 320,
        # so chunk_num other than 50 (or rates other than 16 kHz) work.
        frame_len = mono.shape[0] // self.chunk_num
        chunks = np.reshape(mono[: frame_len * self.chunk_num],
                            (self.chunk_num, frame_len, 1))
        # webrtcvad expects raw 16-bit mono PCM bytes plus the sample rate.
        return [self.vad.is_speech(chunk.tobytes(), self.fs)
                for chunk in chunks]

    def get_result(self):
        """Return (int16 waveform, per-chunk VAD booleans)."""
        return self.wave_f32toint16(self.sample), self.sample_vad()
class Waveform_Aranger:
    """Run chunked VAD over audio files and locate the speech region.

    Parameters
    ----------
    vad_mode : int
        webrtcvad aggressiveness, 0 (least) to 3 (most aggressive).
    sample_rate : int
        Expected sample rate of the input files, in Hz.
    chunk_num : int
        Number of equal chunks one second of audio is split into.
    """

    def __init__(self, vad_mode=2, sample_rate=16000, chunk_num=50):
        self.vad = webrtcvad.Vad()
        self.vad_mode = vad_mode
        self.vad.set_mode(vad_mode)
        self.sample_rate = sample_rate
        self.chunk_num = chunk_num
        self.chunk_size = sample_rate // chunk_num  # samples per chunk

    def wave_f32toint16(self, sample):
        """Convert a float32 waveform in [-1, 1] to int16 PCM.

        Scales by 32767 (2**15 - 1), not 32768, so a full-scale 1.0 sample
        does not wrap around to -32768 on the int16 cast.
        """
        return (sample * (2**15 - 1)).numpy().astype(np.int16)

    def get_result(self, path):
        """Load `path` and return (int16 waveform, per-chunk VAD booleans)."""
        return csample(path, self.vad_mode, self.chunk_num, self.vad).get_result()

    def find_startend_point(self, path):
        """Return (first, end) chunk indices bounding the detected speech.

        `first` is the first speech chunk; `end` is the first silent chunk
        after the speech (original convention, preserved). When no speech is
        detected, (0, chunk_num - 1) is returned. Also plots the waveform
        and marks speech chunks on the current matplotlib figure — a side
        effect kept from the original behaviour.
        """
        first = 0
        # Default `end` to the last chunk: the original left it at 0 when
        # speech ran all the way to the final chunk, returning end < first.
        end = self.chunk_num - 1
        found_start = False
        found_end = False

        sample, fs = torchaudio.load(path)
        sample = self.wave_f32toint16(sample)       # float32 -> int16 PCM
        mono = sample[:1, :].T                      # first channel only
        # Use the configured chunk size instead of the hard-coded 320.
        chunks = np.reshape(mono[: self.chunk_size * self.chunk_num],
                            (self.chunk_num, self.chunk_size, 1))
        # webrtcvad expects raw 16-bit mono PCM bytes plus the sample rate.
        vad_result = [self.vad.is_speech(chunk.tobytes(), self.sample_rate)
                      for chunk in chunks]

        plt.plot(range(mono.shape[0]), sample[0])   # was hard-coded 16000
        for idx, result in enumerate(vad_result):
            if result:
                if not found_start:
                    first = idx
                    found_start = True
                # Mark the speech chunk on the plot.
                plt.plot([idx * self.chunk_size, (idx + 1) * self.chunk_size],
                         [1, 1], color='red', linewidth=2)
            elif found_start and not found_end:
                end = idx
                found_end = True
        return first, end
# ---- interactive VAD review: load the first annotated clip and plot it ----
annotations_file = 'data_all.csv'
file_labels = pd.read_csv(annotations_file)

wa = Waveform_Aranger(vad_mode=2)

# The clip's file path lives in column 2 of the annotations table.
sample_ph = file_labels.iloc[0, 2]
sample, vad_result = wa.get_result(sample_ph)

fig, ax = plt.subplots()
ax.set_title(sample_ph)
fig.subplots_adjust(bottom=0.2)
l, = ax.plot(range(16000), sample[0], lw=2)

# One horizontal marker per 20 ms chunk: speech chunks get a visible line
# at y=1, silent chunks get a hidden line parked at y=-1.
k = [0] * 50
for idx, result in enumerate(vad_result):
    level = 1 if result else -1
    k[idx], = ax.plot([idx * 320, (idx + 1) * 320], [level, level],
                      color='red', linewidth=2)
    k[idx].set(antialiased=True, visible=bool(result))
class Index:
    """Button-callback state: steps through the rows of `file_labels` and
    redraws the waveform plot for the selected clip.

    Relies on the module-level `file_labels`, `wa`, `ax`, `l` and `k`
    created by the plotting setup above.
    """

    ind = 0  # current row index into file_labels

    def _show(self):
        """Redraw the plot for the clip at the current row index."""
        # Wrap around at both ends. The original raised an IndexError when
        # stepping past the last row ('prev' from 0 already wrapped via
        # pandas negative iloc; this makes both directions consistent).
        self.ind %= len(file_labels)
        path = file_labels.iloc[self.ind, 2]
        sample, vad_result = wa.get_result(path)
        ax.set_title(path)
        ydata = sample[0]
        l.set_ydata(ydata)
        for idx, result in enumerate(vad_result):
            level = 1 if result else -1
            k[idx].set_ydata([level, level])
            k[idx].set(antialiased=True, visible=bool(result))
        ax.set_ylim(min(ydata), max(ydata))
        plt.draw()

    def next(self, event):
        """'Next' button handler: advance to the following clip."""
        self.ind += 1
        self._show()

    def prev(self, event):
        """'Previous' button handler: go back to the preceding clip."""
        self.ind -= 1
        self._show()
# Wire the Previous/Next buttons (below the plot) to the Index callbacks.
callback = Index()

axprev = fig.add_axes([0.7, 0.05, 0.1, 0.075])
axnext = fig.add_axes([0.81, 0.05, 0.1, 0.075])

bprev = Button(axprev, 'Previous')
bprev.on_clicked(callback.prev)
bnext = Button(axnext, 'Next')
bnext.on_clicked(callback.next)

plt.show()