-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfeature_extraction_cnn.py
More file actions
95 lines (78 loc) · 2.81 KB
/
feature_extraction_cnn.py
File metadata and controls
95 lines (78 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
import librosa
import glob
import os
def windows(data, window_size):
start = 0
while(start < len(data)):
yield int(start), int(start + window_size)
start += (window_size / 10)
def extract_features(parent_dir, sub_dirs, file_ext="*.wav",bands=60, frames=101, output=""):
window_size = 512 * (frames-1)
log_specgrams = []
labels = []
# 90%
"""
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
(0, 60, 101, 2)
"""
# 50%
"""
(0, 60, 41, 2)
(13173, 60, 41, 2)
(13021, 60, 41, 2)
(14168, 60, 41, 2)
(14606, 60, 41, 2)
(13727, 60, 41, 2)
(12279, 60, 41, 2)
(12769, 60, 41, 2)
(11955, 60, 41, 2)
(12371, 60, 41, 2)
(12610, 60, 41, 2)
"""
for l, sub_dir in enumerate(sub_dirs):
for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
sound_clip, s = librosa.load(fn)
label = fn.split('\\')[3].split('-')[1]
# UrbanSound8K/audio/fold1/7061-6-0-0.wav
for (start, end) in windows(sound_clip, window_size):
if (len(sound_clip[start:end]) == window_size):
signal = sound_clip[start:end]
melspec = librosa.feature.melspectrogram(signal, n_mels=bands)
logspec = librosa.logamplitude(melspec)
logspec = logspec.T.flatten()[:, np.newaxis].T
# 같은 배열에 대해 차원만 증가시키는 경우 [:, np.newaxis]를 사용한다.
# logspec = (60,41)
# logspec.T.flatten() = (41,60) -> (2460,) -> (2460,1) -> (1, 2460)
log_specgrams.append(logspec)
labels.append(label)
log_specgrams = np.asarray(log_specgrams).reshape(len(log_specgrams), bands, frames, 1)
features = np.concatenate((log_specgrams, np.zeros(np.shape(log_specgrams))), axis=3)
# features
# (5446,60,41,2)
for i in range(len(features)):
features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])
print(features.shape)
np.savez("Extraction/audio" + output ,features=features,labels=labels)
return np.array(features), np.array(labels)
def one_hot_encode(labels):
n_labels = len(labels)
n_unique_labels = len(np.unique(labels))
one_hot_encode = np.zeros((n_labels,n_unique_labels))
# (720,2)
one_hot_encode[np.arange(n_labels), labels.astype('int64')] = 1
return one_hot_encode
parent_dir = "UrbanSound8K\\audio"
for i in range(11):
sub_dirs= ['fold'+str(i)]
features,labels = extract_features(parent_dir,sub_dirs,output="_extraction_90_"+str(i))
labels = one_hot_encode(labels)