Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions inaSpeechSegmenter/segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import warnings

from .export_funcs import seg2csv, seg2textgrid

# from memory_profiler import profile
def _media2feats(medianame, tmpdir, start_sec, stop_sec, ffmpeg):
sig = media2sig16kmono(medianame, tmpdir, start_sec, stop_sec, ffmpeg, 'float32')
with warnings.catch_warnings():
Expand Down Expand Up @@ -156,10 +156,17 @@ def __call__(self, mspec, lseg, difflen = 0):
for lab, start, stop in lseg:
if lab == self.inlabel:
batch.append(patches[start:stop, :])

if len(batch) > 0:
batch = np.concatenate(batch)
rawpred = self.nn.predict(batch, batch_size=self.batch_size, verbose=2)
rawpred = []
for i in range(0, len(batch), self.batch_size):
b_i = batch[i:i+self.batch_size,:,:]
rawpred_i = self.nn(b_i, training=False)

rawpred.append(rawpred_i)

rawpred = np.concatenate(rawpred)

gc.collect()

ret = []
Expand Down Expand Up @@ -293,7 +300,7 @@ def __call__(self, medianame, tmpdir=None, start_sec=None, stop_sec=None):
# do segmentation
return self.segment_feats(mspec, loge, difflen, start_sec)


# @profile
def batch_process(self, linput, loutput, tmpdir=None, verbose=False, skipifexist=False, nbtry=1, trydelay=2., output_format='csv'):

if verbose:
Expand Down
4 changes: 0 additions & 4 deletions scripts/ina_speech_segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import os
import distutils.util
import warnings

# TODO
# * allow to use external activity or speech music segmentations
# * describe URL handling in the help text, and how URLs interfere with glob expansion
Expand Down Expand Up @@ -68,14 +67,11 @@

# Run the processing
from inaSpeechSegmenter import Segmenter, seg2csv

# load the neural network into memory; this may take a few seconds
detect_gender = bool(distutils.util.strtobool(args.detect_gender))
seg = Segmenter(vad_engine=args.vad_engine, detect_gender=detect_gender, ffmpeg=args.ffmpeg_binary, energy_ratio=args.energy_ratio, batch_size=args.batch_size)

with warnings.catch_warnings():
warnings.simplefilter("ignore")
base = [os.path.splitext(os.path.basename(e))[0] for e in input_files]
output_files = [os.path.join(odir, e + '.' + args.export_format) for e in base]
seg.batch_process(input_files, output_files, verbose=True, output_format=args.export_format)

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
test_suite="run_test.py",
description = DESCRIPTION,
license = "MIT",
install_requires=['tensorflow', 'numpy', 'pandas', 'scikit-image', 'pyannote.core', 'matplotlib', 'Pyro4', 'pytextgrid', 'soundfile', 'onnxruntime-gpu'], #'torch'
install_requires=['tensorflow[and-cuda]', 'numpy', 'pandas', 'scikit-image', 'pyannote.core', 'matplotlib', 'Pyro4', 'pytextgrid', 'soundfile', 'onnxruntime-gpu'], #'torch'
# keywords = "example documentation tutorial",
url = "https://github.com/ina-foss/inaSpeechSegmenter",
# packages=['inaSpeechSegmenter'],
Expand Down