diff --git a/inaSpeechSegmenter/segmenter.py b/inaSpeechSegmenter/segmenter.py
index 65eb079..8d4fdb9 100644
--- a/inaSpeechSegmenter/segmenter.py
+++ b/inaSpeechSegmenter/segmenter.py
@@ -49,7 +49,8 @@ import warnings
 
 from .export_funcs import seg2csv, seg2textgrid
 
-
+import gc
+# from memory_profiler import profile
 def _media2feats(medianame, tmpdir, start_sec, stop_sec, ffmpeg):
     sig = media2sig16kmono(medianame, tmpdir, start_sec, stop_sec, ffmpeg, 'float32')
     with warnings.catch_warnings():
@@ -156,10 +157,17 @@ def __call__(self, mspec, lseg, difflen = 0):
         for lab, start, stop in lseg:
             if lab == self.inlabel:
                 batch.append(patches[start:stop, :])
-
         if len(batch) > 0:
             batch = np.concatenate(batch)
-            rawpred = self.nn.predict(batch, batch_size=self.batch_size, verbose=2)
+            rawpred = []
+            for i in range(0, len(batch), self.batch_size):
+                b_i = batch[i:i+self.batch_size,:,:]
+                rawpred_i = self.nn(b_i, training=False)
+
+                rawpred.append(rawpred_i)
+
+            rawpred = np.concatenate(rawpred)
+            gc.collect()
 
         ret = []
 
@@ -293,7 +301,7 @@ def __call__(self, medianame, tmpdir=None, start_sec=None, stop_sec=None):
 
         # do segmentation
         return self.segment_feats(mspec, loge, difflen, start_sec)
-
+    # @profile
     def batch_process(self, linput, loutput, tmpdir=None, verbose=False, skipifexist=False,
                       nbtry=1, trydelay=2., output_format='csv'):
         if verbose:
diff --git a/scripts/ina_speech_segmenter.py b/scripts/ina_speech_segmenter.py
index 335f54d..3e0a8b0 100755
--- a/scripts/ina_speech_segmenter.py
+++ b/scripts/ina_speech_segmenter.py
@@ -28,7 +28,6 @@ import os
 import distutils.util
 import warnings
 
-
 # TODO
 # * allow to use external activity or speech music segmentations
 # * describe URL management in help and interference with glob
@@ -68,14 +67,11 @@
 # Do processings
 from inaSpeechSegmenter import Segmenter, seg2csv
-
 # load neural network into memory, may last few seconds
 detect_gender = bool(distutils.util.strtobool(args.detect_gender))
 seg = Segmenter(vad_engine=args.vad_engine,
                 detect_gender=detect_gender, ffmpeg=args.ffmpeg_binary, energy_ratio=args.energy_ratio, batch_size=args.batch_size)
-
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
     base = [os.path.splitext(os.path.basename(e))[0] for e in input_files]
     output_files = [os.path.join(odir, e + '.' + args.export_format) for e in base]
     seg.batch_process(input_files, output_files, verbose=True, output_format=args.export_format)
-
diff --git a/setup.py b/setup.py
index d5a7db8..c0d099e 100644
--- a/setup.py
+++ b/setup.py
@@ -108,7 +108,7 @@
     test_suite="run_test.py",
     description = DESCRIPTION,
     license = "MIT",
-    install_requires=['tensorflow', 'numpy', 'pandas', 'scikit-image', 'pyannote.core', 'matplotlib', 'Pyro4', 'pytextgrid', 'soundfile', 'onnxruntime-gpu'], #'torch'
+    install_requires=['tensorflow[and-cuda]', 'numpy', 'pandas', 'scikit-image', 'pyannote.core', 'matplotlib', 'Pyro4', 'pytextgrid', 'soundfile', 'onnxruntime-gpu'], #'torch'
     # keywords = "example documentation tutorial",
     url = "https://github.com/ina-foss/inaSpeechSegmenter",
     # packages=['inaSpeechSegmenter'],