ina-foss · chazo1994 · Dec 7, 2023
diff --git a/inaSpeechSegmenter/segmenter.py b/inaSpeechSegmenter/segmenter.py
@@ -49,7 +49,7 @@
 import warnings
 
 from .export_funcs import seg2csv, seg2textgrid
-
+# from memory_profiler import profile  # debug only: enable together with the "# @profile" marker on batch_process
 def _media2feats(medianame, tmpdir, start_sec, stop_sec, ffmpeg):
     sig = media2sig16kmono(medianame, tmpdir, start_sec, stop_sec, ffmpeg, 'float32')
     with warnings.catch_warnings():
@@ -156,10 +156,17 @@ def __call__(self, mspec, lseg, difflen = 0):
         for lab, start, stop in lseg:
             if lab == self.inlabel:
                 batch.append(patches[start:stop, :])
-
         if len(batch) > 0:
             batch = np.concatenate(batch)
-            rawpred = self.nn.predict(batch, batch_size=self.batch_size, verbose=2)
+            rawpred = []
+            for i in range(0, len(batch), self.batch_size):
+                b_i = batch[i:i+self.batch_size,:,:]
+                rawpred_i = self.nn(b_i, training=False)
+
+                rawpred.append(rawpred_i)
+
+            rawpred = np.concatenate(rawpred)
+
         gc.collect()
 
         ret = []
@@ -293,7 +300,7 @@ def __call__(self, medianame, tmpdir=None, start_sec=None, stop_sec=None):
         # do segmentation   
         return self.segment_feats(mspec, loge, difflen, start_sec)
 
-
+    # @profile  # debug only: memory profiling; needs the commented-out memory_profiler import in this module
     def batch_process(self, linput, loutput, tmpdir=None, verbose=False, skipifexist=False, nbtry=1, trydelay=2., output_format='csv'):
 
         if verbose:

diff --git a/scripts/ina_speech_segmenter.py b/scripts/ina_speech_segmenter.py
@@ -28,7 +28,6 @@
 import os
 import distutils.util
 import warnings
-
 # TODO
 # * allow to use external activity or speech music segmentations
 # * describe URL management in help and interference with glob
@@ -68,14 +67,11 @@
 
 # Do processings
 from inaSpeechSegmenter import Segmenter, seg2csv
-
 # load neural network into memory, may last few seconds
 detect_gender = bool(distutils.util.strtobool(args.detect_gender))
 seg = Segmenter(vad_engine=args.vad_engine, detect_gender=detect_gender, ffmpeg=args.ffmpeg_binary, energy_ratio=args.energy_ratio, batch_size=args.batch_size)
-
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
     base = [os.path.splitext(os.path.basename(e))[0] for e in input_files]
     output_files = [os.path.join(odir, e + '.' + args.export_format) for e in base]
     seg.batch_process(input_files, output_files, verbose=True, output_format=args.export_format)
-
diff --git a/setup.py b/setup.py
@@ -108,7 +108,7 @@
     test_suite="run_test.py",
     description = DESCRIPTION,
     license = "MIT",
-    install_requires=['tensorflow', 'numpy', 'pandas', 'scikit-image', 'pyannote.core', 'matplotlib', 'Pyro4', 'pytextgrid', 'soundfile', 'onnxruntime-gpu'], #'torch'
+    install_requires=['tensorflow[and-cuda]', 'numpy', 'pandas', 'scikit-image', 'pyannote.core', 'matplotlib', 'Pyro4', 'pytextgrid', 'soundfile', 'onnxruntime-gpu'], #'torch'
  #   keywords = "example documentation tutorial",
     url = "https://github.com/ina-foss/inaSpeechSegmenter",
 #    packages=['inaSpeechSegmenter'],