Skip to content

Commit a3be0b4

Browse files
committed
utilise Whisper HF hub models for transcription
1 parent fc18afa commit a3be0b4

File tree

14 files changed

+1332
-286
lines changed

14 files changed

+1332
-286
lines changed

.github/workflows/ci-pipeline.yml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010
jobs:
1111
main:
1212

13-
runs-on: ubuntu-latest
13+
runs-on: ubuntu-22.04
1414
strategy:
1515
matrix:
1616
python-version: ["3.10"]
@@ -29,11 +29,12 @@ jobs:
2929
sudo apt-get -y install libsndfile-dev
3030
sudo apt-get -y install build-essential python3-dev
3131
python -m pip install --upgrade pip
32-
pip install -e .
33-
pip install -e ".[stretch]"
34-
pip install -e ".[llm]"
35-
pip install -e ".[dev]"
36-
pip install -e ".[site]"
32+
python -m pip install "setuptools<65.0.0" wheel
33+
python -m pip install --no-build-isolation -e .
34+
python -m pip install --no-build-isolation -e ".[stretch]"
35+
python -m pip install --no-build-isolation -e ".[llm]"
36+
python -m pip install --no-build-isolation -e ".[dev]"
37+
python -m pip install --no-build-isolation -e ".[site]"
3738
- name: Type checking
3839
run: |
3940
python -m mypy --follow-imports=normal subaligner

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ Assuming your media assets are stored under `d:\media`, open built-in command pr
162162
<pre><code>subaligner -m dual -v video.mp4 -s subtitle.srt -tr helsinki-nlp -o subtitle_aligned.srt -t src,tgt</code></pre>
163163
<pre><code>subaligner -m dual -v video.mp4 -s subtitle.srt -tr facebook-mbart -tf large -o subtitle_aligned.srt -t src,tgt</code></pre>
164164
<pre><code>subaligner -m dual -v video.mp4 -s subtitle.srt -tr facebook-m2m100 -tf small -o subtitle_aligned.srt -t src,tgt</code></pre>
165-
<pre><code>subaligner -m dual -v video.mp4 -s subtitle.srt -tr whisper -tf small -o subtitle_aligned.srt -t src,eng</code></pre>
165+
<pre><code>subaligner -m dual -v video.mp4 -s subtitle.srt -tr whisper -tf small -o subtitle_aligned.srt -t src,tgt</code></pre>
166166
</details>
167167

168168
<details>
@@ -251,6 +251,6 @@ This tool wouldn't be possible without the following packages:
251251
[pysubs2](https://github.com/tkarabela/pysubs2)
252252
[aeneas](https://www.readbeyond.it/aeneas/)
253253
[transformers](https://huggingface.co/transformers/)
254-
[openai-whisper](https://github.com/openai/whisper).
254+
[whisper](https://openai.com/index/whisper/).
255255

256256
Thanks to Alan Robinson and Nigel Megitt for their invaluable feedback.

docker/Dockerfile-Dev

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@ RUN ["/bin/bash", "-c", "apt-get -y update &&\
2222
apt-get -y install python3-pip &&\
2323
python3 -m pip install --upgrade pip &&\
2424
python3 -m pip install --upgrade \"setuptools<65.0.0\" wheel &&\
25-
python3 -m pip install -e . &&\
26-
python3 -m pip install -e \".[harmony]\""]
25+
python3 -m pip install --no-build-isolation -e . &&\
26+
python3 -m pip install --no-build-isolation -e \".[harmony]\""]

pyproject.toml

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ classifiers = [
2525
"Programming Language :: Python :: 3.9",
2626
"Programming Language :: Python :: 3.8",
2727
"Intended Audience :: Developers",
28-
"Topic :: Utilities"
28+
"Topic :: Utilities",
2929
]
3030
dependencies = [
3131
"astor==0.7.1",
@@ -90,33 +90,37 @@ dependencies = [
9090
"urllib3~=1.26.5",
9191
"wrapt==1.14.0",
9292
"Werkzeug>=0.15.3",
93-
"zict==0.1.3"
93+
"zict==0.1.3",
9494
]
9595

9696
[project.optional-dependencies]
9797
harmony = [
9898
"aeneas~=1.7.3.0; python_version < '3.12'",
9999
# "aeneas @ git+https://github.com/baxtree/aeneas.git@v1.7.3.1#egg=aeneas ; python_version >= '3.12'",
100-
"dtw-python~=1.5.3",
101100
"safetensors~=0.5.3",
102101
"sentencepiece~=0.1.95; python_version < '3.12'",
103102
"sentencepiece~=0.2.0; python_version >= '3.12'",
104-
"torch<2.3.0",
105-
"torchaudio<2.3.0",
106-
"transformers<4.37.0",
107-
"openai-whisper==20250625"
103+
"torch~=2.9.1",
104+
"torchaudio~=2.9.1",
105+
"transformers==4.57.3",
106+
"accelerate~=1.12.0",
107+
"soxr==1.0.0",
108+
"webrtcvad~=2.0.10",
109+
"silero-vad~=6.2.0",
108110
]
109111
dev = [
110112
"aeneas~=1.7.3.0; python_version < '3.12'",
111113
# "aeneas @ git+https://github.com/baxtree/aeneas.git@v1.7.3.1#egg=aeneas ; python_version >= '3.12'",
112-
"dtw-python~=1.5.3",
113114
"safetensors~=0.5.3",
114115
"sentencepiece~=0.1.95; python_version < '3.12'",
115116
"sentencepiece~=0.2.0; python_version >= '3.12'",
116-
"torch<2.3.0",
117-
"torchaudio<2.3.0",
118-
"transformers<4.37.0",
119-
"openai-whisper==20250625",
117+
"torch~=2.9.1",
118+
"torchaudio~=2.9.1",
119+
"transformers==4.57.3",
120+
"soxr==1.0.0",
121+
"accelerate~=1.12.0",
122+
"webrtcvad~=2.0.10",
123+
"silero-vad~=6.2.0",
120124
"mock==4.0.3",
121125
"coverage==5.5",
122126
"tox~=3.23.0",
@@ -130,11 +134,11 @@ dev = [
130134
"mypy==1.3.0",
131135
"types-requests==2.27.9",
132136
"types-setuptools==64.0.1",
133-
"typing-extensions==4.8.0",
137+
"typing-extensions==4.10.0",
134138
"parameterized==0.8.1",
135139
"pylint~=2.17.2",
136140
"pygments<3.0.0",
137-
"darglint~=1.8.1"
141+
"darglint~=1.8.1",
138142
]
139143
docs = [
140144
"sphinx==6.2.1",
@@ -144,16 +148,18 @@ docs = [
144148
stretch = [
145149
"aeneas~=1.7.3.0; python_version < '3.12'",
146150
# "aeneas @ git+https://github.com/baxtree/aeneas.git@v1.7.3.1#egg=aeneas ; python_version >= '3.12'",
147-
"dtw-python~=1.5.3"
148151
]
149152
llm = [
150153
"safetensors~=0.5.3",
151154
"sentencepiece~=0.1.95; python_version < '3.12'",
152155
"sentencepiece~=0.2.0; python_version >= '3.12'",
153-
"torch<2.3.0",
154-
"torchaudio<2.3.0",
155-
"transformers<4.37.0",
156-
"openai-whisper==20250625"
156+
"torch~=2.9.1",
157+
"torchaudio~=2.9.1",
158+
"transformers==4.57.3",
159+
"accelerate~=1.12.0",
160+
"soxr==1.0.0",
161+
"webrtcvad~=2.0.10",
162+
"silero-vad~=6.2.0",
157163
]
158164

159165
[project.scripts]

site/source/acknowledgement.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@ Acknowledgement
1212
- `pysubs2 <https://github.com/tkarabela/pysubs2>`_
1313
- `aeneas <https://www.readbeyond.it/aeneas/>`_
1414
- `transformers <https://huggingface.co/transformers/>`_
15-
- `openai-whisper <https://github.com/openai/whisper>`_
15+
- `whisper <https://openai.com/index/whisper/>`_
1616

1717
Thanks to Alan Robinson and Nigel Megitt for their invaluable feedback.

subaligner/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
3333
-mr {whisper}, --transcription_recipe {whisper}
3434
LLM recipe used for transcribing video files
35-
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}
35+
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large,large-v2,large-v3,large-v3-turbo}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large,large-v2,large-v3,large-v3-turbo}
3636
Flavour variation for a specific LLM recipe supporting transcription
3737
-ip INITIAL_PROMPT, --initial_prompt INITIAL_PROMPT
3838
Optional text to provide the transcribing context or specific phrases
@@ -197,7 +197,7 @@ def main():
197197
"-mcl",
198198
"--max_char_length",
199199
type=int,
200-
default=None,
200+
default=37,
201201
help="Maximum number of characters for each generated subtitle segment"
202202
)
203203
from subaligner.llm import TranslationRecipe

subaligner/llm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ class WhisperFlavour(Enum):
2020
MEDIUM_EN = "medium.en"
2121
BASE = "base"
2222
BASE_EN = "base.en"
23-
LARGE_V1 = "large-v1"
23+
LARGE = "large"
2424
LARGE_V2 = "large-v2"
2525
LARGE_V3 = "large-v3"
26-
LARGE = "large"
27-
TURBO = "turbo"
26+
TURBO = "large-v3-turbo"
2827

2928

3029
class HelsinkiNLPFlavour(Enum):

subaligner/predictor.py

Lines changed: 0 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -740,111 +740,6 @@ def __adjust_durations(self, subs: List[SubRipItem], audio_file_path: str, stret
740740
if task.sync_map_file_path_absolute is not None and os.path.exists(task.sync_map_file_path_absolute):
741741
os.remove(task.sync_map_file_path_absolute)
742742

743-
def __compress_and_stretch(self, subs: List[SubRipItem], audio_file_path: str, stretch_in_lang: str, lock: threading.RLock) -> List[SubRipItem]:
744-
from dtw import dtw
745-
try:
746-
with lock:
747-
segment_path, _ = self.__media_helper.extract_audio_from_start_to_end(
748-
audio_file_path,
749-
str(subs[0].start),
750-
str(subs[len(subs) - 1].end),
751-
)
752-
753-
# Create a text file for DTW alignments
754-
root, _ = os.path.splitext(segment_path)
755-
text_file_path = "{}.txt".format(root)
756-
757-
with open(text_file_path, "w", encoding="utf8") as text_file:
758-
text_file.write("*****".join([sub_new.text for sub_new in subs]))
759-
760-
sample_rate = self.__feature_embedder.frequency
761-
hop_length = self.__feature_embedder.hop_len
762-
n_mfcc = self.__feature_embedder.n_mfcc
763-
764-
file_script_duration_mapping = []
765-
with tempfile.TemporaryDirectory() as temp_dir:
766-
with open(text_file_path, "r") as f:
767-
script_lines = f.read().split("*****")
768-
wav_data = []
769-
for i, line in enumerate(script_lines):
770-
normalised_line = line.replace('"', "'")
771-
espeak_output_file = f"espeak_part_{i}.wav"
772-
espeak_cmd = f"espeak -v {Language.LANGUAGE_TO_VOICE_CODE[stretch_in_lang]} --stdout -- \"{normalised_line}\" | ffmpeg -y -i - -af 'aresample={sample_rate}' {os.path.join(temp_dir, espeak_output_file)}"
773-
os.system(espeak_cmd)
774-
y, sr = librosa.load(os.path.join(temp_dir, espeak_output_file), sr=None)
775-
wav_data.append(y)
776-
duration = librosa.get_duration(y=y, sr=sr)
777-
file_script_duration_mapping.append((os.path.join(temp_dir, espeak_output_file), line, duration))
778-
data = np.concatenate(wav_data)
779-
sf.write(os.path.join(temp_dir, "espeak-all.wav"), data, sr)
780-
781-
y_query, sr_query = librosa.load(os.path.join(temp_dir, "espeak-all.wav"), sr=None)
782-
query_mfcc_features = librosa.feature.mfcc(y=y_query, sr=sr_query, n_mfcc=n_mfcc, hop_length=hop_length).T
783-
y_reference, sr_reference = librosa.load(segment_path, sr=sample_rate)
784-
reference_mfcc_features = librosa.feature.mfcc(y=y_reference, sr=sr_reference, n_mfcc=n_mfcc, hop_length=hop_length).T
785-
786-
alignment = dtw(query_mfcc_features, reference_mfcc_features, keep_internals=False)
787-
assert len(alignment.index1) == len(alignment.index2), "Mismatch in lengths of alignment indices"
788-
assert sr_query == sr_reference
789-
frame_duration = hop_length / sr_query
790-
791-
mapped_times = []
792-
start_frame_index = 0
793-
for index, (wav_file, line_text, duration) in enumerate(file_script_duration_mapping):
794-
num_frames_in_query = int(np.ceil(duration / frame_duration))
795-
796-
query_start_frame = start_frame_index
797-
query_end_frame = start_frame_index + num_frames_in_query - 1
798-
reference_frame_indices = [r for q, r in zip(alignment.index1, alignment.index2) if
799-
query_start_frame <= q <= query_end_frame]
800-
reference_start_frame = min(reference_frame_indices)
801-
reference_end_frame = max(reference_frame_indices)
802-
803-
# TODO: Handle cases where mapped frames are not found in the reference audio
804-
805-
new_reference_start_time = reference_start_frame * frame_duration
806-
new_reference_end_time = (reference_end_frame + 1) * frame_duration
807-
808-
mapped_times.append({
809-
"new_reference_start_time": new_reference_start_time,
810-
"new_reference_end_time": new_reference_end_time
811-
})
812-
813-
start_frame_index = query_end_frame + 1
814-
815-
with open(os.path.join(temp_dir, "synced_subtitles.srt"), "w") as f:
816-
for index, entry in enumerate(mapped_times):
817-
start_srt = Utils.format_timestamp(entry["new_reference_start_time"])
818-
end_srt = Utils.format_timestamp(entry["new_reference_end_time"])
819-
f.write(f"{index + 1}\n")
820-
f.write(f"{start_srt} --> {end_srt}\n")
821-
f.write(f"{script_lines[index]}\n")
822-
f.write(f"\n")
823-
f.flush()
824-
825-
adjusted_subs = Subtitle._get_srt_subs(
826-
subrip_file_path=os.path.join(temp_dir, "synced_subtitles.srt"),
827-
encoding="utf-8"
828-
)
829-
830-
for index, sub_new_loaded in enumerate(adjusted_subs):
831-
sub_new_loaded.index = subs[index].index
832-
833-
adjusted_subs.shift(
834-
seconds=self.__media_helper.get_duration_in_seconds(
835-
start=None, end=str(subs[0].start)
836-
)
837-
)
838-
return adjusted_subs
839-
except KeyboardInterrupt:
840-
raise TerminalException("Subtitle compress and stretch interrupted by the user")
841-
finally:
842-
# Housekeep intermediate files
843-
if text_file_path is not None and os.path.exists(
844-
text_file_path
845-
):
846-
os.remove(text_file_path)
847-
848743
def __predict(
849744
self,
850745
video_file_path: Optional[str],

0 commit comments

Comments
 (0)