```bash
pip3 install "TensorFlowASR" # or "TensorFlowASR[cuda]" if using GPU
```
- For training, please read the [training tutorial](./docs/tutorials/training.md)
- For TFLite conversion, see the [tflite conversion tutorial](./docs/tutorials/tflite.md)
## Pretrained Models
See the results in each example folder, e.g. [./examples/models/transducer/conformer/results/sentencepiece/README.md](./examples/models/transducer/conformer/results/sentencepiece/README.md)
## Corpus Sources
## 1. Character Tokenizer
See the [librispeech config](../examples/datasets/librispeech/characters/char.yml.j2)
This tokenizer splits the text into characters and then maps each character to an index. Indices start from 1; index 0 is reserved for the blank token. It is only used for languages that have a small number of characters, where a character is not a combination of other characters, for example English, Vietnamese, etc.
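
For illustration, here is a minimal Python sketch of that index scheme; the toy character set below is an assumption for the example, not the vocabulary generated from the config above.

```python
# Toy character tokenizer sketch: index 0 is reserved for the blank token,
# so characters are numbered starting from 1. The character set here is a
# made-up example, not the repo's generated vocabulary.
vocab = ["a", "b", "c", " "]
char_to_index = {c: i + 1 for i, c in enumerate(vocab)}  # 0 = blank

def encode(text: str) -> list[int]:
    """Map each character to its index; characters outside the set are dropped."""
    return [char_to_index[c] for c in text if c in char_to_index]

print(encode("abc ba"))  # [1, 2, 3, 4, 2, 1]
```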
## 2. Wordpiece Tokenizer
See the [librispeech config](../examples/datasets/librispeech/wordpiece/wp.yml.j2) for wordpiece split by whitespace
See the [librispeech config](../examples/datasets/librispeech/wordpiece/wp_whitespace.yml.j2) for wordpiece where whitespace is a separate token
This tokenizer splits the text into words and then splits each word into subwords, which are mapped to indices. The blank token can be set to `<unk>` at index 0. It is designed for languages that have a large number of words, where a word can be a combination of other words, so it can be applied to any language.
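
As a rough illustration of subword splitting, here is a toy greedy longest-match sketch; the tiny vocabulary and the `##` continuation prefix are assumptions for the example, not the repo's actual wordpiece implementation.

```python
# Greedy longest-match wordpiece split over a toy vocabulary. `##` marks a
# subword that continues a word; unknown words fall back to <unk> (index 0).
subword_vocab = {"play": 1, "##ing": 2, "##ed": 3, "the": 4}

def wordpiece_split(word: str) -> list[str]:
    pieces, start = [], 0
    while start < len(word):
        end, piece = len(word), None
        while end > start:
            candidate = word[start:end] if start == 0 else "##" + word[start:end]
            if candidate in subword_vocab:
                piece = candidate
                break
            end -= 1
        if piece is None:
            return ["<unk>"]  # no matching subword: treat the whole word as unknown
        pieces.append(piece)
        start = end
    return pieces

print(wordpiece_split("playing"))  # ['play', '##ing']
```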
## 3. Sentencepiece Tokenizer
See the [librispeech config](../examples/datasets/librispeech/sentencepiece/sp.yml.j2)
This tokenizer splits the whole sentence into subwords and then maps each subword to an index. The blank token can be set to `<unk>` at index 0. It is designed for languages that have a large number of words, where a word can be a combination of other words, so it can be applied to any language.
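
For a feel of how such a model behaves, here is a sketch using the standalone `sentencepiece` Python package; the corpus path, model prefix and vocabulary size are placeholders, and the project itself builds its tokenizer from the config referenced above.

```python
# Sketch with the standalone `sentencepiece` package (placeholder paths/sizes).
# unk_id=0 mirrors the convention above where <unk> doubles as the blank token.
import sentencepiece as spm

spm.SentencePieceTrainer.train(
    input="transcripts.txt",   # one sentence per line (placeholder corpus)
    model_prefix="sp_demo",
    vocab_size=1000,
    model_type="unigram",
    unk_id=0,
)

sp = spm.SentencePieceProcessor(model_file="sp_demo.model")
print(sp.encode("hello world", out_type=str))  # subword pieces
print(sp.encode("hello world", out_type=int))  # corresponding indices
```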
## 2. Prepare transcript files
For other datasets, please write your own script to prepare the transcript files; take a look at [`prepare_transcript.py`](../../examples/datasets/librispeech/prepare_transcript.py) for reference.
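
A hedged sketch of such a script is shown below; it assumes the tab-separated `PATH` / `DURATION` / `TRANSCRIPT` layout written by the LibriSpeech example (check that script for the exact format), and `load_my_annotations` is a hypothetical helper for your own dataset.

```python
# Hedged sketch of a custom transcript-preparation script. The output layout
# (PATH \t DURATION \t TRANSCRIPT, lower-cased text) follows the LibriSpeech
# example script; verify against it before using this for real data.
import os
import soundfile as sf

def load_my_annotations(data_dir: str):
    """Hypothetical helper: yield (audio_path, transcript) pairs for your dataset."""
    raise NotImplementedError

def write_transcript(data_dir: str, output_path: str) -> None:
    with open(output_path, "w", encoding="utf-8") as fout:
        fout.write("PATH\tDURATION\tTRANSCRIPT\n")
        for audio_path, transcript in load_my_annotations(data_dir):
            duration = sf.info(audio_path).duration  # length in seconds
            fout.write(f"{os.path.abspath(audio_path)}\t{duration:.2f}\t{transcript.lower()}\n")
```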
## 3. Prepare config file
Please take a look at some example config files in `examples/*/*.yml.j2`
The config file is the same as the one used for training
The inputs, outputs and other options of the vocabulary are defined in the config file
For example:
```jinja2
{% import "examples/datasets/librispeech/sentencepiece/sp.yml.j2" as decoder_config with context %}
{{decoder_config}}
{% import "examples/models/transducer/conformer/small.yml.j2" as config with context %}
{{config}}
```