diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..ea948ec --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include README.md +include LICENSE +include requirements.txt +recursive-include fireredasr *.py diff --git a/README.md b/README.md index a3a8a0e..649ec85 100644 --- a/README.md +++ b/README.md @@ -57,23 +57,35 @@ Download model files from [huggingface](https://huggingface.co/fireredteam) and If you want to use `FireRedASR-LLM-L`, you also need to download [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct) and place it in the folder `pretrained_models`. Then, go to folder `FireRedASR-LLM-L` and run `$ ln -s ../Qwen2-7B-Instruct` -### Setup -Create a Python environment and install dependencies +### Installation + +#### Option 1: Install from GitHub (Recommended) ```bash -$ git clone https://github.com/FireRedTeam/FireRedASR.git -$ conda create --name fireredasr python=3.10 -$ pip install -r requirements.txt +$ pip install git+https://github.com/FireRedTeam/FireRedASR.git ``` -Set up Linux PATH and PYTHONPATH +#### Option 2: Install from Source +```bash +$ git clone https://github.com/FireRedTeam/FireRedASR.git +$ cd FireRedASR +$ pip install -e . 
def cli():
    """Run the speech-to-text tool from the command line.

    This is the target of the ``speech2text`` console script: it parses the
    process arguments with the module-level ``parser``, echoes the parsed
    namespace to stdout, and passes it on to ``main``.
    """
    parsed_args = parser.parse_args()
    # Echo the effective options before doing any work.
    print(parsed_args)
    main(parsed_args)


# Keep direct execution (``python speech2text.py ...``) working as before.
if __name__ == "__main__":
    cli()
#!/usr/bin/env python3
"""Setuptools build script for the ``fireredasr`` package.

Reads the long description from ``README.md`` and the runtime dependencies
from ``requirements.txt`` (both located next to this script) and registers
the ``speech2text`` console command.

NOTE(review): ``pyproject.toml`` declares the same project metadata
statically, including ``dependencies``. Confirm which of the two files is
meant to be the source of truth so they cannot drift apart.
"""
import re
from pathlib import Path

from setuptools import setup, find_packages

# Resolve data files relative to this script rather than the current working
# directory, so the build works no matter where it is invoked from.
HERE = Path(__file__).resolve().parent

# A comment starts at a "#" that is at the beginning of the line or preceded
# by whitespace; a "#" inside a URL fragment (e.g. "#egg=name") is kept.
_COMMENT_RE = re.compile(r"(^|\s+)#.*$")


def _read_requirements(path):
    """Return the requirement specifiers listed in the file at *path*.

    Blank lines, full-line comments (indented or not) and trailing comments
    are dropped. Lines starting with ``-`` are pip options such as
    ``-r other.txt`` or ``--index-url ...``; they are skipped because they
    are not valid ``install_requires`` entries.
    """
    specifiers = []
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = _COMMENT_RE.sub("", raw_line).strip()
        if not line or line.startswith("-"):
            continue
        specifiers.append(line)
    return specifiers


long_description = (HERE / "README.md").read_text(encoding="utf-8")
requirements = _read_requirements(HERE / "requirements.txt")

setup(
    name="fireredasr",
    version="0.1.0",
    author="FireRedTeam",
    author_email="",
    description="Open-Source Industrial-Grade Automatic Speech Recognition Models",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/FireRedTeam/FireRedASR",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
    ],
    # NOTE(review): pyproject.toml requires numpy>=1.26.1, and NumPy 1.26
    # does not support Python 3.8 -- confirm whether 3.8 should stay listed.
    python_requires=">=3.8",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            "speech2text=fireredasr.speech2text:cli",
        ],
    },
    include_package_data=True,
)