Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include README.md
include LICENSE
include requirements.txt
recursive-include fireredasr *.py
30 changes: 21 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,35 @@ Download model files from [huggingface](https://huggingface.co/fireredteam) and
If you want to use `FireRedASR-LLM-L`, you also need to download [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct) and place it in the folder `pretrained_models`. Then, go to folder `FireRedASR-LLM-L` and run `$ ln -s ../Qwen2-7B-Instruct`


### Setup
Create a Python environment and install dependencies
### Installation

#### Option 1: Install from GitHub (Recommended)
```bash
$ git clone https://github.com/FireRedTeam/FireRedASR.git
$ conda create --name fireredasr python=3.10
$ pip install -r requirements.txt
$ pip install git+https://github.com/FireRedTeam/FireRedASR.git
```

Set up Linux PATH and PYTHONPATH
#### Option 2: Install from Source
```bash
$ git clone https://github.com/FireRedTeam/FireRedASR.git
$ cd FireRedASR
$ pip install -e .
```

#### Option 3: Manual Setup (Legacy)
```bash
$ git clone https://github.com/FireRedTeam/FireRedASR.git
$ cd FireRedASR
$ conda create --name fireredasr python=3.10
$ conda activate fireredasr
$ pip install -r requirements.txt
$ export PATH=$PWD/fireredasr/:$PWD/fireredasr/utils/:$PATH
$ export PYTHONPATH=$PWD/:$PYTHONPATH
```

Convert audio to 16kHz 16-bit PCM format
```
ffmpeg -i input_audio -ar 16000 -ac 1 -acodec pcm_s16le -f wav output.wav
### Audio Preprocessing
Convert audio to 16 kHz, 16-bit, mono PCM WAV format:
```bash
$ ffmpeg -i input_audio -ar 16000 -ac 1 -acodec pcm_s16le -f wav output.wav
```

### Quick Start
Expand Down
7 changes: 7 additions & 0 deletions fireredasr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""FireRedASR: Open-Source Industrial-Grade Automatic Speech Recognition Models"""

# Package version. The same value is hard-coded in pyproject.toml and
# setup.py, so all three must be updated together.
__version__ = "0.1.0"

# Re-export FireRedAsr at package level so that callers can write
# `from fireredasr import FireRedAsr`.
from fireredasr.models.fireredasr import FireRedAsr

# Names exported by `from fireredasr import *`.
__all__ = ["FireRedAsr"]
1 change: 1 addition & 0 deletions fireredasr/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Data processing modules for FireRedASR"""
1 change: 1 addition & 0 deletions fireredasr/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Model modules for FireRedASR"""
1 change: 1 addition & 0 deletions fireredasr/models/module/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Model submodules for FireRedASR"""
7 changes: 6 additions & 1 deletion fireredasr/speech2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,12 @@ def get_wav_info(args):
return wavs


if __name__ == "__main__":
def cli():
    """Run speech2text as an installed console command.

    Parses the command line with the module-level ``parser``, echoes the
    parsed arguments to stdout, and passes them on to ``main``.
    """
    cli_args = parser.parse_args()
    print(cli_args)
    main(cli_args)


if __name__ == "__main__":
cli()
1 change: 1 addition & 0 deletions fireredasr/tokenizer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tokenizer modules for FireRedASR"""
1 change: 1 addition & 0 deletions fireredasr/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Utility modules for FireRedASR"""
49 changes: 49 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[build-system]
# The PEP 621 `[project]` table below is only understood by setuptools >= 61;
# older releases would build the package without this metadata.
# NOTE(review): the version is static, so setuptools_scm looks unused here --
# confirm nothing relies on it before dropping it.
requires = ["setuptools>=61", "wheel", "setuptools_scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"

[project]
name = "fireredasr"
version = "0.1.0"
description = "Open-Source Industrial-Grade Automatic Speech Recognition Models"
readme = "README.md"
# numpy>=1.26.1 (see dependencies) has no release for Python 3.8, so 3.9 is
# the oldest interpreter on which the dependency set can be installed.
requires-python = ">=3.9"
license = {text = "Apache-2.0"}
authors = [
    {name = "FireRedTeam"}
]
keywords = ["asr", "speech recognition", "automatic speech recognition", "mandarin", "chinese"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
]
# Keep this list in sync with requirements.txt, which setup.py reads.
dependencies = [
    "cn2an>=0.5.23",
    "kaldiio>=2.18.0",
    "kaldi_native_fbank>=1.15",
    "numpy>=1.26.1",
    "peft>=0.13.2",
    "sentencepiece",
    "torch>=2.0.0",
    "transformers>=4.46.3",
]

[project.urls]
Homepage = "https://github.com/FireRedTeam/FireRedASR"
Documentation = "https://github.com/FireRedTeam/FireRedASR"
Repository = "https://github.com/FireRedTeam/FireRedASR"
Paper = "https://arxiv.org/pdf/2501.14350"

[project.scripts]
# Installs a `speech2text` command that calls cli() in fireredasr/speech2text.py.
speech2text = "fireredasr.speech2text:cli"

[tool.setuptools]
# Explicit package list; a new subpackage must be added here to be shipped.
packages = ["fireredasr", "fireredasr.data", "fireredasr.models", "fireredasr.models.module", "fireredasr.tokenizer", "fireredasr.utils"]
40 changes: 40 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
"""Setuptools build script for the ``fireredasr`` package.

Reads the long description from README.md and the install requirements from
requirements.txt, both located next to this script, and registers the
``speech2text`` console command.

NOTE(review): pyproject.toml in this repository declares the same metadata;
keep the two files in sync.
"""
from pathlib import Path

from setuptools import setup, find_packages

# Anchor file lookups to this script's directory so the build does not depend
# on the current working directory.
HERE = Path(__file__).resolve().parent


def _read_requirements(path):
    """Return the requirement specifiers listed in the file at *path*.

    Blank lines and comment lines are skipped. Each line is stripped before
    the comment check so that an indented ``# comment`` is not mistaken for a
    requirement.
    """
    requirements = []
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if line and not line.startswith("#"):
            requirements.append(line)
    return requirements


long_description = (HERE / "README.md").read_text(encoding="utf-8")
requirements = _read_requirements(HERE / "requirements.txt")

setup(
    name="fireredasr",
    version="0.1.0",
    author="FireRedTeam",
    author_email="",
    description="Open-Source Industrial-Grade Automatic Speech Recognition Models",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/FireRedTeam/FireRedASR",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
    ],
    # The numpy>=1.26.1 requirement declared in pyproject.toml has no release
    # for Python 3.8, so 3.9 is the oldest interpreter that can install it.
    python_requires=">=3.9",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            "speech2text=fireredasr.speech2text:cli",
        ],
    },
    include_package_data=True,
)