From 75af3b90a16aece84cc34f06286bca3fdb2c5a1f Mon Sep 17 00:00:00 2001
From: litingyu <605979840@qq.com>
Date: Thu, 15 May 2025 17:16:25 +0800
Subject: [PATCH 1/2] readme
---
README.md | 10 +++++-----
README_CN.md | 16 ++++++++--------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
index e061691..8fc439e 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# MindAudio
+# MindSpore AUDIO
[

@@ -20,7 +20,7 @@ English | [中文](README_CN.md)
## Introduction
-MindAudio is a toolbox of audio models and algorithms based on [MindSpore](https://www.mindspore.cn/). It provides a series of API for common audio data processing,data enhancement,feature extraction, so that users can preprocess data conveniently. Also provides examples to show how to build audio deep learning models with mindaudio.
+MindSpore AUDIO is a toolbox of audio models and algorithms based on [MindSpore](https://www.mindspore.cn/). It provides a series of API for common audio data processing, data enhancement, feature extraction, so that users can preprocess data conveniently. Also provides examples to show how to build audio deep learning models with mindaudio.
The following is the corresponding `mindaudio` versions and supported `mindspore` versions.
@@ -46,7 +46,7 @@ The following is the corresponding `mindaudio` versions and supported `mindspore
### Install with PyPI
-The released version of MindAudio can be installed via `PyPI` as follows:
+The released version of MindSpore AUDIO can be installed via `PyPI` as follows:
```shell
pip install mindaudio
@@ -54,7 +54,7 @@ pip install mindaudio
### Install from Source
-The latest version of MindAudio can be installed as follows:
+The latest version of MindSpore AUDIO can be installed as follows:
```shell
git clone https://github.com/mindspore-lab/mindaudio.git
@@ -67,7 +67,7 @@ python setup.py install
###
-MindAudio provides a series of commonly used audio data processing apis, which can be easily invoked for data analysis and feature extraction.
+MindSpore AUDIO provides a series of commonly used audio data processing apis, which can be easily invoked for data analysis and feature extraction.
```python
>>> import mindaudio.data.io as io
diff --git a/README_CN.md b/README_CN.md
index 68dba4e..e4c03ed 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -1,7 +1,7 @@
-# MindAudio
+# MindSpore AUDIO
[

@@ -18,7 +18,7 @@
## 介绍
-MindAudio 是基于 [MindSpore](https://www.mindspore.cn/) 的音频模型和算法工具箱。它提供了一系列用于常见音频数据处理、数据增强、特征提取的 API,方便用户对数据进行预处理。此外,它还提供了一些示例,展示如何利用 mindaudio 建立音频深度学习模型。
+MindSpore AUDIO 是基于 [MindSpore](https://www.mindspore.cn/) 的音频模型和算法工具箱。它提供了一系列用于常见音频数据处理、数据增强、特征提取的 API,方便用户对数据进行预处理。此外,它还提供了一些示例,展示如何利用 mindaudio 建立音频深度学习模型。
下表显示了相应的 `mindaudio` 版本和支持的 `mindspore` 版本。
@@ -44,14 +44,14 @@ MindAudio 是基于 [MindSpore](https://www.mindspore.cn/) 的音频模型和算
### Pypi安装
-MindAudio的发布版本可以通过`PyPI`安装:
+MindSpore AUDIO的发布版本可以通过`PyPI`安装:
```shell
pip install mindaudio
```
### 源码安装
-最新版本的 MindAudio 可以通过如下方式安装:
+最新版本的 MindSpore AUDIO 可以通过如下方式安装:
```shell
git clone https://github.com/mindspore-lab/mindaudio.git
@@ -64,7 +64,7 @@ python setup.py install
###
-MindAudio 提供了一系列常用的音频数据处理 APIs,可以轻松调用这些 APIs 进行数据分析和特征提取。
+MindSpore AUDIO 提供了一系列常用的音频数据处理 APIs,可以轻松调用这些 APIs 进行数据分析和特征提取。
```python
>>> import mindaudio.data.io as io
@@ -93,16 +93,16 @@ MindAudio 提供了一系列常用的音频数据处理 APIs,可以轻松调
## 贡献方式
-我们感谢开发者用户的所有贡献,一起让 MindAudio 变得更好。
+我们感谢开发者用户的所有贡献,一起让 MindSpore AUDIO 变得更好。
贡献指南请参考[CONTRIBUTING.md](CONTRIBUTING.md) 。
## 许可证
-MindAudio 遵循[Apache License 2.0](LICENSE)开源协议.
+MindSpore AUDIO 遵循[Apache License 2.0](LICENSE)开源协议.
## 引用
-如果你觉得 MindAudio 对你的项目有帮助,请考虑引用:
+如果你觉得 MindSpore AUDIO 对你的项目有帮助,请考虑引用:
```latex
@misc{MindSpore Audio 2022,
From 4dfe7d07d5ccec483cb63b39885b71bf309fbf0a Mon Sep 17 00:00:00 2001
From: litingyu <605979840@qq.com>
Date: Thu, 10 Jul 2025 20:12:40 +0800
Subject: [PATCH 2/2] fix
---
.../ECAPA-TDNN/speaker_verification_cosine.py | 2 ++
.../ECAPA-TDNN/train_speaker_embeddings.py | 2 ++
examples/ECAPA-TDNN/voxceleb_prepare.py | 2 ++
examples/conformer/asr_model.py | 2 ++
examples/conv_tasnet/data.py | 29 ++-----------------
examples/conv_tasnet/eval.py | 2 ++
examples/conv_tasnet/preprocess.py | 2 ++
examples/conv_tasnet/train.py | 2 ++
examples/deepspeech2/eval.py | 2 ++
examples/deepspeech2/train.py | 2 ++
examples/fastspeech2/dataset.py | 2 ++
examples/fastspeech2/generate.py | 2 ++
examples/fastspeech2/ljspeech.py | 2 ++
examples/fastspeech2/preprocess.py | 1 +
examples/fastspeech2/text/__init__.py | 2 +-
examples/fastspeech2/text/cleaners.py | 2 +-
examples/fastspeech2/text/cmudict.py | 2 +-
examples/fastspeech2/text/numbers.py | 2 +-
examples/fastspeech2/text/pinyin.py | 1 +
examples/fastspeech2/text/symbols.py | 1 +
examples/fastspeech2/train.py | 2 ++
examples/tasnet/data.py | 2 ++
examples/tasnet/eval.py | 4 +--
examples/tasnet/preprocess.py | 2 ++
examples/tasnet/train.py | 2 ++
examples/wavegrad/dataset.py | 2 ++
examples/wavegrad/ljspeech.py | 2 ++
examples/wavegrad/preprocess.py | 2 ++
mindaudio/data/aishell.py | 2 ++
mindaudio/data/librispeech.py | 2 ++
mindaudio/data/voxceleb.py | 2 ++
mindaudio/loss/AdditiveAngularMargin.py | 2 ++
mindaudio/loss/ctc_loss.py | 2 ++
mindaudio/loss/label_smoothing_loss.py | 2 ++
mindaudio/loss/separation_loss.py | 2 ++
mindaudio/metric/snr.py | 2 ++
mindaudio/models/conformer.py | 4 ++-
mindaudio/models/conv_tasnet.py | 2 ++
mindaudio/models/decoders/greedydecoder.py | 2 ++
mindaudio/models/deepspeech2.py | 2 ++
mindaudio/models/ecapatdnn.py | 2 ++
.../models/fastspeech2/fastspeech2_v190.py | 2 ++
mindaudio/models/fastspeech2/loss.py | 2 ++
.../models/fastspeech2/variance_adapter.py | 2 ++
mindaudio/models/layers/attention.py | 2 ++
mindaudio/models/layers/cmvn.py | 2 ++
mindaudio/models/layers/convolution.py | 2 ++
mindaudio/models/layers/embedding.py | 2 ++
.../layers/positionwise_feed_forward.py | 2 ++
mindaudio/models/layers/subsampling.py | 2 ++
mindaudio/models/layers/swish.py | 2 ++
mindaudio/models/tasnet.py | 2 ++
mindaudio/models/transformer/constants.py | 2 ++
mindaudio/models/transformer/layers.py | 2 ++
mindaudio/models/transformer/models.py | 2 ++
.../models/transformer/score_function.py | 2 ++
mindaudio/models/transformer/sublayers.py | 2 ++
mindaudio/models/wavegrad/wavegrad_v190.py | 2 ++
58 files changed, 110 insertions(+), 34 deletions(-)
diff --git a/examples/ECAPA-TDNN/speaker_verification_cosine.py b/examples/ECAPA-TDNN/speaker_verification_cosine.py
index 431bf7e..4b72dbe 100644
--- a/examples/ECAPA-TDNN/speaker_verification_cosine.py
+++ b/examples/ECAPA-TDNN/speaker_verification_cosine.py
@@ -1,3 +1,5 @@
+# ECAPA_TDNN in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/speaker_verification_cosine.py
"""
Recipe for training a speaker verification system based on cosine distance.
"""
diff --git a/examples/ECAPA-TDNN/train_speaker_embeddings.py b/examples/ECAPA-TDNN/train_speaker_embeddings.py
index f685a42..50e337f 100644
--- a/examples/ECAPA-TDNN/train_speaker_embeddings.py
+++ b/examples/ECAPA-TDNN/train_speaker_embeddings.py
@@ -1,3 +1,5 @@
+# ECAPA_TDNN in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/train_speaker_embeddings.py
"""
Recipe for training speaker embeddings using the VoxCeleb Dataset.
"""
diff --git a/examples/ECAPA-TDNN/voxceleb_prepare.py b/examples/ECAPA-TDNN/voxceleb_prepare.py
index 7d27571..75a70c8 100644
--- a/examples/ECAPA-TDNN/voxceleb_prepare.py
+++ b/examples/ECAPA-TDNN/voxceleb_prepare.py
@@ -1,3 +1,5 @@
+# ECAPA_TDNN in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/voxceleb_prepare.py
"""
Data preparation, from mindaudio VoxCeleb recipe.
"""
diff --git a/examples/conformer/asr_model.py b/examples/conformer/asr_model.py
index a9d7a2d..f60da18 100644
--- a/examples/conformer/asr_model.py
+++ b/examples/conformer/asr_model.py
@@ -1,3 +1,5 @@
+# Conformer in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/asr_model.py
"""Definition of ASR model."""
import mindspore
diff --git a/examples/conv_tasnet/data.py b/examples/conv_tasnet/data.py
index 3c921a2..f076817 100644
--- a/examples/conv_tasnet/data.py
+++ b/examples/conv_tasnet/data.py
@@ -1,3 +1,5 @@
+# AudioDataLoader in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/data.py
"""
Logic:
1. AudioDataLoader generate a minibatch from AudioDataset, the size of this
@@ -16,14 +18,11 @@
Each targets's shape is B x C x T
"""
-import argparse
import json
import math
import os
-import mindspore.dataset as ds
import numpy as np
-from mindspore import context
import mindaudio.data.io as io
@@ -176,27 +175,3 @@ def sort_and_pad(self, batch):
sources_pad = sources_pad.transpose((0, 2, 1))
return mixtures_pad, ilens, sources_pad
-
-
-if __name__ == "__main__":
- context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=4)
- args = parser.parse_args()
- print(args)
- tr_dataset = DatasetGenerator(
- args.train_dir,
- args.batch_size,
- sample_rate=args.sample_rate,
- segment=args.segment,
- )
- dataset = ds.GeneratorDataset(
- tr_dataset, ["mixture", "lens", "sources"], shuffle=False
- )
- dataset = dataset.batch(batch_size=5)
- iter_per_epoch = dataset.get_dataset_size()
- print(iter_per_epoch)
- h = 0
- for data in dataset.create_dict_iterator():
- h += 1
- print(data["mixture"])
- print(data["lens"])
- print(data["sources"])
diff --git a/examples/conv_tasnet/eval.py b/examples/conv_tasnet/eval.py
index d21391f..7be72c5 100644
--- a/examples/conv_tasnet/eval.py
+++ b/examples/conv_tasnet/eval.py
@@ -1,3 +1,5 @@
+# Evaluation of Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py
import mindspore
import mindspore.dataset as ds
import mindspore.ops as ops
diff --git a/examples/conv_tasnet/preprocess.py b/examples/conv_tasnet/preprocess.py
index b6ee267..2401033 100644
--- a/examples/conv_tasnet/preprocess.py
+++ b/examples/conv_tasnet/preprocess.py
@@ -1,3 +1,5 @@
+# Preprocess of Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/preprocess.py
""" Convert the relevant information in the audio wav file to a json file """
import argparse
diff --git a/examples/conv_tasnet/train.py b/examples/conv_tasnet/train.py
index 9c021fc..cb5e67c 100644
--- a/examples/conv_tasnet/train.py
+++ b/examples/conv_tasnet/train.py
@@ -1,3 +1,5 @@
+# Train of Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/train.py
import os
import mindspore.dataset as ds
diff --git a/examples/deepspeech2/eval.py b/examples/deepspeech2/eval.py
index 70cc725..f3b7775 100644
--- a/examples/deepspeech2/eval.py
+++ b/examples/deepspeech2/eval.py
@@ -1,3 +1,5 @@
+# Evaluation of deepspeech2 in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/blob/master/deepspeech_pytorch/validation.py
"""
Eval DeepSpeech2
"""
diff --git a/examples/deepspeech2/train.py b/examples/deepspeech2/train.py
index 4c01086..923b4cd 100644
--- a/examples/deepspeech2/train.py
+++ b/examples/deepspeech2/train.py
@@ -1,3 +1,5 @@
+# Train of deepspeech2 in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/blob/master/deepspeech_pytorch/training.py
"""train_criteo."""
import os
diff --git a/examples/fastspeech2/dataset.py b/examples/fastspeech2/dataset.py
index 35a4e35..5dae003 100755
--- a/examples/fastspeech2/dataset.py
+++ b/examples/fastspeech2/dataset.py
@@ -1,3 +1,5 @@
+# LJSpeech dataloader in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/dataset.py
import os
import sys
from multiprocessing import cpu_count
diff --git a/examples/fastspeech2/generate.py b/examples/fastspeech2/generate.py
index 775a2a4..fa31a0b 100644
--- a/examples/fastspeech2/generate.py
+++ b/examples/fastspeech2/generate.py
@@ -1,3 +1,5 @@
+# Synthesize in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/synthesize.py
import argparse
import os
import re
diff --git a/examples/fastspeech2/ljspeech.py b/examples/fastspeech2/ljspeech.py
index 9f73c88..f8d54b6 100755
--- a/examples/fastspeech2/ljspeech.py
+++ b/examples/fastspeech2/ljspeech.py
@@ -1,3 +1,5 @@
+# LJSpeech dataloader in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py
import csv
import os
diff --git a/examples/fastspeech2/preprocess.py b/examples/fastspeech2/preprocess.py
index c2a6d93..137a1cb 100755
--- a/examples/fastspeech2/preprocess.py
+++ b/examples/fastspeech2/preprocess.py
@@ -1,5 +1,6 @@
# Given the path to ljspeech/wavs,
# this script converts wav files to .npy features used for training.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/preprocessor.py
import argparse
import os
diff --git a/examples/fastspeech2/text/__init__.py b/examples/fastspeech2/text/__init__.py
index 25b3330..ca49065 100644
--- a/examples/fastspeech2/text/__init__.py
+++ b/examples/fastspeech2/text/__init__.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
import re
from text import cleaners
diff --git a/examples/fastspeech2/text/cleaners.py b/examples/fastspeech2/text/cleaners.py
index 399b853..515651c 100644
--- a/examples/fastspeech2/text/cleaners.py
+++ b/examples/fastspeech2/text/cleaners.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
"""
Cleaners are transformations that run over the input text at both training and eval time.
diff --git a/examples/fastspeech2/text/cmudict.py b/examples/fastspeech2/text/cmudict.py
index 9858827..c566b99 100644
--- a/examples/fastspeech2/text/cmudict.py
+++ b/examples/fastspeech2/text/cmudict.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
import re
diff --git a/examples/fastspeech2/text/numbers.py b/examples/fastspeech2/text/numbers.py
index d5cf986..ce9641c 100644
--- a/examples/fastspeech2/text/numbers.py
+++ b/examples/fastspeech2/text/numbers.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
import re
diff --git a/examples/fastspeech2/text/pinyin.py b/examples/fastspeech2/text/pinyin.py
index b590081..5fc7dd4 100644
--- a/examples/fastspeech2/text/pinyin.py
+++ b/examples/fastspeech2/text/pinyin.py
@@ -1,3 +1,4 @@
+# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/pinyin.py
initials = [
"b",
"c",
diff --git a/examples/fastspeech2/text/symbols.py b/examples/fastspeech2/text/symbols.py
index 426d2d6..be825d6 100644
--- a/examples/fastspeech2/text/symbols.py
+++ b/examples/fastspeech2/text/symbols.py
@@ -1,3 +1,4 @@
+# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/symbols.py
from text import pinyin
valid_symbols = [
diff --git a/examples/fastspeech2/train.py b/examples/fastspeech2/train.py
index fd7b56b..d91fd72 100755
--- a/examples/fastspeech2/train.py
+++ b/examples/fastspeech2/train.py
@@ -1,3 +1,5 @@
+# Train in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/train.py
import argparse
import ast
import os
diff --git a/examples/tasnet/data.py b/examples/tasnet/data.py
index 87364e8..8037d28 100644
--- a/examples/tasnet/data.py
+++ b/examples/tasnet/data.py
@@ -1,3 +1,5 @@
+# AudioDataLoader in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/train.py
""" data """
import json
import os
diff --git a/examples/tasnet/eval.py b/examples/tasnet/eval.py
index 67afb63..04923e4 100644
--- a/examples/tasnet/eval.py
+++ b/examples/tasnet/eval.py
@@ -1,3 +1,5 @@
+# Evaluation of TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/evaluate.py
import argparse
import json
import os
@@ -7,8 +9,6 @@
import mindspore.ops as ops
from data import DatasetGenerator
from mindspore import (
- Parameter,
- Tensor,
context,
load_checkpoint,
load_param_into_net,
diff --git a/examples/tasnet/preprocess.py b/examples/tasnet/preprocess.py
index ee29712..2dff787 100755
--- a/examples/tasnet/preprocess.py
+++ b/examples/tasnet/preprocess.py
@@ -1,3 +1,5 @@
+# Preprocess of TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/preprocess.py
""" Convert the relevant information in the audio wav file to a json file """
import argparse
diff --git a/examples/tasnet/train.py b/examples/tasnet/train.py
index ec6d58d..cee8c6f 100644
--- a/examples/tasnet/train.py
+++ b/examples/tasnet/train.py
@@ -1,3 +1,5 @@
+# Train of TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/train.py
""" Train """
import argparse
import json
diff --git a/examples/wavegrad/dataset.py b/examples/wavegrad/dataset.py
index 5caca6c..9f013c3 100644
--- a/examples/wavegrad/dataset.py
+++ b/examples/wavegrad/dataset.py
@@ -1,3 +1,5 @@
+# AudioDataLoader in mindspore.
+# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/dataset.py
from multiprocessing import cpu_count
import numpy as np
diff --git a/examples/wavegrad/ljspeech.py b/examples/wavegrad/ljspeech.py
index 9f73c88..f8d54b6 100755
--- a/examples/wavegrad/ljspeech.py
+++ b/examples/wavegrad/ljspeech.py
@@ -1,3 +1,5 @@
+# LJSpeech dataloader in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py
import csv
import os
diff --git a/examples/wavegrad/preprocess.py b/examples/wavegrad/preprocess.py
index 17eac81..c294356 100755
--- a/examples/wavegrad/preprocess.py
+++ b/examples/wavegrad/preprocess.py
@@ -1,3 +1,5 @@
+# Preprocess in mindspore.
+# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/preprocess.py
import sys
from multiprocessing import Pool, cpu_count
diff --git a/mindaudio/data/aishell.py b/mindaudio/data/aishell.py
index c21117c..11e7e8a 100644
--- a/mindaudio/data/aishell.py
+++ b/mindaudio/data/aishell.py
@@ -1,3 +1,5 @@
+# AISHELL dataloader in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/aishell_prepare.py
import argparse
import csv
import glob
diff --git a/mindaudio/data/librispeech.py b/mindaudio/data/librispeech.py
index 7f433ef..74c6f01 100644
--- a/mindaudio/data/librispeech.py
+++ b/mindaudio/data/librispeech.py
@@ -1,3 +1,5 @@
+# LibriSpeech dataloader in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/blob/master/data/librispeech.py
import argparse
import json
import os
diff --git a/mindaudio/data/voxceleb.py b/mindaudio/data/voxceleb.py
index cad01a7..280faa7 100644
--- a/mindaudio/data/voxceleb.py
+++ b/mindaudio/data/voxceleb.py
@@ -1,3 +1,5 @@
+# Voxceleb dataloader in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/voxceleb_prepare.py
"""
Data preparation, from mindaudio VoxCeleb recipe.
"""
diff --git a/mindaudio/loss/AdditiveAngularMargin.py b/mindaudio/loss/AdditiveAngularMargin.py
index ea2e405..d7ffb61 100644
--- a/mindaudio/loss/AdditiveAngularMargin.py
+++ b/mindaudio/loss/AdditiveAngularMargin.py
@@ -1,3 +1,5 @@
+# AdditiveAngularMargin in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/losses.py
import math
import mindspore as ms
diff --git a/mindaudio/loss/ctc_loss.py b/mindaudio/loss/ctc_loss.py
index f513951..cb94c9c 100644
--- a/mindaudio/loss/ctc_loss.py
+++ b/mindaudio/loss/ctc_loss.py
@@ -1,3 +1,5 @@
+# CTC in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/ctc.py
"""CTC layer."""
import mindspore
diff --git a/mindaudio/loss/label_smoothing_loss.py b/mindaudio/loss/label_smoothing_loss.py
index 8e1542a..edd17c7 100644
--- a/mindaudio/loss/label_smoothing_loss.py
+++ b/mindaudio/loss/label_smoothing_loss.py
@@ -1,3 +1,5 @@
+# Label_smoothing_loss in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/label_smoothing_loss.py
"""Label smoothing module."""
import mindspore
diff --git a/mindaudio/loss/separation_loss.py b/mindaudio/loss/separation_loss.py
index 34bc0af..fbe7a0d 100644
--- a/mindaudio/loss/separation_loss.py
+++ b/mindaudio/loss/separation_loss.py
@@ -1,3 +1,5 @@
+# Separation_loss in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/pit_criterion.py
""" Loss """
from itertools import permutations
diff --git a/mindaudio/metric/snr.py b/mindaudio/metric/snr.py
index 55a19c8..200a73b 100644
--- a/mindaudio/metric/snr.py
+++ b/mindaudio/metric/snr.py
@@ -1,3 +1,5 @@
+# SNR in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py
import numpy as np
from mir_eval.separation import bss_eval_sources
diff --git a/mindaudio/models/conformer.py b/mindaudio/models/conformer.py
index 5b41801..3714c07 100644
--- a/mindaudio/models/conformer.py
+++ b/mindaudio/models/conformer.py
@@ -1,4 +1,6 @@
-"""Definition of ASR model."""
+# Conformer in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer
+"""Definition of conformer model."""
from typing import Optional, Tuple
diff --git a/mindaudio/models/conv_tasnet.py b/mindaudio/models/conv_tasnet.py
index 22fc4cc..70e4dab 100644
--- a/mindaudio/models/conv_tasnet.py
+++ b/mindaudio/models/conv_tasnet.py
@@ -1,3 +1,5 @@
+# Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/conv_tasnet.py
import argparse
import math
diff --git a/mindaudio/models/decoders/greedydecoder.py b/mindaudio/models/decoders/greedydecoder.py
index a11e478..26c6c21 100644
--- a/mindaudio/models/decoders/greedydecoder.py
+++ b/mindaudio/models/decoders/greedydecoder.py
@@ -1,3 +1,5 @@
+# Greedydecoder of deepspeech2 in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/blob/master/src/deepspeech_pytorch/decoder.py
import Levenshtein as Lev
import numpy as np
from six.moves import xrange
diff --git a/mindaudio/models/deepspeech2.py b/mindaudio/models/deepspeech2.py
index 84e377e..4a9bbac 100644
--- a/mindaudio/models/deepspeech2.py
+++ b/mindaudio/models/deepspeech2.py
@@ -1,3 +1,5 @@
+# DeepSpeech2 in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/blob/master/src/deepspeech_pytorch/model.py
"""
DeepSpeech2 model
"""
diff --git a/mindaudio/models/ecapatdnn.py b/mindaudio/models/ecapatdnn.py
index c69113d..332e536 100644
--- a/mindaudio/models/ecapatdnn.py
+++ b/mindaudio/models/ecapatdnn.py
@@ -1,3 +1,5 @@
+# ECAPA_TDNN in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/lobes/models/ECAPA_TDNN.py
import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops
diff --git a/mindaudio/models/fastspeech2/fastspeech2_v190.py b/mindaudio/models/fastspeech2/fastspeech2_v190.py
index 30bcc3b..b792810 100755
--- a/mindaudio/models/fastspeech2/fastspeech2_v190.py
+++ b/mindaudio/models/fastspeech2/fastspeech2_v190.py
@@ -1,3 +1,5 @@
+# FastSpeech2 in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/model/fastspeech2.py
import mindspore as ms
import mindspore.nn as nn
import numpy as np
diff --git a/mindaudio/models/fastspeech2/loss.py b/mindaudio/models/fastspeech2/loss.py
index 5a11470..fdab5a4 100644
--- a/mindaudio/models/fastspeech2/loss.py
+++ b/mindaudio/models/fastspeech2/loss.py
@@ -1,3 +1,5 @@
+# FastSpeech2Loss in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/model/loss.py
import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops
diff --git a/mindaudio/models/fastspeech2/variance_adapter.py b/mindaudio/models/fastspeech2/variance_adapter.py
index ba8ecc8..881e0c1 100644
--- a/mindaudio/models/fastspeech2/variance_adapter.py
+++ b/mindaudio/models/fastspeech2/variance_adapter.py
@@ -1,3 +1,5 @@
+# VarianceAdaptor in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/model/modules.py
import mindspore as ms
import mindspore.nn as nn
import numpy as np
diff --git a/mindaudio/models/layers/attention.py b/mindaudio/models/layers/attention.py
index c8d20e2..34c4540 100644
--- a/mindaudio/models/layers/attention.py
+++ b/mindaudio/models/layers/attention.py
@@ -1,3 +1,5 @@
+# Attention in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/attention.py
"""Multi-Head Attention layer definition."""
import math
diff --git a/mindaudio/models/layers/cmvn.py b/mindaudio/models/layers/cmvn.py
index 9f12c55..4977cbb 100644
--- a/mindaudio/models/layers/cmvn.py
+++ b/mindaudio/models/layers/cmvn.py
@@ -1,3 +1,5 @@
+# CMVN in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/cmvn.py
"""cepstral mean and variance normalization definition."""
import mindspore
diff --git a/mindaudio/models/layers/convolution.py b/mindaudio/models/layers/convolution.py
index ddec673..ff08d8b 100644
--- a/mindaudio/models/layers/convolution.py
+++ b/mindaudio/models/layers/convolution.py
@@ -1,3 +1,5 @@
+# Convolution in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/convolution.py
"""ConvolutionModule definition."""
from typing import Tuple
diff --git a/mindaudio/models/layers/embedding.py b/mindaudio/models/layers/embedding.py
index eba552f..84717ab 100644
--- a/mindaudio/models/layers/embedding.py
+++ b/mindaudio/models/layers/embedding.py
@@ -1,3 +1,5 @@
+# Embedding in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/embedding.py
"""Positonal Encoding Module."""
import math
diff --git a/mindaudio/models/layers/positionwise_feed_forward.py b/mindaudio/models/layers/positionwise_feed_forward.py
index 4eb664b..cc40fce 100644
--- a/mindaudio/models/layers/positionwise_feed_forward.py
+++ b/mindaudio/models/layers/positionwise_feed_forward.py
@@ -1,3 +1,5 @@
+# Positionwise feed forward in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/positionwise_feed_forward.py
"""Positionwise feed forward layer definition."""
import mindspore
import mindspore.common.dtype as mstype
diff --git a/mindaudio/models/layers/subsampling.py b/mindaudio/models/layers/subsampling.py
index 63b041f..31abebd 100644
--- a/mindaudio/models/layers/subsampling.py
+++ b/mindaudio/models/layers/subsampling.py
@@ -1,3 +1,5 @@
+# Subsampling in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/subsampling.py
"""Subsampling layer definition."""
import mindspore
diff --git a/mindaudio/models/layers/swish.py b/mindaudio/models/layers/swish.py
index 4501c08..717166f 100644
--- a/mindaudio/models/layers/swish.py
+++ b/mindaudio/models/layers/swish.py
@@ -1,3 +1,5 @@
+# Swish in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/swish.py
"""Swish() activation function for Conformer."""
import mindspore
diff --git a/mindaudio/models/tasnet.py b/mindaudio/models/tasnet.py
index 168c29d..4e68f01 100644
--- a/mindaudio/models/tasnet.py
+++ b/mindaudio/models/tasnet.py
@@ -1,3 +1,5 @@
+# TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/tasnet.py
""" TasNet """
import mindspore
from mindspore import nn, ops
diff --git a/mindaudio/models/transformer/constants.py b/mindaudio/models/transformer/constants.py
index 757d116..275a789 100644
--- a/mindaudio/models/transformer/constants.py
+++ b/mindaudio/models/transformer/constants.py
@@ -1,3 +1,5 @@
+# Constants in mindspore.
+# Copied from https://github.com/ming024/FastSpeech2/blob/master/transformer/Constants.py
"""Constants and tokens."""
PAD = 0
UNK = 1
diff --git a/mindaudio/models/transformer/layers.py b/mindaudio/models/transformer/layers.py
index 9accb3f..f8d40de 100644
--- a/mindaudio/models/transformer/layers.py
+++ b/mindaudio/models/transformer/layers.py
@@ -1,3 +1,5 @@
+# Layers in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/Layers.py
from mindspore import nn
from mindaudio.models.transformer.sublayers import (
diff --git a/mindaudio/models/transformer/models.py b/mindaudio/models/transformer/models.py
index d425907..80e78da 100644
--- a/mindaudio/models/transformer/models.py
+++ b/mindaudio/models/transformer/models.py
@@ -1,3 +1,5 @@
+# Models in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/Models.py
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore import nn, ops
diff --git a/mindaudio/models/transformer/score_function.py b/mindaudio/models/transformer/score_function.py
index 0d6a37a..e9d433c 100644
--- a/mindaudio/models/transformer/score_function.py
+++ b/mindaudio/models/transformer/score_function.py
@@ -1,3 +1,5 @@
+# ScaledDotProductAttention in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/Modules.py
import mindspore.numpy as msnp
from mindspore import nn, ops
diff --git a/mindaudio/models/transformer/sublayers.py b/mindaudio/models/transformer/sublayers.py
index f8b881f..917290b 100644
--- a/mindaudio/models/transformer/sublayers.py
+++ b/mindaudio/models/transformer/sublayers.py
@@ -1,3 +1,5 @@
+# Sublayers in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/SubLayers.py
import numpy as np
from mindspore import dtype as mstype
from mindspore import nn, ops
diff --git a/mindaudio/models/wavegrad/wavegrad_v190.py b/mindaudio/models/wavegrad/wavegrad_v190.py
index 01d19cb..666b071 100644
--- a/mindaudio/models/wavegrad/wavegrad_v190.py
+++ b/mindaudio/models/wavegrad/wavegrad_v190.py
@@ -1,3 +1,5 @@
+# WaveGrad in mindspore.
+# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/model.py
"""
THIS FILE IS FOR MindSpore 1.9
"""