Commit 37f73e9

remove IPEX for embedding model, update steps for CI

1 parent 96024c4 commit 37f73e9

File tree

5 files changed: +23 -49 lines changed


AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/inference_custom.py

Lines changed: 0 additions & 2 deletions
@@ -69,12 +69,10 @@ def __init__(self, ipex_op=False, bf16=False, int8_model=False):
                 print("BF16 enabled")
                 self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"], dtype=torch.bfloat16)
                 self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"], dtype=torch.bfloat16)
-                #self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], dtype=torch.bfloat16)
                 self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"], dtype=torch.bfloat16)
             else:
                 self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"])
                 self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"])
-                #self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"])
                 self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"])
 
         # Torchscript to resolve performance issues with reorder operations
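Note on the change above: `ipex.optimize` from Intel Extension for PyTorch returns an optimized copy of the module it is given; this commit stops applying it to the `embedding_model` stage while the other pipeline stages stay optimized. A minimal sketch of the same API pattern, with a toy module standing in for the sample's SpeechBrain stages:

```python
# Minimal sketch of the ipex.optimize pattern used above. The toy module is
# illustrative only; the sample optimizes SpeechBrain pipeline stages instead.
import torch
import intel_extension_for_pytorch as ipex

stage = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU()).eval()

# BF16 path: returns a module with prepacked weights and fused ops where
# possible; pair it with autocast at inference time.
stage = ipex.optimize(stage, dtype=torch.bfloat16)

with torch.no_grad(), torch.cpu.amp.autocast(dtype=torch.bfloat16):
    out = stage(torch.randn(1, 16))
```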

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/README.md

Lines changed: 10 additions & 18 deletions
@@ -112,20 +112,16 @@ cd ./Training
 
 ### Option 1: Run in Jupyter Notebook
 
-1. Install Jupyter Notebook.
-   ```
-   pip install notebook
-   ```
-2. Launch Jupyter Notebook.
+1. Launch Jupyter Notebook.
    ```
    jupyter notebook --ip 0.0.0.0 --port 8888 --allow-root
    ```
-3. Follow the instructions to open the URL with the token in your browser.
-4. Locate and select the Training Notebook.
+2. Follow the instructions to open the URL with the token in your browser.
+3. Locate and select the Training Notebook.
    ```
    lang_id_training.ipynb
    ```
-5. Follow the instructions in the Notebook.
+4. Follow the instructions in the Notebook.
 
 
 ### Option 2: Run in a Console
@@ -217,10 +213,10 @@ After training, the output should be inside the `results/epaca/1987` folder. By
    cp label_encoder.txt ../.
    ```
 
-4. Change to the latest `CKPT` folder, and copy the classifier.ckpt and embedding_model.ckpt files into the `/Inference/lang_id_commonvoice_model/` folder which is two directories up.
+4. Change to the latest `CKPT` folder, and copy the classifier.ckpt and embedding_model.ckpt files into the `/Inference/lang_id_commonvoice_model/` folder which is two directories up. By default, the command below will navigate into the single CKPT folder that is present, but you can change it to the specific folder name.
    ```bash
    # Navigate into the CKPT folder
-   cd CKPT<DATE_OF_RUN>
+   cd CKPT*
 
    cp classifier.ckpt ../../.
    cp embedding_model.ckpt ../../
@@ -253,20 +249,16 @@ To run inference, you must have already run all of the training scripts, generat
 
 ### Option 1: Run in Jupyter Notebook
 
-1. If you have not already done so, install Jupyter Notebook.
-   ```
-   pip install notebook
-   ```
-2. Launch Jupyter Notebook.
+1. Launch Jupyter Notebook.
    ```
    jupyter notebook --ip 0.0.0.0 --port 8889 --allow-root
    ```
-3. Follow the instructions to open the URL with the token in your browser.
-4. Locate and select the inference Notebook.
+2. Follow the instructions to open the URL with the token in your browser.
+3. Locate and select the inference Notebook.
    ```
    lang_id_inference.ipynb
    ```
-5. Follow the instructions in the Notebook.
+4. Follow the instructions in the Notebook.
 
 ### Option 2: Run in a Console
 
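A side note on the `cd CKPT*` shortcut introduced here: it relies on shell glob expansion, so it behaves as intended only while exactly one `CKPT` folder exists under `save/`. If several checkpoints accumulate, picking the newest one explicitly is more robust; a hedged sketch (the paths assume the `save/` directory from step 4 of the README, and the snippet is not part of the committed sample):

```python
# Hedged alternative to `cd CKPT*`: select the newest checkpoint folder
# explicitly. Assumes the current directory is lang_id_commonvoice_model/save/,
# as in step 4 of the README; not part of the committed sample.
import glob
import os
import shutil

ckpt_dirs = sorted(glob.glob("CKPT*"), key=os.path.getmtime)
latest = ckpt_dirs[-1]  # most recently modified CKPT folder

for name in ("classifier.ckpt", "embedding_model.ckpt"):
    # Two levels up from the CKPT folder is lang_id_commonvoice_model/,
    # i.e. one level up from save/.
    shutil.copy(os.path.join(latest, name), os.path.join("..", name))
```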
AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Training/lang_id_training.ipynb

Lines changed: 1 addition & 1 deletion
@@ -177,7 +177,7 @@
     "\n",
     "# 4)\n",
     "# Navigate into the CKPT folder\n",
-    "!cd CKPT<DATE_OF_RUN> #@TODO: set this to your CKPT folder\n",
+    "!cd CKPT* # Set this to your CKPT folder. By default it will navigate into the one that is present.\n",
     "!cp classifier.ckpt ../../.\n",
     "!cp embedding_model.ckpt ../../\n",
     "!cd ../.."

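One caveat when adapting this cell: IPython runs each `!` line in its own subshell, so the `!cd CKPT*` does not change the working directory for the `!cp` lines that follow it. Chaining the commands in a single shell invocation sidesteps that; a hedged sketch (illustrative, not part of the commit):

```python
# Illustrative only: run cd and the copies in one shell so the cd sticks.
# Each separate `!` line in a notebook gets its own subshell.
import subprocess

subprocess.run(
    "cd CKPT* && cp classifier.ckpt ../../. && cp embedding_model.ckpt ../../",
    shell=True,
    check=True,
)
```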
AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/initialize.sh

Lines changed: 4 additions & 1 deletion
@@ -11,10 +11,13 @@ cd ..
 export PYTHONPATH=$PYTHONPATH:$(pwd)/speechbrain
 
 # Install huggingface datasets and other requirements
-conda install -y datasets tqdm librosa
+conda install -y datasets tqdm librosa jupyter ipykernel ipywidgets
 
 # Install webdataset
 python -m pip install webdataset==0.2.100
 
 # Install libraries for MP3 to WAV conversion
 python -m pip install pydub
+
+# Install notebook to run Jupyter notebooks
+python -m pip install notebook

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/sample.json

Lines changed: 8 additions & 27 deletions
@@ -12,36 +12,17 @@
   {
     "id": "Language_Identification_E2E",
     "env": [
+      "export COMMON_VOICE_PATH=/data/commonVoice"
     ],
     "steps": [
-      "export COMMON_VOICE_PATH=/data/commonVoice",
-      "sudo apt-get update && apt-get install ffmpeg libgl1",
-      "git clone https://github.com/oneapi-src/oneAPI-samples.git",
-      "cd oneAPI-samples/AI-and-Analytics/End-to-end-Workloads/LanguageIdentification",
+      "apt-get update && apt-get install ffmpeg libgl1 -y",
       "source initialize.sh",
-      "cd /Training",
-      "cp speechbrain/recipes/VoxLingua107/lang_id/create_wds_shards.py create_wds_shards.py",
-      "cp speechbrain/recipes/VoxLingua107/lang_id/train.py train.py",
-      "cp speechbrain/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml train_ecapa.yaml",
-      "patch < create_wds_shards.patch",
-      "patch < train_ecapa.patch",
-      "python prepareAllCommonVoice.py -path $COMMON_VOICE_PATH -max_samples 2000 --createCsv --train --dev --test",
-      "python create_wds_shards.py ${COMMON_VOICE_PATH}/processed_data/train ${COMMON_VOICE_PATH}/processed_data/commonVoice_shards/train",
-      "python create_wds_shards.py ${COMMON_VOICE_PATH}/processed_data/dev ${COMMON_VOICE_PATH}/processed_data/commonVoice_shards/dev",
-      "python train.py train_ecapa.yaml --device cpu",
-      "cp -R results/epaca/1987 ../Inference/lang_id_commonvoice_model",
-      "cd ../Inference/lang_id_commonvoice_model/save",
-      "cp label_encoder.txt ../.",
-      "cd CKPT<DATE_OF_RUN>",
-      "cp classifier.ckpt ../../.",
-      "cp embedding_model.ckpt ../../",
-      "cd ../..",
-      "cd /Inference",
-      "python inference_commonVoice.py -p ${COMMON_VOICE_PATH}/processed_data/test",
-      "python inference_custom.py -p data_custom -d 3 -s 50 --vad",
-      "python inference_custom.py -p data_custom -d 3 -s 50 --vad --ipex --verbose",
-      "python quantize_model.py -p ./lang_id_commonvoice_model -datapath $COMMON_VOICE_PATH/processed_data/dev",
-      "python inference_custom.py -p data_custom -d 3 -s 50 --vad --int8_model --verbose"
+      "cd ./Dataset",
+      "python get_dataset.py --output_dir ${COMMON_VOICE_PATH}",
+      "cd ../Training",
+      "jupyter nbconvert --execute --to notebook --inplace --debug lang_id_training.ipynb",
+      "cd ../Inference",
+      "jupyter nbconvert --execute --to notebook --inplace --debug lang_id_inference.ipynb"
     ]
   }
 ]
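The reworked CI steps drive both notebooks through `jupyter nbconvert --execute --to notebook --inplace`, which runs every cell headlessly and writes the executed notebook back over the original (`--debug` raises the log level). A minimal sketch of the same flow; the Python wrapper is illustrative, the CI itself runs the shell commands above:

```python
# Illustrative wrapper around the CI steps above: execute each notebook
# headlessly and overwrite it with the executed result.
import subprocess

for workdir, notebook in [
    ("Training", "lang_id_training.ipynb"),
    ("Inference", "lang_id_inference.ipynb"),
]:
    subprocess.run(
        ["jupyter", "nbconvert", "--execute", "--to", "notebook",
         "--inplace", notebook],
        cwd=workdir,
        check=True,  # surface a failing cell as a CI failure
    )
```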
