Skip to content

Commit f58498a

Browse files
committed
updates for running with IPEX, fix sample inputs
1 parent 43ddc23 commit f58498a

File tree

5 files changed

+33
-40
lines changed

5 files changed

+33
-40
lines changed

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/inference_custom.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -61,41 +61,47 @@ def __init__(self, ipex_op=False, bf16=False, int8_model=False):
6161
self.model_int8 = load(source_model_int8_path, self.language_id)
6262
self.model_int8.eval()
6363
elif ipex_op:
64+
self.language_id.eval()
65+
6466
# Optimize for inference with IPEX
6567
print("Optimizing inference with IPEX")
66-
self.language_id.eval()
67-
sampleInput = (torch.load("./sample_input_features.pt"), torch.load("./sample_input_wav_lens.pt"))
6868
if bf16:
6969
print("BF16 enabled")
7070
self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"], dtype=torch.bfloat16)
7171
self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"], dtype=torch.bfloat16)
72-
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], dtype=torch.bfloat16)
72+
#self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], dtype=torch.bfloat16)
7373
self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"], dtype=torch.bfloat16)
7474
else:
7575
self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"])
7676
self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"])
77-
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"])
77+
#self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"])
7878
self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"])
7979

8080
# Torchscript to resolve performance issues with reorder operations
81+
print("Applying Torchscript")
82+
sampleWavs = torch.load("./sample_wavs.pt")
83+
sampleWavLens = torch.ones(sampleWavs.shape[0])
8184
with torch.no_grad():
82-
I2 = self.language_id.mods["embedding_model"](*sampleInput)
85+
I1 = self.language_id.mods["compute_features"](sampleWavs)
86+
I2 = self.language_id.mods["mean_var_norm"](I1, sampleWavLens)
87+
I3 = self.language_id.mods["embedding_model"](I2, sampleWavLens)
88+
8389
if bf16:
8490
with torch.cpu.amp.autocast():
85-
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=(torch.rand(1,32000)))
86-
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=sampleInput)
87-
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
88-
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I2)
91+
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=sampleWavs)
92+
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=(I1, sampleWavLens))
93+
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=(I2, sampleWavLens))
94+
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I3)
8995

9096
self.language_id.mods["compute_features"] = torch.jit.freeze(self.language_id.mods["compute_features"])
9197
self.language_id.mods["mean_var_norm"] = torch.jit.freeze(self.language_id.mods["mean_var_norm"])
9298
self.language_id.mods["embedding_model"] = torch.jit.freeze(self.language_id.mods["embedding_model"])
9399
self.language_id.mods["classifier"] = torch.jit.freeze( self.language_id.mods["classifier"])
94100
else:
95-
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=(torch.rand(1,32000)))
96-
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=sampleInput)
97-
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
98-
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I2)
101+
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=sampleWavs)
102+
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=(I1, sampleWavLens))
103+
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=(I2, sampleWavLens))
104+
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I3)
99105

100106
self.language_id.mods["compute_features"] = torch.jit.freeze(self.language_id.mods["compute_features"])
101107
self.language_id.mods["mean_var_norm"] = torch.jit.freeze(self.language_id.mods["mean_var_norm"])
@@ -114,11 +120,11 @@ def predict(self, data_path="", ipex_op=False, bf16=False, int8_model=False, ver
114120
with torch.no_grad():
115121
if bf16:
116122
with torch.cpu.amp.autocast():
117-
prediction = self.language_id.classify_batch(signal)
123+
prediction = self.language_id.classify_batch(signal)
118124
else:
119-
prediction = self.language_id.classify_batch(signal)
125+
prediction = self.language_id.classify_batch(signal)
120126
else: # default
121-
prediction = self.language_id.classify_batch(signal)
127+
prediction = self.language_id.classify_batch(signal)
122128

123129
inference_end_time = time()
124130
inference_latency = inference_end_time - inference_start_time

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/interfaces.patch

Lines changed: 0 additions & 11 deletions
This file was deleted.

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/README.md

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Spoken audio comes in different languages and this sample uses a model to identi
1717

1818
| Optimized for | Description
1919
|:--- |:---
20-
| OS | Ubuntu* 18.04 or newer
20+
| OS | Ubuntu* 22.04 or newer
2121
| Hardware | Intel® Xeon® processor family
2222
| Software | Intel® OneAPI AI Analytics Toolkit <br> Hugging Face SpeechBrain
2323

@@ -51,7 +51,7 @@ For this sample, you will need to download the following languages: **Japanese**
5151
2. Enter your email.
5252
3. Check the boxes, and right-click on the download button to copy the link address.
5353
4. Paste this link into a text editor and copy the first part of the URL up to ".tar.gz".
54-
5. Use **GNU wget** on the URL to download the data to `/data/commonVoice`.
54+
5. Use **GNU wget** on the URL to download the data to `/data/commonVoice` or a folder of your choice.
5555

5656
Alternatively, you can use a directory on your local drive due to the large amount of data.
5757

@@ -93,7 +93,7 @@ Then activate your environment:
9393
conda activate <your-env-name>
9494
```
9595

96-
2. Set the environment variable `COMMON_VOICE_PATH`
96+
2. Set the environment variable `COMMON_VOICE_PATH`. This needs to match with where you downloaded your dataset.
9797
```bash
9898
export COMMON_VOICE_PATH=/data/commonVoice
9999
```
@@ -263,10 +263,6 @@ To run inference, you must have already run all of the training scripts, generat
263263
```
264264
cd /Inference
265265
```
266-
2. Patch SpeechBrain's `interfaces.py`. This patch is required for PyTorch* TorchScript to work because the output of the model must contain only tensors.
267-
```
268-
patch ../speechbrain/speechbrain/pretrained/interfaces.py < interfaces.patch
269-
```
270266

271267
### Run in Jupyter Notebook
272268

@@ -328,7 +324,7 @@ Both scripts support input options; however, some options can be used on `inferen
328324
To run inference on custom data, you must specify a folder with **.wav** files and pass the path in as an argument. You can do so by creating a folder named `data_custom` and then copy 1 or 2 **.wav** files from your test dataset into it. **.mp3** files will NOT work.
329325

330326
Run the inference script.
331-
```
327+
```bash
332328
python inference_custom.py -p <path_to_folder>
333329
```
334330

@@ -337,13 +333,13 @@ The following examples describe how to use the scripts to produce specific outco
337333
**Default: Random Selections**
338334

339335
1. To randomly select audio clips from audio files for prediction, enter commands similar to the following:
340-
```
336+
```bash
341337
python inference_custom.py -p data_custom -d 3 -s 50
342338
```
343339
This picks 50 3-second samples from each **.wav** file in the `data_custom` folder. The `output_summary.csv` file summarizes the results.
344340

345341
2. To randomly select audio clips from audio files after applying **Voice Activity Detection (VAD)**, use the `--vad` option:
346-
```
342+
```bash
347343
python inference_custom.py -p data_custom -d 3 -s 50 --vad
348344
```
349345
Again, the `output_summary.csv` file summarizes the results.
@@ -353,18 +349,20 @@ The following examples describe how to use the scripts to produce specific outco
353349
**Optimization with Intel® Extension for PyTorch (IPEX)**
354350

355351
1. To optimize user-defined data, enter commands similar to the following:
356-
```
352+
```bash
357353
python inference_custom.py -p data_custom -d 3 -s 50 --vad --ipex --verbose
358354
```
355+
This will apply `ipex.optimize` to the model(s) and TorchScript. You can also add the `--bf16` option along with `--ipex` to run in the BF16 data type, supported on 4th Gen Intel® Xeon® Scalable processors and newer.
356+
359357
>**Note**: The `--verbose` option is required to view the latency measurements.
360358
361359
**Quantization with Intel® Neural Compressor (INC)**
362360

363361
1. To improve inference latency, you can use the Intel® Neural Compressor (INC) to quantize the trained model from FP32 to INT8 by running `quantize_model.py`.
364-
```
362+
```bash
365363
python quantize_model.py -p ./lang_id_commonvoice_model -datapath $COMMON_VOICE_PATH/dev
366364
```
367-
Use the `-datapath` argument to specify a custom evaluation dataset. By default, the datapath is set to the `/data/commonVoice/dev` folder that was generated from the data preprocessing scripts in the `Training` folder.
365+
Use the `-datapath` argument to specify a custom evaluation dataset. By default, the datapath is set to the `$COMMON_VOICE_PATH/dev` folder that was generated from the data preprocessing scripts in the `Training` folder.
368366

369367
After quantization, the model will be stored in `lang_id_commonvoice_model_INT8` and `neural_compressor.utils.pytorch.load` will have to be used to load the quantized model for inference. If `self.language_id` is the original model and `data_path` is the path to the audio file:
370368
```

0 commit comments

Comments
 (0)