diff --git a/intermediate_source/torchrec_intro_tutorial.py b/intermediate_source/torchrec_intro_tutorial.py index 70b2a5b4725..75d2532670e 100644 --- a/intermediate_source/torchrec_intro_tutorial.py +++ b/intermediate_source/torchrec_intro_tutorial.py @@ -919,6 +919,7 @@ def _wait_impl(self) -> torch.Tensor: # the trained model in a Python environment is incredibly inefficient. # There are two key differences between inference and training # environments: +# # * **Quantization**: Inference models are typically # quantized, where model parameters lose precision for lower latency in # predictions and reduced model size. For example FP32 (4 bytes) in