3333 * for each row along with the quantized weights.
3434 */
3535c10::intrusive_ptr<EmbeddingPackedParamsBase> PackedEmbeddingBagWeight::prepack (
36- at::Tensor qweight) {
36+ const at::Tensor& qweight) {
3737 static constexpr int64_t version = 1 ;
3838 TORCH_CHECK (
3939 qweight.dim () == 2 ,
@@ -67,8 +67,8 @@ c10::intrusive_ptr<EmbeddingPackedParamsBase> PackedEmbeddingBagWeight::prepack(
6767 " Expect embedding_bag weights to be quantized using kPerChannelAffineFloatQParams" );
6868 std::vector<float > weight_bias (embedding_rows);
6969
70- at::Tensor channel_scales = qweight.q_per_channel_scales ();
71- at::Tensor channel_zero_points = qweight.q_per_channel_zero_points ();
70+ const auto & channel_scales = qweight.q_per_channel_scales ();
71+ const auto & channel_zero_points = qweight.q_per_channel_zero_points ();
7272 std::vector<float > weight_scales (
7373 channel_scales.data_ptr <float >(),
7474 channel_scales.data_ptr <float >() + embedding_rows);
@@ -77,6 +77,11 @@ c10::intrusive_ptr<EmbeddingPackedParamsBase> PackedEmbeddingBagWeight::prepack(
7777 channel_zero_points.data_ptr <float >() + embedding_rows);
7878
7979 for (const auto i : c10::irange (embedding_rows)) {
80+ // weight_zero_points and weight_scales are both constructed with exactly
81+ // embedding_rows elements, so the lint warning suppressed below is a
82+ // false positive for this loop. If that sizing ever changes, the indexing
83+ // here must be guarded by an explicit bounds check.
84+ // NOLINTNEXTLINE(facebook-hte-LocalUncheckedArrayBounds)
8085 weight_bias[i] = weight_zero_points[i] * weight_scales[i] * -1 ;
8186 }
8287
@@ -237,16 +242,16 @@ Tensor& qembeddingbag_byte_prepack_out(Tensor& output, const Tensor& weight) {
237242
238243 const auto weight_sizes = weight.sizes ();
239244 const auto cols_dim = weight_sizes.size () - 1 ;
240- const int64_t embedding_rows = c10::size_to_dim_ (cols_dim, weight_sizes);
241- const int32_t embedding_cols = weight_sizes[cols_dim];
245+ const int64_t embedding_rows = c10::size_to_dim_ (static_cast < int >( cols_dim) , weight_sizes);
246+ const int32_t embedding_cols = static_cast < int32_t >( weight_sizes[cols_dim]) ;
242247 // Add 8 bytes per column to store FP32 scale and zero_point per row.
243- const int32_t output_columns = embedding_cols + 2 * sizeof (float );
248+ const int32_t output_columns = static_cast < int32_t >( embedding_cols + 2 * sizeof (float ) );
244249 const auto weight_contig =
245250 weight.expect_contiguous (weight.suggest_memory_format ());
246251
247252 // Adjust output dimensions to account for FP32 scale and zero_points.
248253 std::vector<int64_t > output_shape = weight_sizes.vec ();
249- output_shape[ cols_dim] = output_columns;
254+ output_shape. at ( cols_dim) = output_columns;
250255 at::native::resize_ (output, output_shape, std::nullopt );
251256 auto * output_data = output.data_ptr <uint8_t >();
252257
@@ -330,13 +335,13 @@ Tensor qembeddingbag_byte_prepack_meta(const Tensor& weight) {
330335 " 'embedding_bag_byte_prepack' only support float32 or float16." );
331336 const auto weight_sizes = weight.sizes ();
332337 const auto cols_dim = weight_sizes.size () - 1 ;
333- const int32_t embedding_cols = weight_sizes[cols_dim];
338+ const int32_t embedding_cols = static_cast < int32_t >( weight_sizes[cols_dim]) ;
334339 // Add 8 bytes per column to store FP32 scale and zero_point per row.
335- const int32_t output_columns = embedding_cols + 2 * sizeof (float );
340+ const int32_t output_columns = static_cast < int32_t >( embedding_cols + 2 * sizeof (float ) );
336341
337342 // Adjust output dimensions to account for FP32 scale and zero_points.
338343 std::vector<int64_t > output_shape = weight_sizes.vec ();
339- output_shape[ cols_dim] = output_columns;
344+ output_shape. at ( cols_dim) = output_columns;
340345 at::SymDimVector output_shape_vec (output_shape);
341346
342347 return at::empty_symint (
@@ -407,7 +412,7 @@ Tensor _qembeddingbag_nbit_prepack_helper(
407412 bit_width,
408413 weight_data + start_idx * embedding_cols,
409414 end_idx - start_idx,
410- embedding_cols,
415+ static_cast < int >( embedding_cols) ,
411416 output_data + start_idx * output_shape[1 ]);
412417 });
413418 } else {
@@ -418,7 +423,7 @@ Tensor _qembeddingbag_nbit_prepack_helper(
418423 bit_width,
419424 weight_data + start_idx * embedding_cols,
420425 end_idx - start_idx,
421- embedding_cols,
426+ static_cast < int >( embedding_cols) ,
422427 output_data + start_idx * output_shape[1 ]);
423428 });
424429 }
@@ -475,7 +480,7 @@ Tensor _qembeddingbag_nbit_prepack_helper(
475480 std::uint8_t quantized = std::max (
476481 0 ,
477482 std::min<int >(
478- lrintf ((X - Xmin) * inverse_scale), (1 << bit_width) - 1 ));
483+ static_cast < int >( lrintf ((X - Xmin) * inverse_scale) ), (1 << bit_width) - 1 ));
479484 // We pack 2 4-bit values in a byte. Index 0 is packed in the lower
480485 // 4-bits and index 1 is packed in the upper 4-bits.
481486 if (col % NUM_ELEM_PER_BYTE == 0 ) {
@@ -528,8 +533,8 @@ Tensor qembeddingbag_2bit_prepack(
528533
529534class QEmbeddingPackWeights final {
530535 public:
531- static c10::intrusive_ptr<EmbeddingPackedParamsBase> run (at::Tensor weight) {
532- return PackedEmbeddingBagWeight::prepack (std::move ( weight) );
536+ static c10::intrusive_ptr<EmbeddingPackedParamsBase> run (const at::Tensor& weight) {
537+ return PackedEmbeddingBagWeight::prepack (weight);
533538 }
534539};
535540
0 commit comments