@@ -275,6 +275,56 @@ void check_quantize_args(
275275 " actual quant_max: " ,
276276 quant_max);
277277}
+
+//
+// Reference Implementation
+//
+
+/*
+ * Reference implementation of quantize_per_tensor
+ */
+at::Tensor quantize_per_tensor_reference_impl(
+    const at::Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype) {
+  // Create output tensor with the target dtype
+  at::Tensor out = at::empty_like(input, dtype);
+
+  // Quantize the input tensor
+  float inv_scale = 1.0 / scale;
+
+  // Iterate through the tensor and quantize each element
+  at::Tensor float_input = input.to(at::kFloat);
+  at::Tensor float_values = float_input.flatten();
+
+  auto out_flat = out.flatten();
+
+  for (int i = 0; i < float_values.numel(); i++) {
+    float value = float_values[i].item<float>();
+    int64_t qvalue = zero_point + std::nearbyint(inv_scale * value);
+
+    qvalue = std::max<int64_t>(qvalue, quant_min);
+    qvalue = std::min<int64_t>(qvalue, quant_max);
+
+    if (dtype == at::kByte) {
+      out_flat[i] = static_cast<uint8_t>(qvalue);
+    } else if (dtype == at::kChar) {
+      out_flat[i] = static_cast<int8_t>(qvalue);
+    } else if (dtype == at::kShort) {
+      out_flat[i] = static_cast<int16_t>(qvalue);
+    } else if (dtype == at::kInt) {
+      out_flat[i] = static_cast<int32_t>(qvalue);
+    } else if (dtype == at::kLong) {
+      out_flat[i] = static_cast<int64_t>(qvalue);
+    }
+  }
+
+  return out.reshape(input.sizes());
+}
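As an aside, the per-element mapping in the loop above is the standard affine quantization q = clamp(round(x / scale) + zero_point, quant_min, quant_max). A minimal standalone sketch of just that mapping, using only the C++ standard library (the helper name quantize_one is an assumption for illustration, not part of this patch):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a single float value the same way the reference loop does.
int64_t quantize_one(
    float value,
    double scale,
    int64_t zero_point,
    int64_t quant_min,
    int64_t quant_max) {
  const double inv_scale = 1.0 / scale;
  const int64_t qvalue =
      zero_point + static_cast<int64_t>(std::nearbyint(inv_scale * value));
  return std::min(std::max(qvalue, quant_min), quant_max);
}

// Example: quantize_one(0.5f, 0.1, 0, -128, 127) == 5, since 0.5 / 0.1
// rounds to 5, which already lies inside [-128, 127].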
+
 /*
  * Reference implementation of quantize_per_token
  */
@@ -337,6 +387,17 @@ at::Tensor quantize_per_token_reference_impl(
   return out;
 }

+// Forward declaration of implementation functions
+void test_vulkan_quantize_per_tensor_impl(
+    const std::vector<int>& input_sizes,
+    float scale,
+    int zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype,
+    const vkcompute::utils::StorageType in_storage,
+    const vkcompute::utils::StorageType out_storage);
+
 void test_vulkan_quantize_per_token_impl(
     const std::vector<int>& input_sizes,
     const std::vector<float>& scales,
@@ -347,6 +408,37 @@ void test_vulkan_quantize_per_token_impl(
     const vkcompute::utils::StorageType in_storage,
     const vkcompute::utils::StorageType out_storage);

+// Wrapper function to test both buffer and texture storage types
+void test_vulkan_quantize_per_tensor(
+    const std::vector<int>& input_sizes,
+    float scale,
+    int zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype) {
+  // Test with buffer storage
+  test_vulkan_quantize_per_tensor_impl(
+      input_sizes,
+      scale,
+      zero_point,
+      quant_min,
+      quant_max,
+      dtype,
+      vkcompute::utils::kBuffer,
+      vkcompute::utils::kBuffer);
+
+  // Test with texture storage
+  test_vulkan_quantize_per_tensor_impl(
+      input_sizes,
+      scale,
+      zero_point,
+      quant_min,
+      quant_max,
+      dtype,
+      vkcompute::utils::kTexture3D,
+      vkcompute::utils::kTexture3D);
+}
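For reference, a single call to this wrapper exercises both storage paths; a hypothetical invocation (sizes and parameter values are illustrative, not taken from this patch) would look like:

test_vulkan_quantize_per_tensor({2, 3, 4}, 0.01f, 1, -128, 127, at::kChar);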
+
 // Wrapper function to test both buffer and texture storage types
 void test_vulkan_quantize_per_token(
     const std::vector<int>& input_sizes,
@@ -378,6 +470,166 @@ void test_vulkan_quantize_per_token(
       vkcompute::utils::kTexture3D);
 }

+void test_reference_quantize_per_tensor(
+    const std::vector<int>& input_sizes,
+    float scale,
+    int zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype) {
+  check_quantize_args(quant_min, quant_max, dtype);
+  std::vector<int64_t> input_sizes_int64(
+      input_sizes.begin(), input_sizes.end());
+  at::Tensor input =
+      at::zeros(input_sizes_int64, at::device(at::kCPU).dtype(at::kFloat));
+
+  // Fill with a simple pattern: values from 0 to 1 in steps
+  float step = 1.0f / (input.numel() - 1);
+  auto flat_input = input.flatten();
+  for (int i = 0; i < flat_input.numel(); i++) {
+    flat_input[i] = i * step;
+  }
+
+  // Reshape back to original dimensions
+  input = flat_input.reshape(input_sizes_int64);
+
+  // Get reference output
+  at::Tensor reference_out = quantize_per_tensor_reference_impl(
+      input, scale, zero_point, quant_min, quant_max, dtype);
+
+  // Get implementation output
+  at::Tensor impl_out = torch::executor::native::quantize_per_tensor_aten(
+      input, scale, zero_point, quant_min, quant_max, dtype);
+
+  // Convert to int for consistent display regardless of underlying type
+  at::Tensor reference_int = reference_out.to(at::kInt);
+  at::Tensor impl_int = impl_out.to(at::kInt);
+
+  const bool output_correct = at::equal(reference_int, impl_int);
+  if (!output_correct) {
+    at::Tensor diffs = at::abs(reference_int - impl_int);
+
+    std::cout << "\n"
+              << " Failed with parameters: " << std::endl;
+    std::cout << " scale: " << scale << std::endl;
+    std::cout << " zero_point: " << zero_point << std::endl;
+    std::cout << " quant_min: " << quant_min << std::endl;
+    std::cout << " quant_max: " << quant_max << std::endl;
+
+    std::cout << " input:" << std::endl;
+    std::cout << input << std::endl;
+    std::cout << " reference:" << std::endl;
+    std::cout << reference_int << std::endl;
+    std::cout << " my_reference:" << std::endl;
+    std::cout << impl_int << std::endl;
+  }
+
+  ASSERT_TRUE(output_correct);
+}
+
+void test_vulkan_quantize_per_tensor_impl(
+    const std::vector<int>& input_sizes,
+    float scale,
+    int zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype,
+    const vkcompute::utils::StorageType in_storage =
+        vkcompute::utils::kTexture3D,
+    const vkcompute::utils::StorageType out_storage =
+        vkcompute::utils::kTexture3D) {
+  check_quantize_args(quant_min, quant_max, dtype);
+  std::vector<int64_t> input_sizes_int64(
+      input_sizes.begin(), input_sizes.end());
+  at::Tensor input =
+      at::rand(input_sizes_int64, at::device(at::kCPU).dtype(at::kFloat));
+
+  // Get reference output
+  at::Tensor reference_out = torch::executor::native::quantize_per_tensor_aten(
+      input, scale, zero_point, quant_min, quant_max, dtype);
+
+  // Build Vulkan quantize_per_tensor graph
+  using namespace vkcompute;
+
+  GraphConfig config;
+  config.set_storage_type_override(in_storage);
+  ComputeGraph graph(config);
+
+  IOValueRef r_input = graph.add_input_tensor(
+      input.sizes().vec(), from_at_scalartype(input.scalar_type()), in_storage);
+
+  const ValueRef r_scale = graph.add_scalar<double>(scale);
+  const ValueRef r_zero_point = graph.add_scalar<int64_t>(zero_point);
+  const ValueRef r_quant_min = graph.add_scalar<int64_t>(quant_min);
+  const ValueRef r_quant_max = graph.add_scalar<int64_t>(quant_max);
+
+  const ValueRef r_out = graph.add_tensor(
+      input.sizes().vec(), from_at_scalartype(dtype), out_storage);
+
+  VK_GET_OP_FN("quantize_per_tensor.default")
+  (graph,
+   {
+       r_input.value,
+       r_scale,
+       r_zero_point,
+       r_quant_min,
+       r_quant_max,
+       r_out,
+   });
+
+  ValueRef staging_out = graph.set_output_tensor(r_out);
+
+  graph.prepare();
+  graph.encode_prepack();
+  graph.prepack();
+  graph.encode_execute();
+
+  // Run Vulkan quantize_per_tensor
+  graph.copy_into_staging(
+      r_input.staging, input.const_data_ptr(), input.numel());
+
+  graph.execute();
+
+  at::Tensor vk_out = at::empty_like(reference_out).contiguous();
+  graph.copy_from_staging(
+      staging_out, vk_out.mutable_data_ptr(), vk_out.numel());
+
+  // Compare outputs
+  // For quantized types, we need to compare the actual integer values
+  at::Tensor reference_int = reference_out.to(at::kInt);
+  at::Tensor vk_int = vk_out.to(at::kInt);
+
+  const bool output_correct = at::equal(reference_int, vk_int);
+  if (!output_correct) {
+    at::Tensor diffs = at::abs(reference_int - vk_int);
+
+    std::cout << "\n"
+              << " Failed with parameters: " << std::endl;
+    std::cout << " scale: " << scale << std::endl;
+    std::cout << " zero_point: " << zero_point << std::endl;
+    std::cout << " quant_min: " << quant_min << std::endl;
+    std::cout << " quant_max: " << quant_max << std::endl;
+
+    std::cout << " input:" << std::endl;
+    std::cout << input << std::endl;
+    std::cout << " reference:" << std::endl;
+    std::cout << reference_int << std::endl;
+    std::cout << " vulkan:" << std::endl;
+    std::cout << vk_int << std::endl;
+  }
+
+  ASSERT_TRUE(output_correct);
+}
+
+TEST(VulkanQuantizePerTensorTest, test_reference_quantize_per_tensor_int8) {
+  test_reference_quantize_per_tensor(
+      {2, 3, 4}, // input sizes
+      0.1, // scale
+      0, // zero_point
+      -128, // quant_min
+      127, // quant_max
+      at::kChar);
+}
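A Vulkan-path counterpart would presumably go through the test_vulkan_quantize_per_tensor wrapper defined above; a sketch of what such a case could look like, where the test name and parameter values are assumptions rather than part of this diff:

TEST(VulkanQuantizePerTensorTest, test_vulkan_quantize_per_tensor_int8_sketch) {
  // Hypothetical example only: runs an int8 configuration through both
  // buffer and texture storage via the wrapper.
  test_vulkan_quantize_per_tensor(
      {2, 3, 4}, // input sizes
      0.1, // scale
      0, // zero_point
      -128, // quant_min
      127, // quant_max
      at::kChar);
}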
381633void test_reference_quantize_per_token (
382634 const std::vector<int >& input_sizes,
383635 const std::vector<float >& scales,