1212#include < numeric>
1313#include < stdexcept> // for logic_error
1414#include < vector>
15+ #include " RefImplementations.h"
1516#include " fbgemm/Fbgemm.h"
1617
1718namespace fbgemm {
@@ -138,10 +139,6 @@ int fbgemmConv(
138139
139140 switch (ConvFastPath<SPATIAL_DIM, ACC_T>(conv_p)) {
140141 case optimized_conv_t ::depthwise: {
141- #if defined(__aarch64__)
142- throw std::runtime_error (
143- " fbgemmConv<processOutputType, SPATIAL_DIM, ACC_T>(): No fallback available for aarch64" );
144- #else
145142 // 2D and 3D depthwise fast path
146143 // std::cout << "Depthwise fast path" << std::endl;
147144 if constexpr (SPATIAL_DIM == 3 ) {
@@ -220,7 +217,6 @@ int fbgemmConv(
220217 throw std::runtime_error (msg);
221218 }
222219 break ;
223- #endif // __aarch64__
224220 }
225221 case optimized_conv_t ::groupwise: {
226222 // optimized groupwise convolution
@@ -242,6 +238,8 @@ int fbgemmConv(
242238 break ;
243239 }
244240 case optimized_conv_t ::pointwise: {
241+ #if defined(__x86_64__) || defined(__i386__) || \
242+ (defined (_MSC_VER) && (defined (_M_X64) || defined (_M_IX86)))
245243 std::vector<int32_t > row_offset_buf (
246244 PackAWithRowOffset<uint8_t >::rowOffsetBufferSize (blocking_params));
247245 int image_dim = std::accumulate (
@@ -271,16 +269,42 @@ int fbgemmConv(
271269 thread_id,
272270 num_threads,
273271 blocking_params);
272+ #else
273+ DoNothing<> doNothingObj{};
274+ ReQuantizeOutput<
275+ processOutputType::RELU_FUSED,
276+ processOutputType::QGRANType,
277+ typename processOutputType::BIAS_T>
278+ reqObj (
279+ doNothingObj,
280+ outProcess.getCMultiplier (),
281+ outProcess.getCZeroPoint (),
282+ outProcess.getAZeroPoint (),
283+ outProcess.getBZeroPoint (),
284+ nullptr , /* row offset buffer */
285+ outProcess.getColOffsets (),
286+ outProcess.getBias (),
287+ conv_p.OC ,
288+ conv_p.G ,
289+ outProcess.getActWScale ());
290+
291+ conv_requant_ref (
292+ conv_p,
293+ activations,
294+ packed_weights.getPackedWForPointwise ()->getBuf (),
295+ false ,
296+ out,
297+ outBuffer,
298+ reqObj,
299+ thread_id,
300+ num_threads);
301+ #endif
274302 break ;
275303 }
276304 case optimized_conv_t ::directconv: {
277305 // specialized direct convolution path
278306 // std::cout << "Directconv fast path" << std::endl;
279307 if constexpr (SPATIAL_DIM == 2 ) {
280- #if defined(__aarch64__)
281- throw std::runtime_error (
282- " fbgemmConv<processOutputType, SPATIAL_DIM, ACC_T>(): No fallback available for aarch64" );
283- #else
284308 fbgemmDirectConv<SPATIAL_DIM, processOutputType::QGRANType>(
285309 conv_p,
286310 // Aint8,
@@ -292,7 +316,6 @@ int fbgemmConv(
292316 outProcess.getBias (),
293317 thread_id,
294318 num_threads);
295- #endif
296319 } else {
297320 assert (false && " 1d/3d direct conv not supported" );
298321 }
@@ -302,6 +325,8 @@ int fbgemmConv(
302325 break ;
303326 }
304327 case optimized_conv_t ::im2col: {
328+ #if defined(__x86_64__) || defined(__i386__) || \
329+ (defined (_MSC_VER) && (defined (_M_X64) || defined (_M_IX86)))
305330 // All other convolutions go through im2col-based implementation
306331 // std::cout << "Im2col path" << std::endl;
307332 std::vector<int32_t > row_offset_buf (
@@ -352,6 +377,36 @@ int fbgemmConv(
352377 thread_id,
353378 num_threads,
354379 blocking_params);
380+ #else
381+ DoNothing<> doNothingObj{};
382+ ReQuantizeOutput<
383+ processOutputType::RELU_FUSED,
384+ processOutputType::QGRANType,
385+ typename processOutputType::BIAS_T>
386+ reqObj (
387+ doNothingObj,
388+ outProcess.getCMultiplier (),
389+ outProcess.getCZeroPoint (),
390+ outProcess.getAZeroPoint (),
391+ outProcess.getBZeroPoint (),
392+ nullptr , /* row offset buffer */
393+ outProcess.getColOffsets (),
394+ outProcess.getBias (),
395+ conv_p.OC ,
396+ conv_p.G ,
397+ outProcess.getActWScale ());
398+
399+ conv_requant_ref (
400+ conv_p,
401+ activations,
402+ packed_weights.getPackedWForIm2col ()->getBuf (),
403+ false ,
404+ out,
405+ outBuffer,
406+ reqObj,
407+ thread_id,
408+ num_threads);
409+ #endif
355410 break ;
356411 }
357412 } // switch
0 commit comments