@@ -1920,7 +1920,7 @@ inline void gemm_sdot_int8_kernel(const int8_t* a_ptr,
" vmax.f32 q5, q5, q0 \n " /* relu*/ \
" vmax.f32 q6, q6, q0 \n " /* relu*/ \
" vmax.f32 q7, q7, q0 \n " /* relu*/ \
- " vld1.32 {d2-d3}, [%[alpha]]! \n " \
+ " vld1.32 {d2-d3}, [%[alpha]] \n " \
" vmax.f32 q8, q8, q0 \n " /* relu*/ \
" vmax.f32 q9, q9, q0 \n " /* relu*/ \
" vmax.f32 q10, q10, q0 \n " /* relu*/ \
@@ -1943,46 +1943,46 @@ inline void gemm_sdot_int8_kernel(const int8_t* a_ptr,
" vmin.f32 q15, q15, q1 \n " /* relu6*/ \
" b 12f \n " /* relu6 end */

- #define GEMM_DOT_LEAKY_RELU \
- " 14: \n " \
- " vmov.f32 q0, #0.0 \n " /* for leakyrelu*/ \
- " vld1.32 {d2-d3}, [%[alpha]]! \n " /* leakyrelu alpha */ \
- " vcge.f32 q2, q4, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q4, q1 \n " /* vmulq_f32 */ \
- " vbif q4, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q5, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q5, q1 \n " /* vmulq_f32 */ \
- " vbif q5, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q6, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q6, q1 \n " /* vmulq_f32 */ \
- " vbif q6, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q7, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q7, q1 \n " /* vmulq_f32 */ \
- " vbif q7, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q8, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q8, q1 \n " /* vmulq_f32 */ \
- " vbif q8, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q9, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q9, q1 \n " /* vmulq_f32 */ \
- " vbif q9, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q10, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q10, q1 \n " /* vmulq_f32 */ \
- " vbif q10, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q11, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q11, q1 \n " /* vmulq_f32 */ \
- " vbif q11, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q12, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q12, q1 \n " /* vmulq_f32 */ \
- " vbif q12, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q13, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q13, q1 \n " /* vmulq_f32 */ \
- " vbif q13, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q14, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q14, q1 \n " /* vmulq_f32 */ \
- " vbif q14, q3, q2 \n " /* choose*/ \
- " vcge.f32 q2, q15, q0 \n " /* vcgeq_f32 */ \
- " vmla.f32 q3, q15, q1 \n " /* vmulq_f32 */ \
- " vbif q15, q3, q2 \n " /* choose*/ \
+ #define GEMM_DOT_LEAKY_RELU \
+ " 14: \n " \
+ " vmov.f32 q0, #0.0 \n " /* for leakyrelu*/ \
+ " vld1.32 {d2-d3}, [%[alpha]] \n " /* leakyrelu alpha */ \
+ " vcge.f32 q2, q4, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q4, q1 \n " /* vmulq_f32 */ \
+ " vbif q4, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q5, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q5, q1 \n " /* vmulq_f32 */ \
+ " vbif q5, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q6, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q6, q1 \n " /* vmulq_f32 */ \
+ " vbif q6, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q7, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q7, q1 \n " /* vmulq_f32 */ \
+ " vbif q7, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q8, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q8, q1 \n " /* vmulq_f32 */ \
+ " vbif q8, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q9, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q9, q1 \n " /* vmulq_f32 */ \
+ " vbif q9, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q10, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q10, q1 \n " /* vmulq_f32 */ \
+ " vbif q10, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q11, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q11, q1 \n " /* vmulq_f32 */ \
+ " vbif q11, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q12, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q12, q1 \n " /* vmulq_f32 */ \
+ " vbif q12, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q13, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q13, q1 \n " /* vmulq_f32 */ \
+ " vbif q13, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q14, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q14, q1 \n " /* vmulq_f32 */ \
+ " vbif q14, q3, q2 \n " /* choose*/ \
+ " vcge.f32 q2, q15, q0 \n " /* vcgeq_f32 */ \
+ " vmul.f32 q3, q15, q1 \n " /* vmulq_f32 */ \
+ " vbif q15, q3, q2 \n " /* choose*/ \
" 12: \n "

#define GEMM_DOT_ST_INT8 \
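A likely reading of the two fixes above, based on ARM NEON semantics rather than any statement in the patch itself: dropping the "!" writeback on "vld1.32 {d2-d3}, [%[alpha]]" means the alpha pointer is no longer advanced by 16 bytes after the load, so the same scale values stay addressable for the other activation paths; and replacing "vmla.f32 q3, qN, q1" with "vmul.f32 q3, qN, q1" makes the scratch register q3 hold exactly alpha * x instead of accumulating alpha * x onto whatever q3 happened to contain, which is what the following vbif select expects. A minimal intrinsics sketch of what one corrected vcge.f32 / vmul.f32 / vbif triple computes per 128-bit register (the helper name leaky_relu_q is hypothetical, not part of the kernel):

#include <arm_neon.h>

// Illustration only, not kernel code: the per-q-register leaky ReLU select,
// i.e. result = x >= 0 ? x : alpha * x.
static inline float32x4_t leaky_relu_q(float32x4_t x, float32x4_t alpha) {
  float32x4_t zero = vdupq_n_f32(0.f);
  uint32x4_t keep  = vcgeq_f32(x, zero);   // vcge.f32: mask of lanes where x >= 0
  float32x4_t neg  = vmulq_f32(x, alpha);  // vmul.f32: alpha * x for the negative lanes
  return vbslq_f32(keep, x, neg);          // vbif-style select: x where keep, else alpha * x
}

With the previous vmla.f32, the value playing the role of neg would have been computed as q3 + alpha * x on top of a stale q3, so the vbif select could write garbage into the negative lanes.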