@@ -72,6 +72,64 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
7272 }
7373}
7474
75+ /*
76+ * Equivalent of bc_fast_mul for small numbers to perform computations
77+ * without using array.
78+ */
79+ static inline void bc_fast_square (bc_num n1 , size_t n1len , bc_num * prod )
80+ {
81+ const char * n1end = n1 -> n_value + n1len - 1 ;
82+
83+ BC_VECTOR n1_vector = bc_partial_convert_to_vector (n1end , n1len );
84+ BC_VECTOR prod_vector = n1_vector * n1_vector ;
85+
86+ size_t prodlen = n1len + n1len ;
87+ * prod = bc_new_num_nonzeroed (prodlen , 0 );
88+ char * pptr = (* prod )-> n_value ;
89+ char * pend = pptr + prodlen - 1 ;
90+
91+ while (pend >= pptr ) {
92+ * pend -- = prod_vector % BASE ;
93+ prod_vector /= BASE ;
94+ }
95+ }
96+
97+ /* Common part of functions bc_standard_mul and bc_standard_square
98+ * that takes a vector and converts it to a bc_num */
99+ static inline void bc_mul_finish_from_vector (BC_VECTOR * prod_vector , size_t prod_arr_size , size_t prodlen , bc_num * prod ) {
100+ /*
101+ * Move a value exceeding 4/8 digits by carrying to the next digit.
102+ * However, the last digit does nothing.
103+ */
104+ bc_mul_carry_calc (prod_vector , prod_arr_size );
105+
106+ /* Convert to bc_num */
107+ * prod = bc_new_num_nonzeroed (prodlen , 0 );
108+ char * pptr = (* prod )-> n_value ;
109+ char * pend = pptr + prodlen - 1 ;
110+ size_t i = 0 ;
111+ while (i < prod_arr_size - 1 ) {
112+ #if BC_VECTOR_SIZE == 4
113+ bc_write_bcd_representation (prod_vector [i ], pend - 3 );
114+ pend -= 4 ;
115+ #else
116+ bc_write_bcd_representation (prod_vector [i ] / 10000 , pend - 7 );
117+ bc_write_bcd_representation (prod_vector [i ] % 10000 , pend - 3 );
118+ pend -= 8 ;
119+ #endif
120+ i ++ ;
121+ }
122+
123+ /*
124+ * The last digit may carry over.
125+ * Also need to fill it to the end with zeros, so loop until the end of the string.
126+ */
127+ while (pend >= pptr ) {
128+ * pend -- = prod_vector [i ] % BASE ;
129+ prod_vector [i ] /= BASE ;
130+ }
131+ }
132+
75133/*
76134 * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
77135 * The array is generated starting with the smaller digits.
@@ -128,42 +186,57 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
128186 }
129187 }
130188
131- /*
132- * Move a value exceeding 4/8 digits by carrying to the next digit.
133- * However, the last digit does nothing.
134- */
135- bc_mul_carry_calc (prod_vector , prod_arr_size );
189+ bc_mul_finish_from_vector (prod_vector , prod_arr_size , prodlen , prod );
136190
137- /* Convert to bc_num */
138- * prod = bc_new_num_nonzeroed (prodlen , 0 );
139- char * pptr = (* prod )-> n_value ;
140- char * pend = pptr + prodlen - 1 ;
141- i = 0 ;
142- while (i < prod_arr_size - 1 ) {
143- #if BC_VECTOR_SIZE == 4
144- bc_write_bcd_representation (prod_vector [i ], pend - 3 );
145- pend -= 4 ;
146- #else
147- bc_write_bcd_representation (prod_vector [i ] / 10000 , pend - 7 );
148- bc_write_bcd_representation (prod_vector [i ] % 10000 , pend - 3 );
149- pend -= 8 ;
150- #endif
151- i ++ ;
191+ efree (buf );
192+ }
193+
194+ /** This is bc_standard_mul implementation for square */
195+ static void bc_standard_square (bc_num n1 , size_t n1len , bc_num * prod )
196+ {
197+ size_t i ;
198+ const char * n1end = n1 -> n_value + n1len - 1 ;
199+ size_t prodlen = n1len + n1len ;
200+
201+ size_t n1_arr_size = (n1len + BC_VECTOR_SIZE - 1 ) / BC_VECTOR_SIZE ;
202+ size_t prod_arr_size = (prodlen + BC_VECTOR_SIZE - 1 ) / BC_VECTOR_SIZE ;
203+
204+ BC_VECTOR * buf = safe_emalloc (n1_arr_size + n1_arr_size + prod_arr_size , sizeof (BC_VECTOR ), 0 );
205+
206+ BC_VECTOR * n1_vector = buf ;
207+ BC_VECTOR * prod_vector = n1_vector + n1_arr_size + n1_arr_size ;
208+
209+ for (i = 0 ; i < prod_arr_size ; i ++ ) {
210+ prod_vector [i ] = 0 ;
152211 }
153212
154- /*
155- * The last digit may carry over.
156- * Also need to fill it to the end with zeros, so loop until the end of the string.
157- */
158- while (pend >= pptr ) {
159- * pend -- = prod_vector [i ] % BASE ;
160- prod_vector [i ] /= BASE ;
213+ /* Convert to BC_VECTOR[] */
214+ bc_convert_to_vector (n1_vector , n1end , n1len );
215+
216+ /* Multiplication and addition */
217+ size_t count = 0 ;
218+ for (i = 0 ; i < n1_arr_size ; i ++ ) {
219+ /*
220+ * This calculation adds the result multiple times to the array entries.
221+ * When multiplying large numbers of digits, there is a possibility of
222+ * overflow, so digit adjustment is performed beforehand.
223+ */
224+ if (UNEXPECTED (count >= BC_VECTOR_NO_OVERFLOW_ADD_COUNT )) {
225+ bc_mul_carry_calc (prod_vector , prod_arr_size );
226+ count = 0 ;
227+ }
228+ count ++ ;
229+ for (size_t j = 0 ; j < n1_arr_size ; j ++ ) {
230+ prod_vector [i + j ] += n1_vector [i ] * n1_vector [j ];
231+ }
161232 }
162233
234+ bc_mul_finish_from_vector (prod_vector , prod_arr_size , prodlen , prod );
235+
163236 efree (buf );
164237}
165238
166- /* The multiply routine. N2 times N1 is put int PROD with the scale of
239+ /* The multiply routine. N2 times N1 is put int PROD with the scale of
167240 the result being MIN(N2 scale+N1 scale, MAX (SCALE, N2 scale, N1 scale)).
168241 */
169242
@@ -194,3 +267,25 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale)
194267 }
195268 return prod ;
196269}
270+
271+ bc_num bc_square (bc_num n1 , size_t scale )
272+ {
273+ bc_num prod ;
274+
275+ size_t len1 = n1 -> n_len + n1 -> n_scale ;
276+ size_t full_scale = n1 -> n_scale + n1 -> n_scale ;
277+ size_t prod_scale = MIN (full_scale , MAX (scale , n1 -> n_scale ));
278+
279+ if (len1 <= BC_VECTOR_SIZE ) {
280+ bc_fast_square (n1 , len1 , & prod );
281+ } else {
282+ bc_standard_square (n1 , len1 , & prod );
283+ }
284+
285+ prod -> n_sign = PLUS ;
286+ prod -> n_len -= full_scale ;
287+ prod -> n_scale = prod_scale ;
288+ _bc_rm_leading_zeros (prod );
289+
290+ return prod ;
291+ }
0 commit comments