@@ -101,6 +101,33 @@ NDArray_FromGD(zval *a) {
101101 i_shape [1 ] = (int )img_ptr -> sy ;
102102 i_shape [2 ] = (int )img_ptr -> sx ;
103103 rtn = NDArray_Zeros (i_shape , 3 , NDARRAY_TYPE_FLOAT32 , NDARRAY_DEVICE_CPU );
104+ int elsize = NDArray_ELSIZE (rtn );
105+
106+ #ifdef HAVE_AVX2
107+ __m256i red_mask = _mm256_set_epi32 (0xFF , 0xFF , 0xFF , 0xFF , 0xFF , 0xFF , 0xFF , 0xFF );
108+ for (int i = 0 ; i < img_ptr -> sy ; i ++ ) {
109+ for (int j = 0 ; j < img_ptr -> sx ; j += 8 ) {
110+ offset_red = (NDArray_STRIDES (rtn )[0 ] / elsize * 0 ) +
111+ ((NDArray_STRIDES (rtn )[1 ] / elsize ) * i ) +
112+ ((NDArray_STRIDES (rtn )[2 ] / elsize ) * j );
113+ offset_green = ((NDArray_STRIDES (rtn )[0 ] / elsize ) * 1 ) +
114+ ((NDArray_STRIDES (rtn )[1 ] / elsize ) * i ) +
115+ ((NDArray_STRIDES (rtn )[2 ] / elsize ) * j );
116+ offset_blue = ((NDArray_STRIDES (rtn )[0 ] / elsize ) * 2 ) +
117+ ((NDArray_STRIDES (rtn )[1 ] / elsize ) * i ) +
118+ ((NDArray_STRIDES (rtn )[2 ] / elsize ) * j );
119+
120+ __m256i color_indices = _mm256_loadu_si256 ((__m256i * )& img_ptr -> tpixels [i ][j ]);
121+ __m256i red_shifted = _mm256_and_si256 (_mm256_srli_epi32 (color_indices , 16 ), red_mask );
122+ __m256i green_shifted = _mm256_and_si256 (_mm256_srli_epi32 (color_indices , 8 ), red_mask );
123+ __m256i blue_shifted = _mm256_and_si256 (color_indices , red_mask );
124+
125+ _mm256_storeu_ps (& NDArray_FDATA (rtn )[offset_red ], _mm256_cvtepi32_ps (red_shifted ));
126+ _mm256_storeu_ps (& NDArray_FDATA (rtn )[offset_green ], _mm256_cvtepi32_ps (green_shifted ));
127+ _mm256_storeu_ps (& NDArray_FDATA (rtn )[offset_blue ], _mm256_cvtepi32_ps (blue_shifted ));
128+ }
129+ }
130+ #else
104131 for (int i = 0 ; i < img_ptr -> sy ; i ++ ) {
105132 for (int j = 0 ; j < img_ptr -> sx ; j ++ ) {
106133 offset_red = (NDArray_STRIDES (rtn )[0 ]/ NDArray_ELSIZE (rtn ) * 0 ) +
@@ -121,6 +148,7 @@ NDArray_FromGD(zval *a) {
121148 NDArray_FDATA (rtn )[offset_green ] = (float )green ;
122149 }
123150 }
151+ #endif
124152 return rtn ;
125153}
126154
@@ -135,6 +163,64 @@ NDArray_ToGD(NDArray *a, NDArray *n_alpha, zval *output) {
135163 int red , green , blue , alpha ;
136164 char * tmp_red , * tmp_blue , * tmp_green ;
137165 gdImagePtr im = gdImageCreateTrueColor_ (NDArray_SHAPE (a )[2 ], NDArray_SHAPE (a )[1 ]);
166+
167+ #ifdef HAVE_AVX2
168+ int elsize = NDArray_ELSIZE (a );
169+ __m256i alpha_mask = _mm256_set_epi32 (0xFF , 0xFF , 0xFF , 0xFF , 0xFF , 0xFF , 0xFF , 0xFF );
170+
171+ for (int i = 0 ; i < im -> sy ; i ++ ) {
172+ for (int j = 0 ; j < im -> sx ; j += 8 ) {
173+ offset_alpha = (NDArray_STRIDES (a )[0 ] / elsize * i ) +
174+ ((NDArray_STRIDES (a )[1 ] / elsize ) * j );
175+ offset_red = (NDArray_STRIDES (a )[0 ] / elsize * 0 ) +
176+ ((NDArray_STRIDES (a )[1 ] / elsize ) * i ) +
177+ ((NDArray_STRIDES (a )[2 ] / elsize ) * j );
178+ offset_green = ((NDArray_STRIDES (a )[0 ] / elsize ) * 1 ) +
179+ ((NDArray_STRIDES (a )[1 ] / elsize ) * i ) +
180+ ((NDArray_STRIDES (a )[2 ] / elsize ) * j );
181+ offset_blue = ((NDArray_STRIDES (a )[0 ] / elsize ) * 2 ) +
182+ ((NDArray_STRIDES (a )[1 ] / elsize ) * i ) +
183+ ((NDArray_STRIDES (a )[2 ] / elsize ) * j );
184+
185+ __m256 red_values = _mm256_loadu_ps (& NDArray_FDATA (a )[offset_red ]);
186+ __m256 green_values = _mm256_loadu_ps (& NDArray_FDATA (a )[offset_green ]);
187+ __m256 blue_values = _mm256_loadu_ps (& NDArray_FDATA (a )[offset_blue ]);
188+
189+ if (n_alpha != NULL ) {
190+ __m256i alpha_values = _mm256_cvtps_epi32 (_mm256_loadu_ps (& NDArray_FDATA (n_alpha )[offset_alpha ]));
191+ alpha_values = _mm256_and_si256 (alpha_values , alpha_mask );
192+ } else {
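193+ // n_alpha is NULL: the caller supplied no alpha channel.
194+ // NOTE(review): the alpha_values declared below is local to this else-branch and is never read;
195+ // the packing step further down builds the NULL case from red/green/blue only. Whether 255 means "opaque" for GD is unconfirmed - TODO verify.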
196+ __m256i alpha_values = _mm256_set1_epi32 (255 );
197+ }
198+
199+ __m256i red_int = _mm256_cvtps_epi32 (red_values );
200+ __m256i green_int = _mm256_cvtps_epi32 (green_values );
201+ __m256i blue_int = _mm256_cvtps_epi32 (blue_values );
202+
203+ __m256i color_indices ;
204+
205+ if (n_alpha != NULL ) {
206+ __m256i alpha_values = _mm256_cvtps_epi32 (_mm256_loadu_ps (& NDArray_FDATA (n_alpha )[offset_alpha ]));
207+ alpha_values = _mm256_and_si256 (alpha_values , alpha_mask );
208+
209+ color_indices = _mm256_or_si256 (_mm256_or_si256 (_mm256_slli_epi32 (alpha_values , 24 ),
210+ _mm256_slli_epi32 (red_int , 16 )),
211+ _mm256_or_si256 (_mm256_slli_epi32 (green_int , 8 ), blue_int ));
212+ } else {
213+ // Handle the case when n_alpha is NULL
214+ // Set color_indices using only red, green, and blue values
215+ color_indices = _mm256_or_si256 (_mm256_or_si256 (_mm256_slli_epi32 (red_int , 16 ),
216+ _mm256_slli_epi32 (green_int , 8 )),
217+ blue_int );
218+ }
219+
220+ _mm256_storeu_si256 ((__m256i * )& im -> tpixels [i ][j ], color_indices );
221+ }
222+ }
223+ #else
138224 for (int i = 0 ; i < im -> sy ; i ++ ) {
139225 for (int j = 0 ; j < im -> sx ; j ++ ) {
140226 offset_alpha = (NDArray_STRIDES (a )[0 ]/ NDArray_ELSIZE (a ) * i ) +
@@ -160,8 +246,10 @@ NDArray_ToGD(NDArray *a, NDArray *n_alpha, zval *output) {
160246 im -> tpixels [i ][j ] = color_index ;
161247 }
162248 }
249+ #endif
163250 php_gd_assign_libgdimageptr_as_extgdimage (output , im );
164251}
252+
165253#endif
166254
167255void apply_reduce (NDArray * result , NDArray * target , NDArray * (* operation )(NDArray * , NDArray * )) {
0 commit comments