@@ -50,35 +50,35 @@ float Diffus4th_CPU_main(float *Input, float *Output, float *infovector, float l
5050 float * W_Lapl = NULL , * Output_prev = NULL ;
5151 sigmaPar2 = sigmaPar * sigmaPar ;
5252 DimTotal = dimX * dimY * dimZ ;
53-
53+
5454 W_Lapl = calloc (DimTotal , sizeof (float ));
55-
55+
5656 if (epsil != 0.0f ) Output_prev = calloc (DimTotal , sizeof (float ));
57-
57+
5858 /* copy into output */
5959 copyIm (Input , Output , (long )(dimX ), (long )(dimY ), (long )(dimZ ));
60-
60+
6161 for (i = 0 ; i < iterationsNumb ; i ++ ) {
62- if ((epsil != 0.0f ) && (i % 5 == 0 )) copyIm (Output , Output_prev , (long )(dimX ), (long )(dimY ), (long )(dimZ ));
63-
64- if (dimZ == 1 ) {
62+ if ((epsil != 0.0f ) && (i % 5 == 0 )) copyIm (Output , Output_prev , (long )(dimX ), (long )(dimY ), (long )(dimZ ));
63+
64+ if (dimZ == 1 ) {
6565 /* running 2D diffusion iterations */
6666 /* Calculating weighted Laplacian */
6767 Weighted_Laplc2D (W_Lapl , Output , sigmaPar2 , dimX , dimY );
6868 /* Perform iteration step */
6969 Diffusion_update_step2D (Output , Input , W_Lapl , lambdaPar , sigmaPar2 , tau , (long )(dimX ), (long )(dimY ));
70- }
71- else {
70+ }
71+ else {
7272 /* running 3D diffusion iterations */
7373 /* Calculating weighted Laplacian */
7474 Weighted_Laplc3D (W_Lapl , Output , sigmaPar2 , dimX , dimY , dimZ );
7575 /* Perform iteration step */
7676 Diffusion_update_step3D (Output , Input , W_Lapl , lambdaPar , sigmaPar2 , tau , (long )(dimX ), (long )(dimY ), (long )(dimZ ));
77- }
78-
79- /* check early stopping criteria */
80- if ((epsil != 0.0f ) && (i % 5 == 0 )) {
81- re = 0.0f ; re1 = 0.0f ;
77+ }
78+
79+ /* check early stopping criteria */
80+ if ((epsil != 0.0f ) && (i % 5 == 0 )) {
81+ re = 0.0f ; re1 = 0.0f ;
8282 for (j = 0 ; j < DimTotal ; j ++ )
8383 {
8484 re += powf (Output [j ] - Output_prev [j ],2 );
@@ -87,10 +87,10 @@ float Diffus4th_CPU_main(float *Input, float *Output, float *infovector, float l
8787 re = sqrtf (re )/sqrtf (re1 );
8888 if (re < epsil ) count ++ ;
8989 if (count > 3 ) break ;
90- }
91- }
92- free (W_Lapl );
93-
90+ }
91+ }
92+ free (W_Lapl );
93+
9494 if (epsil != 0.0f ) free (Output_prev );
9595 /*adding info into info_vector */
9696 infovector [0 ] = (float )(i ); /*iterations number (if stopped earlier based on tolerance)*/
@@ -104,74 +104,71 @@ float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long di
104104{
105105 long i ,j ,i1 ,i2 ,j1 ,j2 ,index ;
106106 float gradX , gradX_sq , gradY , gradY_sq , gradXX , gradYY , gradXY , xy_2 , denom , V_norm , V_orth , c , c_sq ;
107-
108- #pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
107+
108+ #pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
109+ for (j = 0 ; j < dimY ; j ++ ) {
110+ /* symmetric boundary conditions */
111+ j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
112+ j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
109113 for (i = 0 ; i < dimX ; i ++ ) {
110- /* symmetric boundary conditions */
111- i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
112- i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
113- for (j = 0 ; j < dimY ; j ++ ) {
114- /* symmetric boundary conditions */
115- j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
116- j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
117-
118- index = j * dimX + i ;
119-
120- gradX = 0.5f * (U0 [j * dimX + i2 ] - U0 [j * dimX + i1 ]);
121- gradX_sq = pow (gradX ,2 );
122-
123- gradY = 0.5f * (U0 [j2 * dimX + i ] - U0 [j1 * dimX + i ]);
124- gradY_sq = pow (gradY ,2 );
125-
126- gradXX = U0 [j * dimX + i2 ] + U0 [j * dimX + i1 ] - 2 * U0 [index ];
127- gradYY = U0 [j2 * dimX + i ] + U0 [j1 * dimX + i ] - 2 * U0 [index ];
128-
129- gradXY = 0.25f * (U0 [j2 * dimX + i2 ] + U0 [j1 * dimX + i1 ] - U0 [j1 * dimX + i2 ] - U0 [j2 * dimX + i1 ]);
130- xy_2 = 2.0f * gradX * gradY * gradXY ;
131-
132- denom = gradX_sq + gradY_sq ;
133-
134- if (denom <= EPS ) {
135- V_norm = (gradXX * gradX_sq + xy_2 + gradYY * gradY_sq )/EPS ;
136- V_orth = (gradXX * gradY_sq - xy_2 + gradYY * gradX_sq )/EPS ;
137- }
138- else {
139- V_norm = (gradXX * gradX_sq + xy_2 + gradYY * gradY_sq )/denom ;
140- V_orth = (gradXX * gradY_sq - xy_2 + gradYY * gradX_sq )/denom ;
141- }
142-
143- c = 1.0f /(1.0f + denom /sigma );
144- c_sq = c * c ;
145-
146- W_Lapl [index ] = c_sq * V_norm + c * V_orth ;
114+ /* symmetric boundary conditions */
115+ i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
116+ i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
117+ index = j * dimX + i ;
118+
119+ gradX = 0.5f * (U0 [j * dimX + i2 ] - U0 [j * dimX + i1 ]);
120+ gradX_sq = pow (gradX ,2 );
121+
122+ gradY = 0.5f * (U0 [j2 * dimX + i ] - U0 [j1 * dimX + i ]);
123+ gradY_sq = pow (gradY ,2 );
124+
125+ gradXX = U0 [j * dimX + i2 ] + U0 [j * dimX + i1 ] - 2 * U0 [index ];
126+ gradYY = U0 [j2 * dimX + i ] + U0 [j1 * dimX + i ] - 2 * U0 [index ];
127+
128+ gradXY = 0.25f * (U0 [j2 * dimX + i2 ] + U0 [j1 * dimX + i1 ] - U0 [j1 * dimX + i2 ] - U0 [j2 * dimX + i1 ]);
129+ xy_2 = 2.0f * gradX * gradY * gradXY ;
130+
131+ denom = gradX_sq + gradY_sq ;
132+
133+ if (denom <= EPS ) {
134+ V_norm = (gradXX * gradX_sq + xy_2 + gradYY * gradY_sq )/EPS ;
135+ V_orth = (gradXX * gradY_sq - xy_2 + gradYY * gradX_sq )/EPS ;
147136 }
148- }
149- return * W_Lapl ;
137+ else {
138+ V_norm = (gradXX * gradX_sq + xy_2 + gradYY * gradY_sq )/denom ;
139+ V_orth = (gradXX * gradY_sq - xy_2 + gradYY * gradX_sq )/denom ;
140+ }
141+
142+ c = 1.0f /(1.0f + denom /sigma );
143+ c_sq = c * c ;
144+
145+ W_Lapl [index ] = c_sq * V_norm + c * V_orth ;
146+ }}
147+ return * W_Lapl ;
150148}
151149
152150float Diffusion_update_step2D (float * Output , float * Input , float * W_Lapl , float lambdaPar , float sigmaPar2 , float tau , long dimX , long dimY )
153151{
154- long i ,j ,i1 ,i2 ,j1 ,j2 ,index ;
152+ long i ,j ,i1 ,i2 ,j1 ,j2 ,index ;
155153 float gradXXc , gradYYc ;
156-
157- #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
154+
155+ #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
156+ for (j = 0 ; j < dimY ; j ++ ) {
157+ /* symmetric boundary conditions */
158+ j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
159+ j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
158160 for (i = 0 ; i < dimX ; i ++ ) {
159- /* symmetric boundary conditions */
160- i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
161- i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
162- for (j = 0 ; j < dimY ; j ++ ) {
163- /* symmetric boundary conditions */
164- j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
165- j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
166- index = j * dimX + i ;
167-
168- gradXXc = W_Lapl [j * dimX + i2 ] + W_Lapl [j * dimX + i1 ] - 2 * W_Lapl [index ];
169- gradYYc = W_Lapl [j2 * dimX + i ] + W_Lapl [j1 * dimX + i ] - 2 * W_Lapl [index ];
170-
171- Output [index ] += tau * (- lambdaPar * (gradXXc + gradYYc ) - (Output [index ] - Input [index ]));
172- }
173- }
174- return * Output ;
161+ /* symmetric boundary conditions */
162+ i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
163+ i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
164+ index = j * dimX + i ;
165+
166+ gradXXc = W_Lapl [j * dimX + i2 ] + W_Lapl [j * dimX + i1 ] - 2 * W_Lapl [index ];
167+ gradYYc = W_Lapl [j2 * dimX + i ] + W_Lapl [j1 * dimX + i ] - 2 * W_Lapl [index ];
168+
169+ Output [index ] += tau * (- lambdaPar * (gradXXc + gradYYc ) - (Output [index ] - Input [index ]));
170+ }}
171+ return * Output ;
175172}
176173/********************************************************************/
177174/***************************3D Functions*****************************/
@@ -180,95 +177,89 @@ float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long di
180177{
181178 long i ,j ,k ,i1 ,i2 ,j1 ,j2 ,k1 ,k2 ,index ;
182179 float gradX , gradX_sq , gradY , gradY_sq , gradXX , gradYY , gradXY , xy_2 , denom , V_norm , V_orth , c , c_sq , gradZ , gradZ_sq , gradZZ , gradXZ , gradYZ , xyz_1 , xyz_2 ;
183-
184- #pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
185- for (i = 0 ; i < dimX ; i ++ ) {
186- /* symmetric boundary conditions */
187- i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
188- i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
189- for (j = 0 ; j < dimY ; j ++ ) {
190- /* symmetric boundary conditions */
191- j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
192- j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
193-
194- for (k = 0 ; k < dimZ ; k ++ ) {
195- /* symmetric boundary conditions */
196- k1 = k + 1 ; if (k1 == dimZ ) k1 = k - 1 ;
197- k2 = k - 1 ; if (k2 < 0 ) k2 = k + 1 ;
198-
199- index = (dimX * dimY )* k + j * dimX + i ;
200-
201- gradX = 0.5f * (U0 [(dimX * dimY )* k + j * dimX + i2 ] - U0 [(dimX * dimY )* k + j * dimX + i1 ]);
202- gradX_sq = pow (gradX ,2 );
203-
204- gradY = 0.5f * (U0 [(dimX * dimY )* k + j2 * dimX + i ] - U0 [(dimX * dimY )* k + j1 * dimX + i ]);
180+
181+ #pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
182+ for (k = 0 ; k < dimZ ; k ++ ) {
183+ /* symmetric boundary conditions */
184+ k1 = k + 1 ; if (k1 == dimZ ) k1 = k - 1 ;
185+ k2 = k - 1 ; if (k2 < 0 ) k2 = k + 1 ;
186+ for (j = 0 ; j < dimY ; j ++ ) {
187+ /* symmetric boundary conditions */
188+ j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
189+ j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
190+ for (i = 0 ; i < dimX ; i ++ ) {
191+ /* symmetric boundary conditions */
192+ i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
193+ i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
194+
195+ index = (dimX * dimY )* k + j * dimX + i ;
196+
197+ gradX = 0.5f * (U0 [(dimX * dimY )* k + j * dimX + i2 ] - U0 [(dimX * dimY )* k + j * dimX + i1 ]);
198+ gradX_sq = pow (gradX ,2 );
199+
200+ gradY = 0.5f * (U0 [(dimX * dimY )* k + j2 * dimX + i ] - U0 [(dimX * dimY )* k + j1 * dimX + i ]);
205201 gradY_sq = pow (gradY ,2 );
206-
202+
207203 gradZ = 0.5f * (U0 [(dimX * dimY )* k2 + j * dimX + i ] - U0 [(dimX * dimY )* k1 + j * dimX + i ]);
208204 gradZ_sq = pow (gradZ ,2 );
209-
205+
210206 gradXX = U0 [(dimX * dimY )* k + j * dimX + i2 ] + U0 [(dimX * dimY )* k + j * dimX + i1 ] - 2 * U0 [index ];
211207 gradYY = U0 [(dimX * dimY )* k + j2 * dimX + i ] + U0 [(dimX * dimY )* k + j1 * dimX + i ] - 2 * U0 [index ];
212208 gradZZ = U0 [(dimX * dimY )* k2 + j * dimX + i ] + U0 [(dimX * dimY )* k1 + j * dimX + i ] - 2 * U0 [index ];
213-
209+
214210 gradXY = 0.25f * (U0 [(dimX * dimY )* k + j2 * dimX + i2 ] + U0 [(dimX * dimY )* k + j1 * dimX + i1 ] - U0 [(dimX * dimY )* k + j1 * dimX + i2 ] - U0 [(dimX * dimY )* k + j2 * dimX + i1 ]);
215211 gradXZ = 0.25f * (U0 [(dimX * dimY )* k2 + j * dimX + i2 ] - U0 [(dimX * dimY )* k2 + j * dimX + i1 ] - U0 [(dimX * dimY )* k1 + j * dimX + i2 ] + U0 [(dimX * dimY )* k1 + j * dimX + i1 ]);
216212 gradYZ = 0.25f * (U0 [(dimX * dimY )* k2 + j2 * dimX + i ] - U0 [(dimX * dimY )* k2 + j1 * dimX + i ] - U0 [(dimX * dimY )* k1 + j2 * dimX + i ] + U0 [(dimX * dimY )* k1 + j1 * dimX + i ]);
217-
213+
218214 xy_2 = 2.0f * gradX * gradY * gradXY ;
219215 xyz_1 = 2.0f * gradX * gradZ * gradXZ ;
220216 xyz_2 = 2.0f * gradY * gradZ * gradYZ ;
221-
217+
222218 denom = gradX_sq + gradY_sq + gradZ_sq ;
223-
224- if (denom <= EPS ) {
225- V_norm = (gradXX * gradX_sq + gradYY * gradY_sq + gradZZ * gradZ_sq + xy_2 + xyz_1 + xyz_2 )/EPS ;
219+
220+ if (denom <= EPS ) {
221+ V_norm = (gradXX * gradX_sq + gradYY * gradY_sq + gradZZ * gradZ_sq + xy_2 + xyz_1 + xyz_2 )/EPS ;
226222 V_orth = ((gradY_sq + gradZ_sq )* gradXX + (gradX_sq + gradZ_sq )* gradYY + (gradX_sq + gradY_sq )* gradZZ - xy_2 - xyz_1 - xyz_2 )/EPS ;
227- }
228- else {
229- V_norm = (gradXX * gradX_sq + gradYY * gradY_sq + gradZZ * gradZ_sq + xy_2 + xyz_1 + xyz_2 )/denom ;
223+ }
224+ else {
225+ V_norm = (gradXX * gradX_sq + gradYY * gradY_sq + gradZZ * gradZ_sq + xy_2 + xyz_1 + xyz_2 )/denom ;
230226 V_orth = ((gradY_sq + gradZ_sq )* gradXX + (gradX_sq + gradZ_sq )* gradYY + (gradX_sq + gradY_sq )* gradZZ - xy_2 - xyz_1 - xyz_2 )/denom ;
231- }
232-
227+ }
228+
233229 c = 1.0f /(1.0f + denom /sigma );
234230 c_sq = c * c ;
235-
231+
236232 W_Lapl [index ] = c_sq * V_norm + c * V_orth ;
237- }
238- }
239- }
240- return * W_Lapl ;
233+ }}}
234+ return * W_Lapl ;
241235}
242236
243237float Diffusion_update_step3D (float * Output , float * Input , float * W_Lapl , float lambdaPar , float sigmaPar2 , float tau , long dimX , long dimY , long dimZ )
244238{
245- long i ,j ,i1 ,i2 ,j1 ,j2 ,index ,k ,k1 ,k2 ;
239+ long i ,j ,i1 ,i2 ,j1 ,j2 ,index ,k ,k1 ,k2 ;
246240 float gradXXc , gradYYc , gradZZc ;
247-
248- #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
249- for (i = 0 ; i < dimX ; i ++ ) {
250- /* symmetric boundary conditions */
251- i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
252- i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
253- for (j = 0 ; j < dimY ; j ++ ) {
254- /* symmetric boundary conditions */
255- j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
256- j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
257-
258- for (k = 0 ; k < dimZ ; k ++ ) {
259- /* symmetric boundary conditions */
260- k1 = k + 1 ; if (k1 == dimZ ) k1 = k - 1 ;
261- k2 = k - 1 ; if (k2 < 0 ) k2 = k + 1 ;
262-
263- index = (dimX * dimY )* k + j * dimX + i ;
264-
265- gradXXc = W_Lapl [(dimX * dimY )* k + j * dimX + i2 ] + W_Lapl [(dimX * dimY )* k + j * dimX + i1 ] - 2 * W_Lapl [index ];
266- gradYYc = W_Lapl [(dimX * dimY )* k + j2 * dimX + i ] + W_Lapl [(dimX * dimY )* k + j1 * dimX + i ] - 2 * W_Lapl [index ];
267- gradZZc = W_Lapl [(dimX * dimY )* k2 + j * dimX + i ] + W_Lapl [(dimX * dimY )* k1 + j * dimX + i ] - 2 * W_Lapl [index ];
268-
269- Output [index ] += tau * (- lambdaPar * (gradXXc + gradYYc + gradZZc ) - (Output [index ] - Input [index ]));
270- }
271- }
272- }
273- return * Output ;
241+
242+ #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
243+ for (k = 0 ; k < dimZ ; k ++ ) {
244+ /* symmetric boundary conditions */
245+ k1 = k + 1 ; if (k1 == dimZ ) k1 = k - 1 ;
246+ k2 = k - 1 ; if (k2 < 0 ) k2 = k + 1 ;
247+ for (j = 0 ; j < dimY ; j ++ ) {
248+ /* symmetric boundary conditions */
249+ j1 = j + 1 ; if (j1 == dimY ) j1 = j - 1 ;
250+ j2 = j - 1 ; if (j2 < 0 ) j2 = j + 1 ;
251+ for (i = 0 ; i < dimX ; i ++ ) {
252+ /* symmetric boundary conditions */
253+ i1 = i + 1 ; if (i1 == dimX ) i1 = i - 1 ;
254+ i2 = i - 1 ; if (i2 < 0 ) i2 = i + 1 ;
255+
256+ index = (dimX * dimY )* k + j * dimX + i ;
257+
258+ gradXXc = W_Lapl [(dimX * dimY )* k + j * dimX + i2 ] + W_Lapl [(dimX * dimY )* k + j * dimX + i1 ] - 2 * W_Lapl [index ];
259+ gradYYc = W_Lapl [(dimX * dimY )* k + j2 * dimX + i ] + W_Lapl [(dimX * dimY )* k + j1 * dimX + i ] - 2 * W_Lapl [index ];
260+ gradZZc = W_Lapl [(dimX * dimY )* k2 + j * dimX + i ] + W_Lapl [(dimX * dimY )* k1 + j * dimX + i ] - 2 * W_Lapl [index ];
261+
262+ Output [index ] += tau * (- lambdaPar * (gradXXc + gradYYc + gradZZc ) - (Output [index ] - Input [index ]));
263+ }}}
264+ return * Output ;
274265}
0 commit comments