15
15
output
16
16
thrust::complex *ifgram (n_cols*n_rows)
17
17
input
18
- thrust::complex *refSlcUp ((oversample*n_fft)*n_rows)
18
+ thrust::complex *refSlcUp ((oversample*n_fft)*n_rows) where nfft >= ncols
19
19
thrust::complex *secSlcUp
20
20
size_t n_rows
21
21
size_t n_cols
22
22
size_t n_fft
23
- int oversample_i
24
- float oversampe_f
23
+ int oversample_int
24
+ float oversample_float
25
25
*/
26
26
template <typename T>
27
27
__global__ void interferogram_g (thrust::complex <T> *ifgram,
@@ -30,23 +30,37 @@ __global__ void interferogram_g(thrust::complex<T> *ifgram,
30
30
size_t n_rows,
31
31
size_t n_cols,
32
32
size_t n_fft,
33
- int oversample_i ,
34
- T oversample_f )
33
+ int oversample_int ,
34
+ T oversample_float )
35
35
{
36
+ // get 1-d interferogram index
36
37
const auto i = static_cast <size_t >(blockIdx .x ) * blockDim .x + threadIdx .x ;
37
38
38
39
// make sure index within ifgram size bounds
39
40
if (i < n_rows * n_cols) {
41
+ // break up 1-d index into 2-d index
40
42
auto i_row = i / n_cols;
41
43
auto i_col = i % n_cols;
42
44
43
- ifgram[i] = thrust::complex <T>(0.0 , 0.0 );
44
- for (int j = 0 ; j < oversample_i; ++j) {
45
- auto ref_val = refSlcUp[i_row*oversample_i*n_fft + i_col];
46
- auto sec_val_conj = conj (secSlcUp[i_row*oversample_i*n_fft + i_col]);
47
- ifgram[i] += ref_val * sec_val_conj;
45
+ // local accumulation variable
46
+ auto accumulation = thrust::complex <T>(0.0 , 0.0 );
47
+
48
+ // accumulate crossmultiplied oversampled pixels
49
+ // oversample_int > 0 so crossmultiply will always be calculated
50
+ for (int j = 0 ; j < oversample_int; ++j) {
51
+ // get 1-d, maybe oversampled, index based on 2-d index
52
+ // i_row * n_fft + i_col = 1-d index w/o oversampling
53
+ // oversample_int * (..) = first 1-d index w/ oversampling
54
+ // (...) + j = j-th oversampled index
55
+ auto i_up = oversample_int * (i_row * n_fft + i_col) + j;
56
+
57
+ // get values from SLC rasters and crossmultiply
58
+ auto ref_val = refSlcUp[i_up];
59
+ auto sec_val_conj = thrust::conj (secSlcUp[i_up]);
60
+ accumulation += ref_val * sec_val_conj;
48
61
}
49
- ifgram[i] /= oversample_f;
62
+ // normalize by oversample factor
63
+ ifgram[i] = accumulation / oversample_float;
50
64
}
51
65
}
52
66
@@ -203,7 +217,6 @@ crossmul(isce3::io::Raster& referenceSLC,
203
217
204
218
}
205
219
206
-
207
220
void isce3::cuda::signal::gpuCrossmul::
208
221
crossmul (isce3::io::Raster& referenceSLC,
209
222
isce3::io::Raster& secondarySLC,
@@ -277,14 +290,14 @@ crossmul(isce3::io::Raster& referenceSLC,
277
290
auto slc_size = n_slc * sizeof (thrust::complex <float >);
278
291
279
292
// storage for a block of reference SLC data
280
- std::valarray<std::complex <float >> refSlc (n_slc);
281
- thrust::complex <float > *d_refSlc ;
282
- checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_refSlc ), slc_size));
293
+ std::valarray<std::complex <float >> refSlcOrig (n_slc);
294
+ thrust::complex <float > *d_refSlcOrig ;
295
+ checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_refSlcOrig ), slc_size));
283
296
284
297
// storage for a block of secondary SLC data
285
- std::valarray<std::complex <float >> secSlc (n_slc);
286
- thrust::complex <float > *d_secSlc ;
287
- checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_secSlc ), slc_size));
298
+ std::valarray<std::complex <float >> secSlcOrig (n_slc);
299
+ thrust::complex <float > *d_secSlcOrig ;
300
+ checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_secSlcOrig ), slc_size));
288
301
289
302
// set upsampled parameters
290
303
auto n_slcUpsampled = _oversample * nfft * rowsPerBlock;
@@ -293,13 +306,16 @@ crossmul(isce3::io::Raster& referenceSLC,
293
306
// upsampled block of reference SLC
294
307
std::valarray<std::complex <float >> refSlcUpsampled (n_slcUpsampled);
295
308
thrust::complex <float > *d_refSlcUpsampled;
296
- checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_refSlcUpsampled), slcUpsampled_size));
309
+ if (_oversample > 1 )
310
+ checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_refSlcUpsampled), slcUpsampled_size));
297
311
298
312
// upsampled block of secondary SLC
299
313
thrust::complex <float > *d_secSlcUpsampled;
300
- checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_secSlcUpsampled), slcUpsampled_size));
314
+ if (_oversample > 1 )
315
+ checkCudaErrors (cudaMalloc (reinterpret_cast <void **>(&d_secSlcUpsampled), slcUpsampled_size));
301
316
302
- // shift impact
317
+ // calculate and copy to device shiftImpact frequency response (a linear phase)
318
+ // to a sub-pixel shift in time domain introduced by upsampling followed by downsampling
303
319
std::valarray<std::complex <float >> shiftImpact (n_slcUpsampled);
304
320
thrust::complex <float > *d_shiftImpact;
305
321
lookdownShiftImpact (_oversample,
@@ -349,8 +365,8 @@ crossmul(isce3::io::Raster& referenceSLC,
349
365
350
366
// determine block layout
351
367
dim3 block (THRD_PER_BLOCK);
352
- dim3 grid_hi ((refSlc .size ()*_oversample+(THRD_PER_BLOCK-1 ))/THRD_PER_BLOCK);
353
- dim3 grid_reg ((refSlc .size ()+(THRD_PER_BLOCK-1 ))/THRD_PER_BLOCK);
368
+ dim3 grid_hi ((refSlcOrig .size ()*_oversample+(THRD_PER_BLOCK-1 ))/THRD_PER_BLOCK);
369
+ dim3 grid_reg ((refSlcOrig .size ()+(THRD_PER_BLOCK-1 ))/THRD_PER_BLOCK);
354
370
dim3 grid_lo ((blockRowsMultiLooked*ncolsMultiLooked+(THRD_PER_BLOCK-1 ))/THRD_PER_BLOCK);
355
371
356
372
// configure azimuth filter
@@ -385,8 +401,8 @@ crossmul(isce3::io::Raster& referenceSLC,
385
401
}
386
402
387
403
// fill the valarray with zero before getting the block of the data
388
- refSlc = 0 ;
389
- secSlc = 0 ;
404
+ refSlcOrig = 0 ;
405
+ secSlcOrig = 0 ;
390
406
refSlcUpsampled = 0 ;
391
407
ifgram = 0 ;
392
408
@@ -397,36 +413,33 @@ crossmul(isce3::io::Raster& referenceSLC,
397
413
std::valarray<std::complex <float >> dataLine (ncols);
398
414
for (size_t line = 0 ; line < rowsThisBlock; ++line){
399
415
referenceSLC.getLine (dataLine, rowStart + line);
400
- refSlc [std::slice (line*nfft, ncols, 1 )] = dataLine;
416
+ refSlcOrig [std::slice (line*nfft, ncols, 1 )] = dataLine;
401
417
secondarySLC.getLine (dataLine, rowStart + line);
402
- secSlc [std::slice (line*nfft, ncols, 1 )] = dataLine;
418
+ secSlcOrig [std::slice (line*nfft, ncols, 1 )] = dataLine;
403
419
}
404
- checkCudaErrors (cudaMemcpy (d_refSlc , &refSlc [0 ], slc_size, cudaMemcpyHostToDevice));
405
- checkCudaErrors (cudaMemcpy (d_secSlc , &secSlc [0 ], slc_size, cudaMemcpyHostToDevice));
420
+ checkCudaErrors (cudaMemcpy (d_refSlcOrig , &refSlcOrig [0 ], slc_size, cudaMemcpyHostToDevice));
421
+ checkCudaErrors (cudaMemcpy (d_secSlcOrig , &secSlcOrig [0 ], slc_size, cudaMemcpyHostToDevice));
406
422
407
423
// apply azimuth filter (do inplace)
408
424
if (_doCommonAzimuthBandFilter) {
409
- azimuthFilter.filter (d_refSlc );
410
- azimuthFilter.filter (d_secSlc );
425
+ azimuthFilter.filter (d_refSlcOrig );
426
+ azimuthFilter.filter (d_secSlcOrig );
411
427
}
412
428
413
429
auto oversample_f = static_cast <float >(_oversample);
414
430
if (_oversample > 1 ) {
415
431
// upsample reference and secondary. done on device
416
432
upsample (signalNoUpsample,
417
433
signalUpsample,
418
- d_refSlc ,
434
+ d_refSlcOrig ,
419
435
d_refSlcUpsampled,
420
436
d_shiftImpact);
421
437
upsample (signalNoUpsample,
422
438
signalUpsample,
423
- d_secSlc ,
439
+ d_secSlcOrig ,
424
440
d_secSlcUpsampled,
425
441
d_shiftImpact);
426
442
427
- // run kernels to compute oversampled interforgram
428
- // refSignal overwritten with upsampled interferogram
429
- // reduce from nfft*oversample*rowsPerBlock to ncols*rowsPerBlock
430
443
interferogram_g<<<grid_reg, block>>> (
431
444
d_ifgram,
432
445
d_refSlcUpsampled,
@@ -435,8 +448,8 @@ crossmul(isce3::io::Raster& referenceSLC,
435
448
} else {
436
449
interferogram_g<<<grid_reg, block>>> (
437
450
d_ifgram,
438
- d_refSlc ,
439
- d_secSlc ,
451
+ d_refSlcOrig ,
452
+ d_secSlcOrig ,
440
453
rowsThisBlock, ncols, nfft, _oversample, oversample_f);
441
454
}
442
455
@@ -469,7 +482,7 @@ crossmul(isce3::io::Raster& referenceSLC,
469
482
d_ifgram,
470
483
ncols, // n columns hi res
471
484
ncolsMultiLooked, // n cols lo res
472
- _azimuthLooks, // col resize factor of hi to lo
485
+ _azimuthLooks, // row resize factor of hi to lo
473
486
_rangeLooks, // col resize factor of hi to lo
474
487
n_mlook, // number of lo res elements
475
488
float (_azimuthLooks*_rangeLooks));
@@ -484,32 +497,58 @@ crossmul(isce3::io::Raster& referenceSLC,
484
497
interferogram.setBlock (ifgram_mlook, 0 , rowStart/_azimuthLooks,
485
498
ncols/_rangeLooks, rowsThisBlock/_azimuthLooks);
486
499
487
- // write reduce+abs and set blocks
488
- multilooks_power_g<<<grid_lo, block>>> (
489
- d_ref_amp_mlook,
490
- d_refSlc,
491
- 2 ,
492
- nfft,
493
- ncolsMultiLooked,
494
- _azimuthLooks, // row resize factor of hi to lo
495
- _rangeLooks, // col resize factor of hi to lo
496
- n_mlook, // number of lo res elements
497
- float (_azimuthLooks*_rangeLooks));
500
+ if (_oversample > 1 ) {
501
+ // write reduce+abs and set blocks
502
+ multilooks_power_g<<<grid_lo, block>>> (
503
+ d_ref_amp_mlook,
504
+ d_refSlcUpsampled,
505
+ 2 ,
506
+ _oversample*nfft, // n columns hi res
507
+ ncolsMultiLooked, // n columns lo res
508
+ _azimuthLooks, // row resize factor of hi to lo
509
+ _oversample*_rangeLooks, // col resize factor of hi to lo
510
+ n_mlook, // number of lo res elements
511
+ float (_oversample*_azimuthLooks*_rangeLooks));
512
+ } else {
513
+ multilooks_power_g<<<grid_lo, block>>> (
514
+ d_ref_amp_mlook,
515
+ d_refSlcOrig,
516
+ 2 ,
517
+ _oversample*nfft, // n columns hi res
518
+ ncolsMultiLooked, // n columns lo res
519
+ _azimuthLooks, // row resize factor of hi to lo
520
+ _oversample*_rangeLooks, // col resize factor of hi to lo
521
+ n_mlook, // number of lo res elements
522
+ float (_oversample*_azimuthLooks*_rangeLooks));
523
+ }
498
524
499
525
// Check for any kernel errors
500
526
checkCudaErrors (cudaPeekAtLastError ());
501
527
checkCudaErrors (cudaDeviceSynchronize ());
502
528
503
- multilooks_power_g<<<grid_lo, block>>> (
504
- d_sec_amp_mlook,
505
- d_secSlc,
506
- 2 ,
507
- nfft,
508
- ncolsMultiLooked,
509
- _azimuthLooks, // row resize factor of hi to lo
510
- _rangeLooks, // col resize factor of hi to lo
511
- n_mlook, // number of lo res elements
512
- float (_azimuthLooks*_rangeLooks));
529
+ if (_oversample > 1 ) {
530
+ multilooks_power_g<<<grid_lo, block>>> (
531
+ d_sec_amp_mlook,
532
+ d_secSlcUpsampled,
533
+ 2 ,
534
+ _oversample*nfft,
535
+ ncolsMultiLooked,
536
+ _azimuthLooks, // row resize factor of hi to lo
537
+ _oversample*_rangeLooks, // col resize factor of hi to lo
538
+ n_mlook, // number of lo res elements
539
+ float (_oversample*_azimuthLooks*_rangeLooks));
540
+ } else {
541
+ multilooks_power_g<<<grid_lo, block>>> (
542
+ d_sec_amp_mlook,
543
+ d_secSlcOrig,
544
+ 2 ,
545
+ _oversample*nfft,
546
+ ncolsMultiLooked,
547
+ _azimuthLooks, // row resize factor of hi to lo
548
+ _oversample*_rangeLooks, // col resize factor of hi to lo
549
+ n_mlook, // number of lo res elements
550
+ float (_oversample*_azimuthLooks*_rangeLooks));
551
+ }
513
552
514
553
// Check for any kernel errors
515
554
checkCudaErrors (cudaPeekAtLastError ());
@@ -543,10 +582,12 @@ crossmul(isce3::io::Raster& referenceSLC,
543
582
}
544
583
545
584
// liberate all device memory
546
- checkCudaErrors (cudaFree (d_refSlc));
547
- checkCudaErrors (cudaFree (d_secSlc));
548
- checkCudaErrors (cudaFree (d_refSlcUpsampled));
549
- checkCudaErrors (cudaFree (d_secSlcUpsampled));
585
+ checkCudaErrors (cudaFree (d_refSlcOrig));
586
+ checkCudaErrors (cudaFree (d_secSlcOrig));
587
+ if (_oversample > 1 ) {
588
+ checkCudaErrors (cudaFree (d_refSlcUpsampled));
589
+ checkCudaErrors (cudaFree (d_secSlcUpsampled));
590
+ }
550
591
checkCudaErrors (cudaFree (d_shiftImpact));
551
592
checkCudaErrors (cudaFree (d_ifgram));
552
593
if (_doCommonRangeBandFilter) {
0 commit comments