![]() |
NFFT
3.3.1
|
00001 /* 00002 * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts 00003 * 00004 * This program is free software; you can redistribute it and/or modify it under 00005 * the terms of the GNU General Public License as published by the Free Software 00006 * Foundation; either version 2 of the License, or (at your option) any later 00007 * version. 00008 * 00009 * This program is distributed in the hope that it will be useful, but WITHOUT 00010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00011 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 00012 * details. 00013 * 00014 * You should have received a copy of the GNU General Public License along with 00015 * this program; if not, write to the Free Software Foundation, Inc., 51 00016 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00025 #include <stdio.h> 00026 #include <stdlib.h> 00027 #include <math.h> 00028 #include <complex.h> 00029 00030 #define NFFT_PRECISION_DOUBLE 00031 00032 #include "nfft3mp.h" 00033 00042 #define DGT_PRE_CEXP (1U<< 0) 00043 00053 #define FGT_NDFT (1U<< 1) 00054 00063 #define FGT_APPROX_B (1U<< 2) 00064 00066 typedef struct 00067 { 00068 int N; 00069 int M; 00071 NFFT_C *alpha; 00072 NFFT_C *f; 00074 unsigned flags; 00077 NFFT_C sigma; 00079 NFFT_R *x; 00080 NFFT_R *y; 00082 NFFT_C *pre_cexp; 00084 int n; 00085 NFFT_R p; 00087 NFFT_C *b; 00089 NFFT(plan) *nplan1; 00090 NFFT(plan) *nplan2; 00092 } fgt_plan; 00093 00101 static void dgt_trafo(fgt_plan *ths) 00102 { 00103 NFFT_INT j, k, l; 00104 00105 for (j = 0; j < ths->M; j++) 00106 ths->f[j] = NFFT_K(0.0); 00107 00108 if (ths->flags & DGT_PRE_CEXP) 00109 for (j = 0, l = 0; j < ths->M; j++) 00110 for (k = 0; k < ths->N; k++, l++) 00111 ths->f[j] += ths->alpha[k] * ths->pre_cexp[l]; 00112 else 00113 for (j = 0; j < ths->M; j++) 00114 for (k = 0; k < ths->N; k++) 00115 ths->f[j] += ths->alpha[k] 00116 * NFFT_M(cexp)( 00117 -ths->sigma * (ths->y[j] - ths->x[k]) 00118 * (ths->y[j] - ths->x[k])); 00119 } 00120 00128 static void fgt_trafo(fgt_plan *ths) 00129 { 00130 NFFT_INT l; 00131 00132 if (ths->flags & FGT_NDFT) 00133 { 00134 NFFT(adjoint_direct)(ths->nplan1); 00135 00136 for (l = 0; l < ths->n; l++) 00137 ths->nplan1->f_hat[l] *= ths->b[l]; 00138 00139 NFFT(trafo_direct)(ths->nplan2); 00140 } 00141 else 00142 { 00143 NFFT(adjoint)(ths->nplan1); 00144 00145 for (l = 0; l < ths->n; l++) 00146 ths->nplan1->f_hat[l] *= ths->b[l]; 00147 00148 NFFT(trafo)(ths->nplan2); 00149 } 00150 } 00151 00166 static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m, 00167 unsigned flags) 00168 { 00169 int j, n_fftw; 00170 FFTW(plan) fplan; 00171 00172 ths->M = M; 00173 ths->N = N; 00174 ths->sigma = sigma; 00175 ths->flags = flags; 00176 00177 ths->x = (NFFT_R*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_R)); 00178 ths->y = (NFFT_R*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_R)); 00179 ths->alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_C)); 00180 ths->f = (NFFT_C*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_C)); 00181 00182 ths->n = n; 00183 ths->p = p; 00184 00185 ths->b = (NFFT_C*) NFFT(malloc)((size_t)(ths->n) * sizeof(NFFT_C)); 00186 00187 ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan))); 00188 ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan))); 00189 00190 n_fftw = (int)NFFT(next_power_of_2)(2 * ths->n); 00191 00192 { 00193 NFFT(init_guru)(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT | 00194 PRE_PSI | MALLOC_X | MALLOC_F_HAT | FFTW_INIT, FFTW_MEASURE); 00195 NFFT(init_guru)(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT | 00196 PRE_PSI | MALLOC_X | FFTW_INIT, FFTW_MEASURE); 00197 } 00198 00199 ths->nplan1->f = ths->alpha; 00200 ths->nplan2->f_hat = ths->nplan1->f_hat; 00201 ths->nplan2->f = ths->f; 00202 00203 if (ths->flags & FGT_APPROX_B) 00204 { 00205 fplan = FFTW(plan_dft_1d)(ths->n, ths->b, ths->b, FFTW_FORWARD, 00206 FFTW_MEASURE); 00207 00208 for (j = 0; j < ths->n; j++) 00209 ths->b[j] = NFFT_M(cexp)( 00210 -ths->p * ths->p * ths->sigma * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) 00211 / ((NFFT_R) (ths->n * ths->n))) / ((NFFT_R)(ths->n)); 00212 00213 NFFT(fftshift_complex_int)(ths->b, 1, &ths->n); 00214 FFTW(execute)(fplan); 00215 NFFT(fftshift_complex_int)(ths->b, 1, &ths->n); 00216 00217 FFTW(destroy_plan)(fplan); 00218 } 00219 else 00220 { 00221 for (j = 0; j < ths->n; j++) 00222 ths->b[j] = NFFT_K(1.0) / ths->p * NFFT_M(csqrt)(NFFT_KPI / ths->sigma) 00223 * NFFT_M(cexp)( 00224 -NFFT_KPI * NFFT_KPI * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) 00225 / (ths->p * ths->p * ths->sigma)); 00226 } 00227 } 00228 00240 static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps) 00241 { 00242 NFFT_R p; 00243 int n; 00244 00245 p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma)); 00246 00247 if (p < NFFT_K(1.0)) 00248 p = NFFT_K(1.0); 00249 00250 n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma)))))); 00251 00252 fgt_init_guru(ths, N, M, sigma, n, p, 7, N * M <= ((NFFT_INT) (1U << 20)) ? DGT_PRE_CEXP : 0); 00253 } 00254 00261 static void fgt_init_node_dependent(fgt_plan *ths) 00262 { 00263 int j, k, l; 00264 00265 if (ths->flags & DGT_PRE_CEXP) 00266 { 00267 ths->pre_cexp = (NFFT_C*) NFFT(malloc)((size_t)(ths->M * ths->N) * sizeof(NFFT_C)); 00268 00269 for (j = 0, l = 0; j < ths->M; j++) 00270 for (k = 0; k < ths->N; k++, l++) 00271 ths->pre_cexp[l] = NFFT_M(cexp)( 00272 -ths->sigma * (ths->y[j] - ths->x[k]) * (ths->y[j] - ths->x[k])); 00273 } 00274 00275 for (j = 0; j < ths->nplan1->M_total; j++) 00276 ths->nplan1->x[j] = ths->x[j] / ths->p; 00277 for (j = 0; j < ths->nplan2->M_total; j++) 00278 ths->nplan2->x[j] = ths->y[j] / ths->p; 00279 00280 if (ths->nplan1->flags & PRE_PSI) 00281 NFFT(precompute_psi)(ths->nplan1); 00282 if (ths->nplan2->flags & PRE_PSI) 00283 NFFT(precompute_psi)(ths->nplan2); 00284 } 00285 00292 static void fgt_finalize(fgt_plan *ths) 00293 { 00294 NFFT(finalize)(ths->nplan2); 00295 NFFT(finalize)(ths->nplan1); 00296 00297 NFFT(free)(ths->nplan2); 00298 NFFT(free)(ths->nplan1); 00299 00300 NFFT(free)(ths->b); 00301 00302 NFFT(free)(ths->f); 00303 NFFT(free)(ths->y); 00304 00305 NFFT(free)(ths->alpha); 00306 NFFT(free)(ths->x); 00307 } 00308 00315 static void fgt_test_init_rand(fgt_plan *ths) 00316 { 00317 NFFT_INT j, k; 00318 00319 for (k = 0; k < ths->N; k++) 00320 ths->x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0); 00321 00322 for (j = 0; j < ths->M; j++) 00323 ths->y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0); 00324 00325 for (k = 0; k < ths->N; k++) 00326 ths->alpha[k] = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0) 00327 + _Complex_I * (NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0)); 00328 } 00329 00338 static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt) 00339 { 00340 int r; 00341 NFFT_R t0, t1, time_diff; 00342 NFFT_R t_out; 00343 NFFT_R tau = NFFT_K(0.01); 00344 00345 t_out = NFFT_K(0.0); 00346 r = 0; 00347 while (t_out < tau) 00348 { 00349 r++; 00350 t0 = NFFT(clock_gettime_seconds)(); 00351 if (dgt) 00352 dgt_trafo(ths); 00353 else 00354 fgt_trafo(ths); 00355 t1 = NFFT(clock_gettime_seconds)(); 00356 time_diff = t1 - t0; 00357 t_out += time_diff; 00358 } 00359 t_out /= (NFFT_R)(r); 00360 00361 return t_out; 00362 } 00363 00373 static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps) 00374 { 00375 fgt_plan my_plan; 00376 NFFT_C *swap_dgt; 00377 00378 fgt_init(&my_plan, N, M, sigma, eps); 00379 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C)); 00380 00381 fgt_test_init_rand(&my_plan); 00382 fgt_init_node_dependent(&my_plan); 00383 00384 NFFT_CSWAP(swap_dgt, my_plan.f); 00385 dgt_trafo(&my_plan); 00386 NFFT(vpr_complex)(my_plan.f, my_plan.M, "discrete gauss transform"); 00387 NFFT_CSWAP(swap_dgt, my_plan.f); 00388 00389 fgt_trafo(&my_plan); 00390 NFFT(vpr_complex)(my_plan.f, my_plan.M, "fast gauss transform"); 00391 00392 printf("\n relative error: %1.3" NFFT__FES__ "\n", 00393 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, 00394 my_plan.alpha, my_plan.N)); 00395 00396 NFFT(free)(swap_dgt); 00397 fgt_finalize(&my_plan); 00398 } 00399 00409 static void fgt_test_andersson(void) 00410 { 00411 fgt_plan my_plan; 00412 NFFT_C *swap_dgt; 00413 int N; 00414 00415 NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0)); 00416 int n = 128; 00417 int N_dgt_pre_exp = (int) (1U << 11); 00418 int N_dgt = (int) (1U << 19); 00419 00420 printf("n=%d, sigma=%1.3" NFFT__FES__ "+i%1.3" NFFT__FES__ "\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma)); 00421 00422 for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1) 00423 { 00424 printf("$%d$\t & ", N); 00425 00426 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, N < N_dgt_pre_exp ? DGT_PRE_CEXP : 0); 00427 00428 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C)); 00429 00430 fgt_test_init_rand(&my_plan); 00431 00432 fgt_init_node_dependent(&my_plan); 00433 00434 if (N < N_dgt) 00435 { 00436 NFFT_CSWAP(swap_dgt, my_plan.f); 00437 if (N < N_dgt_pre_exp) 00438 my_plan.flags ^= DGT_PRE_CEXP; 00439 00440 printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1)); 00441 if (N < N_dgt_pre_exp) 00442 my_plan.flags ^= DGT_PRE_CEXP; 00443 NFFT_CSWAP(swap_dgt, my_plan.f); 00444 } 00445 else 00446 printf("\t\t & "); 00447 00448 if (N < N_dgt_pre_exp) 00449 printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1)); 00450 else 00451 printf("\t\t & "); 00452 00453 my_plan.flags ^= FGT_NDFT; 00454 printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0)); 00455 my_plan.flags ^= FGT_NDFT; 00456 00457 printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0)); 00458 00459 printf("$%1.1" NFFT__FES__ "$\t \\\\ \n", 00460 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N)); 00461 fflush(stdout); 00462 00463 NFFT(free)(swap_dgt); 00464 fgt_finalize(&my_plan); 00465 FFTW(cleanup)(); 00466 } 00467 } 00468 00475 static void fgt_test_error(void) 00476 { 00477 fgt_plan my_plan; 00478 NFFT_C *swap_dgt; 00479 int n, mi; 00480 00481 NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0)); 00482 int N = 1000; 00483 int M = 1000; 00484 int m[2] = { 7, 3 }; 00485 00486 printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma), 00487 NFFT_M(cimag)(sigma)); 00488 printf("error=[\n"); 00489 00490 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C)); 00491 00492 for (n = 8; n <= 128; n += 4) 00493 { 00494 printf("%d\t", n); 00495 for (mi = 0; mi < 2; mi++) 00496 { 00497 fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0); 00498 fgt_test_init_rand(&my_plan); 00499 fgt_init_node_dependent(&my_plan); 00500 00501 NFFT_CSWAP(swap_dgt, my_plan.f); 00502 dgt_trafo(&my_plan); 00503 NFFT_CSWAP(swap_dgt, my_plan.f); 00504 00505 fgt_trafo(&my_plan); 00506 00507 printf("%1.3" NFFT__FES__ "\t", 00508 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N)); 00509 fflush(stdout); 00510 00511 fgt_finalize(&my_plan); 00512 FFTW(cleanup)(); 00513 } 00514 printf("\n"); 00515 } 00516 printf("];\n"); 00517 00518 NFFT(free)(swap_dgt); 00519 } 00520 00527 static void fgt_test_error_p(void) 00528 { 00529 fgt_plan my_plan; 00530 NFFT_C *swap_dgt; 00531 int n, pi; 00532 00533 NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I; 00534 int N = 1000; 00535 int M = 1000; 00536 NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)}; 00537 00538 printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma), 00539 NFFT_M(cimag)(sigma)); 00540 printf("error=[\n"); 00541 00542 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C)); 00543 00544 for (n = 8; n <= 128; n += 4) 00545 { 00546 printf("%d\t", n); 00547 for (pi = 0; pi < 3; pi++) 00548 { 00549 fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0); 00550 fgt_test_init_rand(&my_plan); 00551 fgt_init_node_dependent(&my_plan); 00552 00553 NFFT_CSWAP(swap_dgt, my_plan.f); 00554 dgt_trafo(&my_plan); 00555 NFFT_CSWAP(swap_dgt, my_plan.f); 00556 00557 fgt_trafo(&my_plan); 00558 00559 printf("%1.3" NFFT__FES__ "\t", 00560 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N)); 00561 fflush(stdout); 00562 00563 fgt_finalize(&my_plan); 00564 FFTW(cleanup)(); 00565 } 00566 printf("\n"); 00567 } 00568 printf("];\n"); 00569 } 00570 00576 int main(int argc, char **argv) 00577 { 00578 if (argc != 2) 00579 { 00580 fprintf(stderr, "fastgauss type\n"); 00581 fprintf(stderr, " type\n"); 00582 fprintf(stderr, " 0 - Simple test.\n"); 00583 fprintf(stderr, " 1 - Compares accuracy and execution time.\n"); 00584 fprintf(stderr, " Pipe to output_andersson.tex\n"); 00585 fprintf(stderr, " 2 - Compares accuracy.\n"); 00586 fprintf(stderr, " Pipe to output_error.m\n"); 00587 fprintf(stderr, " 3 - Compares accuracy.\n"); 00588 fprintf(stderr, " Pipe to output_error_p.m\n"); 00589 return EXIT_FAILURE; 00590 } 00591 00592 if (atoi(argv[1]) == 0) 00593 fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001)); 00594 00595 if (atoi(argv[1]) == 1) 00596 fgt_test_andersson(); 00597 00598 if (atoi(argv[1]) == 2) 00599 fgt_test_error(); 00600 00601 if (atoi(argv[1]) == 3) 00602 fgt_test_error_p(); 00603 00604 return EXIT_SUCCESS; 00605 } 00606 /* \} */