NFFT  3.3.1
nfct.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* Nonequispaced fast cosine transform */
00020 
00021 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
00022 
00023 /* configure header */
00024 #include "config.h"
00025 
00026 /* complex datatype (maybe) */
00027 #ifdef HAVE_COMPLEX_H
00028 #include<complex.h>
00029 #endif
00030 
00031 /* NFFT headers */
00032 #include "nfft3.h"
00033 #include "infft.h"
00034 
00035 #ifdef _OPENMP
00036 #include <omp.h>
00037 #endif
00038 
00039 #ifdef OMP_ASSERT
00040 #include <assert.h>
00041 #endif
00042 
00043 #undef X
00044 #define X(name) NFCT(name)
00045 
00047 static inline INT intprod(const INT *vec, const INT a, const INT d)
00048 {
00049   INT t, p;
00050 
00051   p = 1;
00052   for (t = 0; t < d; t++)
00053     p *= vec[t] - a;
00054 
00055   return p;
00056 }
00057 
00058 /* handy shortcuts */
00059 #define BASE(x) COS(x)
00060 #define NN(x) (x - 1)
00061 #define OFFSET 0
00062 #define FOURIER_TRAFO FFTW_REDFT00
00063 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
00064 
00065 #define NODE(p,r) (ths->x[(p) * ths->d + (r)])
00066 
00067 #define MACRO_with_FG_PSI fg_psi[t][lj[t]]
00068 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
00069 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
00070   - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
00071 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
00072 
00088 void X(trafo_direct)(const X(plan) *ths)
00089 {
00090   R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
00091 
00092   memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
00093 
00094   if (ths->d == 1)
00095   {
00096     /* specialize for univariate case, rationale: faster */
00097     INT j;
00098 #ifdef _OPENMP
00099     #pragma omp parallel for default(shared) private(j)
00100 #endif
00101     for (j = 0; j < ths->M_total; j++)
00102     {
00103       INT k_L;
00104       for (k_L = 0; k_L < ths->N_total; k_L++)
00105       {
00106         R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
00107         f[j] += f_hat[k_L] * BASE(omega);
00108       }
00109     }
00110   }
00111   else
00112   {
00113     /* multivariate case */
00114     INT j;
00115 #ifdef _OPENMP
00116     #pragma omp parallel for default(shared) private(j)
00117 #endif
00118     for (j = 0; j < ths->M_total; j++)
00119     {
00120       R x[ths->d], omega, Omega[ths->d + 1];
00121       INT t, t2, k_L, k[ths->d];
00122       Omega[0] = K(1.0);
00123       for (t = 0; t < ths->d; t++)
00124       {
00125         k[t] = OFFSET;
00126         x[t] = K2PI * ths->x[j * ths->d + t];
00127         Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
00128       }
00129       omega = Omega[ths->d];
00130 
00131       for (k_L = 0; k_L < ths->N_total; k_L++)
00132       {
00133         f[j] += f_hat[k_L] * omega;
00134         {
00135           for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
00136             k[t] = OFFSET;
00137 
00138           k[t]++;
00139 
00140           for (t2 = t; t2 < ths->d; t2++)
00141             Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
00142 
00143           omega = Omega[ths->d];
00144         }
00145       }
00146     }
00147   }
00148 }
00149 
00150 void X(adjoint_direct)(const X(plan) *ths)
00151 {
00152   R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
00153 
00154   memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
00155 
00156   if (ths->d == 1)
00157   {
00158     /* specialize for univariate case, rationale: faster */
00159 #ifdef _OPENMP
00160       INT k_L;
00161       #pragma omp parallel for default(shared) private(k_L)
00162       for (k_L = 0; k_L < ths->N_total; k_L++)
00163       {
00164         INT j;
00165         for (j = 0; j < ths->M_total; j++)
00166         {
00167           R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
00168           f_hat[k_L] += f[j] * BASE(omega);
00169         }
00170       }
00171 #else
00172       INT j;
00173       for (j = 0; j < ths->M_total; j++)
00174       {
00175         INT k_L;
00176         for (k_L = 0; k_L < ths->N_total; k_L++)
00177         {
00178           R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
00179           f_hat[k_L] += f[j] * BASE(omega);
00180         }
00181       }
00182 #endif
00183   }
00184   else
00185   {
00186     /* multivariate case */
00187     INT j, k_L;
00188 #ifdef _OPENMP
00189     #pragma omp parallel for default(shared) private(j, k_L)
00190     for (k_L = 0; k_L < ths->N_total; k_L++)
00191     {
00192       INT k[ths->d], k_temp, t;
00193 
00194       k_temp = k_L;
00195 
00196       for (t = ths->d - 1; t >= 0; t--)
00197       {
00198         k[t] = k_temp % ths->N[t];
00199         k_temp /= ths->N[t];
00200       }
00201 
00202       for (j = 0; j < ths->M_total; j++)
00203       {
00204         R omega = K(1.0);
00205         for (t = 0; t < ths->d; t++)
00206           omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
00207         f_hat[k_L] += f[j] * omega;
00208       }
00209     }
00210 #else
00211     for (j = 0; j < ths->M_total; j++)
00212     {
00213       R x[ths->d], omega, Omega[ths->d+1];
00214       INT t, t2, k[ths->d];
00215       Omega[0] = K(1.0);
00216       for (t = 0; t < ths->d; t++)
00217       {
00218         k[t] = OFFSET;
00219         x[t] = K2PI * ths->x[j * ths->d + t];
00220         Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
00221       }
00222       omega = Omega[ths->d];
00223       for (k_L = 0; k_L < ths->N_total; k_L++)
00224       {
00225         f_hat[k_L] += f[j] * omega;
00226 
00227         for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
00228           k[t] = OFFSET;
00229 
00230         k[t]++;
00231 
00232         for (t2 = t; t2 < ths->d; t2++)
00233           Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
00234 
00235         omega = Omega[ths->d];
00236       }
00237     }
00238 #endif
00239   }
00240 }
00241 
00261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
00262   const INT act_dim)
00263 {
00264   const R xj = ths->x[j * ths->d + act_dim];
00265   INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
00266 
00267   (*up) = c - (ths->m);
00268   (*op) = c + 1 + (ths->m);
00269 }
00270 
00271 #define MACRO_D_compute_A \
00272 { \
00273   g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
00274 }
00275 
00276 #define MACRO_D_compute_T \
00277 { \
00278   f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
00279 }
00280 
00281 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
00282 
00283 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
00284 
00285 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
00286 
00287 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
00288 
00289 #define MACRO_init_k_ks \
00290 { \
00291   for (t = 0; t < ths->d; t++) \
00292   { \
00293     kg[t] = 0; \
00294   } \
00295   i = 0; \
00296 }
00297 
00298 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
00299 { \
00300   for (t = i; t < ths->d; t++) \
00301   { \
00302     MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
00303     kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
00304   } \
00305 }
00306 
00307 #define MACRO_update_c_phi_inv_k_A(which_phi) \
00308 { \
00309   c_phi_inv_k[t+1] = (kg[t] == 0 ? K(1.0) : K(0.5)) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
00310 }
00311 
00312 #define MACRO_update_c_phi_inv_k_T(which_phi) \
00313 { \
00314   c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \
00315 }
00316 
00317 #define MACRO_count_k_ks \
00318 { \
00319   kg[ths->d - 1]++; \
00320   i = ths->d - 1; \
00321 \
00322   while ((kg[i] == ths->N[i]) && (i > 0)) \
00323   { \
00324     kg[i - 1]++; \
00325     kg[i] = 0; \
00326     i--; \
00327   } \
00328 }
00329 
00330 /* sub routines for the fast transforms  matrix vector multiplication with D, D^T */
00331 #define MACRO_D(which_one) \
00332 static inline void D_ ## which_one (X(plan) *ths) \
00333 { \
00334   R *g_hat, *f_hat; /* local copy */ \
00335   R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
00336   INT t; /* index dimensions */ \
00337   INT i; \
00338   INT k_L; /* plain index */ \
00339   INT kg[ths->d]; /* multi index in g_hat */ \
00340   INT kg_plain[ths->d+1]; /* postfix plain index */ \
00341 \
00342   f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
00343   MACRO_D_init_result_ ## which_one; \
00344 \
00345   c_phi_inv_k[0] = K(1.0); \
00346   kg_plain[0] = 0; \
00347 \
00348   MACRO_init_k_ks; \
00349 \
00350   if (ths->flags & PRE_PHI_HUT) \
00351   { \
00352     for (k_L = 0; k_L < ths->N_total; k_L++) \
00353     { \
00354       MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
00355       MACRO_D_compute_ ## which_one; \
00356       MACRO_count_k_ks; \
00357     } \
00358   } \
00359   else \
00360   { \
00361     for (k_L = 0; k_L < ths->N_total; k_L++) \
00362     { \
00363       MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
00364       MACRO_D_compute_ ## which_one; \
00365       MACRO_count_k_ks; \
00366     } \
00367   } \
00368 }
00369 
00370 MACRO_D(A)
00371 MACRO_D(T)
00372 
00373 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */
00374 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
00375 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
00376 
00377 #define MACRO_B_PRE_FULL_PSI_compute_A \
00378 { \
00379   (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
00380 }
00381 
00382 #define MACRO_B_PRE_FULL_PSI_compute_T \
00383 { \
00384   R factor = K(1.0); \
00385   INT d = ths->psi_index_g[ix]; \
00386   for (t = ths->d - 1; t >= 0; t--) \
00387   { \
00388     INT m = d % ths->n[t]; \
00389     if (m != 0 && m != ths->n[t] - 1) \
00390       factor *= K(0.5); \
00391     d = d / ths->n[t]; \
00392   } \
00393   g[ths->psi_index_g[ix]] += factor * ths->psi[ix] * (*fj); \
00394 }
00395 
00396 #define MACRO_B_compute_A \
00397 { \
00398   (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
00399 }
00400 
00401 #define MACRO_B_compute_T \
00402 { \
00403   g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
00404 }
00405 
00406 #define MACRO_init_uo_l_lj_t \
00407 { \
00408   for (t2 = 0; t2 < ths->d; t2++) \
00409   { \
00410     uo(ths, j, &u[t2], &o[t2], t2); \
00411     \
00412     /* determine index in g-array corresponding to u[(t2)] */ \
00413     if (u[(t2)] < 0) \
00414       lg_offset[(t2)] = \
00415         (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
00416     else \
00417       lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
00418       if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
00419         lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
00420     \
00421     if (lg_offset[t2] <= 0) \
00422     { \
00423       l[t2] = -lg_offset[t2]; \
00424       count_lg[t2] = -1; \
00425     } \
00426     else \
00427     { \
00428       l[t2] = +lg_offset[t2]; \
00429       count_lg[t2] = +1; \
00430     } \
00431  \
00432     lj[t2] = 0; \
00433    } \
00434    t2 = 0; \
00435 }
00436 
00437 #define FOO_A K(1.0)
00438 
00439 #define FOO_T ((l[t] == 0 || l[t] == ths->n[t] - 1) ? K(1.0) : K(0.5))
00440 
00441 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
00442 { \
00443   for (t = t2; t < ths->d; t++) \
00444   { \
00445     phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
00446     ll_plain[t+1]  = ll_plain[t] * ths->n[t] + l[t]; \
00447   } \
00448 }
00449 
00450 #define MACRO_count_uo_l_lj_t \
00451 { \
00452   /* turn around if we hit one of the boundaries */ \
00453   if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
00454     count_lg[(ths->d-1)] *= -1; \
00455  \
00456   /* move array index */ \
00457   l[(ths->d-1)] += count_lg[(ths->d-1)]; \
00458  \
00459   lj[ths->d - 1]++; \
00460   t2 = ths->d - 1; \
00461  \
00462   while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
00463   { \
00464     lj[t2 - 1]++; \
00465     lj[t2] = 0; \
00466     /* ansonsten lg[i-1] verschieben */ \
00467  \
00468     /* turn around if we hit one of the boundaries */ \
00469     if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
00470       count_lg[(t2 - 1)] *= -1; \
00471     /* move array index */ \
00472     l[(t2 - 1)] += count_lg[(t2 - 1)]; \
00473  \
00474     /* lg[i] = anfangswert */ \
00475     if (lg_offset[t2] <= 0) \
00476     { \
00477       l[t2] = -lg_offset[t2]; \
00478       count_lg[t2] = -1; \
00479     } \
00480     else \
00481     { \
00482       l[t2] = +lg_offset[t2]; \
00483       count_lg[t2] = +1; \
00484     } \
00485  \
00486     t2--; \
00487   } \
00488 }
00489 
00490 #define MACRO_B(which_one) \
00491 static inline void B_ ## which_one (X(plan) *ths) \
00492 { \
00493   INT lprod; /* 'regular bandwidth' of matrix B  */ \
00494   INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
00495   INT t, t2; /* index dimensions */ \
00496   INT j; /* index nodes */ \
00497   INT l_L, ix; /* index one row of B */ \
00498   INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
00499   INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
00500   INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
00501   R phi_prod[ths->d+1]; /* postfix product of PHI */ \
00502   R *f, *g; /* local copy */ \
00503   R *fj; /* local copy */ \
00504   R y[ths->d]; \
00505   R fg_psi[ths->d][2*ths->m+2]; \
00506   R fg_exp_l[ths->d][2*ths->m+2]; \
00507   INT l_fg,lj_fg; \
00508   R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
00509   R ip_w; \
00510   INT ip_u; \
00511   INT ip_s = ths->K/(ths->m+2); \
00512   INT lg_offset[ths->d]; /* offset in g according to u */ \
00513   INT count_lg[ths->d]; /* count summands (2m+2) */ \
00514 \
00515   f = (R*)ths->f; g = (R*)ths->g; \
00516 \
00517   MACRO_B_init_result_ ## which_one \
00518 \
00519   if (ths->flags & PRE_FULL_PSI) \
00520   { \
00521     for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
00522     { \
00523       for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
00524       { \
00525         MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
00526       } \
00527     } \
00528     return; \
00529   } \
00530 \
00531   phi_prod[0] = K(1.0); \
00532   ll_plain[0] = 0; \
00533 \
00534   for (t = 0, lprod = 1; t < ths->d; t++) \
00535     lprod *= (2 * ths->m + 2); \
00536 \
00537   if (ths->flags & PRE_PSI) \
00538   { \
00539     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00540     { \
00541       MACRO_init_uo_l_lj_t; \
00542  \
00543       for (l_L = 0; l_L < lprod; l_L++) \
00544       { \
00545         MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
00546  \
00547         MACRO_B_compute_ ## which_one; \
00548  \
00549         MACRO_count_uo_l_lj_t; \
00550       } /* for(l_L) */ \
00551     } /* for(j) */ \
00552     return; \
00553   } /* if(PRE_PSI) */ \
00554  \
00555   if (ths->flags & PRE_FG_PSI) \
00556   { \
00557     for (t = 0; t < ths->d; t++) \
00558     { \
00559       tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
00560       tmpEXP2sq = tmpEXP2 * tmpEXP2; \
00561       tmp2 = K(1.0); \
00562       tmp3 = K(1.0); \
00563       fg_exp_l[t][0] = K(1.0); \
00564  \
00565       for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
00566       { \
00567         tmp3 = tmp2 * tmpEXP2; \
00568         tmp2 *= tmpEXP2sq; \
00569         fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
00570       } \
00571     } \
00572  \
00573     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00574     { \
00575       MACRO_init_uo_l_lj_t; \
00576  \
00577       for (t = 0; t < ths->d; t++) \
00578       { \
00579         fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
00580         tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
00581         tmp1 = K(1.0); \
00582  \
00583         for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
00584         { \
00585           tmp1 *= tmpEXP1; \
00586           fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
00587         } \
00588       } \
00589  \
00590       for (l_L= 0; l_L < lprod; l_L++) \
00591       { \
00592         MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
00593  \
00594         MACRO_B_compute_ ## which_one; \
00595  \
00596         MACRO_count_uo_l_lj_t; \
00597       } \
00598     } \
00599     return; \
00600   } \
00601  \
00602   if (ths->flags & FG_PSI) \
00603   { \
00604     for (t = 0; t < ths->d; t++) \
00605     { \
00606       tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
00607       tmpEXP2sq = tmpEXP2 * tmpEXP2; \
00608       tmp2 = K(1.0); \
00609       tmp3 = K(1.0); \
00610       fg_exp_l[t][0] = K(1.0); \
00611       for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
00612       { \
00613         tmp3 = tmp2 * tmpEXP2; \
00614         tmp2 *= tmpEXP2sq; \
00615         fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
00616       } \
00617     } \
00618  \
00619     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00620     { \
00621       MACRO_init_uo_l_lj_t; \
00622  \
00623       for (t = 0; t < ths->d; t++) \
00624       { \
00625         fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
00626  \
00627         tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
00628         tmp1 = K(1.0); \
00629         for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
00630         { \
00631           tmp1 *= tmpEXP1; \
00632           fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
00633         } \
00634       } \
00635   \
00636       for (l_L = 0; l_L < lprod; l_L++) \
00637       { \
00638         MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
00639  \
00640         MACRO_B_compute_ ## which_one; \
00641  \
00642         MACRO_count_uo_l_lj_t; \
00643       } \
00644     } \
00645     return; \
00646   } \
00647  \
00648   if (ths->flags & PRE_LIN_PSI) \
00649   { \
00650     for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
00651     { \
00652       MACRO_init_uo_l_lj_t; \
00653   \
00654       for (t = 0; t < ths->d; t++) \
00655       { \
00656         y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
00657                 * ((R)ths->K))/(ths->m + 2); \
00658         ip_u  = LRINT(FLOOR(y[t])); \
00659         ip_w  = y[t]-ip_u; \
00660         for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
00661         { \
00662           fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
00663             * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
00664             * (ip_w); \
00665         } \
00666       } \
00667   \
00668       for (l_L = 0; l_L < lprod; l_L++) \
00669       { \
00670         MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
00671  \
00672         MACRO_B_compute_ ## which_one; \
00673  \
00674         MACRO_count_uo_l_lj_t; \
00675       }  /* for(l_L) */  \
00676     } /* for(j) */  \
00677     return; \
00678   } /* if(PRE_LIN_PSI) */ \
00679   \
00680   /* no precomputed psi at all */ \
00681   for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
00682   { \
00683     MACRO_init_uo_l_lj_t; \
00684  \
00685     for (l_L = 0; l_L < lprod; l_L++) \
00686     { \
00687       MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
00688  \
00689       MACRO_B_compute_ ## which_one; \
00690  \
00691       MACRO_count_uo_l_lj_t; \
00692     } /* for (l_L) */ \
00693   } /* for (j) */ \
00694 } /* B */
00695 
00696 MACRO_B(A)
00697 MACRO_B(T)
00698 
00702 void X(trafo)(X(plan) *ths)
00703 {
00704   switch(ths->d)
00705   {
00706     default:
00707     {
00708       /* use ths->my_fftw_r2r_plan */
00709       ths->g_hat = ths->g1;
00710       ths->g = ths->g2;
00711 
00712       /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
00713        * k \in I_N \f$ */
00714       TIC(0)
00715       D_A(ths);
00716       TOC(0)
00717 
00718       /* Compute by d-variate discrete Fourier transform
00719        * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
00720        * \text{ for } l \in I_n \f$ */
00721       TIC_FFTW(1)
00722       FFTW(execute)(ths->my_fftw_r2r_plan);
00723       TOC_FFTW(1)
00724 
00725       /*if (ths->flags & PRE_FULL_PSI)
00726         full_psi__A(ths);*/
00727 
00728       /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
00729        * \text{ for } j=0,\hdots,M-1 \f$ */
00730       TIC(2)
00731       B_A(ths);
00732       TOC(2)
00733 
00734       /*if (ths->flags & PRE_FULL_PSI)
00735       {
00736         Y(free)(ths->psi_index_g);
00737         Y(free)(ths->psi_index_f);
00738       }*/
00739     }
00740   }
00741 } /* trafo */
00742 
00743 void X(adjoint)(X(plan) *ths)
00744 {
00745   switch(ths->d)
00746   {
00747     default:
00748     {
00749       /* use ths->my_fftw_plan */
00750       ths->g_hat = ths->g2;
00751       ths->g = ths->g1;
00752 
00753       /*if (ths->flags & PRE_FULL_PSI)
00754         full_psi__T(ths);*/
00755 
00756       /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
00757        * \text{ for } l \in I_n,m(x_j) \f$ */
00758       TIC(2)
00759       B_T(ths);
00760       TOC(2)
00761 
00762       /* Compute by d-variate discrete cosine transform
00763        * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
00764        * \text{ for }  k \in I_N\f$ */
00765       TIC_FFTW(1)
00766       FFTW(execute)(ths->my_fftw_r2r_plan);
00767       TOC_FFTW(1)
00768 
00769       /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
00770        * k \in I_N \f$ */
00771       TIC(0)
00772       D_T(ths);
00773       TOC(0)
00774     }
00775   }
00776 } /* adjoint */
00777 
00780 static inline void precompute_phi_hut(X(plan) *ths)
00781 {
00782   INT ks[ths->d]; /* index over all frequencies */
00783   INT t; /* index over all dimensions */
00784 
00785   ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
00786 
00787   for (t = 0; t < ths->d; t++)
00788   {
00789     ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
00790 
00791     for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
00792     {
00793       ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
00794     }
00795   }
00796 } /* phi_hut */
00797 
00803 void X(precompute_lin_psi)(X(plan) *ths)
00804 {
00805   INT t; 
00806   INT j; 
00807   R step; 
00809   for (t = 0; t < ths->d; t++)
00810   {
00811     step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
00812 
00813     for (j = 0; j <= ths->K; j++)
00814     {
00815       ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
00816     } /* for(j) */
00817   } /* for(t) */
00818 }
00819 
00820 void X(precompute_fg_psi)(X(plan) *ths)
00821 {
00822   INT t; /* index over all dimensions */
00823   INT u, o; /* depends on x_j */
00824 
00825 //  sort(ths);
00826 
00827   for (t = 0; t < ths->d; t++)
00828   {
00829     INT j;
00830 //    #pragma omp parallel for default(shared) private(j,u,o)
00831     for (j = 0; j < ths->M_total; j++)
00832     {
00833       uo(ths, j, &u, &o, t);
00834 
00835       ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
00836       ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
00837       } /* for(j) */
00838   }
00839   /* for(t) */
00840 } /* nfft_precompute_fg_psi */
00841 
00842 void X(precompute_psi)(X(plan) *ths)
00843 {
00844   INT t; /* index over all dimensions */
00845   INT lj; /* index 0<=lj<u+o+1 */
00846   INT u, o; /* depends on x_j */
00847 
00848   //sort(ths);
00849 
00850   for (t = 0; t < ths->d; t++)
00851   {
00852     INT j;
00853 
00854     for (j = 0; j < ths->M_total; j++)
00855     {
00856       uo(ths, j, &u, &o, t);
00857 
00858       for(lj = 0; lj < (2 * ths->m + 2); lj++)
00859         ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
00860             (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
00861     } /* for (j) */
00862   } /* for (t) */
00863 } /* precompute_psi */
00864 
00865 void X(precompute_full_psi)(X(plan) *ths)
00866 {
00867 //#ifdef _OPENMP
00868 //  sort(ths);
00869 //
00870 //  nfft_precompute_full_psi_omp(ths);
00871 //#else
00872   INT t, t2; /* index over all dimensions */
00873   INT j; /* index over all nodes */
00874   INT l_L; /* plain index 0 <= l_L < lprod */
00875   INT l[ths->d]; /* multi index u<=l<=o */
00876   INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
00877   INT ll_plain[ths->d+1]; /* postfix plain index */
00878   INT lprod; /* 'bandwidth' of matrix B */
00879   INT u[ths->d], o[ths->d]; /* depends on x_j */
00880   INT count_lg[ths->d];
00881   INT lg_offset[ths->d];
00882 
00883   R phi_prod[ths->d+1];
00884 
00885   INT ix, ix_old;
00886 
00887   //sort(ths);
00888 
00889   phi_prod[0] = K(1.0);
00890   ll_plain[0]  = 0;
00891 
00892   for (t = 0, lprod = 1; t < ths->d; t++)
00893     lprod *= 2 * ths->m + 2;
00894 
00895   for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
00896   {
00897     MACRO_init_uo_l_lj_t;
00898 
00899     for (l_L = 0; l_L < lprod; l_L++, ix++)
00900     {
00901       MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
00902 
00903       ths->psi_index_g[ix] = ll_plain[ths->d];
00904       ths->psi[ix] = phi_prod[ths->d];
00905 
00906       MACRO_count_uo_l_lj_t;
00907     } /* for (l_L) */
00908 
00909     ths->psi_index_f[j] = ix - ix_old;
00910     ix_old = ix;
00911   } /* for(j) */
00912 //#endif
00913 }
00914 
00915 void X(precompute_one_psi)(X(plan) *ths)
00916 {
00917   if(ths->flags & PRE_PSI)
00918     X(precompute_psi)(ths);
00919   if(ths->flags & PRE_FULL_PSI)
00920     X(precompute_full_psi)(ths);
00921   if(ths->flags & PRE_FG_PSI)
00922     X(precompute_fg_psi)(ths);
00923   if(ths->flags & PRE_LIN_PSI)
00924     X(precompute_lin_psi)(ths);
00925 }
00926 
00927 static inline void init_help(X(plan) *ths)
00928 {
00929   INT t; /* index over all dimensions */
00930   INT lprod; /* 'bandwidth' of matrix B */
00931 
00932   if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00933     ths->flags |= NFFT_SORT_NODES;
00934 
00935   ths->N_total = intprod(ths->N, OFFSET, ths->d);
00936   ths->n_total = intprod(ths->n, 0, ths->d);
00937 
00938   ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
00939 
00940   for (t = 0; t < ths->d; t++)
00941     ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
00942 
00943   /* Assign r2r transform kinds for each dimension */
00944   ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
00945   for (t = 0; t < ths->d; t++)
00946     ths->r2r_kind[t] = FOURIER_TRAFO;
00947 
00948   WINDOW_HELP_INIT;
00949 
00950   if (ths->flags & MALLOC_X)
00951     ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
00952 
00953   if (ths->flags & MALLOC_F_HAT)
00954     ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
00955 
00956   if (ths->flags & MALLOC_F)
00957     ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
00958 
00959   if (ths->flags & PRE_PHI_HUT)
00960     precompute_phi_hut(ths);
00961 
00962   if(ths->flags & PRE_LIN_PSI)
00963   {
00964       ths->K = (1U<< 10) * (ths->m+2);
00965       ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
00966   }
00967 
00968   if(ths->flags & PRE_FG_PSI)
00969     ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
00970 
00971   if (ths->flags & PRE_PSI)
00972     ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
00973 
00974   if(ths->flags & PRE_FULL_PSI)
00975   {
00976       for (t = 0, lprod = 1; t < ths->d; t++)
00977         lprod *= 2 * ths->m + 2;
00978 
00979       ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
00980 
00981       ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
00982       ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
00983   }
00984 
00985   if (ths->flags & FFTW_INIT)
00986   {
00987     ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
00988 
00989     if (ths->flags & FFT_OUT_OF_PLACE)
00990       ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
00991     else
00992       ths->g2 = ths->g1;
00993 
00994     {
00995       int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
00996 
00997       for (t = 0; t < ths->d; t++)
00998         _n[t] = (int)(ths->n[t]);
00999 
01000       ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
01001       Y(free)(_n);
01002     }
01003   }
01004 
01005 //  if(ths->flags & NFFT_SORT_NODES)
01006 //    ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
01007 //  else
01008 //    ths->index_x = NULL;
01009 
01010   ths->mv_trafo = (void (*) (void* ))X(trafo);
01011   ths->mv_adjoint = (void (*) (void* ))X(adjoint);
01012 }
01013 
01014 void X(init)(X(plan) *ths, int d, int *N, int M_total)
01015 {
01016   int t; /* index over all dimensions */
01017 
01018   ths->d = (INT)d;
01019 
01020   ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
01021 
01022   for (t = 0; t < d; t++)
01023     ths->N[t] = (INT)N[t];
01024 
01025   ths->M_total = (INT)M_total;
01026 
01027   ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
01028 
01029   for (t = 0; t < d; t++)
01030     ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
01031 
01032   ths->m = WINDOW_HELP_ESTIMATE_m;
01033 
01034   if (d > 1)
01035   {
01036 //#ifdef _OPENMP
01037 //    ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
01038 //                      FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
01039 //          NFFT_OMP_BLOCKWISE_ADJOINT;
01040 //#else
01041     ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
01042                       FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
01043 //#endif
01044   }
01045   else
01046     ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
01047                       FFTW_INIT | FFT_OUT_OF_PLACE;
01048 
01049   ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
01050 
01051   init_help(ths);
01052 }
01053 
01054 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
01055   unsigned flags, unsigned fftw_flags)
01056 {
01057   INT t; /* index over all dimensions */
01058 
01059   ths->d = (INT)d;
01060   ths->M_total = (INT)M_total;
01061   ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
01062 
01063   for (t = 0; t < d; t++)
01064     ths->N[t] = (INT)N[t];
01065 
01066   ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
01067 
01068   for (t = 0; t < d; t++)
01069     ths->n[t] = (INT)n[t];
01070 
01071   ths->m = (INT)m;
01072 
01073   ths->flags = flags;
01074   ths->fftw_flags = fftw_flags;
01075 
01076   init_help(ths);
01077 }
01078 
01079 void X(init_1d)(X(plan) *ths, int N1, int M_total)
01080 {
01081   int N[1];
01082 
01083   N[0] = N1;
01084 
01085   X(init)(ths, 1, N, M_total);
01086 }
01087 
01088 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
01089 {
01090   int N[2];
01091 
01092   N[0] = N1;
01093   N[1] = N2;
01094 
01095   X(init)(ths, 2, N, M_total);
01096 }
01097 
01098 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
01099 {
01100   int N[3];
01101 
01102   N[0] = N1;
01103   N[1] = N2;
01104   N[2] = N3;
01105 
01106   X(init)(ths, 3, N, M_total);
01107 }
01108 
01109 const char* X(check)(X(plan) *ths)
01110 {
01111   INT j;
01112 
01113   if (!ths->f)
01114       return "Member f not initialized.";
01115 
01116   if (!ths->x)
01117       return "Member x not initialized.";
01118 
01119   if (!ths->f_hat)
01120       return "Member f_hat not initialized.";
01121 
01122   for (j = 0; j < ths->M_total * ths->d; j++)
01123   {
01124     if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
01125     {
01126       return "ths->x out of range [0.0,0.5)";
01127     }
01128   }
01129 
01130   for (j = 0; j < ths->d; j++)
01131   {
01132     if (ths->sigma[j] <= 1)
01133       return "Oversampling factor too small";
01134 
01135     if(ths->N[j] - 1 <= ths->m)
01136       return "Polynomial degree N is smaller than cut-off m";
01137 
01138     if(ths->N[j]%2 == 1)
01139       return "polynomial degree N has to be even";
01140   }
01141   return 0;
01142 }
01143 
01144 void X(finalize)(X(plan) *ths)
01145 {
01146   INT t; /* index over dimensions */
01147 
01148 //  if(ths->flags & NFFT_SORT_NODES)
01149 //    Y(free)(ths->index_x);
01150 
01151   if (ths->flags & FFTW_INIT)
01152   {
01153 #ifdef _OPENMP
01154     #pragma omp critical (nfft_omp_critical_fftw_plan)
01155 #endif
01156     FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
01157 
01158     if (ths->flags & FFT_OUT_OF_PLACE)
01159       Y(free)(ths->g2);
01160 
01161     Y(free)(ths->g1);
01162   }
01163 
01164   if(ths->flags & PRE_FULL_PSI)
01165   {
01166     Y(free)(ths->psi_index_g);
01167     Y(free)(ths->psi_index_f);
01168     Y(free)(ths->psi);
01169   }
01170 
01171   if (ths->flags & PRE_PSI)
01172     Y(free)(ths->psi);
01173 
01174   if(ths->flags & PRE_FG_PSI)
01175     Y(free)(ths->psi);
01176 
01177   if(ths->flags & PRE_LIN_PSI)
01178     Y(free)(ths->psi);
01179 
01180   if (ths->flags & PRE_PHI_HUT)
01181   {
01182     for (t = 0; t < ths->d; t++)
01183       Y(free)(ths->c_phi_inv[t]);
01184     Y(free)(ths->c_phi_inv);
01185   }
01186 
01187   if (ths->flags & MALLOC_F)
01188     Y(free)(ths->f);
01189 
01190   if(ths->flags & MALLOC_F_HAT)
01191     Y(free)(ths->f_hat);
01192 
01193   if (ths->flags & MALLOC_X)
01194     Y(free)(ths->x);
01195 
01196   WINDOW_HELP_FINALIZE;
01197 
01198   Y(free)(ths->N);
01199   Y(free)(ths->n);
01200   Y(free)(ths->sigma);
01201 
01202   Y(free)(ths->r2r_kind);
01203 } /* finalize */