31#include "../fpt/fpt.h"
37#define DEFAULT_NFFT_CUTOFF 6
38#define FPT_THRESHOLD 1000
40#define NFSOFT_INDEX_TWO(m,n,l,B) ((B+1)*(B+1)+(B+1)*(B+1)*(m+B)-((m-1)*m*(2*m-1)+(B+1)*(B+2)*(2*B+3))/6)+(posN(n,m,B))+(l-MAX(ABS(m),ABS(n)))
42static fpt_set* SO3_fpt_init(
int l,
unsigned int flags,
int kappa,
int nthreads);
43static int posN(
int n,
int m,
int B);
51void nfsoft_init_advanced(
nfsoft_plan *plan,
int N,
int M,
52 unsigned int nfsoft_flags)
56 DEFAULT_NFFT_CUTOFF, FPT_THRESHOLD);
59void nfsoft_init_guru(
nfsoft_plan *plan,
int B,
int M,
60 unsigned int nfsoft_flags,
unsigned int nfft_flags,
int nfft_cutoff,
63 nfsoft_init_guru_advanced(plan, B, M, nfsoft_flags, nfft_flags, nfft_cutoff, fpt_kappa, 8* B);
66void nfsoft_init_guru_advanced(
nfsoft_plan *plan,
int B,
int M,
67 unsigned int nfsoft_flags,
unsigned int nfft_flags,
int nfft_cutoff,
68 int fpt_kappa,
int nn_oversampled)
77 n[0] = nn_oversampled ;
78 n[1] = nn_oversampled ;
79 n[2] = nn_oversampled ;
81 nfft_init_guru(&plan->
p_nfft, 3, N, M, n, nfft_cutoff, nfft_flags,
82 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
86 nfft_precompute_lin_psi(&(plan->
p_nfft));
91 plan->
flags = nfsoft_flags;
96 if (plan->
f_hat == NULL ) printf(
"Allocation failed!\n");
102 if (plan->
x == NULL ) printf(
"Allocation failed!\n");
107 if (plan->
f == NULL ) printf(
"Allocation failed!\n");
114 plan->
mv_trafo = (void (*) (
void* ))nfsoft_trafo;
115 plan->
mv_adjoint = (void (*) (
void* ))nfsoft_adjoint;
117 plan->
nthreads = Y(get_num_threads)();
123static void c2e(
nfsoft_plan *my_plan,
int even, C* wig_coeffs,
int k,
int m)
131 C cheby[2*my_plan->
N_total + 2];
132 cheby[my_plan->
N_total+1] = wig_coeffs[0];
135 for (j=1;j<my_plan->
N_total+1;j++)
137 cheby[my_plan->
N_total+1+j]=0.5* wig_coeffs[j];
138 cheby[my_plan->
N_total+1-j]=0.5* wig_coeffs[j];
151 cheby[0]=(C) (-1.)/(2.*_Complex_I) * aux[1];
154 cheby[j]=(1./(2.*_Complex_I)*(aux[j+1]-aux[j-1]));
159 for (
int i = 1; i <= 2* my_plan ->
N_total + 2; i++)
168static fpt_set* SO3_fpt_init(
int l,
unsigned int flags,
int kappa,
int nthreads)
171 int N, t, k_start, k, m;
181 t = (int) log2(X(next_power_of_2)(N));
190 N = X(next_power_of_2)(l);
196 unsigned int fptflags = 0U
216 set[0] = fpt_init((2* N + 1) * (2* N + 1), t, fptflags);
217 for (
int i=1; i<nthreads; i++)
219 set[i] = fpt_init((2* N + 1) * (2* N + 1), t, fptflags | FPT_NO_INIT_FPT_DATA);
220 set[i]->
dpt = set[0]->
dpt;
225 for (k = -N; k <= N; k++)
226 for (m = -N; m <= N; m++)
229 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
231 fpt_precompute_1(set[0], (k+N)*(2*N+1) + m+N,k_start);
233 #pragma omp parallel for default(shared) private(k,m,k_start) schedule(dynamic) num_threads(nthreads)
235 for (k = -N; k <= N; k++)
236 for (m = -N; m <= N; m++)
239 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
242 R alpha[N+2], beta[N+2], gamma[N+2];
243 SO3_alpha_row(alpha, N, k, m);
244 SO3_beta_row(beta, N, k, m);
245 SO3_gamma_row(gamma, N, k, m);
248 fpt_precompute_2(set[omp_get_thread_num()], (k+N)*(2*N+1) + m+N, alpha, beta, gamma, k_start, kappa);
250 fpt_precompute(set[0], (k+N)*(2*N+1) + m+N, alpha, beta, gamma, k_start, kappa);
257static void SO3_fpt(C *coeffs,
fpt_set set,
int l,
int k,
int m,
unsigned int flags)
261 int k_start, k_end, j;
262 int function_values = 0;
276 N = X(next_power_of_2)(l);
280 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
282 trafo_nr = (N + k) * (2* N + 1) + (m + N);
287 for (j = 0; j <= k_end; j++)
291 for (j = 0; j <= l - k_start; j++)
293 x[j + k_start] = coeffs[j];
295 for (j = l - k_start + 1; j <= k_end - k_start; j++)
297 x[j + k_start] = K(0.0);
305 fpt_trafo_direct(set, trafo_nr, &x[k_start], y, k_end, 0U
310 fpt_trafo(set, trafo_nr, &x[k_start], y, k_end, 0U
315 for (j = 0; j <= l; j++)
322static void SO3_fpt_transposed(C *coeffs,
fpt_set set,
int l,
int k,
int m,
325 int N, k_start, k_end, j;
327 int function_values = 0;
342 N = X(next_power_of_2)(l);
346 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
348 trafo_nr = (N + k) * (2* N + 1) + (m + N);
355 for (j = 0; j <= l; j++)
359 for (j = l + 1; j <= k_end; j++)
366 fpt_transposed_direct(set, trafo_nr, &x[k_start], y, k_end, 0U
371 fpt_transposed(set, trafo_nr, &x[k_start], y, k_end, 0U
375 for (j = 0; j <= l; j++)
389 if (plan3D->
x != plan3D->
p_nfft.x)
391 for (j = 0; j < M; j++)
393 plan3D->
p_nfft.x[3* j ] = plan3D->
x[3* j + 2];
394 plan3D->
p_nfft.x[3* j + 1] = plan3D->
x[3* j ];
395 plan3D->
p_nfft.x[3* j + 2] = plan3D->
x[3* j + 1];
398 for (j = 0; j < 3* plan3D ->
p_nfft.M_total; j++)
400 plan3D->
p_nfft.x[j] = plan3D->
p_nfft.x[j] * (1 / (2* KPI ));
407 nfft_precompute_one_psi(&(plan3D->
p_nfft));
411 nfft_precompute_one_psi(&(plan3D->
p_nfft));
426 for (
int j = 0; j < M; j++)
427 plan3D->
f[j] = plan3D->
f_hat[0];
431 for (
int j = 0; j < plan3D->
p_nfft.N_total; j++)
432 plan3D->
p_nfft.f_hat[j] = 0.0;
435 #pragma omp parallel for default(shared) num_threads(plan3D->nthreads)
437 for (
int k = -N; k <= N; k++)
439 C wig_coeffs[(X(next_power_of_2)(N)+1)];
441 int threadid = omp_get_thread_num();
446 for (
int m = -N; m <= N; m++)
449 int max = (ABS(m) > ABS(k) ? ABS(m) : ABS(k));
451 int glo0 = NFSOFT_INDEX_TWO(k,m,max,N);
453 for (
int j = 0; j <= N - max; j++)
455 wig_coeffs[j] = plan3D->
f_hat[glo0 + j];
459 wig_coeffs[j] = wig_coeffs[j] * (1. / (2. * KPI))
460 * SQRT(0.5 * (2. * (max + j) + 1.));
465 if ((k < 0) && (k % 2))
467 wig_coeffs[j] = wig_coeffs[j] * (-1);
469 if ((m < 0) && (m % 2))
470 wig_coeffs[j] = wig_coeffs[j] * (-1);
473 wig_coeffs[j] = wig_coeffs[j] * (-1);
480 for (
int j = N - max + 1; j < X(next_power_of_2)(N) + 1; j++)
485 c2e(plan3D, ABS((k + m) % 2), wig_coeffs, k, m);
492 nfft_trafo_direct(&(plan3D->
p_nfft));
496 nfft_trafo(&(plan3D->
p_nfft));
499 if (plan3D->
f != plan3D->
p_nfft.f)
500 for (
int j = 0; j < plan3D->
M_total; j++)
501 plan3D->
f[j] = plan3D->
p_nfft.f[j];
505static void e2c(
nfsoft_plan *my_plan,
int even, C* wig_coeffs, C* cheby)
519 aux[0]= 1/(2*_Complex_I)*cheby[1];
523 aux[j]=1/(2*_Complex_I)*(cheby[j+1]-cheby[j-1]);
525 aux[N-1]=1/(2*_Complex_I)*(-cheby[j-1]);
534 wig_coeffs[0]=cheby[my_plan->
N_total+1];
536 for(j=1;j<=my_plan->
N_total;j++)
538 wig_coeffs[j]=0.5*(cheby[my_plan->
N_total+j+1]+cheby[my_plan->
N_total+1-j]);
556 for (
int j = 0; j < M; j++)
557 plan3D->
f_hat[0] += plan3D->
f[j];
561 if (plan3D->
p_nfft.f != plan3D->
f)
562 for (
int j = 0; j < M; j++)
564 plan3D->
p_nfft.f[j] = plan3D->
f[j];
569 nfft_adjoint_direct(&(plan3D->
p_nfft));
573 nfft_adjoint(&(plan3D->
p_nfft));
579 #pragma omp parallel for default(shared) num_threads(plan3D->nthreads)
581 for (
int k = -N; k <= N; k++)
584 int threadid = omp_get_thread_num();
588 for (
int m = -N; m <= N; m++)
590 C wig_coeffs[(X(next_power_of_2)(N)+1)];
591 C cheby[2*plan3D->
N_total + 2];
593 int max = (ABS(m) > ABS(k) ? ABS(m) : ABS(k));
595 for (
int i = 1; i < 2* plan3D ->
N_total + 3; i++)
604 e2c(plan3D, ABS((k + m) % 2), wig_coeffs, cheby);
612 int glo0 = NFSOFT_INDEX_TWO(k,m,0,N);
614 for (
int j = max; j <= N; j++)
618 if ((k < 0) && (k % 2))
620 wig_coeffs[j] = -wig_coeffs[j];
622 if ((m < 0) && (m % 2))
623 wig_coeffs[j] = -wig_coeffs[j];
626 wig_coeffs[j] = wig_coeffs[j] * (-1);
630 plan3D->
f_hat[glo0+j] = wig_coeffs[j];
634 plan3D->
f_hat[glo0+j] = plan3D->
f_hat[glo0+j] * (1 / (2. * KPI)) * SQRT(
635 0.5 * (2. * (j) + 1.));
648 nfft_finalize(&plan->
p_nfft);
650 for (
int i=0; i<plan->
nthreads; i++)
676static int posN(
int n,
int m,
int B)
681 pos = posN(n - 1, m, B) + B + 1 - MAX(ABS(m), ABS(n - 1));
#define FPT_NO_FAST_ALGORITHM
If set, TODO complete comment.
#define FPT_NO_STABILIZATION
If set, no stabilization will be used.
#define FPT_FUNCTION_VALUES
If set, the output are function values at Chebyshev nodes rather than Chebyshev coefficients.
#define FPT_NO_DIRECT_ALGORITHM
If set, TODO complete comment.
#define NFSOFT_INDEX(m, n, l, B)
#define NFSOFT_MALLOC_F_HAT
#define NFSOFT_NORMALIZED
#define NFSOFT_NO_STABILIZATION
Internal header file for auxiliary definitions and functions.
Header file for the nfft3 library.
void * nfft_malloc(size_t n)
Holds data for a set of cascade summations.
fpt_data * dpt
The DPT transform data
nfft_plan p_nfft
the internal NFFT plan
NFFT_INT N_total
Total number of Fourier coefficients.
void(* mv_trafo)(void *)
Transform.
int nthreads
the number of threads
fftw_complex * cheby
deprecated variable
fpt_set * internal_fpt_set
the internal FPT plan
void(* mv_adjoint)(void *)
Adjoint transform.
fftw_complex * f_hat
Fourier coefficients.
unsigned int flags
the planner flags
NFFT_INT M_total
Total number of samples.
fftw_complex * aux
deprecated variable
fftw_complex * wig_coeffs
deprecated variable
Header file for functions related to Wigner-d/D functions.