NFFT 3.5.3alpha
nfsft_benchomp.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <unistd.h>
22
23#include "config.h"
24
25#include "nfft3.h"
26#include "infft.h"
27
/* Number of repetitions requested from each benchmark invocation; passed to
 * run_test() as nrepeat by run_testset(). */
#define NREPEAT 5

/* Helper executables started through system() by run_test_create() and
 * run_test(). On Windows the plain ".exe" names are used, elsewhere the
 * programs are looked up in the current directory. */
#if defined(_WIN32) || defined(_WIN64)
const char *CMD_CREATEDATASET = "nfsft_benchomp_createdataset.exe";
const char *CMD_DETAIL_SINGLE = "nfsft_benchomp_detail_single.exe";
const char *CMD_DETAIL_THREADS = "nfsft_benchomp_detail_threads.exe";
#else
const char *CMD_CREATEDATASET = "./nfsft_benchomp_createdataset";
const char *CMD_DETAIL_SINGLE = "./nfsft_benchomp_detail_single";
const char *CMD_DETAIL_THREADS = "./nfsft_benchomp_detail_threads";
#endif

/* Stream receiving the generated TikZ plots; opened and closed in main() and
 * handed to the print_output_* functions by test1(). */
static FILE* file_out_tex = NULL;
41
42int get_nthreads_array(int **arr)
43{
44 int max_threads = X(get_num_threads)();
45 int alloc_num = 2;
46 int k;
47 int ret_number = 0;
48 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
49
50 if (max_threads <= 5)
51 {
52 *arr = (int*) malloc(max_threads*sizeof(int));
53 for (k = 0; k < max_threads; k++)
54 *(*arr + k) = k+1;
55 return max_threads;
56 }
57
58 for (k = 1; k <= max_threads; k*=2, alloc_num++);
59
60 *arr = (int*) malloc(alloc_num*sizeof(int));
61
62 for (k = 1; k <= max_threads; k*=2)
63 {
64 if (k != max_threads && 2*k > max_threads && max_threads_pw2)
65 {
66 *(*arr + ret_number) = max_threads/2;
67 ret_number++;
68 }
69
70 *(*arr + ret_number) = k;
71 ret_number++;
72
73 if (k != max_threads && 2*k > max_threads)
74 {
75 *(*arr + ret_number) = max_threads;
76 ret_number++;
77 break;
78 }
79 }
80
81 return ret_number;
82}
83
84
/**
 * Terminate the program when a status value differs from the expected one.
 *
 * @param val  value that was obtained
 * @param ok   value that signals success
 * @param msg  text identifying the failed step in the error message
 *
 * Returns normally only if val equals ok; otherwise an error line is written
 * to stderr and the process exits with status 1.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
94
95void run_test_create(int trafo_adjoint, int N, int M)
96{
97 char cmd[1025];
98
99 snprintf(cmd, 1024, "%s %d %d %d > nfsft_benchomp_test.data", CMD_CREATEDATASET, trafo_adjoint, N, M);
100 fprintf(stderr, "%s\n", cmd);
101 check_result_value(system(cmd), 0, "createdataset");
102}
103
/**
 * Create or truncate the file nfsft_benchomp_test.result.
 *
 * Nothing is written to the file; a failure to open it is silently ignored.
 */
void run_test_init_output()
{
  FILE *result_file = fopen("nfsft_benchomp_test.result", "w");

  if (result_file == NULL)
    return;

  fclose(result_file);
}
110
/* Parameters describing one benchmark configuration. */
typedef struct
{
  int trafo_adjoint;          /* 0 is labelled NFSFT in the plots, any other value NFSFT^T */
  int N, M;                   /* problem sizes handed to the data set generator */
  int m;                      /* first argument of the benchmark executables; shown as "m" in titles */
  int nfsft_flags, psi_flags; /* flag words forwarded to the benchmark executables */
} s_param;
120
/* Average, minimum and maximum of one measured quantity over all repetitions
 * of a benchmark run. */
typedef struct
{
  double avg, min, max;
} s_resval;
127
/* Measurements of one benchmark run at a fixed number of threads. */
typedef struct
{
  /* thread count the run was started with */
  int nthreads;
  /* statistics of the six values read per repetition by run_test();
   * print_output_histo_PENRT() labels [1] as FPT/DPT, [2] as c2e, [3] as
   * NFFT, [0]+[4] as rest and [5] as total */
  s_resval resval[6];
} s_result;
133
/* One benchmark configuration together with its results for every tested
 * thread count. */
typedef struct
{
  /* configuration the results belong to */
  s_param param;
  /* array of nresults entries, allocated with malloc() in run_testset() */
  s_result *results;
  /* number of entries in results */
  int nresults;
} s_testset;
140
141void run_test(s_resval *res, int nrepeat, int m, int nfsft_flags, int psi_flags, int nthreads)
142{
143 FILE *f;
144 char cmd[1025];
145 int r,t;
146
147 for (t = 0; t < 6; t++)
148 {
149 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
150 }
151
152 if (nthreads < 2)
153 snprintf(cmd, 1024, "%s %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_SINGLE, m, nfsft_flags, psi_flags, nrepeat);
154 else
155 snprintf(cmd, 1024, "%s %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_THREADS, m, nfsft_flags, psi_flags, nrepeat, nthreads);
156 fprintf(stderr, "%s\n", cmd);
157
158 check_result_value(system(cmd), 0, cmd);
159
160 f = fopen("nfsft_benchomp_test.out", "r");
161 for (r = 0; r < nrepeat; r++)
162 {
163 int retval;
164 double v[6];
165// FILE *f;
166// check_result_value(system(cmd), 0, cmd);
167// f = fopen("nfsft_benchomp_test.out", "r");
168 retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
169 check_result_value(retval, 6, "read nfsft_benchomp_test.out");
170// fclose(f);
171// fprintf(stderr, "%.3e %.3e %.3e %.3e %.3e %.3e\n", v[0], v[1], v[2], v[3], v[4], v[5]);
172 for (t = 0; t < 6; t++)
173 {
174 res[t].avg += v[t];
175 if (res[t].min > v[t])
176 res[t].min = v[t];
177 if (res[t].max < v[t])
178 res[t].max = v[t];
179 }
180 }
181 fclose(f);
182
183 for (t = 0; t < 6; t++)
184 res[t].avg /= nrepeat;
185
186 fprintf(stderr, "%d %d: ", nthreads, nrepeat);
187 for (t = 0; t < 6; t++)
188 fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
189 fprintf(stderr, "\n");
190}
191
192const char *get_psi_string(int flags)
193{
194 if (flags & PRE_PSI)
195 return "prepsi";
196 else if (flags & PRE_ONE_PSI)
197 return "unknownPSI";
198
199 return "nopsi";
200}
201const char *get_sort_string(int flags)
202{
203 if (flags & NFFT_SORT_NODES)
204 return "sorted";
205
206 return "unsorted";
207}
208
209const char *get_adjoint_omp_string(int flags)
210{
211 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
212 return "blockwise";
213
214 return "";
215}
216
/* Bits of the mask returned by determine_different_parameters() and consumed
 * by get_plot_title(); each bit stands for one parameter of s_param.
 * Bits 0 and 3 are not assigned. */
#define MASK_TA (1U<<1)          /* trafo_adjoint */
#define MASK_N (1U<<2)           /* N */
#define MASK_M (1U<<4)           /* M */
#define MASK_WINM (1U<<5)        /* m */
#define MASK_FLAGS_PSI (1U<<6)   /* PRE_ONE_PSI bits of psi_flags */
#define MASK_FLAGS_SORT (1U<<7)  /* NFFT_SORT_NODES bit of psi_flags */
#define MASK_FLAGS_BW (1U<<8)    /* NFFT_OMP_BLOCKWISE_ADJOINT bit of psi_flags */
#define MASK_FLAGS_FPT (1U<<9)   /* NFSFT_USE_DPT bit of nfsft_flags */
225
226unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
227{
228 int t;
229 unsigned int mask = 0;
230
231 if (ntestsets < 2)
232 return 0;
233
234 for (t = 1; t < ntestsets; t++)
235 {
236 if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
237 mask |= MASK_TA;
238 if (testsets[t-1].param.N != testsets[t].param.N)
239 mask |= MASK_N;
240 if (testsets[t-1].param.M != testsets[t].param.M)
241 mask |= MASK_M;
242 if (testsets[t-1].param.m != testsets[t].param.m)
243 mask |= MASK_WINM;
244 if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
245 mask |= MASK_FLAGS_PSI;
246 if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
247 mask |= MASK_FLAGS_SORT;
248 if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
249 mask |= MASK_FLAGS_BW;
250 if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
251 mask |= MASK_FLAGS_FPT;
252 }
253
254 return mask;
255}
256
257void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
258{
259 unsigned int mask = ~diff_mask;
260 int offset = 0;
261 int len;
262
263 len = snprintf(outstr, maxlen, "%s", hostname);
264 if (len < 0 || len+offset >= maxlen-1) return;
265 offset += len;
266
267 if (mask & MASK_TA)
268 {
269 len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?"":"^\\top");
270 if (len < 0 || len+offset >= maxlen-1) return;
271 offset += len;
272 }
273
274 if (mask & MASK_N)
275 {
276 len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
277 if (len < 0 || len+offset >= maxlen-1) return;
278 offset += len;
279 }
280
281 if (mask & MASK_M)
282 {
283 len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
284 if (len < 0 || len+offset >= maxlen-1) return;
285 offset += len;
286 }
287
288 if (mask & MASK_WINM)
289 {
290 len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
291 if (len < 0 || len+offset >= maxlen-1) return;
292 offset += len;
293 }
294
295 if (mask & MASK_FLAGS_PSI)
296 {
297 len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.psi_flags));
298 if (len < 0 || len+offset >= maxlen-1) return;
299 offset += len;
300 }
301
302 if (mask & MASK_FLAGS_SORT)
303 {
304 len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.psi_flags));
305 if (len < 0 || len+offset >= maxlen-1) return;
306 offset += len;
307 }
308
309 if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
310 {
311 len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.psi_flags));
312 if (len < 0 || len+offset >= maxlen-1) return;
313 offset += len;
314 }
315
316 if (mask & MASK_FLAGS_FPT)
317 {
318 len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? " DPT" : "");
319 if (len < 0 || len+offset >= maxlen-1) return;
320 offset += len;
321 }
322
323}
324
325void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, int use_tref, double tref)
326{
327 int i, t;
328 char hostname[1025];
329 char plottitle[1025];
330 unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
331
332#ifdef HAVE_GETHOSTNAME
333 if (gethostname(hostname, 1024) != 0)
334#endif
335 strncpy(hostname, "unnamed", 1024);
336
337 get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
338
339 fprintf(out, "\\begin{tikzpicture}\n");
340 fprintf(out, "\\begin{axis}[");
341 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
342 fprintf(out, " title={%s}", plottitle);
343 fprintf(out, " ]\n");
344
345 for (t = 0; t < ntestsets; t++)
346 {
347 s_testset testset = testsets[t];
348 fprintf(stderr, "%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
349 fprintf(stderr, "\n");
350
351 fprintf(out, "\\addplot coordinates {");
352 for (i = 0; i < testset.nresults; i++)
353 if (use_tref == 1)
354 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
355 else
356 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
357 fprintf(out, "};\n");
358
359 for (i = 0; i < testset.nresults; i++)
360 if (use_tref == 1)
361 fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
362 else
363 fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
364 fprintf(stderr, "\n\n");
365 }
366
367 fprintf(out, "\\legend{{");
368 for (t = 0; t < ntestsets; t++)
369 {
370 char title[256];
371 if (t > 0)
372 fprintf(out, "},{");
373 get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask));
374 fprintf(out, "%s", title);
375 }
376 fprintf(out, "}}\n");
377 fprintf(out, "\\end{axis}\n");
378 fprintf(out, "\\end{tikzpicture}\n");
379 fprintf(out, "\n\n");
380
381 fflush(out);
382}
383
384void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets, int use_tref)
385{
386 double tref = 1.0/0.0;
387 int t, k;
388
389 if (use_tref == 1)
390 for (t = 0; t < ntestsets; t++)
391 for (k = 0; k < testsets[t].nresults; k++)
392 if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
393 tref = testsets[t].results[k].resval[5].avg;
394
395 print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
396}
397
398void print_output_histo_PENRT(FILE *out, s_testset testset)
399{
400 int i, size = testset.nresults;
401 char hostname[1025];
402
403#ifdef HAVE_GETHOSTNAME
404 if (gethostname(hostname, 1024) != 0)
405#endif
406 strncpy(hostname, "unnamed", 1024);
407
408 fprintf(out, "\\begin{tikzpicture}\n");
409 fprintf(out, "\\begin{axis}[");
410 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
411 fprintf(out, "symbolic x coords={");
412 for (i = 0; i < size; i++)
413 if (i > 0)
414 fprintf(out, ",%d", testset.results[i].nthreads);
415 else
416 fprintf(out, "%d", testset.results[i].nthreads);
417
418 fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
419 fprintf(out, " title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
420 fprintf(out, " ]\n");
421 fprintf(out, "\\addplot coordinates {");
422 for (i = 0; i < size; i++)
423 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
424 fprintf(out, "};\n");
425
426 fprintf(out, "\\addplot coordinates {");
427 for (i = 0; i < size; i++)
428 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
429 fprintf(out, "};\n");
430
431 fprintf(out, "\\addplot coordinates {");
432 for (i = 0; i < size; i++)
433 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
434 fprintf(out, "};\n");
435
436 fprintf(out, "\\addplot coordinates {");
437 for (i = 0; i < size; i++)
438 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
439 fprintf(out, "};\n");
440
441 fprintf(out, "\\addplot coordinates {");
442 for (i = 0; i < size; i++)
443 fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
444 fprintf(out, "};\n");
445 fprintf(out, "\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? "DPT" : "FPT", testset.param.trafo_adjoint==0?"c2e":"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?"":"^\\top");
446 fprintf(out, "\\end{axis}\n");
447 fprintf(out, "\\end{tikzpicture}\n");
448 fprintf(out, "\n\n");
449
450 fflush(out);
451}
452
453void run_testset(s_testset *testset, int trafo_adjoint, int N, int M, int m, int nfsft_flags, int psi_flags, int *nthreads_array, int n_threads_array_size)
454{
455 int i;
456 testset->param.trafo_adjoint = trafo_adjoint;
457 testset->param.N = N;
458 testset->param.M = M;
459 testset->param.m = m;
460 testset->param.nfsft_flags = nfsft_flags;
461 testset->param.psi_flags = psi_flags;
462
463 testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
464 testset->nresults = n_threads_array_size;
465
466 run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
467 for (i = 0; i < n_threads_array_size; i++)
468 {
469 testset->results[i].nthreads = nthreads_array[i];
470 run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
471 }
472
473}
474
475void test1(int *nthreads_array, int n_threads_array_size, int m)
476{
477 s_testset testsets[4];
478
479 run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
480#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
481 print_output_histo_PENRT(file_out_tex, testsets[0]);
482#endif
483
484 run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
485#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
486 print_output_histo_PENRT(file_out_tex, testsets[1]);
487#endif
488
489 print_output_speedup_total(file_out_tex, testsets, 2, 0);
490
491 run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
492#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
493 print_output_histo_PENRT(file_out_tex, testsets[2]);
494#endif
495
496 run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
497#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
498 print_output_histo_PENRT(file_out_tex, testsets[3]);
499#endif
500
501 print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
502}
503
504int main(int argc, char** argv)
505{
506 int *nthreads_array;
507 int n_threads_array_size = get_nthreads_array(&nthreads_array);
508 int k;
509
510#if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
511 fprintf(stderr, "WARNING: Detailed time measurements for NFSFT are not activated.\n");
512 fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
513 fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
514 fprintf(stderr, "and run \"make clean all\"\n\n");
515#endif
516
517 for (k = 0; k < n_threads_array_size; k++)
518 fprintf(stderr, "%d ", nthreads_array[k]);
519 fprintf(stderr, "\n");
520
521 file_out_tex = fopen("nfsft_benchomp_results_plots.tex", "w");
522
523 test1(nthreads_array, n_threads_array_size, 2);
524 test1(nthreads_array, n_threads_array_size, 4);
525 test1(nthreads_array, n_threads_array_size, 6);
526 test1(nthreads_array, n_threads_array_size, 8);
527
528 fclose(file_out_tex);
529
530 return 0;
531}
#define PRE_ONE_PSI
Definition nfft3.h:200
#define PRE_PSI
Definition nfft3.h:191
#define NFSFT_USE_DPT
Definition nfft3.h:587
Internal header file for auxiliary definitions and functions.
Header file for the nfft3 library.