NFFT  3.3.1
fastsum_benchomp.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <unistd.h>
00022 
00023 #include "config.h"
00024 
00025 #include "nfft3.h"
00026 #include "infft.h"
00027 
00028 #define NREPEAT 5
00029 
00030 #if defined(_WIN32) || defined(_WIN64)
00031 const char *CMD_CREATEDATASET = "fastsum_benchomp_createdataset.exe";
00032 const char *CMD_DETAIL_SINGLE = "fastsum_benchomp_detail_single.exe";
00033 const char *CMD_DETAIL_THREADS = "fastsum_benchomp_detail_threads.exe";
00034 #else
00035 const char *CMD_CREATEDATASET = "./fastsum_benchomp_createdataset";
00036 const char *CMD_DETAIL_SINGLE = "./fastsum_benchomp_detail_single";
00037 const char *CMD_DETAIL_THREADS = "./fastsum_benchomp_detail_threads";
00038 #endif
00039 
00040 static FILE* file_out_tex = NULL;
00041 
00042 int get_nthreads_array(int **arr)
00043 {
00044   int max_threads = NFFT(get_num_threads)();
00045   int alloc_num = 2;
00046   int k;
00047   int ret_number = 0;
00048   int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
00049 
00050   if (max_threads <= 5)
00051   {
00052     *arr = (int*) NFFT(malloc)((size_t) (max_threads) * sizeof(int));
00053     for (k = 0; k < max_threads; k++)
00054       *(*arr + k) = k + 1;
00055     return max_threads;
00056   }
00057 
00058   for (k = 1; k <= max_threads; k *= 2, alloc_num++)
00059     ;
00060 
00061   *arr = (int*) NFFT(malloc)((size_t)(alloc_num) * sizeof(int));
00062 
00063   for (k = 1; k <= max_threads; k *= 2)
00064   {
00065     if (k != max_threads && 2 * k > max_threads && max_threads_pw2)
00066     {
00067       *(*arr + ret_number) = max_threads / 2;
00068       ret_number++;
00069     }
00070 
00071     *(*arr + ret_number) = k;
00072     ret_number++;
00073 
00074     if (k != max_threads && 2 * k > max_threads)
00075     {
00076       *(*arr + ret_number) = max_threads;
00077       ret_number++;
00078       break;
00079     }
00080   }
00081 
00082   return ret_number;
00083 }
00084 
00085 void check_result_value(const int val, const int ok, const char *msg)
00086 {
00087   if (val != ok)
00088   {
00089     fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
00090 
00091     exit(EXIT_FAILURE);
00092   }
00093 }
00094 
00095 void run_test_create(int d, int L, int M)
00096 {
00097   char cmd[1025];
00098 
00099   snprintf(cmd, 1024,
00100       "%s %d %d %d > fastsum_benchomp_test.data",
00101       CMD_CREATEDATASET, d, L, M);
00102   fprintf(stderr, "%s\n", cmd);
00103   check_result_value(system(cmd), 0, "createdataset");
00104 }
00105 
00106 void run_test_init_output()
00107 {
00108   FILE *f = fopen("fastsum_benchomp_test.result", "w");
00109   if (f != NULL)
00110     fclose(f);
00111 }
00112 
00113 typedef struct
00114 {
00115   int d;
00116   int L;
00117   int M;
00118   int n;
00119   int m;
00120   int p;
00121   char *kernel_name;
00122   R c;
00123   R eps_I;
00124   R eps_B;
00125 } s_param;
00126 
00127 typedef struct
00128 {
00129   R avg;
00130   R min;
00131   R max;
00132 } s_resval;
00133 
00134 typedef struct
00135 {
00136   int nthreads;
00137   s_resval resval[16];
00138 } s_result;
00139 
00140 typedef struct
00141 {
00142   s_param param;
00143   s_result *results;
00144   int nresults;
00145 } s_testset;
00146 
00147 void run_test(s_resval *res, int nrepeat, int n, int m, int p,
00148     char *kernel_name, R c, R eps_I, R eps_B, int nthreads)
00149 {
00150   char cmd[1025];
00151   int r, t;
00152 
00153   for (t = 0; t < 16; t++)
00154   {
00155     res[t].avg = K(0.0);
00156     res[t].min = K(1.0) / K(0.0);
00157     res[t].max = K(0.0);
00158   }
00159 
00160   if (nthreads < 2)
00161     snprintf(cmd, 1024,
00162         "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
00163         CMD_DETAIL_SINGLE, n, m, p, kernel_name, c, eps_I, eps_B);
00164   else
00165     snprintf(cmd, 1024,
00166         "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
00167         CMD_DETAIL_THREADS, n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
00168   fprintf(stderr, "%s\n", cmd);
00169   check_result_value(system(cmd), 0, cmd);
00170 
00171   for (r = 0; r < nrepeat; r++)
00172   {
00173     int retval;
00174     R v[16];
00175     FILE *f;
00176     check_result_value(system(cmd), 0, cmd);
00177     f = fopen("fastsum_benchomp_test.out", "r");
00178     retval = fscanf(f,
00179         "" __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ "", v,
00180         v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10,
00181         v + 11, v + 12, v + 13, v + 14, v + 15);
00182     check_result_value(retval, 16, "read fastsum_benchomp_test.out");
00183     fclose(f);
00184 
00185     for (t = 0; t < 16; t++)
00186     {
00187       res[t].avg += v[t];
00188       if (res[t].min > v[t])
00189         res[t].min = v[t];
00190       if (res[t].max < v[t])
00191         res[t].max = v[t];
00192     }
00193   }
00194 
00195   for (t = 0; t < 16; t++)
00196     res[t].avg /= (R)(nrepeat);
00197 
00198   fprintf(stderr, "%d %d: ", nthreads, nrepeat);
00199   for (t = 0; t < 16; t++)
00200     fprintf(stderr, "%.3" __FES__ " %.3" __FES__ " %.3" __FES__ " | ", res[t].avg, res[t].min, res[t].max);
00201   fprintf(stderr, "\n");
00202 }
00203 
00204 const char *get_psi_string(int flags)
00205 {
00206   if (flags & PRE_PSI)
00207     return "prepsi";
00208   else if (flags & PRE_ONE_PSI)
00209     return "unknownPSI";
00210 
00211   return "nopsi";
00212 }
00213 const char *get_sort_string(int flags)
00214 {
00215   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00216     return "";
00217 
00218   if (flags & NFFT_SORT_NODES)
00219     return "sorted";
00220 
00221   return "unsorted";
00222 }
00223 
00224 const char *get_adjoint_omp_string(int flags)
00225 {
00226   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00227     return "blockwise";
00228 
00229   return "";
00230 }
00231 
00232 #define MASK_FSUM_D (1U<<0)
00233 #define MASK_FSUM_L (1U<<1)
00234 #define MASK_FSUM_M (1U<<2)
00235 #define MASK_FSUM_MULTIBW (1U<<3)
00236 #define MASK_FSUM_WINM (1U<<4)
00237 #define MASK_FSUM_P (1U<<5)
00238 #define MASK_FSUM_KERNEL (1U<<6)
00239 #define MASK_FSUM_EPSI (1U<<7)
00240 #define MASK_FSUM_EPSB (1U<<8)
00241 
00242 unsigned int fastsum_determine_different_parameters(s_testset *testsets,
00243     int ntestsets)
00244 {
00245   int t;
00246   unsigned int mask = 0;
00247 
00248   if (ntestsets < 2)
00249     return 0;
00250 
00251   for (t = 1; t < ntestsets; t++)
00252   {
00253     if (testsets[t - 1].param.d != testsets[t].param.d)
00254       mask |= MASK_FSUM_D;
00255     if (testsets[t - 1].param.L != testsets[t].param.L)
00256       mask |= MASK_FSUM_L;
00257     if (testsets[t - 1].param.M != testsets[t].param.M)
00258       mask |= MASK_FSUM_M;
00259     if (testsets[t - 1].param.n != testsets[t].param.n)
00260       mask |= MASK_FSUM_MULTIBW;
00261     if (testsets[t - 1].param.m != testsets[t].param.m)
00262       mask |= MASK_FSUM_WINM;
00263     if (testsets[t - 1].param.p != testsets[t].param.p)
00264       mask |= MASK_FSUM_P;
00265     if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name)
00266         != 0)
00267       mask |= MASK_FSUM_KERNEL;
00268     if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I)
00269       mask |= MASK_FSUM_EPSI;
00270     if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B)
00271       mask |= MASK_FSUM_EPSB;
00272   }
00273 
00274   return mask;
00275 }
00276 
00277 void strEscapeUnderscore(char *dst, char *src, int maxlen)
00278 {
00279   int i = 0;
00280   int len;
00281   int offset = 0;
00282 
00283   while (src[i] != '\0' && len + offset < maxlen - 1)
00284   {
00285     if (src[i] == '_')
00286       len = snprintf(dst + offset, maxlen - offset, "\\_{}");
00287     else
00288       len = snprintf(dst + offset, maxlen - offset, "%c", src[i]);
00289     offset += len;
00290     i++;
00291   }
00292 }
00293 
00294 void fastsum_get_plot_title_minus_indep(char *outstr, int maxlen,
00295     char *hostname, s_param param, unsigned int diff_mask)
00296 {
00297   unsigned int mask = ~diff_mask;
00298   int offset = 0;
00299   int len;
00300 
00301   len = snprintf(outstr, maxlen, "%s", hostname);
00302   if (len < 0 || len + offset >= maxlen - 1)
00303     return;
00304   offset += len;
00305 
00306   if (mask & MASK_FSUM_D)
00307   {
00308     len = snprintf(outstr + offset, maxlen - offset, " %dd fastsum", param.d);
00309     if (len < 0 || len + offset >= maxlen - 1)
00310       return;
00311     offset += len;
00312   }
00313 
00314   if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
00315   {
00316     len = snprintf(outstr + offset, maxlen - offset, " L=M=%d", param.L);
00317     if (len < 0 || len + offset >= maxlen - 1)
00318       return;
00319     offset += len;
00320   }
00321   else
00322   {
00323     if (mask & MASK_FSUM_L)
00324     {
00325       len = snprintf(outstr + offset, maxlen - offset, " L=%d", param.L);
00326       if (len < 0 || len + offset >= maxlen - 1)
00327         return;
00328       offset += len;
00329     }
00330 
00331     if (mask & MASK_FSUM_M)
00332     {
00333       len = snprintf(outstr + offset, maxlen - offset, " M=%d", param.M);
00334       if (len < 0 || len + offset >= maxlen - 1)
00335         return;
00336       offset += len;
00337     }
00338   }
00339 
00340   if (mask & MASK_FSUM_MULTIBW)
00341   {
00342     len = snprintf(outstr + offset, maxlen - offset, " n=%d", param.n);
00343     if (len < 0 || len + offset >= maxlen - 1)
00344       return;
00345     offset += len;
00346   }
00347 
00348   if (mask & MASK_FSUM_WINM)
00349   {
00350     len = snprintf(outstr + offset, maxlen - offset, " m=%d", param.m);
00351     if (len < 0 || len + offset >= maxlen - 1)
00352       return;
00353     offset += len;
00354   }
00355 
00356   if (mask & MASK_FSUM_P)
00357   {
00358     len = snprintf(outstr + offset, maxlen - offset, " p=%d", param.p);
00359     if (len < 0 || len + offset >= maxlen - 1)
00360       return;
00361     offset += len;
00362   }
00363 
00364   if (mask & MASK_FSUM_KERNEL)
00365   {
00366     char tmp[maxlen];
00367     strEscapeUnderscore(tmp, param.kernel_name, maxlen);
00368 
00369     len = snprintf(outstr + offset, maxlen - offset, " %s", tmp);
00370     if (len < 0 || len + offset >= maxlen - 1)
00371       return;
00372     offset += len;
00373   }
00374 
00375   if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
00376   {
00377     len = snprintf(outstr + offset, maxlen - offset,
00378         " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__ "",
00379         param.eps_I);
00380     if (len < 0 || len + offset >= maxlen - 1)
00381       return;
00382     offset += len;
00383   }
00384   else
00385   {
00386     if (mask & MASK_FSUM_EPSI)
00387     {
00388       len = snprintf(outstr + offset, maxlen - offset,
00389           " $\\varepsilon_\\mathrm{I}$=%" __FGS__ "", param.eps_I);
00390       if (len < 0 || len + offset >= maxlen - 1)
00391         return;
00392       offset += len;
00393     }
00394 
00395     if (mask & MASK_FSUM_EPSB)
00396     {
00397       len = snprintf(outstr + offset, maxlen - offset,
00398           " $\\varepsilon_\\mathrm{B}$=%" __FGS__ "", param.eps_B);
00399       if (len < 0 || len + offset >= maxlen - 1)
00400         return;
00401       offset += len;
00402     }
00403   }
00404 }
00405 
00406 void nfft_adjoint_print_output_histo_DFBRT(FILE *out, s_testset testset)
00407 {
00408   int i, size = testset.nresults;
00409   char hostname[1025];
00410 
00411 #ifdef HAVE_GETHOSTNAME
00412   if (gethostname(hostname, 1024) != 0)
00413 #endif
00414     strncpy(hostname, "unnamed", 1024);
00415 
00416   fprintf(out, "\\begin{tikzpicture}\n");
00417   fprintf(out, "\\begin{axis}[");
00418   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00419   fprintf(out, "symbolic x coords={");
00420   for (i = 0; i < size; i++)
00421     if (i > 0)
00422       fprintf(out, ",%d", testset.results[i].nthreads);
00423     else
00424       fprintf(out, "%d", testset.results[i].nthreads);
00425 
00426   fprintf(out,
00427       "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00428   fprintf(out,
00429       " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
00430       hostname, testset.param.d, testset.param.n, testset.param.M,
00431       testset.param.m);
00432   fprintf(out, " ]\n");
00433   fprintf(out, "\\addplot coordinates {");
00434   for (i = 0; i < size; i++)
00435     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00436         testset.results[i].resval[10].avg);
00437   fprintf(out, "};\n");
00438 
00439   fprintf(out, "\\addplot coordinates {");
00440   for (i = 0; i < size; i++)
00441     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00442         testset.results[i].resval[11].avg);
00443   fprintf(out, "};\n");
00444 
00445   fprintf(out, "\\addplot coordinates {");
00446   for (i = 0; i < size; i++)
00447     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00448         testset.results[i].resval[12].avg);
00449   fprintf(out, "};\n");
00450 
00451   fprintf(out, "\\addplot coordinates {");
00452   for (i = 0; i < size; i++)
00453     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00454         testset.results[i].resval[1].avg);
00455   fprintf(out, "};\n");
00456 
00457   fprintf(out, "\\addplot coordinates {");
00458   for (i = 0; i < size; i++)
00459     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00460         testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
00461   fprintf(out, "};\n");
00462   fprintf(out,
00463       "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
00464   fprintf(out, "\\end{axis}\n");
00465   fprintf(out, "\\end{tikzpicture}\n");
00466   fprintf(out, "\n\n");
00467 
00468   fflush(out);
00469 }
00470 
00471 void nfft_trafo_print_output_histo_DFBRT(FILE *out, s_testset testset)
00472 {
00473   int i, size = testset.nresults;
00474   char hostname[1025];
00475 
00476 #ifdef HAVE_GETHOSTNAME
00477   if (gethostname(hostname, 1024) != 0)
00478 #endif
00479     strncpy(hostname, "unnamed", 1024);
00480 
00481   fprintf(out, "\\begin{tikzpicture}\n");
00482   fprintf(out, "\\begin{axis}[");
00483   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00484   fprintf(out, "symbolic x coords={");
00485   for (i = 0; i < size; i++)
00486     if (i > 0)
00487       fprintf(out, ",%d", testset.results[i].nthreads);
00488     else
00489       fprintf(out, "%d", testset.results[i].nthreads);
00490 
00491   fprintf(out,
00492       "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00493   fprintf(out,
00494       " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
00495       hostname, testset.param.d, testset.param.n, testset.param.M,
00496       testset.param.m);
00497   fprintf(out, " ]\n");
00498   fprintf(out, "\\addplot coordinates {");
00499   for (i = 0; i < size; i++)
00500     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00501         testset.results[i].resval[13].avg);
00502   fprintf(out, "};\n");
00503 
00504   fprintf(out, "\\addplot coordinates {");
00505   for (i = 0; i < size; i++)
00506     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00507         testset.results[i].resval[14].avg);
00508   fprintf(out, "};\n");
00509 
00510   fprintf(out, "\\addplot coordinates {");
00511   for (i = 0; i < size; i++)
00512     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00513         testset.results[i].resval[15].avg);
00514   fprintf(out, "};\n");
00515 
00516   fprintf(out, "\\addplot coordinates {");
00517   for (i = 0; i < size; i++)
00518     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00519         testset.results[i].resval[2].avg);
00520   fprintf(out, "};\n");
00521 
00522   fprintf(out, "\\addplot coordinates {");
00523   for (i = 0; i < size; i++)
00524     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00525         testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
00526   fprintf(out, "};\n");
00527   fprintf(out, "\\legend{D,F,B,prepsi,total}\n");
00528   fprintf(out, "\\end{axis}\n");
00529   fprintf(out, "\\end{tikzpicture}\n");
00530   fprintf(out, "\n\n");
00531 
00532   fflush(out);
00533 }
00534 
00535 void fastsum_print_output_histo_PreRfNfT(FILE *out, s_testset testset)
00536 {
00537   int i, size = testset.nresults;
00538   char hostname[1025];
00539   char plottitle[1025];
00540 
00541 #ifdef HAVE_GETHOSTNAME
00542   if (gethostname(hostname, 1024) != 0)
00543 #endif
00544     strncpy(hostname, "unnamed", 1024);
00545 
00546   fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param,
00547       0);
00548 
00549   fprintf(out, "\\begin{tikzpicture}\n");
00550   fprintf(out, "\\begin{axis}[");
00551   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00552   fprintf(out, "symbolic x coords={");
00553   for (i = 0; i < size; i++)
00554     if (i > 0)
00555       fprintf(out, ",%d", testset.results[i].nthreads);
00556     else
00557       fprintf(out, "%d", testset.results[i].nthreads);
00558 
00559   fprintf(out,
00560       "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00561   fprintf(out, " title={%s}", plottitle);
00562   fprintf(out, " ]\n");
00563   fprintf(out, "\\addplot coordinates {");
00564   for (i = 0; i < size; i++)
00565     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00566         testset.results[i].resval[1].avg + testset.results[i].resval[2].avg);
00567   fprintf(out, "};\n");
00568 
00569   fprintf(out, "\\addplot coordinates {");
00570   for (i = 0; i < size; i++)
00571     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00572         testset.results[i].resval[3].avg);
00573   fprintf(out, "};\n");
00574 
00575   fprintf(out, "\\addplot coordinates {");
00576   for (i = 0; i < size; i++)
00577     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00578         testset.results[i].resval[4].avg + testset.results[i].resval[5].avg
00579             + testset.results[i].resval[6].avg);
00580   fprintf(out, "};\n");
00581 
00582   fprintf(out, "\\addplot coordinates {");
00583   for (i = 0; i < size; i++)
00584     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00585         testset.results[i].resval[7].avg);
00586   fprintf(out, "};\n");
00587 
00588   fprintf(out, "\\addplot coordinates {");
00589   for (i = 0; i < size; i++)
00590     fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00591         testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
00592   fprintf(out, "};\n");
00593   fprintf(out,
00594       "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
00595   fprintf(out, "\\end{axis}\n");
00596   fprintf(out, "\\end{tikzpicture}\n");
00597   fprintf(out, "\n\n");
00598 
00599   fflush(out);
00600 }
00601 
00602 void fastsum_print_output_speedup_total_minus_indep(FILE *out,
00603     s_testset *testsets, int ntestsets)
00604 {
00605   int i, t;
00606   char hostname[1025];
00607   char plottitle[1025];
00608   unsigned int diff_mask = fastsum_determine_different_parameters(testsets,
00609       ntestsets);
00610 
00611 #ifdef HAVE_GETHOSTNAME
00612   if (gethostname(hostname, 1024) != 0)
00613 #endif
00614     strncpy(hostname, "unnamed", 1024);
00615 
00616   fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname,
00617       testsets[0].param, diff_mask | MASK_FSUM_WINM);
00618 
00619   fprintf(out, "\\begin{tikzpicture}\n");
00620   fprintf(out, "\\begin{axis}[");
00621   fprintf(out,
00622       "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
00623   fprintf(out, " title={%s}", plottitle);
00624   fprintf(out, " ]\n");
00625 
00626   for (t = 0; t < ntestsets; t++)
00627   {
00628     s_testset testset = testsets[t];
00629 
00630     R tref = K(0.0);
00631     for (i = 0; i < testset.nresults; i++)
00632       if (testset.results[i].nthreads == 1)
00633         tref = testset.results[i].resval[9].avg
00634             - testset.results[i].resval[0].avg;
00635 
00636     fprintf(out, "\\addplot coordinates {");
00637     for (i = 0; i < testset.nresults; i++)
00638       fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
00639           tref
00640               / (testset.results[i].resval[9].avg
00641                   - testset.results[i].resval[0].avg));
00642     fprintf(out, "};\n");
00643 
00644     for (i = 0; i < testset.nresults; i++)
00645     {
00646       fprintf(stderr, "%d:%.3" __FIS__ "  ", testset.results[i].nthreads,
00647           tref
00648               / (testset.results[i].resval[9].avg
00649                   - testset.results[i].resval[0].avg));
00650     }
00651     fprintf(stderr, "\n\n");
00652   }
00653 
00654   fprintf(out, "\\legend{{");
00655   for (t = 0; t < ntestsets; t++)
00656   {
00657     char title[256];
00658     if (t > 0)
00659       fprintf(out, "},{");
00660     fastsum_get_plot_title_minus_indep(title, 255, "", testsets[t].param,
00661         ~(diff_mask | MASK_FSUM_WINM));
00662     fprintf(out, "%s", title);
00663   }
00664   fprintf(out, "}}\n");
00665   fprintf(out, "\\end{axis}\n");
00666   fprintf(out, "\\end{tikzpicture}\n");
00667   fprintf(out, "\n\n");
00668 
00669   fflush(out);
00670 }
00671 
00672 void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p,
00673     char *kernel_name, R c, R eps_I, R eps_B,
00674     int *nthreads_array, int n_threads_array_size)
00675 {
00676   int i;
00677   testset->param.d = d;
00678   testset->param.L = L;
00679   testset->param.M = M;
00680   testset->param.n = n;
00681   testset->param.m = m;
00682   testset->param.p = p;
00683   testset->param.kernel_name = kernel_name;
00684   testset->param.c = c;
00685   testset->param.eps_I = eps_I;
00686   testset->param.eps_B = eps_B;
00687 
00688   testset->results = (s_result*) NFFT(malloc)(
00689       (size_t)(n_threads_array_size) * sizeof(s_result));
00690   testset->nresults = n_threads_array_size;
00691 
00692   run_test_create(testset->param.d, testset->param.L, testset->param.M);
00693   for (i = 0; i < n_threads_array_size; i++)
00694   {
00695     testset->results[i].nthreads = nthreads_array[i];
00696     run_test(testset->results[i].resval, NREPEAT, testset->param.n,
00697         testset->param.m, testset->param.p, testset->param.kernel_name,
00698         testset->param.c, testset->param.eps_I, testset->param.eps_B,
00699         testset->results[i].nthreads);
00700   }
00701 
00702 }
00703 
00704 void test1(int *nthreads_array, int n_threads_array_size)
00705 {
00706   s_testset testsets[1];
00707 
00708 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00709   run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7, "one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size);
00710 
00711   fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);
00712 
00713   fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);
00714 
00715   nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);
00716 
00717   nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);
00718 #endif
00719 }
00720 
00721 int main(int argc, char** argv)
00722 {
00723   int *nthreads_array;
00724   int n_threads_array_size = get_nthreads_array(&nthreads_array);
00725   int k;
00726 
00727 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
00728   fprintf(stderr, "WARNING: Detailed time measurements are not activated.\n");
00729   fprintf(stderr, "Please re-run the configure script with options\n");
00730   fprintf(stderr,
00731       "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
00732   fprintf(stderr, "and run \"make clean all\"\n\n");
00733 #endif
00734 
00735   for (k = 0; k < n_threads_array_size; k++)
00736     fprintf(stderr, "%d ", nthreads_array[k]);
00737   fprintf(stderr, "\n");
00738 
00739   file_out_tex = fopen("fastsum_benchomp_results_plots.tex", "w");
00740 
00741   test1(nthreads_array, n_threads_array_size);
00742 
00743   fclose(file_out_tex);
00744 
00745   return EXIT_SUCCESS;
00746 }
00747