00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "Euclid_dh.h"
00031 #include "Mem_dh.h"
00032 #include "Mat_dh.h"
00033 #include "Vec_dh.h"
00034 #include "Factor_dh.h"
00035 #include "getRow_dh.h"
00036 #include "ilu_dh.h"
00037 #include "Parser_dh.h"
00038 #include "SortedList_dh.h"
00039 #include "SubdomainGraph_dh.h"
00040 #include "ExternalRows_dh.h"
00041 #include "krylov_dh.h"
00042
00043 static void get_runtime_params_private (Euclid_dh ctx);
00044 static void invert_diagonals_private (Euclid_dh ctx);
00045 static void compute_rho_private (Euclid_dh ctx);
00046 static void factor_private (Euclid_dh ctx);
00047
00048 static void reduce_timings_private (Euclid_dh ctx);
00049
00050 #undef __FUNC__
00051 #define __FUNC__ "Euclid_dhCreate"
00052 void
00053 Euclid_dhCreate (Euclid_dh * ctxOUT)
00054 {
00055 START_FUNC_DH
00056 struct _mpi_interface_dh *ctx =
00057 (struct _mpi_interface_dh *)
00058 MALLOC_DH (sizeof (struct _mpi_interface_dh));
00059 CHECK_V_ERROR;
00060 *ctxOUT = ctx;
00061
00062 ctx->isSetup = false;
00063
00064 ctx->rho_init = 2.0;
00065 ctx->rho_final = 0.0;
00066
00067 ctx->m = 0;
00068 ctx->n = 0;
00069 ctx->rhs = NULL;
00070 ctx->A = NULL;
00071 ctx->F = NULL;
00072 ctx->sg = NULL;
00073
00074 ctx->scale = NULL;
00075 ctx->isScaled = false;
00076 ctx->work = NULL;
00077 ctx->work2 = NULL;
00078 ctx->from = 0;
00079 ctx->to = 0;
00080
00081 strcpy (ctx->algo_par, "pilu");
00082 strcpy (ctx->algo_ilu, "iluk");
00083 ctx->level = 1;
00084 ctx->droptol = DEFAULT_DROP_TOL;
00085 ctx->sparseTolA = 0.0;
00086 ctx->sparseTolF = 0.0;
00087 ctx->pivotMin = 0.0;
00088 ctx->pivotFix = PIVOT_FIX_DEFAULT;
00089 ctx->maxVal = 0.0;
00090
00091 ctx->slist = NULL;
00092 ctx->extRows = NULL;
00093
00094 strcpy (ctx->krylovMethod, "bicgstab");
00095 ctx->maxIts = 200;
00096 ctx->rtol = 1e-5;
00097 ctx->atol = 1e-50;
00098 ctx->its = 0;
00099 ctx->itsTotal = 0;
00100 ctx->setupCount = 0;
00101 ctx->logging = 0;
00102 ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
00103
00104 {
00105 int i;
00106 for (i = 0; i < TIMING_BINS; ++i)
00107 ctx->timing[i] = 0.0;
00108 for (i = 0; i < STATS_BINS; ++i)
00109 ctx->stats[i] = 0.0;
00110 }
00111 ctx->timingsWereReduced = false;
00112
00113 ++ref_counter;
00114 END_FUNC_DH}
00115
00116
00117 #undef __FUNC__
00118 #define __FUNC__ "Euclid_dhDestroy"
00119 void
00120 Euclid_dhDestroy (Euclid_dh ctx)
00121 {
00122 START_FUNC_DH
00123 if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
00124 {
00125
00126 Parser_dhInsert (parser_dh, "-eu_mem", "1");
00127 CHECK_V_ERROR;
00128 Euclid_dhPrintHypreReport (ctx, stdout);
00129 CHECK_V_ERROR;
00130 }
00131
00132 if (ctx->setupCount > 1 && ctx->printStats)
00133 {
00134 Euclid_dhPrintStatsShorter (ctx, stdout);
00135 CHECK_V_ERROR;
00136 }
00137
00138 if (ctx->F != NULL)
00139 {
00140 Factor_dhDestroy (ctx->F);
00141 CHECK_V_ERROR;
00142 }
00143 if (ctx->sg != NULL)
00144 {
00145 SubdomainGraph_dhDestroy (ctx->sg);
00146 CHECK_V_ERROR;
00147 }
00148 if (ctx->scale != NULL)
00149 {
00150 FREE_DH (ctx->scale);
00151 CHECK_V_ERROR;
00152 }
00153 if (ctx->work != NULL)
00154 {
00155 FREE_DH (ctx->work);
00156 CHECK_V_ERROR;
00157 }
00158 if (ctx->work2 != NULL)
00159 {
00160 FREE_DH (ctx->work2);
00161 CHECK_V_ERROR;
00162 }
00163 if (ctx->slist != NULL)
00164 {
00165 SortedList_dhDestroy (ctx->slist);
00166 CHECK_V_ERROR;
00167 }
00168 if (ctx->extRows != NULL)
00169 {
00170 ExternalRows_dhDestroy (ctx->extRows);
00171 CHECK_V_ERROR;
00172 }
00173 FREE_DH (ctx);
00174 CHECK_V_ERROR;
00175
00176 --ref_counter;
00177 END_FUNC_DH}
00178
00179
00180
00181
00182
00183 #undef __FUNC__
00184 #define __FUNC__ "Euclid_dhSetup"
00185 void
00186 Euclid_dhSetup (Euclid_dh ctx)
00187 {
00188 START_FUNC_DH int m, n, beg_row;
00189 double t1;
00190 bool isSetup = ctx->isSetup;
00191 bool bj = false;
00192
00193
00194
00195
00196
00197 if (ctx->setupCount && ctx->printStats)
00198 {
00199 Euclid_dhPrintStatsShorter (ctx, stdout);
00200 CHECK_V_ERROR;
00201 ctx->its = 0;
00202 }
00203
00204
00205
00206
00207 {
00208 int i;
00209 for (i = 0; i < STATS_BINS; ++i)
00210 ctx->stats[i] = 0.0;
00211 }
00212
00213
00214
00215
00216 ctx->timing[SOLVE_START_T] = MPI_Wtime ();
00217
00218 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
00219 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
00220
00221 if (ctx->F != NULL)
00222 {
00223 Factor_dhDestroy (ctx->F);
00224 CHECK_V_ERROR;
00225 ctx->F = NULL;
00226 }
00227
00228 if (ctx->A == NULL)
00229 {
00230 SET_V_ERROR ("must set ctx->A before calling init");
00231 }
00232 EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
00233 CHECK_V_ERROR;
00234 ctx->m = m;
00235 ctx->n = n;
00236
00237 if (Parser_dhHasSwitch (parser_dh, "-print_size"))
00238 {
00239 printf_dh
00240 ("setting up linear system; global rows: %i local rows: %i (on P_0)\n",
00241 n, m);
00242 }
00243
00244 sprintf (msgBuf_dh, "localRow= %i; globalRows= %i; beg_row= %i", m, n,
00245 beg_row);
00246 SET_INFO (msgBuf_dh);
00247
00248 bj = Parser_dhHasSwitch (parser_dh, "-bj");
00249
00250
00251
00252
00253
00254
00255
00256 if (ctx->sg == NULL)
00257 {
00258 int blocks = np_dh;
00259 t1 = MPI_Wtime ();
00260 if (np_dh == 1)
00261 {
00262 Parser_dhReadInt (parser_dh, "-blocks", &blocks);
00263 CHECK_V_ERROR;
00264 SubdomainGraph_dhCreate (&(ctx->sg));
00265 CHECK_V_ERROR;
00266 SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
00267 CHECK_V_ERROR;
00268 }
00269 else
00270 {
00271 SubdomainGraph_dhCreate (&(ctx->sg));
00272 CHECK_V_ERROR;
00273 SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
00274 CHECK_V_ERROR;
00275 }
00276 ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
00277 }
00278
00279
00280
00281
00282
00283
00284
00285 if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
00286 {
00287 goto END_OF_FUNCTION;
00288 }
00289
00290
00291
00292
00293
00294 if (!isSetup)
00295 {
00296 get_runtime_params_private (ctx);
00297 CHECK_V_ERROR;
00298 }
00299 if (!strcmp (ctx->algo_par, "bj"))
00300 bj = false;
00301
00302
00303
00304
00305
00306 if (ctx->scale == NULL)
00307 {
00308 ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
00309 CHECK_V_ERROR;
00310 }
00311 {
00312 int i;
00313 for (i = 0; i < m; ++i)
00314 ctx->scale[i] = 1.0;
00315 }
00316
00317
00318
00319
00320 if (ctx->work == NULL)
00321 {
00322 ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
00323 CHECK_V_ERROR;
00324 }
00325 if (ctx->work2 == NULL)
00326 {
00327 ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
00328 CHECK_V_ERROR;
00329 }
00330
00331
00332
00333
00334
00335 t1 = MPI_Wtime ();
00336 factor_private (ctx);
00337 CHECK_V_ERROR;
00338 ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);
00339
00340
00341
00342
00343 if (strcmp (ctx->algo_par, "none"))
00344 {
00345 invert_diagonals_private (ctx);
00346 CHECK_V_ERROR;
00347 }
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357 if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
00358 {
00359 if (strcmp (ctx->algo_par, "none"))
00360 {
00361 t1 = MPI_Wtime ();
00362 compute_rho_private (ctx);
00363 CHECK_V_ERROR;
00364 ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
00365 }
00366 }
00367
00368
00369
00370
00371
00372
00373
00374 if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
00375 {
00376 t1 = MPI_Wtime ();
00377 Factor_dhSolveSetup (ctx->F, ctx->sg);
00378 CHECK_V_ERROR;
00379 ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
00380 }
00381
00382 END_OF_FUNCTION:;
00383
00384
00385
00386
00387 ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
00388 ctx->setupCount += 1;
00389
00390 ctx->isSetup = true;
00391
00392 END_FUNC_DH}
00393
00394
00395 #undef __FUNC__
00396 #define __FUNC__ "get_runtime_params_private"
00397 void
00398 get_runtime_params_private (Euclid_dh ctx)
00399 {
00400 START_FUNC_DH char *tmp;
00401
00402
00403 Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
00404 Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
00405 Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));
00406
00407
00408 tmp = NULL;
00409 Parser_dhReadString (parser_dh, "-par", &tmp);
00410 if (tmp != NULL)
00411 {
00412 strcpy (ctx->algo_par, tmp);
00413 }
00414 if (Parser_dhHasSwitch (parser_dh, "-bj"))
00415 {
00416 strcpy (ctx->algo_par, "bj");
00417 }
00418
00419
00420
00421 Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
00422
00423 Parser_dhReadInt (parser_dh, "-level", &ctx->level);
00424 Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);
00425
00426 if (Parser_dhHasSwitch (parser_dh, "-ilut"))
00427 {
00428 Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
00429 ctx->isScaled = true;
00430 strcpy (ctx->algo_ilu, "ilut");
00431 }
00432
00433
00434
00435
00436 if (!strcmp (ctx->algo_par, "none"))
00437 {
00438 strcpy (ctx->algo_ilu, "none");
00439 }
00440 else if (!strcmp (ctx->algo_ilu, "none"))
00441 {
00442 strcpy (ctx->algo_par, "none");
00443 }
00444
00445
00446 Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
00447
00448 Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
00449
00450 Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
00451
00452 Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
00453
00454
00455
00456 if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
00457 {
00458 ctx->isScaled = true;
00459 }
00460
00461
00462 tmp = NULL;
00463 Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
00464 if (tmp != NULL)
00465 {
00466 strcpy (ctx->krylovMethod, tmp);
00467
00468 if (!strcmp (ctx->krylovMethod, "bcgs"))
00469 {
00470 strcpy (ctx->krylovMethod, "bicgstab");
00471 }
00472 }
00473 END_FUNC_DH}
00474
00475 #undef __FUNC__
00476 #define __FUNC__ "invert_diagonals_private"
00477 void
00478 invert_diagonals_private (Euclid_dh ctx)
00479 {
00480 START_FUNC_DH REAL_DH * aval = ctx->F->aval;
00481 int *diag = ctx->F->diag;
00482 if (aval == NULL || diag == NULL)
00483 {
00484 SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
00485 }
00486 else
00487 {
00488 int i, m = ctx->F->m;
00489 for (i = 0; i < m; ++i)
00490 {
00491 aval[diag[i]] = 1.0 / aval[diag[i]];
00492 }
00493 }
00494 END_FUNC_DH}
00495
00496
00497
00498 #undef __FUNC__
00499 #define __FUNC__ "compute_rho_private"
00500 void
00501 compute_rho_private (Euclid_dh ctx)
00502 {
00503 START_FUNC_DH if (ctx->F != NULL)
00504 {
00505 double bufLocal[3], bufGlobal[3];
00506 int m = ctx->m;
00507
00508 ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
00509 bufLocal[0] = ctx->stats[NZA_STATS];
00510 bufLocal[1] = ctx->stats[NZF_STATS];
00511 bufLocal[2] = ctx->stats[NZA_USED_STATS];
00512
00513 if (np_dh == 1)
00514 {
00515 bufGlobal[0] = bufLocal[0];
00516 bufGlobal[1] = bufLocal[1];
00517 bufGlobal[2] = bufLocal[2];
00518 }
00519 else
00520 {
00521 MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
00522 comm_dh);
00523 }
00524
00525 if (myid_dh == 0)
00526 {
00527
00528
00529 if (bufGlobal[0] && bufGlobal[1])
00530 {
00531 ctx->rho_final = bufGlobal[1] / bufGlobal[0];
00532 }
00533 else
00534 {
00535 ctx->rho_final = -1;
00536 }
00537
00538
00539 if (bufGlobal[0] && bufGlobal[2])
00540 {
00541 ctx->stats[NZA_RATIO_STATS] =
00542 100.0 * bufGlobal[2] / bufGlobal[0];
00543 }
00544 else
00545 {
00546 ctx->stats[NZA_RATIO_STATS] = 100.0;
00547 }
00548 }
00549 }
00550 END_FUNC_DH}
00551
00552 #undef __FUNC__
00553 #define __FUNC__ "factor_private"
00554 void
00555 factor_private (Euclid_dh ctx)
00556 {
00557 START_FUNC_DH
00558
00559
00560
00561 if (!strcmp (ctx->algo_par, "none"))
00562 {
00563 goto DO_NOTHING;
00564 }
00565
00566
00567
00568
00569 {
00570 int br = 0;
00571 int id = np_dh;
00572 if (ctx->sg != NULL)
00573 {
00574 br = ctx->sg->beg_rowP[myid_dh];
00575 id = ctx->sg->o2n_sub[myid_dh];
00576 }
00577 Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
00578 CHECK_V_ERROR;
00579 ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
00580 ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
00581 if (!strcmp (ctx->algo_par, "bj"))
00582 ctx->F->blockJacobi = true;
00583 if (Parser_dhHasSwitch (parser_dh, "-bj"))
00584 ctx->F->blockJacobi = true;
00585 }
00586
00587
00588
00589
00590 if (np_dh == 1)
00591 {
00592
00593
00594 if (!strcmp (ctx->algo_ilu, "iluk"))
00595 {
00596 ctx->from = 0;
00597 ctx->to = ctx->m;
00598
00599
00600 if (Parser_dhHasSwitch (parser_dh, "-mpi"))
00601 {
00602 if (ctx->sg != NULL && ctx->sg->blocks > 1)
00603 {
00604 SET_V_ERROR
00605 ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
00606 }
00607 iluk_mpi_pilu (ctx);
00608 CHECK_V_ERROR;
00609 }
00610
00611
00612 else
00613 {
00614 iluk_seq_block (ctx);
00615 CHECK_V_ERROR;
00616
00617 }
00618 }
00619
00620
00621 else if (!strcmp (ctx->algo_ilu, "ilut"))
00622 {
00623 ctx->from = 0;
00624 ctx->to = ctx->m;
00625 ilut_seq (ctx);
00626 CHECK_V_ERROR;
00627 }
00628
00629
00630 else
00631 {
00632 sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00633 ctx->algo_ilu);
00634 SET_V_ERROR (msgBuf_dh);
00635 }
00636 }
00637
00638
00639
00640
00641 else
00642 {
00643
00644 if (!strcmp (ctx->algo_par, "bj"))
00645 {
00646 ctx->from = 0;
00647 ctx->to = ctx->m;
00648 iluk_mpi_bj (ctx);
00649 CHECK_V_ERROR;
00650 }
00651
00652
00653 else if (!strcmp (ctx->algo_ilu, "iluk"))
00654 {
00655 bool bj = ctx->F->blockJacobi;
00656
00657
00658
00659 SortedList_dhCreate (&(ctx->slist));
00660 CHECK_V_ERROR;
00661 SortedList_dhInit (ctx->slist, ctx->sg);
00662 CHECK_V_ERROR;
00663 ExternalRows_dhCreate (&(ctx->extRows));
00664 CHECK_V_ERROR;
00665 ExternalRows_dhInit (ctx->extRows, ctx);
00666 CHECK_V_ERROR;
00667
00668
00669 ctx->from = 0;
00670 ctx->to = ctx->F->first_bdry;
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681 iluk_seq (ctx);
00682 CHECK_V_ERROR;
00683
00684
00685
00686
00687
00688 if (!bj)
00689 {
00690 ExternalRows_dhRecvRows (ctx->extRows);
00691 CHECK_V_ERROR;
00692 }
00693
00694
00695 ctx->from = ctx->F->first_bdry;
00696 ctx->to = ctx->F->m;
00697 iluk_mpi_pilu (ctx);
00698 CHECK_V_ERROR;
00699
00700
00701
00702
00703 if (!bj)
00704 {
00705 ExternalRows_dhSendRows (ctx->extRows);
00706 CHECK_V_ERROR;
00707 }
00708
00709
00710
00711
00712
00713
00714
00715 SortedList_dhDestroy (ctx->slist);
00716 CHECK_V_ERROR;
00717 ctx->slist = NULL;
00718 ExternalRows_dhDestroy (ctx->extRows);
00719 CHECK_V_ERROR;
00720 ctx->extRows = NULL;
00721 }
00722
00723
00724 else
00725 {
00726 sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00727 ctx->algo_ilu);
00728 SET_V_ERROR (msgBuf_dh);
00729 }
00730 }
00731
00732 DO_NOTHING:;
00733
00734 END_FUNC_DH}
00735
00736 #if 0
00737
00738 #undef __FUNC__
00739 #define __FUNC__ "discard_indices_private"
00740 void
00741 discard_indices_private (Euclid_dh ctx)
00742 {
00743 START_FUNC_DH
00744 #if 0
00745 int *rp = ctx->F->rp, *cval = ctx->F->cval;
00746 double *aval = ctx->F->aval;
00747 int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
00748 int i, j, k, idx, count = 0, start_of_row;
00749 int beg_row = ctx->beg_row, end_row = beg_row + m;
00750 int *diag = ctx->F->diag;
00751
00752
00753
00754
00755
00756
00757 for (i = 0; i < m; ++i)
00758 {
00759 for (j = rp[i]; j < rp[i + 1]; ++j)
00760 {
00761 int col = cval[j];
00762 if (col < beg_row || col >= end_row)
00763 {
00764 bool flag = true;
00765 int owner = find_owner_private_mpi (ctx, col);
00766 CHECK_V_ERROR;
00767
00768 for (k = 0; k < nc; ++k)
00769 {
00770 if (nabors[k] == owner)
00771 {
00772 flag = false;
00773 break;
00774 }
00775 }
00776
00777 if (flag)
00778 {
00779 cval[j] = -1;
00780 ++count;
00781 }
00782 }
00783 }
00784 }
00785
00786 sprintf (msgBuf_dh,
00787 "deleting %i indices that would alter the subdomain graph", count);
00788 SET_INFO (msgBuf_dh);
00789
00790
00791 idx = 0;
00792 start_of_row = 0;
00793 for (i = 0; i < m; ++i)
00794 {
00795 for (j = start_of_row; j < rp[i + 1]; ++j)
00796 {
00797 int col = cval[j];
00798 double val = aval[j];
00799 if (col != -1)
00800 {
00801 cval[idx] = col;
00802 aval[idx] = val;
00803 ++idx;
00804 }
00805 }
00806 start_of_row = rp[i + 1];
00807 rp[i + 1] = idx;
00808 }
00809
00810
00811 for (i = 0; i < m; ++i)
00812 {
00813 for (j = rp[i]; j < rp[i + 1]; ++j)
00814 {
00815 if (cval[j] == i + beg_row)
00816 {
00817 diag[i] = j;
00818 break;
00819 }
00820 }
00821 }
00822 #endif
00823 END_FUNC_DH}
00824 #endif
00825
00826 #undef __FUNC__
00827 #define __FUNC__ "Euclid_dhSolve"
00828 void
00829 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
00830 {
00831 START_FUNC_DH int itsOUT;
00832 Mat_dh A = (Mat_dh) ctx->A;
00833
00834 if (!strcmp (ctx->krylovMethod, "cg"))
00835 {
00836 cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00837 ERRCHKA;
00838 }
00839 else if (!strcmp (ctx->krylovMethod, "bicgstab"))
00840 {
00841 bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00842 ERRCHKA;
00843 }
00844 else
00845 {
00846 sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
00847 SET_V_ERROR (msgBuf_dh);
00848 }
00849 *its = itsOUT;
00850 END_FUNC_DH}
00851
00852 #undef __FUNC__
00853 #define __FUNC__ "Euclid_dhPrintStats"
00854 void
00855 Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp)
00856 {
00857 START_FUNC_DH double *timing;
00858 int nz;
00859
00860 nz = Factor_dhReadNz (ctx->F);
00861 CHECK_V_ERROR;
00862 timing = ctx->timing;
00863
00864
00865 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
00866 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
00867
00868 reduce_timings_private (ctx);
00869 CHECK_V_ERROR;
00870
00871 fprintf_dh (fp,
00872 "\n==================== Euclid report (start) ====================\n");
00873 fprintf_dh (fp, "\nruntime parameters\n");
00874 fprintf_dh (fp, "------------------\n");
00875 fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
00876 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
00877 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
00878 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
00879 fprintf_dh (fp, " matrix was row scaled: %i\n", ctx->isScaled);
00880
00881 fprintf_dh (fp, " matrix row count: %i\n", ctx->n);
00882 fprintf_dh (fp, " nzF: %i\n", nz);
00883 fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
00884 fprintf_dh (fp, " level: %i\n", ctx->level);
00885 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
00886
00887 fprintf_dh (fp, "\nEuclid timing report\n");
00888 fprintf_dh (fp, "--------------------\n");
00889 fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
00890 timing[TOTAL_SOLVE_T]);
00891 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
00892 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
00893 fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
00894 timing[SUB_GRAPH_T]);
00895 fprintf_dh (fp, " factorization: %0.2f\n", timing[FACTOR_T]);
00896 fprintf_dh (fp, " solve setup: %0.2f\n",
00897 timing[SOLVE_SETUP_T]);
00898 fprintf_dh (fp, " rho: %0.2f\n",
00899 ctx->timing[COMPUTE_RHO_T]);
00900 fprintf_dh (fp, " misc (should be small): %0.2f\n",
00901 timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
00902 timing[SOLVE_SETUP_T] +
00903 timing[COMPUTE_RHO_T]));
00904
00905 if (ctx->sg != NULL)
00906 {
00907 SubdomainGraph_dhPrintStats (ctx->sg, fp);
00908 CHECK_V_ERROR;
00909 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
00910 CHECK_V_ERROR;
00911 }
00912
00913
00914 fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
00915 fprintf_dh (fp, "---------------------------------------------------\n");
00916 fprintf_dh (fp, " solve method: %s\n", ctx->krylovMethod);
00917 fprintf_dh (fp, " maxIts: %i\n", ctx->maxIts);
00918 fprintf_dh (fp, " rtol: %g\n", ctx->rtol);
00919 fprintf_dh (fp, " atol: %g\n", ctx->atol);
00920 fprintf_dh (fp,
00921 "\n==================== Euclid report (end) ======================\n");
00922 END_FUNC_DH}
00923
00924
00925
00926
00927
00928
00929 #undef __FUNC__
00930 #define __FUNC__ "Euclid_dhPrintStatsShort"
00931 void
00932 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
00933 FILE * fp)
00934 {
00935 START_FUNC_DH double *timing = ctx->timing;
00936
00937
00938
00939 double apply_total;
00940 double apply_per_it;
00941
00942 double perIt;
00943 int blocks = np_dh;
00944
00945 if (np_dh == 1)
00946 blocks = ctx->sg->blocks;
00947
00948 reduce_timings_private (ctx);
00949 CHECK_V_ERROR;
00950
00951
00952
00953 apply_total = timing[TRI_SOLVE_T];
00954 apply_per_it = apply_total / (double) ctx->its;
00955
00956 perIt = solve / (double) ctx->its;
00957
00958 fprintf_dh (fp, "\n");
00959 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00960 "method", "subdms", "level", "its", "setup", "solve", "total",
00961 "perIt", "perIt", "rows");
00962 fprintf_dh (fp,
00963 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
00964 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g XXX\n", ctx->algo_par,
00965 blocks,
00966 ctx->level,
00967 ctx->its,
00968 setup,
00969 solve,
00970 setup + solve,
00971 perIt,
00972 apply_per_it,
00973 (double) ctx->n
00974 );
00975
00976
00977
00978
00979 #if 0
00980 fprintf_dh (fp, "\n");
00981 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00982 "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
00983
00984 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00985 "method", "subdms", "level", "its", "total", "factor",
00986 "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
00987 fprintf_dh (fp,
00988 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
00989
00990
00991 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g XXX\n", ctx->algo_par,
00992 blocks,
00993 ctx->level,
00994 ctx->its,
00995 setup,
00996 solve,
00997 setup_factor,
00998 setup_other,
00999 apply_total,
01000 apply_per_it,
01001 ctx->rho_final,
01002 ctx->sparseTolA,
01003 nzUsedRatio,
01004 (double) ctx->n
01005 );
01006 #endif
01007
01008 #if 0
01009
01010 fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
01011 "subGph", "factor", "solveS", "perIt");
01012 fprintf_dh (fp, "------ ----- ----- ----- ----- ----- WW\n");
01013 fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n",
01014 ctx->algo_par,
01015 ctx->level,
01016 timing[SUB_GRAPH_T],
01017 timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
01018 #endif
01019 END_FUNC_DH}
01020
01021
01022
01023 #undef __FUNC__
01024 #define __FUNC__ "Euclid_dhPrintStatsShorter"
01025 void
01026 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp)
01027 {
01028 START_FUNC_DH double *stats = ctx->stats;
01029
01030 int its = ctx->its;
01031 double rho = ctx->rho_final;
01032 double nzUsedRatio = stats[NZA_RATIO_STATS];
01033
01034
01035 fprintf_dh (fp, "\nStats from last linear solve: YY\n");
01036 fprintf_dh (fp, "%6s %6s %6s YY\n", "its", "rho", "A_%");
01037 fprintf_dh (fp, " ----- ----- ----- YY\n");
01038 fprintf_dh (fp, "%6i %6.2f %6.2f YYY\n", its, rho, nzUsedRatio);
01039
01040 END_FUNC_DH}
01041
01042 #undef __FUNC__
01043 #define __FUNC__ "Euclid_dhPrintScaling"
01044 void
01045 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp)
01046 {
01047 START_FUNC_DH int i, m = ctx->m;
01048
01049 if (m > 10)
01050 m = 10;
01051
01052 if (ctx->scale == NULL)
01053 {
01054 SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
01055 }
01056
01057 fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
01058 for (i = 0; i < m; ++i)
01059 {
01060 fprintf (fp, " %i %g \n", i + 1, ctx->scale[i]);
01061 }
01062 END_FUNC_DH}
01063
01064
01065 #undef __FUNC__
01066 #define __FUNC__ "reduce_timings_private"
01067 void
01068 reduce_timings_private (Euclid_dh ctx)
01069 {
01070 START_FUNC_DH if (np_dh > 1)
01071 {
01072 double bufOUT[TIMING_BINS];
01073
01074 memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
01075 MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
01076 comm_dh);
01077 }
01078
01079 ctx->timingsWereReduced = true;
01080 END_FUNC_DH}
01081
01082 #undef __FUNC__
01083 #define __FUNC__ "Euclid_dhPrintHypreReport"
01084 void
01085 Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp)
01086 {
01087 START_FUNC_DH double *timing;
01088 int nz;
01089
01090 nz = Factor_dhReadNz (ctx->F);
01091 CHECK_V_ERROR;
01092 timing = ctx->timing;
01093
01094
01095 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
01096 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
01097
01098 reduce_timings_private (ctx);
01099 CHECK_V_ERROR;
01100
01101 if (myid_dh == 0)
01102 {
01103
01104 fprintf (fp,
01105 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
01106 fprintf_dh (fp, "\nruntime parameters\n");
01107 fprintf_dh (fp, "------------------\n");
01108 fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
01109 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
01110 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
01111 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
01112 if (!strcmp (ctx->algo_ilu, "iluk"))
01113 {
01114 fprintf_dh (fp, " level: %i\n", ctx->level);
01115 }
01116
01117 if (ctx->isScaled)
01118 {
01119 fprintf_dh (fp, " matrix was row scaled\n");
01120 }
01121
01122 fprintf_dh (fp, " global matrix row count: %i\n", ctx->n);
01123 fprintf_dh (fp, " nzF: %i\n", nz);
01124 fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
01125 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
01126
01127 fprintf_dh (fp, "\nEuclid timing report\n");
01128 fprintf_dh (fp, "--------------------\n");
01129 fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
01130 timing[TOTAL_SOLVE_T]);
01131 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
01132 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
01133 fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
01134 timing[SUB_GRAPH_T]);
01135 fprintf_dh (fp, " factorization: %0.2f\n",
01136 timing[FACTOR_T]);
01137 fprintf_dh (fp, " solve setup: %0.2f\n",
01138 timing[SOLVE_SETUP_T]);
01139 fprintf_dh (fp, " rho: %0.2f\n",
01140 ctx->timing[COMPUTE_RHO_T]);
01141 fprintf_dh (fp, " misc (should be small): %0.2f\n",
01142 timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
01143 timing[SOLVE_SETUP_T] +
01144 timing[COMPUTE_RHO_T]));
01145
01146 if (ctx->sg != NULL)
01147 {
01148 SubdomainGraph_dhPrintStats (ctx->sg, fp);
01149 CHECK_V_ERROR;
01150 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01151 CHECK_V_ERROR;
01152 }
01153
01154 fprintf (fp,
01155 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");
01156
01157 }
01158
01159 END_FUNC_DH}
01160
01161 #undef __FUNC__
01162 #define __FUNC__ "Euclid_dhPrintTestData"
01163 void
01164 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp)
01165 {
01166 START_FUNC_DH
01167
01168
01169
01170
01171 if (myid_dh == 0)
01172 {
01173 fprintf (fp, " setups: %i\n", ctx->setupCount);
01174 fprintf (fp, " tri solves: %i\n", ctx->its);
01175 fprintf (fp, " parallelization method: %s\n", ctx->algo_par);
01176 fprintf (fp, " factorization method: %s\n", ctx->algo_ilu);
01177 fprintf (fp, " level: %i\n", ctx->level);
01178 fprintf (fp, " row scaling: %i\n", ctx->isScaled);
01179 }
01180 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01181 CHECK_V_ERROR;
01182 END_FUNC_DH}