00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "Euclid_dh.h"
00044 #include "Mem_dh.h"
00045 #include "Mat_dh.h"
00046 #include "Vec_dh.h"
00047 #include "Factor_dh.h"
00048 #include "getRow_dh.h"
00049 #include "ilu_dh.h"
00050 #include "Parser_dh.h"
00051 #include "SortedList_dh.h"
00052 #include "SubdomainGraph_dh.h"
00053 #include "ExternalRows_dh.h"
00054 #include "krylov_dh.h"
00055
00056 static void get_runtime_params_private (Euclid_dh ctx);
00057 static void invert_diagonals_private (Euclid_dh ctx);
00058 static void compute_rho_private (Euclid_dh ctx);
00059 static void factor_private (Euclid_dh ctx);
00060
00061 static void reduce_timings_private (Euclid_dh ctx);
00062
00063 #undef __FUNC__
00064 #define __FUNC__ "Euclid_dhCreate"
00065 void
00066 Euclid_dhCreate (Euclid_dh * ctxOUT)
00067 {
00068 START_FUNC_DH
00069 struct _mpi_interface_dh *ctx =
00070 (struct _mpi_interface_dh *)
00071 MALLOC_DH (sizeof (struct _mpi_interface_dh));
00072 CHECK_V_ERROR;
00073 *ctxOUT = ctx;
00074
00075 ctx->isSetup = false;
00076
00077 ctx->rho_init = 2.0;
00078 ctx->rho_final = 0.0;
00079
00080 ctx->m = 0;
00081 ctx->n = 0;
00082 ctx->rhs = NULL;
00083 ctx->A = NULL;
00084 ctx->F = NULL;
00085 ctx->sg = NULL;
00086
00087 ctx->scale = NULL;
00088 ctx->isScaled = false;
00089 ctx->work = NULL;
00090 ctx->work2 = NULL;
00091 ctx->from = 0;
00092 ctx->to = 0;
00093
00094 strcpy (ctx->algo_par, "pilu");
00095 strcpy (ctx->algo_ilu, "iluk");
00096 ctx->level = 1;
00097 ctx->droptol = DEFAULT_DROP_TOL;
00098 ctx->sparseTolA = 0.0;
00099 ctx->sparseTolF = 0.0;
00100 ctx->pivotMin = 0.0;
00101 ctx->pivotFix = PIVOT_FIX_DEFAULT;
00102 ctx->maxVal = 0.0;
00103
00104 ctx->slist = NULL;
00105 ctx->extRows = NULL;
00106
00107 strcpy (ctx->krylovMethod, "bicgstab");
00108 ctx->maxIts = 200;
00109 ctx->rtol = 1e-5;
00110 ctx->atol = 1e-50;
00111 ctx->its = 0;
00112 ctx->itsTotal = 0;
00113 ctx->setupCount = 0;
00114 ctx->logging = 0;
00115 ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
00116
00117 {
00118 int i;
00119 for (i = 0; i < TIMING_BINS; ++i)
00120 ctx->timing[i] = 0.0;
00121 for (i = 0; i < STATS_BINS; ++i)
00122 ctx->stats[i] = 0.0;
00123 }
00124 ctx->timingsWereReduced = false;
00125
00126 ++ref_counter;
00127 END_FUNC_DH}
00128
00129
00130 #undef __FUNC__
00131 #define __FUNC__ "Euclid_dhDestroy"
00132 void
00133 Euclid_dhDestroy (Euclid_dh ctx)
00134 {
00135 START_FUNC_DH
00136 if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
00137 {
00138
00139 Parser_dhInsert (parser_dh, "-eu_mem", "1");
00140 CHECK_V_ERROR;
00141 Euclid_dhPrintHypreReport (ctx, stdout);
00142 CHECK_V_ERROR;
00143 }
00144
00145 if (ctx->setupCount > 1 && ctx->printStats)
00146 {
00147 Euclid_dhPrintStatsShorter (ctx, stdout);
00148 CHECK_V_ERROR;
00149 }
00150
00151 if (ctx->F != NULL)
00152 {
00153 Factor_dhDestroy (ctx->F);
00154 CHECK_V_ERROR;
00155 }
00156 if (ctx->sg != NULL)
00157 {
00158 SubdomainGraph_dhDestroy (ctx->sg);
00159 CHECK_V_ERROR;
00160 }
00161 if (ctx->scale != NULL)
00162 {
00163 FREE_DH (ctx->scale);
00164 CHECK_V_ERROR;
00165 }
00166 if (ctx->work != NULL)
00167 {
00168 FREE_DH (ctx->work);
00169 CHECK_V_ERROR;
00170 }
00171 if (ctx->work2 != NULL)
00172 {
00173 FREE_DH (ctx->work2);
00174 CHECK_V_ERROR;
00175 }
00176 if (ctx->slist != NULL)
00177 {
00178 SortedList_dhDestroy (ctx->slist);
00179 CHECK_V_ERROR;
00180 }
00181 if (ctx->extRows != NULL)
00182 {
00183 ExternalRows_dhDestroy (ctx->extRows);
00184 CHECK_V_ERROR;
00185 }
00186 FREE_DH (ctx);
00187 CHECK_V_ERROR;
00188
00189 --ref_counter;
00190 END_FUNC_DH}
00191
00192
00193
00194
00195
00196 #undef __FUNC__
00197 #define __FUNC__ "Euclid_dhSetup"
00198 void
00199 Euclid_dhSetup (Euclid_dh ctx)
00200 {
00201 START_FUNC_DH int m, n, beg_row;
00202 double t1;
00203 bool isSetup = ctx->isSetup;
00204 bool bj = false;
00205
00206
00207
00208
00209
00210 if (ctx->setupCount && ctx->printStats)
00211 {
00212 Euclid_dhPrintStatsShorter (ctx, stdout);
00213 CHECK_V_ERROR;
00214 ctx->its = 0;
00215 }
00216
00217
00218
00219
00220 {
00221 int i;
00222 for (i = 0; i < STATS_BINS; ++i)
00223 ctx->stats[i] = 0.0;
00224 }
00225
00226
00227
00228
00229 ctx->timing[SOLVE_START_T] = MPI_Wtime ();
00230
00231 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
00232 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
00233
00234 if (ctx->F != NULL)
00235 {
00236 Factor_dhDestroy (ctx->F);
00237 CHECK_V_ERROR;
00238 ctx->F = NULL;
00239 }
00240
00241 if (ctx->A == NULL)
00242 {
00243 SET_V_ERROR ("must set ctx->A before calling init");
00244 }
00245 EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
00246 CHECK_V_ERROR;
00247 ctx->m = m;
00248 ctx->n = n;
00249
00250 if (Parser_dhHasSwitch (parser_dh, "-print_size"))
00251 {
00252 printf_dh
00253 ("setting up linear system; global rows: %i local rows: %i (on P_0)\n",
00254 n, m);
00255 }
00256
00257 sprintf (msgBuf_dh, "localRow= %i; globalRows= %i; beg_row= %i", m, n,
00258 beg_row);
00259 SET_INFO (msgBuf_dh);
00260
00261 bj = Parser_dhHasSwitch (parser_dh, "-bj");
00262
00263
00264
00265
00266
00267
00268
00269 if (ctx->sg == NULL)
00270 {
00271 int blocks = np_dh;
00272 t1 = MPI_Wtime ();
00273 if (np_dh == 1)
00274 {
00275 Parser_dhReadInt (parser_dh, "-blocks", &blocks);
00276 CHECK_V_ERROR;
00277 SubdomainGraph_dhCreate (&(ctx->sg));
00278 CHECK_V_ERROR;
00279 SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
00280 CHECK_V_ERROR;
00281 }
00282 else
00283 {
00284 SubdomainGraph_dhCreate (&(ctx->sg));
00285 CHECK_V_ERROR;
00286 SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
00287 CHECK_V_ERROR;
00288 }
00289 ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
00290 }
00291
00292
00293
00294
00295
00296
00297
00298 if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
00299 {
00300 goto END_OF_FUNCTION;
00301 }
00302
00303
00304
00305
00306
00307 if (!isSetup)
00308 {
00309 get_runtime_params_private (ctx);
00310 CHECK_V_ERROR;
00311 }
00312 if (!strcmp (ctx->algo_par, "bj"))
00313 bj = false;
00314
00315
00316
00317
00318
00319 if (ctx->scale == NULL)
00320 {
00321 ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
00322 CHECK_V_ERROR;
00323 }
00324 {
00325 int i;
00326 for (i = 0; i < m; ++i)
00327 ctx->scale[i] = 1.0;
00328 }
00329
00330
00331
00332
00333 if (ctx->work == NULL)
00334 {
00335 ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
00336 CHECK_V_ERROR;
00337 }
00338 if (ctx->work2 == NULL)
00339 {
00340 ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
00341 CHECK_V_ERROR;
00342 }
00343
00344
00345
00346
00347
00348 t1 = MPI_Wtime ();
00349 factor_private (ctx);
00350 CHECK_V_ERROR;
00351 ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);
00352
00353
00354
00355
00356 if (strcmp (ctx->algo_par, "none"))
00357 {
00358 invert_diagonals_private (ctx);
00359 CHECK_V_ERROR;
00360 }
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370 if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
00371 {
00372 if (strcmp (ctx->algo_par, "none"))
00373 {
00374 t1 = MPI_Wtime ();
00375 compute_rho_private (ctx);
00376 CHECK_V_ERROR;
00377 ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
00378 }
00379 }
00380
00381
00382
00383
00384
00385
00386
00387 if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
00388 {
00389 t1 = MPI_Wtime ();
00390 Factor_dhSolveSetup (ctx->F, ctx->sg);
00391 CHECK_V_ERROR;
00392 ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
00393 }
00394
00395 END_OF_FUNCTION:;
00396
00397
00398
00399
00400 ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
00401 ctx->setupCount += 1;
00402
00403 ctx->isSetup = true;
00404
00405 END_FUNC_DH}
00406
00407
00408 #undef __FUNC__
00409 #define __FUNC__ "get_runtime_params_private"
00410 void
00411 get_runtime_params_private (Euclid_dh ctx)
00412 {
00413 START_FUNC_DH char *tmp;
00414
00415
00416 Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
00417 Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
00418 Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));
00419
00420
00421 tmp = NULL;
00422 Parser_dhReadString (parser_dh, "-par", &tmp);
00423 if (tmp != NULL)
00424 {
00425 strcpy (ctx->algo_par, tmp);
00426 }
00427 if (Parser_dhHasSwitch (parser_dh, "-bj"))
00428 {
00429 strcpy (ctx->algo_par, "bj");
00430 }
00431
00432
00433
00434 Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
00435
00436 Parser_dhReadInt (parser_dh, "-level", &ctx->level);
00437 Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);
00438
00439 if (Parser_dhHasSwitch (parser_dh, "-ilut"))
00440 {
00441 Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
00442 ctx->isScaled = true;
00443 strcpy (ctx->algo_ilu, "ilut");
00444 }
00445
00446
00447
00448
00449 if (!strcmp (ctx->algo_par, "none"))
00450 {
00451 strcpy (ctx->algo_ilu, "none");
00452 }
00453 else if (!strcmp (ctx->algo_ilu, "none"))
00454 {
00455 strcpy (ctx->algo_par, "none");
00456 }
00457
00458
00459 Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
00460
00461 Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
00462
00463 Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
00464
00465 Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
00466
00467
00468
00469 if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
00470 {
00471 ctx->isScaled = true;
00472 }
00473
00474
00475 tmp = NULL;
00476 Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
00477 if (tmp != NULL)
00478 {
00479 strcpy (ctx->krylovMethod, tmp);
00480
00481 if (!strcmp (ctx->krylovMethod, "bcgs"))
00482 {
00483 strcpy (ctx->krylovMethod, "bicgstab");
00484 }
00485 }
00486 END_FUNC_DH}
00487
00488 #undef __FUNC__
00489 #define __FUNC__ "invert_diagonals_private"
00490 void
00491 invert_diagonals_private (Euclid_dh ctx)
00492 {
00493 START_FUNC_DH REAL_DH * aval = ctx->F->aval;
00494 int *diag = ctx->F->diag;
00495 if (aval == NULL || diag == NULL)
00496 {
00497 SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
00498 }
00499 else
00500 {
00501 int i, m = ctx->F->m;
00502 for (i = 0; i < m; ++i)
00503 {
00504 aval[diag[i]] = 1.0 / aval[diag[i]];
00505 }
00506 }
00507 END_FUNC_DH}
00508
00509
00510
00511 #undef __FUNC__
00512 #define __FUNC__ "compute_rho_private"
00513 void
00514 compute_rho_private (Euclid_dh ctx)
00515 {
00516 START_FUNC_DH if (ctx->F != NULL)
00517 {
00518 double bufLocal[3], bufGlobal[3];
00519 int m = ctx->m;
00520
00521 ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
00522 bufLocal[0] = ctx->stats[NZA_STATS];
00523 bufLocal[1] = ctx->stats[NZF_STATS];
00524 bufLocal[2] = ctx->stats[NZA_USED_STATS];
00525
00526 if (np_dh == 1)
00527 {
00528 bufGlobal[0] = bufLocal[0];
00529 bufGlobal[1] = bufLocal[1];
00530 bufGlobal[2] = bufLocal[2];
00531 }
00532 else
00533 {
00534 MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
00535 comm_dh);
00536 }
00537
00538 if (myid_dh == 0)
00539 {
00540
00541
00542 if (bufGlobal[0] && bufGlobal[1])
00543 {
00544 ctx->rho_final = bufGlobal[1] / bufGlobal[0];
00545 }
00546 else
00547 {
00548 ctx->rho_final = -1;
00549 }
00550
00551
00552 if (bufGlobal[0] && bufGlobal[2])
00553 {
00554 ctx->stats[NZA_RATIO_STATS] =
00555 100.0 * bufGlobal[2] / bufGlobal[0];
00556 }
00557 else
00558 {
00559 ctx->stats[NZA_RATIO_STATS] = 100.0;
00560 }
00561 }
00562 }
00563 END_FUNC_DH}
00564
00565 #undef __FUNC__
00566 #define __FUNC__ "factor_private"
00567 void
00568 factor_private (Euclid_dh ctx)
00569 {
00570 START_FUNC_DH
00571
00572
00573
00574 if (!strcmp (ctx->algo_par, "none"))
00575 {
00576 goto DO_NOTHING;
00577 }
00578
00579
00580
00581
00582 {
00583 int br = 0;
00584 int id = np_dh;
00585 if (ctx->sg != NULL)
00586 {
00587 br = ctx->sg->beg_rowP[myid_dh];
00588 id = ctx->sg->o2n_sub[myid_dh];
00589 }
00590 Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
00591 CHECK_V_ERROR;
00592 ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
00593 ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
00594 if (!strcmp (ctx->algo_par, "bj"))
00595 ctx->F->blockJacobi = true;
00596 if (Parser_dhHasSwitch (parser_dh, "-bj"))
00597 ctx->F->blockJacobi = true;
00598 }
00599
00600
00601
00602
00603 if (np_dh == 1)
00604 {
00605
00606
00607 if (!strcmp (ctx->algo_ilu, "iluk"))
00608 {
00609 ctx->from = 0;
00610 ctx->to = ctx->m;
00611
00612
00613 if (Parser_dhHasSwitch (parser_dh, "-mpi"))
00614 {
00615 if (ctx->sg != NULL && ctx->sg->blocks > 1)
00616 {
00617 SET_V_ERROR
00618 ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
00619 }
00620 iluk_mpi_pilu (ctx);
00621 CHECK_V_ERROR;
00622 }
00623
00624
00625 else
00626 {
00627 iluk_seq_block (ctx);
00628 CHECK_V_ERROR;
00629
00630 }
00631 }
00632
00633
00634 else if (!strcmp (ctx->algo_ilu, "ilut"))
00635 {
00636 ctx->from = 0;
00637 ctx->to = ctx->m;
00638 ilut_seq (ctx);
00639 CHECK_V_ERROR;
00640 }
00641
00642
00643 else
00644 {
00645 sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00646 ctx->algo_ilu);
00647 SET_V_ERROR (msgBuf_dh);
00648 }
00649 }
00650
00651
00652
00653
00654 else
00655 {
00656
00657 if (!strcmp (ctx->algo_par, "bj"))
00658 {
00659 ctx->from = 0;
00660 ctx->to = ctx->m;
00661 iluk_mpi_bj (ctx);
00662 CHECK_V_ERROR;
00663 }
00664
00665
00666 else if (!strcmp (ctx->algo_ilu, "iluk"))
00667 {
00668 bool bj = ctx->F->blockJacobi;
00669
00670
00671
00672 SortedList_dhCreate (&(ctx->slist));
00673 CHECK_V_ERROR;
00674 SortedList_dhInit (ctx->slist, ctx->sg);
00675 CHECK_V_ERROR;
00676 ExternalRows_dhCreate (&(ctx->extRows));
00677 CHECK_V_ERROR;
00678 ExternalRows_dhInit (ctx->extRows, ctx);
00679 CHECK_V_ERROR;
00680
00681
00682 ctx->from = 0;
00683 ctx->to = ctx->F->first_bdry;
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694 iluk_seq (ctx);
00695 CHECK_V_ERROR;
00696
00697
00698
00699
00700
00701 if (!bj)
00702 {
00703 ExternalRows_dhRecvRows (ctx->extRows);
00704 CHECK_V_ERROR;
00705 }
00706
00707
00708 ctx->from = ctx->F->first_bdry;
00709 ctx->to = ctx->F->m;
00710 iluk_mpi_pilu (ctx);
00711 CHECK_V_ERROR;
00712
00713
00714
00715
00716 if (!bj)
00717 {
00718 ExternalRows_dhSendRows (ctx->extRows);
00719 CHECK_V_ERROR;
00720 }
00721
00722
00723
00724
00725
00726
00727
00728 SortedList_dhDestroy (ctx->slist);
00729 CHECK_V_ERROR;
00730 ctx->slist = NULL;
00731 ExternalRows_dhDestroy (ctx->extRows);
00732 CHECK_V_ERROR;
00733 ctx->extRows = NULL;
00734 }
00735
00736
00737 else
00738 {
00739 sprintf (msgBuf_dh, "factorization method: %s is not implemented",
00740 ctx->algo_ilu);
00741 SET_V_ERROR (msgBuf_dh);
00742 }
00743 }
00744
00745 DO_NOTHING:;
00746
00747 END_FUNC_DH}
00748
00749 #if 0
00750
00751 #undef __FUNC__
00752 #define __FUNC__ "discard_indices_private"
00753 void
00754 discard_indices_private (Euclid_dh ctx)
00755 {
00756 START_FUNC_DH
00757 #if 0
00758 int *rp = ctx->F->rp, *cval = ctx->F->cval;
00759 double *aval = ctx->F->aval;
00760 int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
00761 int i, j, k, idx, count = 0, start_of_row;
00762 int beg_row = ctx->beg_row, end_row = beg_row + m;
00763 int *diag = ctx->F->diag;
00764
00765
00766
00767
00768
00769
00770 for (i = 0; i < m; ++i)
00771 {
00772 for (j = rp[i]; j < rp[i + 1]; ++j)
00773 {
00774 int col = cval[j];
00775 if (col < beg_row || col >= end_row)
00776 {
00777 bool flag = true;
00778 int owner = find_owner_private_mpi (ctx, col);
00779 CHECK_V_ERROR;
00780
00781 for (k = 0; k < nc; ++k)
00782 {
00783 if (nabors[k] == owner)
00784 {
00785 flag = false;
00786 break;
00787 }
00788 }
00789
00790 if (flag)
00791 {
00792 cval[j] = -1;
00793 ++count;
00794 }
00795 }
00796 }
00797 }
00798
00799 sprintf (msgBuf_dh,
00800 "deleting %i indices that would alter the subdomain graph", count);
00801 SET_INFO (msgBuf_dh);
00802
00803
00804 idx = 0;
00805 start_of_row = 0;
00806 for (i = 0; i < m; ++i)
00807 {
00808 for (j = start_of_row; j < rp[i + 1]; ++j)
00809 {
00810 int col = cval[j];
00811 double val = aval[j];
00812 if (col != -1)
00813 {
00814 cval[idx] = col;
00815 aval[idx] = val;
00816 ++idx;
00817 }
00818 }
00819 start_of_row = rp[i + 1];
00820 rp[i + 1] = idx;
00821 }
00822
00823
00824 for (i = 0; i < m; ++i)
00825 {
00826 for (j = rp[i]; j < rp[i + 1]; ++j)
00827 {
00828 if (cval[j] == i + beg_row)
00829 {
00830 diag[i] = j;
00831 break;
00832 }
00833 }
00834 }
00835 #endif
00836 END_FUNC_DH}
00837 #endif
00838
00839 #undef __FUNC__
00840 #define __FUNC__ "Euclid_dhSolve"
00841 void
00842 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
00843 {
00844 START_FUNC_DH int itsOUT;
00845 Mat_dh A = (Mat_dh) ctx->A;
00846
00847 if (!strcmp (ctx->krylovMethod, "cg"))
00848 {
00849 cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00850 ERRCHKA;
00851 }
00852 else if (!strcmp (ctx->krylovMethod, "bicgstab"))
00853 {
00854 bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00855 ERRCHKA;
00856 }
00857 else
00858 {
00859 sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
00860 SET_V_ERROR (msgBuf_dh);
00861 }
00862 *its = itsOUT;
00863 END_FUNC_DH}
00864
00865 #undef __FUNC__
00866 #define __FUNC__ "Euclid_dhPrintStats"
00867 void
00868 Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp)
00869 {
00870 START_FUNC_DH double *timing;
00871 int nz;
00872
00873 nz = Factor_dhReadNz (ctx->F);
00874 CHECK_V_ERROR;
00875 timing = ctx->timing;
00876
00877
00878 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
00879 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
00880
00881 reduce_timings_private (ctx);
00882 CHECK_V_ERROR;
00883
00884 fprintf_dh (fp,
00885 "\n==================== Euclid report (start) ====================\n");
00886 fprintf_dh (fp, "\nruntime parameters\n");
00887 fprintf_dh (fp, "------------------\n");
00888 fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
00889 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
00890 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
00891 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
00892 fprintf_dh (fp, " matrix was row scaled: %i\n", ctx->isScaled);
00893
00894 fprintf_dh (fp, " matrix row count: %i\n", ctx->n);
00895 fprintf_dh (fp, " nzF: %i\n", nz);
00896 fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
00897 fprintf_dh (fp, " level: %i\n", ctx->level);
00898 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
00899
00900 fprintf_dh (fp, "\nEuclid timing report\n");
00901 fprintf_dh (fp, "--------------------\n");
00902 fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
00903 timing[TOTAL_SOLVE_T]);
00904 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
00905 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
00906 fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
00907 timing[SUB_GRAPH_T]);
00908 fprintf_dh (fp, " factorization: %0.2f\n", timing[FACTOR_T]);
00909 fprintf_dh (fp, " solve setup: %0.2f\n",
00910 timing[SOLVE_SETUP_T]);
00911 fprintf_dh (fp, " rho: %0.2f\n",
00912 ctx->timing[COMPUTE_RHO_T]);
00913 fprintf_dh (fp, " misc (should be small): %0.2f\n",
00914 timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
00915 timing[SOLVE_SETUP_T] +
00916 timing[COMPUTE_RHO_T]));
00917
00918 if (ctx->sg != NULL)
00919 {
00920 SubdomainGraph_dhPrintStats (ctx->sg, fp);
00921 CHECK_V_ERROR;
00922 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
00923 CHECK_V_ERROR;
00924 }
00925
00926
00927 fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
00928 fprintf_dh (fp, "---------------------------------------------------\n");
00929 fprintf_dh (fp, " solve method: %s\n", ctx->krylovMethod);
00930 fprintf_dh (fp, " maxIts: %i\n", ctx->maxIts);
00931 fprintf_dh (fp, " rtol: %g\n", ctx->rtol);
00932 fprintf_dh (fp, " atol: %g\n", ctx->atol);
00933 fprintf_dh (fp,
00934 "\n==================== Euclid report (end) ======================\n");
00935 END_FUNC_DH}
00936
00937
00938
00939
00940
00941
00942 #undef __FUNC__
00943 #define __FUNC__ "Euclid_dhPrintStatsShort"
00944 void
00945 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
00946 FILE * fp)
00947 {
00948 START_FUNC_DH double *timing = ctx->timing;
00949
00950
00951
00952 double apply_total;
00953 double apply_per_it;
00954
00955 double perIt;
00956 int blocks = np_dh;
00957
00958 if (np_dh == 1)
00959 blocks = ctx->sg->blocks;
00960
00961 reduce_timings_private (ctx);
00962 CHECK_V_ERROR;
00963
00964
00965
00966 apply_total = timing[TRI_SOLVE_T];
00967 apply_per_it = apply_total / (double) ctx->its;
00968
00969 perIt = solve / (double) ctx->its;
00970
00971 fprintf_dh (fp, "\n");
00972 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00973 "method", "subdms", "level", "its", "setup", "solve", "total",
00974 "perIt", "perIt", "rows");
00975 fprintf_dh (fp,
00976 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
00977 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g XXX\n", ctx->algo_par,
00978 blocks,
00979 ctx->level,
00980 ctx->its,
00981 setup,
00982 solve,
00983 setup + solve,
00984 perIt,
00985 apply_per_it,
00986 (double) ctx->n
00987 );
00988
00989
00990
00991
00992 #if 0
00993 fprintf_dh (fp, "\n");
00994 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00995 "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
00996
00997 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00998 "method", "subdms", "level", "its", "total", "factor",
00999 "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
01000 fprintf_dh (fp,
01001 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
01002
01003
01004 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g XXX\n", ctx->algo_par,
01005 blocks,
01006 ctx->level,
01007 ctx->its,
01008 setup,
01009 solve,
01010 setup_factor,
01011 setup_other,
01012 apply_total,
01013 apply_per_it,
01014 ctx->rho_final,
01015 ctx->sparseTolA,
01016 nzUsedRatio,
01017 (double) ctx->n
01018 );
01019 #endif
01020
01021 #if 0
01022
01023 fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
01024 "subGph", "factor", "solveS", "perIt");
01025 fprintf_dh (fp, "------ ----- ----- ----- ----- ----- WW\n");
01026 fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n",
01027 ctx->algo_par,
01028 ctx->level,
01029 timing[SUB_GRAPH_T],
01030 timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
01031 #endif
01032 END_FUNC_DH}
01033
01034
01035
01036 #undef __FUNC__
01037 #define __FUNC__ "Euclid_dhPrintStatsShorter"
01038 void
01039 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp)
01040 {
01041 START_FUNC_DH double *stats = ctx->stats;
01042
01043 int its = ctx->its;
01044 double rho = ctx->rho_final;
01045 double nzUsedRatio = stats[NZA_RATIO_STATS];
01046
01047
01048 fprintf_dh (fp, "\nStats from last linear solve: YY\n");
01049 fprintf_dh (fp, "%6s %6s %6s YY\n", "its", "rho", "A_%");
01050 fprintf_dh (fp, " ----- ----- ----- YY\n");
01051 fprintf_dh (fp, "%6i %6.2f %6.2f YYY\n", its, rho, nzUsedRatio);
01052
01053 END_FUNC_DH}
01054
01055 #undef __FUNC__
01056 #define __FUNC__ "Euclid_dhPrintScaling"
01057 void
01058 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp)
01059 {
01060 START_FUNC_DH int i, m = ctx->m;
01061
01062 if (m > 10)
01063 m = 10;
01064
01065 if (ctx->scale == NULL)
01066 {
01067 SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
01068 }
01069
01070 fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
01071 for (i = 0; i < m; ++i)
01072 {
01073 fprintf (fp, " %i %g \n", i + 1, ctx->scale[i]);
01074 }
01075 END_FUNC_DH}
01076
01077
01078 #undef __FUNC__
01079 #define __FUNC__ "reduce_timings_private"
01080 void
01081 reduce_timings_private (Euclid_dh ctx)
01082 {
01083 START_FUNC_DH if (np_dh > 1)
01084 {
01085 double bufOUT[TIMING_BINS];
01086
01087 memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
01088 MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
01089 comm_dh);
01090 }
01091
01092 ctx->timingsWereReduced = true;
01093 END_FUNC_DH}
01094
01095 #undef __FUNC__
01096 #define __FUNC__ "Euclid_dhPrintHypreReport"
01097 void
01098 Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp)
01099 {
01100 START_FUNC_DH double *timing;
01101 int nz;
01102
01103 nz = Factor_dhReadNz (ctx->F);
01104 CHECK_V_ERROR;
01105 timing = ctx->timing;
01106
01107
01108 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
01109 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
01110
01111 reduce_timings_private (ctx);
01112 CHECK_V_ERROR;
01113
01114 if (myid_dh == 0)
01115 {
01116
01117 fprintf (fp,
01118 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
01119 fprintf_dh (fp, "\nruntime parameters\n");
01120 fprintf_dh (fp, "------------------\n");
01121 fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
01122 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
01123 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
01124 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
01125 if (!strcmp (ctx->algo_ilu, "iluk"))
01126 {
01127 fprintf_dh (fp, " level: %i\n", ctx->level);
01128 }
01129
01130 if (ctx->isScaled)
01131 {
01132 fprintf_dh (fp, " matrix was row scaled\n");
01133 }
01134
01135 fprintf_dh (fp, " global matrix row count: %i\n", ctx->n);
01136 fprintf_dh (fp, " nzF: %i\n", nz);
01137 fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
01138 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
01139
01140 fprintf_dh (fp, "\nEuclid timing report\n");
01141 fprintf_dh (fp, "--------------------\n");
01142 fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
01143 timing[TOTAL_SOLVE_T]);
01144 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
01145 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
01146 fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
01147 timing[SUB_GRAPH_T]);
01148 fprintf_dh (fp, " factorization: %0.2f\n",
01149 timing[FACTOR_T]);
01150 fprintf_dh (fp, " solve setup: %0.2f\n",
01151 timing[SOLVE_SETUP_T]);
01152 fprintf_dh (fp, " rho: %0.2f\n",
01153 ctx->timing[COMPUTE_RHO_T]);
01154 fprintf_dh (fp, " misc (should be small): %0.2f\n",
01155 timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
01156 timing[SOLVE_SETUP_T] +
01157 timing[COMPUTE_RHO_T]));
01158
01159 if (ctx->sg != NULL)
01160 {
01161 SubdomainGraph_dhPrintStats (ctx->sg, fp);
01162 CHECK_V_ERROR;
01163 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01164 CHECK_V_ERROR;
01165 }
01166
01167 fprintf (fp,
01168 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");
01169
01170 }
01171
01172 END_FUNC_DH}
01173
01174 #undef __FUNC__
01175 #define __FUNC__ "Euclid_dhPrintTestData"
01176 void
01177 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp)
01178 {
01179 START_FUNC_DH
01180
01181
01182
01183
01184 if (myid_dh == 0)
01185 {
01186 fprintf (fp, " setups: %i\n", ctx->setupCount);
01187 fprintf (fp, " tri solves: %i\n", ctx->its);
01188 fprintf (fp, " parallelization method: %s\n", ctx->algo_par);
01189 fprintf (fp, " factorization method: %s\n", ctx->algo_ilu);
01190 fprintf (fp, " level: %i\n", ctx->level);
01191 fprintf (fp, " row scaling: %i\n", ctx->isScaled);
01192 }
01193 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01194 CHECK_V_ERROR;
01195 END_FUNC_DH}