IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends
Euclid_dh.c
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #include "Euclid_dh.h"
00044 #include "Mem_dh.h"
00045 #include "Mat_dh.h"
00046 #include "Vec_dh.h"
00047 #include "Factor_dh.h"
00048 #include "getRow_dh.h"
00049 #include "ilu_dh.h"
00050 #include "Parser_dh.h"
00051 #include "SortedList_dh.h"
00052 #include "SubdomainGraph_dh.h"
00053 #include "ExternalRows_dh.h"
00054 #include "krylov_dh.h"
00055 
00056 static void get_runtime_params_private (Euclid_dh ctx);
00057 static void invert_diagonals_private (Euclid_dh ctx);
00058 static void compute_rho_private (Euclid_dh ctx);
00059 static void factor_private (Euclid_dh ctx);
00060 /* static void discard_indices_private(Euclid_dh ctx); */
00061 static void reduce_timings_private (Euclid_dh ctx);
00062 
00063 #undef __FUNC__
00064 #define __FUNC__ "Euclid_dhCreate"
00065 void
00066 Euclid_dhCreate (Euclid_dh * ctxOUT)
00067 {
00068   START_FUNC_DH
00069     struct _mpi_interface_dh *ctx =
00070     (struct _mpi_interface_dh *)
00071     MALLOC_DH (sizeof (struct _mpi_interface_dh));
00072   CHECK_V_ERROR;
00073   *ctxOUT = ctx;
00074 
00075   ctx->isSetup = false;
00076 
00077   ctx->rho_init = 2.0;
00078   ctx->rho_final = 0.0;
00079 
00080   ctx->m = 0;
00081   ctx->n = 0;
00082   ctx->rhs = NULL;
00083   ctx->A = NULL;
00084   ctx->F = NULL;
00085   ctx->sg = NULL;
00086 
00087   ctx->scale = NULL;
00088   ctx->isScaled = false;
00089   ctx->work = NULL;
00090   ctx->work2 = NULL;
00091   ctx->from = 0;
00092   ctx->to = 0;
00093 
00094   strcpy (ctx->algo_par, "pilu");
00095   strcpy (ctx->algo_ilu, "iluk");
00096   ctx->level = 1;
00097   ctx->droptol = DEFAULT_DROP_TOL;
00098   ctx->sparseTolA = 0.0;
00099   ctx->sparseTolF = 0.0;
00100   ctx->pivotMin = 0.0;
00101   ctx->pivotFix = PIVOT_FIX_DEFAULT;
00102   ctx->maxVal = 0.0;
00103 
00104   ctx->slist = NULL;
00105   ctx->extRows = NULL;
00106 
00107   strcpy (ctx->krylovMethod, "bicgstab");
00108   ctx->maxIts = 200;
00109   ctx->rtol = 1e-5;
00110   ctx->atol = 1e-50;
00111   ctx->its = 0;
00112   ctx->itsTotal = 0;
00113   ctx->setupCount = 0;
00114   ctx->logging = 0;
00115   ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
00116 
00117   {
00118     int i;
00119     for (i = 0; i < TIMING_BINS; ++i)
00120       ctx->timing[i] = 0.0;
00121     for (i = 0; i < STATS_BINS; ++i)
00122       ctx->stats[i] = 0.0;
00123   }
00124   ctx->timingsWereReduced = false;
00125 
00126   ++ref_counter;
00127 END_FUNC_DH}
00128 
00129 
#undef __FUNC__
#define __FUNC__ "Euclid_dhDestroy"
/* Tear down a context created by Euclid_dhCreate: optionally print
   reports, free all owned sub-objects, then free the context itself.
   Does NOT destroy ctx->A or ctx->rhs -- those belong to the caller. */
void
Euclid_dhDestroy (Euclid_dh ctx)
{
  START_FUNC_DH
    /* print the full report if requested by switch or logging flag */
    if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
    {
      /* insert switch so memory report will also be printed */
      Parser_dhInsert (parser_dh, "-eu_mem", "1");
      CHECK_V_ERROR;
      Euclid_dhPrintHypreReport (ctx, stdout);
      CHECK_V_ERROR;
    }

  /* summarize the last setup/solve cycle when several occurred */
  if (ctx->setupCount > 1 && ctx->printStats)
    {
      Euclid_dhPrintStatsShorter (ctx, stdout);
      CHECK_V_ERROR;
    }

  /* free owned sub-objects; each is only freed when present */
  if (ctx->F != NULL)
    {
      Factor_dhDestroy (ctx->F);
      CHECK_V_ERROR;
    }
  if (ctx->sg != NULL)
    {
      SubdomainGraph_dhDestroy (ctx->sg);
      CHECK_V_ERROR;
    }
  if (ctx->scale != NULL)
    {
      FREE_DH (ctx->scale);
      CHECK_V_ERROR;
    }
  if (ctx->work != NULL)
    {
      FREE_DH (ctx->work);
      CHECK_V_ERROR;
    }
  if (ctx->work2 != NULL)
    {
      FREE_DH (ctx->work2);
      CHECK_V_ERROR;
    }
  /* slist/extRows are normally destroyed at the end of factor_private();
     these frees only fire if factorization was interrupted */
  if (ctx->slist != NULL)
    {
      SortedList_dhDestroy (ctx->slist);
      CHECK_V_ERROR;
    }
  if (ctx->extRows != NULL)
    {
      ExternalRows_dhDestroy (ctx->extRows);
      CHECK_V_ERROR;
    }
  FREE_DH (ctx);
  CHECK_V_ERROR;

  --ref_counter;
END_FUNC_DH}
00191 
00192 
/* On entry, ctx->A must have been set.  If this context is being
   reused, the existing subdomain graph (ctx->sg) is kept, so the
   user presumably must ensure the matrix structure is unchanged --
   TODO confirm the exact reuse contract (original comment ended
   with "user must ensure ???").
*/
#undef __FUNC__
#define __FUNC__ "Euclid_dhSetup"
void
Euclid_dhSetup (Euclid_dh ctx)
{
  START_FUNC_DH int m, n, beg_row;
  double t1;			/* scratch timestamp for interval timing */
  bool isSetup = ctx->isSetup;	/* true when this is a re-setup */
  bool bj = false;		/* block-jacobi flag (from "-bj" switch) */

  /*----------------------------------------------------
   * If Euclid was previously setup, print summary of
   * what happened during previous setup/solve
   *----------------------------------------------------*/
  if (ctx->setupCount && ctx->printStats)
    {
      Euclid_dhPrintStatsShorter (ctx, stdout);
      CHECK_V_ERROR;
      ctx->its = 0;
    }

  /*----------------------------------------------------
   * zero array for statistical reporting
   *----------------------------------------------------*/
  {
    int i;
    for (i = 0; i < STATS_BINS; ++i)
      ctx->stats[i] = 0.0;
  }

  /*----------------------------------------------------
   * internal timing
   *----------------------------------------------------*/
  ctx->timing[SOLVE_START_T] = MPI_Wtime ();
  /* sum timing from last linear solve cycle, if any */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  /* any prior factor is stale; discard it before refactoring */
  if (ctx->F != NULL)
    {
      Factor_dhDestroy (ctx->F);
      CHECK_V_ERROR;
      ctx->F = NULL;
    }

  if (ctx->A == NULL)
    {
      SET_V_ERROR ("must set ctx->A before calling init");
    }
  EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
  CHECK_V_ERROR;
  ctx->m = m;			/* local row count */
  ctx->n = n;			/* global row count */

  if (Parser_dhHasSwitch (parser_dh, "-print_size"))
    {
      printf_dh
	("setting up linear system; global rows: %i  local rows: %i (on P_0)\n",
	 n, m);
    }

  sprintf (msgBuf_dh, "localRow= %i;  globalRows= %i;  beg_row= %i", m, n,
	   beg_row);
  SET_INFO (msgBuf_dh);

  bj = Parser_dhHasSwitch (parser_dh, "-bj");

  /*------------------------------------------------------------------------
   * Setup the SubdomainGraph, which contains connectivity and
   * and permutation information.  If this context is being reused,
   * this may already have been done; if being resused, the underlying
   * subdomain graph cannot change (user's responsibility?)
   *------------------------------------------------------------------------*/
  if (ctx->sg == NULL)
    {
      int blocks = np_dh;
      t1 = MPI_Wtime ();
      if (np_dh == 1)
	{
	  /* sequential run: "-blocks" may request multiple subdomains */
	  Parser_dhReadInt (parser_dh, "-blocks", &blocks);
	  CHECK_V_ERROR;
	  SubdomainGraph_dhCreate (&(ctx->sg));
	  CHECK_V_ERROR;
	  SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
	  CHECK_V_ERROR;
	}
      else
	{
	  /* parallel run: -1 means one subdomain per mpi task */
	  SubdomainGraph_dhCreate (&(ctx->sg));
	  CHECK_V_ERROR;
	  SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
	  CHECK_V_ERROR;
	}
      ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
    }


/* SubdomainGraph_dhDump(ctx->sg, "SG.dump"); CHECK_V_ERROR; */

  /*----------------------------------------------------
   * for debugging
   *----------------------------------------------------*/
  if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
    {
      goto END_OF_FUNCTION;
    }


  /*----------------------------------------------------
   * query parser for runtime parameters
   * (only on first setup; re-setups keep prior values)
   *----------------------------------------------------*/
  if (!isSetup)
    {
      get_runtime_params_private (ctx);
      CHECK_V_ERROR;
    }
  /* NOTE(review): this CLEARS bj when algo_par == "bj", which looks
     inverted; behavior kept as-is -- confirm against upstream Euclid */
  if (!strcmp (ctx->algo_par, "bj"))
    bj = false;

  /*---------------------------------------------------------
   * allocate and initialize storage for row-scaling
   * (ctx->isScaled is set in get_runtime_params_private(); )
   *---------------------------------------------------------*/
  if (ctx->scale == NULL)
    {
      ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
  /* identity scaling by default; factorization may overwrite */
  {
    int i;
    for (i = 0; i < m; ++i)
      ctx->scale[i] = 1.0;
  }

  /*------------------------------------------------------------------
   * allocate work vectors; used in factorization and triangular solves;
   *------------------------------------------------------------------*/
  if (ctx->work == NULL)
    {
      ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
  if (ctx->work2 == NULL)
    {
      ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }

  /*-----------------------------------------------------------------
   * perform the incomplete factorization (this should be, at least
   * for higher level ILUK, the most time-intensive portion of setup)
   *-----------------------------------------------------------------*/
  t1 = MPI_Wtime ();
  factor_private (ctx);
  CHECK_V_ERROR;
  ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);

  /*--------------------------------------------------------------
   * invert diagonals, for faster triangular solves
   * (skipped when algo_par == "none", i.e. no preconditioning)
   *--------------------------------------------------------------*/
  if (strcmp (ctx->algo_par, "none"))
    {
      invert_diagonals_private (ctx);
      CHECK_V_ERROR;
    }

  /*--------------------------------------------------------------
   * compute rho_final: global ratio of nzF/nzA
   * also, if -sparseA > 0,  compute ratio of nzA
   * used in factorization
   *--------------------------------------------------------------*/
  /* for some reason compute_rho_private() was expensive, so now it's
     an option, unless there's only one mpi task.
   */
  if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
    {
      if (strcmp (ctx->algo_par, "none"))
	{
	  t1 = MPI_Wtime ();
	  compute_rho_private (ctx);
	  CHECK_V_ERROR;
	  ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
	}
    }

  /*--------------------------------------------------------------
   * if using PILU, set up persistent comms and global-to-local
   * number scheme, for efficient triangular solves.
   * (Thanks to Edmond Chow for these algorithmic ideas.)
   *--------------------------------------------------------------*/

  if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
    {
      t1 = MPI_Wtime ();
      Factor_dhSolveSetup (ctx->F, ctx->sg);
      CHECK_V_ERROR;
      ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
    }

END_OF_FUNCTION:;

  /*-------------------------------------------------------
   * internal timing
   *-------------------------------------------------------*/
  ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
  ctx->setupCount += 1;

  ctx->isSetup = true;

END_FUNC_DH}
00406 
00407 
#undef __FUNC__
#define __FUNC__ "get_runtime_params_private"
/* Read Euclid's runtime options from the global parser and store them
   in ctx.  Called once per context, on the first Euclid_dhSetup().
   Each Parser_dhRead* call leaves the target unchanged when the
   switch is absent, so ctx keeps the defaults from Euclid_dhCreate(). */
void
get_runtime_params_private (Euclid_dh ctx)
{
  START_FUNC_DH char *tmp;

  /* params for use of internal solvers */
  Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
  Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
  Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));

  /* parallelization strategy (bj, pilu, none);
     an explicit "-bj" switch overrides "-par" */
  tmp = NULL;
  Parser_dhReadString (parser_dh, "-par", &tmp);
  if (tmp != NULL)
    {
      strcpy (ctx->algo_par, tmp);
    }
  if (Parser_dhHasSwitch (parser_dh, "-bj"))
    {
      strcpy (ctx->algo_par, "bj");
    }


  /* factorization parameters */
  Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
  /* initial storage allocation for factor */
  /* "-pc_ilu_levels" (PETSc-style) is read second, so it
     overrides "-level" when both are given */
  Parser_dhReadInt (parser_dh, "-level", &ctx->level);
  Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);

  if (Parser_dhHasSwitch (parser_dh, "-ilut"))
    {
      Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
      ctx->isScaled = true;	/* ILUT always requires row scaling */
      strcpy (ctx->algo_ilu, "ilut");
    }

  /* make sure both algo_par and algo_ilu are set to "none,"
     if at least one is.
   */
  if (!strcmp (ctx->algo_par, "none"))
    {
      strcpy (ctx->algo_ilu, "none");
    }
  else if (!strcmp (ctx->algo_ilu, "none"))
    {
      strcpy (ctx->algo_par, "none");
    }


  Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
  /* sparsify A before factoring */
  Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
  /* sparsify after factoring */
  Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
  /* adjust pivots if smaller than this */
  Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
  /* how to adjust pivots */

  /* set row scaling for mandatory cases */
  if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
    {
      ctx->isScaled = true;
    }

  /* solve method; "bcgs" is accepted as an alias for "bicgstab" */
  tmp = NULL;
  Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
  if (tmp != NULL)
    {
      strcpy (ctx->krylovMethod, tmp);

      if (!strcmp (ctx->krylovMethod, "bcgs"))
	{
	  strcpy (ctx->krylovMethod, "bicgstab");
	}
    }
END_FUNC_DH}
00487 
00488 #undef __FUNC__
00489 #define __FUNC__ "invert_diagonals_private"
00490 void
00491 invert_diagonals_private (Euclid_dh ctx)
00492 {
00493   START_FUNC_DH REAL_DH * aval = ctx->F->aval;
00494   int *diag = ctx->F->diag;
00495   if (aval == NULL || diag == NULL)
00496     {
00497       SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
00498     }
00499   else
00500     {
00501       int i, m = ctx->F->m;
00502       for (i = 0; i < m; ++i)
00503     {
00504       aval[diag[i]] = 1.0 / aval[diag[i]];
00505     }
00506     }
00507 END_FUNC_DH}
00508 
00509 
00510 /* records rho_final (max for all processors) */
00511 #undef __FUNC__
00512 #define __FUNC__ "compute_rho_private"
00513 void
00514 compute_rho_private (Euclid_dh ctx)
00515 {
00516   START_FUNC_DH if (ctx->F != NULL)
00517     {
00518       double bufLocal[3], bufGlobal[3];
00519       int m = ctx->m;
00520 
00521       ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
00522       bufLocal[0] = ctx->stats[NZA_STATS];  /* nzA */
00523       bufLocal[1] = ctx->stats[NZF_STATS];  /* nzF */
00524       bufLocal[2] = ctx->stats[NZA_USED_STATS]; /* nzA used */
00525 
00526       if (np_dh == 1)
00527     {
00528       bufGlobal[0] = bufLocal[0];
00529       bufGlobal[1] = bufLocal[1];
00530       bufGlobal[2] = bufLocal[2];
00531     }
00532       else
00533     {
00534       MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
00535               comm_dh);
00536     }
00537 
00538       if (myid_dh == 0)
00539     {
00540 
00541       /* compute rho */
00542       if (bufGlobal[0] && bufGlobal[1])
00543         {
00544           ctx->rho_final = bufGlobal[1] / bufGlobal[0];
00545         }
00546       else
00547         {
00548           ctx->rho_final = -1;
00549         }
00550 
00551       /* compute ratio of nonzeros in A that were used */
00552       if (bufGlobal[0] && bufGlobal[2])
00553         {
00554           ctx->stats[NZA_RATIO_STATS] =
00555         100.0 * bufGlobal[2] / bufGlobal[0];
00556         }
00557       else
00558         {
00559           ctx->stats[NZA_RATIO_STATS] = 100.0;
00560         }
00561     }
00562     }
00563 END_FUNC_DH}
00564 
#undef __FUNC__
#define __FUNC__ "factor_private"
/* Perform the incomplete factorization, dispatching on the
   parallelization strategy (ctx->algo_par) and factorization method
   (ctx->algo_ilu).  On return ctx->F holds the factor.  Called only
   from Euclid_dhSetup(). */
void
factor_private (Euclid_dh ctx)
{
  START_FUNC_DH
  /*-------------------------------------------------------------
   * special case, for testing/debugging: no preconditioning
   *-------------------------------------------------------------*/
    if (!strcmp (ctx->algo_par, "none"))
    {
      goto DO_NOTHING;
    }

  /*-------------------------------------------------------------
   * Initialize object to hold factor.
   *-------------------------------------------------------------*/
  {
    int br = 0;			/* first row (permuted) of this subdomain */
    int id = np_dh;		/* subdomain id in new ordering */
    if (ctx->sg != NULL)
      {
	br = ctx->sg->beg_rowP[myid_dh];
	id = ctx->sg->o2n_sub[myid_dh];
      }
    Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
    CHECK_V_ERROR;
    /* NOTE(review): ctx->sg is dereferenced unconditionally here,
       although it was guarded just above -- confirm sg can never be
       NULL on this path */
    ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
    ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
    if (!strcmp (ctx->algo_par, "bj"))
      ctx->F->blockJacobi = true;
    if (Parser_dhHasSwitch (parser_dh, "-bj"))
      ctx->F->blockJacobi = true;
  }

  /*-------------------------------------------------------------
   * single mpi task with single or multiple subdomains
   *-------------------------------------------------------------*/
  if (np_dh == 1)
    {

      /* ILU(k) factorization */
      if (!strcmp (ctx->algo_ilu, "iluk"))
	{
	  /* factor all local rows */
	  ctx->from = 0;
	  ctx->to = ctx->m;

	  /* only for debugging: use ilu_mpi_pilu */
	  if (Parser_dhHasSwitch (parser_dh, "-mpi"))
	    {
	      if (ctx->sg != NULL && ctx->sg->blocks > 1)
		{
		  SET_V_ERROR
		    ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
		}
	      iluk_mpi_pilu (ctx);
	      CHECK_V_ERROR;
	    }

	  /* "normal" operation */
	  else
	    {
	      iluk_seq_block (ctx);
	      CHECK_V_ERROR;
	      /* note: iluk_seq_block() performs block jacobi iluk if ctx->algo_par == bj.  */
	    }
	}

      /* ILUT factorization */
      else if (!strcmp (ctx->algo_ilu, "ilut"))
	{
	  ctx->from = 0;
	  ctx->to = ctx->m;
	  ilut_seq (ctx);
	  CHECK_V_ERROR;
	}

      /* all other factorization methods */
      else
	{
	  sprintf (msgBuf_dh, "factorization method: %s is not implemented",
		   ctx->algo_ilu);
	  SET_V_ERROR (msgBuf_dh);
	}
    }

  /*-------------------------------------------------------------
   * multiple mpi tasks with multiple subdomains
   *-------------------------------------------------------------*/
  else
    {
      /* block jacobi: purely local, no neighbor communication */
      if (!strcmp (ctx->algo_par, "bj"))
	{
	  ctx->from = 0;
	  ctx->to = ctx->m;
	  iluk_mpi_bj (ctx);
	  CHECK_V_ERROR;
	}

      /* iluk (PILU): interior rows factored locally, boundary rows
         factored with rows received from neighboring subdomains */
      else if (!strcmp (ctx->algo_ilu, "iluk"))
	{
	  bool bj = ctx->F->blockJacobi;	/* for debugging */

	  /* printf_dh("\n@@@ starting ilu_mpi_pilu @@@\n"); */

	  SortedList_dhCreate (&(ctx->slist));
	  CHECK_V_ERROR;
	  SortedList_dhInit (ctx->slist, ctx->sg);
	  CHECK_V_ERROR;
	  ExternalRows_dhCreate (&(ctx->extRows));
	  CHECK_V_ERROR;
	  ExternalRows_dhInit (ctx->extRows, ctx);
	  CHECK_V_ERROR;

	  /* factor interior rows */
	  ctx->from = 0;
	  ctx->to = ctx->F->first_bdry;

/*
if (Parser_dhHasSwitch(parser_dh, "-test")) {
       printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh);
       iluk_seq(ctx); CHECK_V_ERROR;
} else {
       iluk_mpi_pilu(ctx); CHECK_V_ERROR;
}
*/

	  iluk_seq (ctx);
	  CHECK_V_ERROR;

	  /* get external rows from lower ordered neighbors in the
	     subdomain graph; these rows are needed for factoring
	     this subdomain's boundary rows.
	   */
	  if (!bj)
	    {
	      ExternalRows_dhRecvRows (ctx->extRows);
	      CHECK_V_ERROR;
	    }

	  /* factor boundary rows */
	  ctx->from = ctx->F->first_bdry;
	  ctx->to = ctx->F->m;
	  iluk_mpi_pilu (ctx);
	  CHECK_V_ERROR;

	  /* send this processor's boundary rows to higher ordered
	     neighbors in the subdomain graph.
	   */
	  if (!bj)
	    {
	      ExternalRows_dhSendRows (ctx->extRows);
	      CHECK_V_ERROR;
	    }

	  /* discard column indices in factor if they would alter
	     the subdomain graph (any such elements are in upper
	     triangular portion of the row)
	     (see discard_indices_private(), currently compiled out)
	   */


	  SortedList_dhDestroy (ctx->slist);
	  CHECK_V_ERROR;
	  ctx->slist = NULL;
	  ExternalRows_dhDestroy (ctx->extRows);
	  CHECK_V_ERROR;
	  ctx->extRows = NULL;
	}

      /* all other factorization methods */
      else
	{
	  sprintf (msgBuf_dh, "factorization method: %s is not implemented",
		   ctx->algo_ilu);
	  SET_V_ERROR (msgBuf_dh);
	}
    }

DO_NOTHING:;

END_FUNC_DH}
00748 
#if 0
/* NOTE: this entire function is compiled out (enclosing "#if 0"),
   and its body is additionally disabled by a second, inner "#if 0".
   It is retained for reference only; see the call-site comment in
   factor_private(). */

#undef __FUNC__
#define __FUNC__ "discard_indices_private"
/* Remove factor entries whose columns fall outside this subdomain and
   outside its neighbors in the original subdomain graph, compacting
   cval/aval and rebuilding rp and diag in place. */
void
discard_indices_private (Euclid_dh ctx)
{
  START_FUNC_DH
#if 0
  int *rp = ctx->F->rp, *cval = ctx->F->cval;
  double *aval = ctx->F->aval;
  /* NOTE(review): "F->m" below would not compile -- presumably
     should be "ctx->F->m" if this code is ever re-enabled */
  int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
  int i, j, k, idx, count = 0, start_of_row;
  int beg_row = ctx->beg_row, end_row = beg_row + m;
  int *diag = ctx->F->diag;

  /* if col is not locally owned, and doesn't belong to a
   * nabor in the (original) subdomain graph, we need to discard
   * the column index and associated value.  First, we'll flag all
   * such indices for deletion.
   */
  for (i = 0; i < m; ++i)
    {
      for (j = rp[i]; j < rp[i + 1]; ++j)
	{
	  int col = cval[j];
	  if (col < beg_row || col >= end_row)
	    {
	      bool flag = true;
	      int owner = find_owner_private_mpi (ctx, col);
	      CHECK_V_ERROR;

	      for (k = 0; k < nc; ++k)
		{
		  if (nabors[k] == owner)
		    {
		      flag = false;
		      break;
		    }
		}

	      if (flag)
		{
		  cval[j] = -1;	/* -1 marks the entry for deletion */
		  ++count;
		}
	    }
	}
    }

  sprintf (msgBuf_dh,
	   "deleting %i indices that would alter the subdomain graph", count);
  SET_INFO (msgBuf_dh);

  /* Second, perform the actual deletion */
  idx = 0;
  start_of_row = 0;
  for (i = 0; i < m; ++i)
    {
      for (j = start_of_row; j < rp[i + 1]; ++j)
	{
	  int col = cval[j];
	  double val = aval[j];
	  if (col != -1)
	    {
	      cval[idx] = col;
	      aval[idx] = val;
	      ++idx;
	    }
	}
      start_of_row = rp[i + 1];
      rp[i + 1] = idx;
    }

  /* rebuild diagonal pointers */
  for (i = 0; i < m; ++i)
    {
      for (j = rp[i]; j < rp[i + 1]; ++j)
	{
	  if (cval[j] == i + beg_row)
	    {
	      diag[i] = j;
	      break;
	    }
	}
    }
#endif
END_FUNC_DH}
#endif
00838 
00839 #undef __FUNC__
00840 #define __FUNC__ "Euclid_dhSolve"
00841 void
00842 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
00843 {
00844   START_FUNC_DH int itsOUT;
00845   Mat_dh A = (Mat_dh) ctx->A;
00846 
00847   if (!strcmp (ctx->krylovMethod, "cg"))
00848     {
00849       cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00850       ERRCHKA;
00851     }
00852   else if (!strcmp (ctx->krylovMethod, "bicgstab"))
00853     {
00854       bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
00855       ERRCHKA;
00856     }
00857   else
00858     {
00859       sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
00860       SET_V_ERROR (msgBuf_dh);
00861     }
00862   *its = itsOUT;
00863 END_FUNC_DH}
00864 
#undef __FUNC__
#define __FUNC__ "Euclid_dhPrintStats"
/* Print the full Euclid report (runtime parameters, factor stats,
   and the timing breakdown) to fp.  Timings are reduced across
   processors first; fprintf_dh presumably prints on rank 0 only --
   TODO confirm. */
void
Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp)
{
  START_FUNC_DH double *timing;
  int nz;

  nz = Factor_dhReadNz (ctx->F);
  CHECK_V_ERROR;
  timing = ctx->timing;

  /* add in timing from last setup (if any) */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  reduce_timings_private (ctx);
  CHECK_V_ERROR;

  fprintf_dh (fp,
	      "\n==================== Euclid report (start) ====================\n");
  fprintf_dh (fp, "\nruntime parameters\n");
  fprintf_dh (fp, "------------------\n");
  fprintf_dh (fp, "   setups:                 %i\n", ctx->setupCount);
  fprintf_dh (fp, "   tri solves:             %i\n", ctx->itsTotal);
  fprintf_dh (fp, "   parallelization method: %s\n", ctx->algo_par);
  fprintf_dh (fp, "   factorization method:   %s\n", ctx->algo_ilu);
  fprintf_dh (fp, "   matrix was row scaled:  %i\n", ctx->isScaled);

  fprintf_dh (fp, "   matrix row count:       %i\n", ctx->n);
  fprintf_dh (fp, "   nzF:                    %i\n", nz);
  fprintf_dh (fp, "   rho:                    %g\n", ctx->rho_final);
  fprintf_dh (fp, "   level:                  %i\n", ctx->level);
  fprintf_dh (fp, "   sparseA:                %g\n", ctx->sparseTolA);

  fprintf_dh (fp, "\nEuclid timing report\n");
  fprintf_dh (fp, "--------------------\n");
  fprintf_dh (fp, "   solves total:  %0.2f (see docs)\n",
	      timing[TOTAL_SOLVE_T]);
  fprintf_dh (fp, "   tri solves:    %0.2f\n", timing[TRI_SOLVE_T]);
  fprintf_dh (fp, "   setups:        %0.2f\n", timing[SETUP_T]);
  fprintf_dh (fp, "      subdomain graph setup:  %0.2f\n",
	      timing[SUB_GRAPH_T]);
  fprintf_dh (fp, "      factorization:          %0.2f\n", timing[FACTOR_T]);
  fprintf_dh (fp, "      solve setup:            %0.2f\n",
	      timing[SOLVE_SETUP_T]);
  fprintf_dh (fp, "      rho:                    %0.2f\n",
	      ctx->timing[COMPUTE_RHO_T]);
  /* "misc" is the setup time not accounted for by the four phases above */
  fprintf_dh (fp, "      misc (should be small): %0.2f\n",
	      timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
				 timing[SOLVE_SETUP_T] +
				 timing[COMPUTE_RHO_T]));

  if (ctx->sg != NULL)
    {
      SubdomainGraph_dhPrintStats (ctx->sg, fp);
      CHECK_V_ERROR;
      SubdomainGraph_dhPrintRatios (ctx->sg, fp);
      CHECK_V_ERROR;
    }


  fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
  fprintf_dh (fp, "---------------------------------------------------\n");
  fprintf_dh (fp, "   solve method: %s\n", ctx->krylovMethod);
  fprintf_dh (fp, "   maxIts:       %i\n", ctx->maxIts);
  fprintf_dh (fp, "   rtol:         %g\n", ctx->rtol);
  fprintf_dh (fp, "   atol:         %g\n", ctx->atol);
  fprintf_dh (fp,
	      "\n==================== Euclid report (end) ======================\n");
END_FUNC_DH}
00936 
00937 
00938 /* nzA ratio and rho refer to most recent solve, if more than
00939    one solve (call to Setup) was performed.  Other stats
00940    are cumulative.
00941 */
00942 #undef __FUNC__
00943 #define __FUNC__ "Euclid_dhPrintStatsShort"
00944 void
00945 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
00946               FILE * fp)
00947 {
00948   START_FUNC_DH double *timing = ctx->timing;
00949   /* double *stats = ctx->stats; */
00950   /* double setup_factor; */
00951   /* double setup_other; */
00952   double apply_total;
00953   double apply_per_it;
00954   /* double nzUsedRatio; */
00955   double perIt;
00956   int blocks = np_dh;
00957 
00958   if (np_dh == 1)
00959     blocks = ctx->sg->blocks;
00960 
00961   reduce_timings_private (ctx);
00962   CHECK_V_ERROR;
00963 
00964   /* setup_factor  = timing[FACTOR_T]; */
00965   /* setup_other   = timing[SETUP_T] - setup_factor; */
00966   apply_total = timing[TRI_SOLVE_T];
00967   apply_per_it = apply_total / (double) ctx->its;
00968   /* nzUsedRatio   = stats[NZA_RATIO_STATS]; */
00969   perIt = solve / (double) ctx->its;
00970 
00971   fprintf_dh (fp, "\n");
00972   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00973           "method", "subdms", "level", "its", "setup", "solve", "total",
00974           "perIt", "perIt", "rows");
00975   fprintf_dh (fp,
00976           "------  -----  -----  -----  -----  -----  -----  -----  -----  -----  XX\n");
00977   fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g  XXX\n", ctx->algo_par,    /* parallelization strategy [pilu, bj] */
00978           blocks,       /* number of subdomains */
00979           ctx->level,   /* level, for ILU(k) */
00980           ctx->its,     /* iterations */
00981           setup,        /* total setup time, from caller */
00982           solve,        /* total setup time, from caller */
00983           setup + solve,    /* total time, from caller */
00984           perIt,        /* time per iteration, solver+precond. */
00985           apply_per_it, /* time per iteration, solver+precond. */
00986           (double) ctx->n   /* global unknnowns */
00987     );
00988 
00989 
00990 
00991 
00992 #if 0
00993   fprintf_dh (fp, "\n");
00994   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00995           "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
00996 
00997   fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
00998           "method", "subdms", "level", "its", "total", "factor",
00999           "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
01000   fprintf_dh (fp,
01001           "------  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----  ----- XX\n");
01002 
01003 
01004   fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g  XXX\n", ctx->algo_par,    /* parallelization strategy [pilu, bj] */
01005           blocks,       /* number of subdomains */
01006           ctx->level,   /* level, for ILU(k) */
01007           ctx->its,     /* iterations */
01008           setup,        /* total setup time, from caller */
01009           solve,        /* total setup time, from caller */
01010           setup_factor, /* pc solve: factorization */
01011           setup_other,  /* pc setup: other */
01012           apply_total,  /* triangular solve time */
01013           apply_per_it, /* time for one triangular solve */
01014           ctx->rho_final,   /* rho */
01015           ctx->sparseTolA,  /* sparseA tolerance */
01016           nzUsedRatio,  /* percent of A that was used */
01017           (double) ctx->n   /* global unknnowns */
01018     );
01019 #endif
01020 
01021 #if 0
01022   /* special: for scalability studies */
01023   fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
01024           "subGph", "factor", "solveS", "perIt");
01025   fprintf_dh (fp, "------  -----  -----  -----  -----  -----  WW\n");
01026   fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f  WWW\n",
01027           ctx->algo_par,
01028           ctx->level,
01029           timing[SUB_GRAPH_T],
01030           timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
01031 #endif
01032 END_FUNC_DH}
01033 
01034 
01035 /* its during last solve; rho; nzaUsed */
01036 #undef __FUNC__
01037 #define __FUNC__ "Euclid_dhPrintStatsShorter"
01038 void
01039 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp)
01040 {
01041   START_FUNC_DH double *stats = ctx->stats;
01042 
01043   int its = ctx->its;
01044   double rho = ctx->rho_final;
01045   double nzUsedRatio = stats[NZA_RATIO_STATS];
01046 
01047 
01048   fprintf_dh (fp, "\nStats from last linear solve: YY\n");
01049   fprintf_dh (fp, "%6s %6s %6s     YY\n", "its", "rho", "A_%");
01050   fprintf_dh (fp, " -----  -----  -----     YY\n");
01051   fprintf_dh (fp, "%6i %6.2f %6.2f     YYY\n", its, rho, nzUsedRatio);
01052 
01053 END_FUNC_DH}
01054 
01055 #undef __FUNC__
01056 #define __FUNC__ "Euclid_dhPrintScaling"
01057 void
01058 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp)
01059 {
01060   START_FUNC_DH int i, m = ctx->m;
01061 
01062   if (m > 10)
01063     m = 10;
01064 
01065   if (ctx->scale == NULL)
01066     {
01067       SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
01068     }
01069 
01070   fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
01071   for (i = 0; i < m; ++i)
01072     {
01073       fprintf (fp, "   %i  %g  \n", i + 1, ctx->scale[i]);
01074     }
01075 END_FUNC_DH}
01076 
01077 
01078 #undef __FUNC__
01079 #define __FUNC__ "reduce_timings_private"
01080 void
01081 reduce_timings_private (Euclid_dh ctx)
01082 {
01083   START_FUNC_DH if (np_dh > 1)
01084     {
01085       double bufOUT[TIMING_BINS];
01086 
01087       memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
01088       MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
01089           comm_dh);
01090     }
01091 
01092   ctx->timingsWereReduced = true;
01093 END_FUNC_DH}
01094 
01095 #undef __FUNC__
01096 #define __FUNC__ "Euclid_dhPrintHypreReport"
/* Prints a statistical and timing report (hypre-style) on the root task.
   Folds any pending solve time into the cumulative total, reduces the
   timings (max over tasks), then task 0 prints runtime parameters,
   timings, and subdomain-graph statistics when a graph exists. */
void
Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp)
{
  START_FUNC_DH double *timing;
  int nz;			/* nonzero count of the factor F */

  nz = Factor_dhReadNz (ctx->F);
  CHECK_V_ERROR;
  timing = ctx->timing;

  /* add in timing from last setup (if any) */
  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;

  /* after this, timing[] on task 0 holds the max over all tasks */
  reduce_timings_private (ctx);
  CHECK_V_ERROR;

  if (myid_dh == 0)
    {

      fprintf (fp,
           "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
      fprintf_dh (fp, "\nruntime parameters\n");
      fprintf_dh (fp, "------------------\n");
      fprintf_dh (fp, "   setups:                 %i\n", ctx->setupCount);
      fprintf_dh (fp, "   tri solves:             %i\n", ctx->itsTotal);
      fprintf_dh (fp, "   parallelization method: %s\n", ctx->algo_par);
      fprintf_dh (fp, "   factorization method:   %s\n", ctx->algo_ilu);
      /* the fill level is only meaningful for ILU(k) factorization */
      if (!strcmp (ctx->algo_ilu, "iluk"))
    {
      fprintf_dh (fp, "      level:               %i\n", ctx->level);
    }

      if (ctx->isScaled)
    {
      fprintf_dh (fp, "   matrix was row scaled\n");
    }

      fprintf_dh (fp, "   global matrix row count: %i\n", ctx->n);
      fprintf_dh (fp, "   nzF:                     %i\n", nz);
      fprintf_dh (fp, "   rho:                     %g\n", ctx->rho_final);
      fprintf_dh (fp, "   sparseA:                 %g\n", ctx->sparseTolA);

      fprintf_dh (fp, "\nEuclid timing report\n");
      fprintf_dh (fp, "--------------------\n");
      fprintf_dh (fp, "   solves total:  %0.2f (see docs)\n",
          timing[TOTAL_SOLVE_T]);
      fprintf_dh (fp, "   tri solves:    %0.2f\n", timing[TRI_SOLVE_T]);
      fprintf_dh (fp, "   setups:        %0.2f\n", timing[SETUP_T]);
      fprintf_dh (fp, "      subdomain graph setup:  %0.2f\n",
          timing[SUB_GRAPH_T]);
      fprintf_dh (fp, "      factorization:          %0.2f\n",
          timing[FACTOR_T]);
      fprintf_dh (fp, "      solve setup:            %0.2f\n",
          timing[SOLVE_SETUP_T]);
      fprintf_dh (fp, "      rho:                    %0.2f\n",
          ctx->timing[COMPUTE_RHO_T]);
      /* "misc" = setup time not accounted for by the itemized phases */
      fprintf_dh (fp, "      misc (should be small): %0.2f\n",
          timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
                     timing[SOLVE_SETUP_T] +
                     timing[COMPUTE_RHO_T]));

      if (ctx->sg != NULL)
    {
      SubdomainGraph_dhPrintStats (ctx->sg, fp);
      CHECK_V_ERROR;
      SubdomainGraph_dhPrintRatios (ctx->sg, fp);
      CHECK_V_ERROR;
    }

      fprintf (fp,
           "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");

    }

END_FUNC_DH}
01173 
01174 #undef __FUNC__
01175 #define __FUNC__ "Euclid_dhPrintTestData"
01176 void
01177 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp)
01178 {
01179   START_FUNC_DH
01180     /* Print data that should remain that will hopefully 
01181        remain the same for any platform.
01182        Possibly "tri solves" may change . . .
01183      */
01184     if (myid_dh == 0)
01185     {
01186       fprintf (fp, "   setups:                 %i\n", ctx->setupCount);
01187       fprintf (fp, "   tri solves:             %i\n", ctx->its);
01188       fprintf (fp, "   parallelization method: %s\n", ctx->algo_par);
01189       fprintf (fp, "   factorization method:   %s\n", ctx->algo_ilu);
01190       fprintf (fp, "   level:                  %i\n", ctx->level);
01191       fprintf (fp, "   row scaling:            %i\n", ctx->isScaled);
01192     }
01193   SubdomainGraph_dhPrintRatios (ctx->sg, fp);
01194   CHECK_V_ERROR;
01195 END_FUNC_DH}
 All Classes Files Functions Variables Enumerations Friends