IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends
Factor_dh.c
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #include "Factor_dh.h"
00044 #include "Vec_dh.h"
00045 #include "Mat_dh.h"
00046 #include "SubdomainGraph_dh.h"
00047 #include "TimeLog_dh.h"
00048 #include "Mem_dh.h"
00049 #include "Numbering_dh.h"
00050 #include "Hash_i_dh.h"
00051 #include "Parser_dh.h"
00052 #include "mat_dh_private.h"
00053 #include "getRow_dh.h"
00054 #include "Euclid_dh.h"
00055 #include "io_dh.h"
00056 
00057 /* suppress compiler complaints */
00058 void
00059 Factor_dh_junk ()
00060 {
00061 }
00062 
00063 static void adjust_bj_private (Factor_dh mat);
00064 static void unadjust_bj_private (Factor_dh mat);
00065 
00066 
00067 #undef __FUNC__
00068 #define __FUNC__ "Factor_dhCreate"
00069 void
00070 Factor_dhCreate (Factor_dh * mat)
00071 {
00072   START_FUNC_DH struct _factor_dh *tmp;
00073 
00074   if (np_dh > MAX_MPI_TASKS)
00075     {
00076       SET_V_ERROR ("you must change MAX_MPI_TASKS and recompile!");
00077     }
00078 
00079   tmp = (struct _factor_dh *) MALLOC_DH (sizeof (struct _factor_dh));
00080   CHECK_V_ERROR;
00081   *mat = tmp;
00082 
00083   tmp->m = 0;
00084   tmp->n = 0;
00085   tmp->id = myid_dh;
00086   tmp->beg_row = 0;
00087   tmp->first_bdry = 0;
00088   tmp->bdry_count = 0;
00089   tmp->blockJacobi = false;
00090 
00091   tmp->rp = NULL;
00092   tmp->cval = NULL;
00093   tmp->aval = NULL;
00094   tmp->fill = NULL;
00095   tmp->diag = NULL;
00096   tmp->alloc = 0;
00097 
00098   tmp->work_y_lo = tmp->work_x_hi = NULL;
00099   tmp->sendbufLo = tmp->sendbufHi = NULL;
00100   tmp->sendindLo = tmp->sendindHi = NULL;
00101   tmp->num_recvLo = tmp->num_recvHi = 0;
00102   tmp->num_sendLo = tmp->num_sendHi = 0;
00103   tmp->sendlenLo = tmp->sendlenHi = 0;
00104 
00105   tmp->solveIsSetup = false;
00106   tmp->numbSolve = NULL;
00107 
00108   tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Factor");
00109 
00110 /*  Factor_dhZeroTiming(tmp); CHECK_V_ERROR; */
00111 END_FUNC_DH}
00112 
00113 #undef __FUNC__
00114 #define __FUNC__ "Factor_dhDestroy"
00115 void
00116 Factor_dhDestroy (Factor_dh mat)
00117 {
00118   START_FUNC_DH if (mat->rp != NULL)
00119     {
00120       FREE_DH (mat->rp);
00121       CHECK_V_ERROR;
00122     }
00123   if (mat->cval != NULL)
00124     {
00125       FREE_DH (mat->cval);
00126       CHECK_V_ERROR;
00127     }
00128   if (mat->aval != NULL)
00129     {
00130       FREE_DH (mat->aval);
00131       CHECK_V_ERROR;
00132     }
00133   if (mat->diag != NULL)
00134     {
00135       FREE_DH (mat->diag);
00136       CHECK_V_ERROR;
00137     }
00138   if (mat->fill != NULL)
00139     {
00140       FREE_DH (mat->fill);
00141       CHECK_V_ERROR;
00142     }
00143 
00144   if (mat->work_y_lo != NULL)
00145     {
00146       FREE_DH (mat->work_y_lo);
00147       CHECK_V_ERROR;
00148     }
00149   if (mat->work_x_hi != NULL)
00150     {
00151       FREE_DH (mat->work_x_hi);
00152       CHECK_V_ERROR;
00153     }
00154   if (mat->sendbufLo != NULL)
00155     {
00156       FREE_DH (mat->sendbufLo);
00157       CHECK_V_ERROR;
00158     }
00159   if (mat->sendbufHi != NULL)
00160     {
00161       FREE_DH (mat->sendbufHi);
00162       CHECK_V_ERROR;
00163     }
00164   if (mat->sendindLo != NULL)
00165     {
00166       FREE_DH (mat->sendindLo);
00167       CHECK_V_ERROR;
00168     }
00169   if (mat->sendindHi != NULL)
00170     {
00171       FREE_DH (mat->sendindHi);
00172       CHECK_V_ERROR;
00173     }
00174 
00175   if (mat->numbSolve != NULL)
00176     {
00177       Numbering_dhDestroy (mat->numbSolve);
00178       CHECK_V_ERROR;
00179     }
00180   FREE_DH (mat);
00181   CHECK_V_ERROR;
00182 END_FUNC_DH}
00183 
00184 
00185 #undef __FUNC__
00186 #define __FUNC__ "create_fake_mat_private"
00187 static void
00188 create_fake_mat_private (Factor_dh mat, Mat_dh * matFakeIN)
00189 {
00190   START_FUNC_DH Mat_dh matFake;
00191   Mat_dhCreate (matFakeIN);
00192   CHECK_V_ERROR;
00193   matFake = *matFakeIN;
00194   matFake->m = mat->m;
00195   matFake->n = mat->n;
00196   matFake->rp = mat->rp;
00197   matFake->cval = mat->cval;
00198   matFake->aval = mat->aval;
00199   matFake->beg_row = mat->beg_row;
00200 END_FUNC_DH}
00201 
00202 #undef __FUNC__
00203 #define __FUNC__ "destroy_fake_mat_private"
00204 static void
00205 destroy_fake_mat_private (Mat_dh matFake)
00206 {
00207   START_FUNC_DH matFake->rp = NULL;
00208   matFake->cval = NULL;
00209   matFake->aval = NULL;
00210   Mat_dhDestroy (matFake);
00211   CHECK_V_ERROR;
00212 END_FUNC_DH}
00213 
00214 
00215 
00216 #undef __FUNC__
00217 #define __FUNC__ "Factor_dhReadNz"
00218 int
00219 Factor_dhReadNz (Factor_dh mat)
00220 {
00221   START_FUNC_DH int ierr, retval = mat->rp[mat->m];
00222   int nz = retval;
00223   ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
00224   CHECK_MPI_ERROR (ierr);
00225 END_FUNC_VAL (retval)}
00226 
00227 
00228 
00229 #undef __FUNC__
00230 #define __FUNC__ "Factor_dhPrintRows"
00231 void
00232 Factor_dhPrintRows (Factor_dh mat, FILE * fp)
00233 {
00234   START_FUNC_DH int beg_row = mat->beg_row;
00235   int m = mat->m, i, j;
00236   bool noValues;
00237 
00238   noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
00239   if (mat->aval == NULL)
00240     noValues = true;
00241 
00242   if (mat->blockJacobi)
00243     {
00244       adjust_bj_private (mat);
00245       CHECK_V_ERROR;
00246     }
00247 
00248   fprintf (fp,
00249        "\n----------------------- Factor_dhPrintRows ------------------\n");
00250   if (mat->blockJacobi)
00251     {
00252       fprintf (fp,
00253            "@@@ Block Jacobi ILU; adjusted values from zero-based @@@\n");
00254     }
00255 
00256   for (i = 0; i < m; ++i)
00257     {
00258       fprintf (fp, "%i :: ", 1 + i + beg_row);
00259       for (j = mat->rp[i]; j < mat->rp[i + 1]; ++j)
00260     {
00261       if (noValues)
00262         {
00263           fprintf (fp, "%i ", 1 + mat->cval[j]);
00264         }
00265       else
00266         {
00267           fprintf (fp, "%i,%g ; ", 1 + mat->cval[j], mat->aval[j]);
00268         }
00269     }
00270       fprintf (fp, "\n");
00271     }
00272 
00273   if (mat->blockJacobi)
00274     {
00275       unadjust_bj_private (mat);
00276       CHECK_V_ERROR;
00277     }
00278 END_FUNC_DH}
00279 
00280 
00281 #undef __FUNC__
00282 #define __FUNC__ "Factor_dhPrintDiags"
00283 void
00284 Factor_dhPrintDiags (Factor_dh mat, FILE * fp)
00285 {
00286   START_FUNC_DH int beg_row = mat->beg_row;
00287   int m = mat->m, i, pe, *diag = mat->diag;
00288   REAL_DH *aval = mat->aval;
00289 
00290 
00291   fprintf_dh (fp,
00292           "\n----------------------- Factor_dhPrintDiags ------------------\n");
00293   fprintf_dh (fp, "(grep for 'ZERO')\n");
00294 
00295   for (pe = 0; pe < np_dh; ++pe)
00296     {
00297       MPI_Barrier (comm_dh);
00298       if (mat->id == pe)
00299     {
00300       fprintf (fp, "----- subdomain: %i  processor: %i\n", pe, myid_dh);
00301       for (i = 0; i < m; ++i)
00302         {
00303           REAL_DH val = aval[diag[i]];
00304           if (val)
00305         {
00306           fprintf (fp, "%i %g\n", i + 1 + beg_row, aval[diag[i]]);
00307         }
00308           else
00309         {
00310           fprintf (fp, "%i %g ZERO\n", i + 1 + beg_row,
00311                aval[diag[i]]);
00312         }
00313         }
00314     }
00315     }
00316 END_FUNC_DH}
00317 
00318 
00319 #undef __FUNC__
00320 #define __FUNC__ "Factor_dhPrintGraph"
00321 void
00322 Factor_dhPrintGraph (Factor_dh mat, char *filename)
00323 {
00324   START_FUNC_DH FILE * fp;
00325   int i, j, m = mat->m, *work, *rp = mat->rp, *cval = mat->cval;
00326 
00327   if (np_dh > 1)
00328     SET_V_ERROR ("only implemented for single mpi task");
00329 
00330   work = (int *) MALLOC_DH (m * sizeof (int));
00331   CHECK_V_ERROR;
00332 
00333   fp = openFile_dh (filename, "w");
00334   CHECK_V_ERROR;
00335 
00336   for (i = 0; i < m; ++i)
00337     {
00338       for (j = 0; j < m; ++j)
00339     work[j] = 0;
00340       for (j = rp[i]; j < rp[i]; ++j)
00341     work[cval[j]] = 1;
00342 
00343       for (j = 0; j < m; ++j)
00344     {
00345       if (work[j])
00346         {
00347           fprintf (fp, " x ");
00348         }
00349       else
00350         {
00351           fprintf (fp, "   ");
00352         }
00353     }
00354       fprintf (fp, "\n");
00355     }
00356 
00357   closeFile_dh (fp);
00358   CHECK_V_ERROR;
00359 
00360   FREE_DH (work);
00361 END_FUNC_DH}
00362 
00363 
00364 #undef __FUNC__
00365 #define __FUNC__ "Factor_dhPrintTriples"
00366 void
00367 Factor_dhPrintTriples (Factor_dh mat, char *filename)
00368 {
00369   START_FUNC_DH int pe, i, j;
00370   int m = mat->m, *rp = mat->rp;
00371   int beg_row = mat->beg_row;
00372   REAL_DH *aval = mat->aval;
00373   bool noValues;
00374   FILE *fp;
00375 
00376   if (mat->blockJacobi)
00377     {
00378       adjust_bj_private (mat);
00379       CHECK_V_ERROR;
00380     }
00381 
00382   noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
00383   if (noValues)
00384     aval = NULL;
00385 
00386   for (pe = 0; pe < np_dh; ++pe)
00387     {
00388       MPI_Barrier (comm_dh);
00389       if (mat->id == pe)
00390     {
00391       if (pe == 0)
00392         {
00393           fp = openFile_dh (filename, "w");
00394           CHECK_V_ERROR;
00395         }
00396       else
00397         {
00398           fp = openFile_dh (filename, "a");
00399           CHECK_V_ERROR;
00400         }
00401 
00402       for (i = 0; i < m; ++i)
00403         {
00404           for (j = rp[i]; j < rp[i + 1]; ++j)
00405         {
00406           if (noValues)
00407             {
00408               fprintf (fp, "%i %i\n", 1 + i + beg_row,
00409                    1 + mat->cval[j]);
00410             }
00411           else
00412             {
00413               fprintf (fp, TRIPLES_FORMAT,
00414                    1 + i + beg_row, 1 + mat->cval[j], aval[j]);
00415             }
00416         }
00417         }
00418       closeFile_dh (fp);
00419       CHECK_V_ERROR;
00420     }
00421     }
00422 
00423   if (mat->blockJacobi)
00424     {
00425       unadjust_bj_private (mat);
00426       CHECK_V_ERROR;
00427     }
00428 END_FUNC_DH}
00429 
00430 /*--------------------------------------------------------------------------------
00431  * Functions to setup the matrix for triangular solves.  These are similar to
00432  * MatVecSetup(), except that there are two cases: subdomains ordered lower than
00433  * ourselves, and subdomains ordered higher than ourselves.  This SolveSetup
00434  * is used for Parallel ILU (PILU).  The following are adopted/modified from
00435  * Edmond Chow's ParaSails
00436  *--------------------------------------------------------------------------------*/
00437 
00438 /* adopted from Edmond Chow's ParaSails */
00439 
00440 /* 1. start receives of node data to be received from other processors;
00441    2. send to other processors the list of nodes this processor needs
00442       to receive from them.
00443    Returns: the number of processors from whom nodes will be received.
00444 */
00445 #undef __FUNC__
00446 #define __FUNC__ "setup_receives_private"
00447 static int
00448 setup_receives_private (Factor_dh mat, int *beg_rows, int *end_rows,
00449             double *recvBuf, MPI_Request * req,
00450             int *reqind, int reqlen, int *outlist, bool debug)
00451 {
00452   START_FUNC_DH int i, j, this_pe, num_recv = 0;
00453   MPI_Request request;
00454 
00455   if (debug)
00456     {
00457       fprintf (logFile,
00458            "\nFACT ========================================================\n");
00459       fprintf (logFile, "FACT STARTING: setup_receives_private\n");
00460     }
00461 
00462   for (i = 0; i < reqlen; i = j)
00463     {               /* j is set below */
00464       /* determine the processor that owns the row with index reqind[i] */
00465       this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
00466       CHECK_ERROR (-1);
00467 
00468       /* Figure out other rows we need from this_pe */
00469       for (j = i + 1; j < reqlen; j++)
00470     {
00471       int idx = reqind[j];
00472       if (idx < beg_rows[this_pe] || idx >= end_rows[this_pe])
00473         {
00474           break;
00475         }
00476     }
00477 
00478       if (debug)
00479     {
00480       int k;
00481       fprintf (logFile, "FACT need nodes from P_%i: ", this_pe);
00482       for (k = i; k < j; ++k)
00483         fprintf (logFile, "%i ", 1 + reqind[k]);
00484       fprintf (logFile, "\n");
00485     }
00486 
00487       /* Record the number of number of indices needed from this_pe */
00488       outlist[this_pe] = j - i;
00489 
00490       /* Request rows in reqind[i..j-1] */
00491       /* Note: the receiving processor, this_pe, doesn't yet know
00492          about the incoming request, hence, can't set up a matching
00493          receive; this matching receive will be started later,
00494          in setup_sends_private.
00495        */
00496       MPI_Isend (reqind + i, j - i, MPI_INT, this_pe, 444, comm_dh, &request);
00497       MPI_Request_free (&request);
00498 
00499       /* set up persistent comms for receiving the values from this_pe */
00500       MPI_Recv_init (recvBuf + i, j - i, MPI_DOUBLE, this_pe, 555,
00501              comm_dh, req + num_recv);
00502       ++num_recv;
00503     }
00504 
00505   END_FUNC_VAL (num_recv);
00506 }
00507 
00508 /*
00509    1. start receive to get list of nodes that this processor
00510       needs to send to other processors
00511    2. start persistent comms to send the data
00512 */
00513 #undef __FUNC__
00514 #define __FUNC__ "setup_sends_private"
00515 static void
00516 setup_sends_private (Factor_dh mat, int *inlist,
00517              int *o2n_subdomain, bool debug)
00518 {
00519   START_FUNC_DH int i, jLo, jHi, sendlenLo, sendlenHi, first = mat->beg_row;
00520   MPI_Request *requests = mat->requests, *sendReq;
00521   MPI_Status *statuses = mat->status;
00522   bool isHigher;
00523   int *rcvBuf;
00524   double *sendBuf;
00525   int myidNEW = o2n_subdomain[myid_dh];
00526   int count;
00527 
00528   if (debug)
00529     {
00530       fprintf (logFile, "FACT \nSTARTING: setup_sends_private\n");
00531     }
00532 
00533   /* Determine size of and allocate sendbuf and sendind */
00534   sendlenLo = sendlenHi = 0;
00535   for (i = 0; i < np_dh; i++)
00536     {
00537       if (inlist[i])
00538     {
00539       if (o2n_subdomain[i] < myidNEW)
00540         {
00541           sendlenLo += inlist[i];
00542         }
00543       else
00544         {
00545           sendlenHi += inlist[i];
00546         }
00547     }
00548     }
00549 
00550   mat->sendlenLo = sendlenLo;
00551   mat->sendlenHi = sendlenHi;
00552   mat->sendbufLo = (double *) MALLOC_DH (sendlenLo * sizeof (double));
00553   CHECK_V_ERROR;
00554   mat->sendbufHi = (double *) MALLOC_DH (sendlenHi * sizeof (double));
00555   CHECK_V_ERROR;
00556   mat->sendindLo = (int *) MALLOC_DH (sendlenLo * sizeof (int));
00557   CHECK_V_ERROR;
00558   mat->sendindHi = (int *) MALLOC_DH (sendlenHi * sizeof (int));
00559   CHECK_V_ERROR;
00560 
00561   count = 0;            /* number of calls to MPI_Irecv() */
00562   jLo = jHi = 0;
00563   mat->num_sendLo = 0;
00564   mat->num_sendHi = 0;
00565   for (i = 0; i < np_dh; i++)
00566     {
00567       if (inlist[i])
00568     {
00569       isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
00570 
00571       /* Post receive for the actual indices */
00572       if (isHigher)
00573         {
00574           rcvBuf = &mat->sendindHi[jHi];
00575           sendBuf = &mat->sendbufHi[jHi];
00576           sendReq = &mat->send_reqHi[mat->num_sendHi];
00577           mat->num_sendHi++;
00578           jHi += inlist[i];
00579         }
00580       else
00581         {
00582           rcvBuf = &mat->sendindLo[jLo];
00583           sendBuf = &mat->sendbufLo[jLo];
00584           sendReq = &mat->send_reqLo[mat->num_sendLo];
00585           mat->num_sendLo++;
00586           jLo += inlist[i];
00587         }
00588 
00589       /* matching receive, for list of unknowns that will be sent,
00590          during the triangular solves, from ourselves to P_i
00591        */
00592       MPI_Irecv (rcvBuf, inlist[i], MPI_INT, i, 444, comm_dh,
00593              requests + count);
00594       ++count;
00595 
00596       /* Set up the send */
00597       MPI_Send_init (sendBuf, inlist[i], MPI_DOUBLE, i, 555, comm_dh,
00598              sendReq);
00599     }
00600     }
00601 
00602   /* note: count = mat->num_sendLo = mat->num_sendHi */
00603   MPI_Waitall (count, requests, statuses);
00604 
00605   if (debug)
00606     {
00607       int j;
00608       jLo = jHi = 0;
00609 
00610       fprintf (logFile,
00611            "\nFACT columns that I must send to other subdomains:\n");
00612       for (i = 0; i < np_dh; i++)
00613     {
00614       if (inlist[i])
00615         {
00616           isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
00617           if (isHigher)
00618         {
00619           rcvBuf = &mat->sendindHi[jHi];
00620           jHi += inlist[i];
00621         }
00622           else
00623         {
00624           rcvBuf = &mat->sendindLo[jLo];
00625           jLo += inlist[i];
00626         }
00627 
00628           fprintf (logFile, "FACT  send to P_%i: ", i);
00629           for (j = 0; j < inlist[i]; ++j)
00630         fprintf (logFile, "%i ", rcvBuf[j] + 1);
00631           fprintf (logFile, "\n");
00632         }
00633     }
00634     }
00635 
00636   /* convert global indices to local indices */
00637   /* these are all indices on this processor */
00638   for (i = 0; i < mat->sendlenLo; i++)
00639     mat->sendindLo[i] -= first;
00640   for (i = 0; i < mat->sendlenHi; i++)
00641     mat->sendindHi[i] -= first;
00642 END_FUNC_DH}
00643 
00644 
00645 
00646 #undef __FUNC__
00647 #define __FUNC__ "Factor_dhSolveSetup"
00648 void
00649 Factor_dhSolveSetup (Factor_dh mat, SubdomainGraph_dh sg)
00650 {
00651   START_FUNC_DH int *outlist, *inlist;
00652   int i, row, *rp = mat->rp, *cval = mat->cval;
00653   Numbering_dh numb;
00654   int m = mat->m;
00655   /* int firstLocalRow = mat->beg_row; */
00656   int *beg_rows = sg->beg_rowP, *row_count = sg->row_count, *end_rows;
00657   Mat_dh matFake;
00658   bool debug = false;
00659   double *recvBuf;
00660 
00661   if (mat->debug && logFile != NULL)
00662     debug = true;
00663 
00664   end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
00665   CHECK_V_ERROR;
00666   outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
00667   CHECK_V_ERROR;
00668   inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
00669   CHECK_V_ERROR;
00670   for (i = 0; i < np_dh; ++i)
00671     {
00672       inlist[i] = 0;
00673       outlist[i] = 0;
00674       end_rows[i] = beg_rows[i] + row_count[i];
00675     }
00676 
00677   /* Create Numbering object */
00678   create_fake_mat_private (mat, &matFake);
00679   CHECK_V_ERROR;
00680   Numbering_dhCreate (&(mat->numbSolve));
00681   CHECK_V_ERROR;
00682   numb = mat->numbSolve;
00683   Numbering_dhSetup (numb, matFake);
00684   CHECK_V_ERROR;
00685   destroy_fake_mat_private (matFake);
00686   CHECK_V_ERROR;
00687 
00688   if (debug)
00689     {
00690       fprintf (stderr, "Numbering_dhSetup completed\n");
00691     }
00692 
00693   /* Allocate recvbuf; recvbuf has numlocal entries saved for local part of x */
00694   i = m + numb->num_ext;
00695   mat->work_y_lo = (double *) MALLOC_DH (i * sizeof (double));
00696   CHECK_V_ERROR;
00697   mat->work_x_hi = (double *) MALLOC_DH (i * sizeof (double));
00698   CHECK_V_ERROR;
00699   if (debug)
00700     {
00701       fprintf (logFile, "FACT num_extLo= %i  num_extHi= %i\n",
00702            numb->num_extLo, numb->num_extHi);
00703     }
00704 
00705   mat->num_recvLo = 0;
00706   mat->num_recvHi = 0;
00707   if (numb->num_extLo)
00708     {
00709       recvBuf = mat->work_y_lo + m;
00710       mat->num_recvLo = setup_receives_private (mat, beg_rows, end_rows,
00711                         recvBuf, mat->recv_reqLo,
00712                         numb->idx_extLo,
00713                         numb->num_extLo, outlist,
00714                         debug);
00715       CHECK_V_ERROR;
00716 
00717     }
00718 
00719   if (numb->num_extHi)
00720     {
00721       recvBuf = mat->work_x_hi + m + numb->num_extLo;
00722       mat->num_recvHi = setup_receives_private (mat, beg_rows, end_rows,
00723                         recvBuf, mat->recv_reqHi,
00724                         numb->idx_extHi,
00725                         numb->num_extHi, outlist,
00726                         debug);
00727       CHECK_V_ERROR;
00728     }
00729 
00730   MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
00731   /* At this point, inlist[j] contains the number of indices 
00732      that this processor must send to P_j.  Processors next need
00733      to exchange the actual lists of required indices; this is done
00734      in setup_sends_private()
00735    */
00736 
00737   setup_sends_private (mat, inlist, sg->o2n_sub, debug);
00738   CHECK_V_ERROR;
00739 
00740   /* Convert column indices in each row to local indices */
00741   for (row = 0; row < m; row++)
00742     {
00743       int len = rp[row + 1] - rp[row];
00744       int *ind = cval + rp[row];
00745       Numbering_dhGlobalToLocal (numb, len, ind, ind);
00746       CHECK_V_ERROR;
00747     }
00748 
00749   FREE_DH (outlist);
00750   CHECK_V_ERROR;
00751   FREE_DH (inlist);
00752   CHECK_V_ERROR;
00753   FREE_DH (end_rows);
00754   CHECK_V_ERROR;
00755 
00756   if (debug)
00757     {
00758       int ii, jj;
00759 
00760       fprintf (logFile,
00761            "\n--------- row/col structure, after global to local renumbering\n");
00762       for (ii = 0; ii < mat->m; ++ii)
00763     {
00764       fprintf (logFile, "local row %i :: ", ii + 1);
00765       for (jj = mat->rp[ii]; jj < mat->rp[ii + 1]; ++jj)
00766         {
00767           fprintf (logFile, "%i ", 1 + mat->cval[jj]);
00768         }
00769       fprintf (logFile, "\n");
00770     }
00771       fprintf (logFile, "\n");
00772       fflush (logFile);
00773     }
00774 END_FUNC_DH}
00775 
00776 /* solve for MPI implementation of PILU.  This function is
00777    so similar to MatVec, that I put it here, instead of with
00778    the other solves located in Euclid_apply.c.
00779 */
00780 static void forward_solve_private (int m, int from, int to,
00781                    int *rp, int *cval, int *diag,
00782                    double *aval, double *rhs, double *work_y,
00783                    bool debug);
00784 
00785 static void backward_solve_private (int m, int from, int to,
00786                     int *rp, int *cval, int *diag,
00787                     double *aval, double *work_y,
00788                     double *work_x, bool debug);
00789 
00790 static int beg_rowG;
00791 
00792 
00793 #undef __FUNC__
00794 #define __FUNC__ "Factor_dhSolve"
00795 void
00796 Factor_dhSolve (double *rhs, double *lhs, Euclid_dh ctx)
00797 {
00798   START_FUNC_DH Factor_dh mat = ctx->F;
00799   int from, to;
00800   int ierr, i, m = mat->m, first_bdry = mat->first_bdry;
00801   int offsetLo = mat->numbSolve->num_extLo;
00802   int offsetHi = mat->numbSolve->num_extHi;
00803   int *rp = mat->rp, *cval = mat->cval, *diag = mat->diag;
00804   double *aval = mat->aval;
00805   int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi;
00806   int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi;
00807   double *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi;
00808   double *work_y = mat->work_y_lo;
00809   double *work_x = mat->work_x_hi;
00810   bool debug = false;
00811 
00812   if (mat->debug && logFile != NULL)
00813     debug = true;
00814   if (debug)
00815     beg_rowG = ctx->F->beg_row;
00816 
00817 /*
00818 for (i=0; i<m+offsetLo+offsetHi; ++i) {
00819   work_y[i] = -99;
00820   work_x[i] = -99;
00821 }
00822 */
00823 
00824   if (debug)
00825     {
00826       fprintf (logFile,
00827            "\n=====================================================\n");
00828       fprintf (logFile,
00829            "FACT Factor_dhSolve: num_recvLo= %i num_recvHi = %i\n",
00830            mat->num_recvLo, mat->num_recvHi);
00831     }
00832 
00833   /* start receives from higher and lower ordered subdomains */
00834   if (mat->num_recvLo)
00835     {
00836       MPI_Startall (mat->num_recvLo, mat->recv_reqLo);
00837     }
00838   if (mat->num_recvHi)
00839     {
00840       MPI_Startall (mat->num_recvHi, mat->recv_reqHi);
00841     }
00842 
00843   /*-------------------------------------------------------------
00844    * PART 1: Forward Solve Ly = rhs for y ('y' is called 'work')
00845    *-------------------------------------------------------------*/
00846   /* forward triangular solve on interior nodes */
00847   from = 0;
00848   to = first_bdry;
00849   if (from != to)
00850     {
00851       forward_solve_private (m, from, to, rp, cval, diag, aval,
00852                  rhs, work_y, debug);
00853       CHECK_V_ERROR;
00854     }
00855 
00856   /* wait for receives from lower ordered subdomains, then
00857      complete forward solve on boundary nodes.
00858    */
00859   if (mat->num_recvLo)
00860     {
00861       MPI_Waitall (mat->num_recvLo, mat->recv_reqLo, mat->status);
00862 
00863       /* debug block */
00864       if (debug)
00865     {
00866       fprintf (logFile,
00867            "FACT got 'y' values from lower neighbors; work buffer:\n  ");
00868       for (i = 0; i < offsetLo; ++i)
00869         {
00870           fprintf (logFile, "%g ", work_y[m + i]);
00871         }
00872     }
00873     }
00874 
00875   /* forward triangular solve on boundary nodes */
00876   from = first_bdry;
00877   to = m;
00878   if (from != to)
00879     {
00880       forward_solve_private (m, from, to, rp, cval, diag, aval,
00881                  rhs, work_y, debug);
00882       CHECK_V_ERROR;
00883     }
00884 
00885   /*  send boundary elements from work vector 'y' to higher ordered subdomains */
00886   if (mat->num_sendHi)
00887     {
00888 
00889       /* copy elements to send buffer */
00890       for (i = 0; i < sendlenHi; i++)
00891     {
00892       sendbufHi[i] = work_y[sendindHi[i]];
00893     }
00894 
00895       /* start the sends */
00896       MPI_Startall (mat->num_sendHi, mat->send_reqHi);
00897 
00898       /* debug block */
00899       if (debug)
00900     {
00901       fprintf (logFile,
00902            "\nFACT sending 'y' values to higher neighbor:\nFACT   ");
00903       for (i = 0; i < sendlenHi; i++)
00904         {
00905           fprintf (logFile, "%g ", sendbufHi[i]);
00906         }
00907       fprintf (logFile, "\n");
00908     }
00909     }
00910 
00911   /*----------------------------------------------------------
00912    * PART 2: Backward Solve
00913    *----------------------------------------------------------*/
00914   /* wait for bdry nodes 'x' from higher-ordered processsors */
00915   if (mat->num_recvHi)
00916     {
00917       ierr = MPI_Waitall (mat->num_recvHi, mat->recv_reqHi, mat->status);
00918       CHECK_MPI_V_ERROR (ierr);
00919 
00920       /* debug block */
00921       if (debug)
00922     {
00923       fprintf (logFile, "FACT got 'x' values from higher neighbors:\n  ");
00924       for (i = m + offsetLo; i < m + offsetLo + offsetHi; ++i)
00925         {
00926           fprintf (logFile, "%g ", work_x[i]);
00927         }
00928       fprintf (logFile, "\n");
00929     }
00930     }
00931 
00932   /* backward solve boundary nodes */
00933   from = m;
00934   to = first_bdry;
00935   if (from != to)
00936     {
00937       backward_solve_private (m, from, to, rp, cval, diag, aval,
00938                   work_y, work_x, debug);
00939       CHECK_V_ERROR;
00940     }
00941 
00942   /*  send boundary node elements to lower ordered subdomains */
00943   if (mat->num_sendLo)
00944     {
00945 
00946       /* copy elements to send buffer */
00947       for (i = 0; i < sendlenLo; i++)
00948     {
00949       sendbufLo[i] = work_x[sendindLo[i]];
00950     }
00951 
00952       /* start the sends */
00953       ierr = MPI_Startall (mat->num_sendLo, mat->send_reqLo);
00954       CHECK_MPI_V_ERROR (ierr);
00955 
00956       /* debug block */
00957       if (debug)
00958     {
00959       fprintf (logFile,
00960            "\nFACT sending 'x' values to lower neighbor:\nFACT   ");
00961       for (i = 0; i < sendlenLo; i++)
00962         {
00963           fprintf (logFile, "%g ", sendbufLo[i]);
00964         }
00965       fprintf (logFile, "\n");
00966     }
00967     }
00968 
00969   /* backward solve interior nodes */
00970   from = first_bdry;
00971   to = 0;
00972   if (from != to)
00973     {
00974       backward_solve_private (m, from, to, rp, cval, diag, aval,
00975                   work_y, work_x, debug);
00976       CHECK_V_ERROR;
00977     }
00978 
00979   /* copy solution from work vector lhs vector */
00980   memcpy (lhs, work_x, m * sizeof (double));
00981 
00982   if (debug)
00983     {
00984       fprintf (logFile, "\nFACT solution: ");
00985       for (i = 0; i < m; ++i)
00986     {
00987       fprintf (logFile, "%g ", lhs[i]);
00988     }
00989       fprintf (logFile, "\n");
00990     }
00991 
00992   /* wait for sends to go through */
00993   if (mat->num_sendLo)
00994     {
00995       ierr = MPI_Waitall (mat->num_sendLo, mat->send_reqLo, mat->status);
00996       CHECK_MPI_V_ERROR (ierr);
00997     }
00998 
00999   if (mat->num_sendHi)
01000     {
01001       ierr = MPI_Waitall (mat->num_sendHi, mat->send_reqHi, mat->status);
01002       CHECK_MPI_V_ERROR (ierr);
01003     }
01004 END_FUNC_DH}
01005 
01006 
01007 
01008 #undef __FUNC__
01009 #define __FUNC__ "forward_solve_private"
01010 void
01011 forward_solve_private (int m, int from, int to, int *rp,
01012                int *cval, int *diag, double *aval,
01013                double *rhs, double *work_y, bool debug)
01014 {
01015   START_FUNC_DH int i, j, idx;
01016 
01017   if (debug)
01018     {
01019       fprintf (logFile,
01020            "\nFACT starting forward_solve_private; from= %i; to= %i, m= %i\n",
01021            1 + from, 1 + to, m);
01022     }
01023 
01024 /*
01025   if (from == 0) {
01026     work_y[0] = rhs[0];  
01027     if (debug) {
01028       fprintf(logFile, "FACT   work_y[%i] = %g\n------------\n", 1+beg_rowG, work_y[0]);
01029     }
01030   } else {
01031     --from; 
01032   }
01033 */
01034 
01035   if (debug)
01036     {
01037       for (i = from; i < to; ++i)
01038     {
01039       int len = diag[i] - rp[i];
01040       int *col = cval + rp[i];
01041       double *val = aval + rp[i];
01042       double sum = rhs[i];
01043 
01044       fprintf (logFile, "FACT   solving for work_y[%i] (global)\n",
01045            i + 1 + beg_rowG);
01046       fprintf (logFile, "FACT        sum = %g\n", sum);
01047       for (j = 0; j < len; ++j)
01048         {
01049           idx = col[j];
01050           sum -= (val[j] * work_y[idx]);
01051           fprintf (logFile,
01052                "FACT        sum(%g) -= val[j] (%g) * work_y[%i] (%g)\n",
01053                sum, val[j], 1 + idx, work_y[idx]);
01054         }
01055       work_y[i] = sum;
01056       fprintf (logFile, "FACT  work_y[%i] = %g\n", 1 + i + beg_rowG,
01057            work_y[i]);
01058       fprintf (logFile, "-----------\n");
01059     }
01060 
01061       fprintf (logFile, "\nFACT   work vector at end of forward solve:\n");
01062       for (i = 0; i < to; i++)
01063     fprintf (logFile, "    %i %g\n", i + 1 + beg_rowG, work_y[i]);
01064 
01065     }
01066   else
01067     {
01068       for (i = from; i < to; ++i)
01069     {
01070       int len = diag[i] - rp[i];
01071       int *col = cval + rp[i];
01072       double *val = aval + rp[i];
01073       double sum = rhs[i];
01074 
01075       for (j = 0; j < len; ++j)
01076         {
01077           idx = col[j];
01078           sum -= (val[j] * work_y[idx]);
01079         }
01080       work_y[i] = sum;
01081     }
01082     }
01083 END_FUNC_DH}
01084 
01085 #undef __FUNC__
01086 #define __FUNC__ "backward_solve_private"
01087 void
01088 backward_solve_private (int m, int from, int to, int *rp,
01089             int *cval, int *diag, double *aval,
01090             double *work_y, double *work_x, bool debug)
01091 {
01092   START_FUNC_DH int i, j, idx;
01093 
01094   if (debug)
01095     {
01096       fprintf (logFile,
01097            "\nFACT starting backward_solve_private; from= %i; to= %i, m= %i\n",
01098            1 + from, 1 + to, m);
01099       for (i = from - 1; i >= to; --i)
01100     {
01101       int len = rp[i + 1] - diag[i] - 1;
01102       int *col = cval + diag[i] + 1;
01103       double *val = aval + diag[i] + 1;
01104       double sum = work_y[i];
01105       fprintf (logFile, "FACT   solving for work_x[%i]\n",
01106            i + 1 + beg_rowG);
01107 
01108       for (j = 0; j < len; ++j)
01109         {
01110           idx = col[j];
01111           sum -= (val[j] * work_x[idx]);
01112           fprintf (logFile,
01113                "FACT        sum(%g) -= val[j] (%g) * work_x[idx] (%g)\n",
01114                sum, val[j], work_x[idx]);
01115         }
01116       work_x[i] = sum * aval[diag[i]];
01117       fprintf (logFile, "FACT   work_x[%i] = %g\n", 1 + i, work_x[i]);
01118       fprintf (logFile, "----------\n");
01119     }
01120 
01121     }
01122   else
01123     {
01124       for (i = from - 1; i >= to; --i)
01125     {
01126       int len = rp[i + 1] - diag[i] - 1;
01127       int *col = cval + diag[i] + 1;
01128       double *val = aval + diag[i] + 1;
01129       double sum = work_y[i];
01130 
01131       for (j = 0; j < len; ++j)
01132         {
01133           idx = col[j];
01134           sum -= (val[j] * work_x[idx]);
01135         }
01136       work_x[i] = sum * aval[diag[i]];
01137     }
01138     }
01139 END_FUNC_DH}
01140 
01141 #undef __FUNC__
01142 #define __FUNC__ "Factor_dhInit"
01143 void
01144 Factor_dhInit (void *A, bool fillFlag, bool avalFlag,
01145            double rho, int id, int beg_rowP, Factor_dh * Fout)
01146 {
01147   START_FUNC_DH int m, n, beg_row, alloc;
01148   Factor_dh F;
01149 
01150   EuclidGetDimensions (A, &beg_row, &m, &n);
01151   CHECK_V_ERROR;
01152   alloc = rho * m;
01153   Factor_dhCreate (&F);
01154   CHECK_V_ERROR;
01155 
01156   *Fout = F;
01157   F->m = m;
01158   F->n = n;
01159   F->beg_row = beg_rowP;
01160   F->id = id;
01161   F->alloc = alloc;
01162 
01163   F->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01164   CHECK_V_ERROR;
01165   F->rp[0] = 0;
01166   F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
01167   CHECK_V_ERROR;
01168   F->diag = (int *) MALLOC_DH (m * sizeof (int));
01169   CHECK_V_ERROR;
01170   if (fillFlag)
01171     {
01172       F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
01173       CHECK_V_ERROR;
01174     }
01175   if (avalFlag)
01176     {
01177       F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
01178       CHECK_V_ERROR;
01179     }
01180 END_FUNC_DH}
01181 
01182 #undef __FUNC__
01183 #define __FUNC__ "Factor_dhReallocate"
01184 void
01185 Factor_dhReallocate (Factor_dh F, int used, int additional)
01186 {
01187   START_FUNC_DH int alloc = F->alloc;
01188 
01189   if (used + additional > F->alloc)
01190     {
01191       int *tmpI;
01192       while (alloc < used + additional)
01193     alloc *= 2.0;
01194       F->alloc = alloc;
01195       tmpI = F->cval;
01196       F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
01197       CHECK_V_ERROR;
01198       memcpy (F->cval, tmpI, used * sizeof (int));
01199       FREE_DH (tmpI);
01200       CHECK_V_ERROR;
01201       if (F->fill != NULL)
01202     {
01203       tmpI = F->fill;
01204       F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
01205       CHECK_V_ERROR;
01206       memcpy (F->fill, tmpI, used * sizeof (int));
01207       FREE_DH (tmpI);
01208       CHECK_V_ERROR;
01209     }
01210       if (F->aval != NULL)
01211     {
01212       REAL_DH *tmpF = F->aval;
01213       F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
01214       CHECK_V_ERROR;
01215       memcpy (F->aval, tmpF, used * sizeof (REAL_DH));
01216       FREE_DH (tmpF);
01217       CHECK_V_ERROR;
01218     }
01219     }
01220 END_FUNC_DH}
01221 
01222 #undef __FUNC__
01223 #define __FUNC__ "Factor_dhTranspose"
01224 void
01225 Factor_dhTranspose (Factor_dh A, Factor_dh * Bout)
01226 {
01227   START_FUNC_DH Factor_dh B;
01228 
01229   if (np_dh > 1)
01230     {
01231       SET_V_ERROR ("only for sequential");
01232     }
01233 
01234   Factor_dhCreate (&B);
01235   CHECK_V_ERROR;
01236   *Bout = B;
01237   B->m = B->n = A->m;
01238   if (B->aval == NULL)
01239     {
01240       mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01241                 A->aval, NULL);
01242       CHECK_V_ERROR;
01243     }
01244   else
01245     {
01246       mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01247                 A->aval, &B->aval);
01248       CHECK_V_ERROR;
01249     }
01250 END_FUNC_DH}
01251 
01252 
01253 /* this could be done using OpenMP, but I took it out for now */
01254 #undef __FUNC__
01255 #define __FUNC__ "Factor_dhSolveSeq"
01256 void
01257 Factor_dhSolveSeq (double *rhs, double *lhs, Euclid_dh ctx)
01258 {
01259   START_FUNC_DH Factor_dh F = ctx->F;
01260   if (F == NULL)
01261     {
01262       printf ("F is null.\n");
01263     }
01264   else
01265     {
01266       printf ("F isn't null.\n");
01267     }
01268   int *rp, *cval, *diag;
01269   int i, j, *vi, nz, m = F->m;
01270   REAL_DH *aval, *work;
01271   /* REAL_DH   *scale; */
01272   REAL_DH *v, sum;
01273   bool debug = false;
01274 
01275   if (ctx->F->debug && logFile != NULL)
01276     debug = true;
01277 
01278   rp = F->rp;
01279   cval = F->cval;
01280   aval = F->aval;
01281   diag = F->diag;
01282   /* scale = ctx->scale; */
01283   work = ctx->work;
01284 
01285   if (debug)
01286     {
01287       fprintf (logFile,
01288            "\nFACT ============================================================\n");
01289       fprintf (logFile, "FACT starting Factor_dhSolveSeq\n");
01290 
01291       /* forward solve lower triangle */
01292       fprintf (logFile, "\nFACT   STARTING FORWARD SOLVE\n------------\n");
01293       work[0] = rhs[0];
01294       fprintf (logFile, "FACT   work[0] = %g\n------------\n", work[0]);
01295       for (i = 1; i < m; i++)
01296     {
01297       v = aval + rp[i];
01298       vi = cval + rp[i];
01299       nz = diag[i] - rp[i];
01300       fprintf (logFile, "FACT   solving for work[%i]\n", i + 1);
01301       sum = rhs[i];
01302       for (j = 0; j < nz; ++j)
01303         {
01304           sum -= (v[j] * work[vi[j]]);
01305           fprintf (logFile,
01306                "FACT         sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
01307                sum, v[j], work[vi[j]]);
01308         }
01309       work[i] = sum;
01310       fprintf (logFile, "FACT   work[%i] = %g\n------------\n", 1 + i,
01311            work[i]);
01312     }
01313 
01314 
01315       fprintf (logFile, "\nFACT   work vector at end of forward solve:\n");
01316       for (i = 0; i < m; i++)
01317     fprintf (logFile, "    %i %g\n", i + 1, work[i]);
01318 
01319 
01320       /* backward solve upper triangular boundaries (sequential) */
01321       fprintf (logFile, "\nFACT   STARTING BACKWARD SOLVE\n--------------\n");
01322       for (i = m - 1; i >= 0; i--)
01323     {
01324       v = aval + diag[i] + 1;
01325       vi = cval + diag[i] + 1;
01326       nz = rp[i + 1] - diag[i] - 1;
01327       fprintf (logFile, "FACT   solving for lhs[%i]\n", i + 1);
01328       sum = work[i];
01329       for (j = 0; j < nz; ++j)
01330         {
01331           sum -= (v[j] * work[vi[j]]);
01332           fprintf (logFile,
01333                "FACT         sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
01334                sum, v[j], work[vi[j]]);
01335         }
01336       lhs[i] = work[i] = sum * aval[diag[i]];
01337       fprintf (logFile, "FACT   lhs[%i] = %g\n------------\n", 1 + i,
01338            lhs[i]);
01339       fprintf (logFile, "FACT   solving for lhs[%i]\n", i + 1);
01340     }
01341 
01342       fprintf (logFile, "\nFACT solution: ");
01343       for (i = 0; i < m; ++i)
01344     fprintf (logFile, "%g ", lhs[i]);
01345       fprintf (logFile, "\n");
01346 
01347 
01348     }
01349   else
01350     {
01351       /* forward solve lower triangle */
01352       work[0] = rhs[0];
01353       for (i = 1; i < m; i++)
01354     {
01355       v = aval + rp[i];
01356       vi = cval + rp[i];
01357       nz = diag[i] - rp[i];
01358       sum = rhs[i];
01359       while (nz--)
01360         sum -= (*v++ * work[*vi++]);
01361       work[i] = sum;
01362     }
01363 
01364       /* backward solve upper triangular boundaries (sequential) */
01365       for (i = m - 1; i >= 0; i--)
01366     {
01367       v = aval + diag[i] + 1;
01368       vi = cval + diag[i] + 1;
01369       nz = rp[i + 1] - diag[i] - 1;
01370       sum = work[i];
01371       while (nz--)
01372         sum -= (*v++ * work[*vi++]);
01373       lhs[i] = work[i] = sum * aval[diag[i]];
01374     }
01375     }
01376 END_FUNC_DH}
01377 
01378 /*---------------------------------------------------------------
01379  * next two are used by Factor_dhPrintXXX methods
01380  *---------------------------------------------------------------*/
01381 
01382 #undef __FUNC__
01383 #define __FUNC__ "adjust_bj_private"
01384 void
01385 adjust_bj_private (Factor_dh mat)
01386 {
01387   START_FUNC_DH int i;
01388   int nz = mat->rp[mat->m];
01389   int beg_row = mat->beg_row;
01390   for (i = 0; i < nz; ++i)
01391     mat->cval[i] += beg_row;
01392 END_FUNC_DH}
01393 
01394 #undef __FUNC__
01395 #define __FUNC__ "unadjust_bj_private"
01396 void
01397 unadjust_bj_private (Factor_dh mat)
01398 {
01399   START_FUNC_DH int i;
01400   int nz = mat->rp[mat->m];
01401   int beg_row = mat->beg_row;
01402   for (i = 0; i < nz; ++i)
01403     mat->cval[i] -= beg_row;
01404 END_FUNC_DH}
01405 
01406 #undef __FUNC__
01407 #define __FUNC__ "Factor_dhMaxPivotInverse"
01408 double
01409 Factor_dhMaxPivotInverse (Factor_dh mat)
01410 {
01411   START_FUNC_DH int i, m = mat->m, *diags = mat->diag;
01412   REAL_DH *aval = mat->aval;
01413   double minGlobal = 0.0, min = aval[diags[0]];
01414   double retval;
01415 
01416   for (i = 0; i < m; ++i)
01417     min = MIN (min, fabs (aval[diags[i]]));
01418   if (np_dh == 1)
01419     {
01420       minGlobal = min;
01421     }
01422   else
01423     {
01424       MPI_Reduce (&min, &minGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, comm_dh);
01425     }
01426 
01427   if (minGlobal == 0)
01428     {
01429       retval = 0;
01430     }
01431   else
01432     {
01433       retval = 1.0 / minGlobal;
01434     }
01435 END_FUNC_VAL (retval)}
01436 
01437 #undef __FUNC__
01438 #define __FUNC__ "Factor_dhMaxValue"
01439 double
01440 Factor_dhMaxValue (Factor_dh mat)
01441 {
01442   START_FUNC_DH double maxGlobal = 0.0, max = 0.0;
01443   int i, nz = mat->rp[mat->m];
01444   REAL_DH *aval = mat->aval;
01445 
01446   for (i = 0; i < nz; ++i)
01447     {
01448       max = MAX (max, fabs (aval[i]));
01449     }
01450 
01451   if (np_dh == 1)
01452     {
01453       maxGlobal = max;
01454     }
01455   else
01456     {
01457       MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
01458     }
01459 END_FUNC_VAL (maxGlobal)}
01460 
01461 
01462 #undef __FUNC__
01463 #define __FUNC__ "Factor_dhCondEst"
01464 double
01465 Factor_dhCondEst (Factor_dh mat, Euclid_dh ctx)
01466 {
01467   START_FUNC_DH double max = 0.0, maxGlobal = 0.0;
01468   double *x;
01469   int i, m = mat->m;
01470   Vec_dh lhs, rhs;
01471 
01472   Vec_dhCreate (&lhs);
01473   CHECK_ERROR (-1);
01474   Vec_dhInit (lhs, m);
01475   CHECK_ERROR (-1);
01476   Vec_dhDuplicate (lhs, &rhs);
01477   CHECK_ERROR (-1);
01478   Vec_dhSet (rhs, 1.0);
01479   CHECK_ERROR (-1);
01480   Euclid_dhApply (ctx, rhs->vals, lhs->vals);
01481   CHECK_ERROR (-1);
01482 
01483   x = lhs->vals;
01484   for (i = 0; i < m; ++i)
01485     {
01486       max = MAX (max, fabs (x[i]));
01487     }
01488 
01489   if (np_dh == 1)
01490     {
01491       maxGlobal = max;
01492     }
01493   else
01494     {
01495       MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
01496     }
01497 END_FUNC_VAL (maxGlobal)}
 All Classes Files Functions Variables Enumerations Friends