IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends
mat_dh_private.c
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #include "mat_dh_private.h"
00044 #include "Parser_dh.h"
00045 #include "Hash_i_dh.h"
00046 #include "Mat_dh.h"
00047 #include "Mem_dh.h"
00048 #include "Vec_dh.h"
00049 
00050 #define IS_UPPER_TRI 97
00051 #define IS_LOWER_TRI 98
00052 #define IS_FULL      99
00053 static int isTriangular (int m, int *rp, int *cval);
00054 
00055 /* Instantiates Aout; allocates storage for rp, cval, and aval arrays;
00056    uses rowLengths[] and rowToBlock[] data to fill in rp[].
00057 */
00058 static void mat_par_read_allocate_private (Mat_dh * Aout, int n,
00059                        int *rowLengths, int *rowToBlock);
00060 
00061 /* Currently, divides (partitions)matrix by contiguous sections of rows.
00062    For future expansion: use metis.
00063 */
00064 void mat_partition_private (Mat_dh A, int blocks, int *o2n_row,
00065                 int *rowToBlock);
00066 
00067 
00068 static void convert_triples_to_scr_private (int m, int nz,
00069                         int *I, int *J, double *A,
00070                         int *rp, int *cval, double *aval);
00071 
00072 #if 0
00073 #undef __FUNC__
00074 #define __FUNC__ "mat_dh_print_graph_private"
00075 void
00076 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval,
00077                 double *aval, int *n2o, int *o2n, Hash_i_dh hash,
00078                 FILE * fp)
00079 {
00080   START_FUNC_DH int i, j, row, col;
00081   double val;
00082   bool private_n2o = false;
00083   bool private_hash = false;
00084 
00085   if (n2o == NULL)
00086     {
00087       private_n2o = true;
00088       create_nat_ordering_private (m, &n2o);
00089       CHECK_V_ERROR;
00090       create_nat_ordering_private (m, &o2n);
00091       CHECK_V_ERROR;
00092     }
00093 
00094   if (hash == NULL)
00095     {
00096       private_hash = true;
00097       Hash_i_dhCreate (&hash, -1);
00098       CHECK_V_ERROR;
00099     }
00100 
00101   for (i = 0; i < m; ++i)
00102     {
00103       row = n2o[i];
00104       for (j = rp[row]; j < rp[row + 1]; ++j)
00105     {
00106       col = cval[j];
00107       if (col < beg_row || col >= beg_row + m)
00108         {
00109           int tmp = col;
00110 
00111           /* nonlocal column: get permutation from hash table */
00112           tmp = Hash_i_dhLookup (hash, col);
00113           CHECK_V_ERROR;
00114           if (tmp == -1)
00115         {
00116           sprintf (msgBuf_dh,
00117                "beg_row= %i  m= %i; nonlocal column= %i not in hash table",
00118                beg_row, m, col);
00119           SET_V_ERROR (msgBuf_dh);
00120         }
00121           else
00122         {
00123           col = tmp;
00124         }
00125         }
00126       else
00127         {
00128           col = o2n[col];
00129         }
00130 
00131       if (aval == NULL)
00132         {
00133           val = _MATLAB_ZERO_;
00134         }
00135       else
00136         {
00137           val = aval[j];
00138         }
00139       fprintf (fp, "%i %i %g\n", 1 + row + beg_row, 1 + col, val);
00140     }
00141     }
00142 
00143   if (private_n2o)
00144     {
00145       destroy_nat_ordering_private (n2o);
00146       CHECK_V_ERROR;
00147       destroy_nat_ordering_private (o2n);
00148       CHECK_V_ERROR;
00149     }
00150 
00151   if (private_hash)
00152     {
00153       Hash_i_dhDestroy (hash);
00154       CHECK_V_ERROR;
00155     }
00156 END_FUNC_DH}
00157 
00158 #endif
00159 
00160 
00161 /* currently only for unpermuted */
00162 #undef __FUNC__
00163 #define __FUNC__ "mat_dh_print_graph_private"
00164 void
00165 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval,
00166                 double *aval, int *n2o, int *o2n, Hash_i_dh hash,
00167                 FILE * fp)
00168 {
00169   START_FUNC_DH int i, j, row, col;
00170   bool private_n2o = false;
00171   bool private_hash = false;
00172   int *work = NULL;
00173 
00174   work = (int *) MALLOC_DH (m * sizeof (int));
00175   CHECK_V_ERROR;
00176 
00177   if (n2o == NULL)
00178     {
00179       private_n2o = true;
00180       create_nat_ordering_private (m, &n2o);
00181       CHECK_V_ERROR;
00182       create_nat_ordering_private (m, &o2n);
00183       CHECK_V_ERROR;
00184     }
00185 
00186   if (hash == NULL)
00187     {
00188       private_hash = true;
00189       Hash_i_dhCreate (&hash, -1);
00190       CHECK_V_ERROR;
00191     }
00192 
00193   for (i = 0; i < m; ++i)
00194     {
00195       for (j = 0; j < m; ++j)
00196     work[j] = 0;
00197       row = n2o[i];
00198       for (j = rp[row]; j < rp[row + 1]; ++j)
00199     {
00200       col = cval[j];
00201 
00202       /* local column */
00203       if (col >= beg_row || col < beg_row + m)
00204         {
00205           col = o2n[col];
00206         }
00207 
00208       /* nonlocal column: get permutation from hash table */
00209       else
00210         {
00211           int tmp = col;
00212 
00213           tmp = Hash_i_dhLookup (hash, col);
00214           CHECK_V_ERROR;
00215           if (tmp == -1)
00216         {
00217           sprintf (msgBuf_dh,
00218                "beg_row= %i  m= %i; nonlocal column= %i not in hash table",
00219                beg_row, m, col);
00220           SET_V_ERROR (msgBuf_dh);
00221         }
00222           else
00223         {
00224           col = tmp;
00225         }
00226         }
00227 
00228       work[col] = 1;
00229     }
00230 
00231       for (j = 0; j < m; ++j)
00232     {
00233       if (work[j])
00234         {
00235           fprintf (fp, " x ");
00236         }
00237       else
00238         {
00239           fprintf (fp, "   ");
00240         }
00241     }
00242       fprintf (fp, "\n");
00243     }
00244 
00245   if (private_n2o)
00246     {
00247       destroy_nat_ordering_private (n2o);
00248       CHECK_V_ERROR;
00249       destroy_nat_ordering_private (o2n);
00250       CHECK_V_ERROR;
00251     }
00252 
00253   if (private_hash)
00254     {
00255       Hash_i_dhDestroy (hash);
00256       CHECK_V_ERROR;
00257     }
00258 
00259   if (work != NULL)
00260     {
00261       FREE_DH (work);
00262       CHECK_V_ERROR;
00263     }
00264 END_FUNC_DH}
00265 
00266 #undef __FUNC__
00267 #define __FUNC__ "create_nat_ordering_private"
00268 void
00269 create_nat_ordering_private (int m, int **p)
00270 {
00271   START_FUNC_DH int *tmp, i;
00272 
00273   tmp = *p = (int *) MALLOC_DH (m * sizeof (int));
00274   CHECK_V_ERROR;
00275   for (i = 0; i < m; ++i)
00276     {
00277       tmp[i] = i;
00278     }
00279 END_FUNC_DH}
00280 
00281 #undef __FUNC__
00282 #define __FUNC__ "destroy_nat_ordering_private"
00283 void
00284 destroy_nat_ordering_private (int *p)
00285 {
00286   START_FUNC_DH FREE_DH (p);
00287   CHECK_V_ERROR;
00288 END_FUNC_DH}
00289 
00290 
00291 #undef __FUNC__
00292 #define __FUNC__ "invert_perm"
00293 void
00294 invert_perm (int m, int *pIN, int *pOUT)
00295 {
00296   START_FUNC_DH int i;
00297 
00298   for (i = 0; i < m; ++i)
00299     pOUT[pIN[i]] = i;
00300 END_FUNC_DH}
00301 
00302 
00303 
00304 /* only implemented for a single cpu! */
00305 #undef __FUNC__
00306 #define __FUNC__ "mat_dh_print_csr_private"
00307 void
00308 mat_dh_print_csr_private (int m, int *rp, int *cval, double *aval, FILE * fp)
00309 {
00310   START_FUNC_DH int i, nz = rp[m];
00311 
00312   /* print header line */
00313   fprintf (fp, "%i %i\n", m, rp[m]);
00314 
00315   /* print rp[] */
00316   for (i = 0; i <= m; ++i)
00317     fprintf (fp, "%i ", rp[i]);
00318   fprintf (fp, "\n");
00319 
00320   /* print cval[] */
00321   for (i = 0; i < nz; ++i)
00322     fprintf (fp, "%i ", cval[i]);
00323   fprintf (fp, "\n");
00324 
00325   /* print aval[] */
00326   for (i = 0; i < nz; ++i)
00327     fprintf (fp, "%1.19e ", aval[i]);
00328   fprintf (fp, "\n");
00329 
00330 END_FUNC_DH}
00331 
00332 
00333 /* only implemented for a single cpu! */
00334 #undef __FUNC__
00335 #define __FUNC__ "mat_dh_read_csr_private"
00336 void
00337 mat_dh_read_csr_private (int *mOUT, int **rpOUT, int **cvalOUT,
00338              double **avalOUT, FILE * fp)
00339 {
00340   START_FUNC_DH int i, m, nz, items;
00341   int *rp, *cval;
00342   double *aval;
00343 
00344   /* read header line */
00345   items = fscanf (fp, "%d %d", &m, &nz);
00346   if (items != 2)
00347     {
00348       SET_V_ERROR ("failed to read header");
00349     }
00350   else
00351     {
00352       printf ("mat_dh_read_csr_private:: m= %i  nz= %i\n", m, nz);
00353     }
00354 
00355   *mOUT = m;
00356   rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00357   CHECK_V_ERROR;
00358   cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
00359   CHECK_V_ERROR;
00360   aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
00361   CHECK_V_ERROR;
00362 
00363   /* read rp[] block */
00364   for (i = 0; i <= m; ++i)
00365     {
00366       items = fscanf (fp, "%d", &(rp[i]));
00367       if (items != 1)
00368     {
00369       sprintf (msgBuf_dh, "failed item %i of %i in rp block", i, m + 1);
00370       SET_V_ERROR (msgBuf_dh);
00371     }
00372     }
00373 
00374   /* read cval[] block */
00375   for (i = 0; i < nz; ++i)
00376     {
00377       items = fscanf (fp, "%d", &(cval[i]));
00378       if (items != 1)
00379     {
00380       sprintf (msgBuf_dh, "failed item %i of %i in cval block", i, m + 1);
00381       SET_V_ERROR (msgBuf_dh);
00382     }
00383     }
00384 
00385   /* read aval[] block */
00386   for (i = 0; i < nz; ++i)
00387     {
00388       items = fscanf (fp, "%lg", &(aval[i]));
00389       if (items != 1)
00390     {
00391       sprintf (msgBuf_dh, "failed item %i of %i in aval block", i, m + 1);
00392       SET_V_ERROR (msgBuf_dh);
00393     }
00394     }
00395 END_FUNC_DH}
00396 
00397 /*============================================*/
00398 #define MAX_JUNK 200
00399 
00400 #undef __FUNC__
00401 #define __FUNC__ "mat_dh_read_triples_private"
00402 void
00403 mat_dh_read_triples_private (int ignore, int *mOUT, int **rpOUT,
00404                  int **cvalOUT, double **avalOUT, FILE * fp)
00405 {
00406   START_FUNC_DH int m, n, nz, items, i, j;
00407   int idx = 0;
00408   int *cval, *rp, *I, *J;
00409   double *aval, *A, v;
00410   char junk[MAX_JUNK];
00411   fpos_t fpos;
00412 
00413   /* skip over header */
00414   if (ignore && myid_dh == 0)
00415     {
00416       printf
00417     ("mat_dh_read_triples_private:: ignoring following header lines:\n");
00418       printf
00419     ("--------------------------------------------------------------\n");
00420       for (i = 0; i < ignore; ++i)
00421     {
00422       fgets (junk, MAX_JUNK, fp);
00423       printf ("%s", junk);
00424     }
00425       printf
00426     ("--------------------------------------------------------------\n");
00427       if (fgetpos (fp, &fpos))
00428     SET_V_ERROR ("fgetpos failed!");
00429       printf ("\nmat_dh_read_triples_private::1st two non-ignored lines:\n");
00430       printf
00431     ("--------------------------------------------------------------\n");
00432       for (i = 0; i < 2; ++i)
00433     {
00434       fgets (junk, MAX_JUNK, fp);
00435       printf ("%s", junk);
00436     }
00437       printf
00438     ("--------------------------------------------------------------\n");
00439       if (fsetpos (fp, &fpos))
00440     SET_V_ERROR ("fsetpos failed!");
00441     }
00442 
00443 
00444   if (feof (fp))
00445     printf ("trouble!");
00446 
00447   /* determine matrix dimensions */
00448   m = n = nz = 0;
00449   while (!feof (fp))
00450     {
00451       items = fscanf (fp, "%d %d %lg", &i, &j, &v);
00452       if (items != 3)
00453     {
00454       break;
00455     }
00456       ++nz;
00457       if (i > m)
00458     m = i;
00459       if (j > n)
00460     n = j;
00461     }
00462 
00463   if (myid_dh == 0)
00464     {
00465       printf ("mat_dh_read_triples_private: m= %i  nz= %i\n", m, nz);
00466     }
00467 
00468 
00469   /* reset file, and skip over header again */
00470   rewind (fp);
00471   for (i = 0; i < ignore; ++i)
00472     {
00473       fgets (junk, MAX_JUNK, fp);
00474     }
00475 
00476   /* error check for squareness */
00477   if (m != n)
00478     {
00479       sprintf (msgBuf_dh, "matrix is not square; row= %i, cols= %i", m, n);
00480       SET_V_ERROR (msgBuf_dh);
00481     }
00482 
00483   *mOUT = m;
00484 
00485   /* allocate storage */
00486   rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00487   CHECK_V_ERROR;
00488   cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
00489   CHECK_V_ERROR;
00490   aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
00491   CHECK_V_ERROR;
00492 
00493   I = (int *) MALLOC_DH (nz * sizeof (int));
00494   CHECK_V_ERROR;
00495   J = (int *) MALLOC_DH (nz * sizeof (int));
00496   CHECK_V_ERROR;
00497   A = (double *) MALLOC_DH (nz * sizeof (double));
00498   CHECK_V_ERROR;
00499 
00500   /* read <row, col, value> triples into arrays */
00501   while (!feof (fp))
00502     {
00503       items = fscanf (fp, "%d %d %lg", &i, &j, &v);
00504       if (items < 3)
00505     break;
00506       j--;
00507       i--;
00508       I[idx] = i;
00509       J[idx] = j;
00510       A[idx] = v;
00511       ++idx;
00512     }
00513 
00514   /* convert from triples to sparse-compressed-row storage */
00515   convert_triples_to_scr_private (m, nz, I, J, A, rp, cval, aval);
00516   CHECK_V_ERROR;
00517 
00518   /* if matrix is triangular */
00519   {
00520     int type;
00521     type = isTriangular (m, rp, cval);
00522     CHECK_V_ERROR;
00523     if (type == IS_UPPER_TRI)
00524       {
00525     printf ("CAUTION: matrix is upper triangular; converting to full\n");
00526       }
00527     else if (type == IS_LOWER_TRI)
00528       {
00529     printf ("CAUTION: matrix is lower triangular; converting to full\n");
00530       }
00531 
00532     if (type == IS_UPPER_TRI || type == IS_LOWER_TRI)
00533       {
00534     make_full_private (m, &rp, &cval, &aval);
00535     CHECK_V_ERROR;
00536       }
00537   }
00538 
00539   *rpOUT = rp;
00540   *cvalOUT = cval;
00541   *avalOUT = aval;
00542 
00543   FREE_DH (I);
00544   CHECK_V_ERROR;
00545   FREE_DH (J);
00546   CHECK_V_ERROR;
00547   FREE_DH (A);
00548   CHECK_V_ERROR;
00549 
00550 END_FUNC_DH}
00551 
00552 #undef __FUNC__
00553 #define __FUNC__ "convert_triples_to_scr_private"
00554 void
00555 convert_triples_to_scr_private (int m, int nz, int *I, int *J, double *A,
00556                 int *rp, int *cval, double *aval)
00557 {
00558   START_FUNC_DH int i;
00559   int *rowCounts;
00560 
00561   rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
00562   CHECK_V_ERROR;
00563   for (i = 0; i < m; ++i)
00564     rowCounts[i] = 0;
00565 
00566   /* count number of entries in each row */
00567   for (i = 0; i < nz; ++i)
00568     {
00569       int row = I[i];
00570       rowCounts[row] += 1;
00571     }
00572 
00573   /* prefix-sum to form rp[] */
00574   rp[0] = 0;
00575   for (i = 1; i <= m; ++i)
00576     {
00577       rp[i] = rp[i - 1] + rowCounts[i - 1];
00578     }
00579   memcpy (rowCounts, rp, (m + 1) * sizeof (int));
00580 
00581   /* write SCR arrays */
00582   for (i = 0; i < nz; ++i)
00583     {
00584       int row = I[i];
00585       int col = J[i];
00586       double val = A[i];
00587       int idx = rowCounts[row];
00588       rowCounts[row] += 1;
00589 
00590       cval[idx] = col;
00591       aval[idx] = val;
00592     }
00593 
00594 
00595   FREE_DH (rowCounts);
00596   CHECK_V_ERROR;
00597 END_FUNC_DH}
00598 
00599 
00600 /*======================================================================
00601  * utilities for use in drivers that read, write, convert, and/or
00602  * compare different file types
00603  *======================================================================*/
00604 
00605 void fix_diags_private (Mat_dh A);
00606 void insert_missing_diags_private (Mat_dh A);
00607 
00608 #undef __FUNC__
00609 #define __FUNC__ "readMat"
00610 void
00611 readMat (Mat_dh * Aout, char *ft, char *fn, int ignore)
00612 {
00613   START_FUNC_DH bool makeStructurallySymmetric;
00614   bool fixDiags;
00615   *Aout = NULL;
00616 
00617   makeStructurallySymmetric =
00618     Parser_dhHasSwitch (parser_dh, "-makeSymmetric");
00619   fixDiags = Parser_dhHasSwitch (parser_dh, "-fixDiags");
00620 
00621   if (fn == NULL)
00622     {
00623       SET_V_ERROR ("passed NULL filename; can't open for reading!");
00624     }
00625 
00626   if (!strcmp (ft, "csr"))
00627     {
00628       Mat_dhReadCSR (Aout, fn);
00629       CHECK_V_ERROR;
00630     }
00631 
00632   else if (!strcmp (ft, "trip"))
00633     {
00634       Mat_dhReadTriples (Aout, ignore, fn);
00635       CHECK_V_ERROR;
00636     }
00637 
00638   else if (!strcmp (ft, "ebin"))
00639     {
00640       Mat_dhReadBIN (Aout, fn);
00641       CHECK_V_ERROR;
00642     }
00643 
00644   else if (!strcmp (ft, "petsc"))
00645     {
00646       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00647       SET_V_ERROR (msgBuf_dh);
00648     }
00649 
00650   else
00651     {
00652       sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft);
00653       SET_V_ERROR (msgBuf_dh);
00654     }
00655 
00656   if (makeStructurallySymmetric)
00657     {
00658       printf ("\npadding with zeros to make structurally symmetric\n");
00659       Mat_dhMakeStructurallySymmetric (*Aout);
00660       CHECK_V_ERROR;
00661     }
00662 
00663   if ((*Aout)->m == 0)
00664     {
00665       SET_V_ERROR ("row count = 0; something's wrong!");
00666     }
00667 
00668   if (fixDiags)
00669     {
00670       fix_diags_private (*Aout);
00671       CHECK_V_ERROR;
00672     }
00673 
00674 END_FUNC_DH}
00675 
00676 
00677 #undef __FUNC__
00678 #define __FUNC__ "fix_diags_private"
00679 void
00680 fix_diags_private (Mat_dh A)
00681 {
00682   START_FUNC_DH int i, j, m = A->m, *rp = A->rp, *cval = A->cval;
00683   double *aval = A->aval;
00684   bool insertDiags = false;
00685 
00686   /* verify that all diagonals are present */
00687   for (i = 0; i < m; ++i)
00688     {
00689       bool isMissing = true;
00690       for (j = rp[i]; j < rp[i + 1]; ++j)
00691     {
00692       if (cval[j] == i)
00693         {
00694           isMissing = false;
00695           break;
00696         }
00697     }
00698       if (isMissing)
00699     {
00700       insertDiags = true;
00701       break;
00702     }
00703     }
00704 
00705   if (insertDiags)
00706     {
00707       insert_missing_diags_private (A);
00708       CHECK_V_ERROR;
00709       rp = A->rp;
00710       cval = A->cval;
00711       aval = A->aval;
00712     }
00713 
00714   /* set value of all diags to largest absolute value in each row */
00715   for (i = 0; i < m; ++i)
00716     {
00717       double sum = 0;
00718       for (j = rp[i]; j < rp[i + 1]; ++j)
00719     {
00720       sum = MAX (sum, fabs (aval[j]));
00721     }
00722       for (j = rp[i]; j < rp[i + 1]; ++j)
00723     {
00724       if (cval[j] == i)
00725         {
00726           aval[j] = sum;
00727           break;
00728         }
00729     }
00730     }
00731 
00732 END_FUNC_DH}
00733 
00734 #undef __FUNC__
00735 #define __FUNC__ "insert_missing_diags_private"
00736 void
00737 insert_missing_diags_private (Mat_dh A)
00738 {
00739   START_FUNC_DH int *RP = A->rp, *CVAL = A->cval, m = A->m;
00740   int *rp, *cval;
00741   double *AVAL = A->aval, *aval;
00742   int i, j, nz = RP[m] + m;
00743   int idx = 0;
00744 
00745   rp = A->rp = (int *) MALLOC_DH ((1 + m) * sizeof (int));
00746   CHECK_V_ERROR;
00747   cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int));
00748   CHECK_V_ERROR;
00749   aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double));
00750   CHECK_V_ERROR;
00751   rp[0] = 0;
00752 
00753   for (i = 0; i < m; ++i)
00754     {
00755       bool isMissing = true;
00756       for (j = RP[i]; j < RP[i + 1]; ++j)
00757     {
00758       cval[idx] = CVAL[j];
00759       aval[idx] = AVAL[j];
00760       ++idx;
00761       if (CVAL[j] == i)
00762         isMissing = false;
00763     }
00764       if (isMissing)
00765     {
00766       cval[idx] = i;
00767       aval[idx] = 0.0;
00768       ++idx;
00769     }
00770       rp[i + 1] = idx;
00771     }
00772 
00773   FREE_DH (RP);
00774   CHECK_V_ERROR;
00775   FREE_DH (CVAL);
00776   CHECK_V_ERROR;
00777   FREE_DH (AVAL);
00778   CHECK_V_ERROR;
00779 END_FUNC_DH}
00780 
00781 #undef __FUNC__
00782 #define __FUNC__ "readVec"
00783 void
00784 readVec (Vec_dh * bout, char *ft, char *fn, int ignore)
00785 {
00786   START_FUNC_DH * bout = NULL;
00787 
00788   if (fn == NULL)
00789     {
00790       SET_V_ERROR ("passed NULL filename; can't open for reading!");
00791     }
00792 
00793   if (!strcmp (ft, "csr") || !strcmp (ft, "trip"))
00794     {
00795       Vec_dhRead (bout, ignore, fn);
00796       CHECK_V_ERROR;
00797     }
00798 
00799   else if (!strcmp (ft, "ebin"))
00800     {
00801       Vec_dhReadBIN (bout, fn);
00802       CHECK_V_ERROR;
00803     }
00804 
00805   else if (!strcmp (ft, "petsc"))
00806     {
00807       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00808       SET_V_ERROR (msgBuf_dh);
00809     }
00810 
00811   else
00812     {
00813       sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft);
00814       SET_V_ERROR (msgBuf_dh);
00815     }
00816 
00817 END_FUNC_DH}
00818 
00819 
00820 #undef __FUNC__
00821 #define __FUNC__ "writeMat"
00822 void
00823 writeMat (Mat_dh Ain, char *ft, char *fn)
00824 {
00825   START_FUNC_DH if (fn == NULL)
00826     {
00827       SET_V_ERROR ("passed NULL filename; can't open for writing!");
00828     }
00829 
00830   if (!strcmp (ft, "csr"))
00831     {
00832       Mat_dhPrintCSR (Ain, NULL, fn);
00833       CHECK_V_ERROR;
00834     }
00835 
00836   else if (!strcmp (ft, "trip"))
00837     {
00838       Mat_dhPrintTriples (Ain, NULL, fn);
00839       CHECK_V_ERROR;
00840     }
00841 
00842   else if (!strcmp (ft, "ebin"))
00843     {
00844       Mat_dhPrintBIN (Ain, NULL, fn);
00845       CHECK_V_ERROR;
00846     }
00847 
00848 
00849   else if (!strcmp (ft, "petsc"))
00850     {
00851       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00852       SET_V_ERROR (msgBuf_dh);
00853     }
00854 
00855   else
00856     {
00857       sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft);
00858       SET_V_ERROR (msgBuf_dh);
00859     }
00860 
00861 END_FUNC_DH}
00862 
00863 #undef __FUNC__
00864 #define __FUNC__ "writeVec"
00865 void
00866 writeVec (Vec_dh bin, char *ft, char *fn)
00867 {
00868   START_FUNC_DH if (fn == NULL)
00869     {
00870       SET_V_ERROR ("passed NULL filename; can't open for writing!");
00871     }
00872 
00873   if (!strcmp (ft, "csr") || !strcmp (ft, "trip"))
00874     {
00875       Vec_dhPrint (bin, NULL, fn);
00876       CHECK_V_ERROR;
00877     }
00878 
00879   else if (!strcmp (ft, "ebin"))
00880     {
00881       Vec_dhPrintBIN (bin, NULL, fn);
00882       CHECK_V_ERROR;
00883     }
00884 
00885   else if (!strcmp (ft, "petsc"))
00886     {
00887       sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
00888       SET_V_ERROR (msgBuf_dh);
00889     }
00890 
00891   else
00892     {
00893       sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft);
00894       SET_V_ERROR (msgBuf_dh);
00895     }
00896 
00897 END_FUNC_DH}
00898 
00899 #undef __FUNC__
00900 #define __FUNC__ "isTriangular"
00901 int
00902 isTriangular (int m, int *rp, int *cval)
00903 {
00904   START_FUNC_DH int row, j;
00905   int type;
00906   bool type_lower = false, type_upper = false;
00907 
00908   if (np_dh > 1)
00909     {
00910       SET_ERROR (-1, "only implemented for a single cpu");
00911     }
00912 
00913   for (row = 0; row < m; ++row)
00914     {
00915       for (j = rp[row]; j < rp[row + 1]; ++j)
00916     {
00917       int col = cval[j];
00918       if (col < row)
00919         type_lower = true;
00920       if (col > row)
00921         type_upper = true;
00922     }
00923       if (type_lower && type_upper)
00924     break;
00925     }
00926 
00927   if (type_lower && type_upper)
00928     {
00929       type = IS_FULL;
00930     }
00931   else if (type_lower)
00932     {
00933       type = IS_LOWER_TRI;
00934     }
00935   else
00936     {
00937       type = IS_UPPER_TRI;
00938     }
00939 END_FUNC_VAL (type)}
00940 
00941 /*-----------------------------------------------------------------------------------*/
00942 
00943 static void mat_dh_transpose_reuse_private_private (bool allocateMem, int m,
00944                             int *rpIN, int *cvalIN,
00945                             double *avalIN,
00946                             int **rpOUT,
00947                             int **cvalOUT,
00948                             double **avalOUT);
00949 
00950 
00951 #undef __FUNC__
00952 #define __FUNC__ "mat_dh_transpose_reuse_private"
00953 void
00954 mat_dh_transpose_reuse_private (int m,
00955                 int *rpIN, int *cvalIN, double *avalIN,
00956                 int *rpOUT, int *cvalOUT, double *avalOUT)
00957 {
00958   START_FUNC_DH
00959     mat_dh_transpose_reuse_private_private (false, m, rpIN, cvalIN, avalIN,
00960                         &rpOUT, &cvalOUT, &avalOUT);
00961   CHECK_V_ERROR;
00962 END_FUNC_DH}
00963 
00964 
00965 #undef __FUNC__
00966 #define __FUNC__ "mat_dh_transpose_private"
00967 void
00968 mat_dh_transpose_private (int m, int *RP, int **rpOUT,
00969               int *CVAL, int **cvalOUT,
00970               double *AVAL, double **avalOUT)
00971 {
00972   START_FUNC_DH
00973     mat_dh_transpose_reuse_private_private (true, m, RP, CVAL, AVAL,
00974                         rpOUT, cvalOUT, avalOUT);
00975   CHECK_V_ERROR;
00976 END_FUNC_DH}
00977 
00978 #undef __FUNC__
00979 #define __FUNC__ "mat_dh_transpose_private_private"
00980 void
00981 mat_dh_transpose_reuse_private_private (bool allocateMem, int m,
00982                     int *RP, int *CVAL, double *AVAL,
00983                     int **rpOUT, int **cvalOUT,
00984                     double **avalOUT)
00985 {
00986   START_FUNC_DH int *rp, *cval, *tmp;
00987   int i, j, nz = RP[m];
00988   double *aval;
00989 
00990   if (allocateMem)
00991     {
00992       rp = *rpOUT = (int *) MALLOC_DH ((1 + m) * sizeof (int));
00993       CHECK_V_ERROR;
00994       cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
00995       CHECK_V_ERROR;
00996       if (avalOUT != NULL)
00997     {
00998       aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
00999       CHECK_V_ERROR;
01000     }
01001     }
01002   else
01003     {
01004       rp = *rpOUT;
01005       cval = *cvalOUT;
01006       if (avalOUT != NULL)
01007     aval = *avalOUT;
01008     }
01009 
01010 
01011   tmp = (int *) MALLOC_DH ((1 + m) * sizeof (int));
01012   CHECK_V_ERROR;
01013   for (i = 0; i <= m; ++i)
01014     tmp[i] = 0;
01015 
01016   for (i = 0; i < m; ++i)
01017     {
01018       for (j = RP[i]; j < RP[i + 1]; ++j)
01019     {
01020       int col = CVAL[j];
01021       tmp[col + 1] += 1;
01022     }
01023     }
01024   for (i = 1; i <= m; ++i)
01025     tmp[i] += tmp[i - 1];
01026   memcpy (rp, tmp, (m + 1) * sizeof (int));
01027 
01028   if (avalOUT != NULL)
01029     {
01030       for (i = 0; i < m; ++i)
01031     {
01032       for (j = RP[i]; j < RP[i + 1]; ++j)
01033         {
01034           int col = CVAL[j];
01035           int idx = tmp[col];
01036           cval[idx] = i;
01037           aval[idx] = AVAL[j];
01038           tmp[col] += 1;
01039         }
01040     }
01041     }
01042 
01043   else
01044     {
01045       for (i = 0; i < m; ++i)
01046     {
01047       for (j = RP[i]; j < RP[i + 1]; ++j)
01048         {
01049           int col = CVAL[j];
01050           int idx = tmp[col];
01051           cval[idx] = i;
01052           tmp[col] += 1;
01053         }
01054     }
01055     }
01056 
01057   FREE_DH (tmp);
01058   CHECK_V_ERROR;
01059 END_FUNC_DH}
01060 
01061 /*-----------------------------------------------------------------------------------*/
01062 
01063 #undef __FUNC__
01064 #define __FUNC__ "mat_find_owner"
01065 int
01066 mat_find_owner (int *beg_rows, int *end_rows, int index)
01067 {
01068   START_FUNC_DH int pe, owner = -1;
01069 
01070   for (pe = 0; pe < np_dh; ++pe)
01071     {
01072       if (index >= beg_rows[pe] && index < end_rows[pe])
01073     {
01074       owner = pe;
01075       break;
01076     }
01077     }
01078 
01079   if (owner == -1)
01080     {
01081       sprintf (msgBuf_dh, "failed to find owner for index= %i", index);
01082       SET_ERROR (-1, msgBuf_dh);
01083     }
01084 
01085 END_FUNC_VAL (owner)}
01086 
01087 
01088 #define AVAL_TAG 2
01089 #define CVAL_TAG 3
01090 void partition_and_distribute_private (Mat_dh A, Mat_dh * Bout);
01091 void partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout);
01092 
01093 #undef __FUNC__
01094 #define __FUNC__ "readMat_par"
01095 void
01096 readMat_par (Mat_dh * Aout, char *fileType, char *fileName, int ignore)
01097 {
01098   START_FUNC_DH Mat_dh A = NULL;
01099 
01100   if (myid_dh == 0)
01101     {
01102       int tmp = np_dh;
01103       np_dh = 1;
01104       readMat (&A, fileType, fileName, ignore);
01105       CHECK_V_ERROR;
01106       np_dh = tmp;
01107     }
01108 
01109   if (np_dh == 1)
01110     {
01111       *Aout = A;
01112     }
01113   else
01114     {
01115       if (Parser_dhHasSwitch (parser_dh, "-metis"))
01116     {
01117       partition_and_distribute_metis_private (A, Aout);
01118       CHECK_V_ERROR;
01119     }
01120       else
01121     {
01122       partition_and_distribute_private (A, Aout);
01123       CHECK_V_ERROR;
01124     }
01125     }
01126 
01127   if (np_dh > 1 && A != NULL)
01128     {
01129       Mat_dhDestroy (A);
01130       CHECK_V_ERROR;
01131     }
01132 
01133 
01134   if (Parser_dhHasSwitch (parser_dh, "-printMAT"))
01135     {
01136       char xname[] = "A", *name = xname;
01137       Parser_dhReadString (parser_dh, "-printMat", &name);
01138       Mat_dhPrintTriples (*Aout, NULL, name);
01139       CHECK_V_ERROR;
01140       printf_dh ("\n@@@ readMat_par: printed mat to %s\n\n", xname);
01141     }
01142 
01143 
01144 END_FUNC_DH}
01145 
01146 /* this is bad code! */
01147 #undef __FUNC__
01148 #define __FUNC__ "partition_and_distribute_metis_private"
01149 void
01150 partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout)
01151 {
01152   START_FUNC_DH Mat_dh B = NULL;
01153   Mat_dh C = NULL;
01154   int i, m;
01155   int *rowLengths = NULL;
01156   int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
01157   int *beg_row = NULL, *row_count = NULL;
01158   MPI_Request *send_req = NULL;
01159   MPI_Request *rcv_req = NULL;
01160   MPI_Status *send_status = NULL;
01161   MPI_Status *rcv_status = NULL;
01162 
01163   MPI_Barrier (comm_dh);
01164   printf_dh ("@@@ partitioning with metis\n");
01165 
01166   /* broadcast number of rows to all processors */
01167   if (myid_dh == 0)
01168     m = A->m;
01169   MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
01170 
01171   /* broadcast number of nonzeros in each row to all processors */
01172   rowLengths = (int *) MALLOC_DH (m * sizeof (int));
01173   CHECK_V_ERROR;
01174   rowToBlock = (int *) MALLOC_DH (m * sizeof (int));
01175   CHECK_V_ERROR;
01176 
01177   if (myid_dh == 0)
01178     {
01179       int *tmp = A->rp;
01180       for (i = 0; i < m; ++i)
01181     {
01182       rowLengths[i] = tmp[i + 1] - tmp[i];
01183     }
01184     }
01185   MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
01186 
01187   /* partition matrix */
01188   if (myid_dh == 0)
01189     {
01190       int idx = 0;
01191       int j;
01192 
01193       /* partition and permute matrix */
01194       Mat_dhPartition (A, np_dh, &beg_row, &row_count, &n2o_col, &o2n_row);
01195       ERRCHKA;
01196       Mat_dhPermute (A, n2o_col, &C);
01197       ERRCHKA;
01198 
01199       /* form rowToBlock array */
01200       for (i = 0; i < np_dh; ++i)
01201     {
01202       for (j = beg_row[i]; j < beg_row[i] + row_count[i]; ++j)
01203         {
01204           rowToBlock[idx++] = i;
01205         }
01206     }
01207     }
01208 
01209   /* broadcast partitiioning information to all processors */
01210   MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
01211 
01212   /* allocate storage for local portion of matrix */
01213   mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
01214   CHECK_V_ERROR;
01215 
01216   /* root sends each processor its portion of the matrix */
01217   if (myid_dh == 0)
01218     {
01219       int *cval = C->cval, *rp = C->rp;
01220       double *aval = C->aval;
01221       send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01222       CHECK_V_ERROR;
01223       send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01224       CHECK_V_ERROR;
01225       for (i = 0; i < m; ++i)
01226     {
01227       int owner = rowToBlock[i];
01228       int count = rp[i + 1] - rp[i];
01229 
01230       /* error check for empty row */
01231       if (!count)
01232         {
01233           sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m);
01234           SET_V_ERROR (msgBuf_dh);
01235         }
01236 
01237       MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
01238              send_req + 2 * i);
01239       MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
01240              comm_dh, send_req + 2 * i + 1);
01241     }
01242     }
01243 
01244   /* all processors receive their local rows */
01245   {
01246     int *cval = B->cval;
01247     int *rp = B->rp;
01248     double *aval = B->aval;
01249     m = B->m;
01250 
01251     rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01252     CHECK_V_ERROR;
01253     rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01254     CHECK_V_ERROR;
01255 
01256     for (i = 0; i < m; ++i)
01257       {
01258 
01259     /* error check for empty row */
01260     int count = rp[i + 1] - rp[i];
01261     if (!count)
01262       {
01263         sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m);
01264         SET_V_ERROR (msgBuf_dh);
01265       }
01266 
01267     MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
01268            rcv_req + 2 * i);
01269     MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
01270            rcv_req + 2 * i + 1);
01271       }
01272   }
01273 
01274   /* wait for all sends/receives to finish */
01275   if (myid_dh == 0)
01276     {
01277       MPI_Waitall (m * 2, send_req, send_status);
01278     }
01279   MPI_Waitall (2 * B->m, rcv_req, rcv_status);
01280 
01281   /* clean up */
01282   if (rowLengths != NULL)
01283     {
01284       FREE_DH (rowLengths);
01285       CHECK_V_ERROR;
01286     }
01287   if (o2n_row != NULL)
01288     {
01289       FREE_DH (o2n_row);
01290       CHECK_V_ERROR;
01291     }
01292   if (n2o_col != NULL)
01293     {
01294       FREE_DH (n2o_col);
01295       CHECK_V_ERROR;
01296     }
01297   if (rowToBlock != NULL)
01298     {
01299       FREE_DH (rowToBlock);
01300       CHECK_V_ERROR;
01301     }
01302   if (send_req != NULL)
01303     {
01304       FREE_DH (send_req);
01305       CHECK_V_ERROR;
01306     }
01307   if (rcv_req != NULL)
01308     {
01309       FREE_DH (rcv_req);
01310       CHECK_V_ERROR;
01311     }
01312   if (send_status != NULL)
01313     {
01314       FREE_DH (send_status);
01315       CHECK_V_ERROR;
01316     }
01317   if (rcv_status != NULL)
01318     {
01319       FREE_DH (rcv_status);
01320       CHECK_V_ERROR;
01321     }
01322   if (beg_row != NULL)
01323     {
01324       FREE_DH (beg_row);
01325       CHECK_V_ERROR;
01326     }
01327   if (row_count != NULL)
01328     {
01329       FREE_DH (row_count);
01330       CHECK_V_ERROR;
01331     }
01332   if (C != NULL)
01333     {
01334       Mat_dhDestroy (C);
01335       ERRCHKA;
01336     }
01337 
01338   *Bout = B;
01339 
01340 END_FUNC_DH}
01341 
01342 
01343 #undef __FUNC__
01344 #define __FUNC__ "partition_and_distribute_private"
01345 void
01346 partition_and_distribute_private (Mat_dh A, Mat_dh * Bout)
01347 {
01348   START_FUNC_DH Mat_dh B = NULL;
01349   int i, m;
01350   int *rowLengths = NULL;
01351   int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
01352   MPI_Request *send_req = NULL;
01353   MPI_Request *rcv_req = NULL;
01354   MPI_Status *send_status = NULL;
01355   MPI_Status *rcv_status = NULL;
01356 
01357   MPI_Barrier (comm_dh);
01358 
01359   /* broadcast number of rows to all processors */
01360   if (myid_dh == 0)
01361     m = A->m;
01362   MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
01363 
01364   /* broadcast number of nonzeros in each row to all processors */
01365   rowLengths = (int *) MALLOC_DH (m * sizeof (int));
01366   CHECK_V_ERROR;
01367   if (myid_dh == 0)
01368     {
01369       int *tmp = A->rp;
01370       for (i = 0; i < m; ++i)
01371     {
01372       rowLengths[i] = tmp[i + 1] - tmp[i];
01373     }
01374     }
01375   MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
01376 
01377   /* partition matrix */
01378   rowToBlock = (int *) MALLOC_DH (m * sizeof (int));
01379   CHECK_V_ERROR;
01380 
01381   if (myid_dh == 0)
01382     {
01383       o2n_row = (int *) MALLOC_DH (m * sizeof (int));
01384       CHECK_V_ERROR;
01385       mat_partition_private (A, np_dh, o2n_row, rowToBlock);
01386       CHECK_V_ERROR;
01387     }
01388 
01389   /* broadcast partitiioning information to all processors */
01390   MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
01391 
01392   /* allocate storage for local portion of matrix */
01393   mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
01394   CHECK_V_ERROR;
01395 
01396   /* root sends each processor its portion of the matrix */
01397   if (myid_dh == 0)
01398     {
01399       int *cval = A->cval, *rp = A->rp;
01400       double *aval = A->aval;
01401       send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01402       CHECK_V_ERROR;
01403       send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01404       CHECK_V_ERROR;
01405       for (i = 0; i < m; ++i)
01406     {
01407       int owner = rowToBlock[i];
01408       int count = rp[i + 1] - rp[i];
01409 
01410       /* error check for empty row */
01411       if (!count)
01412         {
01413           sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m);
01414           SET_V_ERROR (msgBuf_dh);
01415         }
01416 
01417       MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
01418              send_req + 2 * i);
01419       MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
01420              comm_dh, send_req + 2 * i + 1);
01421     }
01422     }
01423 
01424   /* all processors receive their local rows */
01425   {
01426     int *cval = B->cval;
01427     int *rp = B->rp;
01428     double *aval = B->aval;
01429     m = B->m;
01430 
01431     rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
01432     CHECK_V_ERROR;
01433     rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
01434     CHECK_V_ERROR;
01435 
01436     for (i = 0; i < m; ++i)
01437       {
01438 
01439     /* error check for empty row */
01440     int count = rp[i + 1] - rp[i];
01441     if (!count)
01442       {
01443         sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m);
01444         SET_V_ERROR (msgBuf_dh);
01445       }
01446 
01447     MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
01448            rcv_req + 2 * i);
01449     MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
01450            rcv_req + 2 * i + 1);
01451       }
01452   }
01453 
01454   /* wait for all sends/receives to finish */
01455   if (myid_dh == 0)
01456     {
01457       MPI_Waitall (m * 2, send_req, send_status);
01458     }
01459   MPI_Waitall (2 * B->m, rcv_req, rcv_status);
01460 
01461   /* clean up */
01462   if (rowLengths != NULL)
01463     {
01464       FREE_DH (rowLengths);
01465       CHECK_V_ERROR;
01466     }
01467   if (o2n_row != NULL)
01468     {
01469       FREE_DH (o2n_row);
01470       CHECK_V_ERROR;
01471     }
01472   if (n2o_col != NULL)
01473     {
01474       FREE_DH (n2o_col);
01475       CHECK_V_ERROR;
01476     }
01477   if (rowToBlock != NULL)
01478     {
01479       FREE_DH (rowToBlock);
01480       CHECK_V_ERROR;
01481     }
01482   if (send_req != NULL)
01483     {
01484       FREE_DH (send_req);
01485       CHECK_V_ERROR;
01486     }
01487   if (rcv_req != NULL)
01488     {
01489       FREE_DH (rcv_req);
01490       CHECK_V_ERROR;
01491     }
01492   if (send_status != NULL)
01493     {
01494       FREE_DH (send_status);
01495       CHECK_V_ERROR;
01496     }
01497   if (rcv_status != NULL)
01498     {
01499       FREE_DH (rcv_status);
01500       CHECK_V_ERROR;
01501     }
01502 
01503   *Bout = B;
01504 
01505 END_FUNC_DH}
01506 
01507 #undef __FUNC__
01508 #define __FUNC__ "mat_par_read_allocate_private"
01509 void
01510 mat_par_read_allocate_private (Mat_dh * Aout, int n, int *rowLengths,
01511                    int *rowToBlock)
01512 {
01513   START_FUNC_DH Mat_dh A;
01514   int i, m, nz, beg_row, *rp, idx;
01515 
01516   Mat_dhCreate (&A);
01517   CHECK_V_ERROR;
01518   *Aout = A;
01519   A->n = n;
01520 
01521   /* count number of rows owned by this processor */
01522   m = 0;
01523   for (i = 0; i < n; ++i)
01524     {
01525       if (rowToBlock[i] == myid_dh)
01526     ++m;
01527     }
01528   A->m = m;
01529 
01530   /* compute global numbering of first  locally owned row */
01531   beg_row = 0;
01532   for (i = 0; i < n; ++i)
01533     {
01534       if (rowToBlock[i] < myid_dh)
01535     ++beg_row;
01536     }
01537   A->beg_row = beg_row;
01538 
01539   /* allocate storage for row-pointer array */
01540   A->rp = rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01541   CHECK_V_ERROR;
01542   rp[0] = 0;
01543 
01544   /* count number of nonzeros owned by this processor, and form rp array */
01545   nz = 0;
01546   idx = 1;
01547   for (i = 0; i < n; ++i)
01548     {
01549       if (rowToBlock[i] == myid_dh)
01550     {
01551       nz += rowLengths[i];
01552       rp[idx++] = nz;
01553     }
01554     }
01555 
01556   /* allocate storage for column indices and values arrays */
01557   A->cval = (int *) MALLOC_DH (nz * sizeof (int));
01558   CHECK_V_ERROR;
01559   A->aval = (double *) MALLOC_DH (nz * sizeof (double));
01560   CHECK_V_ERROR;
01561 END_FUNC_DH}
01562 
01563 
01564 #undef __FUNC__
01565 #define __FUNC__ "mat_partition_private"
01566 void
01567 mat_partition_private (Mat_dh A, int blocks, int *o2n_row, int *rowToBlock)
01568 {
01569   START_FUNC_DH int i, j, n = A->n;
01570   int rpb = n / blocks;     /* rows per block (except possibly last block) */
01571   int idx = 0;
01572 
01573   while (rpb * blocks < n)
01574     ++rpb;
01575 
01576   if (rpb * (blocks - 1) == n)
01577     {
01578       --rpb;
01579       printf_dh ("adjusted rpb to: %i\n", rpb);
01580     }
01581 
01582   for (i = 0; i < n; ++i)
01583     o2n_row[i] = i;
01584 
01585   /* assign all rows to blocks, except for last block, which may
01586      contain less than "rpb" rows
01587    */
01588   for (i = 0; i < blocks - 1; ++i)
01589     {
01590       for (j = 0; j < rpb; ++j)
01591     {
01592       rowToBlock[idx++] = i;
01593     }
01594     }
01595 
01596   /* now deal with the last block in the partition */
01597   i = blocks - 1;
01598   while (idx < n)
01599     rowToBlock[idx++] = i;
01600 
01601 END_FUNC_DH}
01602 
01603 
01604 /* may produce incorrect result if input is not triangular! */
01605 #undef __FUNC__
01606 #define __FUNC__ "make_full_private"
01607 void
01608 make_full_private (int m, int **rpIN, int **cvalIN, double **avalIN)
01609 {
01610   START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
01611   double *avalNew, *aval = *avalIN;
01612   int nz, *rowCounts = NULL;
01613 
01614   /* count the number of nonzeros in each row */
01615   rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01616   CHECK_V_ERROR;
01617   for (i = 0; i <= m; ++i)
01618     rowCounts[i] = 0;
01619 
01620   for (i = 0; i < m; ++i)
01621     {
01622       for (j = rp[i]; j < rp[i + 1]; ++j)
01623     {
01624       int col = cval[j];
01625       rowCounts[i + 1] += 1;
01626       if (col != i)
01627         rowCounts[col + 1] += 1;
01628     }
01629     }
01630 
01631   /* prefix sum to form row pointers for full representation */
01632   rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01633   CHECK_V_ERROR;
01634   for (i = 1; i <= m; ++i)
01635     rowCounts[i] += rowCounts[i - 1];
01636   memcpy (rpNew, rowCounts, (m + 1) * sizeof (int));
01637 
01638   /* form full representation */
01639   nz = rpNew[m];
01640 
01641   cvalNew = (int *) MALLOC_DH (nz * sizeof (int));
01642   CHECK_V_ERROR;
01643   avalNew = (double *) MALLOC_DH (nz * sizeof (double));
01644   CHECK_V_ERROR;
01645   for (i = 0; i < m; ++i)
01646     {
01647       for (j = rp[i]; j < rp[i + 1]; ++j)
01648     {
01649       int col = cval[j];
01650       double val = aval[j];
01651 
01652       cvalNew[rowCounts[i]] = col;
01653       avalNew[rowCounts[i]] = val;
01654       rowCounts[i] += 1;
01655       if (col != i)
01656         {
01657           cvalNew[rowCounts[col]] = i;
01658           avalNew[rowCounts[col]] = val;
01659           rowCounts[col] += 1;
01660         }
01661     }
01662     }
01663 
01664   if (rowCounts != NULL)
01665     {
01666       FREE_DH (rowCounts);
01667       CHECK_V_ERROR;
01668     }
01669   FREE_DH (cval);
01670   CHECK_V_ERROR;
01671   FREE_DH (rp);
01672   CHECK_V_ERROR;
01673   FREE_DH (aval);
01674   CHECK_V_ERROR;
01675   *rpIN = rpNew;
01676   *cvalIN = cvalNew;
01677   *avalIN = avalNew;
01678 END_FUNC_DH}
01679 
01680 #undef __FUNC__
01681 #define __FUNC__ "make_symmetric_private"
01682 void
01683 make_symmetric_private (int m, int **rpIN, int **cvalIN, double **avalIN)
01684 {
01685   START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
01686   double *avalNew, *aval = *avalIN;
01687   int nz, *rowCounts = NULL;
01688   int *rpTrans, *cvalTrans;
01689   int *work;
01690   double *avalTrans;
01691   int nzCount = 0, transCount = 0;
01692 
01693   mat_dh_transpose_private (m, rp, &rpTrans,
01694                 cval, &cvalTrans, aval, &avalTrans);
01695   CHECK_V_ERROR;
01696 
01697   /* count the number of nonzeros in each row */
01698   work = (int *) MALLOC_DH (m * sizeof (int));
01699   CHECK_V_ERROR;
01700   for (i = 0; i < m; ++i)
01701     work[i] = -1;
01702   rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01703   CHECK_V_ERROR;
01704   for (i = 0; i <= m; ++i)
01705     rowCounts[i] = 0;
01706 
01707   for (i = 0; i < m; ++i)
01708     {
01709       int ct = 0;
01710       for (j = rp[i]; j < rp[i + 1]; ++j)
01711     {
01712       int col = cval[j];
01713       work[col] = i;
01714       ++ct;
01715       ++nzCount;
01716     }
01717       for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
01718     {
01719       int col = cvalTrans[j];
01720       if (work[col] != i)
01721         {
01722           ++ct;
01723           ++transCount;
01724         }
01725     }
01726       rowCounts[i + 1] = ct;
01727     }
01728 
01729   /*---------------------------------------------------------
01730    * if matrix is already symmetric, do nothing
01731    *---------------------------------------------------------*/
01732   if (transCount == 0)
01733     {
01734       printf
01735     ("make_symmetric_private: matrix is already structurally symmetric!\n");
01736       FREE_DH (rpTrans);
01737       CHECK_V_ERROR;
01738       FREE_DH (cvalTrans);
01739       CHECK_V_ERROR;
01740       FREE_DH (avalTrans);
01741       CHECK_V_ERROR;
01742       FREE_DH (work);
01743       CHECK_V_ERROR;
01744       FREE_DH (rowCounts);
01745       CHECK_V_ERROR;
01746       goto END_OF_FUNCTION;
01747     }
01748 
01749   /*---------------------------------------------------------
01750    * otherwise, finish symmetrizing
01751    *---------------------------------------------------------*/
01752   else
01753     {
01754       printf ("original nz= %i\n", rp[m]);
01755       printf ("zeros added= %i\n", transCount);
01756       printf
01757     ("ratio of added zeros to nonzeros = %0.2f (assumes all original entries were nonzero!)\n",
01758      (double) transCount / (double) (nzCount));
01759     }
01760 
01761   /* prefix sum to form row pointers for full representation */
01762   rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01763   CHECK_V_ERROR;
01764   for (i = 1; i <= m; ++i)
01765     rowCounts[i] += rowCounts[i - 1];
01766   memcpy (rpNew, rowCounts, (m + 1) * sizeof (int));
01767   for (i = 0; i < m; ++i)
01768     work[i] = -1;
01769 
01770   /* form full representation */
01771   nz = rpNew[m];
01772   cvalNew = (int *) MALLOC_DH (nz * sizeof (int));
01773   CHECK_V_ERROR;
01774   avalNew = (double *) MALLOC_DH (nz * sizeof (double));
01775   CHECK_V_ERROR;
01776   for (i = 0; i < m; ++i)
01777     work[i] = -1;
01778 
01779   for (i = 0; i < m; ++i)
01780     {
01781       for (j = rp[i]; j < rp[i + 1]; ++j)
01782     {
01783       int col = cval[j];
01784       double val = aval[j];
01785       work[col] = i;
01786       cvalNew[rowCounts[i]] = col;
01787       avalNew[rowCounts[i]] = val;
01788       rowCounts[i] += 1;
01789     }
01790       for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
01791     {
01792       int col = cvalTrans[j];
01793       if (work[col] != i)
01794         {
01795           cvalNew[rowCounts[i]] = col;
01796           avalNew[rowCounts[i]] = 0.0;
01797           rowCounts[i] += 1;
01798         }
01799     }
01800     }
01801 
01802   if (rowCounts != NULL)
01803     {
01804       FREE_DH (rowCounts);
01805       CHECK_V_ERROR;
01806     }
01807   FREE_DH (work);
01808   CHECK_V_ERROR;
01809   FREE_DH (cval);
01810   CHECK_V_ERROR;
01811   FREE_DH (rp);
01812   CHECK_V_ERROR;
01813   FREE_DH (aval);
01814   CHECK_V_ERROR;
01815   FREE_DH (cvalTrans);
01816   CHECK_V_ERROR;
01817   FREE_DH (rpTrans);
01818   CHECK_V_ERROR;
01819   FREE_DH (avalTrans);
01820   CHECK_V_ERROR;
01821   *rpIN = rpNew;
01822   *cvalIN = cvalNew;
01823   *avalIN = avalNew;
01824 
01825 END_OF_FUNCTION:;
01826 
01827 END_FUNC_DH}
01828 
01829 #undef __FUNC__
01830 #define __FUNC__ "profileMat"
01831 void
01832 profileMat (Mat_dh A)
01833 {
01834   START_FUNC_DH Mat_dh B = NULL;
01835   int type;
01836   int m;
01837   int i, j;
01838   int *work1;
01839   double *work2;
01840   bool isStructurallySymmetric = true;
01841   bool isNumericallySymmetric = true;
01842   bool is_Triangular = false;
01843   int zeroCount = 0, nz;
01844 
01845   if (myid_dh > 0)
01846     {
01847       SET_V_ERROR ("only for a single MPI task!");
01848     }
01849 
01850   m = A->m;
01851 
01852   printf ("\nYY----------------------------------------------------\n");
01853 
01854   /* count number of explicit zeros */
01855   nz = A->rp[m];
01856   for (i = 0; i < nz; ++i)
01857     {
01858       if (A->aval[i] == 0)
01859     ++zeroCount;
01860     }
01861   printf ("YY  row count:      %i\n", m);
01862   printf ("YY  nz count:       %i\n", nz);
01863   printf ("YY  explicit zeros: %i (entire matrix)\n", zeroCount);
01864 
01865   /* count number of missing or zero diagonals */
01866   {
01867     int m_diag = 0, z_diag = 0;
01868     for (i = 0; i < m; ++i)
01869       {
01870     bool flag = true;
01871     for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
01872       {
01873         int col = A->cval[j];
01874 
01875         /* row has an explicit diagonal element */
01876         if (col == i)
01877           {
01878         double val = A->aval[j];
01879         flag = false;
01880         if (val == 0.0)
01881           ++z_diag;
01882         break;
01883           }
01884       }
01885 
01886     /* row has an implicit zero diagonal element */
01887     if (flag)
01888       ++m_diag;
01889       }
01890     printf ("YY  missing diagonals:   %i\n", m_diag);
01891     printf ("YY  explicit zero diags: %i\n", z_diag);
01892   }
01893 
01894   /* check to see if matrix is triangular */
01895   type = isTriangular (m, A->rp, A->cval);
01896   CHECK_V_ERROR;
01897   if (type == IS_UPPER_TRI)
01898     {
01899       printf ("YY  matrix is upper triangular\n");
01900       is_Triangular = true;
01901       goto END_OF_FUNCTION;
01902     }
01903   else if (type == IS_LOWER_TRI)
01904     {
01905       printf ("YY  matrix is lower triangular\n");
01906       is_Triangular = true;
01907       goto END_OF_FUNCTION;
01908     }
01909 
01910   /* if not triangular, count nz in each triangle */
01911   {
01912     int unz = 0, lnz = 0;
01913     for (i = 0; i < m; ++i)
01914       {
01915     for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
01916       {
01917         int col = A->cval[j];
01918         if (col < i)
01919           ++lnz;
01920         if (col > i)
01921           ++unz;
01922       }
01923       }
01924     printf ("YY  strict upper triangular nonzeros: %i\n", unz);
01925     printf ("YY  strict lower triangular nonzeros: %i\n", lnz);
01926   }
01927 
01928 
01929 
01930 
01931   Mat_dhTranspose (A, &B);
01932   CHECK_V_ERROR;
01933 
01934   /* check for structural and numerical symmetry */
01935 
01936   work1 = (int *) MALLOC_DH (m * sizeof (int));
01937   CHECK_V_ERROR;
01938   work2 = (double *) MALLOC_DH (m * sizeof (double));
01939   CHECK_V_ERROR;
01940   for (i = 0; i < m; ++i)
01941     work1[i] = -1;
01942   for (i = 0; i < m; ++i)
01943     work2[i] = 0.0;
01944 
01945   for (i = 0; i < m; ++i)
01946     {
01947       for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
01948     {
01949       int col = A->cval[j];
01950       double val = A->aval[j];
01951       work1[col] = i;
01952       work2[col] = val;
01953     }
01954       for (j = B->rp[i]; j < B->rp[i + 1]; ++j)
01955     {
01956       int col = B->cval[j];
01957       double val = B->aval[j];
01958 
01959       if (work1[col] != i)
01960         {
01961           isStructurallySymmetric = false;
01962           isNumericallySymmetric = false;
01963           goto END_OF_FUNCTION;
01964         }
01965       if (work2[col] != val)
01966         {
01967           isNumericallySymmetric = false;
01968           work2[col] = 0.0;
01969         }
01970     }
01971     }
01972 
01973 
01974 END_OF_FUNCTION:;
01975 
01976   if (!is_Triangular)
01977     {
01978       printf ("YY  matrix is NOT triangular\n");
01979       if (isStructurallySymmetric)
01980     {
01981       printf ("YY  matrix IS structurally symmetric\n");
01982     }
01983       else
01984     {
01985       printf ("YY  matrix is NOT structurally symmetric\n");
01986     }
01987       if (isNumericallySymmetric)
01988     {
01989       printf ("YY  matrix IS numerically symmetric\n");
01990     }
01991       else
01992     {
01993       printf ("YY  matrix is NOT numerically symmetric\n");
01994     }
01995     }
01996 
01997   if (work1 != NULL)
01998     {
01999       FREE_DH (work1);
02000       CHECK_V_ERROR;
02001     }
02002   if (work2 != NULL)
02003     {
02004       FREE_DH (work2);
02005       CHECK_V_ERROR;
02006     }
02007   if (B != NULL)
02008     {
02009       Mat_dhDestroy (B);
02010       CHECK_V_ERROR;
02011     }
02012 
02013   printf ("YY----------------------------------------------------\n");
02014 
02015 END_FUNC_DH}
 All Classes Files Functions Variables Enumerations Friends