IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends
SubdomainGraph_dh.c
00001 /*@HEADER
00002 // ***********************************************************************
00003 //
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 */
00042 
00043 #include "SubdomainGraph_dh.h"
00044 #include "getRow_dh.h"
00045 #include "Mem_dh.h"
00046 #include "Parser_dh.h"
00047 #include "Hash_i_dh.h"
00048 #include "mat_dh_private.h"
00049 #include "io_dh.h"
00050 #include "SortedSet_dh.h"
00051 #include "shellSort_dh.h"
00052 
00053 
00054 /* for debugging only! */
00055 #include <unistd.h>
00056 
00057 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
00058                   void *A);
00059 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
00060                   void *A);
00061 /*
00062 static void partition_metis_private(SubdomainGraph_dh s, void *A);
00063 
00064   grep for same below!
00065 */
00066 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
00067                       bool bj);
00068 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s);
00069 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m,
00070                          void *A);
00071 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m,
00072                         void *A);
00073 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m,
00074                           void *A);
00075 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
00076                      int *interiorNodes, int *bdryNodes,
00077                      int *interiorCount, int *bdryCount);
00078 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m,
00079                        void *A, int *interiorNodes,
00080                        int *bdryNodes, int *interiorCount,
00081                        int *bdryCount);
00082 
00083 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
00084   /* above also forms n2o[] and o2n[] */
00085 
00086 static void find_ordered_neighbors_private (SubdomainGraph_dh s);
00087 static void color_subdomain_graph_private (SubdomainGraph_dh s);
00088 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m);
00089 
00090 #undef __FUNC__
00091 #define __FUNC__ "SubdomainGraph_dhCreate"
00092 void
00093 SubdomainGraph_dhCreate (SubdomainGraph_dh * s)
00094 {
00095   START_FUNC_DH
00096     struct _subdomain_dh *tmp =
00097     (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
00098   CHECK_V_ERROR;
00099   *s = tmp;
00100 
00101   tmp->blocks = 1;
00102   tmp->ptrs = tmp->adj = NULL;
00103   tmp->colors = 1;
00104   tmp->colorVec = NULL;
00105   tmp->o2n_sub = tmp->n2o_sub = NULL;
00106   tmp->beg_row = tmp->beg_rowP = NULL;
00107   tmp->bdry_count = tmp->row_count = NULL;
00108   tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
00109   tmp->loCount = tmp->hiCount = tmp->allCount = 0;
00110 
00111   tmp->m = 0;
00112   tmp->n2o_row = tmp->o2n_col = NULL;
00113   tmp->o2n_ext = tmp->n2o_ext = NULL;
00114 
00115   tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
00116   tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
00117   {
00118     int i;
00119     for (i = 0; i < TIMING_BINS_SG; ++i)
00120       tmp->timing[i] = 0.0;
00121   }
00122 END_FUNC_DH}
00123 
00124 #undef __FUNC__
00125 #define __FUNC__ "SubdomainGraph_dhDestroy"
00126 void
00127 SubdomainGraph_dhDestroy (SubdomainGraph_dh s)
00128 {
00129   START_FUNC_DH if (s->ptrs != NULL)
00130     {
00131       FREE_DH (s->ptrs);
00132       CHECK_V_ERROR;
00133     }
00134   if (s->adj != NULL)
00135     {
00136       FREE_DH (s->adj);
00137       CHECK_V_ERROR;
00138     }
00139   if (s->colorVec != NULL)
00140     {
00141       FREE_DH (s->colorVec);
00142       CHECK_V_ERROR;
00143     }
00144   if (s->o2n_sub != NULL)
00145     {
00146       FREE_DH (s->o2n_sub);
00147       CHECK_V_ERROR;
00148     }
00149   if (s->n2o_sub != NULL)
00150     {
00151       FREE_DH (s->n2o_sub);
00152       CHECK_V_ERROR;
00153     }
00154 
00155   if (s->beg_row != NULL)
00156     {
00157       FREE_DH (s->beg_row);
00158       CHECK_V_ERROR;
00159     }
00160   if (s->beg_rowP != NULL)
00161     {
00162       FREE_DH (s->beg_rowP);
00163       CHECK_V_ERROR;
00164     }
00165   if (s->row_count != NULL)
00166     {
00167       FREE_DH (s->row_count);
00168       CHECK_V_ERROR;
00169     }
00170   if (s->bdry_count != NULL)
00171     {
00172       FREE_DH (s->bdry_count);
00173       CHECK_V_ERROR;
00174     }
00175   if (s->loNabors != NULL)
00176     {
00177       FREE_DH (s->loNabors);
00178       CHECK_V_ERROR;
00179     }
00180   if (s->hiNabors != NULL)
00181     {
00182       FREE_DH (s->hiNabors);
00183       CHECK_V_ERROR;
00184     }
00185   if (s->allNabors != NULL)
00186     {
00187       FREE_DH (s->allNabors);
00188       CHECK_V_ERROR;
00189     }
00190 
00191   if (s->n2o_row != NULL)
00192     {
00193       FREE_DH (s->n2o_row);
00194       CHECK_V_ERROR;
00195     }
00196   if (s->o2n_col != NULL)
00197     {
00198       FREE_DH (s->o2n_col);
00199       CHECK_V_ERROR;
00200     }
00201   if (s->o2n_ext != NULL)
00202     {
00203       Hash_i_dhDestroy (s->o2n_ext);
00204       CHECK_V_ERROR;
00205     }
00206   if (s->n2o_ext != NULL)
00207     {
00208       Hash_i_dhDestroy (s->n2o_ext);
00209       CHECK_V_ERROR;
00210     }
00211   FREE_DH (s);
00212   CHECK_V_ERROR;
00213 END_FUNC_DH}
00214 
00215 #undef __FUNC__
00216 #define __FUNC__ "SubdomainGraph_dhInit"
00217 void
00218 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00219 {
00220   START_FUNC_DH double t1 = MPI_Wtime ();
00221 
00222   if (blocks < 1)
00223     blocks = 1;
00224 
00225   if (np_dh == 1 || blocks > 1)
00226     {
00227       s->blocks = blocks;
00228       init_seq_private (s, blocks, bj, A);
00229       CHECK_V_ERROR;
00230     }
00231   else
00232     {
00233       s->blocks = np_dh;
00234       init_mpi_private (s, np_dh, bj, A);
00235       CHECK_V_ERROR;
00236     }
00237 
00238   s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
00239 END_FUNC_DH}
00240 
00241 
00242 #undef __FUNC__
00243 #define __FUNC__ "SubdomainGraph_dhFindOwner"
00244 int
00245 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted)
00246 {
00247   START_FUNC_DH int sd;
00248   int *beg_row = s->beg_row;
00249   int *row_count = s->row_count;
00250   int owner = -1, blocks = s->blocks;
00251 
00252   if (permuted)
00253     beg_row = s->beg_rowP;
00254 
00255   /* determine the subdomain that contains "idx" */
00256   for (sd = 0; sd < blocks; ++sd)
00257     {
00258       if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
00259     {
00260       owner = sd;
00261       break;
00262     }
00263     }
00264 
00265   if (owner == -1)
00266     {
00267 
00268       fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
00269       fprintf (stderr, "blocks= %i\n", blocks);
00270 
00271       sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
00272       SET_ERROR (-1, msgBuf_dh);
00273     }
00274 
00275 END_FUNC_VAL (owner)}
00276 
00277 
00278 #undef __FUNC__
00279 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
00280 void
00281 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp)
00282 {
00283   START_FUNC_DH int i, j, k;
00284   double max = 0, min = INT_MAX;
00285 
00286   fprintf (fp,
00287        "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
00288   fprintf (fp, "colors used     = %i\n", s->colors);
00289   fprintf (fp, "subdomain count = %i\n", s->blocks);
00290 
00291 
00292   fprintf (fp, "\ninterior/boundary node ratios:\n");
00293 
00294   for (i = 0; i < s->blocks; ++i)
00295     {
00296       int inNodes = s->row_count[i] - s->bdry_count[i];
00297       int bdNodes = s->bdry_count[i];
00298       double ratio;
00299 
00300       if (bdNodes == 0)
00301     {
00302       ratio = -1;
00303     }
00304       else
00305     {
00306       ratio = (double) inNodes / (double) bdNodes;
00307     }
00308 
00309       max = MAX (max, ratio);
00310       min = MIN (min, ratio);
00311       fprintf (fp,
00312            "   P_%i: first= %3i  rowCount= %3i  interior= %3i  bdry= %3i  ratio= %0.1f\n",
00313            i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
00314            ratio);
00315     }
00316 
00317 
00318   fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
00319   fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
00320 
00321 
00322     /*-----------------------------------------
00323      * subdomain graph
00324      *-----------------------------------------*/
00325   if (s->adj != NULL)
00326     {
00327       fprintf (fp, "\nunpermuted subdomain graph: \n");
00328       for (i = 0; i < s->blocks; ++i)
00329     {
00330       fprintf (fp, "%i :: ", i);
00331       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
00332         {
00333           fprintf (fp, "%i  ", s->adj[j]);
00334         }
00335       fprintf (fp, "\n");
00336     }
00337     }
00338 
00339 
00340     /*-----------------------------------------
00341      * subdomain permutation
00342      *-----------------------------------------*/
00343   fprintf (fp, "\no2n subdomain permutation:\n");
00344   for (i = 0; i < s->blocks; ++i)
00345     {
00346       fprintf (fp, "  %i %i\n", i, s->o2n_sub[i]);
00347     }
00348   fprintf (fp, "\n");
00349 
00350   if (np_dh > 1)
00351     {
00352 
00353     /*-----------------------------------------
00354      * local n2o_row permutation
00355      *-----------------------------------------*/
00356       fprintf (fp, "\nlocal n2o_row permutation:\n   ");
00357       for (i = 0; i < s->row_count[myid_dh]; ++i)
00358     {
00359       fprintf (fp, "%i ", s->n2o_row[i]);
00360     }
00361       fprintf (fp, "\n");
00362 
00363     /*-----------------------------------------
00364      * local n2o permutation
00365      *-----------------------------------------*/
00366       fprintf (fp, "\nlocal o2n_col permutation:\n   ");
00367       for (i = 0; i < s->row_count[myid_dh]; ++i)
00368     {
00369       fprintf (fp, "%i ", s->o2n_col[i]);
00370     }
00371       fprintf (fp, "\n");
00372 
00373     }
00374   else
00375     {
00376     /*-----------------------------------------
00377      * local n2o_row permutation 
00378      *-----------------------------------------*/
00379       fprintf (fp, "\nlocal n2o_row permutation:\n");
00380       fprintf (fp, "--------------------------\n");
00381       for (k = 0; k < s->blocks; ++k)
00382     {
00383       int beg_row = s->beg_row[k];
00384       int end_row = beg_row + s->row_count[k];
00385 
00386       for (i = beg_row; i < end_row; ++i)
00387         {
00388           fprintf (fp, "%i ", s->n2o_row[i]);
00389         }
00390       fprintf (fp, "\n");
00391     }
00392 
00393     /*-----------------------------------------
00394      * local n2o permutation
00395      *-----------------------------------------*/
00396       fprintf (fp, "\nlocal o2n_col permutation:\n");
00397       fprintf (fp, "--------------------------\n");
00398       for (k = 0; k < s->blocks; ++k)
00399     {
00400       int beg_row = s->beg_row[k];
00401       int end_row = beg_row + s->row_count[k];
00402 
00403       for (i = beg_row; i < end_row; ++i)
00404         {
00405           fprintf (fp, "%i ", s->o2n_col[i]);
00406         }
00407       fprintf (fp, "\n");
00408     }
00409 
00410 
00411     }
00412 
00413 END_FUNC_DH}
00414 
00415 #undef __FUNC__
00416 #define __FUNC__ "init_seq_private"
00417 void
00418 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00419 {
00420   START_FUNC_DH int m, n, beg_row;
00421   double t1;
00422 
00423   /*-------------------------------------------------------
00424    * get number of local rows (m), global rows (n), and
00425    * global numbering of first locally owned row
00426    * (for sequential, beg_row=0 and m == n
00427    *-------------------------------------------------------*/
00428   EuclidGetDimensions (A, &beg_row, &m, &n);
00429   CHECK_V_ERROR;
00430   s->m = n;
00431 
00432   /*-------------------------------------------------------
00433    * allocate storage for all data structures
00434    * EXCEPT s->adj and hash tables.
00435    * (but note that hash tables aren't used for sequential)
00436    *-------------------------------------------------------*/
00437   allocate_storage_private (s, blocks, m, bj);
00438   CHECK_V_ERROR;
00439 
00440   /*-------------------------------------------------------------
00441    * Fill in: beg_row[]
00442    *          beg_rowP[]
00443    *          row_count[]
00444    * At this point, beg_rowP[] is a copy of beg_row[])
00445    *-------------------------------------------------------------*/
00446   {
00447     int i;
00448     int rpp = m / blocks;
00449 
00450     if (rpp * blocks < m)
00451       ++rpp;
00452 
00453     s->beg_row[0] = 0;
00454     for (i = 1; i < blocks; ++i)
00455       s->beg_row[i] = rpp + s->beg_row[i - 1];
00456     for (i = 0; i < blocks; ++i)
00457       s->row_count[i] = rpp;
00458     s->row_count[blocks - 1] = m - rpp * (blocks - 1);
00459   }
00460   memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
00461 
00462 
00463   /*-----------------------------------------------------------------
00464    * Find all neighboring processors in subdomain graph.
00465    * This block fills in: allNabors[]
00466    *-----------------------------------------------------------------*/
00467   /* NA for sequential! */
00468 
00469 
00470   /*-------------------------------------------------------
00471    * Count number of interior nodes for each subdomain;
00472    * also, form permutation vector to order boundary
00473    * nodes last in each subdomain.
00474    * This block fills in: bdry_count[]
00475    *                      n2o_col[]
00476    *                      o2n_row[]
00477    *-------------------------------------------------------*/
00478   t1 = MPI_Wtime ();
00479   if (!bj)
00480     {
00481       find_bdry_nodes_seq_private (s, m, A);
00482       CHECK_V_ERROR;
00483     }
00484   else
00485     {
00486       int i;
00487       for (i = 0; i < m; ++i)
00488     {
00489       s->n2o_row[i] = i;
00490       s->o2n_col[i] = i;
00491     }
00492     }
00493   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00494 
00495   /*-------------------------------------------------------
00496    * Form subdomain graph,
00497    * then color and reorder subdomain graph.
00498    * This block fills in: ptr[]
00499    *                      adj[]
00500    *                      o2n_sub[]
00501    *                      n2o_sub[]
00502    *                      beg_rowP[]
00503    *-------------------------------------------------------*/
00504   t1 = MPI_Wtime ();
00505   if (!bj)
00506     {
00507       form_subdomaingraph_seq_private (s, m, A);
00508       CHECK_V_ERROR;
00509       if (s->doNotColor)
00510     {
00511       int i;
00512       printf_dh ("subdomain coloring and reordering is OFF\n");
00513       for (i = 0; i < blocks; ++i)
00514         {
00515           s->o2n_sub[i] = i;
00516           s->n2o_sub[i] = i;
00517           s->colorVec[i] = 0;
00518         }
00519     }
00520       else
00521     {
00522       SET_INFO ("subdomain coloring and reordering is ON");
00523       color_subdomain_graph_private (s);
00524       CHECK_V_ERROR;
00525     }
00526     }
00527 
00528   /* bj setup */
00529   else
00530     {
00531       int i;
00532       for (i = 0; i < blocks; ++i)
00533     {
00534       s->o2n_sub[i] = i;
00535       s->n2o_sub[i] = i;
00536     }
00537     }
00538   s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00539 
00540   /*-------------------------------------------------------
00541    * Here's a step we DON'T do for the parallel case:
00542    * we need to adjust the matrix row and column perms
00543    * to reflect subdomain reordering (for the parallel
00544    * case, these permutation vectors are purely local and
00545    * zero-based)
00546    *-------------------------------------------------------*/
00547   if (!bj)
00548     {
00549       adjust_matrix_perms_private (s, m);
00550       CHECK_V_ERROR;
00551     }
00552 
00553   /*-------------------------------------------------------
00554    * Build lists of lower and higher ordered neighbors.
00555    * This block fills in: loNabors[]
00556    *                      hiNabors[]
00557    *-------------------------------------------------------*/
00558   /* NA for sequential */
00559 
00560   /*-------------------------------------------------------
00561    *  Exchange boundary node permutation information with
00562    *  neighboring processors in the subdomain graph.
00563    *  This block fills in: o2n_ext (hash table)
00564    *                       n2o_ext (hash table)
00565    *-------------------------------------------------------*/
00566   /* NA for sequential */
00567 
00568 
00569 END_FUNC_DH}
00570 
00571 
00572 #if 0
00573 #undef __FUNC__
00574 #define __FUNC__ "partition_metis_private"
00575 void
00576 partition_metis_private (SubdomainGraph_dh s, void *A)
00577 {
00578   START_FUNC_DH if (ignoreMe)
00579     SET_V_ERROR ("not implemented");
00580 END_FUNC_DH}
00581 #endif
00582 
00583 #undef __FUNC__
00584 #define __FUNC__ "allocate_storage_private"
00585 void
00586 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj)
00587 {
00588   START_FUNC_DH if (!bj)
00589     {
00590       s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
00591       CHECK_V_ERROR;
00592       s->ptrs[0] = 0;
00593       s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
00594       CHECK_V_ERROR;
00595       s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00596       CHECK_V_ERROR;
00597       s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00598       CHECK_V_ERROR;
00599       s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00600       CHECK_V_ERROR;
00601     }
00602 
00603   s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
00604   CHECK_V_ERROR;
00605   s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
00606   CHECK_V_ERROR;
00607 
00608   /* these are probably only needed for single mpi task -- ?? */
00609   /* nope; beg_row and row_ct are needed by ilu_mpi_bj; yuck! */
00610   s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
00611   CHECK_V_ERROR;
00612   s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
00613   CHECK_V_ERROR;
00614   s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
00615   CHECK_V_ERROR;
00616   s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
00617   CHECK_V_ERROR;
00618   s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00619   CHECK_V_ERROR;
00620   s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00621   CHECK_V_ERROR;
00622 
00623 END_FUNC_DH}
00624 
00625 /*-----------------------------------------------------------------*/
00626 
00627 
00628 #undef __FUNC__
00629 #define __FUNC__ "init_mpi_private"
00630 void
00631 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00632 {
00633   START_FUNC_DH int m, n, beg_row;
00634   bool symmetric;
00635   double t1;
00636 
00637   symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
00638   CHECK_V_ERROR;
00639   if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
00640     {
00641       symmetric = true;
00642     }
00643 
00644   /*-------------------------------------------------------
00645    * get number of local rows (m), global rows (n), and
00646    * global numbering of first locally owned row
00647    *-------------------------------------------------------*/
00648   EuclidGetDimensions (A, &beg_row, &m, &n);
00649   CHECK_V_ERROR;
00650   s->m = m;
00651 
00652 
00653   /*-------------------------------------------------------
00654    * allocate storage for all data structures
00655    * EXCEPT s->adj and hash tables.
00656    *-------------------------------------------------------*/
00657   allocate_storage_private (s, blocks, m, bj);
00658   CHECK_V_ERROR;
00659 
00660   /*-------------------------------------------------------------
00661    * Fill in: beg_row[]
00662    *          beg_rowP[]
00663    *          row_count[]
00664    * At this point, beg_rowP[] is a copy of beg_row[])
00665    *-------------------------------------------------------------*/
00666   if (!bj)
00667     {
00668       MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
00669       MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
00670       memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
00671     }
00672   else
00673     {
00674       s->beg_row[myid_dh] = beg_row;
00675       s->beg_rowP[myid_dh] = beg_row;
00676       s->row_count[myid_dh] = m;
00677     }
00678 
00679   /*-----------------------------------------------------------------
00680    * Find all neighboring processors in subdomain graph.
00681    * This block fills in: allNabors[]
00682    *-----------------------------------------------------------------*/
00683   if (!bj)
00684     {
00685       t1 = MPI_Wtime ();
00686       if (symmetric)
00687     {
00688       find_all_neighbors_sym_private (s, m, A);
00689       CHECK_V_ERROR;
00690     }
00691       else
00692     {
00693       find_all_neighbors_unsym_private (s, m, A);
00694       CHECK_V_ERROR;
00695     }
00696       s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
00697     }
00698 
00699 
00700   /*-----------------------------------------------------------------
00701    *  determine which rows are boundary rows, and which are interior
00702    *  rows; also, form permutation vector to order interior
00703    *  nodes first within each subdomain
00704    *  This block fills in: bdry_count[]
00705    *                       n2o_col[]
00706    *                       o2n_row[]
00707    *-----------------------------------------------------------------*/
00708   t1 = MPI_Wtime ();
00709   if (!bj)
00710     {
00711       int *interiorNodes, *bdryNodes;
00712       int interiorCount, bdryCount;
00713       int *o2n = s->o2n_col, idx;
00714       int i;
00715 
00716       interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
00717       CHECK_V_ERROR;
00718       bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
00719       CHECK_V_ERROR;
00720 
00721       /* divide this subdomain's rows into interior and boundary rows;
00722          the returned lists are with respect to local numbering.
00723        */
00724       if (symmetric)
00725     {
00726       find_bdry_nodes_sym_private (s, m, A,
00727                        interiorNodes, bdryNodes,
00728                        &interiorCount, &bdryCount);
00729       CHECK_V_ERROR;
00730     }
00731       else
00732     {
00733       find_bdry_nodes_unsym_private (s, m, A,
00734                      interiorNodes, bdryNodes,
00735                      &interiorCount, &bdryCount);
00736       CHECK_V_ERROR;
00737     }
00738 
00739       /* exchange number of boundary rows with all neighbors */
00740       MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
00741              comm_dh);
00742 
00743       /* form local permutation */
00744       idx = 0;
00745       for (i = 0; i < interiorCount; ++i)
00746     {
00747       o2n[interiorNodes[i]] = idx++;
00748     }
00749       for (i = 0; i < bdryCount; ++i)
00750     {
00751       o2n[bdryNodes[i]] = idx++;
00752     }
00753 
00754       /* invert permutation */
00755       invert_perm (m, o2n, s->n2o_row);
00756       CHECK_V_ERROR;
00757 
00758       FREE_DH (interiorNodes);
00759       CHECK_V_ERROR;
00760       FREE_DH (bdryNodes);
00761       CHECK_V_ERROR;
00762     }
00763 
00764   /* bj setup */
00765   else
00766     {
00767       int *o2n = s->o2n_col, *n2o = s->n2o_row;
00768       int i, m = s->m;
00769 
00770       for (i = 0; i < m; ++i)
00771     {
00772       o2n[i] = i;
00773       n2o[i] = i;
00774     }
00775     }
00776   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00777 
00778   /*-------------------------------------------------------
00779    * Form subdomain graph,
00780    * then color and reorder subdomain graph.
00781    * This block fills in: ptr[]
00782    *                      adj[]
00783    *                      o2n_sub[]
00784    *                      n2o_sub[]
00785    *                      beg_rowP[]
00786    *-------------------------------------------------------*/
00787   if (!bj)
00788     {
00789       t1 = MPI_Wtime ();
00790       form_subdomaingraph_mpi_private (s);
00791       CHECK_V_ERROR;
00792       if (s->doNotColor)
00793     {
00794       int i;
00795       printf_dh ("subdomain coloring and reordering is OFF\n");
00796       for (i = 0; i < blocks; ++i)
00797         {
00798           s->o2n_sub[i] = i;
00799           s->n2o_sub[i] = i;
00800           s->colorVec[i] = 0;
00801         }
00802     }
00803       else
00804     {
00805       SET_INFO ("subdomain coloring and reordering is ON");
00806       color_subdomain_graph_private (s);
00807       CHECK_V_ERROR;
00808     }
00809       s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00810     }
00811 
00812   /*-------------------------------------------------------
00813    * Build lists of lower and higher ordered neighbors.
00814    * This block fills in: loNabors[]
00815    *                      hiNabors[]
00816    *-------------------------------------------------------*/
00817   if (!bj)
00818     {
00819       find_ordered_neighbors_private (s);
00820       CHECK_V_ERROR;
00821     }
00822 
00823   /*-------------------------------------------------------
00824    *  Exchange boundary node permutation information with
00825    *  neighboring processors in the subdomain graph.
00826    *  This block fills in: o2n_ext (hash table)
00827    *                       n2o_ext (hash table)
00828    *-------------------------------------------------------*/
00829   if (!bj)
00830     {
00831       t1 = MPI_Wtime ();
00832       SubdomainGraph_dhExchangePerms (s);
00833       CHECK_V_ERROR;
00834       s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
00835     }
00836 
00837 END_FUNC_DH}
00838 
00839 
00840 
00841 #undef __FUNC__
00842 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
00843 void
00844 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s)
00845 {
00846   START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
00847   MPI_Status *status = NULL;
00848   int *nabors = s->allNabors, naborCount = s->allCount;
00849   int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
00850   int m = s->row_count[myid_dh];
00851   int beg_row = s->beg_row[myid_dh];
00852   int beg_rowP = s->beg_rowP[myid_dh];
00853   int *bdryNodeCounts = s->bdry_count;
00854   int myBdryCount = s->bdry_count[myid_dh];
00855   bool debug = false;
00856   int myFirstBdry = m - myBdryCount;
00857   int *n2o_row = s->n2o_row;
00858   Hash_i_dh n2o_table, o2n_table;
00859 
00860   if (logFile != NULL && s->debug)
00861     debug = true;
00862 
00863   /* allocate send buffer, and copy permutation info to buffer;
00864      each entry is a <old_value, new_value> pair.
00865    */
00866   sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
00867   CHECK_V_ERROR;
00868 
00869 
00870   if (debug)
00871     {
00872       fprintf (logFile,
00873            "\nSUBG myFirstBdry= %i  myBdryCount= %i  m= %i  beg_rowP= %i\n",
00874            1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
00875       fflush (logFile);
00876     }
00877 
00878   for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00879     {
00880       sendBuf[2 * j] = n2o_row[i] + beg_row;
00881       sendBuf[2 * j + 1] = i + beg_rowP;
00882     }
00883 
00884   if (debug)
00885     {
00886       fprintf (logFile, "\nSUBG SEND_BUF:\n");
00887       for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00888     {
00889       fprintf (logFile, "SUBG  %i, %i\n", 1 + sendBuf[2 * j],
00890            1 + sendBuf[2 * j + 1]);
00891     }
00892       fflush (logFile);
00893     }
00894 
00895   /* allocate a receive buffer for each nabor in the subdomain graph,
00896      and set up index array for locating the beginning of each
00897      nabor's buffers.
00898    */
00899   naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
00900   CHECK_V_ERROR;
00901   naborIdx[0] = 0;
00902   nz = 0;
00903   for (i = 0; i < naborCount; ++i)
00904     {
00905       nz += (2 * bdryNodeCounts[nabors[i]]);
00906       naborIdx[i + 1] = nz;
00907     }
00908 
00909 
00910   recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
00911   CHECK_V_ERROR;
00912 
00913 
00914 /* for (i=0; i<nz; ++i) recvBuf[i] = -10; */
00915 
00916   /* perform sends and receives */
00917   recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00918   CHECK_V_ERROR;
00919   send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00920   CHECK_V_ERROR;
00921   status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
00922   CHECK_V_ERROR;
00923 
00924   for (i = 0; i < naborCount; ++i)
00925     {
00926       int nabr = nabors[i];
00927       int *buf = recvBuf + naborIdx[i];
00928       int ct = 2 * bdryNodeCounts[nabr];
00929 
00930 
00931       MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
00932          &(send_req[i]));
00933 
00934       if (debug)
00935     {
00936       fprintf (logFile, "SUBG   sending %i elts to %i\n", 2 * myBdryCount,
00937            nabr);
00938       fflush (logFile);
00939     }
00940 
00941       MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
00942 
00943       if (debug)
00944     {
00945       fprintf (logFile, "SUBG  receiving %i elts from %i\n", ct, nabr);
00946       fflush (logFile);
00947     }
00948     }
00949 
00950   MPI_Waitall (naborCount, send_req, status);
00951   MPI_Waitall (naborCount, recv_req, status);
00952 
00953   Hash_i_dhCreate (&n2o_table, nz / 2);
00954   CHECK_V_ERROR;
00955   Hash_i_dhCreate (&o2n_table, nz / 2);
00956   CHECK_V_ERROR;
00957   s->n2o_ext = n2o_table;
00958   s->o2n_ext = o2n_table;
00959 
00960   /* insert non-local boundary node permutations in lookup tables */
00961   for (i = 0; i < nz; i += 2)
00962     {
00963       int old = recvBuf[i];
00964       int new = recvBuf[i + 1];
00965 
00966       if (debug)
00967     {
00968       fprintf (logFile, "SUBG  i= %i  old= %i  new= %i\n", i, old + 1,
00969            new + 1);
00970       fflush (logFile);
00971     }
00972 
00973       Hash_i_dhInsert (o2n_table, old, new);
00974       CHECK_V_ERROR;
00975       Hash_i_dhInsert (n2o_table, new, old);
00976       CHECK_V_ERROR;
00977     }
00978 
00979 
00980   if (recvBuf != NULL)
00981     {
00982       FREE_DH (recvBuf);
00983       CHECK_V_ERROR;
00984     }
00985   if (naborIdx != NULL)
00986     {
00987       FREE_DH (naborIdx);
00988       CHECK_V_ERROR;
00989     }
00990   if (sendBuf != NULL)
00991     {
00992       FREE_DH (sendBuf);
00993       CHECK_V_ERROR;
00994     }
00995   if (recv_req != NULL)
00996     {
00997       FREE_DH (recv_req);
00998       CHECK_V_ERROR;
00999     }
01000   if (send_req != NULL)
01001     {
01002       FREE_DH (send_req);
01003       CHECK_V_ERROR;
01004     }
01005   if (status != NULL)
01006     {
01007       FREE_DH (status);
01008       CHECK_V_ERROR;
01009     }
01010 
01011 END_FUNC_DH}
01012 
01013 
01014 
01015 #undef __FUNC__
01016 #define __FUNC__ "form_subdomaingraph_mpi_private"
01017 void
01018 form_subdomaingraph_mpi_private (SubdomainGraph_dh s)
01019 {
01020   START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
01021   int *idxAll = NULL;
01022   int i, j, nz, *adj, *ptrs = s->ptrs;
01023   MPI_Request *recvReqs = NULL, sendReq;
01024   MPI_Status *statuses = NULL, status;
01025 
01026   /* all processors tell root how many nabors they have */
01027   if (myid_dh == 0)
01028     {
01029       idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
01030       CHECK_V_ERROR;
01031     }
01032   MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
01033 
01034   /* root counts edges in graph, and broacasts to all */
01035   if (myid_dh == 0)
01036     {
01037       nz = 0;
01038       for (i = 0; i < np_dh; ++i)
01039     nz += idxAll[i];
01040     }
01041   MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
01042 
01043   /* allocate space for adjacency lists (memory for the
01044      pointer array was previously allocated)
01045    */
01046   adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
01047   CHECK_V_ERROR;
01048 
01049   /* root receives adjacency lists from all processors */
01050   if (myid_dh == 0)
01051     {
01052       recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
01053       CHECK_V_ERROR;
01054       statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
01055       CHECK_V_ERROR;
01056 
01057       /* first, set up row pointer array */
01058       ptrs[0] = 0;
01059       for (j = 0; j < np_dh; ++j)
01060     ptrs[j + 1] = ptrs[j] + idxAll[j];
01061 
01062       /* second, start the receives */
01063       for (j = 0; j < np_dh; ++j)
01064     {
01065       int ct = idxAll[j];
01066 
01067       MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
01068              recvReqs + j);
01069     }
01070     }
01071 
01072   /* all processors send root their adjacency list */
01073   MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
01074 
01075   /* wait for comms to go through */
01076   if (myid_dh == 0)
01077     {
01078       MPI_Waitall (np_dh, recvReqs, statuses);
01079     }
01080   MPI_Wait (&sendReq, &status);
01081 
01082   /* root broadcasts assembled subdomain graph to all processors */
01083   MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
01084   MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
01085 
01086   if (idxAll != NULL)
01087     {
01088       FREE_DH (idxAll);
01089       CHECK_V_ERROR;
01090     }
01091   if (recvReqs != NULL)
01092     {
01093       FREE_DH (recvReqs);
01094       CHECK_V_ERROR;
01095     }
01096   if (statuses != NULL)
01097     {
01098       FREE_DH (statuses);
01099       CHECK_V_ERROR;
01100     }
01101 
01102 END_FUNC_DH}
01103 
01104 /* this is ugly and inefficient; but seq mode is primarily
01105    for debugging and testing, so there.
01106 */
01107 #undef __FUNC__
01108 #define __FUNC__ "form_subdomaingraph_seq_private"
01109 void
01110 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A)
01111 {
01112   START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
01113   int *cval, len, *adj;
01114   int idx = 0, *ptrs = s->ptrs;
01115 
01116   /* allocate storage for adj[]; since this function is intended
01117      for debugging/testing, and the number of blocks should be
01118      relatively small, we'll punt and allocate the maximum
01119      possibly needed.
01120    */
01121   adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01122   CHECK_V_ERROR;
01123 
01124   dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01125   CHECK_V_ERROR;
01126   for (i = 0; i < blocks * blocks; ++i)
01127     dense[i] = 0;
01128 
01129   /* loop over each block's rows to identify all boundary nodes */
01130   for (i = 0; i < blocks; ++i)
01131     {
01132       int beg_row = s->beg_row[i];
01133       int end_row = beg_row + s->row_count[i];
01134 
01135       for (row = beg_row; row < end_row; ++row)
01136     {
01137       EuclidGetRow (A, row, &len, &cval, NULL);
01138       CHECK_V_ERROR;
01139       for (j = 0; j < len; ++j)
01140         {
01141           int col = cval[j];
01142           if (col < beg_row || col >= end_row)
01143         {
01144           int owner = SubdomainGraph_dhFindOwner (s, col, false);
01145           CHECK_V_ERROR;
01146           dense[i * blocks + owner] = 1;
01147           dense[owner * blocks + i] = 1;
01148         }
01149         }
01150       EuclidRestoreRow (A, row, &len, &cval, NULL);
01151       CHECK_V_ERROR;
01152     }
01153     }
01154 
01155   /* form sparse csr representation of subdomain graph
01156      from dense representation
01157    */
01158   ptrs[0] = 0;
01159   for (i = 0; i < blocks; ++i)
01160     {
01161       for (j = 0; j < blocks; ++j)
01162     {
01163       if (dense[i * blocks + j])
01164         {
01165           adj[idx++] = j;
01166         }
01167     }
01168       ptrs[i + 1] = idx;
01169     }
01170 
01171   FREE_DH (dense);
01172   CHECK_V_ERROR;
01173 END_FUNC_DH}
01174 
01175 
01176 #undef __FUNC__
01177 #define __FUNC__ "find_all_neighbors_sym_private"
01178 void
01179 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A)
01180 {
01181   START_FUNC_DH int *marker, i, j, beg_row, end_row;
01182   int row, len, *cval, ct = 0;
01183   int *nabors = s->allNabors;
01184 
01185   marker = (int *) MALLOC_DH (m * sizeof (int));
01186   CHECK_V_ERROR;
01187   for (i = 0; i < m; ++i)
01188     marker[i] = 0;
01189 
01190   SET_INFO
01191     ("finding nabors in subdomain graph for structurally symmetric matrix");
01192   SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
01193 
01194   beg_row = s->beg_row[myid_dh];
01195   end_row = beg_row + s->row_count[myid_dh];
01196 
01197   for (row = beg_row; row < end_row; ++row)
01198     {
01199       EuclidGetRow (A, row, &len, &cval, NULL);
01200       CHECK_V_ERROR;
01201       for (j = 0; j < len; ++j)
01202     {
01203       int col = cval[j];
01204       if (col < beg_row || col >= end_row)
01205         {
01206           int owner = SubdomainGraph_dhFindOwner (s, col, false);
01207           CHECK_V_ERROR;
01208           if (!marker[owner])
01209         {
01210           marker[owner] = 1;
01211           nabors[ct++] = owner;
01212         }
01213         }
01214     }
01215       EuclidRestoreRow (A, row, &len, &cval, NULL);
01216       CHECK_V_ERROR;
01217     }
01218   s->allCount = ct;
01219 
01220 /* fprintf(logFile, "@@@@@ allCount= %i\n", ct); */
01221 
01222   if (marker != NULL)
01223     {
01224       FREE_DH (marker);
01225       CHECK_V_ERROR;
01226     }
01227 END_FUNC_DH}
01228 
01229 #undef __FUNC__
01230 #define __FUNC__ "find_all_neighbors_unsym_private"
01231 void
01232 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A)
01233 {
01234   START_FUNC_DH int i, j, row, beg_row, end_row;
01235   int *marker;
01236   int *cval, len, idx = 0;
01237   int nz, *nabors = s->allNabors, *myNabors;
01238 
01239   myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
01240   CHECK_V_ERROR;
01241   marker = (int *) MALLOC_DH (np_dh * sizeof (int));
01242   CHECK_V_ERROR;
01243   for (i = 0; i < np_dh; ++i)
01244     marker[i] = 0;
01245 
01246   SET_INFO
01247     ("finding nabors in subdomain graph for structurally unsymmetric matrix");
01248 
01249   /* loop over this block's boundary rows, finding all nabors in
01250      subdomain graph
01251    */
01252   beg_row = s->beg_row[myid_dh];
01253   end_row = beg_row + s->row_count[myid_dh];
01254 
01255 
01256 
01257   /*for each locally owned row ...   */
01258   for (row = beg_row; row < end_row; ++row)
01259     {
01260       EuclidGetRow (A, row, &len, &cval, NULL);
01261       CHECK_V_ERROR;
01262       for (j = 0; j < len; ++j)
01263     {
01264       int col = cval[j];
01265       /*for each column that corresponds to a non-locally owned row ...  */
01266       if (col < beg_row || col >= end_row)
01267         {
01268           int owner = SubdomainGraph_dhFindOwner (s, col, false);
01269           CHECK_V_ERROR;
01270           /*if I've not yet done so ...   */
01271           if (!marker[owner])
01272         {
01273           marker[owner] = 1;
01274           /*append the non-local row's owner in to the list of my nabors
01275              in the subdomain graph     */
01276           myNabors[idx++] = owner;
01277         }
01278         }
01279     }
01280       EuclidRestoreRow (A, row, &len, &cval, NULL);
01281       CHECK_V_ERROR;
01282     }
01283 
01284   /*
01285      at this point, idx = the number of my neighbors in the subdomain
01286      graph; equivalently, idx is the number of meaningfull slots in
01287      the myNabors array.  -dah 1/31/06
01288    */
01289 
01290   /*
01291      at this point: marker[j] = 0 indicates that processor j is NOT my nabor
01292      marker[j] = 1 indicates that processor j IS my nabor
01293      however, there may be some nabors that can't be discovered in the above loop
01294      "//for each locally owned row;" this can happen if the matrix is
01295      structurally unsymmetric.
01296      -dah 1/31/06
01297    */
01298 
01299 /* fprintf(stderr, "[%i] marker: ", myid_dh);
01300 for (j=0; j<np_dh; j++) {
01301   fprintf(stderr, "[%i] (j=%d) %d\n", myid_dh, j,  marker[j]);
01302 }
01303 fprintf(stderr, "\n");
01304 */
01305 
01306   /* find out who my neighbors are that I cannot discern locally */
01307   MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
01308   CHECK_V_ERROR;
01309 
01310   /* add in neighbors that I know about from scanning my adjacency lists */
01311   for (i = 0; i < idx; ++i)
01312     nabors[myNabors[i]] = 1;
01313 
01314   /* remove self from the adjacency list */
01315   nabors[myid_dh] = 0;
01316 
01317   /*
01318      at this point: marker[j] = 0 indicates that processor j is NOT my nabor
01319      marker[j] = 1 indicates that processor j IS my nabor
01320      and this is guaranteed to be complete.
01321    */
01322 
01323   /* form final list of neighboring processors */
01324   nz = 0;
01325   for (i = 0; i < np_dh; ++i)
01326     {
01327       if (nabors[i])
01328     myNabors[nz++] = i;
01329     }
01330   s->allCount = nz;
01331   memcpy (nabors, myNabors, nz * sizeof (int));
01332 
01333   if (marker != NULL)
01334     {
01335       FREE_DH (marker);
01336       CHECK_V_ERROR;
01337     }
01338   if (myNabors != NULL)
01339     {
01340       FREE_DH (myNabors);
01341       CHECK_V_ERROR;
01342     }
01343 END_FUNC_DH}
01344 
01345 /*================================================================*/
01346 
01347 #undef __FUNC__
01348 #define __FUNC__ "find_bdry_nodes_sym_private"
01349 void
01350 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
01351                  int *interiorNodes, int *bdryNodes,
01352                  int *interiorCount, int *bdryCount)
01353 {
01354   START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01355   int end_row = beg_row + s->row_count[myid_dh];
01356   int row, inCt = 0, bdCt = 0;
01357 
01358   int j;
01359   int *cval;
01360 
01361   /* determine if the row is a boundary row */
01362   for (row = beg_row; row < end_row; ++row)
01363     {               /* for each row in the subdomain */
01364       bool isBdry = false;
01365       int len;
01366       EuclidGetRow (A, row, &len, &cval, NULL);
01367       CHECK_V_ERROR;
01368 
01369       for (j = 0; j < len; ++j)
01370     {           /* for each column in the row */
01371       int col = cval[j];
01372       if (col < beg_row || col >= end_row)
01373         {
01374           isBdry = true;
01375           break;
01376         }
01377     }
01378       EuclidRestoreRow (A, row, &len, &cval, NULL);
01379       CHECK_V_ERROR;
01380 
01381       if (isBdry)
01382     {
01383       bdryNodes[bdCt++] = row - beg_row;
01384     }
01385       else
01386     {
01387       interiorNodes[inCt++] = row - beg_row;
01388     }
01389     }
01390 
01391   *interiorCount = inCt;
01392   *bdryCount = bdCt;
01393 
01394 END_FUNC_DH}
01395 
01396 #define BDRY_NODE_TAG 42
01397 
01398 #undef __FUNC__
01399 #define __FUNC__ "find_bdry_nodes_unsym_private"
01400 void
01401 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A,
01402                    int *interiorNodes, int *boundaryNodes,
01403                    int *interiorCount, int *bdryCount)
01404 {
01405   START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01406   int end_row = beg_row + s->row_count[myid_dh];
01407   int i, j, row, max;
01408   int *cval;
01409   int *list, count;
01410   int *rpIN = NULL, *rpOUT = NULL;
01411   int *sendBuf, *recvBuf;
01412   int *marker, inCt, bdCt;
01413   int *bdryNodes, nz;
01414   int sendCt, recvCt;
01415   MPI_Request *sendReq, *recvReq;
01416   MPI_Status *status;
01417   SortedSet_dh ss;
01418 
01419   SortedSet_dhCreate (&ss, m);
01420   CHECK_V_ERROR;
01421 
01422   /*-----------------------------------------------------
01423    * identify all boundary nodes possible using locally
01424    * owned adjacency lists
01425    *-----------------------------------------------------*/
01426   for (row = beg_row; row < end_row; ++row)
01427     {
01428       bool isBdry = false;
01429       int len;
01430       EuclidGetRow (A, row, &len, &cval, NULL);
01431       CHECK_V_ERROR;
01432 
01433       for (j = 0; j < len; ++j)
01434     {
01435       int col = cval[j];
01436       if (col < beg_row || col >= end_row)
01437         {
01438           isBdry = true;    /* this row is a boundary node */
01439           SortedSet_dhInsert (ss, col);
01440           CHECK_V_ERROR;
01441           /* the row "col" is also a boundary node */
01442         }
01443     }
01444       EuclidRestoreRow (A, row, &len, &cval, NULL);
01445       CHECK_V_ERROR;
01446 
01447       if (isBdry)
01448     {
01449       SortedSet_dhInsert (ss, row);
01450       CHECK_V_ERROR;
01451     }
01452     }
01453 
01454   /*-----------------------------------------------------
01455    * scan the sorted list to determine what boundary
01456    * node information to send to whom
01457    *-----------------------------------------------------*/
01458   sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01459   CHECK_V_ERROR;
01460   recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01461   CHECK_V_ERROR;
01462   rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01463   CHECK_V_ERROR;
01464   rpOUT[0] = 0;
01465   for (i = 0; i < np_dh; ++i)
01466     sendBuf[i] = 0;
01467 
01468   sendCt = 0;           /* total number of processor to whom we will send lists */
01469   SortedSet_dhGetList (ss, &list, &count);
01470   CHECK_V_ERROR;
01471 
01472   for (i = 0; i < count; /* i is set below */ )
01473     {
01474       int node = list[i];
01475       int owner;
01476       int last;
01477 
01478       owner = SubdomainGraph_dhFindOwner (s, node, false);
01479       CHECK_V_ERROR;
01480       last = s->beg_row[owner] + s->row_count[owner];
01481 
01482       /* determine the other boundary nodes that belong to owner */
01483       while ((i < count) && (list[i] < last))
01484     ++i;
01485       ++sendCt;
01486       rpOUT[sendCt] = i;
01487       sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
01488 
01489     }
01490 
01491   /*-----------------------------------------------------
01492    * processors tell each other how much information
01493    * each will send to whom
01494    *-----------------------------------------------------*/
01495   MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
01496   CHECK_V_ERROR;
01497 
01498   /*-----------------------------------------------------
01499    * exchange boundary node information
01500    * (note that we also exchange information with ourself!)
01501    *-----------------------------------------------------*/
01502 
01503   /* first, set up data structures to hold incoming information */
01504   rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01505   CHECK_V_ERROR;
01506   rpIN[0] = 0;
01507   nz = 0;
01508   recvCt = 0;
01509   for (i = 0; i < np_dh; ++i)
01510     {
01511       if (recvBuf[i])
01512     {
01513       ++recvCt;
01514       nz += recvBuf[i];
01515       rpIN[recvCt] = nz;
01516     }
01517     }
01518   bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
01519   CHECK_V_ERROR;
01520   sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
01521   CHECK_V_ERROR;
01522   recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
01523   CHECK_V_ERROR;
01524   max = MAX (sendCt, recvCt);
01525   status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
01526   CHECK_V_ERROR;
01527 
01528   /* second, start receives for incoming data */
01529   j = 0;
01530   for (i = 0; i < np_dh; ++i)
01531     {
01532       if (recvBuf[i])
01533     {
01534       MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
01535              i, BDRY_NODE_TAG, comm_dh, recvReq + j);
01536       ++j;
01537     }
01538     }
01539 
01540   /* third, start sends for outgoing data */
01541   j = 0;
01542   for (i = 0; i < np_dh; ++i)
01543     {
01544       if (sendBuf[i])
01545     {
01546       MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
01547              i, BDRY_NODE_TAG, comm_dh, sendReq + j);
01548       ++j;
01549     }
01550     }
01551 
01552   /* fourth, wait for all comms to finish */
01553   MPI_Waitall (sendCt, sendReq, status);
01554   MPI_Waitall (recvCt, recvReq, status);
01555 
01556   /* fifth, convert from global to local indices */
01557   for (i = 0; i < nz; ++i)
01558     bdryNodes[i] -= beg_row;
01559 
01560   /*-----------------------------------------------------
01561    * consolidate information from all processors to
01562    * identify all local boundary nodes
01563    *-----------------------------------------------------*/
01564   marker = (int *) MALLOC_DH (m * sizeof (int));
01565   CHECK_V_ERROR;
01566   for (i = 0; i < m; ++i)
01567     marker[i] = 0;
01568   for (i = 0; i < nz; ++i)
01569     marker[bdryNodes[i]] = 1;
01570 
01571   inCt = bdCt = 0;
01572   for (i = 0; i < m; ++i)
01573     {
01574       if (marker[i])
01575     {
01576       boundaryNodes[bdCt++] = i;
01577     }
01578       else
01579     {
01580       interiorNodes[inCt++] = i;
01581     }
01582     }
01583   *interiorCount = inCt;
01584   *bdryCount = bdCt;
01585 
01586   /*-----------------------------------------------------
01587    * clean up
01588    *-----------------------------------------------------*/
01589   SortedSet_dhDestroy (ss);
01590   CHECK_V_ERROR;
01591   if (rpIN != NULL)
01592     {
01593       FREE_DH (rpIN);
01594       CHECK_V_ERROR;
01595     }
01596   if (rpOUT != NULL)
01597     {
01598       FREE_DH (rpOUT);
01599       CHECK_V_ERROR;
01600     }
01601   if (sendBuf != NULL)
01602     {
01603       FREE_DH (sendBuf);
01604       CHECK_V_ERROR;
01605     }
01606   if (recvBuf != NULL)
01607     {
01608       FREE_DH (recvBuf);
01609       CHECK_V_ERROR;
01610     }
01611   if (bdryNodes != NULL)
01612     {
01613       FREE_DH (bdryNodes);
01614       CHECK_V_ERROR;
01615     }
01616   if (marker != NULL)
01617     {
01618       FREE_DH (marker);
01619       CHECK_V_ERROR;
01620     }
01621   if (sendReq != NULL)
01622     {
01623       FREE_DH (sendReq);
01624       CHECK_V_ERROR;
01625     }
01626   if (recvReq != NULL)
01627     {
01628       FREE_DH (recvReq);
01629       CHECK_V_ERROR;
01630     }
01631   if (status != NULL)
01632     {
01633       FREE_DH (status);
01634       CHECK_V_ERROR;
01635     }
01636 END_FUNC_DH}
01637 
01638 
01639 #undef __FUNC__
01640 #define __FUNC__ "find_ordered_neighbors_private"
01641 void
01642 find_ordered_neighbors_private (SubdomainGraph_dh s)
01643 {
01644   START_FUNC_DH int *loNabors = s->loNabors;
01645   int *hiNabors = s->hiNabors;
01646   int *allNabors = s->allNabors, allCount = s->allCount;
01647   int loCt = 0, hiCt = 0;
01648   int *o2n = s->o2n_sub;
01649   int i, myNewId = o2n[myid_dh];
01650 
01651   for (i = 0; i < allCount; ++i)
01652     {
01653       int nabor = allNabors[i];
01654       if (o2n[nabor] < myNewId)
01655     {
01656       loNabors[loCt++] = nabor;
01657     }
01658       else
01659     {
01660       hiNabors[hiCt++] = nabor;
01661     }
01662     }
01663 
01664   s->loCount = loCt;
01665   s->hiCount = hiCt;
01666 END_FUNC_DH}
01667 
01668 
01669 #undef __FUNC__
01670 #define __FUNC__ "color_subdomain_graph_private"
01671 void
01672 color_subdomain_graph_private (SubdomainGraph_dh s)
01673 {
01674   START_FUNC_DH int i, n = np_dh;
01675   int *rp = s->ptrs, *cval = s->adj;
01676   int j, *marker, thisNodesColor, *colorCounter;
01677   int *o2n = s->o2n_sub;
01678   int *color = s->colorVec;
01679 
01680   if (np_dh == 1)
01681     n = s->blocks;
01682 
01683   marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01684   colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01685   for (i = 0; i <= n; ++i)
01686     {
01687       marker[i] = -1;
01688       colorCounter[i] = 0;
01689     }
01690 
01691   /*------------------------------------------------------------------
01692    * color the nodes
01693    *------------------------------------------------------------------*/
01694   for (i = 0; i < n; ++i)
01695     {               /* color node "i" */
01696       /* mark colors of "i"s nabors as unavailable;
01697          only need to mark nabors that are (per the input ordering)
01698          numbered less than "i."
01699        */
01700       for (j = rp[i]; j < rp[i + 1]; ++j)
01701     {
01702       int nabor = cval[j];
01703       if (nabor < i)
01704         {
01705           int naborsColor = color[nabor];
01706           marker[naborsColor] = i;
01707         }
01708     }
01709 
01710       /* assign vertex i the "smallest" possible color */
01711       thisNodesColor = -1;
01712       for (j = 0; j < n; ++j)
01713     {
01714       if (marker[j] != i)
01715         {
01716           thisNodesColor = j;
01717           break;
01718         }
01719     }
01720       color[i] = thisNodesColor;
01721       colorCounter[1 + thisNodesColor] += 1;
01722     }
01723 
01724   /*------------------------------------------------------------------
01725    * build ordering vector; if two nodes are similarly colored,
01726    * they will have the same relative ordering as before.
01727    *------------------------------------------------------------------*/
01728   /* prefix-sum to find lowest-numbered node for each color */
01729   for (i = 1; i < n; ++i)
01730     {
01731       if (colorCounter[i] == 0)
01732     break;
01733       colorCounter[i] += colorCounter[i - 1];
01734     }
01735 
01736   for (i = 0; i < n; ++i)
01737     {
01738       o2n[i] = colorCounter[color[i]];
01739       colorCounter[color[i]] += 1;
01740     }
01741 
01742   /* invert permutation */
01743   invert_perm (n, s->o2n_sub, s->n2o_sub);
01744   CHECK_V_ERROR;
01745 
01746 
01747   /*------------------------------------------------------------------
01748    * count the number of colors used
01749    *------------------------------------------------------------------*/
01750   {
01751     int ct = 0;
01752     for (j = 0; j < n; ++j)
01753       {
01754     if (marker[j] == -1)
01755       break;
01756     ++ct;
01757       }
01758     s->colors = ct;
01759   }
01760 
01761 
01762   /*------------------------------------------------------------------
01763    * (re)build the beg_rowP array
01764    *------------------------------------------------------------------*/
01765   {
01766     int sum = 0;
01767     for (i = 0; i < n; ++i)
01768       {
01769     int old = s->n2o_sub[i];
01770     s->beg_rowP[old] = sum;
01771     sum += s->row_count[old];
01772       }
01773   }
01774 
01775   FREE_DH (marker);
01776   CHECK_V_ERROR;
01777   FREE_DH (colorCounter);
01778   CHECK_V_ERROR;
01779 END_FUNC_DH}
01780 
01781 
01782 #undef __FUNC__
01783 #define __FUNC__ "SubdomainGraph_dhDump"
01784 void
01785 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename)
01786 {
01787   START_FUNC_DH int i;
01788   int sCt = np_dh;
01789   FILE *fp;
01790 
01791   if (np_dh == 1)
01792     sCt = s->blocks;
01793 
01794 
01795   /* ---------------------------------------------------------
01796    *  for seq and par runs, 1st processor prints information
01797    *  that is common to all processors
01798    * ---------------------------------------------------------*/
01799   fp = openFile_dh (filename, "w");
01800   CHECK_V_ERROR;
01801 
01802   /* write subdomain ordering permutations */
01803   fprintf (fp, "----- colors used\n");
01804   fprintf (fp, "%i\n", s->colors);
01805   if (s->colorVec == NULL)
01806     {
01807       fprintf (fp, "s->colorVec == NULL\n");
01808     }
01809   else
01810     {
01811       fprintf (fp, "----- colorVec\n");
01812       for (i = 0; i < sCt; ++i)
01813     {
01814       fprintf (fp, "%i ", s->colorVec[i]);
01815     }
01816       fprintf (fp, "\n");
01817     }
01818 
01819   if (s->o2n_sub == NULL || s->o2n_sub == NULL)
01820     {
01821       fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
01822     }
01823   else
01824     {
01825       fprintf (fp, "----- o2n_sub\n");
01826       for (i = 0; i < sCt; ++i)
01827     {
01828       fprintf (fp, "%i ", s->o2n_sub[i]);
01829     }
01830       fprintf (fp, "\n");
01831       fprintf (fp, "----- n2o_sub\n");
01832       for (i = 0; i < sCt; ++i)
01833     {
01834       fprintf (fp, "%i ", s->n2o_sub[i]);
01835     }
01836       fprintf (fp, "\n");
01837     }
01838 
01839   /* write begin row arrays */
01840   if (s->beg_row == NULL || s->beg_rowP == NULL)
01841     {
01842       fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
01843     }
01844   else
01845     {
01846       fprintf (fp, "----- beg_row\n");
01847       for (i = 0; i < sCt; ++i)
01848     {
01849       fprintf (fp, "%i ", 1 + s->beg_row[i]);
01850     }
01851       fprintf (fp, "\n");
01852       fprintf (fp, "----- beg_rowP\n");
01853       for (i = 0; i < sCt; ++i)
01854     {
01855       fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
01856     }
01857       fprintf (fp, "\n");
01858     }
01859 
01860   /* write row count  and bdry count arrays */
01861   if (s->row_count == NULL || s->bdry_count == NULL)
01862     {
01863       fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
01864     }
01865   else
01866     {
01867       fprintf (fp, "----- row_count\n");
01868       for (i = 0; i < sCt; ++i)
01869     {
01870       fprintf (fp, "%i ", s->row_count[i]);
01871     }
01872       fprintf (fp, "\n");
01873       fprintf (fp, "----- bdry_count\n");
01874       for (i = 0; i < sCt; ++i)
01875     {
01876       fprintf (fp, "%i ", s->bdry_count[i]);
01877     }
01878       fprintf (fp, "\n");
01879 
01880     }
01881 
01882   /* write subdomain graph */
01883   if (s->ptrs == NULL || s->adj == NULL)
01884     {
01885       fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
01886     }
01887   else
01888     {
01889       int j;
01890       int ct;
01891       fprintf (fp, "----- subdomain graph\n");
01892       for (i = 0; i < sCt; ++i)
01893     {
01894       fprintf (fp, "%i :: ", i);
01895       ct = s->ptrs[i + 1] - s->ptrs[i];
01896       if (ct)
01897         {
01898           shellSort_int (ct, s->adj + s->ptrs[i]);
01899           CHECK_V_ERROR;
01900         }
01901       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
01902         {
01903           fprintf (fp, "%i ", s->adj[j]);
01904         }
01905       fprintf (fp, "\n");
01906     }
01907     }
01908   closeFile_dh (fp);
01909   CHECK_V_ERROR;
01910 
01911   /* ---------------------------------------------------------
01912    *  next print info that differs across processors for par
01913    *  trials.  deal with this as two cases: seq and par
01914    * ---------------------------------------------------------*/
01915   if (s->beg_rowP == NULL)
01916     {
01917       SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
01918     }
01919   if (s->row_count == NULL)
01920     {
01921       SET_V_ERROR ("s->row_count == NULL; can't continue");
01922     }
01923   if (s->o2n_sub == NULL)
01924     {
01925       SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
01926     }
01927 
01928 
01929   if (np_dh == 1)
01930     {
01931       fp = openFile_dh (filename, "a");
01932       CHECK_V_ERROR;
01933 
01934       /* write n2o_row  and o2n_col */
01935       if (s->n2o_row == NULL || s->o2n_col == NULL)
01936     {
01937       fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
01938     }
01939       else
01940     {
01941       fprintf (fp, "----- n2o_row\n");
01942       for (i = 0; i < s->m; ++i)
01943         {
01944           fprintf (fp, "%i ", 1 + s->n2o_row[i]);
01945         }
01946       fprintf (fp, "\n");
01947 
01948 #if 0
01949 /*
01950 note: this won't match the parallel case, since
01951       parallel permutation vecs are zero-based and purely
01952       local
01953 */
01954 
01955       fprintf (fp, "----- o2n_col\n");
01956       for (i = 0; i < sCt; ++i)
01957         {
01958           int br = s->beg_row[i];
01959           int er = br + s->row_count[i];
01960 
01961           for (j = br; j < er; ++j)
01962         {
01963           fprintf (fp, "%i ", 1 + s->o2n_col[j]);
01964         }
01965           fprintf (fp, "\n");
01966         }
01967       fprintf (fp, "\n");
01968 
01969 #endif
01970 
01971     }
01972       closeFile_dh (fp);
01973       CHECK_V_ERROR;
01974     }
01975 
01976   /* parallel case */
01977   else
01978     {
01979       int id = s->n2o_sub[myid_dh];
01980       int m = s->m;
01981       int pe;
01982       int beg_row = 0;
01983       if (s->beg_row != 0)
01984     beg_row = s->beg_row[myid_dh];
01985 
01986       /* write n2o_row */
01987       for (pe = 0; pe < np_dh; ++pe)
01988     {
01989       MPI_Barrier (comm_dh);
01990       if (id == pe)
01991         {
01992           fp = openFile_dh (filename, "a");
01993           CHECK_V_ERROR;
01994           if (id == 0)
01995         fprintf (fp, "----- n2o_row\n");
01996 
01997           for (i = 0; i < m; ++i)
01998         {
01999           fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
02000         }
02001           if (id == np_dh - 1)
02002         fprintf (fp, "\n");
02003           closeFile_dh (fp);
02004           CHECK_V_ERROR;
02005         }
02006     }
02007 
02008 #if 0
02009 
02010       /* write o2n_col */
02011       for (pe = 0; pe < np_dh; ++pe)
02012     {
02013       MPI_Barrier (comm_dh);
02014       if (myid_dh == pe)
02015         {
02016           fp = openFile_dh (filename, "a");
02017           CHECK_V_ERROR;
02018           if (myid_dh == 0)
02019         fprintf (fp, "----- o2n_col\n");
02020 
02021           for (i = 0; i < m; ++i)
02022         {
02023           fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
02024         }
02025           fprintf (fp, "\n");
02026 
02027           if (myid_dh == np_dh - 1)
02028         fprintf (fp, "\n");
02029 
02030           closeFile_dh (fp);
02031           CHECK_V_ERROR;
02032         }
02033     }
02034 
02035 #endif
02036 
02037     }
02038 
02039 END_FUNC_DH}
02040 
02041 
02042 
02043 #undef __FUNC__
02044 #define __FUNC__ "find_bdry_nodes_seq_private"
02045 void
02046 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A)
02047 {
02048   START_FUNC_DH int i, j, row, blocks = s->blocks;
02049   int *cval, *tmp;
02050 
02051   tmp = (int *) MALLOC_DH (m * sizeof (int));
02052   CHECK_V_ERROR;
02053   for (i = 0; i < m; ++i)
02054     tmp[i] = 0;
02055 
02056     /*------------------------------------------ 
02057      * mark all boundary nodes
02058      *------------------------------------------ */
02059   for (i = 0; i < blocks; ++i)
02060     {
02061       int beg_row = s->beg_row[i];
02062       int end_row = beg_row + s->row_count[i];
02063 
02064       for (row = beg_row; row < end_row; ++row)
02065     {
02066       bool isBdry = false;
02067       int len;
02068       EuclidGetRow (A, row, &len, &cval, NULL);
02069       CHECK_V_ERROR;
02070 
02071       for (j = 0; j < len; ++j)
02072         {           /* for each column in the row */
02073           int col = cval[j];
02074 
02075           if (col < beg_row || col >= end_row)
02076         {
02077           tmp[col] = 1;
02078           isBdry = true;
02079         }
02080         }
02081       if (isBdry)
02082         tmp[row] = 1;
02083       EuclidRestoreRow (A, row, &len, &cval, NULL);
02084       CHECK_V_ERROR;
02085     }
02086     }
02087 
02088     /*------------------------------------------
02089      * fill in the bdry_count[] array
02090      *------------------------------------------ */
02091   for (i = 0; i < blocks; ++i)
02092     {
02093       int beg_row = s->beg_row[i];
02094       int end_row = beg_row + s->row_count[i];
02095       int ct = 0;
02096       for (row = beg_row; row < end_row; ++row)
02097     {
02098       if (tmp[row])
02099         ++ct;
02100     }
02101       s->bdry_count[i] = ct;
02102     }
02103 
02104     /*------------------------------------------
02105      * form the o2n_col[] permutation
02106      *------------------------------------------ */
02107   for (i = 0; i < blocks; ++i)
02108     {
02109       int beg_row = s->beg_row[i];
02110       int end_row = beg_row + s->row_count[i];
02111       int interiorIDX = beg_row;
02112       int bdryIDX = end_row - s->bdry_count[i];
02113 
02114       for (row = beg_row; row < end_row; ++row)
02115     {
02116       if (tmp[row])
02117         {
02118           s->o2n_col[row] = bdryIDX++;
02119         }
02120       else
02121         {
02122           s->o2n_col[row] = interiorIDX++;
02123         }
02124     }
02125     }
02126 
02127   /* invert permutation */
02128   invert_perm (m, s->o2n_col, s->n2o_row);
02129   CHECK_V_ERROR;
02130   FREE_DH (tmp);
02131   CHECK_V_ERROR;
02132 
02133 END_FUNC_DH}
02134 
02135 #undef __FUNC__
02136 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
02137 void
02138 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp)
02139 {
02140   START_FUNC_DH if (myid_dh == 0)
02141     {
02142       int i, j;
02143 
02144       fprintf (fp,
02145            "\n-----------------------------------------------------\n");
02146       fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
02147       fprintf (fp, "-----------------------------------------------------\n");
02148       fprintf (fp, "colors used: %i\n", s->colors);
02149 
02150       fprintf (fp, "o2n ordering vector: ");
02151       for (i = 0; i < s->blocks; ++i)
02152     fprintf (fp, "%i ", s->o2n_sub[i]);
02153 
02154       fprintf (fp, "\ncoloring vector (node, color): \n");
02155       for (i = 0; i < s->blocks; ++i)
02156     fprintf (fp, "  %i, %i\n", i, s->colorVec[i]);
02157 
02158       fprintf (fp, "\n");
02159       fprintf (fp, "Adjacency lists:\n");
02160 
02161       for (i = 0; i < s->blocks; ++i)
02162     {
02163       fprintf (fp, "   P_%i :: ", i);
02164       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
02165         {
02166           fprintf (fp, "%i ", s->adj[j]);
02167         }
02168       fprintf (fp, "\n");
02169     }
02170       fprintf (fp, "-----------------------------------------------------\n");
02171     }
02172 END_FUNC_DH}
02173 
02174 
02175 #undef __FUNC__
02176 #define __FUNC__ "adjust_matrix_perms_private"
02177 void
02178 adjust_matrix_perms_private (SubdomainGraph_dh s, int m)
02179 {
02180   START_FUNC_DH int i, j, blocks = s->blocks;
02181   int *o2n = s->o2n_col;
02182 
02183   for (i = 0; i < blocks; ++i)
02184     {
02185       int beg_row = s->beg_row[i];
02186       int end_row = beg_row + s->row_count[i];
02187       int adjust = s->beg_rowP[i] - s->beg_row[i];
02188       for (j = beg_row; j < end_row; ++j)
02189     o2n[j] += adjust;
02190     }
02191 
02192   invert_perm (m, s->o2n_col, s->n2o_row);
02193   CHECK_V_ERROR;
02194 END_FUNC_DH}
02195 
02196 #undef __FUNC__
02197 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
02198 void
02199 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp)
02200 {
02201   START_FUNC_DH int i;
02202   int blocks = np_dh;
02203   double ratio[25];
02204 
02205   if (myid_dh == 0)
02206     {
02207       if (np_dh == 1)
02208     blocks = s->blocks;
02209       if (blocks > 25)
02210     blocks = 25;
02211 
02212       fprintf (fp, "\n");
02213       fprintf (fp, "Subdomain interior/boundary node ratios\n");
02214       fprintf (fp, "---------------------------------------\n");
02215 
02216       /* compute ratios */
02217       for (i = 0; i < blocks; ++i)
02218     {
02219       if (s->bdry_count[i] == 0)
02220         {
02221           ratio[i] = -1;
02222         }
02223       else
02224         {
02225           ratio[i] =
02226         (double) (s->row_count[i] -
02227               s->bdry_count[i]) / (double) s->bdry_count[i];
02228         }
02229     }
02230 
02231       /* sort ratios */
02232       shellSort_float (blocks, ratio);
02233 
02234       /* print ratios */
02235       if (blocks <= 20)
02236     {           /* print all ratios */
02237       int j = 0;
02238       for (i = 0; i < blocks; ++i)
02239         {
02240           fprintf (fp, "%0.2g  ", ratio[i]);
02241           ++j;
02242           if (j == 10)
02243         {
02244           fprintf (fp, "\n");
02245         }
02246         }
02247       fprintf (fp, "\n");
02248     }
02249       else
02250     {           /* print 10 largest and 10 smallest ratios */
02251       fprintf (fp, "10 smallest ratios: ");
02252       for (i = 0; i < 10; ++i)
02253         {
02254           fprintf (fp, "%0.2g  ", ratio[i]);
02255         }
02256       fprintf (fp, "\n");
02257       fprintf (fp, "10 largest ratios:  ");
02258       {
02259         int start = blocks - 6, stop = blocks - 1;
02260         for (i = start; i < stop; ++i)
02261           {
02262         fprintf (fp, "%0.2g  ", ratio[i]);
02263           }
02264         fprintf (fp, "\n");
02265       }
02266     }
02267     }
02268 
02269 END_FUNC_DH}
02270 
02271 
02272 #undef __FUNC__
02273 #define __FUNC__ "SubdomainGraph_dhPrintStats"
02274 void
02275 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp)
02276 {
02277   START_FUNC_DH double *timing = sg->timing;
02278 
02279   fprintf_dh (fp, "\nSubdomainGraph timing report\n");
02280   fprintf_dh (fp, "-----------------------------\n");
02281   fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
02282   fprintf_dh (fp, "  find neighbors in subdomain graph: %0.2f\n",
02283           timing[FIND_NABORS_SGT]);
02284   fprintf_dh (fp, "  locally order interiors and bdry:  %0.2f\n",
02285           timing[ORDER_BDRY_SGT]);
02286   fprintf_dh (fp, "  form and color subdomain graph:    %0.2f\n",
02287           timing[FORM_GRAPH_SGT]);
02288   fprintf_dh (fp, "  exchange bdry permutations:        %0.2f\n",
02289           timing[EXCHANGE_PERMS_SGT]);
02290   fprintf_dh (fp, "  everything else (should be small): %0.2f\n",
02291           timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
02292                    timing[ORDER_BDRY_SGT] +
02293                    timing[FORM_GRAPH_SGT] +
02294                    timing[EXCHANGE_PERMS_SGT]));
02295 END_FUNC_DH}
 All Classes Files Functions Variables Enumerations Friends