00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "SubdomainGraph_dh.h"
00044 #include "getRow_dh.h"
00045 #include "Mem_dh.h"
00046 #include "Parser_dh.h"
00047 #include "Hash_i_dh.h"
00048 #include "mat_dh_private.h"
00049 #include "io_dh.h"
00050 #include "SortedSet_dh.h"
00051 #include "shellSort_dh.h"
00052
00053
00054
00055 #include <unistd.h>
00056
00057 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
00058 void *A);
00059 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
00060 void *A);
00061
00062
00063
00064
00065
00066 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
00067 bool bj);
00068 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s);
00069 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m,
00070 void *A);
00071 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m,
00072 void *A);
00073 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m,
00074 void *A);
00075 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
00076 int *interiorNodes, int *bdryNodes,
00077 int *interiorCount, int *bdryCount);
00078 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m,
00079 void *A, int *interiorNodes,
00080 int *bdryNodes, int *interiorCount,
00081 int *bdryCount);
00082
00083 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
00084
00085
00086 static void find_ordered_neighbors_private (SubdomainGraph_dh s);
00087 static void color_subdomain_graph_private (SubdomainGraph_dh s);
00088 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m);
00089
00090 #undef __FUNC__
00091 #define __FUNC__ "SubdomainGraph_dhCreate"
00092 void
00093 SubdomainGraph_dhCreate (SubdomainGraph_dh * s)
00094 {
00095 START_FUNC_DH
00096 struct _subdomain_dh *tmp =
00097 (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
00098 CHECK_V_ERROR;
00099 *s = tmp;
00100
00101 tmp->blocks = 1;
00102 tmp->ptrs = tmp->adj = NULL;
00103 tmp->colors = 1;
00104 tmp->colorVec = NULL;
00105 tmp->o2n_sub = tmp->n2o_sub = NULL;
00106 tmp->beg_row = tmp->beg_rowP = NULL;
00107 tmp->bdry_count = tmp->row_count = NULL;
00108 tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
00109 tmp->loCount = tmp->hiCount = tmp->allCount = 0;
00110
00111 tmp->m = 0;
00112 tmp->n2o_row = tmp->o2n_col = NULL;
00113 tmp->o2n_ext = tmp->n2o_ext = NULL;
00114
00115 tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
00116 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
00117 {
00118 int i;
00119 for (i = 0; i < TIMING_BINS_SG; ++i)
00120 tmp->timing[i] = 0.0;
00121 }
00122 END_FUNC_DH}
00123
00124 #undef __FUNC__
00125 #define __FUNC__ "SubdomainGraph_dhDestroy"
00126 void
00127 SubdomainGraph_dhDestroy (SubdomainGraph_dh s)
00128 {
00129 START_FUNC_DH if (s->ptrs != NULL)
00130 {
00131 FREE_DH (s->ptrs);
00132 CHECK_V_ERROR;
00133 }
00134 if (s->adj != NULL)
00135 {
00136 FREE_DH (s->adj);
00137 CHECK_V_ERROR;
00138 }
00139 if (s->colorVec != NULL)
00140 {
00141 FREE_DH (s->colorVec);
00142 CHECK_V_ERROR;
00143 }
00144 if (s->o2n_sub != NULL)
00145 {
00146 FREE_DH (s->o2n_sub);
00147 CHECK_V_ERROR;
00148 }
00149 if (s->n2o_sub != NULL)
00150 {
00151 FREE_DH (s->n2o_sub);
00152 CHECK_V_ERROR;
00153 }
00154
00155 if (s->beg_row != NULL)
00156 {
00157 FREE_DH (s->beg_row);
00158 CHECK_V_ERROR;
00159 }
00160 if (s->beg_rowP != NULL)
00161 {
00162 FREE_DH (s->beg_rowP);
00163 CHECK_V_ERROR;
00164 }
00165 if (s->row_count != NULL)
00166 {
00167 FREE_DH (s->row_count);
00168 CHECK_V_ERROR;
00169 }
00170 if (s->bdry_count != NULL)
00171 {
00172 FREE_DH (s->bdry_count);
00173 CHECK_V_ERROR;
00174 }
00175 if (s->loNabors != NULL)
00176 {
00177 FREE_DH (s->loNabors);
00178 CHECK_V_ERROR;
00179 }
00180 if (s->hiNabors != NULL)
00181 {
00182 FREE_DH (s->hiNabors);
00183 CHECK_V_ERROR;
00184 }
00185 if (s->allNabors != NULL)
00186 {
00187 FREE_DH (s->allNabors);
00188 CHECK_V_ERROR;
00189 }
00190
00191 if (s->n2o_row != NULL)
00192 {
00193 FREE_DH (s->n2o_row);
00194 CHECK_V_ERROR;
00195 }
00196 if (s->o2n_col != NULL)
00197 {
00198 FREE_DH (s->o2n_col);
00199 CHECK_V_ERROR;
00200 }
00201 if (s->o2n_ext != NULL)
00202 {
00203 Hash_i_dhDestroy (s->o2n_ext);
00204 CHECK_V_ERROR;
00205 }
00206 if (s->n2o_ext != NULL)
00207 {
00208 Hash_i_dhDestroy (s->n2o_ext);
00209 CHECK_V_ERROR;
00210 }
00211 FREE_DH (s);
00212 CHECK_V_ERROR;
00213 END_FUNC_DH}
00214
00215 #undef __FUNC__
00216 #define __FUNC__ "SubdomainGraph_dhInit"
00217 void
00218 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00219 {
00220 START_FUNC_DH double t1 = MPI_Wtime ();
00221
00222 if (blocks < 1)
00223 blocks = 1;
00224
00225 if (np_dh == 1 || blocks > 1)
00226 {
00227 s->blocks = blocks;
00228 init_seq_private (s, blocks, bj, A);
00229 CHECK_V_ERROR;
00230 }
00231 else
00232 {
00233 s->blocks = np_dh;
00234 init_mpi_private (s, np_dh, bj, A);
00235 CHECK_V_ERROR;
00236 }
00237
00238 s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
00239 END_FUNC_DH}
00240
00241
00242 #undef __FUNC__
00243 #define __FUNC__ "SubdomainGraph_dhFindOwner"
00244 int
00245 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted)
00246 {
00247 START_FUNC_DH int sd;
00248 int *beg_row = s->beg_row;
00249 int *row_count = s->row_count;
00250 int owner = -1, blocks = s->blocks;
00251
00252 if (permuted)
00253 beg_row = s->beg_rowP;
00254
00255
00256 for (sd = 0; sd < blocks; ++sd)
00257 {
00258 if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
00259 {
00260 owner = sd;
00261 break;
00262 }
00263 }
00264
00265 if (owner == -1)
00266 {
00267
00268 fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
00269 fprintf (stderr, "blocks= %i\n", blocks);
00270
00271 sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
00272 SET_ERROR (-1, msgBuf_dh);
00273 }
00274
00275 END_FUNC_VAL (owner)}
00276
00277
00278 #undef __FUNC__
00279 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
00280 void
00281 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp)
00282 {
00283 START_FUNC_DH int i, j, k;
00284 double max = 0, min = INT_MAX;
00285
00286 fprintf (fp,
00287 "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
00288 fprintf (fp, "colors used = %i\n", s->colors);
00289 fprintf (fp, "subdomain count = %i\n", s->blocks);
00290
00291
00292 fprintf (fp, "\ninterior/boundary node ratios:\n");
00293
00294 for (i = 0; i < s->blocks; ++i)
00295 {
00296 int inNodes = s->row_count[i] - s->bdry_count[i];
00297 int bdNodes = s->bdry_count[i];
00298 double ratio;
00299
00300 if (bdNodes == 0)
00301 {
00302 ratio = -1;
00303 }
00304 else
00305 {
00306 ratio = (double) inNodes / (double) bdNodes;
00307 }
00308
00309 max = MAX (max, ratio);
00310 min = MIN (min, ratio);
00311 fprintf (fp,
00312 " P_%i: first= %3i rowCount= %3i interior= %3i bdry= %3i ratio= %0.1f\n",
00313 i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
00314 ratio);
00315 }
00316
00317
00318 fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
00319 fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
00320
00321
00322
00323
00324
00325 if (s->adj != NULL)
00326 {
00327 fprintf (fp, "\nunpermuted subdomain graph: \n");
00328 for (i = 0; i < s->blocks; ++i)
00329 {
00330 fprintf (fp, "%i :: ", i);
00331 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
00332 {
00333 fprintf (fp, "%i ", s->adj[j]);
00334 }
00335 fprintf (fp, "\n");
00336 }
00337 }
00338
00339
00340
00341
00342
00343 fprintf (fp, "\no2n subdomain permutation:\n");
00344 for (i = 0; i < s->blocks; ++i)
00345 {
00346 fprintf (fp, " %i %i\n", i, s->o2n_sub[i]);
00347 }
00348 fprintf (fp, "\n");
00349
00350 if (np_dh > 1)
00351 {
00352
00353
00354
00355
00356 fprintf (fp, "\nlocal n2o_row permutation:\n ");
00357 for (i = 0; i < s->row_count[myid_dh]; ++i)
00358 {
00359 fprintf (fp, "%i ", s->n2o_row[i]);
00360 }
00361 fprintf (fp, "\n");
00362
00363
00364
00365
00366 fprintf (fp, "\nlocal o2n_col permutation:\n ");
00367 for (i = 0; i < s->row_count[myid_dh]; ++i)
00368 {
00369 fprintf (fp, "%i ", s->o2n_col[i]);
00370 }
00371 fprintf (fp, "\n");
00372
00373 }
00374 else
00375 {
00376
00377
00378
00379 fprintf (fp, "\nlocal n2o_row permutation:\n");
00380 fprintf (fp, "--------------------------\n");
00381 for (k = 0; k < s->blocks; ++k)
00382 {
00383 int beg_row = s->beg_row[k];
00384 int end_row = beg_row + s->row_count[k];
00385
00386 for (i = beg_row; i < end_row; ++i)
00387 {
00388 fprintf (fp, "%i ", s->n2o_row[i]);
00389 }
00390 fprintf (fp, "\n");
00391 }
00392
00393
00394
00395
00396 fprintf (fp, "\nlocal o2n_col permutation:\n");
00397 fprintf (fp, "--------------------------\n");
00398 for (k = 0; k < s->blocks; ++k)
00399 {
00400 int beg_row = s->beg_row[k];
00401 int end_row = beg_row + s->row_count[k];
00402
00403 for (i = beg_row; i < end_row; ++i)
00404 {
00405 fprintf (fp, "%i ", s->o2n_col[i]);
00406 }
00407 fprintf (fp, "\n");
00408 }
00409
00410
00411 }
00412
00413 END_FUNC_DH}
00414
00415 #undef __FUNC__
00416 #define __FUNC__ "init_seq_private"
00417 void
00418 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00419 {
00420 START_FUNC_DH int m, n, beg_row;
00421 double t1;
00422
00423
00424
00425
00426
00427
00428 EuclidGetDimensions (A, &beg_row, &m, &n);
00429 CHECK_V_ERROR;
00430 s->m = n;
00431
00432
00433
00434
00435
00436
00437 allocate_storage_private (s, blocks, m, bj);
00438 CHECK_V_ERROR;
00439
00440
00441
00442
00443
00444
00445
00446 {
00447 int i;
00448 int rpp = m / blocks;
00449
00450 if (rpp * blocks < m)
00451 ++rpp;
00452
00453 s->beg_row[0] = 0;
00454 for (i = 1; i < blocks; ++i)
00455 s->beg_row[i] = rpp + s->beg_row[i - 1];
00456 for (i = 0; i < blocks; ++i)
00457 s->row_count[i] = rpp;
00458 s->row_count[blocks - 1] = m - rpp * (blocks - 1);
00459 }
00460 memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478 t1 = MPI_Wtime ();
00479 if (!bj)
00480 {
00481 find_bdry_nodes_seq_private (s, m, A);
00482 CHECK_V_ERROR;
00483 }
00484 else
00485 {
00486 int i;
00487 for (i = 0; i < m; ++i)
00488 {
00489 s->n2o_row[i] = i;
00490 s->o2n_col[i] = i;
00491 }
00492 }
00493 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504 t1 = MPI_Wtime ();
00505 if (!bj)
00506 {
00507 form_subdomaingraph_seq_private (s, m, A);
00508 CHECK_V_ERROR;
00509 if (s->doNotColor)
00510 {
00511 int i;
00512 printf_dh ("subdomain coloring and reordering is OFF\n");
00513 for (i = 0; i < blocks; ++i)
00514 {
00515 s->o2n_sub[i] = i;
00516 s->n2o_sub[i] = i;
00517 s->colorVec[i] = 0;
00518 }
00519 }
00520 else
00521 {
00522 SET_INFO ("subdomain coloring and reordering is ON");
00523 color_subdomain_graph_private (s);
00524 CHECK_V_ERROR;
00525 }
00526 }
00527
00528
00529 else
00530 {
00531 int i;
00532 for (i = 0; i < blocks; ++i)
00533 {
00534 s->o2n_sub[i] = i;
00535 s->n2o_sub[i] = i;
00536 }
00537 }
00538 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00539
00540
00541
00542
00543
00544
00545
00546
00547 if (!bj)
00548 {
00549 adjust_matrix_perms_private (s, m);
00550 CHECK_V_ERROR;
00551 }
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569 END_FUNC_DH}
00570
00571
00572 #if 0
00573 #undef __FUNC__
00574 #define __FUNC__ "partition_metis_private"
00575 void
00576 partition_metis_private (SubdomainGraph_dh s, void *A)
00577 {
00578 START_FUNC_DH if (ignoreMe)
00579 SET_V_ERROR ("not implemented");
00580 END_FUNC_DH}
00581 #endif
00582
00583 #undef __FUNC__
00584 #define __FUNC__ "allocate_storage_private"
00585 void
00586 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj)
00587 {
00588 START_FUNC_DH if (!bj)
00589 {
00590 s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
00591 CHECK_V_ERROR;
00592 s->ptrs[0] = 0;
00593 s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
00594 CHECK_V_ERROR;
00595 s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00596 CHECK_V_ERROR;
00597 s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00598 CHECK_V_ERROR;
00599 s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00600 CHECK_V_ERROR;
00601 }
00602
00603 s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
00604 CHECK_V_ERROR;
00605 s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
00606 CHECK_V_ERROR;
00607
00608
00609
00610 s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
00611 CHECK_V_ERROR;
00612 s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
00613 CHECK_V_ERROR;
00614 s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
00615 CHECK_V_ERROR;
00616 s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
00617 CHECK_V_ERROR;
00618 s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00619 CHECK_V_ERROR;
00620 s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00621 CHECK_V_ERROR;
00622
00623 END_FUNC_DH}
00624
00625
00626
00627
00628 #undef __FUNC__
00629 #define __FUNC__ "init_mpi_private"
00630 void
00631 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00632 {
00633 START_FUNC_DH int m, n, beg_row;
00634 bool symmetric;
00635 double t1;
00636
00637 symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
00638 CHECK_V_ERROR;
00639 if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
00640 {
00641 symmetric = true;
00642 }
00643
00644
00645
00646
00647
00648 EuclidGetDimensions (A, &beg_row, &m, &n);
00649 CHECK_V_ERROR;
00650 s->m = m;
00651
00652
00653
00654
00655
00656
00657 allocate_storage_private (s, blocks, m, bj);
00658 CHECK_V_ERROR;
00659
00660
00661
00662
00663
00664
00665
00666 if (!bj)
00667 {
00668 MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
00669 MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
00670 memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
00671 }
00672 else
00673 {
00674 s->beg_row[myid_dh] = beg_row;
00675 s->beg_rowP[myid_dh] = beg_row;
00676 s->row_count[myid_dh] = m;
00677 }
00678
00679
00680
00681
00682
00683 if (!bj)
00684 {
00685 t1 = MPI_Wtime ();
00686 if (symmetric)
00687 {
00688 find_all_neighbors_sym_private (s, m, A);
00689 CHECK_V_ERROR;
00690 }
00691 else
00692 {
00693 find_all_neighbors_unsym_private (s, m, A);
00694 CHECK_V_ERROR;
00695 }
00696 s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
00697 }
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708 t1 = MPI_Wtime ();
00709 if (!bj)
00710 {
00711 int *interiorNodes, *bdryNodes;
00712 int interiorCount, bdryCount;
00713 int *o2n = s->o2n_col, idx;
00714 int i;
00715
00716 interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
00717 CHECK_V_ERROR;
00718 bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
00719 CHECK_V_ERROR;
00720
00721
00722
00723
00724 if (symmetric)
00725 {
00726 find_bdry_nodes_sym_private (s, m, A,
00727 interiorNodes, bdryNodes,
00728 &interiorCount, &bdryCount);
00729 CHECK_V_ERROR;
00730 }
00731 else
00732 {
00733 find_bdry_nodes_unsym_private (s, m, A,
00734 interiorNodes, bdryNodes,
00735 &interiorCount, &bdryCount);
00736 CHECK_V_ERROR;
00737 }
00738
00739
00740 MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
00741 comm_dh);
00742
00743
00744 idx = 0;
00745 for (i = 0; i < interiorCount; ++i)
00746 {
00747 o2n[interiorNodes[i]] = idx++;
00748 }
00749 for (i = 0; i < bdryCount; ++i)
00750 {
00751 o2n[bdryNodes[i]] = idx++;
00752 }
00753
00754
00755 invert_perm (m, o2n, s->n2o_row);
00756 CHECK_V_ERROR;
00757
00758 FREE_DH (interiorNodes);
00759 CHECK_V_ERROR;
00760 FREE_DH (bdryNodes);
00761 CHECK_V_ERROR;
00762 }
00763
00764
00765 else
00766 {
00767 int *o2n = s->o2n_col, *n2o = s->n2o_row;
00768 int i, m = s->m;
00769
00770 for (i = 0; i < m; ++i)
00771 {
00772 o2n[i] = i;
00773 n2o[i] = i;
00774 }
00775 }
00776 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787 if (!bj)
00788 {
00789 t1 = MPI_Wtime ();
00790 form_subdomaingraph_mpi_private (s);
00791 CHECK_V_ERROR;
00792 if (s->doNotColor)
00793 {
00794 int i;
00795 printf_dh ("subdomain coloring and reordering is OFF\n");
00796 for (i = 0; i < blocks; ++i)
00797 {
00798 s->o2n_sub[i] = i;
00799 s->n2o_sub[i] = i;
00800 s->colorVec[i] = 0;
00801 }
00802 }
00803 else
00804 {
00805 SET_INFO ("subdomain coloring and reordering is ON");
00806 color_subdomain_graph_private (s);
00807 CHECK_V_ERROR;
00808 }
00809 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00810 }
00811
00812
00813
00814
00815
00816
00817 if (!bj)
00818 {
00819 find_ordered_neighbors_private (s);
00820 CHECK_V_ERROR;
00821 }
00822
00823
00824
00825
00826
00827
00828
00829 if (!bj)
00830 {
00831 t1 = MPI_Wtime ();
00832 SubdomainGraph_dhExchangePerms (s);
00833 CHECK_V_ERROR;
00834 s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
00835 }
00836
00837 END_FUNC_DH}
00838
00839
00840
00841 #undef __FUNC__
00842 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
00843 void
00844 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s)
00845 {
00846 START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
00847 MPI_Status *status = NULL;
00848 int *nabors = s->allNabors, naborCount = s->allCount;
00849 int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
00850 int m = s->row_count[myid_dh];
00851 int beg_row = s->beg_row[myid_dh];
00852 int beg_rowP = s->beg_rowP[myid_dh];
00853 int *bdryNodeCounts = s->bdry_count;
00854 int myBdryCount = s->bdry_count[myid_dh];
00855 bool debug = false;
00856 int myFirstBdry = m - myBdryCount;
00857 int *n2o_row = s->n2o_row;
00858 Hash_i_dh n2o_table, o2n_table;
00859
00860 if (logFile != NULL && s->debug)
00861 debug = true;
00862
00863
00864
00865
00866 sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
00867 CHECK_V_ERROR;
00868
00869
00870 if (debug)
00871 {
00872 fprintf (logFile,
00873 "\nSUBG myFirstBdry= %i myBdryCount= %i m= %i beg_rowP= %i\n",
00874 1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
00875 fflush (logFile);
00876 }
00877
00878 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00879 {
00880 sendBuf[2 * j] = n2o_row[i] + beg_row;
00881 sendBuf[2 * j + 1] = i + beg_rowP;
00882 }
00883
00884 if (debug)
00885 {
00886 fprintf (logFile, "\nSUBG SEND_BUF:\n");
00887 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00888 {
00889 fprintf (logFile, "SUBG %i, %i\n", 1 + sendBuf[2 * j],
00890 1 + sendBuf[2 * j + 1]);
00891 }
00892 fflush (logFile);
00893 }
00894
00895
00896
00897
00898
00899 naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
00900 CHECK_V_ERROR;
00901 naborIdx[0] = 0;
00902 nz = 0;
00903 for (i = 0; i < naborCount; ++i)
00904 {
00905 nz += (2 * bdryNodeCounts[nabors[i]]);
00906 naborIdx[i + 1] = nz;
00907 }
00908
00909
00910 recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
00911 CHECK_V_ERROR;
00912
00913
00914
00915
00916
00917 recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00918 CHECK_V_ERROR;
00919 send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00920 CHECK_V_ERROR;
00921 status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
00922 CHECK_V_ERROR;
00923
00924 for (i = 0; i < naborCount; ++i)
00925 {
00926 int nabr = nabors[i];
00927 int *buf = recvBuf + naborIdx[i];
00928 int ct = 2 * bdryNodeCounts[nabr];
00929
00930
00931 MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
00932 &(send_req[i]));
00933
00934 if (debug)
00935 {
00936 fprintf (logFile, "SUBG sending %i elts to %i\n", 2 * myBdryCount,
00937 nabr);
00938 fflush (logFile);
00939 }
00940
00941 MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
00942
00943 if (debug)
00944 {
00945 fprintf (logFile, "SUBG receiving %i elts from %i\n", ct, nabr);
00946 fflush (logFile);
00947 }
00948 }
00949
00950 MPI_Waitall (naborCount, send_req, status);
00951 MPI_Waitall (naborCount, recv_req, status);
00952
00953 Hash_i_dhCreate (&n2o_table, nz / 2);
00954 CHECK_V_ERROR;
00955 Hash_i_dhCreate (&o2n_table, nz / 2);
00956 CHECK_V_ERROR;
00957 s->n2o_ext = n2o_table;
00958 s->o2n_ext = o2n_table;
00959
00960
00961 for (i = 0; i < nz; i += 2)
00962 {
00963 int old = recvBuf[i];
00964 int new = recvBuf[i + 1];
00965
00966 if (debug)
00967 {
00968 fprintf (logFile, "SUBG i= %i old= %i new= %i\n", i, old + 1,
00969 new + 1);
00970 fflush (logFile);
00971 }
00972
00973 Hash_i_dhInsert (o2n_table, old, new);
00974 CHECK_V_ERROR;
00975 Hash_i_dhInsert (n2o_table, new, old);
00976 CHECK_V_ERROR;
00977 }
00978
00979
00980 if (recvBuf != NULL)
00981 {
00982 FREE_DH (recvBuf);
00983 CHECK_V_ERROR;
00984 }
00985 if (naborIdx != NULL)
00986 {
00987 FREE_DH (naborIdx);
00988 CHECK_V_ERROR;
00989 }
00990 if (sendBuf != NULL)
00991 {
00992 FREE_DH (sendBuf);
00993 CHECK_V_ERROR;
00994 }
00995 if (recv_req != NULL)
00996 {
00997 FREE_DH (recv_req);
00998 CHECK_V_ERROR;
00999 }
01000 if (send_req != NULL)
01001 {
01002 FREE_DH (send_req);
01003 CHECK_V_ERROR;
01004 }
01005 if (status != NULL)
01006 {
01007 FREE_DH (status);
01008 CHECK_V_ERROR;
01009 }
01010
01011 END_FUNC_DH}
01012
01013
01014
01015 #undef __FUNC__
01016 #define __FUNC__ "form_subdomaingraph_mpi_private"
01017 void
01018 form_subdomaingraph_mpi_private (SubdomainGraph_dh s)
01019 {
01020 START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
01021 int *idxAll = NULL;
01022 int i, j, nz, *adj, *ptrs = s->ptrs;
01023 MPI_Request *recvReqs = NULL, sendReq;
01024 MPI_Status *statuses = NULL, status;
01025
01026
01027 if (myid_dh == 0)
01028 {
01029 idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
01030 CHECK_V_ERROR;
01031 }
01032 MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
01033
01034
01035 if (myid_dh == 0)
01036 {
01037 nz = 0;
01038 for (i = 0; i < np_dh; ++i)
01039 nz += idxAll[i];
01040 }
01041 MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
01042
01043
01044
01045
01046 adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
01047 CHECK_V_ERROR;
01048
01049
01050 if (myid_dh == 0)
01051 {
01052 recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
01053 CHECK_V_ERROR;
01054 statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
01055 CHECK_V_ERROR;
01056
01057
01058 ptrs[0] = 0;
01059 for (j = 0; j < np_dh; ++j)
01060 ptrs[j + 1] = ptrs[j] + idxAll[j];
01061
01062
01063 for (j = 0; j < np_dh; ++j)
01064 {
01065 int ct = idxAll[j];
01066
01067 MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
01068 recvReqs + j);
01069 }
01070 }
01071
01072
01073 MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
01074
01075
01076 if (myid_dh == 0)
01077 {
01078 MPI_Waitall (np_dh, recvReqs, statuses);
01079 }
01080 MPI_Wait (&sendReq, &status);
01081
01082
01083 MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
01084 MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
01085
01086 if (idxAll != NULL)
01087 {
01088 FREE_DH (idxAll);
01089 CHECK_V_ERROR;
01090 }
01091 if (recvReqs != NULL)
01092 {
01093 FREE_DH (recvReqs);
01094 CHECK_V_ERROR;
01095 }
01096 if (statuses != NULL)
01097 {
01098 FREE_DH (statuses);
01099 CHECK_V_ERROR;
01100 }
01101
01102 END_FUNC_DH}
01103
01104
01105
01106
01107 #undef __FUNC__
01108 #define __FUNC__ "form_subdomaingraph_seq_private"
01109 void
01110 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A)
01111 {
01112 START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
01113 int *cval, len, *adj;
01114 int idx = 0, *ptrs = s->ptrs;
01115
01116
01117
01118
01119
01120
01121 adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01122 CHECK_V_ERROR;
01123
01124 dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01125 CHECK_V_ERROR;
01126 for (i = 0; i < blocks * blocks; ++i)
01127 dense[i] = 0;
01128
01129
01130 for (i = 0; i < blocks; ++i)
01131 {
01132 int beg_row = s->beg_row[i];
01133 int end_row = beg_row + s->row_count[i];
01134
01135 for (row = beg_row; row < end_row; ++row)
01136 {
01137 EuclidGetRow (A, row, &len, &cval, NULL);
01138 CHECK_V_ERROR;
01139 for (j = 0; j < len; ++j)
01140 {
01141 int col = cval[j];
01142 if (col < beg_row || col >= end_row)
01143 {
01144 int owner = SubdomainGraph_dhFindOwner (s, col, false);
01145 CHECK_V_ERROR;
01146 dense[i * blocks + owner] = 1;
01147 dense[owner * blocks + i] = 1;
01148 }
01149 }
01150 EuclidRestoreRow (A, row, &len, &cval, NULL);
01151 CHECK_V_ERROR;
01152 }
01153 }
01154
01155
01156
01157
01158 ptrs[0] = 0;
01159 for (i = 0; i < blocks; ++i)
01160 {
01161 for (j = 0; j < blocks; ++j)
01162 {
01163 if (dense[i * blocks + j])
01164 {
01165 adj[idx++] = j;
01166 }
01167 }
01168 ptrs[i + 1] = idx;
01169 }
01170
01171 FREE_DH (dense);
01172 CHECK_V_ERROR;
01173 END_FUNC_DH}
01174
01175
01176 #undef __FUNC__
01177 #define __FUNC__ "find_all_neighbors_sym_private"
01178 void
01179 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A)
01180 {
01181 START_FUNC_DH int *marker, i, j, beg_row, end_row;
01182 int row, len, *cval, ct = 0;
01183 int *nabors = s->allNabors;
01184
01185 marker = (int *) MALLOC_DH (m * sizeof (int));
01186 CHECK_V_ERROR;
01187 for (i = 0; i < m; ++i)
01188 marker[i] = 0;
01189
01190 SET_INFO
01191 ("finding nabors in subdomain graph for structurally symmetric matrix");
01192 SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
01193
01194 beg_row = s->beg_row[myid_dh];
01195 end_row = beg_row + s->row_count[myid_dh];
01196
01197 for (row = beg_row; row < end_row; ++row)
01198 {
01199 EuclidGetRow (A, row, &len, &cval, NULL);
01200 CHECK_V_ERROR;
01201 for (j = 0; j < len; ++j)
01202 {
01203 int col = cval[j];
01204 if (col < beg_row || col >= end_row)
01205 {
01206 int owner = SubdomainGraph_dhFindOwner (s, col, false);
01207 CHECK_V_ERROR;
01208 if (!marker[owner])
01209 {
01210 marker[owner] = 1;
01211 nabors[ct++] = owner;
01212 }
01213 }
01214 }
01215 EuclidRestoreRow (A, row, &len, &cval, NULL);
01216 CHECK_V_ERROR;
01217 }
01218 s->allCount = ct;
01219
01220
01221
01222 if (marker != NULL)
01223 {
01224 FREE_DH (marker);
01225 CHECK_V_ERROR;
01226 }
01227 END_FUNC_DH}
01228
01229 #undef __FUNC__
01230 #define __FUNC__ "find_all_neighbors_unsym_private"
01231 void
01232 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A)
01233 {
01234 START_FUNC_DH int i, j, row, beg_row, end_row;
01235 int *marker;
01236 int *cval, len, idx = 0;
01237 int nz, *nabors = s->allNabors, *myNabors;
01238
01239 myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
01240 CHECK_V_ERROR;
01241 marker = (int *) MALLOC_DH (np_dh * sizeof (int));
01242 CHECK_V_ERROR;
01243 for (i = 0; i < np_dh; ++i)
01244 marker[i] = 0;
01245
01246 SET_INFO
01247 ("finding nabors in subdomain graph for structurally unsymmetric matrix");
01248
01249
01250
01251
01252 beg_row = s->beg_row[myid_dh];
01253 end_row = beg_row + s->row_count[myid_dh];
01254
01255
01256
01257
01258 for (row = beg_row; row < end_row; ++row)
01259 {
01260 EuclidGetRow (A, row, &len, &cval, NULL);
01261 CHECK_V_ERROR;
01262 for (j = 0; j < len; ++j)
01263 {
01264 int col = cval[j];
01265
01266 if (col < beg_row || col >= end_row)
01267 {
01268 int owner = SubdomainGraph_dhFindOwner (s, col, false);
01269 CHECK_V_ERROR;
01270
01271 if (!marker[owner])
01272 {
01273 marker[owner] = 1;
01274
01275
01276 myNabors[idx++] = owner;
01277 }
01278 }
01279 }
01280 EuclidRestoreRow (A, row, &len, &cval, NULL);
01281 CHECK_V_ERROR;
01282 }
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307 MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
01308 CHECK_V_ERROR;
01309
01310
01311 for (i = 0; i < idx; ++i)
01312 nabors[myNabors[i]] = 1;
01313
01314
01315 nabors[myid_dh] = 0;
01316
01317
01318
01319
01320
01321
01322
01323
01324 nz = 0;
01325 for (i = 0; i < np_dh; ++i)
01326 {
01327 if (nabors[i])
01328 myNabors[nz++] = i;
01329 }
01330 s->allCount = nz;
01331 memcpy (nabors, myNabors, nz * sizeof (int));
01332
01333 if (marker != NULL)
01334 {
01335 FREE_DH (marker);
01336 CHECK_V_ERROR;
01337 }
01338 if (myNabors != NULL)
01339 {
01340 FREE_DH (myNabors);
01341 CHECK_V_ERROR;
01342 }
01343 END_FUNC_DH}
01344
01345
01346
01347 #undef __FUNC__
01348 #define __FUNC__ "find_bdry_nodes_sym_private"
01349 void
01350 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
01351 int *interiorNodes, int *bdryNodes,
01352 int *interiorCount, int *bdryCount)
01353 {
01354 START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01355 int end_row = beg_row + s->row_count[myid_dh];
01356 int row, inCt = 0, bdCt = 0;
01357
01358 int j;
01359 int *cval;
01360
01361
01362 for (row = beg_row; row < end_row; ++row)
01363 {
01364 bool isBdry = false;
01365 int len;
01366 EuclidGetRow (A, row, &len, &cval, NULL);
01367 CHECK_V_ERROR;
01368
01369 for (j = 0; j < len; ++j)
01370 {
01371 int col = cval[j];
01372 if (col < beg_row || col >= end_row)
01373 {
01374 isBdry = true;
01375 break;
01376 }
01377 }
01378 EuclidRestoreRow (A, row, &len, &cval, NULL);
01379 CHECK_V_ERROR;
01380
01381 if (isBdry)
01382 {
01383 bdryNodes[bdCt++] = row - beg_row;
01384 }
01385 else
01386 {
01387 interiorNodes[inCt++] = row - beg_row;
01388 }
01389 }
01390
01391 *interiorCount = inCt;
01392 *bdryCount = bdCt;
01393
01394 END_FUNC_DH}
01395
01396 #define BDRY_NODE_TAG 42
01397
01398 #undef __FUNC__
01399 #define __FUNC__ "find_bdry_nodes_unsym_private"
01400 void
01401 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A,
01402 int *interiorNodes, int *boundaryNodes,
01403 int *interiorCount, int *bdryCount)
01404 {
01405 START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01406 int end_row = beg_row + s->row_count[myid_dh];
01407 int i, j, row, max;
01408 int *cval;
01409 int *list, count;
01410 int *rpIN = NULL, *rpOUT = NULL;
01411 int *sendBuf, *recvBuf;
01412 int *marker, inCt, bdCt;
01413 int *bdryNodes, nz;
01414 int sendCt, recvCt;
01415 MPI_Request *sendReq, *recvReq;
01416 MPI_Status *status;
01417 SortedSet_dh ss;
01418
01419 SortedSet_dhCreate (&ss, m);
01420 CHECK_V_ERROR;
01421
01422
01423
01424
01425
01426 for (row = beg_row; row < end_row; ++row)
01427 {
01428 bool isBdry = false;
01429 int len;
01430 EuclidGetRow (A, row, &len, &cval, NULL);
01431 CHECK_V_ERROR;
01432
01433 for (j = 0; j < len; ++j)
01434 {
01435 int col = cval[j];
01436 if (col < beg_row || col >= end_row)
01437 {
01438 isBdry = true;
01439 SortedSet_dhInsert (ss, col);
01440 CHECK_V_ERROR;
01441
01442 }
01443 }
01444 EuclidRestoreRow (A, row, &len, &cval, NULL);
01445 CHECK_V_ERROR;
01446
01447 if (isBdry)
01448 {
01449 SortedSet_dhInsert (ss, row);
01450 CHECK_V_ERROR;
01451 }
01452 }
01453
01454
01455
01456
01457
01458 sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01459 CHECK_V_ERROR;
01460 recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01461 CHECK_V_ERROR;
01462 rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01463 CHECK_V_ERROR;
01464 rpOUT[0] = 0;
01465 for (i = 0; i < np_dh; ++i)
01466 sendBuf[i] = 0;
01467
01468 sendCt = 0;
01469 SortedSet_dhGetList (ss, &list, &count);
01470 CHECK_V_ERROR;
01471
01472 for (i = 0; i < count; )
01473 {
01474 int node = list[i];
01475 int owner;
01476 int last;
01477
01478 owner = SubdomainGraph_dhFindOwner (s, node, false);
01479 CHECK_V_ERROR;
01480 last = s->beg_row[owner] + s->row_count[owner];
01481
01482
01483 while ((i < count) && (list[i] < last))
01484 ++i;
01485 ++sendCt;
01486 rpOUT[sendCt] = i;
01487 sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
01488
01489 }
01490
01491
01492
01493
01494
01495 MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
01496 CHECK_V_ERROR;
01497
01498
01499
01500
01501
01502
01503
01504 rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01505 CHECK_V_ERROR;
01506 rpIN[0] = 0;
01507 nz = 0;
01508 recvCt = 0;
01509 for (i = 0; i < np_dh; ++i)
01510 {
01511 if (recvBuf[i])
01512 {
01513 ++recvCt;
01514 nz += recvBuf[i];
01515 rpIN[recvCt] = nz;
01516 }
01517 }
01518 bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
01519 CHECK_V_ERROR;
01520 sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
01521 CHECK_V_ERROR;
01522 recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
01523 CHECK_V_ERROR;
01524 max = MAX (sendCt, recvCt);
01525 status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
01526 CHECK_V_ERROR;
01527
01528
01529 j = 0;
01530 for (i = 0; i < np_dh; ++i)
01531 {
01532 if (recvBuf[i])
01533 {
01534 MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
01535 i, BDRY_NODE_TAG, comm_dh, recvReq + j);
01536 ++j;
01537 }
01538 }
01539
01540
01541 j = 0;
01542 for (i = 0; i < np_dh; ++i)
01543 {
01544 if (sendBuf[i])
01545 {
01546 MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
01547 i, BDRY_NODE_TAG, comm_dh, sendReq + j);
01548 ++j;
01549 }
01550 }
01551
01552
01553 MPI_Waitall (sendCt, sendReq, status);
01554 MPI_Waitall (recvCt, recvReq, status);
01555
01556
01557 for (i = 0; i < nz; ++i)
01558 bdryNodes[i] -= beg_row;
01559
01560
01561
01562
01563
01564 marker = (int *) MALLOC_DH (m * sizeof (int));
01565 CHECK_V_ERROR;
01566 for (i = 0; i < m; ++i)
01567 marker[i] = 0;
01568 for (i = 0; i < nz; ++i)
01569 marker[bdryNodes[i]] = 1;
01570
01571 inCt = bdCt = 0;
01572 for (i = 0; i < m; ++i)
01573 {
01574 if (marker[i])
01575 {
01576 boundaryNodes[bdCt++] = i;
01577 }
01578 else
01579 {
01580 interiorNodes[inCt++] = i;
01581 }
01582 }
01583 *interiorCount = inCt;
01584 *bdryCount = bdCt;
01585
01586
01587
01588
01589 SortedSet_dhDestroy (ss);
01590 CHECK_V_ERROR;
01591 if (rpIN != NULL)
01592 {
01593 FREE_DH (rpIN);
01594 CHECK_V_ERROR;
01595 }
01596 if (rpOUT != NULL)
01597 {
01598 FREE_DH (rpOUT);
01599 CHECK_V_ERROR;
01600 }
01601 if (sendBuf != NULL)
01602 {
01603 FREE_DH (sendBuf);
01604 CHECK_V_ERROR;
01605 }
01606 if (recvBuf != NULL)
01607 {
01608 FREE_DH (recvBuf);
01609 CHECK_V_ERROR;
01610 }
01611 if (bdryNodes != NULL)
01612 {
01613 FREE_DH (bdryNodes);
01614 CHECK_V_ERROR;
01615 }
01616 if (marker != NULL)
01617 {
01618 FREE_DH (marker);
01619 CHECK_V_ERROR;
01620 }
01621 if (sendReq != NULL)
01622 {
01623 FREE_DH (sendReq);
01624 CHECK_V_ERROR;
01625 }
01626 if (recvReq != NULL)
01627 {
01628 FREE_DH (recvReq);
01629 CHECK_V_ERROR;
01630 }
01631 if (status != NULL)
01632 {
01633 FREE_DH (status);
01634 CHECK_V_ERROR;
01635 }
01636 END_FUNC_DH}
01637
01638
01639 #undef __FUNC__
01640 #define __FUNC__ "find_ordered_neighbors_private"
01641 void
01642 find_ordered_neighbors_private (SubdomainGraph_dh s)
01643 {
01644 START_FUNC_DH int *loNabors = s->loNabors;
01645 int *hiNabors = s->hiNabors;
01646 int *allNabors = s->allNabors, allCount = s->allCount;
01647 int loCt = 0, hiCt = 0;
01648 int *o2n = s->o2n_sub;
01649 int i, myNewId = o2n[myid_dh];
01650
01651 for (i = 0; i < allCount; ++i)
01652 {
01653 int nabor = allNabors[i];
01654 if (o2n[nabor] < myNewId)
01655 {
01656 loNabors[loCt++] = nabor;
01657 }
01658 else
01659 {
01660 hiNabors[hiCt++] = nabor;
01661 }
01662 }
01663
01664 s->loCount = loCt;
01665 s->hiCount = hiCt;
01666 END_FUNC_DH}
01667
01668
01669 #undef __FUNC__
01670 #define __FUNC__ "color_subdomain_graph_private"
01671 void
01672 color_subdomain_graph_private (SubdomainGraph_dh s)
01673 {
01674 START_FUNC_DH int i, n = np_dh;
01675 int *rp = s->ptrs, *cval = s->adj;
01676 int j, *marker, thisNodesColor, *colorCounter;
01677 int *o2n = s->o2n_sub;
01678 int *color = s->colorVec;
01679
01680 if (np_dh == 1)
01681 n = s->blocks;
01682
01683 marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01684 colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01685 for (i = 0; i <= n; ++i)
01686 {
01687 marker[i] = -1;
01688 colorCounter[i] = 0;
01689 }
01690
01691
01692
01693
01694 for (i = 0; i < n; ++i)
01695 {
01696
01697
01698
01699
01700 for (j = rp[i]; j < rp[i + 1]; ++j)
01701 {
01702 int nabor = cval[j];
01703 if (nabor < i)
01704 {
01705 int naborsColor = color[nabor];
01706 marker[naborsColor] = i;
01707 }
01708 }
01709
01710
01711 thisNodesColor = -1;
01712 for (j = 0; j < n; ++j)
01713 {
01714 if (marker[j] != i)
01715 {
01716 thisNodesColor = j;
01717 break;
01718 }
01719 }
01720 color[i] = thisNodesColor;
01721 colorCounter[1 + thisNodesColor] += 1;
01722 }
01723
01724
01725
01726
01727
01728
01729 for (i = 1; i < n; ++i)
01730 {
01731 if (colorCounter[i] == 0)
01732 break;
01733 colorCounter[i] += colorCounter[i - 1];
01734 }
01735
01736 for (i = 0; i < n; ++i)
01737 {
01738 o2n[i] = colorCounter[color[i]];
01739 colorCounter[color[i]] += 1;
01740 }
01741
01742
01743 invert_perm (n, s->o2n_sub, s->n2o_sub);
01744 CHECK_V_ERROR;
01745
01746
01747
01748
01749
01750 {
01751 int ct = 0;
01752 for (j = 0; j < n; ++j)
01753 {
01754 if (marker[j] == -1)
01755 break;
01756 ++ct;
01757 }
01758 s->colors = ct;
01759 }
01760
01761
01762
01763
01764
01765 {
01766 int sum = 0;
01767 for (i = 0; i < n; ++i)
01768 {
01769 int old = s->n2o_sub[i];
01770 s->beg_rowP[old] = sum;
01771 sum += s->row_count[old];
01772 }
01773 }
01774
01775 FREE_DH (marker);
01776 CHECK_V_ERROR;
01777 FREE_DH (colorCounter);
01778 CHECK_V_ERROR;
01779 END_FUNC_DH}
01780
01781
01782 #undef __FUNC__
01783 #define __FUNC__ "SubdomainGraph_dhDump"
01784 void
01785 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename)
01786 {
01787 START_FUNC_DH int i;
01788 int sCt = np_dh;
01789 FILE *fp;
01790
01791 if (np_dh == 1)
01792 sCt = s->blocks;
01793
01794
01795
01796
01797
01798
01799 fp = openFile_dh (filename, "w");
01800 CHECK_V_ERROR;
01801
01802
01803 fprintf (fp, "----- colors used\n");
01804 fprintf (fp, "%i\n", s->colors);
01805 if (s->colorVec == NULL)
01806 {
01807 fprintf (fp, "s->colorVec == NULL\n");
01808 }
01809 else
01810 {
01811 fprintf (fp, "----- colorVec\n");
01812 for (i = 0; i < sCt; ++i)
01813 {
01814 fprintf (fp, "%i ", s->colorVec[i]);
01815 }
01816 fprintf (fp, "\n");
01817 }
01818
01819 if (s->o2n_sub == NULL || s->o2n_sub == NULL)
01820 {
01821 fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
01822 }
01823 else
01824 {
01825 fprintf (fp, "----- o2n_sub\n");
01826 for (i = 0; i < sCt; ++i)
01827 {
01828 fprintf (fp, "%i ", s->o2n_sub[i]);
01829 }
01830 fprintf (fp, "\n");
01831 fprintf (fp, "----- n2o_sub\n");
01832 for (i = 0; i < sCt; ++i)
01833 {
01834 fprintf (fp, "%i ", s->n2o_sub[i]);
01835 }
01836 fprintf (fp, "\n");
01837 }
01838
01839
01840 if (s->beg_row == NULL || s->beg_rowP == NULL)
01841 {
01842 fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
01843 }
01844 else
01845 {
01846 fprintf (fp, "----- beg_row\n");
01847 for (i = 0; i < sCt; ++i)
01848 {
01849 fprintf (fp, "%i ", 1 + s->beg_row[i]);
01850 }
01851 fprintf (fp, "\n");
01852 fprintf (fp, "----- beg_rowP\n");
01853 for (i = 0; i < sCt; ++i)
01854 {
01855 fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
01856 }
01857 fprintf (fp, "\n");
01858 }
01859
01860
01861 if (s->row_count == NULL || s->bdry_count == NULL)
01862 {
01863 fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
01864 }
01865 else
01866 {
01867 fprintf (fp, "----- row_count\n");
01868 for (i = 0; i < sCt; ++i)
01869 {
01870 fprintf (fp, "%i ", s->row_count[i]);
01871 }
01872 fprintf (fp, "\n");
01873 fprintf (fp, "----- bdry_count\n");
01874 for (i = 0; i < sCt; ++i)
01875 {
01876 fprintf (fp, "%i ", s->bdry_count[i]);
01877 }
01878 fprintf (fp, "\n");
01879
01880 }
01881
01882
01883 if (s->ptrs == NULL || s->adj == NULL)
01884 {
01885 fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
01886 }
01887 else
01888 {
01889 int j;
01890 int ct;
01891 fprintf (fp, "----- subdomain graph\n");
01892 for (i = 0; i < sCt; ++i)
01893 {
01894 fprintf (fp, "%i :: ", i);
01895 ct = s->ptrs[i + 1] - s->ptrs[i];
01896 if (ct)
01897 {
01898 shellSort_int (ct, s->adj + s->ptrs[i]);
01899 CHECK_V_ERROR;
01900 }
01901 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
01902 {
01903 fprintf (fp, "%i ", s->adj[j]);
01904 }
01905 fprintf (fp, "\n");
01906 }
01907 }
01908 closeFile_dh (fp);
01909 CHECK_V_ERROR;
01910
01911
01912
01913
01914
01915 if (s->beg_rowP == NULL)
01916 {
01917 SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
01918 }
01919 if (s->row_count == NULL)
01920 {
01921 SET_V_ERROR ("s->row_count == NULL; can't continue");
01922 }
01923 if (s->o2n_sub == NULL)
01924 {
01925 SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
01926 }
01927
01928
01929 if (np_dh == 1)
01930 {
01931 fp = openFile_dh (filename, "a");
01932 CHECK_V_ERROR;
01933
01934
01935 if (s->n2o_row == NULL || s->o2n_col == NULL)
01936 {
01937 fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
01938 }
01939 else
01940 {
01941 fprintf (fp, "----- n2o_row\n");
01942 for (i = 0; i < s->m; ++i)
01943 {
01944 fprintf (fp, "%i ", 1 + s->n2o_row[i]);
01945 }
01946 fprintf (fp, "\n");
01947
01948 #if 0
01949
01950
01951
01952
01953
01954
01955 fprintf (fp, "----- o2n_col\n");
01956 for (i = 0; i < sCt; ++i)
01957 {
01958 int br = s->beg_row[i];
01959 int er = br + s->row_count[i];
01960
01961 for (j = br; j < er; ++j)
01962 {
01963 fprintf (fp, "%i ", 1 + s->o2n_col[j]);
01964 }
01965 fprintf (fp, "\n");
01966 }
01967 fprintf (fp, "\n");
01968
01969 #endif
01970
01971 }
01972 closeFile_dh (fp);
01973 CHECK_V_ERROR;
01974 }
01975
01976
01977 else
01978 {
01979 int id = s->n2o_sub[myid_dh];
01980 int m = s->m;
01981 int pe;
01982 int beg_row = 0;
01983 if (s->beg_row != 0)
01984 beg_row = s->beg_row[myid_dh];
01985
01986
01987 for (pe = 0; pe < np_dh; ++pe)
01988 {
01989 MPI_Barrier (comm_dh);
01990 if (id == pe)
01991 {
01992 fp = openFile_dh (filename, "a");
01993 CHECK_V_ERROR;
01994 if (id == 0)
01995 fprintf (fp, "----- n2o_row\n");
01996
01997 for (i = 0; i < m; ++i)
01998 {
01999 fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
02000 }
02001 if (id == np_dh - 1)
02002 fprintf (fp, "\n");
02003 closeFile_dh (fp);
02004 CHECK_V_ERROR;
02005 }
02006 }
02007
02008 #if 0
02009
02010
02011 for (pe = 0; pe < np_dh; ++pe)
02012 {
02013 MPI_Barrier (comm_dh);
02014 if (myid_dh == pe)
02015 {
02016 fp = openFile_dh (filename, "a");
02017 CHECK_V_ERROR;
02018 if (myid_dh == 0)
02019 fprintf (fp, "----- o2n_col\n");
02020
02021 for (i = 0; i < m; ++i)
02022 {
02023 fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
02024 }
02025 fprintf (fp, "\n");
02026
02027 if (myid_dh == np_dh - 1)
02028 fprintf (fp, "\n");
02029
02030 closeFile_dh (fp);
02031 CHECK_V_ERROR;
02032 }
02033 }
02034
02035 #endif
02036
02037 }
02038
02039 END_FUNC_DH}
02040
02041
02042
02043 #undef __FUNC__
02044 #define __FUNC__ "find_bdry_nodes_seq_private"
02045 void
02046 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A)
02047 {
02048 START_FUNC_DH int i, j, row, blocks = s->blocks;
02049 int *cval, *tmp;
02050
02051 tmp = (int *) MALLOC_DH (m * sizeof (int));
02052 CHECK_V_ERROR;
02053 for (i = 0; i < m; ++i)
02054 tmp[i] = 0;
02055
02056
02057
02058
02059 for (i = 0; i < blocks; ++i)
02060 {
02061 int beg_row = s->beg_row[i];
02062 int end_row = beg_row + s->row_count[i];
02063
02064 for (row = beg_row; row < end_row; ++row)
02065 {
02066 bool isBdry = false;
02067 int len;
02068 EuclidGetRow (A, row, &len, &cval, NULL);
02069 CHECK_V_ERROR;
02070
02071 for (j = 0; j < len; ++j)
02072 {
02073 int col = cval[j];
02074
02075 if (col < beg_row || col >= end_row)
02076 {
02077 tmp[col] = 1;
02078 isBdry = true;
02079 }
02080 }
02081 if (isBdry)
02082 tmp[row] = 1;
02083 EuclidRestoreRow (A, row, &len, &cval, NULL);
02084 CHECK_V_ERROR;
02085 }
02086 }
02087
02088
02089
02090
02091 for (i = 0; i < blocks; ++i)
02092 {
02093 int beg_row = s->beg_row[i];
02094 int end_row = beg_row + s->row_count[i];
02095 int ct = 0;
02096 for (row = beg_row; row < end_row; ++row)
02097 {
02098 if (tmp[row])
02099 ++ct;
02100 }
02101 s->bdry_count[i] = ct;
02102 }
02103
02104
02105
02106
02107 for (i = 0; i < blocks; ++i)
02108 {
02109 int beg_row = s->beg_row[i];
02110 int end_row = beg_row + s->row_count[i];
02111 int interiorIDX = beg_row;
02112 int bdryIDX = end_row - s->bdry_count[i];
02113
02114 for (row = beg_row; row < end_row; ++row)
02115 {
02116 if (tmp[row])
02117 {
02118 s->o2n_col[row] = bdryIDX++;
02119 }
02120 else
02121 {
02122 s->o2n_col[row] = interiorIDX++;
02123 }
02124 }
02125 }
02126
02127
02128 invert_perm (m, s->o2n_col, s->n2o_row);
02129 CHECK_V_ERROR;
02130 FREE_DH (tmp);
02131 CHECK_V_ERROR;
02132
02133 END_FUNC_DH}
02134
02135 #undef __FUNC__
02136 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
02137 void
02138 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp)
02139 {
02140 START_FUNC_DH if (myid_dh == 0)
02141 {
02142 int i, j;
02143
02144 fprintf (fp,
02145 "\n-----------------------------------------------------\n");
02146 fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
02147 fprintf (fp, "-----------------------------------------------------\n");
02148 fprintf (fp, "colors used: %i\n", s->colors);
02149
02150 fprintf (fp, "o2n ordering vector: ");
02151 for (i = 0; i < s->blocks; ++i)
02152 fprintf (fp, "%i ", s->o2n_sub[i]);
02153
02154 fprintf (fp, "\ncoloring vector (node, color): \n");
02155 for (i = 0; i < s->blocks; ++i)
02156 fprintf (fp, " %i, %i\n", i, s->colorVec[i]);
02157
02158 fprintf (fp, "\n");
02159 fprintf (fp, "Adjacency lists:\n");
02160
02161 for (i = 0; i < s->blocks; ++i)
02162 {
02163 fprintf (fp, " P_%i :: ", i);
02164 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
02165 {
02166 fprintf (fp, "%i ", s->adj[j]);
02167 }
02168 fprintf (fp, "\n");
02169 }
02170 fprintf (fp, "-----------------------------------------------------\n");
02171 }
02172 END_FUNC_DH}
02173
02174
02175 #undef __FUNC__
02176 #define __FUNC__ "adjust_matrix_perms_private"
02177 void
02178 adjust_matrix_perms_private (SubdomainGraph_dh s, int m)
02179 {
02180 START_FUNC_DH int i, j, blocks = s->blocks;
02181 int *o2n = s->o2n_col;
02182
02183 for (i = 0; i < blocks; ++i)
02184 {
02185 int beg_row = s->beg_row[i];
02186 int end_row = beg_row + s->row_count[i];
02187 int adjust = s->beg_rowP[i] - s->beg_row[i];
02188 for (j = beg_row; j < end_row; ++j)
02189 o2n[j] += adjust;
02190 }
02191
02192 invert_perm (m, s->o2n_col, s->n2o_row);
02193 CHECK_V_ERROR;
02194 END_FUNC_DH}
02195
02196 #undef __FUNC__
02197 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
02198 void
02199 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp)
02200 {
02201 START_FUNC_DH int i;
02202 int blocks = np_dh;
02203 double ratio[25];
02204
02205 if (myid_dh == 0)
02206 {
02207 if (np_dh == 1)
02208 blocks = s->blocks;
02209 if (blocks > 25)
02210 blocks = 25;
02211
02212 fprintf (fp, "\n");
02213 fprintf (fp, "Subdomain interior/boundary node ratios\n");
02214 fprintf (fp, "---------------------------------------\n");
02215
02216
02217 for (i = 0; i < blocks; ++i)
02218 {
02219 if (s->bdry_count[i] == 0)
02220 {
02221 ratio[i] = -1;
02222 }
02223 else
02224 {
02225 ratio[i] =
02226 (double) (s->row_count[i] -
02227 s->bdry_count[i]) / (double) s->bdry_count[i];
02228 }
02229 }
02230
02231
02232 shellSort_float (blocks, ratio);
02233
02234
02235 if (blocks <= 20)
02236 {
02237 int j = 0;
02238 for (i = 0; i < blocks; ++i)
02239 {
02240 fprintf (fp, "%0.2g ", ratio[i]);
02241 ++j;
02242 if (j == 10)
02243 {
02244 fprintf (fp, "\n");
02245 }
02246 }
02247 fprintf (fp, "\n");
02248 }
02249 else
02250 {
02251 fprintf (fp, "10 smallest ratios: ");
02252 for (i = 0; i < 10; ++i)
02253 {
02254 fprintf (fp, "%0.2g ", ratio[i]);
02255 }
02256 fprintf (fp, "\n");
02257 fprintf (fp, "10 largest ratios: ");
02258 {
02259 int start = blocks - 6, stop = blocks - 1;
02260 for (i = start; i < stop; ++i)
02261 {
02262 fprintf (fp, "%0.2g ", ratio[i]);
02263 }
02264 fprintf (fp, "\n");
02265 }
02266 }
02267 }
02268
02269 END_FUNC_DH}
02270
02271
02272 #undef __FUNC__
02273 #define __FUNC__ "SubdomainGraph_dhPrintStats"
02274 void
02275 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp)
02276 {
02277 START_FUNC_DH double *timing = sg->timing;
02278
02279 fprintf_dh (fp, "\nSubdomainGraph timing report\n");
02280 fprintf_dh (fp, "-----------------------------\n");
02281 fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
02282 fprintf_dh (fp, " find neighbors in subdomain graph: %0.2f\n",
02283 timing[FIND_NABORS_SGT]);
02284 fprintf_dh (fp, " locally order interiors and bdry: %0.2f\n",
02285 timing[ORDER_BDRY_SGT]);
02286 fprintf_dh (fp, " form and color subdomain graph: %0.2f\n",
02287 timing[FORM_GRAPH_SGT]);
02288 fprintf_dh (fp, " exchange bdry permutations: %0.2f\n",
02289 timing[EXCHANGE_PERMS_SGT]);
02290 fprintf_dh (fp, " everything else (should be small): %0.2f\n",
02291 timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
02292 timing[ORDER_BDRY_SGT] +
02293 timing[FORM_GRAPH_SGT] +
02294 timing[EXCHANGE_PERMS_SGT]));
02295 END_FUNC_DH}