00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "SubdomainGraph_dh.h"
00031 #include "getRow_dh.h"
00032 #include "Mem_dh.h"
00033 #include "Parser_dh.h"
00034 #include "Hash_i_dh.h"
00035 #include "mat_dh_private.h"
00036 #include "io_dh.h"
00037 #include "SortedSet_dh.h"
00038 #include "shellSort_dh.h"
00039
00040
00041
00042 #include <unistd.h>
00043
00044 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
00045 void *A);
00046 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
00047 void *A);
00048
00049
00050
00051
00052
00053 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
00054 bool bj);
00055 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s);
00056 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m,
00057 void *A);
00058 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m,
00059 void *A);
00060 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m,
00061 void *A);
00062 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
00063 int *interiorNodes, int *bdryNodes,
00064 int *interiorCount, int *bdryCount);
00065 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m,
00066 void *A, int *interiorNodes,
00067 int *bdryNodes, int *interiorCount,
00068 int *bdryCount);
00069
00070 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
00071
00072
00073 static void find_ordered_neighbors_private (SubdomainGraph_dh s);
00074 static void color_subdomain_graph_private (SubdomainGraph_dh s);
00075 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m);
00076
00077 #undef __FUNC__
00078 #define __FUNC__ "SubdomainGraph_dhCreate"
00079 void
00080 SubdomainGraph_dhCreate (SubdomainGraph_dh * s)
00081 {
00082 START_FUNC_DH
00083 struct _subdomain_dh *tmp =
00084 (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
00085 CHECK_V_ERROR;
00086 *s = tmp;
00087
00088 tmp->blocks = 1;
00089 tmp->ptrs = tmp->adj = NULL;
00090 tmp->colors = 1;
00091 tmp->colorVec = NULL;
00092 tmp->o2n_sub = tmp->n2o_sub = NULL;
00093 tmp->beg_row = tmp->beg_rowP = NULL;
00094 tmp->bdry_count = tmp->row_count = NULL;
00095 tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
00096 tmp->loCount = tmp->hiCount = tmp->allCount = 0;
00097
00098 tmp->m = 0;
00099 tmp->n2o_row = tmp->o2n_col = NULL;
00100 tmp->o2n_ext = tmp->n2o_ext = NULL;
00101
00102 tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
00103 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
00104 {
00105 int i;
00106 for (i = 0; i < TIMING_BINS_SG; ++i)
00107 tmp->timing[i] = 0.0;
00108 }
00109 END_FUNC_DH}
00110
00111 #undef __FUNC__
00112 #define __FUNC__ "SubdomainGraph_dhDestroy"
00113 void
00114 SubdomainGraph_dhDestroy (SubdomainGraph_dh s)
00115 {
00116 START_FUNC_DH if (s->ptrs != NULL)
00117 {
00118 FREE_DH (s->ptrs);
00119 CHECK_V_ERROR;
00120 }
00121 if (s->adj != NULL)
00122 {
00123 FREE_DH (s->adj);
00124 CHECK_V_ERROR;
00125 }
00126 if (s->colorVec != NULL)
00127 {
00128 FREE_DH (s->colorVec);
00129 CHECK_V_ERROR;
00130 }
00131 if (s->o2n_sub != NULL)
00132 {
00133 FREE_DH (s->o2n_sub);
00134 CHECK_V_ERROR;
00135 }
00136 if (s->n2o_sub != NULL)
00137 {
00138 FREE_DH (s->n2o_sub);
00139 CHECK_V_ERROR;
00140 }
00141
00142 if (s->beg_row != NULL)
00143 {
00144 FREE_DH (s->beg_row);
00145 CHECK_V_ERROR;
00146 }
00147 if (s->beg_rowP != NULL)
00148 {
00149 FREE_DH (s->beg_rowP);
00150 CHECK_V_ERROR;
00151 }
00152 if (s->row_count != NULL)
00153 {
00154 FREE_DH (s->row_count);
00155 CHECK_V_ERROR;
00156 }
00157 if (s->bdry_count != NULL)
00158 {
00159 FREE_DH (s->bdry_count);
00160 CHECK_V_ERROR;
00161 }
00162 if (s->loNabors != NULL)
00163 {
00164 FREE_DH (s->loNabors);
00165 CHECK_V_ERROR;
00166 }
00167 if (s->hiNabors != NULL)
00168 {
00169 FREE_DH (s->hiNabors);
00170 CHECK_V_ERROR;
00171 }
00172 if (s->allNabors != NULL)
00173 {
00174 FREE_DH (s->allNabors);
00175 CHECK_V_ERROR;
00176 }
00177
00178 if (s->n2o_row != NULL)
00179 {
00180 FREE_DH (s->n2o_row);
00181 CHECK_V_ERROR;
00182 }
00183 if (s->o2n_col != NULL)
00184 {
00185 FREE_DH (s->o2n_col);
00186 CHECK_V_ERROR;
00187 }
00188 if (s->o2n_ext != NULL)
00189 {
00190 Hash_i_dhDestroy (s->o2n_ext);
00191 CHECK_V_ERROR;
00192 }
00193 if (s->n2o_ext != NULL)
00194 {
00195 Hash_i_dhDestroy (s->n2o_ext);
00196 CHECK_V_ERROR;
00197 }
00198 FREE_DH (s);
00199 CHECK_V_ERROR;
00200 END_FUNC_DH}
00201
00202 #undef __FUNC__
00203 #define __FUNC__ "SubdomainGraph_dhInit"
00204 void
00205 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00206 {
00207 START_FUNC_DH double t1 = MPI_Wtime ();
00208
00209 if (blocks < 1)
00210 blocks = 1;
00211
00212 if (np_dh == 1 || blocks > 1)
00213 {
00214 s->blocks = blocks;
00215 init_seq_private (s, blocks, bj, A);
00216 CHECK_V_ERROR;
00217 }
00218 else
00219 {
00220 s->blocks = np_dh;
00221 init_mpi_private (s, np_dh, bj, A);
00222 CHECK_V_ERROR;
00223 }
00224
00225 s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
00226 END_FUNC_DH}
00227
00228
00229 #undef __FUNC__
00230 #define __FUNC__ "SubdomainGraph_dhFindOwner"
00231 int
00232 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted)
00233 {
00234 START_FUNC_DH int sd;
00235 int *beg_row = s->beg_row;
00236 int *row_count = s->row_count;
00237 int owner = -1, blocks = s->blocks;
00238
00239 if (permuted)
00240 beg_row = s->beg_rowP;
00241
00242
00243 for (sd = 0; sd < blocks; ++sd)
00244 {
00245 if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
00246 {
00247 owner = sd;
00248 break;
00249 }
00250 }
00251
00252 if (owner == -1)
00253 {
00254
00255 fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
00256 fprintf (stderr, "blocks= %i\n", blocks);
00257
00258 sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
00259 SET_ERROR (-1, msgBuf_dh);
00260 }
00261
00262 END_FUNC_VAL (owner)}
00263
00264
00265 #undef __FUNC__
00266 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
00267 void
00268 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp)
00269 {
00270 START_FUNC_DH int i, j, k;
00271 double max = 0, min = INT_MAX;
00272
00273 fprintf (fp,
00274 "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
00275 fprintf (fp, "colors used = %i\n", s->colors);
00276 fprintf (fp, "subdomain count = %i\n", s->blocks);
00277
00278
00279 fprintf (fp, "\ninterior/boundary node ratios:\n");
00280
00281 for (i = 0; i < s->blocks; ++i)
00282 {
00283 int inNodes = s->row_count[i] - s->bdry_count[i];
00284 int bdNodes = s->bdry_count[i];
00285 double ratio;
00286
00287 if (bdNodes == 0)
00288 {
00289 ratio = -1;
00290 }
00291 else
00292 {
00293 ratio = (double) inNodes / (double) bdNodes;
00294 }
00295
00296 max = MAX (max, ratio);
00297 min = MIN (min, ratio);
00298 fprintf (fp,
00299 " P_%i: first= %3i rowCount= %3i interior= %3i bdry= %3i ratio= %0.1f\n",
00300 i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
00301 ratio);
00302 }
00303
00304
00305 fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
00306 fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
00307
00308
00309
00310
00311
00312 if (s->adj != NULL)
00313 {
00314 fprintf (fp, "\nunpermuted subdomain graph: \n");
00315 for (i = 0; i < s->blocks; ++i)
00316 {
00317 fprintf (fp, "%i :: ", i);
00318 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
00319 {
00320 fprintf (fp, "%i ", s->adj[j]);
00321 }
00322 fprintf (fp, "\n");
00323 }
00324 }
00325
00326
00327
00328
00329
00330 fprintf (fp, "\no2n subdomain permutation:\n");
00331 for (i = 0; i < s->blocks; ++i)
00332 {
00333 fprintf (fp, " %i %i\n", i, s->o2n_sub[i]);
00334 }
00335 fprintf (fp, "\n");
00336
00337 if (np_dh > 1)
00338 {
00339
00340
00341
00342
00343 fprintf (fp, "\nlocal n2o_row permutation:\n ");
00344 for (i = 0; i < s->row_count[myid_dh]; ++i)
00345 {
00346 fprintf (fp, "%i ", s->n2o_row[i]);
00347 }
00348 fprintf (fp, "\n");
00349
00350
00351
00352
00353 fprintf (fp, "\nlocal o2n_col permutation:\n ");
00354 for (i = 0; i < s->row_count[myid_dh]; ++i)
00355 {
00356 fprintf (fp, "%i ", s->o2n_col[i]);
00357 }
00358 fprintf (fp, "\n");
00359
00360 }
00361 else
00362 {
00363
00364
00365
00366 fprintf (fp, "\nlocal n2o_row permutation:\n");
00367 fprintf (fp, "--------------------------\n");
00368 for (k = 0; k < s->blocks; ++k)
00369 {
00370 int beg_row = s->beg_row[k];
00371 int end_row = beg_row + s->row_count[k];
00372
00373 for (i = beg_row; i < end_row; ++i)
00374 {
00375 fprintf (fp, "%i ", s->n2o_row[i]);
00376 }
00377 fprintf (fp, "\n");
00378 }
00379
00380
00381
00382
00383 fprintf (fp, "\nlocal o2n_col permutation:\n");
00384 fprintf (fp, "--------------------------\n");
00385 for (k = 0; k < s->blocks; ++k)
00386 {
00387 int beg_row = s->beg_row[k];
00388 int end_row = beg_row + s->row_count[k];
00389
00390 for (i = beg_row; i < end_row; ++i)
00391 {
00392 fprintf (fp, "%i ", s->o2n_col[i]);
00393 }
00394 fprintf (fp, "\n");
00395 }
00396
00397
00398 }
00399
00400 END_FUNC_DH}
00401
00402 #undef __FUNC__
00403 #define __FUNC__ "init_seq_private"
00404 void
00405 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00406 {
00407 START_FUNC_DH int m, n, beg_row;
00408 double t1;
00409
00410
00411
00412
00413
00414
00415 EuclidGetDimensions (A, &beg_row, &m, &n);
00416 CHECK_V_ERROR;
00417 s->m = n;
00418
00419
00420
00421
00422
00423
00424 allocate_storage_private (s, blocks, m, bj);
00425 CHECK_V_ERROR;
00426
00427
00428
00429
00430
00431
00432
00433 {
00434 int i;
00435 int rpp = m / blocks;
00436
00437 if (rpp * blocks < m)
00438 ++rpp;
00439
00440 s->beg_row[0] = 0;
00441 for (i = 1; i < blocks; ++i)
00442 s->beg_row[i] = rpp + s->beg_row[i - 1];
00443 for (i = 0; i < blocks; ++i)
00444 s->row_count[i] = rpp;
00445 s->row_count[blocks - 1] = m - rpp * (blocks - 1);
00446 }
00447 memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465 t1 = MPI_Wtime ();
00466 if (!bj)
00467 {
00468 find_bdry_nodes_seq_private (s, m, A);
00469 CHECK_V_ERROR;
00470 }
00471 else
00472 {
00473 int i;
00474 for (i = 0; i < m; ++i)
00475 {
00476 s->n2o_row[i] = i;
00477 s->o2n_col[i] = i;
00478 }
00479 }
00480 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491 t1 = MPI_Wtime ();
00492 if (!bj)
00493 {
00494 form_subdomaingraph_seq_private (s, m, A);
00495 CHECK_V_ERROR;
00496 if (s->doNotColor)
00497 {
00498 int i;
00499 printf_dh ("subdomain coloring and reordering is OFF\n");
00500 for (i = 0; i < blocks; ++i)
00501 {
00502 s->o2n_sub[i] = i;
00503 s->n2o_sub[i] = i;
00504 s->colorVec[i] = 0;
00505 }
00506 }
00507 else
00508 {
00509 SET_INFO ("subdomain coloring and reordering is ON");
00510 color_subdomain_graph_private (s);
00511 CHECK_V_ERROR;
00512 }
00513 }
00514
00515
00516 else
00517 {
00518 int i;
00519 for (i = 0; i < blocks; ++i)
00520 {
00521 s->o2n_sub[i] = i;
00522 s->n2o_sub[i] = i;
00523 }
00524 }
00525 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00526
00527
00528
00529
00530
00531
00532
00533
00534 if (!bj)
00535 {
00536 adjust_matrix_perms_private (s, m);
00537 CHECK_V_ERROR;
00538 }
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556 END_FUNC_DH}
00557
00558
00559 #if 0
00560 #undef __FUNC__
00561 #define __FUNC__ "partition_metis_private"
00562 void
00563 partition_metis_private (SubdomainGraph_dh s, void *A)
00564 {
00565 START_FUNC_DH if (ignoreMe)
00566 SET_V_ERROR ("not implemented");
00567 END_FUNC_DH}
00568 #endif
00569
00570 #undef __FUNC__
00571 #define __FUNC__ "allocate_storage_private"
00572 void
00573 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj)
00574 {
00575 START_FUNC_DH if (!bj)
00576 {
00577 s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
00578 CHECK_V_ERROR;
00579 s->ptrs[0] = 0;
00580 s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
00581 CHECK_V_ERROR;
00582 s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00583 CHECK_V_ERROR;
00584 s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00585 CHECK_V_ERROR;
00586 s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
00587 CHECK_V_ERROR;
00588 }
00589
00590 s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
00591 CHECK_V_ERROR;
00592 s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
00593 CHECK_V_ERROR;
00594
00595
00596
00597 s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
00598 CHECK_V_ERROR;
00599 s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
00600 CHECK_V_ERROR;
00601 s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
00602 CHECK_V_ERROR;
00603 s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
00604 CHECK_V_ERROR;
00605 s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00606 CHECK_V_ERROR;
00607 s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
00608 CHECK_V_ERROR;
00609
00610 END_FUNC_DH}
00611
00612
00613
00614
00615 #undef __FUNC__
00616 #define __FUNC__ "init_mpi_private"
00617 void
00618 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
00619 {
00620 START_FUNC_DH int m, n, beg_row;
00621 bool symmetric;
00622 double t1;
00623
00624 symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
00625 CHECK_V_ERROR;
00626 if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
00627 {
00628 symmetric = true;
00629 }
00630
00631
00632
00633
00634
00635 EuclidGetDimensions (A, &beg_row, &m, &n);
00636 CHECK_V_ERROR;
00637 s->m = m;
00638
00639
00640
00641
00642
00643
00644 allocate_storage_private (s, blocks, m, bj);
00645 CHECK_V_ERROR;
00646
00647
00648
00649
00650
00651
00652
00653 if (!bj)
00654 {
00655 MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
00656 MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
00657 memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
00658 }
00659 else
00660 {
00661 s->beg_row[myid_dh] = beg_row;
00662 s->beg_rowP[myid_dh] = beg_row;
00663 s->row_count[myid_dh] = m;
00664 }
00665
00666
00667
00668
00669
00670 if (!bj)
00671 {
00672 t1 = MPI_Wtime ();
00673 if (symmetric)
00674 {
00675 find_all_neighbors_sym_private (s, m, A);
00676 CHECK_V_ERROR;
00677 }
00678 else
00679 {
00680 find_all_neighbors_unsym_private (s, m, A);
00681 CHECK_V_ERROR;
00682 }
00683 s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
00684 }
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695 t1 = MPI_Wtime ();
00696 if (!bj)
00697 {
00698 int *interiorNodes, *bdryNodes;
00699 int interiorCount, bdryCount;
00700 int *o2n = s->o2n_col, idx;
00701 int i;
00702
00703 interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
00704 CHECK_V_ERROR;
00705 bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
00706 CHECK_V_ERROR;
00707
00708
00709
00710
00711 if (symmetric)
00712 {
00713 find_bdry_nodes_sym_private (s, m, A,
00714 interiorNodes, bdryNodes,
00715 &interiorCount, &bdryCount);
00716 CHECK_V_ERROR;
00717 }
00718 else
00719 {
00720 find_bdry_nodes_unsym_private (s, m, A,
00721 interiorNodes, bdryNodes,
00722 &interiorCount, &bdryCount);
00723 CHECK_V_ERROR;
00724 }
00725
00726
00727 MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
00728 comm_dh);
00729
00730
00731 idx = 0;
00732 for (i = 0; i < interiorCount; ++i)
00733 {
00734 o2n[interiorNodes[i]] = idx++;
00735 }
00736 for (i = 0; i < bdryCount; ++i)
00737 {
00738 o2n[bdryNodes[i]] = idx++;
00739 }
00740
00741
00742 invert_perm (m, o2n, s->n2o_row);
00743 CHECK_V_ERROR;
00744
00745 FREE_DH (interiorNodes);
00746 CHECK_V_ERROR;
00747 FREE_DH (bdryNodes);
00748 CHECK_V_ERROR;
00749 }
00750
00751
00752 else
00753 {
00754 int *o2n = s->o2n_col, *n2o = s->n2o_row;
00755 int i, m = s->m;
00756
00757 for (i = 0; i < m; ++i)
00758 {
00759 o2n[i] = i;
00760 n2o[i] = i;
00761 }
00762 }
00763 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774 if (!bj)
00775 {
00776 t1 = MPI_Wtime ();
00777 form_subdomaingraph_mpi_private (s);
00778 CHECK_V_ERROR;
00779 if (s->doNotColor)
00780 {
00781 int i;
00782 printf_dh ("subdomain coloring and reordering is OFF\n");
00783 for (i = 0; i < blocks; ++i)
00784 {
00785 s->o2n_sub[i] = i;
00786 s->n2o_sub[i] = i;
00787 s->colorVec[i] = 0;
00788 }
00789 }
00790 else
00791 {
00792 SET_INFO ("subdomain coloring and reordering is ON");
00793 color_subdomain_graph_private (s);
00794 CHECK_V_ERROR;
00795 }
00796 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
00797 }
00798
00799
00800
00801
00802
00803
00804 if (!bj)
00805 {
00806 find_ordered_neighbors_private (s);
00807 CHECK_V_ERROR;
00808 }
00809
00810
00811
00812
00813
00814
00815
00816 if (!bj)
00817 {
00818 t1 = MPI_Wtime ();
00819 SubdomainGraph_dhExchangePerms (s);
00820 CHECK_V_ERROR;
00821 s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
00822 }
00823
00824 END_FUNC_DH}
00825
00826
00827
00828 #undef __FUNC__
00829 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
00830 void
00831 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s)
00832 {
00833 START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
00834 MPI_Status *status = NULL;
00835 int *nabors = s->allNabors, naborCount = s->allCount;
00836 int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
00837 int m = s->row_count[myid_dh];
00838 int beg_row = s->beg_row[myid_dh];
00839 int beg_rowP = s->beg_rowP[myid_dh];
00840 int *bdryNodeCounts = s->bdry_count;
00841 int myBdryCount = s->bdry_count[myid_dh];
00842 bool debug = false;
00843 int myFirstBdry = m - myBdryCount;
00844 int *n2o_row = s->n2o_row;
00845 Hash_i_dh n2o_table, o2n_table;
00846
00847 if (logFile != NULL && s->debug)
00848 debug = true;
00849
00850
00851
00852
00853 sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
00854 CHECK_V_ERROR;
00855
00856
00857 if (debug)
00858 {
00859 fprintf (logFile,
00860 "\nSUBG myFirstBdry= %i myBdryCount= %i m= %i beg_rowP= %i\n",
00861 1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
00862 fflush (logFile);
00863 }
00864
00865 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00866 {
00867 sendBuf[2 * j] = n2o_row[i] + beg_row;
00868 sendBuf[2 * j + 1] = i + beg_rowP;
00869 }
00870
00871 if (debug)
00872 {
00873 fprintf (logFile, "\nSUBG SEND_BUF:\n");
00874 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
00875 {
00876 fprintf (logFile, "SUBG %i, %i\n", 1 + sendBuf[2 * j],
00877 1 + sendBuf[2 * j + 1]);
00878 }
00879 fflush (logFile);
00880 }
00881
00882
00883
00884
00885
00886 naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
00887 CHECK_V_ERROR;
00888 naborIdx[0] = 0;
00889 nz = 0;
00890 for (i = 0; i < naborCount; ++i)
00891 {
00892 nz += (2 * bdryNodeCounts[nabors[i]]);
00893 naborIdx[i + 1] = nz;
00894 }
00895
00896
00897 recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
00898 CHECK_V_ERROR;
00899
00900
00901
00902
00903
00904 recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00905 CHECK_V_ERROR;
00906 send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
00907 CHECK_V_ERROR;
00908 status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
00909 CHECK_V_ERROR;
00910
00911 for (i = 0; i < naborCount; ++i)
00912 {
00913 int nabr = nabors[i];
00914 int *buf = recvBuf + naborIdx[i];
00915 int ct = 2 * bdryNodeCounts[nabr];
00916
00917
00918 MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
00919 &(send_req[i]));
00920
00921 if (debug)
00922 {
00923 fprintf (logFile, "SUBG sending %i elts to %i\n", 2 * myBdryCount,
00924 nabr);
00925 fflush (logFile);
00926 }
00927
00928 MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
00929
00930 if (debug)
00931 {
00932 fprintf (logFile, "SUBG receiving %i elts from %i\n", ct, nabr);
00933 fflush (logFile);
00934 }
00935 }
00936
00937 MPI_Waitall (naborCount, send_req, status);
00938 MPI_Waitall (naborCount, recv_req, status);
00939
00940 Hash_i_dhCreate (&n2o_table, nz / 2);
00941 CHECK_V_ERROR;
00942 Hash_i_dhCreate (&o2n_table, nz / 2);
00943 CHECK_V_ERROR;
00944 s->n2o_ext = n2o_table;
00945 s->o2n_ext = o2n_table;
00946
00947
00948 for (i = 0; i < nz; i += 2)
00949 {
00950 int old = recvBuf[i];
00951 int new = recvBuf[i + 1];
00952
00953 if (debug)
00954 {
00955 fprintf (logFile, "SUBG i= %i old= %i new= %i\n", i, old + 1,
00956 new + 1);
00957 fflush (logFile);
00958 }
00959
00960 Hash_i_dhInsert (o2n_table, old, new);
00961 CHECK_V_ERROR;
00962 Hash_i_dhInsert (n2o_table, new, old);
00963 CHECK_V_ERROR;
00964 }
00965
00966
00967 if (recvBuf != NULL)
00968 {
00969 FREE_DH (recvBuf);
00970 CHECK_V_ERROR;
00971 }
00972 if (naborIdx != NULL)
00973 {
00974 FREE_DH (naborIdx);
00975 CHECK_V_ERROR;
00976 }
00977 if (sendBuf != NULL)
00978 {
00979 FREE_DH (sendBuf);
00980 CHECK_V_ERROR;
00981 }
00982 if (recv_req != NULL)
00983 {
00984 FREE_DH (recv_req);
00985 CHECK_V_ERROR;
00986 }
00987 if (send_req != NULL)
00988 {
00989 FREE_DH (send_req);
00990 CHECK_V_ERROR;
00991 }
00992 if (status != NULL)
00993 {
00994 FREE_DH (status);
00995 CHECK_V_ERROR;
00996 }
00997
00998 END_FUNC_DH}
00999
01000
01001
01002 #undef __FUNC__
01003 #define __FUNC__ "form_subdomaingraph_mpi_private"
01004 void
01005 form_subdomaingraph_mpi_private (SubdomainGraph_dh s)
01006 {
01007 START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
01008 int *idxAll = NULL;
01009 int i, j, nz, *adj, *ptrs = s->ptrs;
01010 MPI_Request *recvReqs = NULL, sendReq;
01011 MPI_Status *statuses = NULL, status;
01012
01013
01014 if (myid_dh == 0)
01015 {
01016 idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
01017 CHECK_V_ERROR;
01018 }
01019 MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
01020
01021
01022 if (myid_dh == 0)
01023 {
01024 nz = 0;
01025 for (i = 0; i < np_dh; ++i)
01026 nz += idxAll[i];
01027 }
01028 MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
01029
01030
01031
01032
01033 adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
01034 CHECK_V_ERROR;
01035
01036
01037 if (myid_dh == 0)
01038 {
01039 recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
01040 CHECK_V_ERROR;
01041 statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
01042 CHECK_V_ERROR;
01043
01044
01045 ptrs[0] = 0;
01046 for (j = 0; j < np_dh; ++j)
01047 ptrs[j + 1] = ptrs[j] + idxAll[j];
01048
01049
01050 for (j = 0; j < np_dh; ++j)
01051 {
01052 int ct = idxAll[j];
01053
01054 MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
01055 recvReqs + j);
01056 }
01057 }
01058
01059
01060 MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
01061
01062
01063 if (myid_dh == 0)
01064 {
01065 MPI_Waitall (np_dh, recvReqs, statuses);
01066 }
01067 MPI_Wait (&sendReq, &status);
01068
01069
01070 MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
01071 MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
01072
01073 if (idxAll != NULL)
01074 {
01075 FREE_DH (idxAll);
01076 CHECK_V_ERROR;
01077 }
01078 if (recvReqs != NULL)
01079 {
01080 FREE_DH (recvReqs);
01081 CHECK_V_ERROR;
01082 }
01083 if (statuses != NULL)
01084 {
01085 FREE_DH (statuses);
01086 CHECK_V_ERROR;
01087 }
01088
01089 END_FUNC_DH}
01090
01091
01092
01093
01094 #undef __FUNC__
01095 #define __FUNC__ "form_subdomaingraph_seq_private"
01096 void
01097 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A)
01098 {
01099 START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
01100 int *cval, len, *adj;
01101 int idx = 0, *ptrs = s->ptrs;
01102
01103
01104
01105
01106
01107
01108 adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01109 CHECK_V_ERROR;
01110
01111 dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
01112 CHECK_V_ERROR;
01113 for (i = 0; i < blocks * blocks; ++i)
01114 dense[i] = 0;
01115
01116
01117 for (i = 0; i < blocks; ++i)
01118 {
01119 int beg_row = s->beg_row[i];
01120 int end_row = beg_row + s->row_count[i];
01121
01122 for (row = beg_row; row < end_row; ++row)
01123 {
01124 EuclidGetRow (A, row, &len, &cval, NULL);
01125 CHECK_V_ERROR;
01126 for (j = 0; j < len; ++j)
01127 {
01128 int col = cval[j];
01129 if (col < beg_row || col >= end_row)
01130 {
01131 int owner = SubdomainGraph_dhFindOwner (s, col, false);
01132 CHECK_V_ERROR;
01133 dense[i * blocks + owner] = 1;
01134 dense[owner * blocks + i] = 1;
01135 }
01136 }
01137 EuclidRestoreRow (A, row, &len, &cval, NULL);
01138 CHECK_V_ERROR;
01139 }
01140 }
01141
01142
01143
01144
01145 ptrs[0] = 0;
01146 for (i = 0; i < blocks; ++i)
01147 {
01148 for (j = 0; j < blocks; ++j)
01149 {
01150 if (dense[i * blocks + j])
01151 {
01152 adj[idx++] = j;
01153 }
01154 }
01155 ptrs[i + 1] = idx;
01156 }
01157
01158 FREE_DH (dense);
01159 CHECK_V_ERROR;
01160 END_FUNC_DH}
01161
01162
01163 #undef __FUNC__
01164 #define __FUNC__ "find_all_neighbors_sym_private"
01165 void
01166 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A)
01167 {
01168 START_FUNC_DH int *marker, i, j, beg_row, end_row;
01169 int row, len, *cval, ct = 0;
01170 int *nabors = s->allNabors;
01171
01172 marker = (int *) MALLOC_DH (m * sizeof (int));
01173 CHECK_V_ERROR;
01174 for (i = 0; i < m; ++i)
01175 marker[i] = 0;
01176
01177 SET_INFO
01178 ("finding nabors in subdomain graph for structurally symmetric matrix");
01179 SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
01180
01181 beg_row = s->beg_row[myid_dh];
01182 end_row = beg_row + s->row_count[myid_dh];
01183
01184 for (row = beg_row; row < end_row; ++row)
01185 {
01186 EuclidGetRow (A, row, &len, &cval, NULL);
01187 CHECK_V_ERROR;
01188 for (j = 0; j < len; ++j)
01189 {
01190 int col = cval[j];
01191 if (col < beg_row || col >= end_row)
01192 {
01193 int owner = SubdomainGraph_dhFindOwner (s, col, false);
01194 CHECK_V_ERROR;
01195 if (!marker[owner])
01196 {
01197 marker[owner] = 1;
01198 nabors[ct++] = owner;
01199 }
01200 }
01201 }
01202 EuclidRestoreRow (A, row, &len, &cval, NULL);
01203 CHECK_V_ERROR;
01204 }
01205 s->allCount = ct;
01206
01207
01208
01209 if (marker != NULL)
01210 {
01211 FREE_DH (marker);
01212 CHECK_V_ERROR;
01213 }
01214 END_FUNC_DH}
01215
01216 #undef __FUNC__
01217 #define __FUNC__ "find_all_neighbors_unsym_private"
01218 void
01219 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A)
01220 {
01221 START_FUNC_DH int i, j, row, beg_row, end_row;
01222 int *marker;
01223 int *cval, len, idx = 0;
01224 int nz, *nabors = s->allNabors, *myNabors;
01225
01226 myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
01227 CHECK_V_ERROR;
01228 marker = (int *) MALLOC_DH (np_dh * sizeof (int));
01229 CHECK_V_ERROR;
01230 for (i = 0; i < np_dh; ++i)
01231 marker[i] = 0;
01232
01233 SET_INFO
01234 ("finding nabors in subdomain graph for structurally unsymmetric matrix");
01235
01236
01237
01238
01239 beg_row = s->beg_row[myid_dh];
01240 end_row = beg_row + s->row_count[myid_dh];
01241
01242
01243
01244
01245 for (row = beg_row; row < end_row; ++row)
01246 {
01247 EuclidGetRow (A, row, &len, &cval, NULL);
01248 CHECK_V_ERROR;
01249 for (j = 0; j < len; ++j)
01250 {
01251 int col = cval[j];
01252
01253 if (col < beg_row || col >= end_row)
01254 {
01255 int owner = SubdomainGraph_dhFindOwner (s, col, false);
01256 CHECK_V_ERROR;
01257
01258 if (!marker[owner])
01259 {
01260 marker[owner] = 1;
01261
01262
01263 myNabors[idx++] = owner;
01264 }
01265 }
01266 }
01267 EuclidRestoreRow (A, row, &len, &cval, NULL);
01268 CHECK_V_ERROR;
01269 }
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294 MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
01295 CHECK_V_ERROR;
01296
01297
01298 for (i = 0; i < idx; ++i)
01299 nabors[myNabors[i]] = 1;
01300
01301
01302 nabors[myid_dh] = 0;
01303
01304
01305
01306
01307
01308
01309
01310
01311 nz = 0;
01312 for (i = 0; i < np_dh; ++i)
01313 {
01314 if (nabors[i])
01315 myNabors[nz++] = i;
01316 }
01317 s->allCount = nz;
01318 memcpy (nabors, myNabors, nz * sizeof (int));
01319
01320 if (marker != NULL)
01321 {
01322 FREE_DH (marker);
01323 CHECK_V_ERROR;
01324 }
01325 if (myNabors != NULL)
01326 {
01327 FREE_DH (myNabors);
01328 CHECK_V_ERROR;
01329 }
01330 END_FUNC_DH}
01331
01332
01333
01334 #undef __FUNC__
01335 #define __FUNC__ "find_bdry_nodes_sym_private"
01336 void
01337 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
01338 int *interiorNodes, int *bdryNodes,
01339 int *interiorCount, int *bdryCount)
01340 {
01341 START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01342 int end_row = beg_row + s->row_count[myid_dh];
01343 int row, inCt = 0, bdCt = 0;
01344
01345 int j;
01346 int *cval;
01347
01348
01349 for (row = beg_row; row < end_row; ++row)
01350 {
01351 bool isBdry = false;
01352 int len;
01353 EuclidGetRow (A, row, &len, &cval, NULL);
01354 CHECK_V_ERROR;
01355
01356 for (j = 0; j < len; ++j)
01357 {
01358 int col = cval[j];
01359 if (col < beg_row || col >= end_row)
01360 {
01361 isBdry = true;
01362 break;
01363 }
01364 }
01365 EuclidRestoreRow (A, row, &len, &cval, NULL);
01366 CHECK_V_ERROR;
01367
01368 if (isBdry)
01369 {
01370 bdryNodes[bdCt++] = row - beg_row;
01371 }
01372 else
01373 {
01374 interiorNodes[inCt++] = row - beg_row;
01375 }
01376 }
01377
01378 *interiorCount = inCt;
01379 *bdryCount = bdCt;
01380
01381 END_FUNC_DH}
01382
01383 #define BDRY_NODE_TAG 42
01384
01385 #undef __FUNC__
01386 #define __FUNC__ "find_bdry_nodes_unsym_private"
01387 void
01388 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A,
01389 int *interiorNodes, int *boundaryNodes,
01390 int *interiorCount, int *bdryCount)
01391 {
01392 START_FUNC_DH int beg_row = s->beg_row[myid_dh];
01393 int end_row = beg_row + s->row_count[myid_dh];
01394 int i, j, row, max;
01395 int *cval;
01396 int *list, count;
01397 int *rpIN = NULL, *rpOUT = NULL;
01398 int *sendBuf, *recvBuf;
01399 int *marker, inCt, bdCt;
01400 int *bdryNodes, nz;
01401 int sendCt, recvCt;
01402 MPI_Request *sendReq, *recvReq;
01403 MPI_Status *status;
01404 SortedSet_dh ss;
01405
01406 SortedSet_dhCreate (&ss, m);
01407 CHECK_V_ERROR;
01408
01409
01410
01411
01412
01413 for (row = beg_row; row < end_row; ++row)
01414 {
01415 bool isBdry = false;
01416 int len;
01417 EuclidGetRow (A, row, &len, &cval, NULL);
01418 CHECK_V_ERROR;
01419
01420 for (j = 0; j < len; ++j)
01421 {
01422 int col = cval[j];
01423 if (col < beg_row || col >= end_row)
01424 {
01425 isBdry = true;
01426 SortedSet_dhInsert (ss, col);
01427 CHECK_V_ERROR;
01428
01429 }
01430 }
01431 EuclidRestoreRow (A, row, &len, &cval, NULL);
01432 CHECK_V_ERROR;
01433
01434 if (isBdry)
01435 {
01436 SortedSet_dhInsert (ss, row);
01437 CHECK_V_ERROR;
01438 }
01439 }
01440
01441
01442
01443
01444
01445 sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01446 CHECK_V_ERROR;
01447 recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
01448 CHECK_V_ERROR;
01449 rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01450 CHECK_V_ERROR;
01451 rpOUT[0] = 0;
01452 for (i = 0; i < np_dh; ++i)
01453 sendBuf[i] = 0;
01454
01455 sendCt = 0;
01456 SortedSet_dhGetList (ss, &list, &count);
01457 CHECK_V_ERROR;
01458
01459 for (i = 0; i < count; )
01460 {
01461 int node = list[i];
01462 int owner;
01463 int last;
01464
01465 owner = SubdomainGraph_dhFindOwner (s, node, false);
01466 CHECK_V_ERROR;
01467 last = s->beg_row[owner] + s->row_count[owner];
01468
01469
01470 while ((i < count) && (list[i] < last))
01471 ++i;
01472 ++sendCt;
01473 rpOUT[sendCt] = i;
01474 sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
01475
01476 }
01477
01478
01479
01480
01481
01482 MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
01483 CHECK_V_ERROR;
01484
01485
01486
01487
01488
01489
01490
01491 rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
01492 CHECK_V_ERROR;
01493 rpIN[0] = 0;
01494 nz = 0;
01495 recvCt = 0;
01496 for (i = 0; i < np_dh; ++i)
01497 {
01498 if (recvBuf[i])
01499 {
01500 ++recvCt;
01501 nz += recvBuf[i];
01502 rpIN[recvCt] = nz;
01503 }
01504 }
01505 bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
01506 CHECK_V_ERROR;
01507 sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
01508 CHECK_V_ERROR;
01509 recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
01510 CHECK_V_ERROR;
01511 max = MAX (sendCt, recvCt);
01512 status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
01513 CHECK_V_ERROR;
01514
01515
01516 j = 0;
01517 for (i = 0; i < np_dh; ++i)
01518 {
01519 if (recvBuf[i])
01520 {
01521 MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
01522 i, BDRY_NODE_TAG, comm_dh, recvReq + j);
01523 ++j;
01524 }
01525 }
01526
01527
01528 j = 0;
01529 for (i = 0; i < np_dh; ++i)
01530 {
01531 if (sendBuf[i])
01532 {
01533 MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
01534 i, BDRY_NODE_TAG, comm_dh, sendReq + j);
01535 ++j;
01536 }
01537 }
01538
01539
01540 MPI_Waitall (sendCt, sendReq, status);
01541 MPI_Waitall (recvCt, recvReq, status);
01542
01543
01544 for (i = 0; i < nz; ++i)
01545 bdryNodes[i] -= beg_row;
01546
01547
01548
01549
01550
01551 marker = (int *) MALLOC_DH (m * sizeof (int));
01552 CHECK_V_ERROR;
01553 for (i = 0; i < m; ++i)
01554 marker[i] = 0;
01555 for (i = 0; i < nz; ++i)
01556 marker[bdryNodes[i]] = 1;
01557
01558 inCt = bdCt = 0;
01559 for (i = 0; i < m; ++i)
01560 {
01561 if (marker[i])
01562 {
01563 boundaryNodes[bdCt++] = i;
01564 }
01565 else
01566 {
01567 interiorNodes[inCt++] = i;
01568 }
01569 }
01570 *interiorCount = inCt;
01571 *bdryCount = bdCt;
01572
01573
01574
01575
01576 SortedSet_dhDestroy (ss);
01577 CHECK_V_ERROR;
01578 if (rpIN != NULL)
01579 {
01580 FREE_DH (rpIN);
01581 CHECK_V_ERROR;
01582 }
01583 if (rpOUT != NULL)
01584 {
01585 FREE_DH (rpOUT);
01586 CHECK_V_ERROR;
01587 }
01588 if (sendBuf != NULL)
01589 {
01590 FREE_DH (sendBuf);
01591 CHECK_V_ERROR;
01592 }
01593 if (recvBuf != NULL)
01594 {
01595 FREE_DH (recvBuf);
01596 CHECK_V_ERROR;
01597 }
01598 if (bdryNodes != NULL)
01599 {
01600 FREE_DH (bdryNodes);
01601 CHECK_V_ERROR;
01602 }
01603 if (marker != NULL)
01604 {
01605 FREE_DH (marker);
01606 CHECK_V_ERROR;
01607 }
01608 if (sendReq != NULL)
01609 {
01610 FREE_DH (sendReq);
01611 CHECK_V_ERROR;
01612 }
01613 if (recvReq != NULL)
01614 {
01615 FREE_DH (recvReq);
01616 CHECK_V_ERROR;
01617 }
01618 if (status != NULL)
01619 {
01620 FREE_DH (status);
01621 CHECK_V_ERROR;
01622 }
01623 END_FUNC_DH}
01624
01625
01626 #undef __FUNC__
01627 #define __FUNC__ "find_ordered_neighbors_private"
01628 void
01629 find_ordered_neighbors_private (SubdomainGraph_dh s)
01630 {
01631 START_FUNC_DH int *loNabors = s->loNabors;
01632 int *hiNabors = s->hiNabors;
01633 int *allNabors = s->allNabors, allCount = s->allCount;
01634 int loCt = 0, hiCt = 0;
01635 int *o2n = s->o2n_sub;
01636 int i, myNewId = o2n[myid_dh];
01637
01638 for (i = 0; i < allCount; ++i)
01639 {
01640 int nabor = allNabors[i];
01641 if (o2n[nabor] < myNewId)
01642 {
01643 loNabors[loCt++] = nabor;
01644 }
01645 else
01646 {
01647 hiNabors[hiCt++] = nabor;
01648 }
01649 }
01650
01651 s->loCount = loCt;
01652 s->hiCount = hiCt;
01653 END_FUNC_DH}
01654
01655
01656 #undef __FUNC__
01657 #define __FUNC__ "color_subdomain_graph_private"
01658 void
01659 color_subdomain_graph_private (SubdomainGraph_dh s)
01660 {
01661 START_FUNC_DH int i, n = np_dh;
01662 int *rp = s->ptrs, *cval = s->adj;
01663 int j, *marker, thisNodesColor, *colorCounter;
01664 int *o2n = s->o2n_sub;
01665 int *color = s->colorVec;
01666
01667 if (np_dh == 1)
01668 n = s->blocks;
01669
01670 marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01671 colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
01672 for (i = 0; i <= n; ++i)
01673 {
01674 marker[i] = -1;
01675 colorCounter[i] = 0;
01676 }
01677
01678
01679
01680
01681 for (i = 0; i < n; ++i)
01682 {
01683
01684
01685
01686
01687 for (j = rp[i]; j < rp[i + 1]; ++j)
01688 {
01689 int nabor = cval[j];
01690 if (nabor < i)
01691 {
01692 int naborsColor = color[nabor];
01693 marker[naborsColor] = i;
01694 }
01695 }
01696
01697
01698 thisNodesColor = -1;
01699 for (j = 0; j < n; ++j)
01700 {
01701 if (marker[j] != i)
01702 {
01703 thisNodesColor = j;
01704 break;
01705 }
01706 }
01707 color[i] = thisNodesColor;
01708 colorCounter[1 + thisNodesColor] += 1;
01709 }
01710
01711
01712
01713
01714
01715
01716 for (i = 1; i < n; ++i)
01717 {
01718 if (colorCounter[i] == 0)
01719 break;
01720 colorCounter[i] += colorCounter[i - 1];
01721 }
01722
01723 for (i = 0; i < n; ++i)
01724 {
01725 o2n[i] = colorCounter[color[i]];
01726 colorCounter[color[i]] += 1;
01727 }
01728
01729
01730 invert_perm (n, s->o2n_sub, s->n2o_sub);
01731 CHECK_V_ERROR;
01732
01733
01734
01735
01736
01737 {
01738 int ct = 0;
01739 for (j = 0; j < n; ++j)
01740 {
01741 if (marker[j] == -1)
01742 break;
01743 ++ct;
01744 }
01745 s->colors = ct;
01746 }
01747
01748
01749
01750
01751
01752 {
01753 int sum = 0;
01754 for (i = 0; i < n; ++i)
01755 {
01756 int old = s->n2o_sub[i];
01757 s->beg_rowP[old] = sum;
01758 sum += s->row_count[old];
01759 }
01760 }
01761
01762 FREE_DH (marker);
01763 CHECK_V_ERROR;
01764 FREE_DH (colorCounter);
01765 CHECK_V_ERROR;
01766 END_FUNC_DH}
01767
01768
01769 #undef __FUNC__
01770 #define __FUNC__ "SubdomainGraph_dhDump"
01771 void
01772 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename)
01773 {
01774 START_FUNC_DH int i;
01775 int sCt = np_dh;
01776 FILE *fp;
01777
01778 if (np_dh == 1)
01779 sCt = s->blocks;
01780
01781
01782
01783
01784
01785
01786 fp = openFile_dh (filename, "w");
01787 CHECK_V_ERROR;
01788
01789
01790 fprintf (fp, "----- colors used\n");
01791 fprintf (fp, "%i\n", s->colors);
01792 if (s->colorVec == NULL)
01793 {
01794 fprintf (fp, "s->colorVec == NULL\n");
01795 }
01796 else
01797 {
01798 fprintf (fp, "----- colorVec\n");
01799 for (i = 0; i < sCt; ++i)
01800 {
01801 fprintf (fp, "%i ", s->colorVec[i]);
01802 }
01803 fprintf (fp, "\n");
01804 }
01805
01806 if (s->o2n_sub == NULL || s->o2n_sub == NULL)
01807 {
01808 fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
01809 }
01810 else
01811 {
01812 fprintf (fp, "----- o2n_sub\n");
01813 for (i = 0; i < sCt; ++i)
01814 {
01815 fprintf (fp, "%i ", s->o2n_sub[i]);
01816 }
01817 fprintf (fp, "\n");
01818 fprintf (fp, "----- n2o_sub\n");
01819 for (i = 0; i < sCt; ++i)
01820 {
01821 fprintf (fp, "%i ", s->n2o_sub[i]);
01822 }
01823 fprintf (fp, "\n");
01824 }
01825
01826
01827 if (s->beg_row == NULL || s->beg_rowP == NULL)
01828 {
01829 fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
01830 }
01831 else
01832 {
01833 fprintf (fp, "----- beg_row\n");
01834 for (i = 0; i < sCt; ++i)
01835 {
01836 fprintf (fp, "%i ", 1 + s->beg_row[i]);
01837 }
01838 fprintf (fp, "\n");
01839 fprintf (fp, "----- beg_rowP\n");
01840 for (i = 0; i < sCt; ++i)
01841 {
01842 fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
01843 }
01844 fprintf (fp, "\n");
01845 }
01846
01847
01848 if (s->row_count == NULL || s->bdry_count == NULL)
01849 {
01850 fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
01851 }
01852 else
01853 {
01854 fprintf (fp, "----- row_count\n");
01855 for (i = 0; i < sCt; ++i)
01856 {
01857 fprintf (fp, "%i ", s->row_count[i]);
01858 }
01859 fprintf (fp, "\n");
01860 fprintf (fp, "----- bdry_count\n");
01861 for (i = 0; i < sCt; ++i)
01862 {
01863 fprintf (fp, "%i ", s->bdry_count[i]);
01864 }
01865 fprintf (fp, "\n");
01866
01867 }
01868
01869
01870 if (s->ptrs == NULL || s->adj == NULL)
01871 {
01872 fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
01873 }
01874 else
01875 {
01876 int j;
01877 int ct;
01878 fprintf (fp, "----- subdomain graph\n");
01879 for (i = 0; i < sCt; ++i)
01880 {
01881 fprintf (fp, "%i :: ", i);
01882 ct = s->ptrs[i + 1] - s->ptrs[i];
01883 if (ct)
01884 {
01885 shellSort_int (ct, s->adj + s->ptrs[i]);
01886 CHECK_V_ERROR;
01887 }
01888 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
01889 {
01890 fprintf (fp, "%i ", s->adj[j]);
01891 }
01892 fprintf (fp, "\n");
01893 }
01894 }
01895 closeFile_dh (fp);
01896 CHECK_V_ERROR;
01897
01898
01899
01900
01901
01902 if (s->beg_rowP == NULL)
01903 {
01904 SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
01905 }
01906 if (s->row_count == NULL)
01907 {
01908 SET_V_ERROR ("s->row_count == NULL; can't continue");
01909 }
01910 if (s->o2n_sub == NULL)
01911 {
01912 SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
01913 }
01914
01915
01916 if (np_dh == 1)
01917 {
01918 fp = openFile_dh (filename, "a");
01919 CHECK_V_ERROR;
01920
01921
01922 if (s->n2o_row == NULL || s->o2n_col == NULL)
01923 {
01924 fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
01925 }
01926 else
01927 {
01928 fprintf (fp, "----- n2o_row\n");
01929 for (i = 0; i < s->m; ++i)
01930 {
01931 fprintf (fp, "%i ", 1 + s->n2o_row[i]);
01932 }
01933 fprintf (fp, "\n");
01934
01935 #if 0
01936
01937
01938
01939
01940
01941
01942 fprintf (fp, "----- o2n_col\n");
01943 for (i = 0; i < sCt; ++i)
01944 {
01945 int br = s->beg_row[i];
01946 int er = br + s->row_count[i];
01947
01948 for (j = br; j < er; ++j)
01949 {
01950 fprintf (fp, "%i ", 1 + s->o2n_col[j]);
01951 }
01952 fprintf (fp, "\n");
01953 }
01954 fprintf (fp, "\n");
01955
01956 #endif
01957
01958 }
01959 closeFile_dh (fp);
01960 CHECK_V_ERROR;
01961 }
01962
01963
01964 else
01965 {
01966 int id = s->n2o_sub[myid_dh];
01967 int m = s->m;
01968 int pe;
01969 int beg_row = 0;
01970 if (s->beg_row != 0)
01971 beg_row = s->beg_row[myid_dh];
01972
01973
01974 for (pe = 0; pe < np_dh; ++pe)
01975 {
01976 MPI_Barrier (comm_dh);
01977 if (id == pe)
01978 {
01979 fp = openFile_dh (filename, "a");
01980 CHECK_V_ERROR;
01981 if (id == 0)
01982 fprintf (fp, "----- n2o_row\n");
01983
01984 for (i = 0; i < m; ++i)
01985 {
01986 fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
01987 }
01988 if (id == np_dh - 1)
01989 fprintf (fp, "\n");
01990 closeFile_dh (fp);
01991 CHECK_V_ERROR;
01992 }
01993 }
01994
01995 #if 0
01996
01997
01998 for (pe = 0; pe < np_dh; ++pe)
01999 {
02000 MPI_Barrier (comm_dh);
02001 if (myid_dh == pe)
02002 {
02003 fp = openFile_dh (filename, "a");
02004 CHECK_V_ERROR;
02005 if (myid_dh == 0)
02006 fprintf (fp, "----- o2n_col\n");
02007
02008 for (i = 0; i < m; ++i)
02009 {
02010 fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
02011 }
02012 fprintf (fp, "\n");
02013
02014 if (myid_dh == np_dh - 1)
02015 fprintf (fp, "\n");
02016
02017 closeFile_dh (fp);
02018 CHECK_V_ERROR;
02019 }
02020 }
02021
02022 #endif
02023
02024 }
02025
02026 END_FUNC_DH}
02027
02028
02029
02030 #undef __FUNC__
02031 #define __FUNC__ "find_bdry_nodes_seq_private"
02032 void
02033 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A)
02034 {
02035 START_FUNC_DH int i, j, row, blocks = s->blocks;
02036 int *cval, *tmp;
02037
02038 tmp = (int *) MALLOC_DH (m * sizeof (int));
02039 CHECK_V_ERROR;
02040 for (i = 0; i < m; ++i)
02041 tmp[i] = 0;
02042
02043
02044
02045
02046 for (i = 0; i < blocks; ++i)
02047 {
02048 int beg_row = s->beg_row[i];
02049 int end_row = beg_row + s->row_count[i];
02050
02051 for (row = beg_row; row < end_row; ++row)
02052 {
02053 bool isBdry = false;
02054 int len;
02055 EuclidGetRow (A, row, &len, &cval, NULL);
02056 CHECK_V_ERROR;
02057
02058 for (j = 0; j < len; ++j)
02059 {
02060 int col = cval[j];
02061
02062 if (col < beg_row || col >= end_row)
02063 {
02064 tmp[col] = 1;
02065 isBdry = true;
02066 }
02067 }
02068 if (isBdry)
02069 tmp[row] = 1;
02070 EuclidRestoreRow (A, row, &len, &cval, NULL);
02071 CHECK_V_ERROR;
02072 }
02073 }
02074
02075
02076
02077
02078 for (i = 0; i < blocks; ++i)
02079 {
02080 int beg_row = s->beg_row[i];
02081 int end_row = beg_row + s->row_count[i];
02082 int ct = 0;
02083 for (row = beg_row; row < end_row; ++row)
02084 {
02085 if (tmp[row])
02086 ++ct;
02087 }
02088 s->bdry_count[i] = ct;
02089 }
02090
02091
02092
02093
02094 for (i = 0; i < blocks; ++i)
02095 {
02096 int beg_row = s->beg_row[i];
02097 int end_row = beg_row + s->row_count[i];
02098 int interiorIDX = beg_row;
02099 int bdryIDX = end_row - s->bdry_count[i];
02100
02101 for (row = beg_row; row < end_row; ++row)
02102 {
02103 if (tmp[row])
02104 {
02105 s->o2n_col[row] = bdryIDX++;
02106 }
02107 else
02108 {
02109 s->o2n_col[row] = interiorIDX++;
02110 }
02111 }
02112 }
02113
02114
02115 invert_perm (m, s->o2n_col, s->n2o_row);
02116 CHECK_V_ERROR;
02117 FREE_DH (tmp);
02118 CHECK_V_ERROR;
02119
02120 END_FUNC_DH}
02121
02122 #undef __FUNC__
02123 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
02124 void
02125 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp)
02126 {
02127 START_FUNC_DH if (myid_dh == 0)
02128 {
02129 int i, j;
02130
02131 fprintf (fp,
02132 "\n-----------------------------------------------------\n");
02133 fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
02134 fprintf (fp, "-----------------------------------------------------\n");
02135 fprintf (fp, "colors used: %i\n", s->colors);
02136
02137 fprintf (fp, "o2n ordering vector: ");
02138 for (i = 0; i < s->blocks; ++i)
02139 fprintf (fp, "%i ", s->o2n_sub[i]);
02140
02141 fprintf (fp, "\ncoloring vector (node, color): \n");
02142 for (i = 0; i < s->blocks; ++i)
02143 fprintf (fp, " %i, %i\n", i, s->colorVec[i]);
02144
02145 fprintf (fp, "\n");
02146 fprintf (fp, "Adjacency lists:\n");
02147
02148 for (i = 0; i < s->blocks; ++i)
02149 {
02150 fprintf (fp, " P_%i :: ", i);
02151 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
02152 {
02153 fprintf (fp, "%i ", s->adj[j]);
02154 }
02155 fprintf (fp, "\n");
02156 }
02157 fprintf (fp, "-----------------------------------------------------\n");
02158 }
02159 END_FUNC_DH}
02160
02161
02162 #undef __FUNC__
02163 #define __FUNC__ "adjust_matrix_perms_private"
02164 void
02165 adjust_matrix_perms_private (SubdomainGraph_dh s, int m)
02166 {
02167 START_FUNC_DH int i, j, blocks = s->blocks;
02168 int *o2n = s->o2n_col;
02169
02170 for (i = 0; i < blocks; ++i)
02171 {
02172 int beg_row = s->beg_row[i];
02173 int end_row = beg_row + s->row_count[i];
02174 int adjust = s->beg_rowP[i] - s->beg_row[i];
02175 for (j = beg_row; j < end_row; ++j)
02176 o2n[j] += adjust;
02177 }
02178
02179 invert_perm (m, s->o2n_col, s->n2o_row);
02180 CHECK_V_ERROR;
02181 END_FUNC_DH}
02182
02183 #undef __FUNC__
02184 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
02185 void
02186 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp)
02187 {
02188 START_FUNC_DH int i;
02189 int blocks = np_dh;
02190 double ratio[25];
02191
02192 if (myid_dh == 0)
02193 {
02194 if (np_dh == 1)
02195 blocks = s->blocks;
02196 if (blocks > 25)
02197 blocks = 25;
02198
02199 fprintf (fp, "\n");
02200 fprintf (fp, "Subdomain interior/boundary node ratios\n");
02201 fprintf (fp, "---------------------------------------\n");
02202
02203
02204 for (i = 0; i < blocks; ++i)
02205 {
02206 if (s->bdry_count[i] == 0)
02207 {
02208 ratio[i] = -1;
02209 }
02210 else
02211 {
02212 ratio[i] =
02213 (double) (s->row_count[i] -
02214 s->bdry_count[i]) / (double) s->bdry_count[i];
02215 }
02216 }
02217
02218
02219 shellSort_float (blocks, ratio);
02220
02221
02222 if (blocks <= 20)
02223 {
02224 int j = 0;
02225 for (i = 0; i < blocks; ++i)
02226 {
02227 fprintf (fp, "%0.2g ", ratio[i]);
02228 ++j;
02229 if (j == 10)
02230 {
02231 fprintf (fp, "\n");
02232 }
02233 }
02234 fprintf (fp, "\n");
02235 }
02236 else
02237 {
02238 fprintf (fp, "10 smallest ratios: ");
02239 for (i = 0; i < 10; ++i)
02240 {
02241 fprintf (fp, "%0.2g ", ratio[i]);
02242 }
02243 fprintf (fp, "\n");
02244 fprintf (fp, "10 largest ratios: ");
02245 {
02246 int start = blocks - 6, stop = blocks - 1;
02247 for (i = start; i < stop; ++i)
02248 {
02249 fprintf (fp, "%0.2g ", ratio[i]);
02250 }
02251 fprintf (fp, "\n");
02252 }
02253 }
02254 }
02255
02256 END_FUNC_DH}
02257
02258
02259 #undef __FUNC__
02260 #define __FUNC__ "SubdomainGraph_dhPrintStats"
02261 void
02262 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp)
02263 {
02264 START_FUNC_DH double *timing = sg->timing;
02265
02266 fprintf_dh (fp, "\nSubdomainGraph timing report\n");
02267 fprintf_dh (fp, "-----------------------------\n");
02268 fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
02269 fprintf_dh (fp, " find neighbors in subdomain graph: %0.2f\n",
02270 timing[FIND_NABORS_SGT]);
02271 fprintf_dh (fp, " locally order interiors and bdry: %0.2f\n",
02272 timing[ORDER_BDRY_SGT]);
02273 fprintf_dh (fp, " form and color subdomain graph: %0.2f\n",
02274 timing[FORM_GRAPH_SGT]);
02275 fprintf_dh (fp, " exchange bdry permutations: %0.2f\n",
02276 timing[EXCHANGE_PERMS_SGT]);
02277 fprintf_dh (fp, " everything else (should be small): %0.2f\n",
02278 timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
02279 timing[ORDER_BDRY_SGT] +
02280 timing[FORM_GRAPH_SGT] +
02281 timing[EXCHANGE_PERMS_SGT]));
02282 END_FUNC_DH}