00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "Factor_dh.h"
00044 #include "Vec_dh.h"
00045 #include "Mat_dh.h"
00046 #include "SubdomainGraph_dh.h"
00047 #include "TimeLog_dh.h"
00048 #include "Mem_dh.h"
00049 #include "Numbering_dh.h"
00050 #include "Hash_i_dh.h"
00051 #include "Parser_dh.h"
00052 #include "mat_dh_private.h"
00053 #include "getRow_dh.h"
00054 #include "Euclid_dh.h"
00055 #include "io_dh.h"
00056
00057
00058 void
00059 Factor_dh_junk ()
00060 {
00061 }
00062
00063 static void adjust_bj_private (Factor_dh mat);
00064 static void unadjust_bj_private (Factor_dh mat);
00065
00066
00067 #undef __FUNC__
00068 #define __FUNC__ "Factor_dhCreate"
00069 void
00070 Factor_dhCreate (Factor_dh * mat)
00071 {
00072 START_FUNC_DH struct _factor_dh *tmp;
00073
00074 if (np_dh > MAX_MPI_TASKS)
00075 {
00076 SET_V_ERROR ("you must change MAX_MPI_TASKS and recompile!");
00077 }
00078
00079 tmp = (struct _factor_dh *) MALLOC_DH (sizeof (struct _factor_dh));
00080 CHECK_V_ERROR;
00081 *mat = tmp;
00082
00083 tmp->m = 0;
00084 tmp->n = 0;
00085 tmp->id = myid_dh;
00086 tmp->beg_row = 0;
00087 tmp->first_bdry = 0;
00088 tmp->bdry_count = 0;
00089 tmp->blockJacobi = false;
00090
00091 tmp->rp = NULL;
00092 tmp->cval = NULL;
00093 tmp->aval = NULL;
00094 tmp->fill = NULL;
00095 tmp->diag = NULL;
00096 tmp->alloc = 0;
00097
00098 tmp->work_y_lo = tmp->work_x_hi = NULL;
00099 tmp->sendbufLo = tmp->sendbufHi = NULL;
00100 tmp->sendindLo = tmp->sendindHi = NULL;
00101 tmp->num_recvLo = tmp->num_recvHi = 0;
00102 tmp->num_sendLo = tmp->num_sendHi = 0;
00103 tmp->sendlenLo = tmp->sendlenHi = 0;
00104
00105 tmp->solveIsSetup = false;
00106 tmp->numbSolve = NULL;
00107
00108 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Factor");
00109
00110
00111 END_FUNC_DH}
00112
00113 #undef __FUNC__
00114 #define __FUNC__ "Factor_dhDestroy"
00115 void
00116 Factor_dhDestroy (Factor_dh mat)
00117 {
00118 START_FUNC_DH if (mat->rp != NULL)
00119 {
00120 FREE_DH (mat->rp);
00121 CHECK_V_ERROR;
00122 }
00123 if (mat->cval != NULL)
00124 {
00125 FREE_DH (mat->cval);
00126 CHECK_V_ERROR;
00127 }
00128 if (mat->aval != NULL)
00129 {
00130 FREE_DH (mat->aval);
00131 CHECK_V_ERROR;
00132 }
00133 if (mat->diag != NULL)
00134 {
00135 FREE_DH (mat->diag);
00136 CHECK_V_ERROR;
00137 }
00138 if (mat->fill != NULL)
00139 {
00140 FREE_DH (mat->fill);
00141 CHECK_V_ERROR;
00142 }
00143
00144 if (mat->work_y_lo != NULL)
00145 {
00146 FREE_DH (mat->work_y_lo);
00147 CHECK_V_ERROR;
00148 }
00149 if (mat->work_x_hi != NULL)
00150 {
00151 FREE_DH (mat->work_x_hi);
00152 CHECK_V_ERROR;
00153 }
00154 if (mat->sendbufLo != NULL)
00155 {
00156 FREE_DH (mat->sendbufLo);
00157 CHECK_V_ERROR;
00158 }
00159 if (mat->sendbufHi != NULL)
00160 {
00161 FREE_DH (mat->sendbufHi);
00162 CHECK_V_ERROR;
00163 }
00164 if (mat->sendindLo != NULL)
00165 {
00166 FREE_DH (mat->sendindLo);
00167 CHECK_V_ERROR;
00168 }
00169 if (mat->sendindHi != NULL)
00170 {
00171 FREE_DH (mat->sendindHi);
00172 CHECK_V_ERROR;
00173 }
00174
00175 if (mat->numbSolve != NULL)
00176 {
00177 Numbering_dhDestroy (mat->numbSolve);
00178 CHECK_V_ERROR;
00179 }
00180 FREE_DH (mat);
00181 CHECK_V_ERROR;
00182 END_FUNC_DH}
00183
00184
00185 #undef __FUNC__
00186 #define __FUNC__ "create_fake_mat_private"
00187 static void
00188 create_fake_mat_private (Factor_dh mat, Mat_dh * matFakeIN)
00189 {
00190 START_FUNC_DH Mat_dh matFake;
00191 Mat_dhCreate (matFakeIN);
00192 CHECK_V_ERROR;
00193 matFake = *matFakeIN;
00194 matFake->m = mat->m;
00195 matFake->n = mat->n;
00196 matFake->rp = mat->rp;
00197 matFake->cval = mat->cval;
00198 matFake->aval = mat->aval;
00199 matFake->beg_row = mat->beg_row;
00200 END_FUNC_DH}
00201
00202 #undef __FUNC__
00203 #define __FUNC__ "destroy_fake_mat_private"
00204 static void
00205 destroy_fake_mat_private (Mat_dh matFake)
00206 {
00207 START_FUNC_DH matFake->rp = NULL;
00208 matFake->cval = NULL;
00209 matFake->aval = NULL;
00210 Mat_dhDestroy (matFake);
00211 CHECK_V_ERROR;
00212 END_FUNC_DH}
00213
00214
00215
00216 #undef __FUNC__
00217 #define __FUNC__ "Factor_dhReadNz"
00218 int
00219 Factor_dhReadNz (Factor_dh mat)
00220 {
00221 START_FUNC_DH int ierr, retval = mat->rp[mat->m];
00222 int nz = retval;
00223 ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
00224 CHECK_MPI_ERROR (ierr);
00225 END_FUNC_VAL (retval)}
00226
00227
00228
00229 #undef __FUNC__
00230 #define __FUNC__ "Factor_dhPrintRows"
00231 void
00232 Factor_dhPrintRows (Factor_dh mat, FILE * fp)
00233 {
00234 START_FUNC_DH int beg_row = mat->beg_row;
00235 int m = mat->m, i, j;
00236 bool noValues;
00237
00238 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
00239 if (mat->aval == NULL)
00240 noValues = true;
00241
00242 if (mat->blockJacobi)
00243 {
00244 adjust_bj_private (mat);
00245 CHECK_V_ERROR;
00246 }
00247
00248 fprintf (fp,
00249 "\n----------------------- Factor_dhPrintRows ------------------\n");
00250 if (mat->blockJacobi)
00251 {
00252 fprintf (fp,
00253 "@@@ Block Jacobi ILU; adjusted values from zero-based @@@\n");
00254 }
00255
00256 for (i = 0; i < m; ++i)
00257 {
00258 fprintf (fp, "%i :: ", 1 + i + beg_row);
00259 for (j = mat->rp[i]; j < mat->rp[i + 1]; ++j)
00260 {
00261 if (noValues)
00262 {
00263 fprintf (fp, "%i ", 1 + mat->cval[j]);
00264 }
00265 else
00266 {
00267 fprintf (fp, "%i,%g ; ", 1 + mat->cval[j], mat->aval[j]);
00268 }
00269 }
00270 fprintf (fp, "\n");
00271 }
00272
00273 if (mat->blockJacobi)
00274 {
00275 unadjust_bj_private (mat);
00276 CHECK_V_ERROR;
00277 }
00278 END_FUNC_DH}
00279
00280
00281 #undef __FUNC__
00282 #define __FUNC__ "Factor_dhPrintDiags"
00283 void
00284 Factor_dhPrintDiags (Factor_dh mat, FILE * fp)
00285 {
00286 START_FUNC_DH int beg_row = mat->beg_row;
00287 int m = mat->m, i, pe, *diag = mat->diag;
00288 REAL_DH *aval = mat->aval;
00289
00290
00291 fprintf_dh (fp,
00292 "\n----------------------- Factor_dhPrintDiags ------------------\n");
00293 fprintf_dh (fp, "(grep for 'ZERO')\n");
00294
00295 for (pe = 0; pe < np_dh; ++pe)
00296 {
00297 MPI_Barrier (comm_dh);
00298 if (mat->id == pe)
00299 {
00300 fprintf (fp, "----- subdomain: %i processor: %i\n", pe, myid_dh);
00301 for (i = 0; i < m; ++i)
00302 {
00303 REAL_DH val = aval[diag[i]];
00304 if (val)
00305 {
00306 fprintf (fp, "%i %g\n", i + 1 + beg_row, aval[diag[i]]);
00307 }
00308 else
00309 {
00310 fprintf (fp, "%i %g ZERO\n", i + 1 + beg_row,
00311 aval[diag[i]]);
00312 }
00313 }
00314 }
00315 }
00316 END_FUNC_DH}
00317
00318
00319 #undef __FUNC__
00320 #define __FUNC__ "Factor_dhPrintGraph"
00321 void
00322 Factor_dhPrintGraph (Factor_dh mat, char *filename)
00323 {
00324 START_FUNC_DH FILE * fp;
00325 int i, j, m = mat->m, *work, *rp = mat->rp, *cval = mat->cval;
00326
00327 if (np_dh > 1)
00328 SET_V_ERROR ("only implemented for single mpi task");
00329
00330 work = (int *) MALLOC_DH (m * sizeof (int));
00331 CHECK_V_ERROR;
00332
00333 fp = openFile_dh (filename, "w");
00334 CHECK_V_ERROR;
00335
00336 for (i = 0; i < m; ++i)
00337 {
00338 for (j = 0; j < m; ++j)
00339 work[j] = 0;
00340 for (j = rp[i]; j < rp[i]; ++j)
00341 work[cval[j]] = 1;
00342
00343 for (j = 0; j < m; ++j)
00344 {
00345 if (work[j])
00346 {
00347 fprintf (fp, " x ");
00348 }
00349 else
00350 {
00351 fprintf (fp, " ");
00352 }
00353 }
00354 fprintf (fp, "\n");
00355 }
00356
00357 closeFile_dh (fp);
00358 CHECK_V_ERROR;
00359
00360 FREE_DH (work);
00361 END_FUNC_DH}
00362
00363
00364 #undef __FUNC__
00365 #define __FUNC__ "Factor_dhPrintTriples"
00366 void
00367 Factor_dhPrintTriples (Factor_dh mat, char *filename)
00368 {
00369 START_FUNC_DH int pe, i, j;
00370 int m = mat->m, *rp = mat->rp;
00371 int beg_row = mat->beg_row;
00372 REAL_DH *aval = mat->aval;
00373 bool noValues;
00374 FILE *fp;
00375
00376 if (mat->blockJacobi)
00377 {
00378 adjust_bj_private (mat);
00379 CHECK_V_ERROR;
00380 }
00381
00382 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
00383 if (noValues)
00384 aval = NULL;
00385
00386 for (pe = 0; pe < np_dh; ++pe)
00387 {
00388 MPI_Barrier (comm_dh);
00389 if (mat->id == pe)
00390 {
00391 if (pe == 0)
00392 {
00393 fp = openFile_dh (filename, "w");
00394 CHECK_V_ERROR;
00395 }
00396 else
00397 {
00398 fp = openFile_dh (filename, "a");
00399 CHECK_V_ERROR;
00400 }
00401
00402 for (i = 0; i < m; ++i)
00403 {
00404 for (j = rp[i]; j < rp[i + 1]; ++j)
00405 {
00406 if (noValues)
00407 {
00408 fprintf (fp, "%i %i\n", 1 + i + beg_row,
00409 1 + mat->cval[j]);
00410 }
00411 else
00412 {
00413 fprintf (fp, TRIPLES_FORMAT,
00414 1 + i + beg_row, 1 + mat->cval[j], aval[j]);
00415 }
00416 }
00417 }
00418 closeFile_dh (fp);
00419 CHECK_V_ERROR;
00420 }
00421 }
00422
00423 if (mat->blockJacobi)
00424 {
00425 unadjust_bj_private (mat);
00426 CHECK_V_ERROR;
00427 }
00428 END_FUNC_DH}
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445 #undef __FUNC__
00446 #define __FUNC__ "setup_receives_private"
00447 static int
00448 setup_receives_private (Factor_dh mat, int *beg_rows, int *end_rows,
00449 double *recvBuf, MPI_Request * req,
00450 int *reqind, int reqlen, int *outlist, bool debug)
00451 {
00452 START_FUNC_DH int i, j, this_pe, num_recv = 0;
00453 MPI_Request request;
00454
00455 if (debug)
00456 {
00457 fprintf (logFile,
00458 "\nFACT ========================================================\n");
00459 fprintf (logFile, "FACT STARTING: setup_receives_private\n");
00460 }
00461
00462 for (i = 0; i < reqlen; i = j)
00463 {
00464
00465 this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
00466 CHECK_ERROR (-1);
00467
00468
00469 for (j = i + 1; j < reqlen; j++)
00470 {
00471 int idx = reqind[j];
00472 if (idx < beg_rows[this_pe] || idx >= end_rows[this_pe])
00473 {
00474 break;
00475 }
00476 }
00477
00478 if (debug)
00479 {
00480 int k;
00481 fprintf (logFile, "FACT need nodes from P_%i: ", this_pe);
00482 for (k = i; k < j; ++k)
00483 fprintf (logFile, "%i ", 1 + reqind[k]);
00484 fprintf (logFile, "\n");
00485 }
00486
00487
00488 outlist[this_pe] = j - i;
00489
00490
00491
00492
00493
00494
00495
00496 MPI_Isend (reqind + i, j - i, MPI_INT, this_pe, 444, comm_dh, &request);
00497 MPI_Request_free (&request);
00498
00499
00500 MPI_Recv_init (recvBuf + i, j - i, MPI_DOUBLE, this_pe, 555,
00501 comm_dh, req + num_recv);
00502 ++num_recv;
00503 }
00504
00505 END_FUNC_VAL (num_recv);
00506 }
00507
00508
00509
00510
00511
00512
00513 #undef __FUNC__
00514 #define __FUNC__ "setup_sends_private"
00515 static void
00516 setup_sends_private (Factor_dh mat, int *inlist,
00517 int *o2n_subdomain, bool debug)
00518 {
00519 START_FUNC_DH int i, jLo, jHi, sendlenLo, sendlenHi, first = mat->beg_row;
00520 MPI_Request *requests = mat->requests, *sendReq;
00521 MPI_Status *statuses = mat->status;
00522 bool isHigher;
00523 int *rcvBuf;
00524 double *sendBuf;
00525 int myidNEW = o2n_subdomain[myid_dh];
00526 int count;
00527
00528 if (debug)
00529 {
00530 fprintf (logFile, "FACT \nSTARTING: setup_sends_private\n");
00531 }
00532
00533
00534 sendlenLo = sendlenHi = 0;
00535 for (i = 0; i < np_dh; i++)
00536 {
00537 if (inlist[i])
00538 {
00539 if (o2n_subdomain[i] < myidNEW)
00540 {
00541 sendlenLo += inlist[i];
00542 }
00543 else
00544 {
00545 sendlenHi += inlist[i];
00546 }
00547 }
00548 }
00549
00550 mat->sendlenLo = sendlenLo;
00551 mat->sendlenHi = sendlenHi;
00552 mat->sendbufLo = (double *) MALLOC_DH (sendlenLo * sizeof (double));
00553 CHECK_V_ERROR;
00554 mat->sendbufHi = (double *) MALLOC_DH (sendlenHi * sizeof (double));
00555 CHECK_V_ERROR;
00556 mat->sendindLo = (int *) MALLOC_DH (sendlenLo * sizeof (int));
00557 CHECK_V_ERROR;
00558 mat->sendindHi = (int *) MALLOC_DH (sendlenHi * sizeof (int));
00559 CHECK_V_ERROR;
00560
00561 count = 0;
00562 jLo = jHi = 0;
00563 mat->num_sendLo = 0;
00564 mat->num_sendHi = 0;
00565 for (i = 0; i < np_dh; i++)
00566 {
00567 if (inlist[i])
00568 {
00569 isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
00570
00571
00572 if (isHigher)
00573 {
00574 rcvBuf = &mat->sendindHi[jHi];
00575 sendBuf = &mat->sendbufHi[jHi];
00576 sendReq = &mat->send_reqHi[mat->num_sendHi];
00577 mat->num_sendHi++;
00578 jHi += inlist[i];
00579 }
00580 else
00581 {
00582 rcvBuf = &mat->sendindLo[jLo];
00583 sendBuf = &mat->sendbufLo[jLo];
00584 sendReq = &mat->send_reqLo[mat->num_sendLo];
00585 mat->num_sendLo++;
00586 jLo += inlist[i];
00587 }
00588
00589
00590
00591
00592 MPI_Irecv (rcvBuf, inlist[i], MPI_INT, i, 444, comm_dh,
00593 requests + count);
00594 ++count;
00595
00596
00597 MPI_Send_init (sendBuf, inlist[i], MPI_DOUBLE, i, 555, comm_dh,
00598 sendReq);
00599 }
00600 }
00601
00602
00603 MPI_Waitall (count, requests, statuses);
00604
00605 if (debug)
00606 {
00607 int j;
00608 jLo = jHi = 0;
00609
00610 fprintf (logFile,
00611 "\nFACT columns that I must send to other subdomains:\n");
00612 for (i = 0; i < np_dh; i++)
00613 {
00614 if (inlist[i])
00615 {
00616 isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
00617 if (isHigher)
00618 {
00619 rcvBuf = &mat->sendindHi[jHi];
00620 jHi += inlist[i];
00621 }
00622 else
00623 {
00624 rcvBuf = &mat->sendindLo[jLo];
00625 jLo += inlist[i];
00626 }
00627
00628 fprintf (logFile, "FACT send to P_%i: ", i);
00629 for (j = 0; j < inlist[i]; ++j)
00630 fprintf (logFile, "%i ", rcvBuf[j] + 1);
00631 fprintf (logFile, "\n");
00632 }
00633 }
00634 }
00635
00636
00637
00638 for (i = 0; i < mat->sendlenLo; i++)
00639 mat->sendindLo[i] -= first;
00640 for (i = 0; i < mat->sendlenHi; i++)
00641 mat->sendindHi[i] -= first;
00642 END_FUNC_DH}
00643
00644
00645
00646 #undef __FUNC__
00647 #define __FUNC__ "Factor_dhSolveSetup"
00648 void
00649 Factor_dhSolveSetup (Factor_dh mat, SubdomainGraph_dh sg)
00650 {
00651 START_FUNC_DH int *outlist, *inlist;
00652 int i, row, *rp = mat->rp, *cval = mat->cval;
00653 Numbering_dh numb;
00654 int m = mat->m;
00655
00656 int *beg_rows = sg->beg_rowP, *row_count = sg->row_count, *end_rows;
00657 Mat_dh matFake;
00658 bool debug = false;
00659 double *recvBuf;
00660
00661 if (mat->debug && logFile != NULL)
00662 debug = true;
00663
00664 end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
00665 CHECK_V_ERROR;
00666 outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
00667 CHECK_V_ERROR;
00668 inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
00669 CHECK_V_ERROR;
00670 for (i = 0; i < np_dh; ++i)
00671 {
00672 inlist[i] = 0;
00673 outlist[i] = 0;
00674 end_rows[i] = beg_rows[i] + row_count[i];
00675 }
00676
00677
00678 create_fake_mat_private (mat, &matFake);
00679 CHECK_V_ERROR;
00680 Numbering_dhCreate (&(mat->numbSolve));
00681 CHECK_V_ERROR;
00682 numb = mat->numbSolve;
00683 Numbering_dhSetup (numb, matFake);
00684 CHECK_V_ERROR;
00685 destroy_fake_mat_private (matFake);
00686 CHECK_V_ERROR;
00687
00688 if (debug)
00689 {
00690 fprintf (stderr, "Numbering_dhSetup completed\n");
00691 }
00692
00693
00694 i = m + numb->num_ext;
00695 mat->work_y_lo = (double *) MALLOC_DH (i * sizeof (double));
00696 CHECK_V_ERROR;
00697 mat->work_x_hi = (double *) MALLOC_DH (i * sizeof (double));
00698 CHECK_V_ERROR;
00699 if (debug)
00700 {
00701 fprintf (logFile, "FACT num_extLo= %i num_extHi= %i\n",
00702 numb->num_extLo, numb->num_extHi);
00703 }
00704
00705 mat->num_recvLo = 0;
00706 mat->num_recvHi = 0;
00707 if (numb->num_extLo)
00708 {
00709 recvBuf = mat->work_y_lo + m;
00710 mat->num_recvLo = setup_receives_private (mat, beg_rows, end_rows,
00711 recvBuf, mat->recv_reqLo,
00712 numb->idx_extLo,
00713 numb->num_extLo, outlist,
00714 debug);
00715 CHECK_V_ERROR;
00716
00717 }
00718
00719 if (numb->num_extHi)
00720 {
00721 recvBuf = mat->work_x_hi + m + numb->num_extLo;
00722 mat->num_recvHi = setup_receives_private (mat, beg_rows, end_rows,
00723 recvBuf, mat->recv_reqHi,
00724 numb->idx_extHi,
00725 numb->num_extHi, outlist,
00726 debug);
00727 CHECK_V_ERROR;
00728 }
00729
00730 MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
00731
00732
00733
00734
00735
00736
00737 setup_sends_private (mat, inlist, sg->o2n_sub, debug);
00738 CHECK_V_ERROR;
00739
00740
00741 for (row = 0; row < m; row++)
00742 {
00743 int len = rp[row + 1] - rp[row];
00744 int *ind = cval + rp[row];
00745 Numbering_dhGlobalToLocal (numb, len, ind, ind);
00746 CHECK_V_ERROR;
00747 }
00748
00749 FREE_DH (outlist);
00750 CHECK_V_ERROR;
00751 FREE_DH (inlist);
00752 CHECK_V_ERROR;
00753 FREE_DH (end_rows);
00754 CHECK_V_ERROR;
00755
00756 if (debug)
00757 {
00758 int ii, jj;
00759
00760 fprintf (logFile,
00761 "\n--------- row/col structure, after global to local renumbering\n");
00762 for (ii = 0; ii < mat->m; ++ii)
00763 {
00764 fprintf (logFile, "local row %i :: ", ii + 1);
00765 for (jj = mat->rp[ii]; jj < mat->rp[ii + 1]; ++jj)
00766 {
00767 fprintf (logFile, "%i ", 1 + mat->cval[jj]);
00768 }
00769 fprintf (logFile, "\n");
00770 }
00771 fprintf (logFile, "\n");
00772 fflush (logFile);
00773 }
00774 END_FUNC_DH}
00775
00776
00777
00778
00779
00780 static void forward_solve_private (int m, int from, int to,
00781 int *rp, int *cval, int *diag,
00782 double *aval, double *rhs, double *work_y,
00783 bool debug);
00784
00785 static void backward_solve_private (int m, int from, int to,
00786 int *rp, int *cval, int *diag,
00787 double *aval, double *work_y,
00788 double *work_x, bool debug);
00789
00790 static int beg_rowG;
00791
00792
00793 #undef __FUNC__
00794 #define __FUNC__ "Factor_dhSolve"
00795 void
00796 Factor_dhSolve (double *rhs, double *lhs, Euclid_dh ctx)
00797 {
00798 START_FUNC_DH Factor_dh mat = ctx->F;
00799 int from, to;
00800 int ierr, i, m = mat->m, first_bdry = mat->first_bdry;
00801 int offsetLo = mat->numbSolve->num_extLo;
00802 int offsetHi = mat->numbSolve->num_extHi;
00803 int *rp = mat->rp, *cval = mat->cval, *diag = mat->diag;
00804 double *aval = mat->aval;
00805 int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi;
00806 int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi;
00807 double *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi;
00808 double *work_y = mat->work_y_lo;
00809 double *work_x = mat->work_x_hi;
00810 bool debug = false;
00811
00812 if (mat->debug && logFile != NULL)
00813 debug = true;
00814 if (debug)
00815 beg_rowG = ctx->F->beg_row;
00816
00817
00818
00819
00820
00821
00822
00823
00824 if (debug)
00825 {
00826 fprintf (logFile,
00827 "\n=====================================================\n");
00828 fprintf (logFile,
00829 "FACT Factor_dhSolve: num_recvLo= %i num_recvHi = %i\n",
00830 mat->num_recvLo, mat->num_recvHi);
00831 }
00832
00833
00834 if (mat->num_recvLo)
00835 {
00836 MPI_Startall (mat->num_recvLo, mat->recv_reqLo);
00837 }
00838 if (mat->num_recvHi)
00839 {
00840 MPI_Startall (mat->num_recvHi, mat->recv_reqHi);
00841 }
00842
00843
00844
00845
00846
00847 from = 0;
00848 to = first_bdry;
00849 if (from != to)
00850 {
00851 forward_solve_private (m, from, to, rp, cval, diag, aval,
00852 rhs, work_y, debug);
00853 CHECK_V_ERROR;
00854 }
00855
00856
00857
00858
00859 if (mat->num_recvLo)
00860 {
00861 MPI_Waitall (mat->num_recvLo, mat->recv_reqLo, mat->status);
00862
00863
00864 if (debug)
00865 {
00866 fprintf (logFile,
00867 "FACT got 'y' values from lower neighbors; work buffer:\n ");
00868 for (i = 0; i < offsetLo; ++i)
00869 {
00870 fprintf (logFile, "%g ", work_y[m + i]);
00871 }
00872 }
00873 }
00874
00875
00876 from = first_bdry;
00877 to = m;
00878 if (from != to)
00879 {
00880 forward_solve_private (m, from, to, rp, cval, diag, aval,
00881 rhs, work_y, debug);
00882 CHECK_V_ERROR;
00883 }
00884
00885
00886 if (mat->num_sendHi)
00887 {
00888
00889
00890 for (i = 0; i < sendlenHi; i++)
00891 {
00892 sendbufHi[i] = work_y[sendindHi[i]];
00893 }
00894
00895
00896 MPI_Startall (mat->num_sendHi, mat->send_reqHi);
00897
00898
00899 if (debug)
00900 {
00901 fprintf (logFile,
00902 "\nFACT sending 'y' values to higher neighbor:\nFACT ");
00903 for (i = 0; i < sendlenHi; i++)
00904 {
00905 fprintf (logFile, "%g ", sendbufHi[i]);
00906 }
00907 fprintf (logFile, "\n");
00908 }
00909 }
00910
00911
00912
00913
00914
00915 if (mat->num_recvHi)
00916 {
00917 ierr = MPI_Waitall (mat->num_recvHi, mat->recv_reqHi, mat->status);
00918 CHECK_MPI_V_ERROR (ierr);
00919
00920
00921 if (debug)
00922 {
00923 fprintf (logFile, "FACT got 'x' values from higher neighbors:\n ");
00924 for (i = m + offsetLo; i < m + offsetLo + offsetHi; ++i)
00925 {
00926 fprintf (logFile, "%g ", work_x[i]);
00927 }
00928 fprintf (logFile, "\n");
00929 }
00930 }
00931
00932
00933 from = m;
00934 to = first_bdry;
00935 if (from != to)
00936 {
00937 backward_solve_private (m, from, to, rp, cval, diag, aval,
00938 work_y, work_x, debug);
00939 CHECK_V_ERROR;
00940 }
00941
00942
00943 if (mat->num_sendLo)
00944 {
00945
00946
00947 for (i = 0; i < sendlenLo; i++)
00948 {
00949 sendbufLo[i] = work_x[sendindLo[i]];
00950 }
00951
00952
00953 ierr = MPI_Startall (mat->num_sendLo, mat->send_reqLo);
00954 CHECK_MPI_V_ERROR (ierr);
00955
00956
00957 if (debug)
00958 {
00959 fprintf (logFile,
00960 "\nFACT sending 'x' values to lower neighbor:\nFACT ");
00961 for (i = 0; i < sendlenLo; i++)
00962 {
00963 fprintf (logFile, "%g ", sendbufLo[i]);
00964 }
00965 fprintf (logFile, "\n");
00966 }
00967 }
00968
00969
00970 from = first_bdry;
00971 to = 0;
00972 if (from != to)
00973 {
00974 backward_solve_private (m, from, to, rp, cval, diag, aval,
00975 work_y, work_x, debug);
00976 CHECK_V_ERROR;
00977 }
00978
00979
00980 memcpy (lhs, work_x, m * sizeof (double));
00981
00982 if (debug)
00983 {
00984 fprintf (logFile, "\nFACT solution: ");
00985 for (i = 0; i < m; ++i)
00986 {
00987 fprintf (logFile, "%g ", lhs[i]);
00988 }
00989 fprintf (logFile, "\n");
00990 }
00991
00992
00993 if (mat->num_sendLo)
00994 {
00995 ierr = MPI_Waitall (mat->num_sendLo, mat->send_reqLo, mat->status);
00996 CHECK_MPI_V_ERROR (ierr);
00997 }
00998
00999 if (mat->num_sendHi)
01000 {
01001 ierr = MPI_Waitall (mat->num_sendHi, mat->send_reqHi, mat->status);
01002 CHECK_MPI_V_ERROR (ierr);
01003 }
01004 END_FUNC_DH}
01005
01006
01007
01008 #undef __FUNC__
01009 #define __FUNC__ "forward_solve_private"
01010 void
01011 forward_solve_private (int m, int from, int to, int *rp,
01012 int *cval, int *diag, double *aval,
01013 double *rhs, double *work_y, bool debug)
01014 {
01015 START_FUNC_DH int i, j, idx;
01016
01017 if (debug)
01018 {
01019 fprintf (logFile,
01020 "\nFACT starting forward_solve_private; from= %i; to= %i, m= %i\n",
01021 1 + from, 1 + to, m);
01022 }
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035 if (debug)
01036 {
01037 for (i = from; i < to; ++i)
01038 {
01039 int len = diag[i] - rp[i];
01040 int *col = cval + rp[i];
01041 double *val = aval + rp[i];
01042 double sum = rhs[i];
01043
01044 fprintf (logFile, "FACT solving for work_y[%i] (global)\n",
01045 i + 1 + beg_rowG);
01046 fprintf (logFile, "FACT sum = %g\n", sum);
01047 for (j = 0; j < len; ++j)
01048 {
01049 idx = col[j];
01050 sum -= (val[j] * work_y[idx]);
01051 fprintf (logFile,
01052 "FACT sum(%g) -= val[j] (%g) * work_y[%i] (%g)\n",
01053 sum, val[j], 1 + idx, work_y[idx]);
01054 }
01055 work_y[i] = sum;
01056 fprintf (logFile, "FACT work_y[%i] = %g\n", 1 + i + beg_rowG,
01057 work_y[i]);
01058 fprintf (logFile, "-----------\n");
01059 }
01060
01061 fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
01062 for (i = 0; i < to; i++)
01063 fprintf (logFile, " %i %g\n", i + 1 + beg_rowG, work_y[i]);
01064
01065 }
01066 else
01067 {
01068 for (i = from; i < to; ++i)
01069 {
01070 int len = diag[i] - rp[i];
01071 int *col = cval + rp[i];
01072 double *val = aval + rp[i];
01073 double sum = rhs[i];
01074
01075 for (j = 0; j < len; ++j)
01076 {
01077 idx = col[j];
01078 sum -= (val[j] * work_y[idx]);
01079 }
01080 work_y[i] = sum;
01081 }
01082 }
01083 END_FUNC_DH}
01084
01085 #undef __FUNC__
01086 #define __FUNC__ "backward_solve_private"
01087 void
01088 backward_solve_private (int m, int from, int to, int *rp,
01089 int *cval, int *diag, double *aval,
01090 double *work_y, double *work_x, bool debug)
01091 {
01092 START_FUNC_DH int i, j, idx;
01093
01094 if (debug)
01095 {
01096 fprintf (logFile,
01097 "\nFACT starting backward_solve_private; from= %i; to= %i, m= %i\n",
01098 1 + from, 1 + to, m);
01099 for (i = from - 1; i >= to; --i)
01100 {
01101 int len = rp[i + 1] - diag[i] - 1;
01102 int *col = cval + diag[i] + 1;
01103 double *val = aval + diag[i] + 1;
01104 double sum = work_y[i];
01105 fprintf (logFile, "FACT solving for work_x[%i]\n",
01106 i + 1 + beg_rowG);
01107
01108 for (j = 0; j < len; ++j)
01109 {
01110 idx = col[j];
01111 sum -= (val[j] * work_x[idx]);
01112 fprintf (logFile,
01113 "FACT sum(%g) -= val[j] (%g) * work_x[idx] (%g)\n",
01114 sum, val[j], work_x[idx]);
01115 }
01116 work_x[i] = sum * aval[diag[i]];
01117 fprintf (logFile, "FACT work_x[%i] = %g\n", 1 + i, work_x[i]);
01118 fprintf (logFile, "----------\n");
01119 }
01120
01121 }
01122 else
01123 {
01124 for (i = from - 1; i >= to; --i)
01125 {
01126 int len = rp[i + 1] - diag[i] - 1;
01127 int *col = cval + diag[i] + 1;
01128 double *val = aval + diag[i] + 1;
01129 double sum = work_y[i];
01130
01131 for (j = 0; j < len; ++j)
01132 {
01133 idx = col[j];
01134 sum -= (val[j] * work_x[idx]);
01135 }
01136 work_x[i] = sum * aval[diag[i]];
01137 }
01138 }
01139 END_FUNC_DH}
01140
01141 #undef __FUNC__
01142 #define __FUNC__ "Factor_dhInit"
01143 void
01144 Factor_dhInit (void *A, bool fillFlag, bool avalFlag,
01145 double rho, int id, int beg_rowP, Factor_dh * Fout)
01146 {
01147 START_FUNC_DH int m, n, beg_row, alloc;
01148 Factor_dh F;
01149
01150 EuclidGetDimensions (A, &beg_row, &m, &n);
01151 CHECK_V_ERROR;
01152 alloc = rho * m;
01153 Factor_dhCreate (&F);
01154 CHECK_V_ERROR;
01155
01156 *Fout = F;
01157 F->m = m;
01158 F->n = n;
01159 F->beg_row = beg_rowP;
01160 F->id = id;
01161 F->alloc = alloc;
01162
01163 F->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01164 CHECK_V_ERROR;
01165 F->rp[0] = 0;
01166 F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
01167 CHECK_V_ERROR;
01168 F->diag = (int *) MALLOC_DH (m * sizeof (int));
01169 CHECK_V_ERROR;
01170 if (fillFlag)
01171 {
01172 F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
01173 CHECK_V_ERROR;
01174 }
01175 if (avalFlag)
01176 {
01177 F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
01178 CHECK_V_ERROR;
01179 }
01180 END_FUNC_DH}
01181
01182 #undef __FUNC__
01183 #define __FUNC__ "Factor_dhReallocate"
01184 void
01185 Factor_dhReallocate (Factor_dh F, int used, int additional)
01186 {
01187 START_FUNC_DH int alloc = F->alloc;
01188
01189 if (used + additional > F->alloc)
01190 {
01191 int *tmpI;
01192 while (alloc < used + additional)
01193 alloc *= 2.0;
01194 F->alloc = alloc;
01195 tmpI = F->cval;
01196 F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
01197 CHECK_V_ERROR;
01198 memcpy (F->cval, tmpI, used * sizeof (int));
01199 FREE_DH (tmpI);
01200 CHECK_V_ERROR;
01201 if (F->fill != NULL)
01202 {
01203 tmpI = F->fill;
01204 F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
01205 CHECK_V_ERROR;
01206 memcpy (F->fill, tmpI, used * sizeof (int));
01207 FREE_DH (tmpI);
01208 CHECK_V_ERROR;
01209 }
01210 if (F->aval != NULL)
01211 {
01212 REAL_DH *tmpF = F->aval;
01213 F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
01214 CHECK_V_ERROR;
01215 memcpy (F->aval, tmpF, used * sizeof (REAL_DH));
01216 FREE_DH (tmpF);
01217 CHECK_V_ERROR;
01218 }
01219 }
01220 END_FUNC_DH}
01221
01222 #undef __FUNC__
01223 #define __FUNC__ "Factor_dhTranspose"
01224 void
01225 Factor_dhTranspose (Factor_dh A, Factor_dh * Bout)
01226 {
01227 START_FUNC_DH Factor_dh B;
01228
01229 if (np_dh > 1)
01230 {
01231 SET_V_ERROR ("only for sequential");
01232 }
01233
01234 Factor_dhCreate (&B);
01235 CHECK_V_ERROR;
01236 *Bout = B;
01237 B->m = B->n = A->m;
01238 if (B->aval == NULL)
01239 {
01240 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01241 A->aval, NULL);
01242 CHECK_V_ERROR;
01243 }
01244 else
01245 {
01246 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01247 A->aval, &B->aval);
01248 CHECK_V_ERROR;
01249 }
01250 END_FUNC_DH}
01251
01252
01253
01254 #undef __FUNC__
01255 #define __FUNC__ "Factor_dhSolveSeq"
01256 void
01257 Factor_dhSolveSeq (double *rhs, double *lhs, Euclid_dh ctx)
01258 {
01259 START_FUNC_DH Factor_dh F = ctx->F;
01260 if (F == NULL)
01261 {
01262 printf ("F is null.\n");
01263 }
01264 else
01265 {
01266 printf ("F isn't null.\n");
01267 }
01268 int *rp, *cval, *diag;
01269 int i, j, *vi, nz, m = F->m;
01270 REAL_DH *aval, *work;
01271
01272 REAL_DH *v, sum;
01273 bool debug = false;
01274
01275 if (ctx->F->debug && logFile != NULL)
01276 debug = true;
01277
01278 rp = F->rp;
01279 cval = F->cval;
01280 aval = F->aval;
01281 diag = F->diag;
01282
01283 work = ctx->work;
01284
01285 if (debug)
01286 {
01287 fprintf (logFile,
01288 "\nFACT ============================================================\n");
01289 fprintf (logFile, "FACT starting Factor_dhSolveSeq\n");
01290
01291
01292 fprintf (logFile, "\nFACT STARTING FORWARD SOLVE\n------------\n");
01293 work[0] = rhs[0];
01294 fprintf (logFile, "FACT work[0] = %g\n------------\n", work[0]);
01295 for (i = 1; i < m; i++)
01296 {
01297 v = aval + rp[i];
01298 vi = cval + rp[i];
01299 nz = diag[i] - rp[i];
01300 fprintf (logFile, "FACT solving for work[%i]\n", i + 1);
01301 sum = rhs[i];
01302 for (j = 0; j < nz; ++j)
01303 {
01304 sum -= (v[j] * work[vi[j]]);
01305 fprintf (logFile,
01306 "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
01307 sum, v[j], work[vi[j]]);
01308 }
01309 work[i] = sum;
01310 fprintf (logFile, "FACT work[%i] = %g\n------------\n", 1 + i,
01311 work[i]);
01312 }
01313
01314
01315 fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
01316 for (i = 0; i < m; i++)
01317 fprintf (logFile, " %i %g\n", i + 1, work[i]);
01318
01319
01320
01321 fprintf (logFile, "\nFACT STARTING BACKWARD SOLVE\n--------------\n");
01322 for (i = m - 1; i >= 0; i--)
01323 {
01324 v = aval + diag[i] + 1;
01325 vi = cval + diag[i] + 1;
01326 nz = rp[i + 1] - diag[i] - 1;
01327 fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1);
01328 sum = work[i];
01329 for (j = 0; j < nz; ++j)
01330 {
01331 sum -= (v[j] * work[vi[j]]);
01332 fprintf (logFile,
01333 "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
01334 sum, v[j], work[vi[j]]);
01335 }
01336 lhs[i] = work[i] = sum * aval[diag[i]];
01337 fprintf (logFile, "FACT lhs[%i] = %g\n------------\n", 1 + i,
01338 lhs[i]);
01339 fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1);
01340 }
01341
01342 fprintf (logFile, "\nFACT solution: ");
01343 for (i = 0; i < m; ++i)
01344 fprintf (logFile, "%g ", lhs[i]);
01345 fprintf (logFile, "\n");
01346
01347
01348 }
01349 else
01350 {
01351
01352 work[0] = rhs[0];
01353 for (i = 1; i < m; i++)
01354 {
01355 v = aval + rp[i];
01356 vi = cval + rp[i];
01357 nz = diag[i] - rp[i];
01358 sum = rhs[i];
01359 while (nz--)
01360 sum -= (*v++ * work[*vi++]);
01361 work[i] = sum;
01362 }
01363
01364
01365 for (i = m - 1; i >= 0; i--)
01366 {
01367 v = aval + diag[i] + 1;
01368 vi = cval + diag[i] + 1;
01369 nz = rp[i + 1] - diag[i] - 1;
01370 sum = work[i];
01371 while (nz--)
01372 sum -= (*v++ * work[*vi++]);
01373 lhs[i] = work[i] = sum * aval[diag[i]];
01374 }
01375 }
01376 END_FUNC_DH}
01377
01378
01379
01380
01381
01382 #undef __FUNC__
01383 #define __FUNC__ "adjust_bj_private"
01384 void
01385 adjust_bj_private (Factor_dh mat)
01386 {
01387 START_FUNC_DH int i;
01388 int nz = mat->rp[mat->m];
01389 int beg_row = mat->beg_row;
01390 for (i = 0; i < nz; ++i)
01391 mat->cval[i] += beg_row;
01392 END_FUNC_DH}
01393
01394 #undef __FUNC__
01395 #define __FUNC__ "unadjust_bj_private"
01396 void
01397 unadjust_bj_private (Factor_dh mat)
01398 {
01399 START_FUNC_DH int i;
01400 int nz = mat->rp[mat->m];
01401 int beg_row = mat->beg_row;
01402 for (i = 0; i < nz; ++i)
01403 mat->cval[i] -= beg_row;
01404 END_FUNC_DH}
01405
01406 #undef __FUNC__
01407 #define __FUNC__ "Factor_dhMaxPivotInverse"
01408 double
01409 Factor_dhMaxPivotInverse (Factor_dh mat)
01410 {
01411 START_FUNC_DH int i, m = mat->m, *diags = mat->diag;
01412 REAL_DH *aval = mat->aval;
01413 double minGlobal = 0.0, min = aval[diags[0]];
01414 double retval;
01415
01416 for (i = 0; i < m; ++i)
01417 min = MIN (min, fabs (aval[diags[i]]));
01418 if (np_dh == 1)
01419 {
01420 minGlobal = min;
01421 }
01422 else
01423 {
01424 MPI_Reduce (&min, &minGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, comm_dh);
01425 }
01426
01427 if (minGlobal == 0)
01428 {
01429 retval = 0;
01430 }
01431 else
01432 {
01433 retval = 1.0 / minGlobal;
01434 }
01435 END_FUNC_VAL (retval)}
01436
01437 #undef __FUNC__
01438 #define __FUNC__ "Factor_dhMaxValue"
01439 double
01440 Factor_dhMaxValue (Factor_dh mat)
01441 {
01442 START_FUNC_DH double maxGlobal = 0.0, max = 0.0;
01443 int i, nz = mat->rp[mat->m];
01444 REAL_DH *aval = mat->aval;
01445
01446 for (i = 0; i < nz; ++i)
01447 {
01448 max = MAX (max, fabs (aval[i]));
01449 }
01450
01451 if (np_dh == 1)
01452 {
01453 maxGlobal = max;
01454 }
01455 else
01456 {
01457 MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
01458 }
01459 END_FUNC_VAL (maxGlobal)}
01460
01461
01462 #undef __FUNC__
01463 #define __FUNC__ "Factor_dhCondEst"
01464 double
01465 Factor_dhCondEst (Factor_dh mat, Euclid_dh ctx)
01466 {
01467 START_FUNC_DH double max = 0.0, maxGlobal = 0.0;
01468 double *x;
01469 int i, m = mat->m;
01470 Vec_dh lhs, rhs;
01471
01472 Vec_dhCreate (&lhs);
01473 CHECK_ERROR (-1);
01474 Vec_dhInit (lhs, m);
01475 CHECK_ERROR (-1);
01476 Vec_dhDuplicate (lhs, &rhs);
01477 CHECK_ERROR (-1);
01478 Vec_dhSet (rhs, 1.0);
01479 CHECK_ERROR (-1);
01480 Euclid_dhApply (ctx, rhs->vals, lhs->vals);
01481 CHECK_ERROR (-1);
01482
01483 x = lhs->vals;
01484 for (i = 0; i < m; ++i)
01485 {
01486 max = MAX (max, fabs (x[i]));
01487 }
01488
01489 if (np_dh == 1)
01490 {
01491 maxGlobal = max;
01492 }
01493 else
01494 {
01495 MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
01496 }
01497 END_FUNC_VAL (maxGlobal)}