00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "Factor_dh.h"
00031 #include "Vec_dh.h"
00032 #include "Mat_dh.h"
00033 #include "SubdomainGraph_dh.h"
00034 #include "TimeLog_dh.h"
00035 #include "Mem_dh.h"
00036 #include "Numbering_dh.h"
00037 #include "Hash_i_dh.h"
00038 #include "Parser_dh.h"
00039 #include "mat_dh_private.h"
00040 #include "getRow_dh.h"
00041 #include "Euclid_dh.h"
00042 #include "io_dh.h"
00043
00044
00045 void
00046 Factor_dh_junk ()
00047 {
00048 }
00049
00050 static void adjust_bj_private (Factor_dh mat);
00051 static void unadjust_bj_private (Factor_dh mat);
00052
00053
00054 #undef __FUNC__
00055 #define __FUNC__ "Factor_dhCreate"
00056 void
00057 Factor_dhCreate (Factor_dh * mat)
00058 {
00059 START_FUNC_DH struct _factor_dh *tmp;
00060
00061 if (np_dh > MAX_MPI_TASKS)
00062 {
00063 SET_V_ERROR ("you must change MAX_MPI_TASKS and recompile!");
00064 }
00065
00066 tmp = (struct _factor_dh *) MALLOC_DH (sizeof (struct _factor_dh));
00067 CHECK_V_ERROR;
00068 *mat = tmp;
00069
00070 tmp->m = 0;
00071 tmp->n = 0;
00072 tmp->id = myid_dh;
00073 tmp->beg_row = 0;
00074 tmp->first_bdry = 0;
00075 tmp->bdry_count = 0;
00076 tmp->blockJacobi = false;
00077
00078 tmp->rp = NULL;
00079 tmp->cval = NULL;
00080 tmp->aval = NULL;
00081 tmp->fill = NULL;
00082 tmp->diag = NULL;
00083 tmp->alloc = 0;
00084
00085 tmp->work_y_lo = tmp->work_x_hi = NULL;
00086 tmp->sendbufLo = tmp->sendbufHi = NULL;
00087 tmp->sendindLo = tmp->sendindHi = NULL;
00088 tmp->num_recvLo = tmp->num_recvHi = 0;
00089 tmp->num_sendLo = tmp->num_sendHi = 0;
00090 tmp->sendlenLo = tmp->sendlenHi = 0;
00091
00092 tmp->solveIsSetup = false;
00093 tmp->numbSolve = NULL;
00094
00095 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Factor");
00096
00097
00098 END_FUNC_DH}
00099
00100 #undef __FUNC__
00101 #define __FUNC__ "Factor_dhDestroy"
00102 void
00103 Factor_dhDestroy (Factor_dh mat)
00104 {
00105 START_FUNC_DH if (mat->rp != NULL)
00106 {
00107 FREE_DH (mat->rp);
00108 CHECK_V_ERROR;
00109 }
00110 if (mat->cval != NULL)
00111 {
00112 FREE_DH (mat->cval);
00113 CHECK_V_ERROR;
00114 }
00115 if (mat->aval != NULL)
00116 {
00117 FREE_DH (mat->aval);
00118 CHECK_V_ERROR;
00119 }
00120 if (mat->diag != NULL)
00121 {
00122 FREE_DH (mat->diag);
00123 CHECK_V_ERROR;
00124 }
00125 if (mat->fill != NULL)
00126 {
00127 FREE_DH (mat->fill);
00128 CHECK_V_ERROR;
00129 }
00130
00131 if (mat->work_y_lo != NULL)
00132 {
00133 FREE_DH (mat->work_y_lo);
00134 CHECK_V_ERROR;
00135 }
00136 if (mat->work_x_hi != NULL)
00137 {
00138 FREE_DH (mat->work_x_hi);
00139 CHECK_V_ERROR;
00140 }
00141 if (mat->sendbufLo != NULL)
00142 {
00143 FREE_DH (mat->sendbufLo);
00144 CHECK_V_ERROR;
00145 }
00146 if (mat->sendbufHi != NULL)
00147 {
00148 FREE_DH (mat->sendbufHi);
00149 CHECK_V_ERROR;
00150 }
00151 if (mat->sendindLo != NULL)
00152 {
00153 FREE_DH (mat->sendindLo);
00154 CHECK_V_ERROR;
00155 }
00156 if (mat->sendindHi != NULL)
00157 {
00158 FREE_DH (mat->sendindHi);
00159 CHECK_V_ERROR;
00160 }
00161
00162 if (mat->numbSolve != NULL)
00163 {
00164 Numbering_dhDestroy (mat->numbSolve);
00165 CHECK_V_ERROR;
00166 }
00167 FREE_DH (mat);
00168 CHECK_V_ERROR;
00169 END_FUNC_DH}
00170
00171
00172 #undef __FUNC__
00173 #define __FUNC__ "create_fake_mat_private"
00174 static void
00175 create_fake_mat_private (Factor_dh mat, Mat_dh * matFakeIN)
00176 {
00177 START_FUNC_DH Mat_dh matFake;
00178 Mat_dhCreate (matFakeIN);
00179 CHECK_V_ERROR;
00180 matFake = *matFakeIN;
00181 matFake->m = mat->m;
00182 matFake->n = mat->n;
00183 matFake->rp = mat->rp;
00184 matFake->cval = mat->cval;
00185 matFake->aval = mat->aval;
00186 matFake->beg_row = mat->beg_row;
00187 END_FUNC_DH}
00188
00189 #undef __FUNC__
00190 #define __FUNC__ "destroy_fake_mat_private"
00191 static void
00192 destroy_fake_mat_private (Mat_dh matFake)
00193 {
00194 START_FUNC_DH matFake->rp = NULL;
00195 matFake->cval = NULL;
00196 matFake->aval = NULL;
00197 Mat_dhDestroy (matFake);
00198 CHECK_V_ERROR;
00199 END_FUNC_DH}
00200
00201
00202
00203 #undef __FUNC__
00204 #define __FUNC__ "Factor_dhReadNz"
00205 int
00206 Factor_dhReadNz (Factor_dh mat)
00207 {
00208 START_FUNC_DH int ierr, retval = mat->rp[mat->m];
00209 int nz = retval;
00210 ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
00211 CHECK_MPI_ERROR (ierr);
00212 END_FUNC_VAL (retval)}
00213
00214
00215
00216 #undef __FUNC__
00217 #define __FUNC__ "Factor_dhPrintRows"
00218 void
00219 Factor_dhPrintRows (Factor_dh mat, FILE * fp)
00220 {
00221 START_FUNC_DH int beg_row = mat->beg_row;
00222 int m = mat->m, i, j;
00223 bool noValues;
00224
00225 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
00226 if (mat->aval == NULL)
00227 noValues = true;
00228
00229 if (mat->blockJacobi)
00230 {
00231 adjust_bj_private (mat);
00232 CHECK_V_ERROR;
00233 }
00234
00235 fprintf (fp,
00236 "\n----------------------- Factor_dhPrintRows ------------------\n");
00237 if (mat->blockJacobi)
00238 {
00239 fprintf (fp,
00240 "@@@ Block Jacobi ILU; adjusted values from zero-based @@@\n");
00241 }
00242
00243 for (i = 0; i < m; ++i)
00244 {
00245 fprintf (fp, "%i :: ", 1 + i + beg_row);
00246 for (j = mat->rp[i]; j < mat->rp[i + 1]; ++j)
00247 {
00248 if (noValues)
00249 {
00250 fprintf (fp, "%i ", 1 + mat->cval[j]);
00251 }
00252 else
00253 {
00254 fprintf (fp, "%i,%g ; ", 1 + mat->cval[j], mat->aval[j]);
00255 }
00256 }
00257 fprintf (fp, "\n");
00258 }
00259
00260 if (mat->blockJacobi)
00261 {
00262 unadjust_bj_private (mat);
00263 CHECK_V_ERROR;
00264 }
00265 END_FUNC_DH}
00266
00267
00268 #undef __FUNC__
00269 #define __FUNC__ "Factor_dhPrintDiags"
00270 void
00271 Factor_dhPrintDiags (Factor_dh mat, FILE * fp)
00272 {
00273 START_FUNC_DH int beg_row = mat->beg_row;
00274 int m = mat->m, i, pe, *diag = mat->diag;
00275 REAL_DH *aval = mat->aval;
00276
00277
00278 fprintf_dh (fp,
00279 "\n----------------------- Factor_dhPrintDiags ------------------\n");
00280 fprintf_dh (fp, "(grep for 'ZERO')\n");
00281
00282 for (pe = 0; pe < np_dh; ++pe)
00283 {
00284 MPI_Barrier (comm_dh);
00285 if (mat->id == pe)
00286 {
00287 fprintf (fp, "----- subdomain: %i processor: %i\n", pe, myid_dh);
00288 for (i = 0; i < m; ++i)
00289 {
00290 REAL_DH val = aval[diag[i]];
00291 if (val)
00292 {
00293 fprintf (fp, "%i %g\n", i + 1 + beg_row, aval[diag[i]]);
00294 }
00295 else
00296 {
00297 fprintf (fp, "%i %g ZERO\n", i + 1 + beg_row,
00298 aval[diag[i]]);
00299 }
00300 }
00301 }
00302 }
00303 END_FUNC_DH}
00304
00305
00306 #undef __FUNC__
00307 #define __FUNC__ "Factor_dhPrintGraph"
00308 void
00309 Factor_dhPrintGraph (Factor_dh mat, char *filename)
00310 {
00311 START_FUNC_DH FILE * fp;
00312 int i, j, m = mat->m, *work, *rp = mat->rp, *cval = mat->cval;
00313
00314 if (np_dh > 1)
00315 SET_V_ERROR ("only implemented for single mpi task");
00316
00317 work = (int *) MALLOC_DH (m * sizeof (int));
00318 CHECK_V_ERROR;
00319
00320 fp = openFile_dh (filename, "w");
00321 CHECK_V_ERROR;
00322
00323 for (i = 0; i < m; ++i)
00324 {
00325 for (j = 0; j < m; ++j)
00326 work[j] = 0;
00327 for (j = rp[i]; j < rp[i]; ++j)
00328 work[cval[j]] = 1;
00329
00330 for (j = 0; j < m; ++j)
00331 {
00332 if (work[j])
00333 {
00334 fprintf (fp, " x ");
00335 }
00336 else
00337 {
00338 fprintf (fp, " ");
00339 }
00340 }
00341 fprintf (fp, "\n");
00342 }
00343
00344 closeFile_dh (fp);
00345 CHECK_V_ERROR;
00346
00347 FREE_DH (work);
00348 END_FUNC_DH}
00349
00350
00351 #undef __FUNC__
00352 #define __FUNC__ "Factor_dhPrintTriples"
00353 void
00354 Factor_dhPrintTriples (Factor_dh mat, char *filename)
00355 {
00356 START_FUNC_DH int pe, i, j;
00357 int m = mat->m, *rp = mat->rp;
00358 int beg_row = mat->beg_row;
00359 REAL_DH *aval = mat->aval;
00360 bool noValues;
00361 FILE *fp;
00362
00363 if (mat->blockJacobi)
00364 {
00365 adjust_bj_private (mat);
00366 CHECK_V_ERROR;
00367 }
00368
00369 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
00370 if (noValues)
00371 aval = NULL;
00372
00373 for (pe = 0; pe < np_dh; ++pe)
00374 {
00375 MPI_Barrier (comm_dh);
00376 if (mat->id == pe)
00377 {
00378 if (pe == 0)
00379 {
00380 fp = openFile_dh (filename, "w");
00381 CHECK_V_ERROR;
00382 }
00383 else
00384 {
00385 fp = openFile_dh (filename, "a");
00386 CHECK_V_ERROR;
00387 }
00388
00389 for (i = 0; i < m; ++i)
00390 {
00391 for (j = rp[i]; j < rp[i + 1]; ++j)
00392 {
00393 if (noValues)
00394 {
00395 fprintf (fp, "%i %i\n", 1 + i + beg_row,
00396 1 + mat->cval[j]);
00397 }
00398 else
00399 {
00400 fprintf (fp, TRIPLES_FORMAT,
00401 1 + i + beg_row, 1 + mat->cval[j], aval[j]);
00402 }
00403 }
00404 }
00405 closeFile_dh (fp);
00406 CHECK_V_ERROR;
00407 }
00408 }
00409
00410 if (mat->blockJacobi)
00411 {
00412 unadjust_bj_private (mat);
00413 CHECK_V_ERROR;
00414 }
00415 END_FUNC_DH}
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432 #undef __FUNC__
00433 #define __FUNC__ "setup_receives_private"
00434 static int
00435 setup_receives_private (Factor_dh mat, int *beg_rows, int *end_rows,
00436 double *recvBuf, MPI_Request * req,
00437 int *reqind, int reqlen, int *outlist, bool debug)
00438 {
00439 START_FUNC_DH int i, j, this_pe, num_recv = 0;
00440 MPI_Request request;
00441
00442 if (debug)
00443 {
00444 fprintf (logFile,
00445 "\nFACT ========================================================\n");
00446 fprintf (logFile, "FACT STARTING: setup_receives_private\n");
00447 }
00448
00449 for (i = 0; i < reqlen; i = j)
00450 {
00451
00452 this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
00453 CHECK_ERROR (-1);
00454
00455
00456 for (j = i + 1; j < reqlen; j++)
00457 {
00458 int idx = reqind[j];
00459 if (idx < beg_rows[this_pe] || idx >= end_rows[this_pe])
00460 {
00461 break;
00462 }
00463 }
00464
00465 if (debug)
00466 {
00467 int k;
00468 fprintf (logFile, "FACT need nodes from P_%i: ", this_pe);
00469 for (k = i; k < j; ++k)
00470 fprintf (logFile, "%i ", 1 + reqind[k]);
00471 fprintf (logFile, "\n");
00472 }
00473
00474
00475 outlist[this_pe] = j - i;
00476
00477
00478
00479
00480
00481
00482
00483 MPI_Isend (reqind + i, j - i, MPI_INT, this_pe, 444, comm_dh, &request);
00484 MPI_Request_free (&request);
00485
00486
00487 MPI_Recv_init (recvBuf + i, j - i, MPI_DOUBLE, this_pe, 555,
00488 comm_dh, req + num_recv);
00489 ++num_recv;
00490 }
00491
00492 END_FUNC_VAL (num_recv);
00493 }
00494
00495
00496
00497
00498
00499
00500 #undef __FUNC__
00501 #define __FUNC__ "setup_sends_private"
00502 static void
00503 setup_sends_private (Factor_dh mat, int *inlist,
00504 int *o2n_subdomain, bool debug)
00505 {
00506 START_FUNC_DH int i, jLo, jHi, sendlenLo, sendlenHi, first = mat->beg_row;
00507 MPI_Request *requests = mat->requests, *sendReq;
00508 MPI_Status *statuses = mat->status;
00509 bool isHigher;
00510 int *rcvBuf;
00511 double *sendBuf;
00512 int myidNEW = o2n_subdomain[myid_dh];
00513 int count;
00514
00515 if (debug)
00516 {
00517 fprintf (logFile, "FACT \nSTARTING: setup_sends_private\n");
00518 }
00519
00520
00521 sendlenLo = sendlenHi = 0;
00522 for (i = 0; i < np_dh; i++)
00523 {
00524 if (inlist[i])
00525 {
00526 if (o2n_subdomain[i] < myidNEW)
00527 {
00528 sendlenLo += inlist[i];
00529 }
00530 else
00531 {
00532 sendlenHi += inlist[i];
00533 }
00534 }
00535 }
00536
00537 mat->sendlenLo = sendlenLo;
00538 mat->sendlenHi = sendlenHi;
00539 mat->sendbufLo = (double *) MALLOC_DH (sendlenLo * sizeof (double));
00540 CHECK_V_ERROR;
00541 mat->sendbufHi = (double *) MALLOC_DH (sendlenHi * sizeof (double));
00542 CHECK_V_ERROR;
00543 mat->sendindLo = (int *) MALLOC_DH (sendlenLo * sizeof (int));
00544 CHECK_V_ERROR;
00545 mat->sendindHi = (int *) MALLOC_DH (sendlenHi * sizeof (int));
00546 CHECK_V_ERROR;
00547
00548 count = 0;
00549 jLo = jHi = 0;
00550 mat->num_sendLo = 0;
00551 mat->num_sendHi = 0;
00552 for (i = 0; i < np_dh; i++)
00553 {
00554 if (inlist[i])
00555 {
00556 isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
00557
00558
00559 if (isHigher)
00560 {
00561 rcvBuf = &mat->sendindHi[jHi];
00562 sendBuf = &mat->sendbufHi[jHi];
00563 sendReq = &mat->send_reqHi[mat->num_sendHi];
00564 mat->num_sendHi++;
00565 jHi += inlist[i];
00566 }
00567 else
00568 {
00569 rcvBuf = &mat->sendindLo[jLo];
00570 sendBuf = &mat->sendbufLo[jLo];
00571 sendReq = &mat->send_reqLo[mat->num_sendLo];
00572 mat->num_sendLo++;
00573 jLo += inlist[i];
00574 }
00575
00576
00577
00578
00579 MPI_Irecv (rcvBuf, inlist[i], MPI_INT, i, 444, comm_dh,
00580 requests + count);
00581 ++count;
00582
00583
00584 MPI_Send_init (sendBuf, inlist[i], MPI_DOUBLE, i, 555, comm_dh,
00585 sendReq);
00586 }
00587 }
00588
00589
00590 MPI_Waitall (count, requests, statuses);
00591
00592 if (debug)
00593 {
00594 int j;
00595 jLo = jHi = 0;
00596
00597 fprintf (logFile,
00598 "\nFACT columns that I must send to other subdomains:\n");
00599 for (i = 0; i < np_dh; i++)
00600 {
00601 if (inlist[i])
00602 {
00603 isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
00604 if (isHigher)
00605 {
00606 rcvBuf = &mat->sendindHi[jHi];
00607 jHi += inlist[i];
00608 }
00609 else
00610 {
00611 rcvBuf = &mat->sendindLo[jLo];
00612 jLo += inlist[i];
00613 }
00614
00615 fprintf (logFile, "FACT send to P_%i: ", i);
00616 for (j = 0; j < inlist[i]; ++j)
00617 fprintf (logFile, "%i ", rcvBuf[j] + 1);
00618 fprintf (logFile, "\n");
00619 }
00620 }
00621 }
00622
00623
00624
00625 for (i = 0; i < mat->sendlenLo; i++)
00626 mat->sendindLo[i] -= first;
00627 for (i = 0; i < mat->sendlenHi; i++)
00628 mat->sendindHi[i] -= first;
00629 END_FUNC_DH}
00630
00631
00632
00633 #undef __FUNC__
00634 #define __FUNC__ "Factor_dhSolveSetup"
00635 void
00636 Factor_dhSolveSetup (Factor_dh mat, SubdomainGraph_dh sg)
00637 {
00638 START_FUNC_DH int *outlist, *inlist;
00639 int i, row, *rp = mat->rp, *cval = mat->cval;
00640 Numbering_dh numb;
00641 int m = mat->m;
00642
00643 int *beg_rows = sg->beg_rowP, *row_count = sg->row_count, *end_rows;
00644 Mat_dh matFake;
00645 bool debug = false;
00646 double *recvBuf;
00647
00648 if (mat->debug && logFile != NULL)
00649 debug = true;
00650
00651 end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
00652 CHECK_V_ERROR;
00653 outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
00654 CHECK_V_ERROR;
00655 inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
00656 CHECK_V_ERROR;
00657 for (i = 0; i < np_dh; ++i)
00658 {
00659 inlist[i] = 0;
00660 outlist[i] = 0;
00661 end_rows[i] = beg_rows[i] + row_count[i];
00662 }
00663
00664
00665 create_fake_mat_private (mat, &matFake);
00666 CHECK_V_ERROR;
00667 Numbering_dhCreate (&(mat->numbSolve));
00668 CHECK_V_ERROR;
00669 numb = mat->numbSolve;
00670 Numbering_dhSetup (numb, matFake);
00671 CHECK_V_ERROR;
00672 destroy_fake_mat_private (matFake);
00673 CHECK_V_ERROR;
00674
00675 if (debug)
00676 {
00677 fprintf (stderr, "Numbering_dhSetup completed\n");
00678 }
00679
00680
00681 i = m + numb->num_ext;
00682 mat->work_y_lo = (double *) MALLOC_DH (i * sizeof (double));
00683 CHECK_V_ERROR;
00684 mat->work_x_hi = (double *) MALLOC_DH (i * sizeof (double));
00685 CHECK_V_ERROR;
00686 if (debug)
00687 {
00688 fprintf (logFile, "FACT num_extLo= %i num_extHi= %i\n",
00689 numb->num_extLo, numb->num_extHi);
00690 }
00691
00692 mat->num_recvLo = 0;
00693 mat->num_recvHi = 0;
00694 if (numb->num_extLo)
00695 {
00696 recvBuf = mat->work_y_lo + m;
00697 mat->num_recvLo = setup_receives_private (mat, beg_rows, end_rows,
00698 recvBuf, mat->recv_reqLo,
00699 numb->idx_extLo,
00700 numb->num_extLo, outlist,
00701 debug);
00702 CHECK_V_ERROR;
00703
00704 }
00705
00706 if (numb->num_extHi)
00707 {
00708 recvBuf = mat->work_x_hi + m + numb->num_extLo;
00709 mat->num_recvHi = setup_receives_private (mat, beg_rows, end_rows,
00710 recvBuf, mat->recv_reqHi,
00711 numb->idx_extHi,
00712 numb->num_extHi, outlist,
00713 debug);
00714 CHECK_V_ERROR;
00715 }
00716
00717 MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
00718
00719
00720
00721
00722
00723
00724 setup_sends_private (mat, inlist, sg->o2n_sub, debug);
00725 CHECK_V_ERROR;
00726
00727
00728 for (row = 0; row < m; row++)
00729 {
00730 int len = rp[row + 1] - rp[row];
00731 int *ind = cval + rp[row];
00732 Numbering_dhGlobalToLocal (numb, len, ind, ind);
00733 CHECK_V_ERROR;
00734 }
00735
00736 FREE_DH (outlist);
00737 CHECK_V_ERROR;
00738 FREE_DH (inlist);
00739 CHECK_V_ERROR;
00740 FREE_DH (end_rows);
00741 CHECK_V_ERROR;
00742
00743 if (debug)
00744 {
00745 int ii, jj;
00746
00747 fprintf (logFile,
00748 "\n--------- row/col structure, after global to local renumbering\n");
00749 for (ii = 0; ii < mat->m; ++ii)
00750 {
00751 fprintf (logFile, "local row %i :: ", ii + 1);
00752 for (jj = mat->rp[ii]; jj < mat->rp[ii + 1]; ++jj)
00753 {
00754 fprintf (logFile, "%i ", 1 + mat->cval[jj]);
00755 }
00756 fprintf (logFile, "\n");
00757 }
00758 fprintf (logFile, "\n");
00759 fflush (logFile);
00760 }
00761 END_FUNC_DH}
00762
00763
00764
00765
00766
00767 static void forward_solve_private (int m, int from, int to,
00768 int *rp, int *cval, int *diag,
00769 double *aval, double *rhs, double *work_y,
00770 bool debug);
00771
00772 static void backward_solve_private (int m, int from, int to,
00773 int *rp, int *cval, int *diag,
00774 double *aval, double *work_y,
00775 double *work_x, bool debug);
00776
00777 static int beg_rowG;
00778
00779
00780 #undef __FUNC__
00781 #define __FUNC__ "Factor_dhSolve"
00782 void
00783 Factor_dhSolve (double *rhs, double *lhs, Euclid_dh ctx)
00784 {
00785 START_FUNC_DH Factor_dh mat = ctx->F;
00786 int from, to;
00787 int ierr, i, m = mat->m, first_bdry = mat->first_bdry;
00788 int offsetLo = mat->numbSolve->num_extLo;
00789 int offsetHi = mat->numbSolve->num_extHi;
00790 int *rp = mat->rp, *cval = mat->cval, *diag = mat->diag;
00791 double *aval = mat->aval;
00792 int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi;
00793 int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi;
00794 double *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi;
00795 double *work_y = mat->work_y_lo;
00796 double *work_x = mat->work_x_hi;
00797 bool debug = false;
00798
00799 if (mat->debug && logFile != NULL)
00800 debug = true;
00801 if (debug)
00802 beg_rowG = ctx->F->beg_row;
00803
00804
00805
00806
00807
00808
00809
00810
00811 if (debug)
00812 {
00813 fprintf (logFile,
00814 "\n=====================================================\n");
00815 fprintf (logFile,
00816 "FACT Factor_dhSolve: num_recvLo= %i num_recvHi = %i\n",
00817 mat->num_recvLo, mat->num_recvHi);
00818 }
00819
00820
00821 if (mat->num_recvLo)
00822 {
00823 MPI_Startall (mat->num_recvLo, mat->recv_reqLo);
00824 }
00825 if (mat->num_recvHi)
00826 {
00827 MPI_Startall (mat->num_recvHi, mat->recv_reqHi);
00828 }
00829
00830
00831
00832
00833
00834 from = 0;
00835 to = first_bdry;
00836 if (from != to)
00837 {
00838 forward_solve_private (m, from, to, rp, cval, diag, aval,
00839 rhs, work_y, debug);
00840 CHECK_V_ERROR;
00841 }
00842
00843
00844
00845
00846 if (mat->num_recvLo)
00847 {
00848 MPI_Waitall (mat->num_recvLo, mat->recv_reqLo, mat->status);
00849
00850
00851 if (debug)
00852 {
00853 fprintf (logFile,
00854 "FACT got 'y' values from lower neighbors; work buffer:\n ");
00855 for (i = 0; i < offsetLo; ++i)
00856 {
00857 fprintf (logFile, "%g ", work_y[m + i]);
00858 }
00859 }
00860 }
00861
00862
00863 from = first_bdry;
00864 to = m;
00865 if (from != to)
00866 {
00867 forward_solve_private (m, from, to, rp, cval, diag, aval,
00868 rhs, work_y, debug);
00869 CHECK_V_ERROR;
00870 }
00871
00872
00873 if (mat->num_sendHi)
00874 {
00875
00876
00877 for (i = 0; i < sendlenHi; i++)
00878 {
00879 sendbufHi[i] = work_y[sendindHi[i]];
00880 }
00881
00882
00883 MPI_Startall (mat->num_sendHi, mat->send_reqHi);
00884
00885
00886 if (debug)
00887 {
00888 fprintf (logFile,
00889 "\nFACT sending 'y' values to higher neighbor:\nFACT ");
00890 for (i = 0; i < sendlenHi; i++)
00891 {
00892 fprintf (logFile, "%g ", sendbufHi[i]);
00893 }
00894 fprintf (logFile, "\n");
00895 }
00896 }
00897
00898
00899
00900
00901
00902 if (mat->num_recvHi)
00903 {
00904 ierr = MPI_Waitall (mat->num_recvHi, mat->recv_reqHi, mat->status);
00905 CHECK_MPI_V_ERROR (ierr);
00906
00907
00908 if (debug)
00909 {
00910 fprintf (logFile, "FACT got 'x' values from higher neighbors:\n ");
00911 for (i = m + offsetLo; i < m + offsetLo + offsetHi; ++i)
00912 {
00913 fprintf (logFile, "%g ", work_x[i]);
00914 }
00915 fprintf (logFile, "\n");
00916 }
00917 }
00918
00919
00920 from = m;
00921 to = first_bdry;
00922 if (from != to)
00923 {
00924 backward_solve_private (m, from, to, rp, cval, diag, aval,
00925 work_y, work_x, debug);
00926 CHECK_V_ERROR;
00927 }
00928
00929
00930 if (mat->num_sendLo)
00931 {
00932
00933
00934 for (i = 0; i < sendlenLo; i++)
00935 {
00936 sendbufLo[i] = work_x[sendindLo[i]];
00937 }
00938
00939
00940 ierr = MPI_Startall (mat->num_sendLo, mat->send_reqLo);
00941 CHECK_MPI_V_ERROR (ierr);
00942
00943
00944 if (debug)
00945 {
00946 fprintf (logFile,
00947 "\nFACT sending 'x' values to lower neighbor:\nFACT ");
00948 for (i = 0; i < sendlenLo; i++)
00949 {
00950 fprintf (logFile, "%g ", sendbufLo[i]);
00951 }
00952 fprintf (logFile, "\n");
00953 }
00954 }
00955
00956
00957 from = first_bdry;
00958 to = 0;
00959 if (from != to)
00960 {
00961 backward_solve_private (m, from, to, rp, cval, diag, aval,
00962 work_y, work_x, debug);
00963 CHECK_V_ERROR;
00964 }
00965
00966
00967 memcpy (lhs, work_x, m * sizeof (double));
00968
00969 if (debug)
00970 {
00971 fprintf (logFile, "\nFACT solution: ");
00972 for (i = 0; i < m; ++i)
00973 {
00974 fprintf (logFile, "%g ", lhs[i]);
00975 }
00976 fprintf (logFile, "\n");
00977 }
00978
00979
00980 if (mat->num_sendLo)
00981 {
00982 ierr = MPI_Waitall (mat->num_sendLo, mat->send_reqLo, mat->status);
00983 CHECK_MPI_V_ERROR (ierr);
00984 }
00985
00986 if (mat->num_sendHi)
00987 {
00988 ierr = MPI_Waitall (mat->num_sendHi, mat->send_reqHi, mat->status);
00989 CHECK_MPI_V_ERROR (ierr);
00990 }
00991 END_FUNC_DH}
00992
00993
00994
00995 #undef __FUNC__
00996 #define __FUNC__ "forward_solve_private"
00997 void
00998 forward_solve_private (int m, int from, int to, int *rp,
00999 int *cval, int *diag, double *aval,
01000 double *rhs, double *work_y, bool debug)
01001 {
01002 START_FUNC_DH int i, j, idx;
01003
01004 if (debug)
01005 {
01006 fprintf (logFile,
01007 "\nFACT starting forward_solve_private; from= %i; to= %i, m= %i\n",
01008 1 + from, 1 + to, m);
01009 }
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022 if (debug)
01023 {
01024 for (i = from; i < to; ++i)
01025 {
01026 int len = diag[i] - rp[i];
01027 int *col = cval + rp[i];
01028 double *val = aval + rp[i];
01029 double sum = rhs[i];
01030
01031 fprintf (logFile, "FACT solving for work_y[%i] (global)\n",
01032 i + 1 + beg_rowG);
01033 fprintf (logFile, "FACT sum = %g\n", sum);
01034 for (j = 0; j < len; ++j)
01035 {
01036 idx = col[j];
01037 sum -= (val[j] * work_y[idx]);
01038 fprintf (logFile,
01039 "FACT sum(%g) -= val[j] (%g) * work_y[%i] (%g)\n",
01040 sum, val[j], 1 + idx, work_y[idx]);
01041 }
01042 work_y[i] = sum;
01043 fprintf (logFile, "FACT work_y[%i] = %g\n", 1 + i + beg_rowG,
01044 work_y[i]);
01045 fprintf (logFile, "-----------\n");
01046 }
01047
01048 fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
01049 for (i = 0; i < to; i++)
01050 fprintf (logFile, " %i %g\n", i + 1 + beg_rowG, work_y[i]);
01051
01052 }
01053 else
01054 {
01055 for (i = from; i < to; ++i)
01056 {
01057 int len = diag[i] - rp[i];
01058 int *col = cval + rp[i];
01059 double *val = aval + rp[i];
01060 double sum = rhs[i];
01061
01062 for (j = 0; j < len; ++j)
01063 {
01064 idx = col[j];
01065 sum -= (val[j] * work_y[idx]);
01066 }
01067 work_y[i] = sum;
01068 }
01069 }
01070 END_FUNC_DH}
01071
01072 #undef __FUNC__
01073 #define __FUNC__ "backward_solve_private"
01074 void
01075 backward_solve_private (int m, int from, int to, int *rp,
01076 int *cval, int *diag, double *aval,
01077 double *work_y, double *work_x, bool debug)
01078 {
01079 START_FUNC_DH int i, j, idx;
01080
01081 if (debug)
01082 {
01083 fprintf (logFile,
01084 "\nFACT starting backward_solve_private; from= %i; to= %i, m= %i\n",
01085 1 + from, 1 + to, m);
01086 for (i = from - 1; i >= to; --i)
01087 {
01088 int len = rp[i + 1] - diag[i] - 1;
01089 int *col = cval + diag[i] + 1;
01090 double *val = aval + diag[i] + 1;
01091 double sum = work_y[i];
01092 fprintf (logFile, "FACT solving for work_x[%i]\n",
01093 i + 1 + beg_rowG);
01094
01095 for (j = 0; j < len; ++j)
01096 {
01097 idx = col[j];
01098 sum -= (val[j] * work_x[idx]);
01099 fprintf (logFile,
01100 "FACT sum(%g) -= val[j] (%g) * work_x[idx] (%g)\n",
01101 sum, val[j], work_x[idx]);
01102 }
01103 work_x[i] = sum * aval[diag[i]];
01104 fprintf (logFile, "FACT work_x[%i] = %g\n", 1 + i, work_x[i]);
01105 fprintf (logFile, "----------\n");
01106 }
01107
01108 }
01109 else
01110 {
01111 for (i = from - 1; i >= to; --i)
01112 {
01113 int len = rp[i + 1] - diag[i] - 1;
01114 int *col = cval + diag[i] + 1;
01115 double *val = aval + diag[i] + 1;
01116 double sum = work_y[i];
01117
01118 for (j = 0; j < len; ++j)
01119 {
01120 idx = col[j];
01121 sum -= (val[j] * work_x[idx]);
01122 }
01123 work_x[i] = sum * aval[diag[i]];
01124 }
01125 }
01126 END_FUNC_DH}
01127
01128 #undef __FUNC__
01129 #define __FUNC__ "Factor_dhInit"
01130 void
01131 Factor_dhInit (void *A, bool fillFlag, bool avalFlag,
01132 double rho, int id, int beg_rowP, Factor_dh * Fout)
01133 {
01134 START_FUNC_DH int m, n, beg_row, alloc;
01135 Factor_dh F;
01136
01137 EuclidGetDimensions (A, &beg_row, &m, &n);
01138 CHECK_V_ERROR;
01139 alloc = rho * m;
01140 Factor_dhCreate (&F);
01141 CHECK_V_ERROR;
01142
01143 *Fout = F;
01144 F->m = m;
01145 F->n = n;
01146 F->beg_row = beg_rowP;
01147 F->id = id;
01148 F->alloc = alloc;
01149
01150 F->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
01151 CHECK_V_ERROR;
01152 F->rp[0] = 0;
01153 F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
01154 CHECK_V_ERROR;
01155 F->diag = (int *) MALLOC_DH (m * sizeof (int));
01156 CHECK_V_ERROR;
01157 if (fillFlag)
01158 {
01159 F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
01160 CHECK_V_ERROR;
01161 }
01162 if (avalFlag)
01163 {
01164 F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
01165 CHECK_V_ERROR;
01166 }
01167 END_FUNC_DH}
01168
01169 #undef __FUNC__
01170 #define __FUNC__ "Factor_dhReallocate"
01171 void
01172 Factor_dhReallocate (Factor_dh F, int used, int additional)
01173 {
01174 START_FUNC_DH int alloc = F->alloc;
01175
01176 if (used + additional > F->alloc)
01177 {
01178 int *tmpI;
01179 while (alloc < used + additional)
01180 alloc *= 2.0;
01181 F->alloc = alloc;
01182 tmpI = F->cval;
01183 F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
01184 CHECK_V_ERROR;
01185 memcpy (F->cval, tmpI, used * sizeof (int));
01186 FREE_DH (tmpI);
01187 CHECK_V_ERROR;
01188 if (F->fill != NULL)
01189 {
01190 tmpI = F->fill;
01191 F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
01192 CHECK_V_ERROR;
01193 memcpy (F->fill, tmpI, used * sizeof (int));
01194 FREE_DH (tmpI);
01195 CHECK_V_ERROR;
01196 }
01197 if (F->aval != NULL)
01198 {
01199 REAL_DH *tmpF = F->aval;
01200 F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
01201 CHECK_V_ERROR;
01202 memcpy (F->aval, tmpF, used * sizeof (REAL_DH));
01203 FREE_DH (tmpF);
01204 CHECK_V_ERROR;
01205 }
01206 }
01207 END_FUNC_DH}
01208
01209 #undef __FUNC__
01210 #define __FUNC__ "Factor_dhTranspose"
01211 void
01212 Factor_dhTranspose (Factor_dh A, Factor_dh * Bout)
01213 {
01214 START_FUNC_DH Factor_dh B;
01215
01216 if (np_dh > 1)
01217 {
01218 SET_V_ERROR ("only for sequential");
01219 }
01220
01221 Factor_dhCreate (&B);
01222 CHECK_V_ERROR;
01223 *Bout = B;
01224 B->m = B->n = A->m;
01225 if (B->aval == NULL)
01226 {
01227 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01228 A->aval, NULL);
01229 CHECK_V_ERROR;
01230 }
01231 else
01232 {
01233 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
01234 A->aval, &B->aval);
01235 CHECK_V_ERROR;
01236 }
01237 END_FUNC_DH}
01238
01239
01240
01241 #undef __FUNC__
01242 #define __FUNC__ "Factor_dhSolveSeq"
01243 void
01244 Factor_dhSolveSeq (double *rhs, double *lhs, Euclid_dh ctx)
01245 {
01246 START_FUNC_DH Factor_dh F = ctx->F;
01247 if (F == NULL)
01248 {
01249 printf ("F is null.\n");
01250 }
01251 else
01252 {
01253 printf ("F isn't null.\n");
01254 }
01255 int *rp, *cval, *diag;
01256 int i, j, *vi, nz, m = F->m;
01257 REAL_DH *aval, *work;
01258
01259 REAL_DH *v, sum;
01260 bool debug = false;
01261
01262 if (ctx->F->debug && logFile != NULL)
01263 debug = true;
01264
01265 rp = F->rp;
01266 cval = F->cval;
01267 aval = F->aval;
01268 diag = F->diag;
01269
01270 work = ctx->work;
01271
01272 if (debug)
01273 {
01274 fprintf (logFile,
01275 "\nFACT ============================================================\n");
01276 fprintf (logFile, "FACT starting Factor_dhSolveSeq\n");
01277
01278
01279 fprintf (logFile, "\nFACT STARTING FORWARD SOLVE\n------------\n");
01280 work[0] = rhs[0];
01281 fprintf (logFile, "FACT work[0] = %g\n------------\n", work[0]);
01282 for (i = 1; i < m; i++)
01283 {
01284 v = aval + rp[i];
01285 vi = cval + rp[i];
01286 nz = diag[i] - rp[i];
01287 fprintf (logFile, "FACT solving for work[%i]\n", i + 1);
01288 sum = rhs[i];
01289 for (j = 0; j < nz; ++j)
01290 {
01291 sum -= (v[j] * work[vi[j]]);
01292 fprintf (logFile,
01293 "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
01294 sum, v[j], work[vi[j]]);
01295 }
01296 work[i] = sum;
01297 fprintf (logFile, "FACT work[%i] = %g\n------------\n", 1 + i,
01298 work[i]);
01299 }
01300
01301
01302 fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
01303 for (i = 0; i < m; i++)
01304 fprintf (logFile, " %i %g\n", i + 1, work[i]);
01305
01306
01307
01308 fprintf (logFile, "\nFACT STARTING BACKWARD SOLVE\n--------------\n");
01309 for (i = m - 1; i >= 0; i--)
01310 {
01311 v = aval + diag[i] + 1;
01312 vi = cval + diag[i] + 1;
01313 nz = rp[i + 1] - diag[i] - 1;
01314 fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1);
01315 sum = work[i];
01316 for (j = 0; j < nz; ++j)
01317 {
01318 sum -= (v[j] * work[vi[j]]);
01319 fprintf (logFile,
01320 "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
01321 sum, v[j], work[vi[j]]);
01322 }
01323 lhs[i] = work[i] = sum * aval[diag[i]];
01324 fprintf (logFile, "FACT lhs[%i] = %g\n------------\n", 1 + i,
01325 lhs[i]);
01326 fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1);
01327 }
01328
01329 fprintf (logFile, "\nFACT solution: ");
01330 for (i = 0; i < m; ++i)
01331 fprintf (logFile, "%g ", lhs[i]);
01332 fprintf (logFile, "\n");
01333
01334
01335 }
01336 else
01337 {
01338
01339 work[0] = rhs[0];
01340 for (i = 1; i < m; i++)
01341 {
01342 v = aval + rp[i];
01343 vi = cval + rp[i];
01344 nz = diag[i] - rp[i];
01345 sum = rhs[i];
01346 while (nz--)
01347 sum -= (*v++ * work[*vi++]);
01348 work[i] = sum;
01349 }
01350
01351
01352 for (i = m - 1; i >= 0; i--)
01353 {
01354 v = aval + diag[i] + 1;
01355 vi = cval + diag[i] + 1;
01356 nz = rp[i + 1] - diag[i] - 1;
01357 sum = work[i];
01358 while (nz--)
01359 sum -= (*v++ * work[*vi++]);
01360 lhs[i] = work[i] = sum * aval[diag[i]];
01361 }
01362 }
01363 END_FUNC_DH}
01364
01365
01366
01367
01368
01369 #undef __FUNC__
01370 #define __FUNC__ "adjust_bj_private"
01371 void
01372 adjust_bj_private (Factor_dh mat)
01373 {
01374 START_FUNC_DH int i;
01375 int nz = mat->rp[mat->m];
01376 int beg_row = mat->beg_row;
01377 for (i = 0; i < nz; ++i)
01378 mat->cval[i] += beg_row;
01379 END_FUNC_DH}
01380
01381 #undef __FUNC__
01382 #define __FUNC__ "unadjust_bj_private"
01383 void
01384 unadjust_bj_private (Factor_dh mat)
01385 {
01386 START_FUNC_DH int i;
01387 int nz = mat->rp[mat->m];
01388 int beg_row = mat->beg_row;
01389 for (i = 0; i < nz; ++i)
01390 mat->cval[i] -= beg_row;
01391 END_FUNC_DH}
01392
01393 #undef __FUNC__
01394 #define __FUNC__ "Factor_dhMaxPivotInverse"
01395 double
01396 Factor_dhMaxPivotInverse (Factor_dh mat)
01397 {
01398 START_FUNC_DH int i, m = mat->m, *diags = mat->diag;
01399 REAL_DH *aval = mat->aval;
01400 double minGlobal = 0.0, min = aval[diags[0]];
01401 double retval;
01402
01403 for (i = 0; i < m; ++i)
01404 min = MIN (min, fabs (aval[diags[i]]));
01405 if (np_dh == 1)
01406 {
01407 minGlobal = min;
01408 }
01409 else
01410 {
01411 MPI_Reduce (&min, &minGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, comm_dh);
01412 }
01413
01414 if (minGlobal == 0)
01415 {
01416 retval = 0;
01417 }
01418 else
01419 {
01420 retval = 1.0 / minGlobal;
01421 }
01422 END_FUNC_VAL (retval)}
01423
01424 #undef __FUNC__
01425 #define __FUNC__ "Factor_dhMaxValue"
01426 double
01427 Factor_dhMaxValue (Factor_dh mat)
01428 {
01429 START_FUNC_DH double maxGlobal = 0.0, max = 0.0;
01430 int i, nz = mat->rp[mat->m];
01431 REAL_DH *aval = mat->aval;
01432
01433 for (i = 0; i < nz; ++i)
01434 {
01435 max = MAX (max, fabs (aval[i]));
01436 }
01437
01438 if (np_dh == 1)
01439 {
01440 maxGlobal = max;
01441 }
01442 else
01443 {
01444 MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
01445 }
01446 END_FUNC_VAL (maxGlobal)}
01447
01448
01449 #undef __FUNC__
01450 #define __FUNC__ "Factor_dhCondEst"
01451 double
01452 Factor_dhCondEst (Factor_dh mat, Euclid_dh ctx)
01453 {
01454 START_FUNC_DH double max = 0.0, maxGlobal = 0.0;
01455 double *x;
01456 int i, m = mat->m;
01457 Vec_dh lhs, rhs;
01458
01459 Vec_dhCreate (&lhs);
01460 CHECK_ERROR (-1);
01461 Vec_dhInit (lhs, m);
01462 CHECK_ERROR (-1);
01463 Vec_dhDuplicate (lhs, &rhs);
01464 CHECK_ERROR (-1);
01465 Vec_dhSet (rhs, 1.0);
01466 CHECK_ERROR (-1);
01467 Euclid_dhApply (ctx, rhs->vals, lhs->vals);
01468 CHECK_ERROR (-1);
01469
01470 x = lhs->vals;
01471 for (i = 0; i < m; ++i)
01472 {
01473 max = MAX (max, fabs (x[i]));
01474 }
01475
01476 if (np_dh == 1)
01477 {
01478 maxGlobal = max;
01479 }
01480 else
01481 {
01482 MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
01483 }
01484 END_FUNC_VAL (maxGlobal)}