|
EpetraExt
Development
|
00001 //@HEADER 00002 // *********************************************************************** 00003 // 00004 // EpetraExt: Epetra Extended - Linear Algebra Services Package 00005 // Copyright (2011) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 00042 #include <EpetraExt_ConfigDefs.h> 00043 #include <EpetraExt_MatrixMatrix.h> 00044 00045 #include <EpetraExt_MMHelpers.h> 00046 00047 #include <EpetraExt_Transpose_RowMatrix.h> 00048 00049 #include <Epetra_Export.h> 00050 #include <Epetra_Import.h> 00051 #include <Epetra_Util.h> 00052 #include <Epetra_Map.h> 00053 #include <Epetra_Comm.h> 00054 #include <Epetra_CrsMatrix.h> 00055 #include <Epetra_Vector.h> 00056 #include <Epetra_Directory.h> 00057 #include <Epetra_HashTable.h> 00058 #include <Epetra_Distributor.h> 00059 00060 #include <Teuchos_TimeMonitor.hpp> 00061 00062 #ifdef HAVE_VECTOR 00063 #include <vector> 00064 #endif 00065 00066 00067 00068 namespace EpetraExt { 00069 //========================================================================= 00070 // 00071 //Method for internal use... sparsedot forms a dot-product between two 00072 //sparsely-populated 'vectors'. 00073 //Important assumption: assumes the indices in u_ind and v_ind are sorted. 00074 // 00075 template<typename int_type> 00076 double sparsedot(double* u, int_type* u_ind, int u_len, 00077 double* v, int_type* v_ind, int v_len) 00078 { 00079 double result = 0.0; 00080 00081 int v_idx = 0; 00082 int u_idx = 0; 00083 00084 while(v_idx < v_len && u_idx < u_len) { 00085 int_type ui = u_ind[u_idx]; 00086 int_type vi = v_ind[v_idx]; 00087 00088 if (ui < vi) { 00089 ++u_idx; 00090 } 00091 else if (ui > vi) { 00092 ++v_idx; 00093 } 00094 else { 00095 result += u[u_idx++]*v[v_idx++]; 00096 } 00097 } 00098 00099 return(result); 00100 } 00101 00102 //========================================================================= 00103 //kernel method for computing the local portion of C = A*B^T 00104 template<typename int_type> 00105 int mult_A_Btrans(CrsMatrixStruct& Aview, 00106 CrsMatrixStruct& Bview, 00107 CrsWrapper& C) 00108 { 00109 int i, j, k; 00110 int returnValue = 0; 00111 00112 int maxlen = 0; 00113 for(i=0; i<Aview.numRows; ++i) { 00114 if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i]; 00115 } 00116 for(i=0; i<Bview.numRows; ++i) { 00117 if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i]; 00118 } 00119 00120 //std::cout << "Aview: " << std::endl; 00121 //dumpCrsMatrixStruct(Aview); 00122 00123 //std::cout << "Bview: " << std::endl; 00124 //dumpCrsMatrixStruct(Bview); 00125 00126 int numBcols = Bview.colMap->NumMyElements(); 00127 int numBrows = Bview.numRows; 00128 00129 int iworklen = maxlen*2 + numBcols; 00130 int_type* iwork = new int_type[iworklen]; 00131 00132 int_type * bcols = iwork+maxlen*2; 00133 int_type* bgids = 0; 00134 Bview.colMap->MyGlobalElementsPtr(bgids); 00135 double* bvals = new double[maxlen*2]; 00136 double* avals = bvals+maxlen; 00137 00138 int_type max_all_b = (int_type) Bview.colMap->MaxAllGID64(); 00139 int_type min_all_b = (int_type) Bview.colMap->MinAllGID64(); 00140 00141 //bcols will hold the GIDs from B's column-map for fast access 00142 //during the computations below 00143 for(i=0; i<numBcols; ++i) { 00144 int blid = Bview.colMap->LID(bgids[i]); 00145 bcols[blid] = bgids[i]; 00146 } 00147 00148 //next create arrays indicating the first and last column-index in 00149 //each row of B, so that we can know when to skip certain rows below. 00150 //This will provide a large performance gain for banded matrices, and 00151 //a somewhat smaller gain for *most* other matrices. 00152 int_type* b_firstcol = new int_type[2*numBrows]; 00153 int_type* b_lastcol = b_firstcol+numBrows; 00154 int_type temp; 00155 for(i=0; i<numBrows; ++i) { 00156 b_firstcol[i] = max_all_b; 00157 b_lastcol[i] = min_all_b; 00158 00159 int Blen_i = Bview.numEntriesPerRow[i]; 00160 if (Blen_i < 1) continue; 00161 int* Bindices_i = Bview.indices[i]; 00162 00163 if (Bview.remote[i]) { 00164 for(k=0; k<Blen_i; ++k) { 00165 temp = (int_type) Bview.importColMap->GID64(Bindices_i[k]); 00166 if (temp < b_firstcol[i]) b_firstcol[i] = temp; 00167 if (temp > b_lastcol[i]) b_lastcol[i] = temp; 00168 } 00169 } 00170 else { 00171 for(k=0; k<Blen_i; ++k) { 00172 temp = bcols[Bindices_i[k]]; 00173 if (temp < b_firstcol[i]) b_firstcol[i] = temp; 00174 if (temp > b_lastcol[i]) b_lastcol[i] = temp; 00175 } 00176 } 00177 } 00178 00179 Epetra_Util util; 00180 00181 int_type* Aind = iwork; 00182 int_type* Bind = iwork+maxlen; 00183 00184 bool C_filled = C.Filled(); 00185 00186 //To form C = A*B^T, we're going to execute this expression: 00187 // 00188 // C(i,j) = sum_k( A(i,k)*B(j,k) ) 00189 // 00190 //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T). 00191 //But it requires the use of a 'sparsedot' function (we're simply forming 00192 //dot-products with row A_i and row B_j for all i and j). 00193 00194 //loop over the rows of A. 00195 for(i=0; i<Aview.numRows; ++i) { 00196 if (Aview.remote[i]) { 00197 continue; 00198 } 00199 00200 int* Aindices_i = Aview.indices[i]; 00201 double* Aval_i = Aview.values[i]; 00202 int A_len_i = Aview.numEntriesPerRow[i]; 00203 if (A_len_i < 1) { 00204 continue; 00205 } 00206 00207 for(k=0; k<A_len_i; ++k) { 00208 Aind[k] = (int_type) Aview.colMap->GID64(Aindices_i[k]); 00209 avals[k] = Aval_i[k]; 00210 } 00211 00212 util.Sort<int_type>(true, A_len_i, Aind, 1, &avals, 0, NULL, 0, 0); 00213 00214 int_type mina = Aind[0]; 00215 int_type maxa = Aind[A_len_i-1]; 00216 00217 if (mina > max_all_b || maxa < min_all_b) { 00218 continue; 00219 } 00220 00221 int_type global_row = (int_type) Aview.rowMap->GID64(i); 00222 00223 //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:)) 00224 for(j=0; j<Bview.numRows; ++j) { 00225 if (b_firstcol[j] > maxa || b_lastcol[j] < mina) { 00226 continue; 00227 } 00228 00229 int* Bindices_j = Bview.indices[j]; 00230 int B_len_j = Bview.numEntriesPerRow[j]; 00231 if (B_len_j < 1) { 00232 continue; 00233 } 00234 00235 int_type tmp; 00236 int Blen = 0; 00237 00238 if (Bview.remote[j]) { 00239 for(k=0; k<B_len_j; ++k) { 00240 tmp = (int_type) Bview.importColMap->GID64(Bindices_j[k]); 00241 if (tmp < mina || tmp > maxa) { 00242 continue; 00243 } 00244 00245 bvals[Blen] = Bview.values[j][k]; 00246 Bind[Blen++] = tmp; 00247 } 00248 } 00249 else { 00250 for(k=0; k<B_len_j; ++k) { 00251 tmp = bcols[Bindices_j[k]]; 00252 if (tmp < mina || tmp > maxa) { 00253 continue; 00254 } 00255 00256 bvals[Blen] = Bview.values[j][k]; 00257 Bind[Blen++] = tmp; 00258 } 00259 } 00260 00261 if (Blen < 1) { 00262 continue; 00263 } 00264 00265 util.Sort<int_type>(true, Blen, Bind, 1, &bvals, 0, NULL, 0, 0); 00266 00267 double C_ij = sparsedot(avals, Aind, A_len_i, 00268 bvals, Bind, Blen); 00269 00270 if (C_ij == 0.0) { 00271 continue; 00272 } 00273 int_type global_col = (int_type) Bview.rowMap->GID64(j); 00274 00275 int err = C_filled ? 00276 C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col) 00277 : 00278 C.InsertGlobalValues(global_row, 1, &C_ij, &global_col); 00279 00280 if (err < 0) { 00281 return(err); 00282 } 00283 if (err > 0) { 00284 if (C_filled) { 00285 //C.Filled()==true, and C doesn't have all the necessary nonzero 00286 //locations, or global_row or global_col is out of range (less 00287 //than 0 or non local). 00288 std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed " 00289 << "to insert value in result matrix at position "<<global_row 00290 <<"," <<global_col<<", possibly because result matrix has a " 00291 << "column-map that doesn't include column "<<global_col 00292 <<" on this proc." <<std::endl; 00293 return(err); 00294 } 00295 } 00296 } 00297 } 00298 00299 delete [] iwork; 00300 delete [] bvals; 00301 delete [] b_firstcol; 00302 00303 return(returnValue); 00304 } 00305 00306 int mult_A_Btrans(CrsMatrixStruct& Aview, 00307 CrsMatrixStruct& Bview, 00308 CrsWrapper& C) 00309 { 00310 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 00311 if(Aview.rowMap->GlobalIndicesInt() && 00312 Aview.colMap->GlobalIndicesInt() && 00313 Aview.rowMap->GlobalIndicesInt() && 00314 Aview.colMap->GlobalIndicesInt()) { 00315 return mult_A_Btrans<int>(Aview, Bview, C); 00316 } 00317 else 00318 #endif 00319 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 00320 if(Aview.rowMap->GlobalIndicesLongLong() && 00321 Aview.colMap->GlobalIndicesLongLong() && 00322 Aview.rowMap->GlobalIndicesLongLong() && 00323 Aview.colMap->GlobalIndicesLongLong()) { 00324 return mult_A_Btrans<long long>(Aview, Bview, C); 00325 } 00326 else 00327 #endif 00328 throw std::runtime_error("EpetraExt::mult_A_Btrans: GlobalIndices type unknown"); 00329 } 00330 00331 //========================================================================= 00332 //kernel method for computing the local portion of C = A^T*B 00333 template<typename int_type> 00334 int mult_Atrans_B(CrsMatrixStruct& Aview, 00335 CrsMatrixStruct& Bview, 00336 CrsWrapper& C) 00337 { 00338 int C_firstCol = Bview.colMap->MinLID(); 00339 int C_lastCol = Bview.colMap->MaxLID(); 00340 00341 int C_firstCol_import = 0; 00342 int C_lastCol_import = -1; 00343 00344 if (Bview.importColMap != NULL) { 00345 C_firstCol_import = Bview.importColMap->MinLID(); 00346 C_lastCol_import = Bview.importColMap->MaxLID(); 00347 } 00348 00349 int C_numCols = C_lastCol - C_firstCol + 1; 00350 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00351 00352 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00353 00354 double* C_row_i = new double[C_numCols]; 00355 int_type* C_colInds = new int_type[C_numCols]; 00356 00357 int i, j, k; 00358 00359 for(j=0; j<C_numCols; ++j) { 00360 C_row_i[j] = 0.0; 00361 C_colInds[j] = 0; 00362 } 00363 00364 //To form C = A^T*B, compute a series of outer-product updates. 00365 // 00366 // for (ith column of A^T) { 00367 // C_i = outer product of A^T(:,i) and B(i,:) 00368 // Where C_i is the ith matrix update, 00369 // A^T(:,i) is the ith column of A^T, and 00370 // B(i,:) is the ith row of B. 00371 // 00372 00373 //dumpCrsMatrixStruct(Aview); 00374 //dumpCrsMatrixStruct(Bview); 00375 int localProc = Bview.colMap->Comm().MyPID(); 00376 00377 int_type* Arows = 0; 00378 Aview.rowMap->MyGlobalElementsPtr(Arows); 00379 00380 bool C_filled = C.Filled(); 00381 00382 //loop over the rows of A (which are the columns of A^T). 00383 for(i=0; i<Aview.numRows; ++i) { 00384 00385 int* Aindices_i = Aview.indices[i]; 00386 double* Aval_i = Aview.values[i]; 00387 00388 //we'll need to get the row of B corresponding to Arows[i], 00389 //where Arows[i] is the GID of A's ith row. 00390 int Bi = Bview.rowMap->LID(Arows[i]); 00391 if (Bi<0) { 00392 std::cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row " 00393 <<Arows[i]<<" of matrix B, but doesn't have it."<<std::endl; 00394 return(-1); 00395 } 00396 00397 int* Bcol_inds = Bview.indices[Bi]; 00398 double* Bvals_i = Bview.values[Bi]; 00399 00400 //for each column-index Aj in the i-th row of A, we'll update 00401 //global-row GID(Aj) of the result matrix C. In that row of C, 00402 //we'll update column-indices given by the column-indices in the 00403 //ith row of B that we're now holding (Bcol_inds). 00404 00405 //First create a list of GIDs for the column-indices 00406 //that we'll be updating. 00407 00408 int Blen = Bview.numEntriesPerRow[Bi]; 00409 if (Bview.remote[Bi]) { 00410 for(j=0; j<Blen; ++j) { 00411 C_colInds[j] = (int_type) Bview.importColMap->GID64(Bcol_inds[j]); 00412 } 00413 } 00414 else { 00415 for(j=0; j<Blen; ++j) { 00416 C_colInds[j] = (int_type) Bview.colMap->GID64(Bcol_inds[j]); 00417 } 00418 } 00419 00420 //loop across the i-th row of A (column of A^T) 00421 for(j=0; j<Aview.numEntriesPerRow[i]; ++j) { 00422 00423 int Aj = Aindices_i[j]; 00424 double Aval = Aval_i[j]; 00425 00426 int_type global_row; 00427 if (Aview.remote[i]) { 00428 global_row = (int_type) Aview.importColMap->GID64(Aj); 00429 } 00430 else { 00431 global_row = (int_type) Aview.colMap->GID64(Aj); 00432 } 00433 00434 if (!C.RowMap().MyGID(global_row)) { 00435 continue; 00436 } 00437 00438 for(k=0; k<Blen; ++k) { 00439 C_row_i[k] = Aval*Bvals_i[k]; 00440 } 00441 00442 // 00443 //Now add this row-update to C. 00444 // 00445 00446 int err = C_filled ? 00447 C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds) 00448 : 00449 C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds); 00450 00451 if (err < 0) { 00452 return(err); 00453 } 00454 if (err > 0) { 00455 if (C_filled) { 00456 //C is Filled, and doesn't have all the necessary nonzero locations. 00457 return(err); 00458 } 00459 } 00460 } 00461 } 00462 00463 delete [] C_row_i; 00464 delete [] C_colInds; 00465 00466 return(0); 00467 } 00468 00469 int mult_Atrans_B(CrsMatrixStruct& Aview, 00470 CrsMatrixStruct& Bview, 00471 CrsWrapper& C) 00472 { 00473 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 00474 if(Aview.rowMap->GlobalIndicesInt() && 00475 Aview.colMap->GlobalIndicesInt() && 00476 Aview.rowMap->GlobalIndicesInt() && 00477 Aview.colMap->GlobalIndicesInt()) { 00478 return mult_Atrans_B<int>(Aview, Bview, C); 00479 } 00480 else 00481 #endif 00482 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 00483 if(Aview.rowMap->GlobalIndicesLongLong() && 00484 Aview.colMap->GlobalIndicesLongLong() && 00485 Aview.rowMap->GlobalIndicesLongLong() && 00486 Aview.colMap->GlobalIndicesLongLong()) { 00487 return mult_Atrans_B<long long>(Aview, Bview, C); 00488 } 00489 else 00490 #endif 00491 throw std::runtime_error("EpetraExt::mult_Atrans_B: GlobalIndices type unknown"); 00492 } 00493 00494 //kernel method for computing the local portion of C = A^T*B^T 00495 template<typename int_type> 00496 int mult_Atrans_Btrans(CrsMatrixStruct& Aview, 00497 CrsMatrixStruct& Bview, 00498 CrsWrapper& C) 00499 { 00500 int C_firstCol = Aview.rowMap->MinLID(); 00501 int C_lastCol = Aview.rowMap->MaxLID(); 00502 00503 int C_firstCol_import = 0; 00504 int C_lastCol_import = -1; 00505 00506 if (Aview.importColMap != NULL) { 00507 C_firstCol_import = Aview.importColMap->MinLID(); 00508 C_lastCol_import = Aview.importColMap->MaxLID(); 00509 } 00510 00511 int C_numCols = C_lastCol - C_firstCol + 1; 00512 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00513 00514 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00515 00516 double* dwork = new double[C_numCols]; 00517 int_type* iwork = new int_type[C_numCols]; 00518 00519 double* C_col_j = dwork; 00520 int_type* C_inds = iwork; 00521 00522 //std::cout << "Aview: " << std::endl; 00523 //dumpCrsMatrixStruct(Aview); 00524 00525 //std::cout << "Bview: " << std::endl; 00526 //dumpCrsMatrixStruct(Bview); 00527 00528 00529 int i, j, k; 00530 00531 for(j=0; j<C_numCols; ++j) { 00532 C_col_j[j] = 0.0; 00533 C_inds[j] = -1; 00534 } 00535 00536 int_type* A_col_inds = 0; 00537 Aview.colMap->MyGlobalElementsPtr(A_col_inds); 00538 int_type* A_col_inds_import = 0; 00539 if(Aview.importColMap) 00540 Aview.importColMap->MyGlobalElementsPtr(A_col_inds_import); 00541 00542 const Epetra_Map* Crowmap = &(C.RowMap()); 00543 00544 //To form C = A^T*B^T, we're going to execute this expression: 00545 // 00546 // C(i,j) = sum_k( A(k,i)*B(j,k) ) 00547 // 00548 //Our goal, of course, is to navigate the data in A and B once, without 00549 //performing searches for column-indices, etc. In other words, we avoid 00550 //column-wise operations like the plague... 00551 00552 int_type* Brows = 0; 00553 Bview.rowMap->MyGlobalElementsPtr(Brows); 00554 00555 //loop over the rows of B 00556 for(j=0; j<Bview.numRows; ++j) { 00557 int* Bindices_j = Bview.indices[j]; 00558 double* Bvals_j = Bview.values[j]; 00559 00560 int_type global_col = Brows[j]; 00561 00562 //loop across columns in the j-th row of B and for each corresponding 00563 //row in A, loop across columns and accumulate product 00564 //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other 00565 //words, as we stride across B(j,:), we use selected rows in A to 00566 //calculate updates for column j of the result matrix C. 00567 00568 for(k=0; k<Bview.numEntriesPerRow[j]; ++k) { 00569 00570 int bk = Bindices_j[k]; 00571 double Bval = Bvals_j[k]; 00572 00573 int_type global_k; 00574 if (Bview.remote[j]) { 00575 global_k = (int_type) Bview.importColMap->GID64(bk); 00576 } 00577 else { 00578 global_k = (int_type) Bview.colMap->GID64(bk); 00579 } 00580 00581 //get the corresponding row in A 00582 int ak = Aview.rowMap->LID(global_k); 00583 if (ak<0) { 00584 continue; 00585 } 00586 00587 int* Aindices_k = Aview.indices[ak]; 00588 double* Avals_k = Aview.values[ak]; 00589 00590 int C_len = 0; 00591 00592 if (Aview.remote[ak]) { 00593 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) { 00594 C_col_j[C_len] = Avals_k[i]*Bval; 00595 C_inds[C_len++] = A_col_inds_import[Aindices_k[i]]; 00596 } 00597 } 00598 else { 00599 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) { 00600 C_col_j[C_len] = Avals_k[i]*Bval; 00601 C_inds[C_len++] = A_col_inds[Aindices_k[i]]; 00602 } 00603 } 00604 00605 //Now loop across the C_col_j values and put non-zeros into C. 00606 00607 for(i=0; i < C_len ; ++i) { 00608 if (C_col_j[i] == 0.0) continue; 00609 00610 int_type global_row = C_inds[i]; 00611 if (!Crowmap->MyGID(global_row)) { 00612 continue; 00613 } 00614 00615 int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col); 00616 00617 if (err < 0) { 00618 return(err); 00619 } 00620 else { 00621 if (err > 0) { 00622 err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col); 00623 if (err < 0) { 00624 return(err); 00625 } 00626 } 00627 } 00628 } 00629 00630 } 00631 } 00632 00633 delete [] dwork; 00634 delete [] iwork; 00635 00636 return(0); 00637 } 00638 00639 int mult_Atrans_Btrans(CrsMatrixStruct& Aview, 00640 CrsMatrixStruct& Bview, 00641 CrsWrapper& C) 00642 { 00643 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 00644 if(Aview.rowMap->GlobalIndicesInt() && 00645 Aview.colMap->GlobalIndicesInt() && 00646 Aview.rowMap->GlobalIndicesInt() && 00647 Aview.colMap->GlobalIndicesInt()) { 00648 return mult_Atrans_Btrans<int>(Aview, Bview, C); 00649 } 00650 else 00651 #endif 00652 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 00653 if(Aview.rowMap->GlobalIndicesLongLong() && 00654 Aview.colMap->GlobalIndicesLongLong() && 00655 Aview.rowMap->GlobalIndicesLongLong() && 00656 Aview.colMap->GlobalIndicesLongLong()) { 00657 return mult_Atrans_Btrans<long long>(Aview, Bview, C); 00658 } 00659 else 00660 #endif 00661 throw std::runtime_error("EpetraExt::mult_Atrans_Btrans: GlobalIndices type unknown"); 00662 } 00663 00664 // ============================================================== 00665 template<typename int_type> 00666 int import_and_extract_views(const Epetra_CrsMatrix& M, 00667 const Epetra_Map& targetMap, 00668 CrsMatrixStruct& Mview, 00669 const Epetra_Import * prototypeImporter=0) 00670 { 00671 //The goal of this method is to populate the 'Mview' struct with views of the 00672 //rows of M, including all rows that correspond to elements in 'targetMap'. 00673 // 00674 //If targetMap includes local elements that correspond to remotely-owned rows 00675 //of M, then those remotely-owned rows will be imported into 00676 //'Mview.importMatrix', and views of them will be included in 'Mview'. 00677 00678 // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap. 00679 // The typical use of this is to provide A's column map when I&XV is called for B, for 00680 // a C = A * B multiply. 00681 00682 #ifdef ENABLE_MMM_TIMINGS 00683 using Teuchos::TimeMonitor; 00684 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Alloc"))); 00685 #endif 00686 Mview.deleteContents(); 00687 00688 Mview.origMatrix = &M; 00689 const Epetra_Map& Mrowmap = M.RowMap(); 00690 int numProcs = Mrowmap.Comm().NumProc(); 00691 Mview.numRows = targetMap.NumMyElements(); 00692 int_type* Mrows = 0; 00693 targetMap.MyGlobalElementsPtr(Mrows); 00694 00695 if (Mview.numRows > 0) { 00696 Mview.numEntriesPerRow = new int[Mview.numRows]; 00697 Mview.indices = new int*[Mview.numRows]; 00698 Mview.values = new double*[Mview.numRows]; 00699 Mview.remote = new bool[Mview.numRows]; 00700 } 00701 00702 Mview.origRowMap = &(M.RowMap()); 00703 Mview.rowMap = &targetMap; 00704 Mview.colMap = &(M.ColMap()); 00705 Mview.domainMap = &(M.DomainMap()); 00706 Mview.importColMap = NULL; 00707 Mview.numRemote = 0; 00708 00709 00710 #ifdef ENABLE_MMM_TIMINGS 00711 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Extract"))); 00712 #endif 00713 00714 int i; 00715 int *rowptr=0, *colind=0; 00716 double *vals=0; 00717 00718 EPETRA_CHK_ERR( M.ExtractCrsDataPointers(rowptr,colind,vals) ); 00719 00720 if(Mrowmap.SameAs(targetMap)) { 00721 // Short Circuit: The Row and Target Maps are the Same 00722 for(i=0; i<Mview.numRows; ++i) { 00723 Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i]; 00724 Mview.indices[i] = colind + rowptr[i]; 00725 Mview.values[i] = vals + rowptr[i]; 00726 Mview.remote[i] = false; 00727 } 00728 return 0; 00729 } 00730 else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){ 00731 // The prototypeImporter can tell me what is local and what is remote 00732 int * PermuteToLIDs = prototypeImporter->PermuteToLIDs(); 00733 int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs(); 00734 int * RemoteLIDs = prototypeImporter->RemoteLIDs(); 00735 for(int i=0; i<prototypeImporter->NumSameIDs();i++){ 00736 Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i]; 00737 Mview.indices[i] = colind + rowptr[i]; 00738 Mview.values[i] = vals + rowptr[i]; 00739 Mview.remote[i] = false; 00740 } 00741 for(int i=0; i<prototypeImporter->NumPermuteIDs();i++){ 00742 int to = PermuteToLIDs[i]; 00743 int from = PermuteFromLIDs[i]; 00744 Mview.numEntriesPerRow[to] = rowptr[from+1]-rowptr[from]; 00745 Mview.indices[to] = colind + rowptr[from]; 00746 Mview.values[to] = vals + rowptr[from]; 00747 Mview.remote[to] = false; 00748 } 00749 for(int i=0; i<prototypeImporter->NumRemoteIDs();i++){ 00750 Mview.remote[RemoteLIDs[i]] = true; 00751 ++Mview.numRemote; 00752 } 00753 } 00754 else { 00755 // Only LID can tell me who is local and who is remote 00756 for(i=0; i<Mview.numRows; ++i) { 00757 int mlid = Mrowmap.LID(Mrows[i]); 00758 if (mlid < 0) { 00759 Mview.remote[i] = true; 00760 ++Mview.numRemote; 00761 } 00762 else { 00763 Mview.numEntriesPerRow[i] = rowptr[mlid+1]-rowptr[mlid]; 00764 Mview.indices[i] = colind + rowptr[mlid]; 00765 Mview.values[i] = vals + rowptr[mlid]; 00766 Mview.remote[i] = false; 00767 } 00768 } 00769 } 00770 00771 #ifdef ENABLE_MMM_TIMINGS 00772 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Collective-0"))); 00773 #endif 00774 00775 if (numProcs < 2) { 00776 if (Mview.numRemote > 0) { 00777 std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but " 00778 << "attempting to import remote matrix rows."<<std::endl; 00779 return(-1); 00780 } 00781 00782 //If only one processor we don't need to import any remote rows, so return. 00783 return(0); 00784 } 00785 00786 // 00787 //Now we will import the needed remote rows of M, if the global maximum 00788 //value of numRemote is greater than 0. 00789 // 00790 00791 int globalMaxNumRemote = 0; 00792 Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1); 00793 00794 if (globalMaxNumRemote > 0) { 00795 #ifdef ENABLE_MMM_TIMINGS 00796 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-1"))); 00797 #endif 00798 //Create a map that describes the remote rows of M that we need. 00799 00800 int_type* MremoteRows = Mview.numRemote>0 ? new int_type[Mview.numRemote] : NULL; 00801 int offset = 0; 00802 for(i=0; i<Mview.numRows; ++i) { 00803 if (Mview.remote[i]) { 00804 MremoteRows[offset++] = Mrows[i]; 00805 } 00806 } 00807 00808 LightweightMap MremoteRowMap((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64()); 00809 00810 #ifdef ENABLE_MMM_TIMINGS 00811 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-2"))); 00812 #endif 00813 //Create an importer with target-map MremoteRowMap and 00814 //source-map Mrowmap. 00815 Epetra_Import * importer=0; 00816 RemoteOnlyImport * Rimporter=0; 00817 if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)) { 00818 Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap); 00819 } 00820 else if(!prototypeImporter) { 00821 Epetra_Map MremoteRowMap2((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64(), Mrowmap.Comm()); 00822 importer=new Epetra_Import(MremoteRowMap2, Mrowmap); 00823 } 00824 else 00825 throw std::runtime_error("prototypeImporter->SourceMap() does not match M.RowMap()!"); 00826 00827 00828 #ifdef ENABLE_MMM_TIMINGS 00829 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-3"))); 00830 #endif 00831 00832 if(Mview.importMatrix) delete Mview.importMatrix; 00833 if(Rimporter) Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter); 00834 else Mview.importMatrix = new LightweightCrsMatrix(M,*importer); 00835 00836 #ifdef ENABLE_MMM_TIMINGS 00837 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-4"))); 00838 #endif 00839 00840 //Finally, use the freshly imported data to fill in the gaps in our views 00841 int N; 00842 if(Mview.importMatrix->use_lw) N = Mview.importMatrix->RowMapLW_->NumMyElements(); 00843 else N = Mview.importMatrix->RowMapEP_->NumMyElements(); 00844 00845 if(N > 0) { 00846 rowptr = &Mview.importMatrix->rowptr_[0]; 00847 colind = &Mview.importMatrix->colind_[0]; 00848 vals = &Mview.importMatrix->vals_[0]; 00849 } 00850 00851 00852 for(i=0; i<Mview.numRows; ++i) { 00853 if (Mview.remote[i]) { 00854 int importLID = MremoteRowMap.LID(Mrows[i]); 00855 Mview.numEntriesPerRow[i] = rowptr[importLID+1]-rowptr[importLID]; 00856 Mview.indices[i] = colind + rowptr[importLID]; 00857 Mview.values[i] = vals + rowptr[importLID]; 00858 } 00859 } 00860 00861 00862 int_type * MyColGIDs = 0; 00863 if(Mview.importMatrix->ColMap_.NumMyElements()>0) 00864 Mview.importMatrix->ColMap_.MyGlobalElementsPtr(MyColGIDs); 00865 Mview.importColMap = new Epetra_Map((int_type) -1,Mview.importMatrix->ColMap_.NumMyElements(),MyColGIDs,(int_type) Mview.importMatrix->ColMap_.IndexBase64(),M.Comm()); 00866 delete [] MremoteRows; 00867 #ifdef ENABLE_MMM_TIMINGS 00868 MM=Teuchos::null; 00869 #endif 00870 00871 // Cleanup 00872 delete Rimporter; 00873 delete importer; 00874 00875 } 00876 return(0); 00877 } 00878 00879 00880 00881 00882 // ============================================================== 00883 template<typename int_type> 00884 int import_only(const Epetra_CrsMatrix& M, 00885 const Epetra_Map& targetMap, 00886 CrsMatrixStruct& Mview, 00887 const Epetra_Import * prototypeImporter=0) 00888 { 00889 // The goal of this method to populare the Mview object with ONLY the rows of M 00890 // that correspond to elements in 'targetMap.' There will be no population of the 00891 // numEntriesPerRow, indices, values, remote or numRemote arrays. 00892 00893 00894 // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap. 00895 // The typical use of this is to provide A's column map when I&XV is called for B, for 00896 // a C = A * B multiply. 00897 00898 #ifdef ENABLE_MMM_TIMINGS 00899 using Teuchos::TimeMonitor; 00900 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Setup"))); 00901 #endif 00902 00903 Mview.deleteContents(); 00904 00905 Mview.origMatrix = &M; 00906 const Epetra_Map& Mrowmap = M.RowMap(); 00907 int numProcs = Mrowmap.Comm().NumProc(); 00908 Mview.numRows = targetMap.NumMyElements(); 00909 00910 Mview.origRowMap = &(M.RowMap()); 00911 Mview.rowMap = &targetMap; 00912 Mview.colMap = &(M.ColMap()); 00913 Mview.domainMap = &(M.DomainMap()); 00914 Mview.importColMap = NULL; 00915 00916 int i; 00917 int numRemote =0; 00918 int N = Mview.rowMap->NumMyElements(); 00919 00920 if(Mrowmap.SameAs(targetMap)) { 00921 numRemote = 0; 00922 Mview.targetMapToOrigRow.resize(N); 00923 for(i=0;i<N; i++) Mview.targetMapToOrigRow[i]=i; 00924 return 0; 00925 } 00926 else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){ 00927 numRemote = prototypeImporter->NumRemoteIDs(); 00928 00929 Mview.targetMapToOrigRow.resize(N); Mview.targetMapToOrigRow.assign(N,-1); 00930 Mview.targetMapToImportRow.resize(N); Mview.targetMapToImportRow.assign(N,-1); 00931 00932 const int * PermuteToLIDs = prototypeImporter->PermuteToLIDs(); 00933 const int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs(); 00934 const int * RemoteLIDs = prototypeImporter->RemoteLIDs(); 00935 00936 for(i=0; i<prototypeImporter->NumSameIDs();i++) 00937 Mview.targetMapToOrigRow[i] = i; 00938 00939 // NOTE: These are reversed on purpose since we're doing a revere map. 00940 for(i=0; i<prototypeImporter->NumPermuteIDs();i++) 00941 Mview.targetMapToOrigRow[PermuteToLIDs[i]] = PermuteFromLIDs[i]; 00942 00943 for(i=0; i<prototypeImporter->NumRemoteIDs();i++) 00944 Mview.targetMapToImportRow[RemoteLIDs[i]] = i; 00945 00946 } 00947 else 00948 throw std::runtime_error("import_only: This routine only works if you either have the right map or no prototypeImporter"); 00949 00950 if (numProcs < 2) { 00951 if (Mview.numRemote > 0) { 00952 std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but " 00953 << "attempting to import remote matrix rows."<<std::endl; 00954 return(-1); 00955 } 00956 00957 //If only one processor we don't need to import any remote rows, so return. 00958 return(0); 00959 } 00960 00961 #ifdef ENABLE_MMM_TIMINGS 00962 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-1"))); 00963 #endif 00964 const int * RemoteLIDs = prototypeImporter->RemoteLIDs(); 00965 00966 //Create a map that describes the remote rows of M that we need. 00967 int_type* MremoteRows = numRemote>0 ? new int_type[prototypeImporter->NumRemoteIDs()] : 0; 00968 for(i=0; i<prototypeImporter->NumRemoteIDs(); i++) 00969 MremoteRows[i] = (int_type) targetMap.GID64(RemoteLIDs[i]); 00970 00971 LightweightMap MremoteRowMap((int_type) -1, numRemote, MremoteRows, (int_type)Mrowmap.IndexBase64()); 00972 00973 #ifdef ENABLE_MMM_TIMINGS 00974 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-2"))); 00975 #endif 00976 //Create an importer with target-map MremoteRowMap and 00977 //source-map Mrowmap. 00978 RemoteOnlyImport * Rimporter=0; 00979 Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap); 00980 00981 #ifdef ENABLE_MMM_TIMINGS 00982 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-3"))); 00983 #endif 00984 00985 Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter); 00986 00987 #ifdef ENABLE_MMM_TIMINGS 00988 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-4"))); 00989 #endif 00990 00991 // Cleanup 00992 delete Rimporter; 00993 delete [] MremoteRows; 00994 00995 return(0); 00996 } 00997 00998 00999 01000 01001 //========================================================================= 01002 template<typename int_type> 01003 int form_map_union(const Epetra_Map* map1, 01004 const Epetra_Map* map2, 01005 const Epetra_Map*& mapunion) 01006 { 01007 //form the union of two maps 01008 01009 if (map1 == NULL) { 01010 mapunion = new Epetra_Map(*map2); 01011 return(0); 01012 } 01013 01014 if (map2 == NULL) { 01015 mapunion = new Epetra_Map(*map1); 01016 return(0); 01017 } 01018 01019 int map1_len = map1->NumMyElements(); 01020 int_type* map1_elements = 0; 01021 map1->MyGlobalElementsPtr(map1_elements); 01022 int map2_len = map2->NumMyElements(); 01023 int_type* map2_elements = 0; 01024 map2->MyGlobalElementsPtr(map2_elements); 01025 01026 int_type* union_elements = new int_type[map1_len+map2_len]; 01027 01028 int map1_offset = 0, map2_offset = 0, union_offset = 0; 01029 01030 while(map1_offset < map1_len && map2_offset < map2_len) { 01031 int_type map1_elem = map1_elements[map1_offset]; 01032 int_type map2_elem = map2_elements[map2_offset]; 01033 01034 if (map1_elem < map2_elem) { 01035 union_elements[union_offset++] = map1_elem; 01036 ++map1_offset; 01037 } 01038 else if (map1_elem > map2_elem) { 01039 union_elements[union_offset++] = map2_elem; 01040 ++map2_offset; 01041 } 01042 else { 01043 union_elements[union_offset++] = map1_elem; 01044 ++map1_offset; 01045 ++map2_offset; 01046 } 01047 } 01048 01049 int i; 01050 for(i=map1_offset; i<map1_len; ++i) { 01051 union_elements[union_offset++] = map1_elements[i]; 01052 } 01053 01054 for(i=map2_offset; i<map2_len; ++i) { 01055 union_elements[union_offset++] = map2_elements[i]; 01056 } 01057 01058 mapunion = new Epetra_Map((int_type) -1, union_offset, union_elements, 01059 (int_type) map1->IndexBase64(), map1->Comm()); 01060 01061 delete [] union_elements; 01062 01063 return(0); 01064 } 01065 01066 //========================================================================= 01067 template<typename int_type> 01068 Epetra_Map* Tfind_rows_containing_cols(const Epetra_CrsMatrix& M, 01069 const Epetra_Map& column_map) 01070 { 01071 //The goal of this function is to find all rows in the matrix M that contain 01072 //column-indices which are in 'column_map'. A map containing those rows is 01073 //returned. (The returned map will then be used as the source row-map for 01074 //importing those rows into an overlapping distribution.) 01075 01076 std::pair<int_type,int_type> my_col_range = get_col_range<int_type>(column_map); 01077 01078 const Epetra_Comm& Comm = M.RowMap().Comm(); 01079 int num_procs = Comm.NumProc(); 01080 int my_proc = Comm.MyPID(); 01081 std::vector<int> send_procs; 01082 send_procs.reserve(num_procs-1); 01083 std::vector<int_type> col_ranges; 01084 col_ranges.reserve(2*(num_procs-1)); 01085 for(int p=0; p<num_procs; ++p) { 01086 if (p == my_proc) continue; 01087 send_procs.push_back(p); 01088 col_ranges.push_back(my_col_range.first); 01089 col_ranges.push_back(my_col_range.second); 01090 } 01091 01092 Epetra_Distributor* distor = Comm.CreateDistributor(); 01093 01094 int num_recv_procs = 0; 01095 int num_send_procs = send_procs.size(); 01096 bool deterministic = true; 01097 int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL; 01098 distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs); 01099 01100 int len_import_chars = 0; 01101 char* import_chars = NULL; 01102 01103 char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL; 01104 int err = distor->Do(export_chars, 2*sizeof(int_type), len_import_chars, import_chars); 01105 if (err != 0) { 01106 std::cout << "ERROR returned from Distributor::Do."<<std::endl; 01107 } 01108 01109 int_type* IntImports = reinterpret_cast<int_type*>(import_chars); 01110 int num_import_pairs = len_import_chars/(2*sizeof(int_type)); 01111 int offset = 0; 01112 std::vector<int> send_procs2; 01113 send_procs2.reserve(num_procs); 01114 std::vector<int_type> proc_col_ranges; 01115 std::pair<int_type,int_type> M_col_range = get_col_range<int_type>(M); 01116 for(int i=0; i<num_import_pairs; ++i) { 01117 int_type first_col = IntImports[offset++]; 01118 int_type last_col = IntImports[offset++]; 01119 if (first_col <= M_col_range.second && last_col >= M_col_range.first) { 01120 send_procs2.push_back(send_procs[i]); 01121 proc_col_ranges.push_back(first_col); 01122 proc_col_ranges.push_back(last_col); 01123 } 01124 } 01125 01126 std::vector<int_type> send_rows; 01127 std::vector<int> rows_per_send_proc; 01128 pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc); 01129 01130 Epetra_Distributor* distor2 = Comm.CreateDistributor(); 01131 01132 int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL; 01133 num_recv_procs = 0; 01134 err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs); 01135 01136 export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL; 01137 int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL; 01138 len_import_chars = 0; 01139 err = distor2->Do(export_chars, (int)sizeof(int_type), rows_per_send_proc_ptr, len_import_chars, import_chars); 01140 01141 int_type* recvd_row_ints = reinterpret_cast<int_type*>(import_chars); 01142 int num_recvd_rows = len_import_chars/sizeof(int_type); 01143 01144 Epetra_Map recvd_rows((int_type) -1, num_recvd_rows, recvd_row_ints, (int_type) column_map.IndexBase64(), Comm); 01145 01146 delete distor; 01147 delete distor2; 01148 delete [] import_chars; 01149 01150 Epetra_Map* result_map = NULL; 01151 01152 err = form_map_union<int_type>(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map); 01153 if (err != 0) { 01154 return(NULL); 01155 } 01156 01157 return(result_map); 01158 } 01159 01160 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M, 01161 const Epetra_Map& column_map) 01162 { 01163 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01164 if(M.RowMatrixRowMap().GlobalIndicesInt() && column_map.GlobalIndicesInt()) { 01165 return Tfind_rows_containing_cols<int>(M, column_map); 01166 } 01167 else 01168 #endif 01169 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01170 if(M.RowMatrixRowMap().GlobalIndicesLongLong() && column_map.GlobalIndicesLongLong()) { 01171 return Tfind_rows_containing_cols<long long>(M, column_map); 01172 } 01173 else 01174 #endif 01175 throw std::runtime_error("EpetraExt::find_rows_containing_cols: GlobalIndices type unknown"); 01176 } 01177 01178 //========================================================================= 01179 template<typename int_type> 01180 int MatrixMatrix::TMultiply(const Epetra_CrsMatrix& A, 01181 bool transposeA, 01182 const Epetra_CrsMatrix& B, 01183 bool transposeB, 01184 Epetra_CrsMatrix& C, 01185 bool call_FillComplete_on_result) 01186 { 01187 // DEBUG 01188 // bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal(); 01189 #ifdef ENABLE_MMM_TIMINGS 01190 using Teuchos::TimeMonitor; 01191 Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Setup"))); 01192 #endif 01193 // end DEBUG 01194 01195 // 01196 //This method forms the matrix-matrix product C = op(A) * op(B), where 01197 //op(A) == A if transposeA is false, 01198 //op(A) == A^T if transposeA is true, 01199 //and similarly for op(B). 01200 // 01201 01202 //A and B should already be Filled. 01203 //(Should we go ahead and call FillComplete() on them if necessary? 01204 // or error out? For now, we choose to error out.) 01205 if (!A.Filled() || !B.Filled()) { 01206 EPETRA_CHK_ERR(-1); 01207 } 01208 01209 // Is the C matrix new? 01210 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal(); 01211 01212 //We're going to refer to the different combinations of op(A) and op(B) 01213 //as scenario 1 through 4. 01214 01215 int scenario = 1;//A*B 01216 if (transposeB && !transposeA) scenario = 2;//A*B^T 01217 if (transposeA && !transposeB) scenario = 3;//A^T*B 01218 if (transposeA && transposeB) scenario = 4;//A^T*B^T 01219 if(NewFlag && transposeA && !transposeB) scenario = 5; // A^T*B, newmatrix 01220 01221 //now check size compatibility 01222 long long Aouter = transposeA ? A.NumGlobalCols64() : A.NumGlobalRows64(); 01223 long long Bouter = transposeB ? B.NumGlobalRows64() : B.NumGlobalCols64(); 01224 long long Ainner = transposeA ? A.NumGlobalRows64() : A.NumGlobalCols64(); 01225 long long Binner = transposeB ? B.NumGlobalCols64() : B.NumGlobalRows64(); 01226 if (Ainner != Binner) { 01227 std::cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) " 01228 << "must match for matrix-matrix product. op(A) is " 01229 <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl; 01230 return(-1); 01231 } 01232 01233 //The result matrix C must at least have a row-map that reflects the 01234 //correct row-size. Don't check the number of columns because rectangular 01235 //matrices which were constructed with only one map can still end up 01236 //having the correct capacity and dimensions when filled. 01237 if (Aouter > C.NumGlobalRows64()) { 01238 std::cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 01239 << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows64() 01240 << " rows, should have at least "<<Aouter << std::endl; 01241 return(-1); 01242 } 01243 01244 //It doesn't matter whether C is already Filled or not. If it is already 01245 //Filled, it must have space allocated for the positions that will be 01246 //referenced in forming C = op(A)*op(B). If it doesn't have enough space, 01247 //we'll error out later when trying to store result values. 01248 01249 //We're going to need to import remotely-owned sections of A and/or B 01250 //if more than 1 processor is performing this run, depending on the scenario. 01251 int numProcs = A.Comm().NumProc(); 01252 01253 //If we are to use the transpose of A and/or B, we'll need to be able to 01254 //access, on the local processor, all rows that contain column-indices in 01255 //the domain-map. 01256 const Epetra_Map* domainMap_A = &(A.DomainMap()); 01257 const Epetra_Map* domainMap_B = &(B.DomainMap()); 01258 01259 const Epetra_Map* rowmap_A = &(A.RowMap()); 01260 const Epetra_Map* rowmap_B = &(B.RowMap()); 01261 01262 //Declare some 'work-space' maps which may be created depending on 01263 //the scenario, and which will be deleted before exiting this function. 01264 const Epetra_Map* workmap1 = NULL; 01265 const Epetra_Map* workmap2 = NULL; 01266 const Epetra_Map* mapunion1 = NULL; 01267 01268 //Declare a couple of structs that will be used to hold views of the data 01269 //of A and B, to be used for fast access during the matrix-multiplication. 01270 CrsMatrixStruct Aview; 01271 CrsMatrixStruct Atransview; 01272 CrsMatrixStruct Bview; 01273 Teuchos::RCP<Epetra_CrsMatrix> Atrans; 01274 01275 const Epetra_Map* targetMap_A = rowmap_A; 01276 const Epetra_Map* targetMap_B = rowmap_B; 01277 01278 #ifdef ENABLE_MMM_TIMINGS 01279 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All I&X"))); 01280 #endif 01281 if (numProcs > 1) { 01282 //If op(A) = A^T, find all rows of A that contain column-indices in the 01283 //local portion of the domain-map. (We'll import any remote rows 01284 //that fit this criteria onto the local processor.) 01285 if (scenario == 3 || scenario == 4) { 01286 workmap1 = Tfind_rows_containing_cols<int_type>(A, *domainMap_A); 01287 targetMap_A = workmap1; 01288 } 01289 } 01290 if (scenario == 5) { 01291 targetMap_A = &(A.ColMap()); 01292 } 01293 01294 //Now import any needed remote rows and populate the Aview struct. 01295 if(scenario==1 && call_FillComplete_on_result) { 01296 EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview)); 01297 } 01298 else if (scenario == 5){ 01299 // Perform a local transpose of A and store that in Atransview 01300 EpetraExt::RowMatrix_Transpose at(const_cast<Epetra_Map *>(targetMap_A),false); 01301 Epetra_CrsMatrix * Anonconst = const_cast<Epetra_CrsMatrix *>(&A); 01302 Atrans = Teuchos::rcp(at.CreateTransposeLocal(*Anonconst)); 01303 import_only<int_type>(*Atrans,*targetMap_A,Atransview); 01304 } 01305 else { 01306 EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview)); 01307 } 01308 01309 01310 // NOTE: Next up is to switch to import_only for B as well, and then modify the THREE SerialCores 01311 // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups. 01312 01313 01314 // Make sure B's views are consistent with A even in serial. 01315 const Epetra_Map* colmap_op_A = NULL; 01316 if(scenario==1 || numProcs > 1){ 01317 if (transposeA && scenario == 3) { 01318 colmap_op_A = targetMap_A; 01319 } 01320 else { 01321 colmap_op_A = &(A.ColMap()); 01322 } 01323 targetMap_B = colmap_op_A; 01324 } 01325 if(scenario==5) targetMap_B = &(B.RowMap()); 01326 01327 01328 if (numProcs > 1) { 01329 //If op(B) = B^T, find all rows of B that contain column-indices in the 01330 //local-portion of the domain-map, or in the column-map of op(A). 01331 //We'll import any remote rows that fit this criteria onto the 01332 //local processor. 01333 if (transposeB) { 01334 EPETRA_CHK_ERR( form_map_union<int_type>(colmap_op_A, domainMap_B, mapunion1) ); 01335 workmap2 = Tfind_rows_containing_cols<int_type>(B, *mapunion1); 01336 targetMap_B = workmap2; 01337 } 01338 } 01339 01340 //Now import any needed remote rows and populate the Bview struct. 01341 if((scenario==1 && call_FillComplete_on_result) || scenario==5) { 01342 EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer())); 01343 } 01344 else { 01345 EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) ); 01346 } 01347 01348 #ifdef ENABLE_MMM_TIMINGS 01349 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Multiply"))); 01350 #endif 01351 01352 // Zero if filled 01353 if(C.Filled()) C.PutScalar(0.0); 01354 01355 //Now call the appropriate method to perform the actual multiplication. 01356 CrsWrapper_Epetra_CrsMatrix ecrsmat(C); 01357 01358 switch(scenario) { 01359 case 1: EPETRA_CHK_ERR( mult_A_B(A,Aview,B,Bview,C,call_FillComplete_on_result) ); 01360 break; 01361 case 2: EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) ); 01362 break; 01363 case 3: EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) ); 01364 break; 01365 case 4: EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) ); 01366 break; 01367 case 5: EPETRA_CHK_ERR( mult_AT_B_newmatrix(Atransview, Bview, C) ); 01368 break; 01369 } 01370 01371 01372 if (scenario != 1 && call_FillComplete_on_result && scenario != 5) { 01373 //We'll call FillComplete on the C matrix before we exit, and give 01374 //it a domain-map and a range-map. 01375 //The domain-map will be the domain-map of B, unless 01376 //op(B)==transpose(B), in which case the range-map of B will be used. 01377 //The range-map will be the range-map of A, unless 01378 //op(A)==transpose(A), in which case the domain-map of A will be used. 01379 01380 const Epetra_Map* domainmap = 01381 transposeB ? &(B.RangeMap()) : &(B.DomainMap()); 01382 01383 const Epetra_Map* rangemap = 01384 transposeA ? &(A.DomainMap()) : &(A.RangeMap()); 01385 01386 if (!C.Filled()) { 01387 EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) ); 01388 } 01389 } 01390 01391 //Finally, delete the objects that were potentially created 01392 //during the course of importing remote sections of A and B. 01393 01394 delete mapunion1; mapunion1 = NULL; 01395 delete workmap1; workmap1 = NULL; 01396 delete workmap2; workmap2 = NULL; 01397 01398 return(0); 01399 } 01400 01401 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A, 01402 bool transposeA, 01403 const Epetra_CrsMatrix& B, 01404 bool transposeB, 01405 Epetra_CrsMatrix& C, 01406 bool call_FillComplete_on_result) 01407 { 01408 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01409 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) { 01410 return TMultiply<int>(A, transposeA, B, transposeB, C, call_FillComplete_on_result); 01411 } 01412 else 01413 #endif 01414 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01415 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) { 01416 return TMultiply<long long>(A, transposeA, B, transposeB, C, call_FillComplete_on_result); 01417 } 01418 else 01419 #endif 01420 throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown"); 01421 } 01422 01423 //========================================================================= 01424 template<typename int_type> 01425 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A, 01426 bool transposeA, 01427 double scalarA, 01428 Epetra_CrsMatrix& B, 01429 double scalarB ) 01430 { 01431 // 01432 //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where 01433 01434 //A should already be Filled. It doesn't matter whether B is 01435 //already Filled, but if it is, then its graph must already contain 01436 //all nonzero locations that will be referenced in forming the 01437 //sum. 01438 01439 if (!A.Filled() ) { 01440 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl; 01441 EPETRA_CHK_ERR(-1); 01442 } 01443 01444 //explicit tranpose A formed as necessary 01445 Epetra_CrsMatrix * Aprime = 0; 01446 EpetraExt::RowMatrix_Transpose * Atrans = 0; 01447 if( transposeA ) 01448 { 01449 Atrans = new EpetraExt::RowMatrix_Transpose(); 01450 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A))))); 01451 } 01452 else 01453 Aprime = const_cast<Epetra_CrsMatrix*>(&A); 01454 01455 int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() ); 01456 int A_NumEntries, B_NumEntries; 01457 int_type * A_Indices = new int_type[MaxNumEntries]; 01458 double * A_Values = new double[MaxNumEntries]; 01459 int* B_Indices_local; 01460 int_type* B_Indices_global; 01461 double* B_Values; 01462 01463 int NumMyRows = B.NumMyRows(); 01464 int_type Row; 01465 int err; 01466 int ierr = 0; 01467 01468 if( scalarA ) 01469 { 01470 //Loop over B's rows and sum into 01471 for( int i = 0; i < NumMyRows; ++i ) 01472 { 01473 Row = (int_type) B.GRID64(i); 01474 EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) ); 01475 01476 if (scalarB != 1.0) { 01477 if (!B.Filled()) { 01478 EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries, 01479 B_Values, B_Indices_global)); 01480 } 01481 else { 01482 EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries, 01483 B_Values, B_Indices_local)); 01484 } 01485 01486 for(int jj=0; jj<B_NumEntries; ++jj) { 01487 B_Values[jj] = scalarB*B_Values[jj]; 01488 } 01489 } 01490 01491 if( scalarA != 1.0 ) { 01492 for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA; 01493 } 01494 01495 if( B.Filled() ) {//Sum In Values 01496 err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices ); 01497 assert( err >= 0 ); 01498 if (err < 0) ierr = err; 01499 } 01500 else { 01501 err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices ); 01502 assert( err == 0 || err == 1 || err == 3 ); 01503 if (err < 0) ierr = err; 01504 } 01505 } 01506 } 01507 else { 01508 EPETRA_CHK_ERR( B.Scale(scalarB) ); 01509 } 01510 01511 delete [] A_Indices; 01512 delete [] A_Values; 01513 01514 if( Atrans ) delete Atrans; 01515 01516 return(ierr); 01517 } 01518 01519 int MatrixMatrix::Add(const Epetra_CrsMatrix& A, 01520 bool transposeA, 01521 double scalarA, 01522 Epetra_CrsMatrix& B, 01523 double scalarB ) 01524 { 01525 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01526 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) { 01527 return TAdd<int>(A, transposeA, scalarA, B, scalarB); 01528 } 01529 else 01530 #endif 01531 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01532 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) { 01533 return TAdd<long long>(A, transposeA, scalarA, B, scalarB); 01534 } 01535 else 01536 #endif 01537 throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown"); 01538 } 01539 01540 template<typename int_type> 01541 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A, 01542 bool transposeA, 01543 double scalarA, 01544 const Epetra_CrsMatrix & B, 01545 bool transposeB, 01546 double scalarB, 01547 Epetra_CrsMatrix * & C) 01548 { 01549 // 01550 //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where 01551 01552 //A and B should already be Filled. C should be an empty pointer. 01553 01554 if (!A.Filled() || !B.Filled() ) { 01555 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false," 01556 << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl; 01557 EPETRA_CHK_ERR(-1); 01558 } 01559 01560 Epetra_CrsMatrix * Aprime = 0, * Bprime=0; 01561 EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0; 01562 01563 //explicit tranpose A formed as necessary 01564 if( transposeA ) { 01565 Atrans = new EpetraExt::RowMatrix_Transpose(); 01566 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A))))); 01567 } 01568 else 01569 Aprime = const_cast<Epetra_CrsMatrix*>(&A); 01570 01571 //explicit tranpose B formed as necessary 01572 if( transposeB ) { 01573 Btrans = new EpetraExt::RowMatrix_Transpose(); 01574 Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B))))); 01575 } 01576 else 01577 Bprime = const_cast<Epetra_CrsMatrix*>(&B); 01578 01579 // allocate or zero the new matrix 01580 if(C!=0) 01581 C->PutScalar(0.0); 01582 else 01583 C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0); 01584 01585 // build arrays for easy resuse 01586 int ierr = 0; 01587 Epetra_CrsMatrix * Mat[] = { Aprime,Bprime}; 01588 double scalar[] = { scalarA, scalarB}; 01589 01590 // do a loop over each matrix to add: A reordering might be more efficient 01591 for(int k=0;k<2;k++) { 01592 int MaxNumEntries = Mat[k]->MaxNumEntries(); 01593 int NumEntries; 01594 int_type * Indices = new int_type[MaxNumEntries]; 01595 double * Values = new double[MaxNumEntries]; 01596 01597 int NumMyRows = Mat[k]->NumMyRows(); 01598 int err; 01599 int_type Row; 01600 int ierr = 0; 01601 01602 //Loop over rows and sum into C 01603 for( int i = 0; i < NumMyRows; ++i ) { 01604 Row = (int_type) Mat[k]->GRID64(i); 01605 EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices)); 01606 01607 if( scalar[k] != 1.0 ) 01608 for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k]; 01609 01610 if(C->Filled()) { // Sum in values 01611 err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices ); 01612 if (err < 0) ierr = err; 01613 } else { // just add it to the unfilled CRS Matrix 01614 err = C->InsertGlobalValues( Row, NumEntries, Values, Indices ); 01615 if (err < 0) ierr = err; 01616 } 01617 } 01618 01619 delete [] Indices; 01620 delete [] Values; 01621 } 01622 01623 if( Atrans ) delete Atrans; 01624 if( Btrans ) delete Btrans; 01625 01626 return(ierr); 01627 } 01628 01629 int MatrixMatrix::Add(const Epetra_CrsMatrix& A, 01630 bool transposeA, 01631 double scalarA, 01632 const Epetra_CrsMatrix & B, 01633 bool transposeB, 01634 double scalarB, 01635 Epetra_CrsMatrix * & C) 01636 { 01637 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01638 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) { 01639 return TAdd<int>(A, transposeA, scalarA, B, transposeB, scalarB, C); 01640 } 01641 else 01642 #endif 01643 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01644 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) { 01645 return TAdd<long long>(A, transposeA, scalarA, B, transposeB, scalarB, C); 01646 } 01647 else 01648 #endif 01649 throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown"); 01650 } 01651 01652 01653 01654 //========================================================================= 01655 template<typename int_type> 01656 int MatrixMatrix::TJacobi(double omega, 01657 const Epetra_Vector & Dinv, 01658 const Epetra_CrsMatrix& A, 01659 const Epetra_CrsMatrix& B, 01660 Epetra_CrsMatrix& C, 01661 bool call_FillComplete_on_result) 01662 { 01663 #ifdef ENABLE_MMM_TIMINGS 01664 using Teuchos::TimeMonitor; 01665 Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Setup"))); 01666 #endif 01667 01668 //A and B should already be Filled. 01669 if (!A.Filled() || !B.Filled()) { 01670 EPETRA_CHK_ERR(-1); 01671 } 01672 01673 //now check size compatibility 01674 long long Aouter = A.NumGlobalRows64(); 01675 long long Bouter = B.NumGlobalCols64(); 01676 long long Ainner = A.NumGlobalCols64(); 01677 long long Binner = B.NumGlobalRows64(); 01678 long long Dlen = Dinv.GlobalLength64(); 01679 if (Ainner != Binner) { 01680 std::cerr << "MatrixMatrix::Jacobi: ERROR, inner dimensions of A and B " 01681 << "must match for matrix-matrix product. A is " 01682 <<Aouter<<"x"<<Ainner << ", B is "<<Binner<<"x"<<Bouter<<std::endl; 01683 return(-1); 01684 } 01685 01686 //The result matrix C must at least have a row-map that reflects the 01687 //correct row-size. Don't check the number of columns because rectangular 01688 //matrices which were constructed with only one map can still end up 01689 //having the correct capacity and dimensions when filled. 01690 if (Aouter > C.NumGlobalRows64()) { 01691 std::cerr << "MatrixMatrix::Jacobi: ERROR, dimensions of result C must " 01692 << "match dimensions of A * B. C has "<<C.NumGlobalRows64() 01693 << " rows, should have at least "<<Aouter << std::endl; 01694 return(-1); 01695 } 01696 01697 // Check against the D matrix 01698 if(Dlen != Aouter) { 01699 std::cerr << "MatrixMatrix::Jacboi: ERROR, dimensions of result D must " 01700 << "match dimensions of A's rows. D has "<< Dlen 01701 << " rows, should have " << Aouter << std::endl; 01702 return(-1); 01703 } 01704 01705 if(!A.RowMap().SameAs(B.RowMap()) || !A.RowMap().SameAs(Dinv.Map())) { 01706 std::cerr << "MatrixMatrix::Jacboi: ERROR, RowMap of A must match RowMap of B " 01707 << "and Map of D."<<std::endl; 01708 return(-1); 01709 } 01710 01711 //It doesn't matter whether C is already Filled or not. If it is already 01712 //Filled, it must have space allocated for the positions that will be 01713 //referenced in forming C. If it doesn't have enough space, 01714 //we'll error out later when trying to store result values. 01715 01716 //We're going to need to import remotely-owned sections of A and/or B 01717 //if more than 1 processor is performing this run, depending on the scenario. 01718 int numProcs = A.Comm().NumProc(); 01719 01720 // Maps 01721 const Epetra_Map* rowmap_A = &(A.RowMap()); 01722 const Epetra_Map* rowmap_B = &(B.RowMap()); 01723 01724 01725 01726 //Declare some 'work-space' maps which may be created depending on 01727 //the scenario, and which will be deleted before exiting this function. 01728 const Epetra_Map* workmap1 = NULL; 01729 const Epetra_Map* workmap2 = NULL; 01730 const Epetra_Map* mapunion1 = NULL; 01731 01732 //Declare a couple of structs that will be used to hold views of the data 01733 //of A and B, to be used for fast access during the matrix-multiplication. 01734 CrsMatrixStruct Aview; 01735 CrsMatrixStruct Bview; 01736 01737 const Epetra_Map* targetMap_A = rowmap_A; 01738 const Epetra_Map* targetMap_B = rowmap_B; 01739 01740 #ifdef ENABLE_MMM_TIMINGS 01741 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All I&X"))); 01742 #endif 01743 01744 //Now import any needed remote rows and populate the Aview struct. 01745 if(call_FillComplete_on_result) { 01746 EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview)); 01747 } 01748 else { 01749 EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview)); 01750 } 01751 01752 // NOTE: Next up is to switch to import_only for B as well, and then modify the THREE SerialCores 01753 // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups. 01754 01755 // Make sure B's views are consistent with A even in serial. 01756 const Epetra_Map* colmap_op_A = NULL; 01757 if(numProcs > 1){ 01758 colmap_op_A = &(A.ColMap()); 01759 targetMap_B = colmap_op_A; 01760 } 01761 01762 //Now import any needed remote rows and populate the Bview struct. 01763 if(call_FillComplete_on_result) { 01764 EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer())); 01765 } 01766 else { 01767 EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) ); 01768 } 01769 01770 #ifdef ENABLE_MMM_TIMINGS 01771 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Multiply"))); 01772 #endif 01773 01774 // Zero if filled 01775 if(C.Filled()) C.PutScalar(0.0); 01776 01777 //Now call the appropriate method to perform the actual multiplication. 01778 CrsWrapper_Epetra_CrsMatrix ecrsmat(C); 01779 EPETRA_CHK_ERR( jacobi_A_B(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result) ); 01780 01781 //Finally, delete the objects that were potentially created 01782 //during the course of importing remote sections of A and B. 01783 delete mapunion1; mapunion1 = NULL; 01784 delete workmap1; workmap1 = NULL; 01785 delete workmap2; workmap2 = NULL; 01786 01787 return(0); 01788 } 01789 01790 01791 01792 int MatrixMatrix::Jacobi(double omega, 01793 const Epetra_Vector & Dinv, 01794 const Epetra_CrsMatrix& A, 01795 const Epetra_CrsMatrix& B, 01796 Epetra_CrsMatrix& C, 01797 bool call_FillComplete_on_result) 01798 { 01799 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01800 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) { 01801 return TJacobi<int>(omega, Dinv, A, B, C, call_FillComplete_on_result); 01802 } 01803 else 01804 #endif 01805 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01806 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) { 01807 return TJacobi<long long>(omega, Dinv, A, B, C, call_FillComplete_on_result); 01808 } 01809 else 01810 #endif 01811 throw std::runtime_error("EpetraExt::MatrixMatrix::Jacobi: GlobalIndices type unknown"); 01812 } 01813 01814 01815 01816 01817 01818 01819 01820 01821 01822 01823 } // namespace EpetraExt 01824
1.7.6.1