|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP 00043 #define TPETRA_MATRIXMATRIX_DEF_HPP 00044 00045 #include "TpetraExt_MatrixMatrix_decl.hpp" 00046 #include "Teuchos_VerboseObject.hpp" 00047 #include "Teuchos_Array.hpp" 00048 #include "Tpetra_Util.hpp" 00049 #include "Tpetra_ConfigDefs.hpp" 00050 #include "Tpetra_CrsMatrix.hpp" 00051 #include "TpetraExt_MMHelpers_def.hpp" 00052 #include "Tpetra_RowMatrixTransposer.hpp" 00053 #include "Tpetra_ConfigDefs.hpp" 00054 #include "Tpetra_Map.hpp" 00055 #include "Tpetra_Export.hpp" 00056 #include "Tpetra_Import_Util.hpp" 00057 #include "Tpetra_Import_Util2.hpp" 00058 #include <algorithm> 00059 #include "Teuchos_FancyOStream.hpp" 00060 00061 //#define COMPUTE_MMM_STATISTICS 00062 00063 00069 namespace Tpetra { 00070 00071 00072 namespace MatrixMatrix{ 00073 00074 00075 template <class Scalar, 00076 class LocalOrdinal, 00077 class GlobalOrdinal, 00078 class Node> 00079 void Multiply( 00080 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00081 bool transposeA, 00082 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B, 00083 bool transposeB, 00084 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C, 00085 bool call_FillComplete_on_result) 00086 { 00087 #ifdef HAVE_TPETRA_MMM_TIMINGS 00088 using Teuchos::TimeMonitor; 00089 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All Setup"))); 00090 #endif 00091 00092 //TEUCHOS_FUNC_TIME_MONITOR_DIFF("My Matrix Mult", mmm_multiply); 00093 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> Matrix_t; 00094 // 00095 //This method forms the matrix-matrix product C = op(A) * op(B), where 00096 //op(A) == A if transposeA is false, 00097 //op(A) == A^T if transposeA is true, 00098 //and similarly for op(B). 00099 // 00100 00101 //A and B should already be Filled. 00102 //(Should we go ahead and call FillComplete() on them if necessary? 00103 // or error out? For now, we choose to error out.) 00104 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix A is not fill complete."); 00105 TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix B is not fill complete."); 00106 TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() , std::runtime_error, "MatrixMatrix::Multiply(): Result matrix C must not be locally indexed."); 00107 00108 //Convience typedefs 00109 typedef CrsMatrixStruct< 00110 Scalar, 00111 LocalOrdinal, 00112 GlobalOrdinal, 00113 Node> CrsMatrixStruct_t; 00114 typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t; 00115 00116 RCP<const Matrix_t > Aprime = null; 00117 RCP<const Matrix_t > Bprime = null; 00118 00119 // Is this a "clean" matrix 00120 bool NewFlag=!C.getGraph()->isLocallyIndexed() && !C.getGraph()->isGloballyIndexed(); 00121 00122 bool use_optimized_ATB=false; 00123 if(transposeA && !transposeB && call_FillComplete_on_result && NewFlag) { 00124 use_optimized_ATB=true; 00125 } 00126 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later. 00127 use_optimized_ATB=false; 00128 #endif 00129 00130 00131 if(!use_optimized_ATB && transposeA) { 00132 RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> at (Teuchos::rcpFromRef (A)); 00133 Aprime = at.createTranspose(); 00134 } 00135 else{ 00136 Aprime = rcpFromRef(A); 00137 } 00138 00139 if(transposeB){ 00140 RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> bt (Teuchos::rcpFromRef (B)); 00141 Bprime=bt.createTranspose(); 00142 } 00143 else{ 00144 Bprime = rcpFromRef(B); 00145 } 00146 00147 00148 //now check size compatibility 00149 global_size_t numACols = A.getDomainMap()->getGlobalNumElements(); 00150 global_size_t numBCols = B.getDomainMap()->getGlobalNumElements(); 00151 global_size_t Aouter = transposeA ? numACols : A.getGlobalNumRows(); 00152 global_size_t Bouter = transposeB ? B.getGlobalNumRows() : numBCols; 00153 global_size_t Ainner = transposeA ? A.getGlobalNumRows() : numACols; 00154 global_size_t Binner = transposeB ? numBCols : B.getGlobalNumRows(); 00155 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, 00156 "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) " 00157 "must match for matrix-matrix product. op(A) is " 00158 <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl); 00159 00160 //The result matrix C must at least have a row-map that reflects the 00161 //correct row-size. Don't check the number of columns because rectangular 00162 //matrices which were constructed with only one map can still end up 00163 //having the correct capacity and dimensions when filled. 00164 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error, 00165 "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 00166 "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows() 00167 << " rows, should have at least "<<Aouter << std::endl); 00168 00169 //It doesn't matter whether C is already Filled or not. If it is already 00170 //Filled, it must have space allocated for the positions that will be 00171 //referenced in forming C = op(A)*op(B). If it doesn't have enough space, 00172 //we'll error out later when trying to store result values. 00173 00174 // CGB: However, matrix must be in active-fill 00175 TEUCHOS_TEST_FOR_EXCEPT( C.isFillActive() == false ); 00176 00177 //We're going to need to import remotely-owned sections of A and/or B 00178 //if more than 1 processor is performing this run, depending on the scenario. 00179 int numProcs = A.getComm()->getSize(); 00180 00181 //Declare a couple of structs that will be used to hold views of the data 00182 //of A and B, to be used for fast access during the matrix-multiplication. 00183 CrsMatrixStruct_t Aview; 00184 CrsMatrixStruct_t Bview; 00185 00186 RCP<const Map_t > targetMap_A = Aprime->getRowMap(); 00187 RCP<const Map_t > targetMap_B = Bprime->getRowMap(); 00188 00189 #ifdef HAVE_TPETRA_MMM_TIMINGS 00190 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All I&X"))); 00191 #endif 00192 00193 //Now import any needed remote rows and populate the Aview struct. 00194 // NOTE: We assert that an import isn't needed --- since we do the transpose above to handle that. 00195 if(!use_optimized_ATB) { 00196 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter; 00197 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview,dummyImporter,true); 00198 } 00199 00200 00201 //We will also need local access to all rows of B that correspond to the 00202 //column-map of op(A). 00203 if (numProcs > 1) { 00204 targetMap_B = Aprime->getColMap(); //colmap_op_A; 00205 } 00206 00207 //Now import any needed remote rows and populate the Bview struct. 00208 if(!use_optimized_ATB) 00209 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter()); 00210 00211 #ifdef HAVE_TPETRA_MMM_TIMINGS 00212 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All Multiply"))); 00213 #endif 00214 00215 00216 //Now call the appropriate method to perform the actual multiplication. 00217 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C); 00218 00219 if(use_optimized_ATB) { 00220 MMdetails::mult_AT_B_newmatrix(A, B, C); 00221 } 00222 else if(call_FillComplete_on_result && NewFlag ) { 00223 MMdetails::mult_A_B_newmatrix(Aview, Bview, C); 00224 } 00225 else { 00226 MMdetails::mult_A_B(Aview, Bview, crsmat); 00227 #ifdef HAVE_TPETRA_MMM_TIMINGS 00228 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All FillComplete"))); 00229 #endif 00230 if (call_FillComplete_on_result) { 00231 //We'll call FillComplete on the C matrix before we exit, and give 00232 //it a domain-map and a range-map. 00233 //The domain-map will be the domain-map of B, unless 00234 //op(B)==transpose(B), in which case the range-map of B will be used. 00235 //The range-map will be the range-map of A, unless 00236 //op(A)==transpose(A), in which case the domain-map of A will be used. 00237 if (!C.isFillComplete()) { 00238 C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap()); 00239 } 00240 } 00241 } 00242 00243 } 00244 00245 00246 template <class Scalar, 00247 class LocalOrdinal, 00248 class GlobalOrdinal, 00249 class Node> 00250 void Jacobi(Scalar omega, 00251 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv, 00252 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00253 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B, 00254 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C, 00255 bool call_FillComplete_on_result) 00256 { 00257 #ifdef HAVE_TPETRA_MMM_TIMINGS 00258 using Teuchos::TimeMonitor; 00259 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi All Setup"))); 00260 #endif 00261 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> Matrix_t; 00262 00263 //A and B should already be Filled. 00264 //(Should we go ahead and call FillComplete() on them if necessary? 00265 // or error out? For now, we choose to error out.) 00266 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix A is not fill complete."); 00267 TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix B is not fill complete."); 00268 TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() , std::runtime_error, "MatrixMatrix::Multiply(): Result matrix C must not be locally indexed."); 00269 00270 //Convience typedefs 00271 typedef CrsMatrixStruct< 00272 Scalar, 00273 LocalOrdinal, 00274 GlobalOrdinal, 00275 Node> CrsMatrixStruct_t; 00276 typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t; 00277 00278 RCP<const Matrix_t > Aprime = rcpFromRef(A); 00279 RCP<const Matrix_t > Bprime = rcpFromRef(B); 00280 00281 //now check size compatibility 00282 global_size_t numACols = A.getDomainMap()->getGlobalNumElements(); 00283 global_size_t numBCols = B.getDomainMap()->getGlobalNumElements(); 00284 global_size_t Aouter = A.getGlobalNumRows(); 00285 global_size_t Bouter = numBCols; 00286 global_size_t Ainner = numACols; 00287 global_size_t Binner = B.getGlobalNumRows(); 00288 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, 00289 "MatrixMatrix::Jacobi: ERROR, inner dimensions of op(A) and op(B) " 00290 "must match for matrix-matrix product. op(A) is " 00291 <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl); 00292 00293 //The result matrix C must at least have a row-map that reflects the 00294 //correct row-size. Don't check the number of columns because rectangular 00295 //matrices which were constructed with only one map can still end up 00296 //having the correct capacity and dimensions when filled. 00297 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error, 00298 "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 00299 "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows() 00300 << " rows, should have at least "<<Aouter << std::endl); 00301 00302 //It doesn't matter whether C is already Filled or not. If it is already 00303 //Filled, it must have space allocated for the positions that will be 00304 //referenced in forming C = op(A)*op(B). If it doesn't have enough space, 00305 //we'll error out later when trying to store result values. 00306 00307 // CGB: However, matrix must be in active-fill 00308 TEUCHOS_TEST_FOR_EXCEPT( C.isFillActive() == false ); 00309 00310 //We're going to need to import remotely-owned sections of A and/or B 00311 //if more than 1 processor is performing this run, depending on the scenario. 00312 int numProcs = A.getComm()->getSize(); 00313 00314 //Declare a couple of structs that will be used to hold views of the data 00315 //of A and B, to be used for fast access during the matrix-multiplication. 00316 CrsMatrixStruct_t Aview; 00317 CrsMatrixStruct_t Bview; 00318 00319 RCP<const Map_t > targetMap_A = Aprime->getRowMap(); 00320 RCP<const Map_t > targetMap_B = Bprime->getRowMap(); 00321 00322 #ifdef HAVE_TPETRA_MMM_TIMINGS 00323 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi All I&X"))); 00324 #endif 00325 00326 //Now import any needed remote rows and populate the Aview struct. 00327 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview); 00328 00329 //We will also need local access to all rows of B that correspond to the 00330 //column-map of op(A). 00331 if (numProcs > 1) { 00332 targetMap_B = Aprime->getColMap(); //colmap_op_A; 00333 } 00334 00335 //Now import any needed remote rows and populate the Bview struct. 00336 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter()); 00337 00338 #ifdef HAVE_TPETRA_MMM_TIMINGS 00339 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi All Multiply"))); 00340 #endif 00341 00342 00343 //Now call the appropriate method to perform the actual multiplication. 00344 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C); 00345 00346 // Is this a "clean" matrix 00347 bool NewFlag=!C.getGraph()->isLocallyIndexed() && !C.getGraph()->isGloballyIndexed(); 00348 00349 if(call_FillComplete_on_result && NewFlag ) { 00350 MMdetails::jacobi_A_B_newmatrix(omega,Dinv,Aview, Bview, C); 00351 } 00352 else { 00353 TEUCHOS_TEST_FOR_EXCEPTION( 00354 true, std::runtime_error, 00355 "jacobi_A_B_general not implemented"); 00356 // FIXME (mfh 03 Apr 2014) This statement is unreachable, so I'm 00357 // commenting it out. 00358 // #ifdef HAVE_TPETRA_MMM_TIMINGS 00359 // MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi FillComplete"))); 00360 // #endif 00361 // FIXME (mfh 03 Apr 2014) This statement is unreachable, so I'm 00362 // commenting it out. 00363 // if (call_FillComplete_on_result) { 00364 // //We'll call FillComplete on the C matrix before we exit, and give 00365 // //it a domain-map and a range-map. 00366 // //The domain-map will be the domain-map of B, unless 00367 // //op(B)==transpose(B), in which case the range-map of B will be used. 00368 // //The range-map will be the range-map of A, unless 00369 // //op(A)==transpose(A), in which case the domain-map of A will be used. 00370 // if (!C.isFillComplete()) { 00371 // C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap()); 00372 // } 00373 // } 00374 } 00375 } 00376 00377 00378 00379 template <class Scalar, 00380 class LocalOrdinal, 00381 class GlobalOrdinal, 00382 class Node> 00383 void Add( 00384 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00385 bool transposeA, 00386 Scalar scalarA, 00387 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B, 00388 Scalar scalarB ) 00389 { 00390 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, 00391 "MatrixMatrix::Add ERROR, input matrix A.isFillComplete() is false; it is required to be true. (Result matrix B is not required to be isFillComplete())."); 00392 TEUCHOS_TEST_FOR_EXCEPTION(B.isFillComplete() , std::runtime_error, 00393 "MatrixMatrix::Add ERROR, input matrix B must not be fill complete!"); 00394 TEUCHOS_TEST_FOR_EXCEPTION(B.isStaticGraph() , std::runtime_error, 00395 "MatrixMatrix::Add ERROR, input matrix B must not have static graph!"); 00396 TEUCHOS_TEST_FOR_EXCEPTION(B.isLocallyIndexed() , std::runtime_error, 00397 "MatrixMatrix::Add ERROR, input matrix B must not be locally indexed!"); 00398 TEUCHOS_TEST_FOR_EXCEPTION(B.getProfileType()!=DynamicProfile, std::runtime_error, 00399 "MatrixMatrix::Add ERROR, input matrix B must have a dynamic profile!"); 00400 //Convience typedef 00401 typedef CrsMatrix< 00402 Scalar, 00403 LocalOrdinal, 00404 GlobalOrdinal, 00405 Node> CrsMatrix_t; 00406 RCP<const CrsMatrix_t> Aprime = null; 00407 if( transposeA ){ 00408 RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> theTransposer(Teuchos::rcpFromRef (A)); 00409 Aprime = theTransposer.createTranspose(); 00410 } 00411 else{ 00412 Aprime = rcpFromRef(A); 00413 } 00414 size_t a_numEntries; 00415 Array<GlobalOrdinal> a_inds(A.getNodeMaxNumRowEntries()); 00416 Array<Scalar> a_vals(A.getNodeMaxNumRowEntries()); 00417 GlobalOrdinal row; 00418 00419 if(scalarB != ScalarTraits<Scalar>::one()){ 00420 B.scale(scalarB); 00421 } 00422 00423 bool bFilled = B.isFillComplete(); 00424 size_t numMyRows = B.getNodeNumRows(); 00425 if(scalarA != ScalarTraits<Scalar>::zero()){ 00426 for(LocalOrdinal i = 0; (size_t)i < numMyRows; ++i){ 00427 row = B.getRowMap()->getGlobalElement(i); 00428 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries); 00429 if(scalarA != ScalarTraits<Scalar>::one()){ 00430 for(size_t j =0; j<a_numEntries; ++j){ 00431 a_vals[j] *= scalarA; 00432 } 00433 } 00434 if(bFilled){ 00435 B.sumIntoGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries)); 00436 } 00437 else{ 00438 B.insertGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries)); 00439 } 00440 00441 } 00442 } 00443 } 00444 00445 00446 template <class Scalar, 00447 class LocalOrdinal, 00448 class GlobalOrdinal, 00449 class Node> 00450 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > 00451 add (const Scalar& alpha, 00452 const bool transposeA, 00453 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00454 const Scalar& beta, 00455 const bool transposeB, 00456 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B, 00457 const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& domainMap, 00458 const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& rangeMap, 00459 const Teuchos::RCP<Teuchos::ParameterList>& params) 00460 { 00461 using Teuchos::RCP; 00462 using Teuchos::rcp; 00463 using Teuchos::rcpFromRef; 00464 using Teuchos::rcp_dynamic_cast; 00465 using Teuchos::rcp_implicit_cast; 00466 typedef RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> row_matrix_type; 00467 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type; 00468 typedef RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer_type; 00469 00470 TEUCHOS_TEST_FOR_EXCEPTION( 00471 ! A.isFillComplete () || ! B.isFillComplete (), std::invalid_argument, 00472 "Tpetra::MatrixMatrix::add: A and B must both be fill complete."); 00473 00474 #ifdef HAVE_TPETRA_DEBUG 00475 // The matrices don't have domain or range Maps unless they are fill complete. 00476 if (A.isFillComplete () && B.isFillComplete ()) { 00477 const bool domainMapsSame = 00478 (! transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getDomainMap ()))) || 00479 (! transposeA && transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))) || 00480 (transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))); 00481 TEUCHOS_TEST_FOR_EXCEPTION( 00482 domainMapsSame, std::invalid_argument, 00483 "Tpetra::MatrixMatrix::add: The domain Maps of Op(A) and Op(B) are not the same."); 00484 00485 const bool rangeMapsSame = 00486 (! transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getRangeMap ()))) || 00487 (! transposeA && transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))) || 00488 (transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))); 00489 TEUCHOS_TEST_FOR_EXCEPTION( 00490 rangeMapsSame, std::invalid_argument, 00491 "Tpetra::MatrixMatrix::add: The range Maps of Op(A) and Op(B) are not the same."); 00492 } 00493 #endif // HAVE_TPETRA_DEBUG 00494 00495 // Form the explicit transpose of A if necessary. 00496 RCP<const crs_matrix_type> Aprime; 00497 if (transposeA) { 00498 transposer_type theTransposer (rcpFromRef (A)); 00499 Aprime = theTransposer.createTranspose (); 00500 } else { 00501 Aprime = rcpFromRef (A); 00502 } 00503 00504 #ifdef HAVE_TPETRA_DEBUG 00505 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error, 00506 "Tpetra::MatrixMatrix::Add: Failed to compute Op(A). " 00507 "Please report this bug to the Tpetra developers."); 00508 #endif // HAVE_TPETRA_DEBUG 00509 00510 // Form the explicit transpose of B if necessary. 00511 RCP<const crs_matrix_type> Bprime; 00512 if (transposeB) { 00513 transposer_type theTransposer (rcpFromRef (B)); 00514 Bprime = theTransposer.createTranspose (); 00515 } else { 00516 Bprime = rcpFromRef (B); 00517 } 00518 00519 #ifdef HAVE_TPETRA_DEBUG 00520 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error, 00521 "Tpetra::MatrixMatrix::Add: Failed to compute Op(B). " 00522 "Please report this bug to the Tpetra developers."); 00523 00524 TEUCHOS_TEST_FOR_EXCEPTION( 00525 ! Aprime->isFillComplete () || ! Bprime->isFillComplete (), std::invalid_argument, 00526 "Tpetra::MatrixMatrix::add: Aprime and Bprime must both be fill complete. " 00527 "Please report this bug to the Tpetra developers."); 00528 #endif // HAVE_TPETRA_DEBUG 00529 00530 00531 RCP<row_matrix_type> C = 00532 Bprime->add (alpha, *rcp_implicit_cast<const row_matrix_type> (Aprime), 00533 beta, domainMap, rangeMap, params); 00534 return rcp_dynamic_cast<crs_matrix_type> (C); 00535 } 00536 00537 00538 00539 00540 template <class Scalar, 00541 class LocalOrdinal, 00542 class GlobalOrdinal, 00543 class Node> 00544 void Add( 00545 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00546 bool transposeA, 00547 Scalar scalarA, 00548 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B, 00549 bool transposeB, 00550 Scalar scalarB, 00551 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > C) 00552 { 00553 using Teuchos::as; 00554 using Teuchos::Array; 00555 using Teuchos::ArrayRCP; 00556 using Teuchos::ArrayView; 00557 using Teuchos::RCP; 00558 using Teuchos::rcp; 00559 using Teuchos::rcp_dynamic_cast; 00560 using Teuchos::rcpFromRef; 00561 using Teuchos::tuple; 00562 using std::endl; 00563 // typedef typename ArrayView<const Scalar>::size_type size_type; 00564 typedef Teuchos::ScalarTraits<Scalar> STS; 00565 typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type; 00566 // typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type; 00567 // typedef RowGraph<LocalOrdinal, GlobalOrdinal, Node> row_graph_type; 00568 // typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node> crs_graph_type; 00569 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type; 00570 typedef RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer_type; 00571 00572 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error, 00573 "Tpetra::MatrixMatrix::Add: The case C == null does not actually work. " 00574 "Fixing this will require an interface change."); 00575 00576 TEUCHOS_TEST_FOR_EXCEPTION( 00577 ! A.isFillComplete () || ! B.isFillComplete (), std::invalid_argument, 00578 "Tpetra::MatrixMatrix::Add: Both input matrices must be fill complete " 00579 "before calling this function."); 00580 00581 #ifdef HAVE_TPETRA_DEBUG 00582 { 00583 const bool domainMapsSame = 00584 (! transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getDomainMap ()))) || 00585 (! transposeA && transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))) || 00586 (transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))); 00587 TEUCHOS_TEST_FOR_EXCEPTION( 00588 domainMapsSame, std::invalid_argument, 00589 "Tpetra::MatrixMatrix::Add: The domain Maps of Op(A) and Op(B) are not the same."); 00590 00591 const bool rangeMapsSame = 00592 (! transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getRangeMap ()))) || 00593 (! transposeA && transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))) || 00594 (transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))); 00595 TEUCHOS_TEST_FOR_EXCEPTION( 00596 rangeMapsSame, std::invalid_argument, 00597 "Tpetra::MatrixMatrix::Add: The range Maps of Op(A) and Op(B) are not the same."); 00598 } 00599 #endif // HAVE_TPETRA_DEBUG 00600 00601 // Form the explicit transpose of A if necessary. 00602 RCP<const crs_matrix_type> Aprime; 00603 if (transposeA) { 00604 transposer_type theTransposer (rcpFromRef (A)); 00605 Aprime = theTransposer.createTranspose (); 00606 } else { 00607 Aprime = rcpFromRef (A); 00608 } 00609 00610 #ifdef HAVE_TPETRA_DEBUG 00611 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error, 00612 "Tpetra::MatrixMatrix::Add: Failed to compute Op(A). " 00613 "Please report this bug to the Tpetra developers."); 00614 #endif // HAVE_TPETRA_DEBUG 00615 00616 // Form the explicit transpose of B if necessary. 00617 RCP<const crs_matrix_type> Bprime; 00618 if (transposeB) { 00619 transposer_type theTransposer (rcpFromRef (B)); 00620 Bprime = theTransposer.createTranspose (); 00621 } else { 00622 Bprime = rcpFromRef (B); 00623 } 00624 00625 #ifdef HAVE_TPETRA_DEBUG 00626 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error, 00627 "Tpetra::MatrixMatrix::Add: Failed to compute Op(B). " 00628 "Please report this bug to the Tpetra developers."); 00629 #endif // HAVE_TPETRA_DEBUG 00630 00631 // Allocate or zero the entries of the result matrix. 00632 if (! C.is_null ()) { 00633 C->setAllToScalar (STS::zero ()); 00634 } else { 00635 #if 0 00636 // If Aprime and Bprime have the same row Map, and if C is null, 00637 // we can optimize construction and fillComplete of C. For now, 00638 // we just check pointer equality, to avoid the all-reduce in 00639 // isSameAs. It may be worth that all-reduce to check, however. 00640 //if (Aprime->getRowMap ().getRawPtr () == Bprime->getRowMap ().getRawPtr ()) { 00641 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) { 00642 RCP<const map_type> rowMap = Aprime->getRowMap (); 00643 00644 RCP<const crs_graph_type> A_graph = 00645 rcp_dynamic_cast<const crs_graph_type> (Aprime->getGraph (), true); 00646 #ifdef HAVE_TPETRA_DEBUG 00647 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error, 00648 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. " 00649 "Please report this bug to the Tpetra developers."); 00650 #endif // HAVE_TPETRA_DEBUG 00651 RCP<const crs_graph_type> B_graph = 00652 rcp_dynamic_cast<const crs_graph_type> (Bprime->getGraph (), true); 00653 #ifdef HAVE_TPETRA_DEBUG 00654 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error, 00655 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. " 00656 "Please report this bug to the Tpetra developers."); 00657 #endif // HAVE_TPETRA_DEBUG 00658 RCP<const map_type> A_colMap = A_graph->getColMap (); 00659 #ifdef HAVE_TPETRA_DEBUG 00660 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error, 00661 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. " 00662 "Please report this bug to the Tpetra developers."); 00663 #endif // HAVE_TPETRA_DEBUG 00664 RCP<const map_type> B_colMap = B_graph->getColMap (); 00665 #ifdef HAVE_TPETRA_DEBUG 00666 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error, 00667 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. " 00668 "Please report this bug to the Tpetra developers."); 00669 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (), 00670 std::logic_error, 00671 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. " 00672 "Please report this bug to the Tpetra developers."); 00673 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (), 00674 std::logic_error, 00675 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. " 00676 "Please report this bug to the Tpetra developers."); 00677 #endif // HAVE_TPETRA_DEBUG 00678 00679 // Compute the (column Map and) Import of the matrix sum. 00680 RCP<const import_type> sumImport = 00681 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ())); 00682 RCP<const map_type> C_colMap = sumImport->getTargetMap (); 00683 00684 // First, count the number of entries in each row. Then, go 00685 // back over the rows again, and compute the actual sum. 00686 // Remember that C may have a different column Map than Aprime 00687 // or Bprime, so its local indices may be different. That's why 00688 // we have to convert from local to global indices. 00689 00690 ArrayView<const LocalOrdinal> A_local_ind; 00691 Array<GlobalOrdinal> A_global_ind; 00692 ArrayView<const LocalOrdinal> B_local_ind; 00693 Array<GlobalOrdinal> B_global_ind; 00694 00695 const size_t localNumRows = rowMap->getNodeNumElements (); 00696 ArrayRCP<size_t> numEntriesPerRow (localNumRows); 00697 // Compute the max number of entries in any row of A+B on this 00698 // process, so that we won't have to resize temporary arrays. 00699 size_t maxNumEntriesPerRow = 0; 00700 for (size_t localRow = 0; localRow < localNumRows; ++localRow) { 00701 // Get view of current row of A_graph, in its local indices. 00702 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind); 00703 const size_type A_numEnt = A_local_ind.size (); 00704 if (A_numEnt > A_global_ind.size ()) { 00705 A_global_ind.resize (A_numEnt); 00706 } 00707 // Convert A's local indices to global indices. 00708 for (size_type k = 0; k < A_numEnt; ++k) { 00709 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]); 00710 } 00711 00712 // Get view of current row of B_graph, in its local indices. 00713 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind); 00714 const size_type B_numEnt = B_local_ind.size (); 00715 if (B_numEnt > B_global_ind.size ()) { 00716 B_global_ind.resize (B_numEnt); 00717 } 00718 // Convert B's local indices to global indices. 00719 for (size_type k = 0; k < B_numEnt; ++k) { 00720 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]); 00721 } 00722 00723 // Count the number of entries in the merged row of A + B. 00724 const size_t curNumEntriesPerRow = 00725 keyMergeCount (A_global_ind.begin (), A_global_ind.end (), 00726 B_global_ind.begin (), B_global_ind.end ()); 00727 numEntriesPerRow[localRow] = curNumEntriesPerRow; 00728 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow); 00729 } 00730 00731 // Create C, using the sum column Map and number of entries per 00732 // row that we computed above. Having the exact number of 00733 // entries per row lets us use static profile, making it valid 00734 // to call expertStaticFillComplete. 00735 C = rcp (new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow, StaticProfile)); 00736 00737 // Go back through the rows and actually compute the sum. We 00738 // don't ever have to resize A_global_ind or B_global_ind below, 00739 // since we've already done it above. 00740 ArrayView<const Scalar> A_val; 00741 ArrayView<const Scalar> B_val; 00742 00743 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow); 00744 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow); 00745 Array<Scalar> AplusB_val (maxNumEntriesPerRow); 00746 00747 for (size_t localRow = 0; localRow < localNumRows; ++localRow) { 00748 // Get view of current row of A, in A's local indices. 00749 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val); 00750 // Convert A's local indices to global indices. 00751 for (size_type k = 0; k < A_local_ind.size (); ++k) { 00752 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]); 00753 } 00754 00755 // Get view of current row of B, in B's local indices. 00756 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val); 00757 // Convert B's local indices to global indices. 00758 for (size_type k = 0; k < B_local_ind.size (); ++k) { 00759 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]); 00760 } 00761 00762 const size_t curNumEntries = numEntriesPerRow[localRow]; 00763 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries); 00764 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries); 00765 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries); 00766 00767 // Sum the entries in the current row of A plus B. 00768 keyValueMerge (A_global_ind.begin (), A_global_ind.end (), 00769 A_val.begin (), A_val.end (), 00770 B_global_ind.begin (), B_global_ind.end (), 00771 B_val.begin (), B_val.end (), 00772 C_global_ind.begin (), C_val.begin (), 00773 std::plus<Scalar> ()); 00774 // Convert the sum's global indices into C's local indices. 00775 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) { 00776 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]); 00777 } 00778 // Give the current row sum to C. 00779 C->replaceLocalValues (localRow, C_local_ind, C_val); 00780 } 00781 00782 // Use "expert static fill complete" to bypass construction of 00783 // the Import and Export (if applicable) object(s). 00784 RCP<const map_type> domainMap = A_graph->getDomainMap (); 00785 RCP<const map_type> rangeMap = A_graph->getRangeMap (); 00786 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ()); 00787 00788 return; // Now we're done! 00789 } 00790 else { 00791 // FIXME (mfh 08 May 2013) When I first looked at this method, I 00792 // noticed that C was being given the row Map of Aprime (the 00793 // possibly transposed version of A). Is this what we want? 00794 C = rcp (new crs_matrix_type (Aprime->getRowMap (), null)); 00795 } 00796 00797 #else 00798 // FIXME (mfh 08 May 2013) When I first looked at this method, I 00799 // noticed that C was being given the row Map of Aprime (the 00800 // possibly transposed version of A). Is this what we want? 00801 C = rcp (new crs_matrix_type (Aprime->getRowMap (), 0)); 00802 #endif // 0 00803 } 00804 00805 #ifdef HAVE_TPETRA_DEBUG 00806 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error, 00807 "Tpetra::MatrixMatrix::Add: At this point, Aprime is null. " 00808 "Please report this bug to the Tpetra developers."); 00809 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error, 00810 "Tpetra::MatrixMatrix::Add: At this point, Bprime is null. " 00811 "Please report this bug to the Tpetra developers."); 00812 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error, 00813 "Tpetra::MatrixMatrix::Add: At this point, C is null. " 00814 "Please report this bug to the Tpetra developers."); 00815 #endif // HAVE_TPETRA_DEBUG 00816 00817 Array<RCP<const crs_matrix_type> > Mat = 00818 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime); 00819 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB); 00820 00821 // do a loop over each matrix to add: A reordering might be more efficient 00822 for (int k = 0; k < 2; ++k) { 00823 Array<GlobalOrdinal> Indices; 00824 Array<Scalar> Values; 00825 00826 // Loop over each locally owned row of the current matrix (either 00827 // Aprime or Bprime), and sum its entries into the corresponding 00828 // row of C. This works regardless of whether Aprime or Bprime 00829 // has the same row Map as C, because both sumIntoGlobalValues and 00830 // insertGlobalValues allow summing resp. inserting into nonowned 00831 // rows of C. 00832 #ifdef HAVE_TPETRA_DEBUG 00833 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error, 00834 "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null. " 00835 "Please report this bug to the Tpetra developers."); 00836 #endif // HAVE_TPETRA_DEBUG 00837 RCP<const map_type> curRowMap = Mat[k]->getRowMap (); 00838 #ifdef HAVE_TPETRA_DEBUG 00839 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error, 00840 "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null. " 00841 "Please report this bug to the Tpetra developers."); 00842 #endif // HAVE_TPETRA_DEBUG 00843 00844 const size_t localNumRows = Mat[k]->getNodeNumRows (); 00845 for (size_t i = 0; i < localNumRows; ++i) { 00846 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i); 00847 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow); 00848 if (numEntries > 0) { 00849 Indices.resize (numEntries); 00850 Values.resize (numEntries); 00851 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries); 00852 00853 if (scalar[k] != STS::one ()) { 00854 for (size_t j = 0; j < numEntries; ++j) { 00855 Values[j] *= scalar[k]; 00856 } 00857 } 00858 00859 if (C->isFillComplete ()) { 00860 C->sumIntoGlobalValues (globalRow, Indices, Values); 00861 } else { 00862 C->insertGlobalValues (globalRow, Indices, Values); 00863 } 00864 } 00865 } 00866 } 00867 } 00868 00869 00870 00871 } //End namespace MatrixMatrix 00872 00873 namespace MMdetails{ 00874 00875 // Prints MMM-style statistics on communication done with an Import or Export object 00876 template <class TransferType> 00877 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer, const std::string &label){ 00878 if(Transfer.is_null()) return; 00879 00880 const Distributor & Distor = Transfer->getDistributor(); 00881 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm(); 00882 00883 size_t rows_send = Transfer->getNumExportIDs(); 00884 size_t rows_recv = Transfer->getNumRemoteIDs(); 00885 00886 size_t round1_send = Transfer->getNumExportIDs() * sizeof(size_t); 00887 size_t round1_recv = Transfer->getNumRemoteIDs() * sizeof(size_t); 00888 size_t num_send_neighbors = Distor.getNumSends(); 00889 size_t num_recv_neighbors = Distor.getNumReceives(); 00890 size_t round2_send, round2_recv; 00891 Distor.getLastDoStatistics(round2_send,round2_recv); 00892 00893 int myPID = Comm->getRank(); 00894 int NumProcs = Comm->getSize(); 00895 00896 // Processor by processor statistics 00897 // printf("[%d] %s Statistics: neigh[s/r]=%d/%d rows[s/r]=%d/%d r1bytes[s/r]=%d/%d r2bytes[s/r]=%d/%d\n", 00898 // myPID,label.c_str(),num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv); 00899 00900 // Global statistics 00901 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv}; 00902 size_t gstats_min[8], gstats_max[8]; 00903 00904 double lstats_avg[8], gstats_avg[8]; 00905 for(int i=0; i<8; i++) 00906 lstats_avg[i] = ((double)lstats[i])/NumProcs; 00907 00908 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min); 00909 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max); 00910 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg); 00911 00912 if(!myPID) { 00913 printf("%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(), 00914 (int)gstats_min[0],gstats_avg[0],(int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(int)gstats_max[2], 00915 (int)gstats_min[4],gstats_avg[4],(int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(int)gstats_max[6]); 00916 printf("%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(), 00917 (int)gstats_min[1],gstats_avg[1],(int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(int)gstats_max[3], 00918 (int)gstats_min[5],gstats_avg[5],(int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(int)gstats_max[7]); 00919 } 00920 } 00921 00922 00923 //kernel method for computing the local portion of C = A*B 00924 template<class Scalar, 00925 class LocalOrdinal, 00926 class GlobalOrdinal, 00927 class Node> 00928 void mult_AT_B_newmatrix( 00929 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00930 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B, 00931 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C) { 00932 00933 // Using & Typedefs 00934 using Teuchos::RCP; 00935 using Teuchos::rcp; 00936 typedef CrsMatrixStruct< 00937 Scalar, 00938 LocalOrdinal, 00939 GlobalOrdinal, 00940 Node> CrsMatrixStruct_t; 00941 00942 #ifdef HAVE_TPETRA_MMM_TIMINGS 00943 using Teuchos::TimeMonitor; 00944 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T Transpose"))); 00945 #endif 00946 00947 /*************************************************************/ 00948 /* 1) Local Transpose of A */ 00949 /*************************************************************/ 00950 RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> at (Teuchos::rcpFromRef (A)); 00951 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = at.createTransposeLocal(); 00952 00953 /*************************************************************/ 00954 /* 2/3) Call mult_A_B_newmatrix w/ fillComplete */ 00955 /*************************************************************/ 00956 #ifdef HAVE_TPETRA_MMM_TIMINGS 00957 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T I&X"))); 00958 #endif 00959 00960 // Get views, asserting that no import is required to speed up computation 00961 CrsMatrixStruct_t Aview; 00962 CrsMatrixStruct_t Bview; 00963 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter; 00964 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,true); 00965 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,true); 00966 00967 #ifdef HAVE_TPETRA_MMM_TIMINGS 00968 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T AB-core"))); 00969 #endif 00970 00971 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp; 00972 00973 // If Atrans has no Exporter, we can use C instead of having to create a temp matrix 00974 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null(); 00975 if(needs_final_export) 00976 Ctemp = rcp(new Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>(Atrans->getRowMap(),0)); 00977 else 00978 Ctemp = rcp(&C,false);// don't allow deallocation 00979 00980 // Multiply 00981 mult_A_B_newmatrix(Aview,Bview,*Ctemp); 00982 00983 /*************************************************************/ 00984 /* 4) exportAndFillComplete matrix */ 00985 /*************************************************************/ 00986 #ifdef HAVE_TPETRA_MMM_TIMINGS 00987 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T exportAndFillComplete"))); 00988 #endif 00989 00990 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,false); 00991 if(needs_final_export) 00992 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(), 00993 B.getDomainMap(),A.getDomainMap()); 00994 00995 #ifdef COMPUTE_MMM_STATISTICS 00996 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(),std::string("AT_B MMM")); 00997 #endif 00998 } 00999 01000 01001 //kernel method for computing the local portion of C = A*B 01002 template<class Scalar, 01003 class LocalOrdinal, 01004 class GlobalOrdinal, 01005 class Node> 01006 void mult_A_B( 01007 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview, 01008 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview, 01009 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C) 01010 { 01011 typedef Teuchos::ScalarTraits<Scalar> STS; 01012 //TEUCHOS_FUNC_TIME_MONITOR_DIFF("mult_A_B", mult_A_B); 01013 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex(); 01014 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex(); 01015 01016 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero(); 01017 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid(); 01018 01019 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList(); 01020 ArrayView<const GlobalOrdinal> bcols_import = null; 01021 if (Bview.importColMap != null) { 01022 C_firstCol_import = Bview.importColMap->getMinLocalIndex(); 01023 C_lastCol_import = Bview.importColMap->getMaxLocalIndex(); 01024 01025 bcols_import = Bview.importColMap->getNodeElementList(); 01026 } 01027 01028 size_t C_numCols = C_lastCol - C_firstCol + 01029 OrdinalTraits<LocalOrdinal>::one(); 01030 size_t C_numCols_import = C_lastCol_import - C_firstCol_import + 01031 OrdinalTraits<LocalOrdinal>::one(); 01032 01033 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 01034 01035 Array<Scalar> dwork = Array<Scalar>(C_numCols); 01036 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols); 01037 Array<size_t> iwork2 = Array<size_t>(C_numCols); 01038 01039 Array<Scalar> C_row_i = dwork; 01040 Array<GlobalOrdinal> C_cols = iwork; 01041 Array<size_t> c_index = iwork2; 01042 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols); 01043 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols); 01044 01045 size_t C_row_i_length, j, k, last_index; 01046 01047 // Run through all the hash table lookups once and for all 01048 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid(); 01049 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID); 01050 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID); 01051 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){ 01052 // Maps are the same: Use local IDs as the hash 01053 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= 01054 Aview.colMap->getMaxLocalIndex(); i++) 01055 Acol2Brow[i]=i; 01056 } 01057 else { 01058 // Maps are not the same: Use the map's hash 01059 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= 01060 Aview.colMap->getMaxLocalIndex(); i++) { 01061 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i); 01062 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID); 01063 if(BLID != LO_INVALID) Acol2Brow[i] = BLID; 01064 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID); 01065 } 01066 } 01067 01068 //To form C = A*B we're going to execute this expression: 01069 // 01070 // C(i,j) = sum_k( A(i,k)*B(k,j) ) 01071 // 01072 //Our goal, of course, is to navigate the data in A and B once, without 01073 //performing searches for column-indices, etc. 01074 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP; 01075 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP; 01076 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP; 01077 ArrayView<const size_t> Arowptr, Browptr, Irowptr; 01078 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind; 01079 ArrayView<const Scalar> Avals, Bvals, Ivals; 01080 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP); 01081 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP); 01082 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP(); 01083 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP(); 01084 if(!Bview.importMatrix.is_null()) { 01085 Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP); 01086 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP(); 01087 } 01088 01089 bool C_filled = C.isFillComplete(); 01090 01091 for (size_t i = 0; i < C_numCols; i++) 01092 c_index[i] = OrdinalTraits<size_t>::invalid(); 01093 01094 //loop over the rows of A. 01095 size_t Arows = Aview.rowMap->getNodeNumElements(); 01096 for(size_t i=0; i<Arows; ++i) { 01097 01098 //only navigate the local portion of Aview... which is, thankfully, all of A 01099 //since this routine doesn't do transpose modes 01100 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i); 01101 01102 //loop across the i-th row of A and for each corresponding row 01103 //in B, loop across colums and accumulate product 01104 //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words, 01105 //as we stride across B(k,:) we're calculating updates for row i of the 01106 //result matrix C. 01107 01108 01109 C_row_i_length = OrdinalTraits<size_t>::zero(); 01110 01111 for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) { 01112 LocalOrdinal Ak = Acol2Brow[Acolind[k]]; 01113 Scalar Aval = Avals[k]; 01114 if (Aval == STS::zero()) 01115 continue; 01116 01117 if (Ak==LO_INVALID) continue; 01118 01119 for(j=Browptr[Ak]; j< Browptr[Ak+1]; ++j) { 01120 LocalOrdinal col = Bcolind[j]; 01121 //assert(col >= 0 && col < C_numCols); 01122 if (c_index[col] == OrdinalTraits<size_t>::invalid()){ 01123 //assert(C_row_i_length >= 0 && C_row_i_length < C_numCols); 01124 // This has to be a += so insertGlobalValue goes out 01125 C_row_i[C_row_i_length] = Aval*Bvals[j]; 01126 C_cols[C_row_i_length] = col; 01127 c_index[col] = C_row_i_length; 01128 C_row_i_length++; 01129 } 01130 else { 01131 C_row_i[c_index[col]] += Aval*Bvals[j]; 01132 } 01133 } 01134 } 01135 01136 for (size_t ii = 0; ii < C_row_i_length; ii++) { 01137 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid(); 01138 C_cols[ii] = bcols[C_cols[ii]]; 01139 combined_index[ii] = C_cols[ii]; 01140 combined_values[ii] = C_row_i[ii]; 01141 } 01142 last_index = C_row_i_length; 01143 01144 // 01145 //Now put the C_row_i values into C. 01146 // 01147 // We might have to revamp this later. 01148 C_row_i_length = OrdinalTraits<size_t>::zero(); 01149 01150 for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) { 01151 LocalOrdinal Ak = Acol2Brow[Acolind[k]]; 01152 Scalar Aval = Avals[k]; 01153 if (Aval == STS::zero()) 01154 continue; 01155 01156 if (Ak!=LO_INVALID) continue; 01157 01158 Ak = Acol2Irow[Acolind[k]]; 01159 for(j=Irowptr[Ak]; j< Irowptr[Ak+1]; ++j) { 01160 LocalOrdinal col = Icolind[j]; 01161 //assert(col >= 0 && col < C_numCols); 01162 if (c_index[col] == OrdinalTraits<size_t>::invalid()){ 01163 //assert(C_row_i_length >= 0 && C_row_i_length < C_numCols); 01164 // This has to be a += so insertGlobalValue goes out 01165 C_row_i[C_row_i_length] = Aval*Ivals[j]; 01166 C_cols[C_row_i_length] = col; 01167 c_index[col] = C_row_i_length; 01168 C_row_i_length++; 01169 } 01170 else { 01171 // This has to be a += so insertGlobalValue goes out 01172 C_row_i[c_index[col]] += Aval*Ivals[j]; 01173 } 01174 } 01175 } 01176 01177 for (size_t ii = 0; ii < C_row_i_length; ii++) { 01178 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid(); 01179 C_cols[ii] = bcols_import[C_cols[ii]]; 01180 combined_index[last_index] = C_cols[ii]; 01181 combined_values[last_index] = C_row_i[ii]; 01182 last_index++; 01183 } 01184 01185 // 01186 //Now put the C_row_i values into C. 01187 // 01188 // We might have to revamp this later. 01189 C_filled ? 01190 C.sumIntoGlobalValues( 01191 global_row, 01192 combined_index.view(OrdinalTraits<size_t>::zero(), last_index), 01193 combined_values.view(OrdinalTraits<size_t>::zero(), last_index)) 01194 : 01195 C.insertGlobalValues( 01196 global_row, 01197 combined_index.view(OrdinalTraits<size_t>::zero(), last_index), 01198 combined_values.view(OrdinalTraits<size_t>::zero(), last_index)); 01199 01200 } 01201 01202 } 01203 01204 template<class Scalar, 01205 class LocalOrdinal, 01206 class GlobalOrdinal, 01207 class Node> 01208 void setMaxNumEntriesPerRow( 01209 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) 01210 { 01211 typedef typename Array<ArrayView<const LocalOrdinal> >::size_type local_length_size; 01212 Mview.maxNumRowEntries = OrdinalTraits<local_length_size>::zero(); 01213 if(Mview.indices.size() > OrdinalTraits<local_length_size>::zero() ){ 01214 Mview.maxNumRowEntries = Mview.indices[0].size(); 01215 for(local_length_size i = 1; i<Mview.indices.size(); ++i){ 01216 if(Mview.indices[i].size() > Mview.maxNumRowEntries){ 01217 Mview.maxNumRowEntries = Mview.indices[i].size(); 01218 } 01219 } 01220 } 01221 } 01222 01223 01224 template<class CrsMatrixType> 01225 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){ 01226 // Follows the NZ estimate in ML's ml_matmatmult.c 01227 size_t Aest = 100, Best=100; 01228 if(A.getNodeNumEntries() > 0) 01229 Aest = (A.getNodeNumRows()>0)? A.getNodeNumEntries()/A.getNodeNumEntries():100; 01230 if(B.getNodeNumEntries() > 0) 01231 Best=(B.getNodeNumRows()>0)? B.getNodeNumEntries()/B.getNodeNumEntries():100; 01232 01233 size_t nnzperrow=(size_t)(sqrt((double)Aest) + sqrt((double)Best) - 1); 01234 nnzperrow*=nnzperrow; 01235 01236 return (size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100); 01237 } 01238 01239 01240 01241 //kernel method for computing the local portion of C = A*B 01242 template<class Scalar, 01243 class LocalOrdinal, 01244 class GlobalOrdinal, 01245 class Node> 01246 void mult_A_B_newmatrix( 01247 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview, 01248 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview, 01249 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C) 01250 { 01251 using Teuchos::RCP; 01252 using Teuchos::rcp; 01253 using Teuchos::ArrayView; 01254 typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type; 01255 typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type; 01256 01257 #ifdef HAVE_TPETRA_MMM_TIMINGS 01258 using Teuchos::TimeMonitor; 01259 RCP<TimeMonitor> MM = 01260 rcp (new TimeMonitor (* (TimeMonitor::getNewTimer ("TpetraExt: MMM M5 Cmap")))); 01261 #endif 01262 size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid(); 01263 LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid(); 01264 01265 01266 // Build the final importer / column map, hash table lookups for C 01267 RCP<const import_type> Cimport; 01268 RCP<const map_type> Ccolmap; 01269 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter(); 01270 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter(); 01271 Array<LocalOrdinal> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol; 01272 01273 if(Bview.importMatrix.is_null()) { 01274 Cimport = Bimport; 01275 Ccolmap = Bview.colMap; 01276 // Bcol2Ccol is trivial 01277 for(size_t i=0; i<Bview.colMap->getNodeNumElements(); i++) { 01278 Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i); 01279 } 01280 } 01281 else { 01282 // Choose the right variant of setUnion 01283 if(!Bimport.is_null() && !Iimport.is_null()){ 01284 Cimport = Bimport->setUnion(*Iimport); 01285 Ccolmap = Cimport->getTargetMap(); 01286 } 01287 else if(!Bimport.is_null() && Iimport.is_null()) { 01288 Cimport = Bimport->setUnion(); 01289 } 01290 else if(Bimport.is_null() && !Iimport.is_null()) { 01291 Cimport = Iimport->setUnion(); 01292 } 01293 else 01294 throw std::runtime_error("TpetraExt::MMM status of matrix importers is nonsensical"); 01295 01296 Ccolmap = Cimport->getTargetMap(); 01297 01298 if(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap())) 01299 throw std::runtime_error("Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way"); 01300 01301 // NOTE: This is not efficient and should be folded into setUnion 01302 Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements()); 01303 ArrayView<const GlobalOrdinal> Bgid = Bview.origMatrix->getColMap()->getNodeElementList(); 01304 ArrayView<const GlobalOrdinal> Igid = Bview.importMatrix->getColMap()->getNodeElementList(); 01305 01306 for(size_t i=0; i<Bview.origMatrix->getColMap()->getNodeNumElements(); i++) 01307 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]); 01308 for(size_t i=0; i<Bview.importMatrix->getColMap()->getNodeNumElements(); i++) 01309 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]); 01310 } 01311 01312 #ifdef HAVE_TPETRA_MMM_TIMINGS 01313 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM Newmatrix SerialCore"))); 01314 #endif 01315 01316 // Sizes 01317 size_t m=Aview.origMatrix->getNodeNumRows(); 01318 size_t n=Ccolmap->getNodeNumElements(); 01319 01320 // Get Data Pointers 01321 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP; 01322 ArrayRCP<size_t> Crowptr_RCP; 01323 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP; 01324 ArrayRCP<LocalOrdinal> Ccolind_RCP; 01325 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP; 01326 ArrayRCP<Scalar> Cvals_RCP; 01327 01328 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP); 01329 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP); 01330 if(!Bview.importMatrix.is_null()) Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP); 01331 01332 01333 // For efficiency 01334 ArrayView<const size_t> Arowptr, Browptr, Irowptr; 01335 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind; 01336 ArrayView<const Scalar> Avals, Bvals, Ivals; 01337 ArrayView<size_t> Crowptr; 01338 ArrayView<LocalOrdinal> Ccolind; 01339 ArrayView<Scalar> Cvals; 01340 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP(); 01341 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP(); 01342 if(!Bview.importMatrix.is_null()) { 01343 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP(); 01344 } 01345 01346 // The status array will contain the index into colind where this entry was last deposited. 01347 // c_status[i] < CSR_ip - not in the row yet. 01348 // c_status[i] >= CSR_ip, this is the entry where you can find the data 01349 // We start with this filled with INVALID's indicating that there are no entries yet. 01350 // Sadly, this complicates the code due to the fact that size_t's are unsigned. 01351 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid(); 01352 Array<size_t> c_status(n, ST_INVALID); 01353 01354 // Classic csr assembly (low memory edition) 01355 size_t CSR_alloc=std::max(C_estimate_nnz(*Aview.origMatrix,*Bview.origMatrix),n); 01356 size_t CSR_ip=0,OLD_ip=0; 01357 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP(); 01358 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP(); 01359 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP(); 01360 01361 // Run through all the hash table lookups once and for all 01362 Array<LocalOrdinal> targetMapToOrigRow(Aview.colMap->getNodeNumElements(),LO_INVALID); 01363 Array<LocalOrdinal> targetMapToImportRow(Aview.colMap->getNodeNumElements(),LO_INVALID); 01364 01365 if(Aview.colMap->isSameAs(*Bview.rowMap)){ 01366 // Maps are the same: Use local IDs as the hash 01367 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) { 01368 LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01369 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID; 01370 else { 01371 LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01372 targetMapToImportRow[i] = I_LID; 01373 } 01374 } 01375 } 01376 else { 01377 // Maps are not the same: Use the map's hash 01378 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) { 01379 LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01380 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID; 01381 else { 01382 LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01383 targetMapToImportRow[i] = I_LID; 01384 } 01385 } 01386 } 01387 01388 const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero(); 01389 01390 // For each row of A/C 01391 for(size_t i=0; i<m; i++){ 01392 Crowptr[i]=CSR_ip; 01393 01394 for(size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){ 01395 LocalOrdinal Ak = Acolind[k]; 01396 Scalar Aval = Avals[k]; 01397 if(Aval==SC_ZERO) continue; 01398 01399 if(targetMapToOrigRow[Ak] != LO_INVALID){ 01400 // Local matrix 01401 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]); 01402 01403 for(size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) { 01404 LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]]; 01405 01406 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){ 01407 // New entry 01408 c_status[Cj] = CSR_ip; 01409 Ccolind[CSR_ip]= Cj; 01410 Cvals[CSR_ip] = Aval*Bvals[j]; 01411 CSR_ip++; 01412 } 01413 else 01414 Cvals[c_status[Cj]]+=Aval*Bvals[j]; 01415 } 01416 } 01417 else{ 01418 // Remote matrix 01419 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]); 01420 for(size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) { 01421 LocalOrdinal Cj=Icol2Ccol[Icolind[j]]; 01422 01423 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){ 01424 // New entry 01425 c_status[Cj]=CSR_ip; 01426 Ccolind[CSR_ip]=Cj; 01427 Cvals[CSR_ip]=Aval*Ivals[j]; 01428 CSR_ip++; 01429 } 01430 else 01431 Cvals[c_status[Cj]]+=Aval*Ivals[j]; 01432 } 01433 } 01434 } 01435 01436 // Resize for next pass if needed 01437 if(CSR_ip + n > CSR_alloc){ 01438 CSR_alloc*=2; 01439 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP(); 01440 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP(); 01441 } 01442 OLD_ip=CSR_ip; 01443 } 01444 01445 Crowptr[m]=CSR_ip; 01446 01447 // Downward resize 01448 Cvals_RCP.resize(CSR_ip); 01449 Ccolind_RCP.resize(CSR_ip); 01450 01451 01452 #ifdef HAVE_TPETRA_MMM_TIMINGS 01453 MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: MMM Newmatrix Final Sort")))); 01454 #endif 01455 01456 // Replace the column map 01457 C.replaceColMap(Ccolmap); 01458 01459 // Final sort & set of CRS arrays 01460 Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP()); 01461 C.setAllValues(Crowptr_RCP,Ccolind_RCP,Cvals_RCP); 01462 01463 #ifdef HAVE_TPETRA_MMM_TIMINGS 01464 MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: MMM Newmatrix ESFC")))); 01465 #endif 01466 01467 // Final FillComplete 01468 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(),Aview.origMatrix->getRangeMap(),Cimport); 01469 } 01470 01471 01472 //kernel method for computing the local portion of C = (I-omega D^{-1} A)*B 01473 template<class Scalar, 01474 class LocalOrdinal, 01475 class GlobalOrdinal, 01476 class Node> 01477 void jacobi_A_B_newmatrix( 01478 Scalar omega, 01479 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv, 01480 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview, 01481 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview, 01482 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C) 01483 { 01484 using Teuchos::RCP; 01485 using Teuchos::rcp; 01486 using Teuchos::ArrayView; 01487 typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type; 01488 typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type; 01489 01490 #ifdef HAVE_TPETRA_MMM_TIMINGS 01491 using Teuchos::TimeMonitor; 01492 RCP<TimeMonitor> MM = 01493 rcp (new TimeMonitor (* (TimeMonitor::getNewTimer ("TpetraExt: Jacobi M5 Cmap")))); 01494 #endif 01495 size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid(); 01496 LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid(); 01497 01498 01499 // Build the final importer / column map, hash table lookups for C 01500 RCP<const import_type> Cimport; 01501 RCP<const map_type> Ccolmap; 01502 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter(); 01503 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter(); 01504 Array<LocalOrdinal> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol; 01505 01506 if(Bview.importMatrix.is_null()) { 01507 Cimport = Bimport; 01508 Ccolmap = Bview.colMap; 01509 // Bcol2Ccol is trivial 01510 for(size_t i=0; i<Bview.colMap->getNodeNumElements(); i++) { 01511 Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i); 01512 } 01513 } 01514 else { 01515 // Choose the right variant of setUnion 01516 if(!Bimport.is_null() && !Iimport.is_null()){ 01517 Cimport = Bimport->setUnion(*Iimport); 01518 Ccolmap = Cimport->getTargetMap(); 01519 } 01520 else if(!Bimport.is_null() && Iimport.is_null()) { 01521 Cimport = Bimport->setUnion(); 01522 } 01523 else if(Bimport.is_null() && !Iimport.is_null()) { 01524 Cimport = Iimport->setUnion(); 01525 } 01526 else 01527 throw std::runtime_error("TpetraExt::Jacobi status of matrix importers is nonsensical"); 01528 01529 Ccolmap = Cimport->getTargetMap(); 01530 01531 if(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap())) 01532 throw std::runtime_error("Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way"); 01533 01534 // NOTE: This is not efficient and should be folded into setUnion 01535 Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements()); 01536 ArrayView<const GlobalOrdinal> Bgid = Bview.origMatrix->getColMap()->getNodeElementList(); 01537 ArrayView<const GlobalOrdinal> Igid = Bview.importMatrix->getColMap()->getNodeElementList(); 01538 01539 for(size_t i=0; i<Bview.origMatrix->getColMap()->getNodeNumElements(); i++) 01540 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]); 01541 for(size_t i=0; i<Bview.importMatrix->getColMap()->getNodeNumElements(); i++) 01542 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]); 01543 } 01544 01545 #ifdef HAVE_TPETRA_MMM_TIMINGS 01546 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi Newmatrix SerialCore"))); 01547 #endif 01548 01549 // Sizes 01550 size_t m=Aview.origMatrix->getNodeNumRows(); 01551 size_t n=Ccolmap->getNodeNumElements(); 01552 01553 // Get Data Pointers 01554 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP; 01555 ArrayRCP<size_t> Crowptr_RCP; 01556 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP; 01557 ArrayRCP<LocalOrdinal> Ccolind_RCP; 01558 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP; 01559 ArrayRCP<Scalar> Cvals_RCP; 01560 ArrayRCP<const Scalar> Dvals_RCP; 01561 01562 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP); 01563 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP); 01564 if(!Bview.importMatrix.is_null()) Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP); 01565 Dvals_RCP = Dinv.getData(); 01566 01567 // For efficiency 01568 ArrayView<const size_t> Arowptr, Browptr, Irowptr; 01569 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind; 01570 ArrayView<const Scalar> Avals, Bvals, Ivals; 01571 ArrayView<size_t> Crowptr; 01572 ArrayView<LocalOrdinal> Ccolind; 01573 ArrayView<Scalar> Cvals; 01574 ArrayView<const Scalar> Dvals; 01575 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP(); 01576 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP(); 01577 if(!Bview.importMatrix.is_null()) { 01578 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP(); 01579 } 01580 Dvals = Dvals_RCP(); 01581 01582 // The status array will contain the index into colind where this entry was last deposited. 01583 // c_status[i] < CSR_ip - not in the row yet. 01584 // c_status[i] >= CSR_ip, this is the entry where you can find the data 01585 // We start with this filled with INVALID's indicating that there are no entries yet. 01586 // Sadly, this complicates the code due to the fact that size_t's are unsigned. 01587 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid(); 01588 Array<size_t> c_status(n, ST_INVALID); 01589 01590 // Classic csr assembly (low memory edition) 01591 size_t CSR_alloc=std::max(C_estimate_nnz(*Aview.origMatrix,*Bview.origMatrix),n); 01592 size_t CSR_ip=0,OLD_ip=0; 01593 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP(); 01594 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP(); 01595 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP(); 01596 01597 // Run through all the hash table lookups once and for all 01598 Array<LocalOrdinal> targetMapToOrigRow(Aview.colMap->getNodeNumElements(),LO_INVALID); 01599 Array<LocalOrdinal> targetMapToImportRow(Aview.colMap->getNodeNumElements(),LO_INVALID); 01600 01601 if(Aview.colMap->isSameAs(*Bview.rowMap)){ 01602 // Maps are the same: Use local IDs as the hash 01603 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) { 01604 LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01605 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID; 01606 else { 01607 LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01608 targetMapToImportRow[i] = I_LID; 01609 } 01610 } 01611 } 01612 else { 01613 // Maps are not the same: Use the map's hash 01614 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) { 01615 LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01616 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID; 01617 else { 01618 LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i)); 01619 targetMapToImportRow[i] = I_LID; 01620 } 01621 } 01622 } 01623 01624 const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero(); 01625 01626 // For each row of A/C 01627 for(size_t i=0; i<m; i++){ 01628 Crowptr[i]=CSR_ip; 01629 Scalar Dval = Dvals[i]; 01630 01631 // Entries of B 01632 for(size_t k=Browptr[i]; k<Browptr[i+1]; k++){ 01633 Scalar Bval = Bvals[k]; 01634 if(Bval==SC_ZERO) continue; 01635 LocalOrdinal Ck=Bcol2Ccol[Bcolind[k]]; 01636 01637 // Assume no repeated entries in B 01638 c_status[Ck] = CSR_ip; 01639 Ccolind[CSR_ip] = Ck; 01640 Cvals[CSR_ip] = Bvals[k]; 01641 CSR_ip++; 01642 } 01643 01644 01645 // Entries of -omega * Dinv * A * B 01646 for(size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){ 01647 LocalOrdinal Ak = Acolind[k]; 01648 Scalar Aval = Avals[k]; 01649 if(Aval==SC_ZERO) continue; 01650 01651 if(targetMapToOrigRow[Ak] != LO_INVALID){ 01652 // Local matrix 01653 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]); 01654 01655 for(size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) { 01656 LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]]; 01657 01658 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){ 01659 // New entry 01660 c_status[Cj] = CSR_ip; 01661 Ccolind[CSR_ip] = Cj; 01662 Cvals[CSR_ip] = - omega * Dval* Aval * Bvals[j]; 01663 CSR_ip++; 01664 } 01665 else 01666 Cvals[c_status[Cj]] -= omega * Dval* Aval * Bvals[j]; 01667 } 01668 } 01669 else{ 01670 // Remote matrix 01671 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]); 01672 for(size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) { 01673 LocalOrdinal Cj=Icol2Ccol[Icolind[j]]; 01674 01675 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){ 01676 // New entry 01677 c_status[Cj] = CSR_ip; 01678 Ccolind[CSR_ip] = Cj; 01679 Cvals[CSR_ip] = - omega * Dval* Aval * Ivals[j]; 01680 CSR_ip++; 01681 } 01682 else 01683 Cvals[c_status[Cj]] -= omega * Dval* Aval * Ivals[j]; 01684 } 01685 } 01686 } 01687 01688 // Resize for next pass if needed 01689 if(CSR_ip + n > CSR_alloc){ 01690 CSR_alloc*=2; 01691 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP(); 01692 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP(); 01693 } 01694 OLD_ip=CSR_ip; 01695 } 01696 01697 Crowptr[m]=CSR_ip; 01698 01699 // Downward resize 01700 Cvals_RCP.resize(CSR_ip); 01701 Ccolind_RCP.resize(CSR_ip); 01702 01703 01704 #ifdef HAVE_TPETRA_MMM_TIMINGS 01705 MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: Jacobi Newmatrix Final Sort")))); 01706 #endif 01707 01708 // Replace the column map 01709 C.replaceColMap(Ccolmap); 01710 01711 // Final sort & set of CRS arrays 01712 Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP()); 01713 C.setAllValues(Crowptr_RCP,Ccolind_RCP,Cvals_RCP); 01714 01715 #ifdef HAVE_TPETRA_MMM_TIMINGS 01716 MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: Jacobi Newmatrix ESFC")))); 01717 #endif 01718 01719 // Final FillComplete 01720 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(),Aview.origMatrix->getRangeMap(),Cimport); 01721 } 01722 01723 01724 template<class Scalar, 01725 class LocalOrdinal, 01726 class GlobalOrdinal, 01727 class Node> 01728 void import_and_extract_views( 01729 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M, 01730 RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap, 01731 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview, 01732 RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter, 01733 bool userAssertsThereAreNoRemotes) 01734 { 01735 #ifdef HAVE_TPETRA_MMM_TIMINGS 01736 using Teuchos::TimeMonitor; 01737 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Alloc"))); 01738 #endif 01739 01740 //Convience typedef 01741 typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t; 01742 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> CrsMatrix_t; 01743 // The goal of this method is to populate the 'Mview' struct with views of the 01744 // rows of M, including all rows that correspond to elements in 'targetMap'. 01745 // 01746 // If targetMap includes local elements that correspond to remotely-owned rows 01747 // of M, then those remotely-owned rows will be imported into 01748 // 'Mview.importMatrix', and views of them will be included in 'Mview'. 01749 Mview.deleteContents(); 01750 01751 RCP<const Map_t> Mrowmap = M.getRowMap(); 01752 RCP<const Map_t> MremoteRowMap; 01753 const int numProcs = Mrowmap->getComm()->getSize(); 01754 01755 ArrayView<const GlobalOrdinal> Mrows = targetMap->getNodeElementList(); 01756 01757 size_t numRemote = 0; 01758 size_t numRows = targetMap->getNodeNumElements(); 01759 Mview.origMatrix = Teuchos::rcp(&M,false); 01760 Mview.origRowMap = M.getRowMap(); 01761 Mview.rowMap = targetMap; 01762 Mview.colMap = M.getColMap(); 01763 Mview.domainMap = M.getDomainMap(); 01764 Mview.importColMap = null; 01765 01766 // Short circuit if the user swears there are no remotes 01767 if(userAssertsThereAreNoRemotes) return; 01768 01769 #ifdef HAVE_TPETRA_MMM_TIMINGS 01770 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X RemoteMap"))); 01771 #endif 01772 01773 // mark each row in targetMap as local or remote, and go ahead and get a view for the local rows 01774 int mode = 0; 01775 if(!prototypeImporter.is_null() && prototypeImporter->getSourceMap()->isSameAs(*Mrowmap) && prototypeImporter->getTargetMap()->isSameAs(*targetMap)) { 01776 // We have a valid prototype importer --- ask it for the remotes 01777 numRemote = prototypeImporter->getNumRemoteIDs(); 01778 Array<GlobalOrdinal> MremoteRows(numRemote); 01779 ArrayView<const LocalOrdinal> RemoteLIDs = prototypeImporter->getRemoteLIDs(); 01780 for(size_t i=0; i<numRemote; i++) { 01781 MremoteRows[i] = targetMap->getGlobalElement(RemoteLIDs[i]); 01782 } 01783 01784 MremoteRowMap=rcp(new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode())); 01785 mode=1; 01786 } 01787 else if(prototypeImporter.is_null()) { 01788 // No prototype importer --- count the remotes the hard way 01789 Array<GlobalOrdinal> MremoteRows(numRows); 01790 for(size_t i=0; i < numRows; ++i) { 01791 const LocalOrdinal mlid = Mrowmap->getLocalElement(Mrows[i]); 01792 01793 if (mlid == OrdinalTraits<LocalOrdinal>::invalid()) { 01794 MremoteRows[numRemote]=Mrows[i]; 01795 ++numRemote; 01796 } 01797 } 01798 MremoteRows.resize(numRemote); 01799 MremoteRowMap=rcp(new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode())); 01800 mode=2; 01801 } 01802 else { 01803 // prototypeImporter is bad. But if we're in serial that's OK. 01804 mode=3; 01805 } 01806 01807 if (numProcs < 2) { 01808 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error, 01809 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows." <<std::endl); 01810 //If only one processor we don't need to import any remote rows, so return. 01811 return; 01812 } 01813 01814 // 01815 // Now we will import the needed remote rows of M, if the global maximum 01816 // value of numRemote is greater than 0. 01817 // 01818 #ifdef HAVE_TPETRA_MMM_TIMINGS 01819 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Collective-0"))); 01820 #endif 01821 01822 global_size_t globalMaxNumRemote = 0; 01823 Teuchos::reduceAll(*(Mrowmap->getComm()) , Teuchos::REDUCE_MAX, (global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) ); 01824 01825 01826 if (globalMaxNumRemote > 0) { 01827 #ifdef HAVE_TPETRA_MMM_TIMINGS 01828 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Import-2"))); 01829 #endif 01830 // Create an importer with target-map MremoteRowMap and source-map Mrowmap. 01831 RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > importer; 01832 01833 if(mode==1) 01834 importer = prototypeImporter->createRemoteOnlyImport(MremoteRowMap); 01835 else if(mode==2) 01836 importer=rcp(new Import<LocalOrdinal, GlobalOrdinal, Node>(Mrowmap, MremoteRowMap)); 01837 else 01838 throw std::runtime_error("prototypeImporter->SourceMap() does not match M.getRowMap()!"); 01839 01840 #ifdef HAVE_TPETRA_MMM_TIMINGS 01841 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Import-3"))); 01842 #endif 01843 01844 // Now create a new matrix into which we can import the remote rows of M that we need. 01845 Mview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<CrsMatrix_t>(Teuchos::rcp(&M,false),*importer,M.getDomainMap(),M.getRangeMap(),Teuchos::null); 01846 01847 #ifdef COMPUTE_MMM_STATISTICS 01848 printMultiplicationStatistics(importer,std::string("I&X MMM")); 01849 #endif 01850 01851 01852 #ifdef HAVE_TPETRA_MMM_TIMINGS 01853 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Import-4"))); 01854 #endif 01855 01856 // Save the column map of the imported matrix, so that we can convert indices back to global for arithmetic later 01857 Mview.importColMap = Mview.importMatrix->getColMap(); 01858 } 01859 } 01860 01861 01862 01863 } //End namepsace MMdetails 01864 01865 } //End namespace Tpetra 01866 // 01867 // Explicit instantiation macro 01868 // 01869 // Must be expanded from within the Tpetra namespace! 01870 // 01871 01872 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 01873 \ 01874 template \ 01875 void MatrixMatrix::Multiply( \ 01876 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 01877 bool transposeA, \ 01878 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 01879 bool transposeB, \ 01880 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 01881 bool call_FillComplete_on_result); \ 01882 \ 01883 template \ 01884 void MatrixMatrix::Jacobi( \ 01885 SCALAR omega, \ 01886 const Vector< SCALAR, LO, GO, NODE > & Dinv, \ 01887 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 01888 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 01889 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 01890 bool call_FillComplete_on_result); \ 01891 \ 01892 template \ 01893 void MatrixMatrix::Add( \ 01894 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 01895 bool transposeA, \ 01896 SCALAR scalarA, \ 01897 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 01898 bool transposeB, \ 01899 SCALAR scalarB, \ 01900 RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \ 01901 \ 01902 template \ 01903 void MatrixMatrix::Add( \ 01904 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \ 01905 bool transposeA, \ 01906 SCALAR scalarA, \ 01907 CrsMatrix<SCALAR, LO, GO, NODE>& B, \ 01908 SCALAR scalarB ); \ 01909 \ 01910 template \ 01911 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \ 01912 MatrixMatrix::add (const SCALAR & alpha, \ 01913 const bool transposeA, \ 01914 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 01915 const SCALAR & beta, \ 01916 const bool transposeB, \ 01917 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 01918 const Teuchos::RCP<const Map< LO , GO , NODE > >& domainMap, \ 01919 const Teuchos::RCP<const Map< LO , GO , NODE > >& rangeMap, \ 01920 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 01921 \ 01922 01923 01924 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
1.7.6.1