|
Amesos2 - Direct Sparse Solver Interfaces
Version of the Day
|
00001 // @HEADER 00002 // 00003 // *********************************************************************** 00004 // 00005 // Amesos2: Templated Direct Sparse Solver Package 00006 // Copyright 2011 Sandia Corporation 00007 // 00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00009 // the U.S. Government retains certain rights in this software. 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are 00013 // met: 00014 // 00015 // 1. Redistributions of source code must retain the above copyright 00016 // notice, this list of conditions and the following disclaimer. 00017 // 00018 // 2. Redistributions in binary form must reproduce the above copyright 00019 // notice, this list of conditions and the following disclaimer in the 00020 // documentation and/or other materials provided with the distribution. 00021 // 00022 // 3. Neither the name of the Corporation nor the names of the 00023 // contributors may be used to endorse or promote products derived from 00024 // this software without specific prior written permission. 00025 // 00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00037 // 00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00039 // 00040 // *********************************************************************** 00041 // 00042 // @HEADER 00043 00052 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP 00053 #define AMESOS2_SUPERLUDIST_DEF_HPP 00054 00055 #include <Teuchos_Tuple.hpp> 00056 #include <Teuchos_StandardParameterEntryValidators.hpp> 00057 #include <Teuchos_DefaultMpiComm.hpp> 00058 00059 #include "Amesos2_SolverCore_def.hpp" 00060 #include "Amesos2_Superludist_TypeMap.hpp" 00061 #include "Amesos2_Util.hpp" 00062 00063 00064 namespace Amesos2 { 00065 00066 00067 template <class Matrix, class Vector> 00068 Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A, 00069 Teuchos::RCP<Vector> X, 00070 Teuchos::RCP<const Vector> B) 00071 : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B) 00072 , nzvals_() // initialization to empty arrays 00073 , colind_() 00074 , rowptr_() 00075 , bvals_() 00076 , xvals_() 00077 , in_grid_(false) 00078 { 00080 // Set up the SuperLU_DIST processor grid // 00082 00083 int nprocs = this->getComm()->getSize(); 00084 SLUD::int_t nprow, npcol; 00085 get_default_grid_size(nprocs, nprow, npcol); 00086 data_.mat_comm = dynamic_cast<const Teuchos::MpiComm<int>* >(this->matrixA_->getComm().getRawPtr())->getRawMpiComm()->operator()(); 00087 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid)); 00088 00090 // Set Some default parameters. // 00091 // // 00092 // Must do this after grid has been created in // 00093 // case user specifies the nprow and npcol parameters // 00095 Teuchos::RCP<Teuchos::ParameterList> default_params 00096 = Teuchos::parameterList( *(this->getValidParameters()) ); 00097 this->setParameters(default_params); 00098 00099 // Set some internal options 00100 data_.options.Fact = SLUD::DOFACT; 00101 data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed 00102 data_.options.SolveInitialized = SLUD::NO; 00103 data_.options.RefineInitialized = SLUD::NO; 00104 data_.rowequ = false; 00105 data_.colequ = false; 00106 data_.perm_r.resize(this->globalNumRows_); 00107 data_.perm_c.resize(this->globalNumCols_); 00108 00110 // Set up a communicator for the parallel column ordering and // 00111 // parallel symbolic factorization. // 00113 data_.symb_comm = MPI_COMM_NULL; 00114 int color = MPI_UNDEFINED; 00115 int my_rank = this->rank_; 00116 00117 /* domains is the next power of 2 less than nprow*npcol. This 00118 * value will be used for creating an MPI communicator for the 00119 * pre-ordering and symbolic factorization methods. 00120 */ 00121 data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) ); 00122 00123 if( this->rank_ < data_.domains ) color = 0; 00124 MPI_Comm_split (data_.mat_comm, color, my_rank, &(data_.symb_comm)); 00125 00127 // Set up a row map that maps to only processors that are in the // 00128 // SuperLU processor grid. This will be used for redistributing A. // 00130 00131 int my_weight = 0; 00132 if( this->rank_ < nprow * npcol ){ 00133 in_grid_ = true; my_weight = 1; // I am in the grid, and I get some of the matrix rows 00134 } 00135 // TODO: might only need to initialize if parallel symbolic factorization is requested. 00136 // TODO: Need to fix this map for indexbase ? 00137 superlu_rowmap_ 00138 = Tpetra::createWeightedContigMapWithNode<local_ordinal_type, 00139 global_ordinal_type, 00140 node_type>(my_weight, 00141 this->globalNumRows_, 00142 this->getComm(), 00143 KokkosClassic::DefaultNode::getDefaultNode()); 00144 // TODO: the node above should technically come from the matrix 00145 // itself. Might need to add a getNode method to the matrix 00146 // adapter. 00147 00149 // Do some other initialization // 00151 00152 data_.A.Store = NULL; 00153 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu)); 00154 SLUD::PStatInit(&(data_.stat)); 00155 // We do not use ScalePermstructInit because we will use our own 00156 // arrays for storing perm_r and perm_c 00157 data_.scale_perm.perm_r = data_.perm_r.getRawPtr(); 00158 data_.scale_perm.perm_c = data_.perm_c.getRawPtr(); 00159 } 00160 00161 00162 template <class Matrix, class Vector> 00163 Superludist<Matrix,Vector>::~Superludist( ) 00164 { 00165 /* Free SuperLU_DIST data_types 00166 * - Matrices 00167 * - Vectors 00168 * - Stat object 00169 * - ScalePerm, LUstruct, grid, and solve objects 00170 * 00171 * Note: the function definitions are the same regardless whether 00172 * complex or real, so we arbitrarily use the D namespace 00173 */ 00174 if ( this->status_.getNumPreOrder() > 0 ){ 00175 free( data_.sizes ); 00176 free( data_.fstVtxSep ); 00177 } 00178 00179 // Cleanup old matrix store memory if it's non-NULL. Our 00180 // Teuchos::Array's will destroy rowind, colptr, and nzval for us 00181 if( data_.A.Store != NULL ){ 00182 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) ); 00183 } 00184 00185 // LU data is initialized in numericFactorization_impl() 00186 if ( this->status_.getNumNumericFact() > 0 ){ 00187 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu)); 00188 } 00189 function_map::LUstructFree(&(data_.lu)); 00190 00191 // If a symbolic factorization is ever performed without a 00192 // follow-up numericfactorization, there are some arrays in the 00193 // Pslu_freeable struct which will never be free'd by 00194 // SuperLU_DIST. 00195 if ( this->status_.symbolicFactorizationDone() && 00196 !this->status_.numericFactorizationDone() ){ 00197 if ( data_.pslu_freeable.xlsub != NULL ){ 00198 free( data_.pslu_freeable.xlsub ); 00199 free( data_.pslu_freeable.lsub ); 00200 } 00201 if ( data_.pslu_freeable.xusub != NULL ){ 00202 free( data_.pslu_freeable.xusub ); 00203 free( data_.pslu_freeable.usub ); 00204 } 00205 if ( data_.pslu_freeable.supno_loc != NULL ){ 00206 free( data_.pslu_freeable.supno_loc ); 00207 free( data_.pslu_freeable.xsup_beg_loc ); 00208 free( data_.pslu_freeable.xsup_end_loc ); 00209 } 00210 free( data_.pslu_freeable.globToLoc ); 00211 } 00212 00213 SLUD::PStatFree( &(data_.stat) ) ; 00214 00215 // Teuchos::Arrays will free R, C, perm_r, and perm_c 00216 // SLUD::D::ScalePermstructFree(&(data_.scale_perm)); 00217 00218 if ( data_.options.SolveInitialized == SLUD::YES ) 00219 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct)); 00220 00221 SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any 00222 // cases where grid 00223 // wouldn't be initialized? 00224 00225 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm)); 00226 } 00227 00228 template<class Matrix, class Vector> 00229 int 00230 Superludist<Matrix,Vector>::preOrdering_impl() 00231 { 00232 // We will always use the NATURAL row ordering to avoid the 00233 // sequential bottleneck present when doing any other row 00234 // ordering scheme from SuperLU_DIST 00235 // 00236 // Set perm_r to be the natural ordering 00237 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_); 00238 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i; 00239 00240 // loadA_impl(); // Refresh matrix values 00241 00242 if( in_grid_ ){ 00243 // If this function has been called at least once, then the 00244 // sizes, and fstVtxSep arrays were allocated in 00245 // get_perm_c_parmetis. Delete them before calling that 00246 // function again. These arrays will also be dealloc'd in the 00247 // deconstructor. 00248 if( this->status_.getNumPreOrder() > 0 ){ 00249 free( data_.sizes ); 00250 free( data_.fstVtxSep ); 00251 } 00252 #ifdef HAVE_AMESOS2_TIMERS 00253 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ ); 00254 #endif 00255 00256 float info = 0.0; 00257 info = SLUD::get_perm_c_parmetis( &(data_.A), 00258 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(), 00259 data_.grid.nprow * data_.grid.npcol, data_.domains, 00260 &(data_.sizes), &(data_.fstVtxSep), 00261 &(data_.grid), &(data_.symb_comm) ); 00262 00263 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0, 00264 std::runtime_error, 00265 "SuperLU_DIST pre-ordering ran out of memory after allocating " 00266 << info << " bytes of memory" ); 00267 } 00268 00269 // Ordering will be applied directly before numeric factorization, 00270 // after we have a chance to get updated coefficients from the 00271 // matrix 00272 00273 return EXIT_SUCCESS; 00274 } 00275 00276 00277 00278 template <class Matrix, class Vector> 00279 int 00280 Superludist<Matrix,Vector>::symbolicFactorization_impl() 00281 { 00282 // loadA_impl(); // Refresh matrix values 00283 00284 if( in_grid_ ){ 00285 00286 #ifdef HAVE_AMESOS2_TIMERS 00287 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ ); 00288 #endif 00289 00290 float info = 0.0; 00291 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol), 00292 data_.domains, &(data_.A), data_.perm_c.getRawPtr(), 00293 data_.perm_r.getRawPtr(), data_.sizes, 00294 data_.fstVtxSep, &(data_.pslu_freeable), 00295 &(data_.grid.comm), &(data_.symb_comm), 00296 &(data_.mem_usage)); 00297 00298 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0, 00299 std::runtime_error, 00300 "SuperLU_DIST symbolic factorization ran out of memory after" 00301 " allocating " << info << " bytes of memory" ); 00302 } 00303 same_symbolic_ = false; 00304 same_solve_struct_ = false; 00305 00306 return EXIT_SUCCESS; 00307 } 00308 00309 00310 template <class Matrix, class Vector> 00311 int 00312 Superludist<Matrix,Vector>::numericFactorization_impl(){ 00313 using Teuchos::as; 00314 00315 // loadA_impl(); // Refresh the matrix values 00316 00317 // if( data_.options.Equil == SLUD::YES ){ 00318 // // Apply the scalings computed in preOrdering 00319 // function_map::laqgs(&(data_.A), data_.R.getRawPtr(), 00320 // data_.C.getRawPtr(), data_.rowcnd, data_.colcnd, 00321 // data_.amax, &(data_.equed)); 00322 00323 // data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH); 00324 // data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH); 00325 // } 00326 00327 if( in_grid_ ){ 00328 // Apply the column ordering, so that AC is the column-permuted A, and compute etree 00329 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc; 00330 for( size_t i = 0; i < nnz_loc; ++i ) colind_[i] = data_.perm_c[colind_[i]]; 00331 00332 // Distribute data from the symbolic factorization 00333 if( same_symbolic_ ){ 00334 // Note: with the SamePattern_SameRowPerm options, it does not 00335 // matter that the glu_freeable member has never been 00336 // initialized, because it is never accessed. It is a 00337 // placeholder arg. The real work is done in data_.lu 00338 function_map::pdistribute(SLUD::SamePattern_SameRowPerm, 00339 as<SLUD::int_t>(this->globalNumRows_), // aka "n" 00340 &(data_.A), &(data_.scale_perm), 00341 &(data_.glu_freeable), &(data_.lu), 00342 &(data_.grid)); 00343 } else { 00344 function_map::dist_psymbtonum(SLUD::DOFACT, 00345 as<SLUD::int_t>(this->globalNumRows_), // aka "n" 00346 &(data_.A), &(data_.scale_perm), 00347 &(data_.pslu_freeable), &(data_.lu), 00348 &(data_.grid)); 00349 } 00350 00351 // Retrieve the normI of A (required by gstrf). 00352 double anorm = function_map::plangs((char *)"I", &(data_.A), &(data_.grid)); 00353 00354 int info = 0; 00355 { 00356 #ifdef HAVE_AMESOS2_TIMERS 00357 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_); 00358 #endif 00359 00360 function_map::gstrf(&(data_.options), this->globalNumRows_, 00361 this->globalNumCols_, anorm, &(data_.lu), 00362 &(data_.grid), &(data_.stat), &info); 00363 } 00364 00365 // Check output 00366 TEUCHOS_TEST_FOR_EXCEPTION( info > 0, 00367 std::runtime_error, 00368 "L and U factors have been computed but U(" 00369 << info << "," << info << ") is exactly zero " 00370 "(i.e. U is singular)"); 00371 } 00372 00373 // The other option, that info_st < 0, denotes invalid parameters 00374 // to the function, but we'll assume for now that that won't 00375 // happen. 00376 00377 data_.options.Fact = SLUD::FACTORED; 00378 same_symbolic_ = true; 00379 00380 return EXIT_SUCCESS; 00381 } 00382 00383 00384 template <class Matrix, class Vector> 00385 int 00386 Superludist<Matrix,Vector>::solve_impl(const Teuchos::Ptr<MultiVecAdapter<Vector> > X, 00387 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const 00388 { 00389 using Teuchos::as; 00390 00391 // local_len_rhs is how many of the multivector rows belong to 00392 // this processor in the SuperLU_DIST processor grid. 00393 const size_t local_len_rhs = superlu_rowmap_->getNodeNumElements(); 00394 const global_size_type nrhs = X->getGlobalNumVectors(); 00395 const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex(); 00396 00397 // make sure our multivector storage is sized appropriately 00398 bvals_.resize(nrhs * local_len_rhs); 00399 xvals_.resize(nrhs * local_len_rhs); 00400 00401 // We assume the global length of the two vectors have already been 00402 // checked for compatibility 00403 00404 { // get the values from B 00405 #ifdef HAVE_AMESOS2_TIMERS 00406 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_); 00407 #endif 00408 00409 { 00410 // The input dense matrix for B should be distributed in the 00411 // same manner as the superlu_dist matrix. That is, if a 00412 // processor has m_loc rows of A, then it should also have 00413 // m_loc rows of B (and the same rows). We accomplish this by 00414 // distributing the multivector rows with the same Map that 00415 // the matrix A's rows are distributed. 00416 #ifdef HAVE_AMESOS2_TIMERS 00417 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); 00418 #endif 00419 00420 // get grid-distributed mv data. The multivector data will be 00421 // distributed across the processes in the SuperLU_DIST grid. 00422 typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper; 00423 copy_helper::do_get(B, 00424 bvals_(), 00425 local_len_rhs, 00426 Teuchos::ptrInArg(*superlu_rowmap_)); 00427 } 00428 } // end block for conversion time 00429 00430 if( in_grid_ ){ 00431 // if( data_.options.trans == SLUD::NOTRANS ){ 00432 // if( data_.rowequ ){ // row equilibration has been done on AC 00433 // // scale bxvals_ by diag(R) 00434 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(), 00435 // SLUD::slu_mt_mult<slu_type,magnitude_type>()); 00436 // } 00437 // } else if( data_.colequ ){ // column equilibration has been done on AC 00438 // // scale bxvals_ by diag(C) 00439 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(), 00440 // SLUD::slu_mt_mult<slu_type,magnitude_type>()); 00441 // } 00442 00443 // Initialize the SOLVEstruct_t. 00444 // 00445 // We are able to reuse the solve struct if we have not changed 00446 // the sparsity pattern of L and U since the last solve 00447 if( !same_solve_struct_ ){ 00448 if( data_.options.SolveInitialized == SLUD::YES ){ 00449 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct)); 00450 } 00451 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(), 00452 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu), 00453 &(data_.grid), &(data_.solve_struct)); 00454 // Flag that we can reuse this solve_struct unless another 00455 // symbolicFactorization is called between here and the next 00456 // solve. 00457 same_solve_struct_ = true; 00458 } 00459 00460 int ierr = 0; // returned error code 00461 { 00462 #ifdef HAVE_AMESOS2_TIMERS 00463 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_); 00464 #endif 00465 00466 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu), 00467 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(), 00468 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b), 00469 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs), 00470 &(data_.solve_struct), &(data_.stat), &ierr); 00471 } // end block for solve time 00472 00473 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0, 00474 std::runtime_error, 00475 "Argument " << -ierr << " to gstrs had an illegal value" ); 00476 00477 // "Un-scale" the solution so that it is a solution of the original system 00478 // if( data_.options.trans == SLUD::NOTRANS ){ 00479 // if( data_.colequ ){ // column equilibration has been done on AC 00480 // // scale bxvals_ by diag(C) 00481 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(), 00482 // SLUD::slu_mt_mult<slu_type,magnitude_type>()); 00483 // } 00484 // } else if( data_.rowequ ){ // row equilibration has been done on AC 00485 // // scale bxvals_ by diag(R) 00486 // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(), 00487 // SLUD::slu_mt_mult<slu_type,magnitude_type>()); 00488 // } 00489 { // permute B to a solution of the original system 00490 #ifdef HAVE_AMESOS2_TIMERS 00491 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); 00492 #endif 00493 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs); 00494 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b), 00495 as<SLUD::int_t>(local_len_rhs), 00496 data_.solve_struct.row_to_proc, 00497 data_.solve_struct.inv_perm_c, 00498 bvals_.getRawPtr(), ld, 00499 xvals_.getRawPtr(), ld, 00500 as<int>(nrhs), 00501 &(data_.grid)); 00502 } 00503 } 00504 00505 /* Update X's global values */ 00506 { 00507 #ifdef HAVE_AMESOS2_TIMERS 00508 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); 00509 #endif 00510 00511 typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper; 00512 put_helper::do_put(X, 00513 xvals_(), 00514 local_len_rhs, 00515 Teuchos::ptrInArg(*superlu_rowmap_)); 00516 } 00517 00518 return EXIT_SUCCESS; 00519 } 00520 00521 00522 template <class Matrix, class Vector> 00523 bool 00524 Superludist<Matrix,Vector>::matrixShapeOK_impl() const 00525 { 00526 // SuperLU_DIST requires square matrices 00527 return( this->globalNumRows_ == this->globalNumCols_ ); 00528 } 00529 00530 00531 template <class Matrix, class Vector> 00532 void 00533 Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList ) 00534 { 00535 using Teuchos::as; 00536 using Teuchos::RCP; 00537 using Teuchos::getIntegralValue; 00538 using Teuchos::ParameterEntryValidator; 00539 00540 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl(); 00541 00542 if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){ 00543 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") && 00544 parameterList->isParameter("npcol")), 00545 std::invalid_argument, 00546 "nprow and npcol must be set together" ); 00547 00548 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow"); 00549 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol"); 00550 00551 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(), 00552 std::invalid_argument, 00553 "nprow and npcol combination invalid" ); 00554 00555 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){ 00556 // De-allocate the default grid that was initialized in the constructor 00557 SLUD::superlu_gridexit(&(data_.grid)); 00558 // Create a new grid 00559 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid)); 00560 } // else our grid has not changed size since the last initialization 00561 } 00562 00563 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_, 00564 std::invalid_argument, 00565 "SuperLU_DIST does not support solving the tranpose system" ); 00566 00567 data_.options.Trans = SLUD::NOTRANS; // should always be set this way; 00568 00569 // TODO: Uncomment when supported 00570 // bool equil = parameterList->get<bool>("Equil", true); 00571 // data_.options.Equil = equil ? SLUD::YES : SLUD::NO; 00572 data_.options.Equil = SLUD::NO; 00573 00574 if( parameterList->isParameter("ColPerm") ){ 00575 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator(); 00576 parameterList->getEntry("ColPerm").setValidator(colperm_validator); 00577 00578 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm"); 00579 } 00580 00581 // Always use the "NOROWPERM" option to avoid a serial bottleneck 00582 // with the weighted bipartite matching algorithm used for the 00583 // "LargeDiag" RowPerm. Note the inconsistency with the SuperLU 00584 // User guide (which states that the value should be "NATURAL"). 00585 data_.options.RowPerm = SLUD::NOROWPERM; 00586 00587 // TODO: Uncomment when supported 00588 // if( parameterList->isParameter("IterRefine") ){ 00589 // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator(); 00590 // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator); 00591 00592 // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine"); 00593 // } 00594 data_.options.IterRefine = SLUD::NOREFINE; 00595 00596 bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true); 00597 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO; 00598 } 00599 00600 00601 template <class Matrix, class Vector> 00602 Teuchos::RCP<const Teuchos::ParameterList> 00603 Superludist<Matrix,Vector>::getValidParameters_impl() const 00604 { 00605 using std::string; 00606 using Teuchos::tuple; 00607 using Teuchos::ParameterList; 00608 using Teuchos::EnhancedNumberValidator; 00609 using Teuchos::setStringToIntegralParameter; 00610 using Teuchos::stringToIntegralParameterEntryValidator; 00611 00612 static Teuchos::RCP<const Teuchos::ParameterList> valid_params; 00613 00614 if( is_null(valid_params) ){ 00615 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList(); 00616 00617 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator 00618 = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() ); 00619 col_row_validator->setMin(1); 00620 00621 pl->set("npcol", data_.grid.npcol, 00622 "Number of columns in the processor grid. " 00623 "Must be set with nprow", col_row_validator); 00624 pl->set("nprow", data_.grid.nprow, 00625 "Number of rows in the SuperLU_DIST processor grid. " 00626 "Must be set together with npcol", col_row_validator); 00627 00628 // validator will catch any value besides NOTRANS 00629 setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS", 00630 "Solve for the transpose system or not", 00631 tuple<string>("NOTRANS"), 00632 tuple<string>("Do not solve with transpose"), 00633 tuple<SLUD::trans_t>(SLUD::NOTRANS), 00634 pl.getRawPtr()); 00635 00636 // TODO: uncomment when supported 00637 // pl->set("Equil", false, "Whether to equilibrate the system before solve"); 00638 00639 // TODO: uncomment when supported 00640 // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE", 00641 // "Type of iterative refinement to use", 00642 // tuple<string>("NOREFINE", "DOUBLE"), 00643 // tuple<string>("Do not use iterative refinement", 00644 // "Do double iterative refinement"), 00645 // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE, 00646 // SLUD::DOUBLE), 00647 // pl.getRawPtr()); 00648 00649 pl->set("ReplaceTinyPivot", true, 00650 "Specifies whether to replace tiny diagonals during LU factorization"); 00651 00652 setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS", 00653 "Specifies how to permute the columns of the " 00654 "matrix for sparsity preservation", 00655 tuple<string>("NATURAL", "PARMETIS"), 00656 tuple<string>("Natural ordering", 00657 "ParMETIS ordering on A^T + A"), 00658 tuple<SLUD::colperm_t>(SLUD::NATURAL, 00659 SLUD::PARMETIS), 00660 pl.getRawPtr()); 00661 00662 valid_params = pl; 00663 } 00664 00665 return valid_params; 00666 } 00667 00668 00669 template <class Matrix, class Vector> 00670 void 00671 Superludist<Matrix,Vector>::get_default_grid_size(int nprocs, 00672 SLUD::int_t& nprow, 00673 SLUD::int_t& npcol) const { 00674 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1, 00675 std::invalid_argument, 00676 "Number of MPI processes must be at least 1" ); 00677 SLUD::int_t c, r = 1; 00678 while( r*r <= nprocs ) r++; 00679 nprow = npcol = --r; // fall back to square grid 00680 c = nprocs / r; 00681 while( (r--)*c != nprocs ){ 00682 c = nprocs / r; // note integer division 00683 } 00684 ++r; 00685 // prefer the square grid over a single row (which will only happen 00686 // in the case of a prime nprocs 00687 if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases 00688 nprow = r; 00689 npcol = c; 00690 } 00691 } 00692 00693 00694 template <class Matrix, class Vector> 00695 bool 00696 Superludist<Matrix,Vector>::loadA_impl(EPhase current_phase){ 00697 // Extract the necessary information from mat and call SLU function 00698 using Teuchos::Array; 00699 using Teuchos::ArrayView; 00700 using Teuchos::ptrInArg; 00701 using Teuchos::as; 00702 00703 using SLUD::int_t; 00704 00705 #ifdef HAVE_AMESOS2_TIMERS 00706 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_); 00707 #endif 00708 00709 // Cleanup old store memory if it's non-NULL 00710 if( data_.A.Store != NULL ){ 00711 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) ); 00712 data_.A.Store = NULL; 00713 } 00714 00715 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat 00716 = this->matrixA_->get(ptrInArg(*superlu_rowmap_)); 00717 00718 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row; 00719 l_nnz = as<int_t>(redist_mat->getLocalNNZ()); 00720 l_rows = as<int_t>(redist_mat->getLocalNumRows()); 00721 g_rows = as<int_t>(redist_mat->getGlobalNumRows()); 00722 g_cols = g_rows; // we deal with square matrices 00723 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex()); 00724 00725 nzvals_.resize(l_nnz); 00726 colind_.resize(l_nnz); 00727 rowptr_.resize(l_rows + 1); 00728 00729 int_t nnz_ret = 0; 00730 { 00731 #ifdef HAVE_AMESOS2_TIMERS 00732 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); 00733 #endif 00734 00735 Util::get_crs_helper< 00736 MatrixAdapter<Matrix>, 00737 slu_type, int_t, int_t >::do_get(redist_mat.ptr(), 00738 nzvals_(), colind_(), rowptr_(), 00739 nnz_ret, 00740 ptrInArg(*superlu_rowmap_), 00741 ARBITRARY); 00742 } 00743 00744 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz, 00745 std::runtime_error, 00746 "Did not get the expected number of non-zero vals"); 00747 00748 // Get the SLU data type for this type of matrix 00749 SLUD::Dtype_t dtype = type_map::dtype; 00750 00751 if( in_grid_ ){ 00752 function_map::create_CompRowLoc_Matrix(&(data_.A), 00753 g_rows, g_cols, 00754 l_nnz, l_rows, fst_global_row, 00755 nzvals_.getRawPtr(), 00756 colind_.getRawPtr(), 00757 rowptr_.getRawPtr(), 00758 SLUD::SLU_NR_loc, 00759 dtype, SLUD::SLU_GE); 00760 } 00761 00762 return true; 00763 } 00764 00765 00766 template<class Matrix, class Vector> 00767 const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST"; 00768 00769 00770 } // end namespace Amesos2 00771 00772 #endif // AMESOS2_SUPERLUDIST_DEF_HPP
1.7.6.1