|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef __MatrixMarket_Tpetra_hpp 00043 #define __MatrixMarket_Tpetra_hpp 00044 00056 #include "Tpetra_config.h" 00057 #include "Tpetra_CrsMatrix.hpp" 00058 #include "Tpetra_Vector.hpp" 00059 #include "Tpetra_ComputeGatherMap.hpp" 00060 #include "Teuchos_MatrixMarket_Raw_Adder.hpp" 00061 #include "Teuchos_MatrixMarket_SymmetrizingAdder.hpp" 00062 #include "Teuchos_MatrixMarket_assignScalar.hpp" 00063 #include "Teuchos_MatrixMarket_Banner.hpp" 00064 #include "Teuchos_MatrixMarket_CoordDataReader.hpp" 00065 #include "Teuchos_MatrixMarket_SetScientific.hpp" 00066 00067 #include <algorithm> 00068 #include <fstream> 00069 #include <iostream> 00070 #include <iterator> 00071 #include <vector> 00072 #include <stdexcept> 00073 00074 00075 // Macro that marks a function as "possibly unused," in order to 00076 // suppress build warnings. 00077 #if ! 
defined(TRILINOS_UNUSED_FUNCTION) 00078 # if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) 00079 # define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__)) 00080 # elif defined(__clang__) 00081 # if __has_attribute(unused) 00082 # define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__)) 00083 # else 00084 # define TRILINOS_UNUSED_FUNCTION 00085 # endif // Clang has 'unused' attribute 00086 # elif defined(__IBMCPP__) 00087 // IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'. 00088 // 00089 // http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp 00090 # define TRILINOS_UNUSED_FUNCTION 00091 # else // some other compiler 00092 # define TRILINOS_UNUSED_FUNCTION 00093 # endif 00094 #endif // ! defined(TRILINOS_UNUSED_FUNCTION) 00095 00096 00097 namespace Tpetra { 00126 namespace MatrixMarket { 00127 00128 namespace { 00129 #ifdef HAVE_MPI 00130 // We're communicating integers of type IntType. Figure 00131 // out the right MPI_Datatype for IntType. Usually it 00132 // is int or long, so these are good enough for now. 
00133 template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype 00134 getMpiDatatype () { 00135 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 00136 "Not implemented for IntType != int, long, or long long"); 00137 } 00138 00139 template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype 00140 getMpiDatatype<int> () { return MPI_INT; } 00141 00142 template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype 00143 getMpiDatatype<long> () { return MPI_LONG; } 00144 00145 template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype 00146 getMpiDatatype<long long> () { return MPI_LONG_LONG; } 00147 #endif // HAVE_MPI 00148 00149 template<class IntType> 00150 void 00151 reduceSum (const IntType sendbuf[], 00152 IntType recvbuf[], 00153 const int count, 00154 const int root, 00155 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) 00156 { 00157 #ifdef HAVE_MPI 00158 using Teuchos::RCP; 00159 using Teuchos::rcp_dynamic_cast; 00160 using Teuchos::MpiComm; 00161 00162 // Get the raw MPI communicator, so we don't have to wrap MPI_Reduce in Teuchos. 00163 RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> > (comm); 00164 if (! mpiComm.is_null ()) { 00165 MPI_Datatype rawMpiType = getMpiDatatype<IntType> (); 00166 MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ()); 00167 const int err = 00168 MPI_Reduce (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)), 00169 reinterpret_cast<void*> (const_cast<IntType*> (recvbuf)), 00170 count, rawMpiType, MPI_SUM, root, rawMpiComm); 00171 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, "MPI_Reduce failed"); 00172 return; 00173 } 00174 #endif // HAVE_MPI 00175 // Serial communicator case: just copy. count is the 00176 // amount to receive, so it's the amount to copy. 
00177 std::copy (sendbuf, sendbuf + count, recvbuf); 00178 } 00179 00180 } // namespace (anonymous) 00181 00237 template<class SparseMatrixType> 00238 class Reader { 00239 public: 00241 typedef SparseMatrixType sparse_matrix_type; 00242 typedef RCP<sparse_matrix_type> sparse_matrix_ptr; 00243 00246 typedef typename SparseMatrixType::scalar_type scalar_type; 00249 typedef typename SparseMatrixType::local_ordinal_type local_ordinal_type; 00257 typedef typename SparseMatrixType::global_ordinal_type 00258 global_ordinal_type; 00261 typedef typename SparseMatrixType::node_type node_type; 00262 00264 typedef MultiVector<scalar_type, 00265 local_ordinal_type, 00266 global_ordinal_type, 00267 node_type> multivector_type; 00268 00270 typedef Vector<scalar_type, 00271 local_ordinal_type, 00272 global_ordinal_type, 00273 node_type> vector_type; 00274 00275 typedef RCP<node_type> node_ptr; 00276 typedef Comm<int> comm_type; 00277 typedef RCP<const comm_type> comm_ptr; 00278 typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type; 00279 typedef RCP<const map_type> map_ptr; 00280 00281 private: 00284 typedef typename ArrayRCP<global_ordinal_type>::size_type size_type; 00285 00297 static RCP<const map_type> 00298 makeRangeMap (const RCP<const comm_type>& pComm, 00299 const RCP<node_type>& pNode, 00300 const global_ordinal_type numRows) 00301 { 00302 // A conventional, uniformly partitioned, contiguous map. 00303 return rcp (new map_type (static_cast<global_size_t> (numRows), 00304 static_cast<global_ordinal_type> (0), 00305 pComm, GloballyDistributed, pNode)); 00306 } 00307 00336 static RCP<const map_type> 00337 makeRowMap (const RCP<const map_type>& pRowMap, 00338 const RCP<const comm_type>& pComm, 00339 const RCP<node_type>& pNode, 00340 const global_ordinal_type numRows) 00341 { 00342 // If the caller didn't provide a map, return a conventional, 00343 // uniformly partitioned, contiguous map. 
00344 if (pRowMap.is_null()) { 00345 return rcp (new map_type (static_cast<global_size_t> (numRows), 00346 static_cast<global_ordinal_type> (0), 00347 pComm, GloballyDistributed, pNode)); 00348 } else { 00349 TEUCHOS_TEST_FOR_EXCEPTION(! pRowMap->isDistributed() && pComm->getSize() > 1, 00350 std::invalid_argument, 00351 "The specified row map is not distributed, but " 00352 "the given communicator includes more than one " 00353 "rank (in fact, there are " << pComm->getSize() 00354 << " ranks)."); 00355 TEUCHOS_TEST_FOR_EXCEPTION(pRowMap->getComm() != pComm, 00356 std::invalid_argument, 00357 "The specified row map's communicator (pRowMap->" 00358 "getComm()) is different than the given separately " 00359 "supplied communicator pComm."); 00360 TEUCHOS_TEST_FOR_EXCEPTION(pRowMap->getNode() != pNode, 00361 std::invalid_argument, 00362 "The specified row map's node (pRowMap->getNode()) " 00363 "is different than the given separately supplied " 00364 "node pNode."); 00365 return pRowMap; 00366 } 00367 } 00368 00383 static Teuchos::RCP<const map_type> 00384 makeDomainMap (const Teuchos::RCP<const map_type>& pRangeMap, 00385 const global_ordinal_type numRows, 00386 const global_ordinal_type numCols) 00387 { 00388 // Abbreviations so that the map creation call isn't too long. 
00389 typedef local_ordinal_type LO; 00390 typedef global_ordinal_type GO; 00391 typedef node_type NT; 00392 00393 if (numRows == numCols) { 00394 return pRangeMap; 00395 } else { 00396 return createUniformContigMapWithNode<LO,GO,NT> (numCols, 00397 pRangeMap->getComm (), 00398 pRangeMap->getNode ()); 00399 } 00400 } 00401 00474 static void 00475 distribute (ArrayRCP<size_t>& myNumEntriesPerRow, 00476 ArrayRCP<size_t>& myRowPtr, 00477 ArrayRCP<global_ordinal_type>& myColInd, 00478 ArrayRCP<scalar_type>& myValues, 00479 const RCP<const map_type>& pRowMap, 00480 ArrayRCP<size_t>& numEntriesPerRow, 00481 ArrayRCP<size_t>& rowPtr, 00482 ArrayRCP<global_ordinal_type>& colInd, 00483 ArrayRCP<scalar_type>& values, 00484 const bool debug=false) 00485 { 00486 using Teuchos::as; 00487 using Teuchos::CommRequest; 00488 using Teuchos::receive; 00489 using Teuchos::send; 00490 using std::cerr; 00491 using std::endl; 00492 00493 const bool extraDebug = false; 00494 comm_ptr pComm = pRowMap->getComm(); 00495 const int numProcs = pComm->getSize (); 00496 const int myRank = pComm->getRank (); 00497 const int rootRank = 0; 00498 00499 // Type abbreviations to make the code more concise. 00500 typedef global_ordinal_type GO; 00501 00502 // List of the global indices of my rows. They may or may 00503 // not be contiguous, and the row map need not be one-to-one. 00504 ArrayView<const GO> myRows = pRowMap->getNodeElementList(); 00505 const size_type myNumRows = myRows.size(); 00506 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(myNumRows) != 00507 pRowMap->getNodeNumElements(), 00508 std::logic_error, 00509 "pRowMap->getNodeElementList().size() = " 00510 << myNumRows 00511 << " != pRowMap->getNodeNumElements() = " 00512 << pRowMap->getNodeNumElements() << ". 
" 00513 "Please report this bug to the Tpetra developers."); 00514 TEUCHOS_TEST_FOR_EXCEPTION(myRank == 0 && numEntriesPerRow.size() < myNumRows, 00515 std::logic_error, 00516 "On Proc 0: numEntriesPerRow.size() = " 00517 << numEntriesPerRow.size() 00518 << " != pRowMap->getNodeElementList().size() = " 00519 << myNumRows << ". Please report this bug to the " 00520 "Tpetra developers."); 00521 00522 // Space for my proc's number of entries per row. Will be 00523 // filled in below. 00524 myNumEntriesPerRow = arcp<size_t> (myNumRows); 00525 00526 if (myRank != rootRank) { 00527 // Tell the root how many rows we have. If we're sending 00528 // none, then we don't have anything else to send, nor does 00529 // the root have to receive anything else. 00530 send (*pComm, myNumRows, rootRank); 00531 if (myNumRows != 0) { 00532 // Now send my rows' global indices. Hopefully the cast 00533 // to int doesn't overflow. This is unlikely, since it 00534 // should fit in a LO, even though it is a GO. 00535 send (*pComm, static_cast<int> (myNumRows), 00536 myRows.getRawPtr(), rootRank); 00537 00538 // I (this proc) don't care if my global row indices are 00539 // contiguous, though the root proc does (since otherwise 00540 // it needs to pack noncontiguous data into contiguous 00541 // storage before sending). That's why we don't check 00542 // for contiguousness here. 00543 00544 // Ask the root process for my part of the array of the 00545 // number of entries per row. 00546 receive (*pComm, rootRank, 00547 static_cast<int> (myNumRows), 00548 myNumEntriesPerRow.getRawPtr()); 00549 00550 // Use the resulting array to figure out how many column 00551 // indices and values I should ask from the root process. 00552 const local_ordinal_type myNumEntries = 00553 std::accumulate (myNumEntriesPerRow.begin(), 00554 myNumEntriesPerRow.end(), 0); 00555 00556 // Make space for my entries of the sparse matrix. Note 00557 // that they don't have to be sorted by row index. 
00558 // Iterating through all my rows requires computing a 00559 // running sum over myNumEntriesPerRow. 00560 myColInd = arcp<GO> (myNumEntries); 00561 myValues = arcp<scalar_type> (myNumEntries); 00562 if (myNumEntries > 0) { 00563 // Ask for that many column indices and values, if 00564 // there are any. 00565 receive (*pComm, rootRank, 00566 static_cast<int> (myNumEntries), 00567 myColInd.getRawPtr()); 00568 receive (*pComm, rootRank, 00569 static_cast<int> (myNumEntries), 00570 myValues.getRawPtr()); 00571 } 00572 } // If I own at least one row 00573 } // If I am not the root processor 00574 else { // I _am_ the root processor 00575 if (debug) { 00576 cerr << "-- Proc 0: Copying my data from global arrays" << endl; 00577 } 00578 // Proc 0 still needs to (allocate, if not done already) 00579 // and fill its part of the matrix (my*). 00580 for (size_type k = 0; k < myNumRows; ++k) { 00581 const GO myCurRow = myRows[k]; 00582 const local_ordinal_type numEntriesInThisRow = numEntriesPerRow[myCurRow]; 00583 myNumEntriesPerRow[k] = numEntriesInThisRow; 00584 } 00585 if (extraDebug && debug) { 00586 cerr << "Proc " << pRowMap->getComm ()->getRank () 00587 << ": myNumEntriesPerRow[0.." << (myNumRows-1) << "] = ["; 00588 for (size_type k = 0; k < myNumRows; ++k) { 00589 cerr << myNumEntriesPerRow[k]; 00590 if (k < myNumRows-1) { 00591 cerr << " "; 00592 } 00593 } 00594 cerr << "]" << endl; 00595 } 00596 // The total number of matrix entries that my proc owns. 00597 const local_ordinal_type myNumEntries = 00598 std::accumulate (myNumEntriesPerRow.begin(), 00599 myNumEntriesPerRow.end(), 0); 00600 if (debug) { 00601 cerr << "-- Proc 0: I own " << myNumRows << " rows and " 00602 << myNumEntries << " entries" << endl; 00603 } 00604 myColInd = arcp<GO> (myNumEntries); 00605 myValues = arcp<scalar_type> (myNumEntries); 00606 00607 // Copy Proc 0's part of the matrix into the my* arrays. 
00608 // It's important that myCurPos be updated _before_ k, 00609 // otherwise myCurPos will get the wrong number of entries 00610 // per row (it should be for the row in the just-completed 00611 // iteration, not for the next iteration's row). 00612 local_ordinal_type myCurPos = 0; 00613 for (size_type k = 0; k < myNumRows; 00614 myCurPos += myNumEntriesPerRow[k], ++k) { 00615 const local_ordinal_type curNumEntries = myNumEntriesPerRow[k]; 00616 const GO myRow = myRows[k]; 00617 const size_t curPos = rowPtr[myRow]; 00618 // Only copy if there are entries to copy, in order not 00619 // to construct empty ranges for the ArrayRCP views. 00620 if (curNumEntries > 0) { 00621 ArrayView<GO> colIndView = colInd (curPos, curNumEntries); 00622 ArrayView<GO> myColIndView = myColInd (myCurPos, curNumEntries); 00623 std::copy (colIndView.begin(), colIndView.end(), 00624 myColIndView.begin()); 00625 00626 ArrayView<scalar_type> valuesView = 00627 values (curPos, curNumEntries); 00628 ArrayView<scalar_type> myValuesView = 00629 myValues (myCurPos, curNumEntries); 00630 std::copy (valuesView.begin(), valuesView.end(), 00631 myValuesView.begin()); 00632 } 00633 } 00634 00635 // Proc 0 processes each other proc p in turn. 00636 for (int p = 1; p < numProcs; ++p) { 00637 if (debug) { 00638 cerr << "-- Proc 0: Processing proc " << p << endl; 00639 } 00640 00641 size_type theirNumRows = 0; 00642 // Ask Proc p how many rows it has. If it doesn't 00643 // have any, we can move on to the next proc. This 00644 // has to be a standard receive so that we can avoid 00645 // the degenerate case of sending zero data. 00646 receive (*pComm, p, &theirNumRows); 00647 if (debug) { 00648 cerr << "-- Proc 0: Proc " << p << " owns " 00649 << theirNumRows << " rows" << endl; 00650 } 00651 if (theirNumRows != 0) { 00652 // Ask Proc p which rows it owns. The resulting global 00653 // row indices are not guaranteed to be contiguous or 00654 // sorted. 
Global row indices are themselves indices 00655 // into the numEntriesPerRow array. 00656 ArrayRCP<GO> theirRows = arcp<GO> (theirNumRows); 00657 receive (*pComm, p, as<int> (theirNumRows), 00658 theirRows.getRawPtr ()); 00659 // Extra test to make sure that the rows we received 00660 // are all sensible. This is a good idea since we are 00661 // going to use the global row indices we've received 00662 // to index into the numEntriesPerRow array. Better to 00663 // catch any bugs here and print a sensible error 00664 // message, rather than segfault and print a cryptic 00665 // error message. 00666 { 00667 const global_size_t numRows = pRowMap->getGlobalNumElements (); 00668 const GO indexBase = pRowMap->getIndexBase (); 00669 bool theirRowsValid = true; 00670 for (size_type k = 0; k < theirNumRows; ++k) { 00671 if (theirRows[k] < indexBase || 00672 as<global_size_t> (theirRows[k] - indexBase) >= numRows) { 00673 theirRowsValid = false; 00674 } 00675 } 00676 if (! theirRowsValid) { 00677 TEUCHOS_TEST_FOR_EXCEPTION( 00678 ! theirRowsValid, std::logic_error, 00679 "Proc " << p << " has at least one invalid row index. " 00680 "Here are all of them: " << 00681 Teuchos::toString (theirRows ()) << ". Valid row index " 00682 "range (zero-based): [0, " << (numRows - 1) << "]."); 00683 } 00684 } 00685 00686 // Perhaps we could save a little work if we check 00687 // whether Proc p's row indices are contiguous. That 00688 // would make lookups in the global data arrays 00689 // faster. For now, we just implement the general 00690 // case and don't prematurely optimize. (Remember 00691 // that you're making Proc 0 read the whole file, so 00692 // you've already lost scalability.) 00693 00694 // Compute the number of entries in each of Proc p's 00695 // rows. (Proc p will compute its row pointer array 00696 // on its own, after it gets the data from Proc 0.) 
00697 ArrayRCP<size_t> theirNumEntriesPerRow; 00698 theirNumEntriesPerRow = arcp<size_t> (theirNumRows); 00699 for (size_type k = 0; k < theirNumRows; ++k) { 00700 theirNumEntriesPerRow[k] = numEntriesPerRow[theirRows[k]]; 00701 } 00702 00703 // Tell Proc p the number of entries in each of its 00704 // rows. Hopefully the cast to int doesn't overflow. 00705 // This is unlikely, since it should fit in a LO, 00706 // even though it is a GO. 00707 send (*pComm, static_cast<int> (theirNumRows), 00708 theirNumEntriesPerRow.getRawPtr(), p); 00709 00710 // Figure out how many entries Proc p owns. 00711 const local_ordinal_type theirNumEntries = 00712 std::accumulate (theirNumEntriesPerRow.begin(), 00713 theirNumEntriesPerRow.end(), 0); 00714 00715 if (debug) { 00716 cerr << "-- Proc 0: Proc " << p << " owns " 00717 << theirNumEntries << " entries" << endl; 00718 } 00719 00720 // If there are no entries to send, then we're done 00721 // with Proc p. 00722 if (theirNumEntries == 0) { 00723 continue; 00724 } 00725 00726 // Construct (views of) proc p's column indices and 00727 // values. Later, we might like to optimize for the 00728 // (common) contiguous case, for which we don't need to 00729 // copy data into separate "their*" arrays (we can just 00730 // use contiguous views of the global arrays). 00731 ArrayRCP<GO> theirColInd (theirNumEntries); 00732 ArrayRCP<scalar_type> theirValues (theirNumEntries); 00733 // Copy Proc p's part of the matrix into the their* 00734 // arrays. It's important that theirCurPos be updated 00735 // _before_ k, otherwise theirCurPos will get the wrong 00736 // number of entries per row (it should be for the row 00737 // in the just-completed iteration, not for the next 00738 // iteration's row). 
00739 local_ordinal_type theirCurPos = 0; 00740 for (size_type k = 0; k < theirNumRows; 00741 theirCurPos += theirNumEntriesPerRow[k], k++) { 00742 const local_ordinal_type curNumEntries = theirNumEntriesPerRow[k]; 00743 const GO theirRow = theirRows[k]; 00744 const local_ordinal_type curPos = rowPtr[theirRow]; 00745 00746 // Only copy if there are entries to copy, in order 00747 // not to construct empty ranges for the ArrayRCP 00748 // views. 00749 if (curNumEntries > 0) { 00750 ArrayView<GO> colIndView = 00751 colInd (curPos, curNumEntries); 00752 ArrayView<GO> theirColIndView = 00753 theirColInd (theirCurPos, curNumEntries); 00754 std::copy (colIndView.begin(), colIndView.end(), 00755 theirColIndView.begin()); 00756 00757 ArrayView<scalar_type> valuesView = 00758 values (curPos, curNumEntries); 00759 ArrayView<scalar_type> theirValuesView = 00760 theirValues (theirCurPos, curNumEntries); 00761 std::copy (valuesView.begin(), valuesView.end(), 00762 theirValuesView.begin()); 00763 } 00764 } 00765 // Send Proc p its column indices and values. 00766 // Hopefully the cast to int doesn't overflow. This 00767 // is unlikely, since it should fit in a LO, even 00768 // though it is a GO. 00769 send (*pComm, static_cast<int> (theirNumEntries), 00770 theirColInd.getRawPtr(), p); 00771 send (*pComm, static_cast<int> (theirNumEntries), 00772 theirValues.getRawPtr(), p); 00773 00774 if (debug) { 00775 cerr << "-- Proc 0: Finished with proc " << p << endl; 00776 } 00777 } // If proc p owns at least one row 00778 } // For each proc p not the root proc 0 00779 } // If I'm (not) the root proc 0 00780 00781 // Invalidate the input data to save space, since we don't 00782 // need it anymore. 
00783 numEntriesPerRow = null; 00784 rowPtr = null; 00785 colInd = null; 00786 values = null; 00787 00788 if (debug && myRank == 0) { 00789 cerr << "-- Proc 0: About to fill in myRowPtr" << endl; 00790 } 00791 00792 // Allocate and fill in myRowPtr (the row pointer array for 00793 // my rank's rows). We delay this until the end because we 00794 // don't need it to compute anything else in distribute(). 00795 // Each proc can do this work for itself, since it only needs 00796 // myNumEntriesPerRow to do so. 00797 myRowPtr = arcp<size_t> (myNumRows+1); 00798 myRowPtr[0] = 0; 00799 for (size_type k = 1; k < myNumRows+1; ++k) { 00800 myRowPtr[k] = myRowPtr[k-1] + myNumEntriesPerRow[k-1]; 00801 } 00802 if (extraDebug && debug) { 00803 cerr << "Proc " << Teuchos::rank (*(pRowMap->getComm())) 00804 << ": myRowPtr[0.." << myNumRows << "] = ["; 00805 for (size_type k = 0; k < myNumRows+1; ++k) { 00806 cerr << myRowPtr[k]; 00807 if (k < myNumRows) { 00808 cerr << " "; 00809 } 00810 } 00811 cerr << "]" << endl << endl; 00812 } 00813 00814 if (debug && myRank == 0) { 00815 cerr << "-- Proc 0: Done with distribute" << endl; 00816 } 00817 } 00818 00832 static sparse_matrix_ptr 00833 makeMatrix (ArrayRCP<size_t>& myNumEntriesPerRow, 00834 ArrayRCP<size_t>& myRowPtr, 00835 ArrayRCP<global_ordinal_type>& myColInd, 00836 ArrayRCP<scalar_type>& myValues, 00837 const map_ptr& pRowMap, 00838 const map_ptr& pRangeMap, 00839 const map_ptr& pDomainMap, 00840 const bool callFillComplete = true) 00841 { 00842 using std::cerr; 00843 using std::endl; 00844 // Typedef to make certain type declarations shorter. 00845 typedef global_ordinal_type GO; 00846 00847 // The row pointer array always has at least one entry, even 00848 // if the matrix has zero rows. myNumEntriesPerRow, myColInd, 00849 // and myValues would all be empty arrays in that degenerate 00850 // case, but the row and domain maps would still be nonnull 00851 // (though they would be trivial maps). 
00852 TEUCHOS_TEST_FOR_EXCEPTION(myRowPtr.is_null(), std::logic_error, 00853 "makeMatrix: myRowPtr array is null. " 00854 "Please report this bug to the Tpetra developers."); 00855 TEUCHOS_TEST_FOR_EXCEPTION(pDomainMap.is_null(), std::logic_error, 00856 "makeMatrix: domain map is null. " 00857 "Please report this bug to the Tpetra developers."); 00858 TEUCHOS_TEST_FOR_EXCEPTION(pRangeMap.is_null(), std::logic_error, 00859 "makeMatrix: range map is null. " 00860 "Please report this bug to the Tpetra developers."); 00861 TEUCHOS_TEST_FOR_EXCEPTION(pRowMap.is_null(), std::logic_error, 00862 "makeMatrix: row map is null. " 00863 "Please report this bug to the Tpetra developers."); 00864 00865 // Construct the CrsMatrix, using the row map, with the 00866 // constructor specifying the number of nonzeros for each row. 00867 // Create with DynamicProfile, so that the fillComplete() can 00868 // do first-touch reallocation (a NUMA (Non-Uniform Memory 00869 // Access) optimization on multicore CPUs). 00870 sparse_matrix_ptr A = 00871 rcp (new sparse_matrix_type (pRowMap, myNumEntriesPerRow, 00872 DynamicProfile)); 00873 00874 // List of the global indices of my rows. 00875 // They may or may not be contiguous. 00876 ArrayView<const GO> myRows = pRowMap->getNodeElementList (); 00877 const size_type myNumRows = myRows.size (); 00878 00879 // Add this processor's matrix entries to the CrsMatrix. 00880 const GO indexBase = pRowMap->getIndexBase (); 00881 for (size_type i = 0; i < myNumRows; ++i) { 00882 const size_type myCurPos = myRowPtr[i]; 00883 const local_ordinal_type curNumEntries = myNumEntriesPerRow[i]; 00884 ArrayView<GO> curColInd = myColInd.view (myCurPos, curNumEntries); 00885 ArrayView<scalar_type> curValues = myValues.view (myCurPos, curNumEntries); 00886 00887 // Modify the column indices in place to have the right index base. 
00888 for (size_type k = 0; k < curNumEntries; ++k) { 00889 curColInd[k] += indexBase; 00890 } 00891 // Avoid constructing empty views of ArrayRCP objects. 00892 if (curNumEntries > 0) { 00893 A->insertGlobalValues (myRows[i], curColInd, curValues); 00894 } 00895 } 00896 // We've entered in all our matrix entries, so we can delete 00897 // the original data. This will save memory when we call 00898 // fillComplete(), so that we never keep more than two copies 00899 // of the matrix's data in memory at once. 00900 myNumEntriesPerRow = null; 00901 myRowPtr = null; 00902 myColInd = null; 00903 myValues = null; 00904 00905 if (callFillComplete) { 00906 A->fillComplete (pDomainMap, pRangeMap); 00907 } 00908 return A; 00909 } 00910 00916 static sparse_matrix_ptr 00917 makeMatrix (ArrayRCP<size_t>& myNumEntriesPerRow, 00918 ArrayRCP<size_t>& myRowPtr, 00919 ArrayRCP<global_ordinal_type>& myColInd, 00920 ArrayRCP<scalar_type>& myValues, 00921 const map_ptr& pRowMap, 00922 const map_ptr& pRangeMap, 00923 const map_ptr& pDomainMap, 00924 const RCP<Teuchos::ParameterList>& constructorParams, 00925 const RCP<Teuchos::ParameterList>& fillCompleteParams) 00926 { 00927 using std::cerr; 00928 using std::endl; 00929 // Typedef to make certain type declarations shorter. 00930 typedef global_ordinal_type GO; 00931 00932 // The row pointer array always has at least one entry, even 00933 // if the matrix has zero rows. myNumEntriesPerRow, myColInd, 00934 // and myValues would all be empty arrays in that degenerate 00935 // case, but the row and domain maps would still be nonnull 00936 // (though they would be trivial maps). 00937 TEUCHOS_TEST_FOR_EXCEPTION( 00938 myRowPtr.is_null(), std::logic_error, 00939 "makeMatrix: myRowPtr array is null. " 00940 "Please report this bug to the Tpetra developers."); 00941 TEUCHOS_TEST_FOR_EXCEPTION( 00942 pDomainMap.is_null(), std::logic_error, 00943 "makeMatrix: domain map is null. 
" 00944 "Please report this bug to the Tpetra developers."); 00945 TEUCHOS_TEST_FOR_EXCEPTION( 00946 pRangeMap.is_null(), std::logic_error, 00947 "makeMatrix: range map is null. " 00948 "Please report this bug to the Tpetra developers."); 00949 TEUCHOS_TEST_FOR_EXCEPTION( 00950 pRowMap.is_null(), std::logic_error, 00951 "makeMatrix: row map is null. " 00952 "Please report this bug to the Tpetra developers."); 00953 00954 // Construct the CrsMatrix, using the row map, with the 00955 // constructor specifying the number of nonzeros for each row. 00956 // Create with DynamicProfile, so that the fillComplete() can 00957 // do first-touch reallocation (a NUMA (Non-Uniform Memory 00958 // Access) optimization on multicore CPUs). 00959 sparse_matrix_ptr A = 00960 rcp (new sparse_matrix_type (pRowMap, myNumEntriesPerRow, 00961 DynamicProfile, constructorParams)); 00962 00963 // List of the global indices of my rows. 00964 // They may or may not be contiguous. 00965 ArrayView<const GO> myRows = pRowMap->getNodeElementList(); 00966 const size_type myNumRows = myRows.size(); 00967 00968 // Add this processor's matrix entries to the CrsMatrix. 00969 const GO indexBase = pRowMap->getIndexBase (); 00970 for (size_type i = 0; i < myNumRows; ++i) { 00971 const size_type myCurPos = myRowPtr[i]; 00972 const local_ordinal_type curNumEntries = myNumEntriesPerRow[i]; 00973 ArrayView<GO> curColInd = myColInd.view (myCurPos, curNumEntries); 00974 ArrayView<scalar_type> curValues = myValues.view (myCurPos, curNumEntries); 00975 00976 // Modify the column indices in place to have the right index base. 00977 for (size_type k = 0; k < curNumEntries; ++k) { 00978 curColInd[k] += indexBase; 00979 } 00980 if (curNumEntries > 0) { 00981 A->insertGlobalValues (myRows[i], curColInd, curValues); 00982 } 00983 } 00984 // We've entered in all our matrix entries, so we can delete 00985 // the original data. 
This will save memory when we call 00986 // fillComplete(), so that we never keep more than two copies 00987 // of the matrix's data in memory at once. 00988 myNumEntriesPerRow = null; 00989 myRowPtr = null; 00990 myColInd = null; 00991 myValues = null; 00992 00993 A->fillComplete (pDomainMap, pRangeMap, fillCompleteParams); 00994 return A; 00995 } 00996 01001 static sparse_matrix_ptr 01002 makeMatrix (Teuchos::ArrayRCP<size_t>& myNumEntriesPerRow, 01003 Teuchos::ArrayRCP<size_t>& myRowPtr, 01004 Teuchos::ArrayRCP<global_ordinal_type>& myColInd, 01005 Teuchos::ArrayRCP<scalar_type>& myValues, 01006 const Teuchos::RCP<const map_type>& rowMap, 01007 Teuchos::RCP<const map_type>& colMap, 01008 const Teuchos::RCP<const map_type>& domainMap, 01009 const Teuchos::RCP<const map_type>& rangeMap, 01010 const bool callFillComplete = true) 01011 { 01012 using Teuchos::ArrayView; 01013 using Teuchos::as; 01014 using Teuchos::null; 01015 using Teuchos::RCP; 01016 using Teuchos::rcp; 01017 typedef global_ordinal_type GO; 01018 typedef typename ArrayView<const GO>::size_type size_type; 01019 01020 // Construct the CrsMatrix. 01021 // 01022 // Create with DynamicProfile, so that the fillComplete() can 01023 // do first-touch reallocation. 01024 RCP<sparse_matrix_type> A; // the matrix to return. 01025 if (colMap.is_null ()) { // the user didn't provide a column Map 01026 A = rcp (new sparse_matrix_type (rowMap, myNumEntriesPerRow, DynamicProfile)); 01027 } else { // the user provided a column Map 01028 A = rcp (new sparse_matrix_type (rowMap, colMap, myNumEntriesPerRow, DynamicProfile)); 01029 } 01030 01031 // List of the global indices of my rows. 01032 // They may or may not be contiguous. 01033 ArrayView<const GO> myRows = rowMap->getNodeElementList (); 01034 const size_type myNumRows = myRows.size (); 01035 01036 // Add this process' matrix entries to the CrsMatrix. 
01037 const GO indexBase = rowMap->getIndexBase (); 01038 for (size_type i = 0; i < myNumRows; ++i) { 01039 const size_type myCurPos = myRowPtr[i]; 01040 const size_type curNumEntries = as<size_type> (myNumEntriesPerRow[i]); 01041 ArrayView<GO> curColInd = myColInd.view (myCurPos, curNumEntries); 01042 ArrayView<scalar_type> curValues = myValues.view (myCurPos, curNumEntries); 01043 01044 // Modify the column indices in place to have the right index base. 01045 for (size_type k = 0; k < curNumEntries; ++k) { 01046 curColInd[k] += indexBase; 01047 } 01048 if (curNumEntries > 0) { 01049 A->insertGlobalValues (myRows[i], curColInd, curValues); 01050 } 01051 } 01052 // We've entered in all our matrix entries, so we can delete 01053 // the original data. This will save memory when we call 01054 // fillComplete(), so that we never keep more than two copies 01055 // of the matrix's data in memory at once. 01056 myNumEntriesPerRow = null; 01057 myRowPtr = null; 01058 myColInd = null; 01059 myValues = null; 01060 01061 if (callFillComplete) { 01062 A->fillComplete (domainMap, rangeMap); 01063 if (colMap.is_null ()) { 01064 colMap = A->getColMap (); 01065 } 01066 } 01067 return A; 01068 } 01069 01070 private: 01071 01088 static RCP<const Teuchos::MatrixMarket::Banner> 01089 readBanner (std::istream& in, 01090 size_t& lineNumber, 01091 const bool tolerant=false, 01092 const bool debug=false) 01093 { 01094 using Teuchos::MatrixMarket::Banner; 01095 using std::cerr; 01096 using std::endl; 01097 typedef Teuchos::ScalarTraits<scalar_type> STS; 01098 01099 RCP<Banner> pBanner; // On output, if successful: the read-in Banner. 01100 std::string line; // If read from stream successful: the Banner line 01101 01102 // Try to read a line from the input stream. 01103 const bool readFailed = ! 
getline(in, line); 01104 TEUCHOS_TEST_FOR_EXCEPTION(readFailed, std::invalid_argument, 01105 "Failed to get Matrix Market banner line from input."); 01106 01107 // We read a line from the input stream. 01108 lineNumber++; 01109 01110 // Assume that the line we found is the Banner line. 01111 try { 01112 pBanner = rcp (new Banner (line, tolerant)); 01113 } catch (std::exception& e) { 01114 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, 01115 "Matrix Market banner line contains syntax error(s): " 01116 << e.what()); 01117 } 01118 TEUCHOS_TEST_FOR_EXCEPTION(pBanner->objectType() != "matrix", 01119 std::invalid_argument, "The Matrix Market file does not contain " 01120 "matrix data. Its Banner (first) line says that its object type is \"" 01121 << pBanner->matrixType() << "\", rather than the required \"matrix\"."); 01122 01123 // Validate the data type of the matrix, with respect to the 01124 // Scalar type of the CrsMatrix entries. 01125 TEUCHOS_TEST_FOR_EXCEPTION( 01126 ! STS::isComplex && pBanner->dataType() == "complex", 01127 std::invalid_argument, 01128 "The Matrix Market file contains complex-valued data, but you are " 01129 "trying to read it into a matrix containing entries of the real-" 01130 "valued Scalar type \"" 01131 << Teuchos::TypeNameTraits<scalar_type>::name() << "\"."); 01132 TEUCHOS_TEST_FOR_EXCEPTION( 01133 pBanner->dataType() != "real" && 01134 pBanner->dataType() != "complex" && 01135 pBanner->dataType() != "integer", 01136 std::invalid_argument, 01137 "When reading Matrix Market data into a Tpetra::CrsMatrix, the " 01138 "Matrix Market file may not contain a \"pattern\" matrix. A " 01139 "pattern matrix is really just a graph with no weights. 
It " 01140 "should be stored in a CrsGraph, not a CrsMatrix."); 01141 01142 return pBanner; 01143 } 01144 01167 static Tuple<global_ordinal_type, 3> 01168 readCoordDims (std::istream& in, 01169 size_t& lineNumber, 01170 const RCP<const Teuchos::MatrixMarket::Banner>& pBanner, 01171 const comm_ptr& pComm, 01172 const bool tolerant = false, 01173 const bool debug = false) 01174 { 01175 using Teuchos::MatrixMarket::readCoordinateDimensions; 01176 01177 // Packed coordinate matrix dimensions (numRows, numCols, 01178 // numNonzeros); computed on Rank 0 and broadcasted to all MPI 01179 // ranks. 01180 Tuple<global_ordinal_type, 3> dims; 01181 01182 // Read in the coordinate matrix dimensions from the input 01183 // stream. "success" tells us whether reading in the 01184 // coordinate matrix dimensions succeeded ("Guilty unless 01185 // proven innocent"). 01186 bool success = false; 01187 if (pComm->getRank() == 0) { 01188 TEUCHOS_TEST_FOR_EXCEPTION(pBanner->matrixType() != "coordinate", 01189 std::invalid_argument, "The Tpetra::CrsMatrix Matrix Market reader " 01190 "only accepts \"coordinate\" (sparse) matrix data."); 01191 // Unpacked coordinate matrix dimensions 01192 global_ordinal_type numRows, numCols, numNonzeros; 01193 // Only MPI Rank 0 reads from the input stream 01194 success = readCoordinateDimensions (in, numRows, numCols, 01195 numNonzeros, lineNumber, 01196 tolerant); 01197 // Pack up the data into a Tuple so we can send them with 01198 // one broadcast instead of three. 01199 dims[0] = numRows; 01200 dims[1] = numCols; 01201 dims[2] = numNonzeros; 01202 } 01203 // Only Rank 0 did the reading, so it decides success. 01204 // 01205 // FIXME (mfh 02 Feb 2011) Teuchos::broadcast doesn't know how 01206 // to send bools. For now, we convert to/from int instead, 01207 // using the usual "true is 1, false is 0" encoding. 01208 { 01209 int the_success = success ? 
1 : 0; // only matters on MPI Rank 0 01210 Teuchos::broadcast (*pComm, 0, &the_success); 01211 success = (the_success == 1); 01212 } 01213 if (success) { 01214 // Broadcast (numRows, numCols, numNonzeros) from Rank 0 01215 // to all the other MPI ranks. 01216 Teuchos::broadcast (*pComm, 0, dims); 01217 } 01218 else { 01219 // Perhaps in tolerant mode, we could set all the 01220 // dimensions to zero for now, and deduce correct 01221 // dimensions by reading all of the file's entries and 01222 // computing the max(row index) and max(column index). 01223 // However, for now we just error out in that case. 01224 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, 01225 "Error reading Matrix Market sparse matrix: failed to read " 01226 "coordinate matrix dimensions."); 01227 } 01228 return dims; 01229 } 01230 01241 typedef Teuchos::MatrixMarket::SymmetrizingAdder<Teuchos::MatrixMarket::Raw::Adder<scalar_type, global_ordinal_type> > adder_type; 01242 01268 static RCP<adder_type> 01269 makeAdder (const RCP<const Comm<int> >& pComm, 01270 RCP<const Teuchos::MatrixMarket::Banner>& pBanner, 01271 const Tuple<global_ordinal_type, 3>& dims, 01272 const bool tolerant=false, 01273 const bool debug=false) 01274 { 01275 if (pComm->getRank() == 0) { 01276 typedef Teuchos::MatrixMarket::Raw::Adder<scalar_type, global_ordinal_type> raw_adder_type; 01277 RCP<raw_adder_type> pRaw = 01278 rcp (new raw_adder_type (dims[0], dims[1], dims[2], tolerant, debug)); 01279 return rcp (new adder_type (pRaw, pBanner->symmType())); 01280 } 01281 else { 01282 return null; 01283 } 01284 } 01285 01286 public: 01287 01312 static sparse_matrix_ptr 01313 readSparseFile (const std::string& filename, 01314 const RCP<const Comm<int> >& pComm, 01315 const RCP<node_type>& pNode, 01316 const bool callFillComplete=true, 01317 const bool tolerant=false, 01318 const bool debug=false) 01319 { 01320 const int myRank = Teuchos::rank (*pComm); 01321 std::ifstream in; 01322 01323 // Only open the file on Rank 0. 
01324 if (myRank == 0) 01325 in.open (filename.c_str()); 01326 return readSparse (in, pComm, pNode, callFillComplete, tolerant, debug); 01327 // We can rely on the destructor of the input stream to close 01328 // the file on scope exit, even if readSparse() throws an 01329 // exception. 01330 } 01331 01361 static sparse_matrix_ptr 01362 readSparseFile (const std::string& filename, 01363 const RCP<const Comm<int> >& pComm, 01364 const RCP<node_type>& pNode, 01365 const RCP<Teuchos::ParameterList>& constructorParams, 01366 const RCP<Teuchos::ParameterList>& fillCompleteParams, 01367 const bool tolerant=false, 01368 const bool debug=false) 01369 { 01370 std::ifstream in; 01371 if (pComm->getRank () == 0) { // only open on Process 0 01372 in.open (filename.c_str()); 01373 } 01374 return readSparse (in, pComm, pNode, constructorParams, 01375 fillCompleteParams, tolerant, debug); 01376 } 01377 01415 static sparse_matrix_ptr 01416 readSparseFile (const std::string& filename, 01417 const RCP<const map_type>& rowMap, 01418 RCP<const map_type>& colMap, 01419 const RCP<const map_type>& domainMap, 01420 const RCP<const map_type>& rangeMap, 01421 const bool callFillComplete=true, 01422 const bool tolerant=false, 01423 const bool debug=false) 01424 { 01425 using Teuchos::broadcast; 01426 using Teuchos::outArg; 01427 TEUCHOS_TEST_FOR_EXCEPTION( 01428 rowMap.is_null (), std::invalid_argument, 01429 "Row Map must be nonnull."); 01430 01431 RCP<const Comm<int> > comm = rowMap->getComm (); 01432 RCP<node_type> node = rowMap->getNode (); 01433 const int myRank = comm->getRank (); 01434 01435 // Only open the file on Process 0. Test carefully to make 01436 // sure that the file opened successfully (and broadcast that 01437 // result to all processes to prevent a hang on exception 01438 // throw), since it's a common mistake to misspell a filename. 
01439 std::ifstream in; 01440 int opened = 0; 01441 if (myRank == 0) { 01442 try { 01443 in.open (filename.c_str ()); 01444 opened = 1; 01445 } 01446 catch (...) { 01447 opened = 0; 01448 } 01449 } 01450 broadcast<int, int> (*comm, 0, outArg (opened)); 01451 TEUCHOS_TEST_FOR_EXCEPTION( 01452 opened == 0, std::runtime_error, 01453 "readSparseFile: Failed to open file \"" << filename << "\" on " 01454 "Process 0."); 01455 return readSparse (in, rowMap, colMap, domainMap, rangeMap, 01456 callFillComplete, tolerant, debug); 01457 } 01458 01485 static sparse_matrix_ptr 01486 readSparse (std::istream& in, 01487 const RCP<const Comm<int> >& pComm, 01488 const RCP<node_type>& pNode, 01489 const bool callFillComplete=true, 01490 const bool tolerant=false, 01491 const bool debug=false) 01492 { 01493 using Teuchos::MatrixMarket::Banner; 01494 using Teuchos::broadcast; 01495 using Teuchos::ptr; 01496 using Teuchos::reduceAll; 01497 using std::cerr; 01498 using std::endl; 01499 typedef Teuchos::ScalarTraits<scalar_type> STS; 01500 01501 const bool extraDebug = false; 01502 const int myRank = pComm->getRank (); 01503 const int rootRank = 0; 01504 01505 // Current line number in the input stream. Various calls 01506 // will modify this depending on the number of lines that are 01507 // read from the input stream. Only Rank 0 modifies this. 01508 size_t lineNumber = 1; 01509 01510 if (debug && myRank == rootRank) { 01511 cerr << "Matrix Market reader: readSparse:" << endl 01512 << "-- Reading banner line" << endl; 01513 } 01514 01515 // The "Banner" tells you whether the input stream represents 01516 // a sparse matrix, the symmetry type of the matrix, and the 01517 // type of the data it contains. 01518 // 01519 // pBanner will only be nonnull on MPI Rank 0. It will be 01520 // null on all other MPI processes. 01521 RCP<const Banner> pBanner; 01522 { 01523 // We read and validate the Banner on Proc 0, but broadcast 01524 // the validation result to all processes. 
01525 // Teuchos::broadcast doesn't currently work with bool, so 01526 // we use int (true -> 1, false -> 0). 01527 int bannerIsCorrect = 1; 01528 std::ostringstream errMsg; 01529 01530 if (myRank == rootRank) { 01531 // Read the Banner line from the input stream. 01532 try { 01533 pBanner = readBanner (in, lineNumber, tolerant, debug); 01534 } 01535 catch (std::exception& e) { 01536 errMsg << "Attempt to read the Matrix Market file's Banner line " 01537 "threw an exception: " << e.what(); 01538 bannerIsCorrect = 0; 01539 } 01540 01541 if (bannerIsCorrect) { 01542 // Validate the Banner for the case of a sparse matrix. 01543 // We validate on Proc 0, since it reads the Banner. 01544 01545 // In intolerant mode, the matrix type must be "coordinate". 01546 if (! tolerant && pBanner->matrixType() != "coordinate") { 01547 bannerIsCorrect = 0; 01548 errMsg << "The Matrix Market input file must contain a " 01549 "\"coordinate\"-format sparse matrix in order to create a " 01550 "Tpetra::CrsMatrix object from it, but the file's matrix " 01551 "type is \"" << pBanner->matrixType() << "\" instead."; 01552 } 01553 // In tolerant mode, we allow the matrix type to be 01554 // anything other than "array" (which would mean that 01555 // the file contains a dense matrix). 01556 if (tolerant && pBanner->matrixType() == "array") { 01557 bannerIsCorrect = 0; 01558 errMsg << "Matrix Market file must contain a \"coordinate\"-" 01559 "format sparse matrix in order to create a Tpetra::CrsMatrix " 01560 "object from it, but the file's matrix type is \"array\" " 01561 "instead. That probably means the file contains dense matrix " 01562 "data."; 01563 } 01564 } 01565 } // Proc 0: Done reading the Banner, hopefully successfully. 01566 01567 // Broadcast from Proc 0 whether the Banner was read correctly. 01568 broadcast (*pComm, rootRank, ptr (&bannerIsCorrect)); 01569 01570 // If the Banner is invalid, all processes throw an 01571 // exception. 
Only Proc 0 gets the exception message, but 01572 // that's OK, since the main point is to "stop the world" 01573 // (rather than throw an exception on one process and leave 01574 // the others hanging). 01575 TEUCHOS_TEST_FOR_EXCEPTION(bannerIsCorrect == 0, 01576 std::invalid_argument, errMsg.str ()); 01577 } // Done reading the Banner line and broadcasting success. 01578 if (debug && myRank == rootRank) { 01579 cerr << "-- Reading dimensions line" << endl; 01580 } 01581 01582 // Read the matrix dimensions from the Matrix Market metadata. 01583 // dims = (numRows, numCols, numEntries). Proc 0 does the 01584 // reading, but it broadcasts the results to all MPI 01585 // processes. Thus, readCoordDims() is a collective 01586 // operation. It does a collective check for correctness too. 01587 Tuple<global_ordinal_type, 3> dims = 01588 readCoordDims (in, lineNumber, pBanner, pComm, tolerant, debug); 01589 01590 if (debug && myRank == rootRank) { 01591 cerr << "-- Making Adder for collecting matrix data" << endl; 01592 } 01593 01594 // "Adder" object for collecting all the sparse matrix entries 01595 // from the input stream. This is only nonnull on Proc 0. 01596 RCP<adder_type> pAdder = 01597 makeAdder (pComm, pBanner, dims, tolerant, debug); 01598 01599 if (debug && myRank == rootRank) { 01600 cerr << "-- Reading matrix data" << endl; 01601 } 01602 // 01603 // Read the matrix entries from the input stream on Proc 0. 01604 // 01605 { 01606 // We use readSuccess to broadcast the results of the read 01607 // (succeeded or not) to all MPI processes. Since 01608 // Teuchos::broadcast doesn't currently know how to send 01609 // bools, we convert to int (true -> 1, false -> 0). 01610 int readSuccess = 1; 01611 std::ostringstream errMsg; // Exception message (only valid on Proc 0) 01612 if (myRank == rootRank) { 01613 try { 01614 // Reader for "coordinate" format sparse matrix data. 
01615 typedef Teuchos::MatrixMarket::CoordDataReader<adder_type, 01616 global_ordinal_type, scalar_type, STS::isComplex> reader_type; 01617 reader_type reader (pAdder); 01618 01619 // Read the sparse matrix entries. 01620 std::pair<bool, std::vector<size_t> > results = 01621 reader.read (in, lineNumber, tolerant, debug); 01622 readSuccess = results.first ? 1 : 0; 01623 } 01624 catch (std::exception& e) { 01625 readSuccess = 0; 01626 errMsg << e.what(); 01627 } 01628 } 01629 broadcast (*pComm, rootRank, ptr (&readSuccess)); 01630 01631 // It would be nice to add a "verbose" flag, so that in 01632 // tolerant mode, we could log any bad line number(s) on 01633 // Proc 0. For now, we just throw if the read fails to 01634 // succeed. 01635 // 01636 // Question: If we're in tolerant mode, and if the read did 01637 // not succeed, should we attempt to call fillComplete()? 01638 TEUCHOS_TEST_FOR_EXCEPTION(readSuccess == 0, std::runtime_error, 01639 "Failed to read the Matrix Market sparse matrix file: " 01640 << errMsg.str()); 01641 } // Done reading the matrix entries (stored on Proc 0 for now) 01642 01643 if (debug && myRank == rootRank) { 01644 cerr << "-- Successfully read the Matrix Market data" << endl; 01645 } 01646 01647 // In tolerant mode, we need to rebroadcast the matrix 01648 // dimensions, since they may be different after reading the 01649 // actual matrix data. We only need to broadcast the number 01650 // of rows and columns. Only Rank 0 needs to know the actual 01651 // global number of entries, since (a) we need to merge 01652 // duplicates on Rank 0 first anyway, and (b) when we 01653 // distribute the entries, each rank other than Rank 0 will 01654 // only need to know how many entries it owns, not the total 01655 // number of entries. 
01656 if (tolerant) { 01657 if (debug && myRank == rootRank) { 01658 cerr << "-- Tolerant mode: rebroadcasting matrix dimensions" 01659 << endl 01660 << "----- Dimensions before: " 01661 << dims[0] << " x " << dims[1] 01662 << endl; 01663 } 01664 // Packed coordinate matrix dimensions (numRows, numCols). 01665 Tuple<global_ordinal_type, 2> updatedDims; 01666 if (myRank == rootRank) { 01667 // If one or more bottom rows of the matrix contain no 01668 // entries, then the Adder will report that the number 01669 // of rows is less than that specified in the 01670 // metadata. We allow this case, and favor the 01671 // metadata so that the zero row(s) will be included. 01672 updatedDims[0] = 01673 std::max (dims[0], pAdder->getAdder()->numRows()); 01674 updatedDims[1] = pAdder->getAdder()->numCols(); 01675 } 01676 broadcast (*pComm, rootRank, updatedDims); 01677 dims[0] = updatedDims[0]; 01678 dims[1] = updatedDims[1]; 01679 if (debug && myRank == rootRank) { 01680 cerr << "----- Dimensions after: " << dims[0] << " x " 01681 << dims[1] << endl; 01682 } 01683 } 01684 else { 01685 // In strict mode, we require that the matrix's metadata and 01686 // its actual data agree, at least somewhat. In particular, 01687 // the number of rows must agree, since otherwise we cannot 01688 // distribute the matrix correctly. 01689 01690 // Teuchos::broadcast() doesn't know how to broadcast bools, 01691 // so we use an int with the standard 1 == true, 0 == false 01692 // encoding. 01693 int dimsMatch = 1; 01694 if (myRank == rootRank) { 01695 // If one or more bottom rows of the matrix contain no 01696 // entries, then the Adder will report that the number of 01697 // rows is less than that specified in the metadata. We 01698 // allow this case, and favor the metadata, but do not 01699 // allow the Adder to think there are more rows in the 01700 // matrix than the metadata says. 
01701 if (dims[0] < pAdder->getAdder ()->numRows ()) { 01702 dimsMatch = 0; 01703 } 01704 } 01705 broadcast (*pComm, 0, ptr (&dimsMatch)); 01706 if (dimsMatch == 0) { 01707 // We're in an error state anyway, so we might as well 01708 // work a little harder to print an informative error 01709 // message. 01710 // 01711 // Broadcast the Adder's idea of the matrix dimensions 01712 // from Proc 0 to all processes. 01713 Tuple<global_ordinal_type, 2> addersDims; 01714 if (myRank == rootRank) { 01715 addersDims[0] = pAdder->getAdder()->numRows(); 01716 addersDims[1] = pAdder->getAdder()->numCols(); 01717 } 01718 broadcast (*pComm, 0, addersDims); 01719 TEUCHOS_TEST_FOR_EXCEPTION( 01720 dimsMatch == 0, std::runtime_error, 01721 "The matrix metadata says that the matrix is " << dims[0] << " x " 01722 << dims[1] << ", but the actual data says that the matrix is " 01723 << addersDims[0] << " x " << addersDims[1] << ". That means the " 01724 "data includes more rows than reported in the metadata. This " 01725 "is not allowed when parsing in strict mode. Parse the matrix in " 01726 "tolerant mode to ignore the metadata when it disagrees with the " 01727 "data."); 01728 } 01729 } // Matrix dimensions (# rows, # cols, # entries) agree. 01730 01731 if (debug && myRank == rootRank) { 01732 cerr << "-- Converting matrix data into CSR format on Proc 0" << endl; 01733 } 01734 01735 // Now that we've read in all the matrix entries from the 01736 // input stream into the adder on Proc 0, post-process them 01737 // into CSR format (still on Proc 0). This will facilitate 01738 // distributing them to all the processors. 01739 // 01740 // These arrays represent the global matrix data as a CSR 01741 // matrix (with numEntriesPerRow as redundant but convenient 01742 // metadata, since it's computable from rowPtr and vice 01743 // versa). They are valid only on Proc 0. 
01744 ArrayRCP<size_t> numEntriesPerRow; 01745 ArrayRCP<size_t> rowPtr; 01746 ArrayRCP<global_ordinal_type> colInd; 01747 ArrayRCP<scalar_type> values; 01748 01749 // Proc 0 first merges duplicate entries, and then converts 01750 // the coordinate-format matrix data to CSR. 01751 { 01752 int mergeAndConvertSucceeded = 1; 01753 std::ostringstream errMsg; 01754 01755 if (myRank == rootRank) { 01756 try { 01757 typedef Teuchos::MatrixMarket::Raw::Element<scalar_type, 01758 global_ordinal_type> element_type; 01759 01760 // Number of rows in the matrix. If we are in tolerant 01761 // mode, we've already synchronized dims with the actual 01762 // matrix data. If in strict mode, we should use dims 01763 // (as read from the file's metadata) rather than the 01764 // matrix data to determine the dimensions. (The matrix 01765 // data will claim fewer rows than the metadata, if one 01766 // or more rows have no entries stored in the file.) 01767 const size_type numRows = dims[0]; 01768 01769 // Additively merge duplicate matrix entries. 01770 pAdder->getAdder()->merge (); 01771 01772 // Get a temporary const view of the merged matrix entries. 01773 const std::vector<element_type>& entries = 01774 pAdder->getAdder()->getEntries(); 01775 01776 // Number of matrix entries (after merging). 01777 const size_t numEntries = (size_t)entries.size(); 01778 01779 if (debug) { 01780 cerr << "----- Proc 0: Matrix has numRows=" << numRows 01781 << " rows and numEntries=" << numEntries 01782 << " entries." << endl; 01783 } 01784 01785 // Make space for the CSR matrix data. Converting to 01786 // CSR is easier if we fill numEntriesPerRow with zeros 01787 // at first. 
01788 numEntriesPerRow = arcp<size_t> (numRows); 01789 std::fill (numEntriesPerRow.begin(), numEntriesPerRow.end(), 0); 01790 rowPtr = arcp<size_t> (numRows+1); 01791 std::fill (rowPtr.begin(), rowPtr.end(), 0); 01792 colInd = arcp<global_ordinal_type> (numEntries); 01793 values = arcp<scalar_type> (numEntries); 01794 01795 // Convert from array-of-structs coordinate format to CSR 01796 // (compressed sparse row) format. 01797 global_ordinal_type prvRow = 0; 01798 size_t curPos = 0; 01799 rowPtr[0] = 0; 01800 for (curPos = 0; curPos < numEntries; ++curPos) { 01801 const element_type& curEntry = entries[curPos]; 01802 const global_ordinal_type curRow = curEntry.rowIndex(); 01803 TEUCHOS_TEST_FOR_EXCEPTION( 01804 curRow < prvRow, std::logic_error, 01805 "Row indices are out of order, even though they are supposed " 01806 "to be sorted. curRow = " << curRow << ", prvRow = " 01807 << prvRow << ", at curPos = " << curPos << ". Please report " 01808 "this bug to the Tpetra developers."); 01809 if (curRow > prvRow) { 01810 for (global_ordinal_type r = prvRow+1; r <= curRow; ++r) { 01811 rowPtr[r] = curPos; 01812 } 01813 prvRow = curRow; 01814 } 01815 numEntriesPerRow[curRow]++; 01816 colInd[curPos] = curEntry.colIndex(); 01817 values[curPos] = curEntry.value(); 01818 } 01819 // rowPtr has one more entry than numEntriesPerRow. The 01820 // last entry of rowPtr is the number of entries in 01821 // colInd and values. 01822 rowPtr[numRows] = numEntries; 01823 } // Finished conversion to CSR format 01824 catch (std::exception& e) { 01825 mergeAndConvertSucceeded = 0; 01826 errMsg << "Failed to merge sparse matrix entries and convert to " 01827 "CSR format: " << e.what(); 01828 } 01829 01830 if (debug && mergeAndConvertSucceeded) { 01831 // Number of rows in the matrix. 01832 const size_type numRows = dims[0]; 01833 const size_type maxToDisplay = 100; 01834 01835 cerr << "----- Proc 0: numEntriesPerRow[0.." 
01836 << (numEntriesPerRow.size()-1) << "] "; 01837 if (numRows > maxToDisplay) { 01838 cerr << "(only showing first and last few entries) "; 01839 } 01840 cerr << "= ["; 01841 if (numRows > 0) { 01842 if (numRows > maxToDisplay) { 01843 for (size_type k = 0; k < 2; ++k) { 01844 cerr << numEntriesPerRow[k] << " "; 01845 } 01846 cerr << "... "; 01847 for (size_type k = numRows-2; k < numRows-1; ++k) { 01848 cerr << numEntriesPerRow[k] << " "; 01849 } 01850 } 01851 else { 01852 for (size_type k = 0; k < numRows-1; ++k) { 01853 cerr << numEntriesPerRow[k] << " "; 01854 } 01855 } 01856 cerr << numEntriesPerRow[numRows-1]; 01857 } // numRows > 0 01858 cerr << "]" << endl; 01859 01860 cerr << "----- Proc 0: rowPtr "; 01861 if (numRows > maxToDisplay) { 01862 cerr << "(only showing first and last few entries) "; 01863 } 01864 cerr << "= ["; 01865 if (numRows > maxToDisplay) { 01866 for (size_type k = 0; k < 2; ++k) { 01867 cerr << rowPtr[k] << " "; 01868 } 01869 cerr << "... "; 01870 for (size_type k = numRows-2; k < numRows; ++k) { 01871 cerr << rowPtr[k] << " "; 01872 } 01873 } 01874 else { 01875 for (size_type k = 0; k < numRows; ++k) { 01876 cerr << rowPtr[k] << " "; 01877 } 01878 } 01879 cerr << rowPtr[numRows] << "]" << endl; 01880 } 01881 } // if myRank == rootRank 01882 } // Done converting sparse matrix data to CSR format 01883 01884 // Now we're done with the Adder, so we can release the 01885 // reference ("free" it) to save space. This only actually 01886 // does anything on Rank 0, since pAdder is null on all the 01887 // other MPI processes. 01888 pAdder = null; 01889 01890 if (debug && myRank == rootRank) { 01891 cerr << "-- Making range, domain, and row maps" << endl; 01892 } 01893 01894 // Make the maps that describe the matrix's range and domain, 01895 // and the distribution of its rows. Creating a Map is a 01896 // collective operation, so we don't have to do a broadcast of 01897 // a success Boolean. 
01898 map_ptr pRangeMap = makeRangeMap (pComm, pNode, dims[0]); 01899 map_ptr pDomainMap = makeDomainMap (pRangeMap, dims[0], dims[1]); 01900 map_ptr pRowMap = makeRowMap (null, pComm, pNode, dims[0]); 01901 01902 if (debug && myRank == rootRank) { 01903 cerr << "-- Distributing the matrix data" << endl; 01904 } 01905 01906 // Distribute the matrix data. Each processor has to add the 01907 // rows that it owns. If you try to make Proc 0 call 01908 // insertGlobalValues() for _all_ the rows, not just those it 01909 // owns, then fillComplete() will compute the number of 01910 // columns incorrectly. That's why Proc 0 has to distribute 01911 // the matrix data and why we make all the processors (not 01912 // just Proc 0) call insertGlobalValues() on their own data. 01913 // 01914 // These arrays represent each processor's part of the matrix 01915 // data, in "CSR" format (sort of, since the row indices might 01916 // not be contiguous). 01917 ArrayRCP<size_t> myNumEntriesPerRow; 01918 ArrayRCP<size_t> myRowPtr; 01919 ArrayRCP<global_ordinal_type> myColInd; 01920 ArrayRCP<scalar_type> myValues; 01921 // Distribute the matrix data. This is a collective operation. 01922 distribute (myNumEntriesPerRow, myRowPtr, myColInd, myValues, pRowMap, 01923 numEntriesPerRow, rowPtr, colInd, values, debug); 01924 01925 if (debug && myRank == rootRank) { 01926 cerr << "-- Inserting matrix entries on each processor"; 01927 if (callFillComplete) { 01928 cerr << " and calling fillComplete()"; 01929 } 01930 cerr << endl; 01931 } 01932 // Each processor inserts its part of the matrix data, and 01933 // then they all call fillComplete(). This method invalidates 01934 // the my* distributed matrix data before calling 01935 // fillComplete(), in order to save space. In general, we 01936 // never store more than two copies of the matrix's entries in 01937 // memory at once, which is no worse than what Tpetra 01938 // promises. 
01939 sparse_matrix_ptr pMatrix = 01940 makeMatrix (myNumEntriesPerRow, myRowPtr, myColInd, myValues, 01941 pRowMap, pRangeMap, pDomainMap, callFillComplete); 01942 // Only use a reduce-all in debug mode to check if pMatrix is 01943 // null. Otherwise, just throw an exception. We never expect 01944 // a null pointer here, so we can save a communication. 01945 if (debug) { 01946 int localIsNull = pMatrix.is_null () ? 1 : 0; 01947 int globalIsNull = 0; 01948 reduceAll (*pComm, Teuchos::REDUCE_MAX, localIsNull, ptr (&globalIsNull)); 01949 TEUCHOS_TEST_FOR_EXCEPTION(globalIsNull != 0, std::logic_error, 01950 "Reader::makeMatrix() returned a null pointer on at least one " 01951 "process. Please report this bug to the Tpetra developers."); 01952 } 01953 else { 01954 TEUCHOS_TEST_FOR_EXCEPTION(pMatrix.is_null(), std::logic_error, 01955 "Reader::makeMatrix() returned a null pointer. " 01956 "Please report this bug to the Tpetra developers."); 01957 } 01958 01959 // We can't get the dimensions of the matrix until after 01960 // fillComplete() is called. Thus, we can't do the sanity 01961 // check (dimensions read from the Matrix Market data, 01962 // vs. dimensions reported by the CrsMatrix) unless the user 01963 // asked makeMatrix() to call fillComplete(). 01964 // 01965 // Note that pMatrix->getGlobalNum{Rows,Cols}() does _not_ do 01966 // what one might think it does, so you have to ask the range 01967 // resp. domain map for the number of rows resp. columns. 
01968 if (callFillComplete) { 01969 const int numProcs = pComm->getSize (); 01970 01971 if (extraDebug && debug) { 01972 const global_size_t globalNumRows = 01973 pRangeMap->getGlobalNumElements(); 01974 const global_size_t globalNumCols = 01975 pDomainMap->getGlobalNumElements(); 01976 if (myRank == rootRank) { 01977 cerr << "-- Matrix is " 01978 << globalNumRows << " x " << globalNumCols 01979 << " with " << pMatrix->getGlobalNumEntries() 01980 << " entries, and index base " 01981 << pMatrix->getIndexBase() << "." << endl; 01982 } 01983 pComm->barrier (); 01984 for (int p = 0; p < numProcs; ++p) { 01985 if (myRank == p) { 01986 cerr << "-- Proc " << p << " owns " 01987 << pMatrix->getNodeNumCols() << " columns, and " 01988 << pMatrix->getNodeNumEntries() << " entries." << endl; 01989 } 01990 pComm->barrier (); 01991 } 01992 } // if (extraDebug && debug) 01993 } // if (callFillComplete) 01994 01995 if (debug && myRank == rootRank) { 01996 cerr << "-- Done creating the CrsMatrix from the Matrix Market data" 01997 << endl; 01998 } 01999 return pMatrix; 02000 } 02001 02002 02032 static sparse_matrix_ptr 02033 readSparse (std::istream& in, 02034 const RCP<const Comm<int> >& pComm, 02035 const RCP<node_type>& pNode, 02036 const RCP<Teuchos::ParameterList>& constructorParams, 02037 const RCP<Teuchos::ParameterList>& fillCompleteParams, 02038 const bool tolerant=false, 02039 const bool debug=false) 02040 { 02041 using Teuchos::MatrixMarket::Banner; 02042 using Teuchos::broadcast; 02043 using Teuchos::ptr; 02044 using Teuchos::reduceAll; 02045 using std::cerr; 02046 using std::endl; 02047 typedef Teuchos::ScalarTraits<scalar_type> STS; 02048 02049 const bool extraDebug = false; 02050 const int myRank = pComm->getRank (); 02051 const int rootRank = 0; 02052 02053 // Current line number in the input stream. Various calls 02054 // will modify this depending on the number of lines that are 02055 // read from the input stream. Only Rank 0 modifies this. 
02056 size_t lineNumber = 1; 02057 02058 if (debug && myRank == rootRank) { 02059 cerr << "Matrix Market reader: readSparse:" << endl 02060 << "-- Reading banner line" << endl; 02061 } 02062 02063 // The "Banner" tells you whether the input stream represents 02064 // a sparse matrix, the symmetry type of the matrix, and the 02065 // type of the data it contains. 02066 // 02067 // pBanner will only be nonnull on MPI Rank 0. It will be 02068 // null on all other MPI processes. 02069 RCP<const Banner> pBanner; 02070 { 02071 // We read and validate the Banner on Proc 0, but broadcast 02072 // the validation result to all processes. 02073 // Teuchos::broadcast doesn't currently work with bool, so 02074 // we use int (true -> 1, false -> 0). 02075 int bannerIsCorrect = 1; 02076 std::ostringstream errMsg; 02077 02078 if (myRank == rootRank) { 02079 // Read the Banner line from the input stream. 02080 try { 02081 pBanner = readBanner (in, lineNumber, tolerant, debug); 02082 } 02083 catch (std::exception& e) { 02084 errMsg << "Attempt to read the Matrix Market file's Banner line " 02085 "threw an exception: " << e.what(); 02086 bannerIsCorrect = 0; 02087 } 02088 02089 if (bannerIsCorrect) { 02090 // Validate the Banner for the case of a sparse matrix. 02091 // We validate on Proc 0, since it reads the Banner. 02092 02093 // In intolerant mode, the matrix type must be "coordinate". 02094 if (! tolerant && pBanner->matrixType() != "coordinate") { 02095 bannerIsCorrect = 0; 02096 errMsg << "The Matrix Market input file must contain a " 02097 "\"coordinate\"-format sparse matrix in order to create a " 02098 "Tpetra::CrsMatrix object from it, but the file's matrix " 02099 "type is \"" << pBanner->matrixType() << "\" instead."; 02100 } 02101 // In tolerant mode, we allow the matrix type to be 02102 // anything other than "array" (which would mean that 02103 // the file contains a dense matrix). 
02104 if (tolerant && pBanner->matrixType() == "array") { 02105 bannerIsCorrect = 0; 02106 errMsg << "Matrix Market file must contain a \"coordinate\"-" 02107 "format sparse matrix in order to create a Tpetra::CrsMatrix " 02108 "object from it, but the file's matrix type is \"array\" " 02109 "instead. That probably means the file contains dense matrix " 02110 "data."; 02111 } 02112 } 02113 } // Proc 0: Done reading the Banner, hopefully successfully. 02114 02115 // Broadcast from Proc 0 whether the Banner was read correctly. 02116 broadcast (*pComm, rootRank, ptr (&bannerIsCorrect)); 02117 02118 // If the Banner is invalid, all processes throw an 02119 // exception. Only Proc 0 gets the exception message, but 02120 // that's OK, since the main point is to "stop the world" 02121 // (rather than throw an exception on one process and leave 02122 // the others hanging). 02123 TEUCHOS_TEST_FOR_EXCEPTION(bannerIsCorrect == 0, 02124 std::invalid_argument, errMsg.str ()); 02125 } // Done reading the Banner line and broadcasting success. 02126 if (debug && myRank == rootRank) { 02127 cerr << "-- Reading dimensions line" << endl; 02128 } 02129 02130 // Read the matrix dimensions from the Matrix Market metadata. 02131 // dims = (numRows, numCols, numEntries). Proc 0 does the 02132 // reading, but it broadcasts the results to all MPI 02133 // processes. Thus, readCoordDims() is a collective 02134 // operation. It does a collective check for correctness too. 02135 Tuple<global_ordinal_type, 3> dims = 02136 readCoordDims (in, lineNumber, pBanner, pComm, tolerant, debug); 02137 02138 if (debug && myRank == rootRank) { 02139 cerr << "-- Making Adder for collecting matrix data" << endl; 02140 } 02141 02142 // "Adder" object for collecting all the sparse matrix entries 02143 // from the input stream. This is only nonnull on Proc 0. 
02144 RCP<adder_type> pAdder = 02145 makeAdder (pComm, pBanner, dims, tolerant, debug); 02146 02147 if (debug && myRank == rootRank) { 02148 cerr << "-- Reading matrix data" << endl; 02149 } 02150 // 02151 // Read the matrix entries from the input stream on Proc 0. 02152 // 02153 { 02154 // We use readSuccess to broadcast the results of the read 02155 // (succeeded or not) to all MPI processes. Since 02156 // Teuchos::broadcast doesn't currently know how to send 02157 // bools, we convert to int (true -> 1, false -> 0). 02158 int readSuccess = 1; 02159 std::ostringstream errMsg; // Exception message (only valid on Proc 0) 02160 if (myRank == rootRank) { 02161 try { 02162 // Reader for "coordinate" format sparse matrix data. 02163 typedef Teuchos::MatrixMarket::CoordDataReader<adder_type, 02164 global_ordinal_type, scalar_type, STS::isComplex> reader_type; 02165 reader_type reader (pAdder); 02166 02167 // Read the sparse matrix entries. 02168 std::pair<bool, std::vector<size_t> > results = 02169 reader.read (in, lineNumber, tolerant, debug); 02170 readSuccess = results.first ? 1 : 0; 02171 } 02172 catch (std::exception& e) { 02173 readSuccess = 0; 02174 errMsg << e.what(); 02175 } 02176 } 02177 broadcast (*pComm, rootRank, ptr (&readSuccess)); 02178 02179 // It would be nice to add a "verbose" flag, so that in 02180 // tolerant mode, we could log any bad line number(s) on 02181 // Proc 0. For now, we just throw if the read fails to 02182 // succeed. 02183 // 02184 // Question: If we're in tolerant mode, and if the read did 02185 // not succeed, should we attempt to call fillComplete()? 
02186 TEUCHOS_TEST_FOR_EXCEPTION(readSuccess == 0, std::runtime_error, 02187 "Failed to read the Matrix Market sparse matrix file: " 02188 << errMsg.str()); 02189 } // Done reading the matrix entries (stored on Proc 0 for now) 02190 02191 if (debug && myRank == rootRank) { 02192 cerr << "-- Successfully read the Matrix Market data" << endl; 02193 } 02194 02195 // In tolerant mode, we need to rebroadcast the matrix 02196 // dimensions, since they may be different after reading the 02197 // actual matrix data. We only need to broadcast the number 02198 // of rows and columns. Only Rank 0 needs to know the actual 02199 // global number of entries, since (a) we need to merge 02200 // duplicates on Rank 0 first anyway, and (b) when we 02201 // distribute the entries, each rank other than Rank 0 will 02202 // only need to know how many entries it owns, not the total 02203 // number of entries. 02204 if (tolerant) { 02205 if (debug && myRank == rootRank) { 02206 cerr << "-- Tolerant mode: rebroadcasting matrix dimensions" 02207 << endl 02208 << "----- Dimensions before: " 02209 << dims[0] << " x " << dims[1] 02210 << endl; 02211 } 02212 // Packed coordinate matrix dimensions (numRows, numCols). 02213 Tuple<global_ordinal_type, 2> updatedDims; 02214 if (myRank == rootRank) { 02215 // If one or more bottom rows of the matrix contain no 02216 // entries, then the Adder will report that the number 02217 // of rows is less than that specified in the 02218 // metadata. We allow this case, and favor the 02219 // metadata so that the zero row(s) will be included. 
02220 updatedDims[0] = 02221 std::max (dims[0], pAdder->getAdder()->numRows()); 02222 updatedDims[1] = pAdder->getAdder()->numCols(); 02223 } 02224 broadcast (*pComm, rootRank, updatedDims); 02225 dims[0] = updatedDims[0]; 02226 dims[1] = updatedDims[1]; 02227 if (debug && myRank == rootRank) { 02228 cerr << "----- Dimensions after: " << dims[0] << " x " 02229 << dims[1] << endl; 02230 } 02231 } 02232 else { 02233 // In strict mode, we require that the matrix's metadata and 02234 // its actual data agree, at least somewhat. In particular, 02235 // the number of rows must agree, since otherwise we cannot 02236 // distribute the matrix correctly. 02237 02238 // Teuchos::broadcast() doesn't know how to broadcast bools, 02239 // so we use an int with the standard 1 == true, 0 == false 02240 // encoding. 02241 int dimsMatch = 1; 02242 if (myRank == rootRank) { 02243 // If one or more bottom rows of the matrix contain no 02244 // entries, then the Adder will report that the number of 02245 // rows is less than that specified in the metadata. We 02246 // allow this case, and favor the metadata, but do not 02247 // allow the Adder to think there are more rows in the 02248 // matrix than the metadata says. 02249 if (dims[0] < pAdder->getAdder ()->numRows ()) { 02250 dimsMatch = 0; 02251 } 02252 } 02253 broadcast (*pComm, 0, ptr (&dimsMatch)); 02254 if (dimsMatch == 0) { 02255 // We're in an error state anyway, so we might as well 02256 // work a little harder to print an informative error 02257 // message. 02258 // 02259 // Broadcast the Adder's idea of the matrix dimensions 02260 // from Proc 0 to all processes. 
02261 Tuple<global_ordinal_type, 2> addersDims; 02262 if (myRank == rootRank) { 02263 addersDims[0] = pAdder->getAdder()->numRows(); 02264 addersDims[1] = pAdder->getAdder()->numCols(); 02265 } 02266 broadcast (*pComm, 0, addersDims); 02267 TEUCHOS_TEST_FOR_EXCEPTION( 02268 dimsMatch == 0, std::runtime_error, 02269 "The matrix metadata says that the matrix is " << dims[0] << " x " 02270 << dims[1] << ", but the actual data says that the matrix is " 02271 << addersDims[0] << " x " << addersDims[1] << ". That means the " 02272 "data includes more rows than reported in the metadata. This " 02273 "is not allowed when parsing in strict mode. Parse the matrix in " 02274 "tolerant mode to ignore the metadata when it disagrees with the " 02275 "data."); 02276 } 02277 } // Matrix dimensions (# rows, # cols, # entries) agree. 02278 02279 if (debug && myRank == rootRank) { 02280 cerr << "-- Converting matrix data into CSR format on Proc 0" << endl; 02281 } 02282 02283 // Now that we've read in all the matrix entries from the 02284 // input stream into the adder on Proc 0, post-process them 02285 // into CSR format (still on Proc 0). This will facilitate 02286 // distributing them to all the processors. 02287 // 02288 // These arrays represent the global matrix data as a CSR 02289 // matrix (with numEntriesPerRow as redundant but convenient 02290 // metadata, since it's computable from rowPtr and vice 02291 // versa). They are valid only on Proc 0. 02292 ArrayRCP<size_t> numEntriesPerRow; 02293 ArrayRCP<size_t> rowPtr; 02294 ArrayRCP<global_ordinal_type> colInd; 02295 ArrayRCP<scalar_type> values; 02296 02297 // Proc 0 first merges duplicate entries, and then converts 02298 // the coordinate-format matrix data to CSR. 
02299 { 02300 int mergeAndConvertSucceeded = 1; 02301 std::ostringstream errMsg; 02302 02303 if (myRank == rootRank) { 02304 try { 02305 typedef Teuchos::MatrixMarket::Raw::Element<scalar_type, 02306 global_ordinal_type> element_type; 02307 02308 // Number of rows in the matrix. If we are in tolerant 02309 // mode, we've already synchronized dims with the actual 02310 // matrix data. If in strict mode, we should use dims 02311 // (as read from the file's metadata) rather than the 02312 // matrix data to determine the dimensions. (The matrix 02313 // data will claim fewer rows than the metadata, if one 02314 // or more rows have no entries stored in the file.) 02315 const size_type numRows = dims[0]; 02316 02317 // Additively merge duplicate matrix entries. 02318 pAdder->getAdder()->merge (); 02319 02320 // Get a temporary const view of the merged matrix entries. 02321 const std::vector<element_type>& entries = 02322 pAdder->getAdder()->getEntries(); 02323 02324 // Number of matrix entries (after merging). 02325 const size_t numEntries = (size_t)entries.size(); 02326 02327 if (debug) { 02328 cerr << "----- Proc 0: Matrix has numRows=" << numRows 02329 << " rows and numEntries=" << numEntries 02330 << " entries." << endl; 02331 } 02332 02333 // Make space for the CSR matrix data. Converting to 02334 // CSR is easier if we fill numEntriesPerRow with zeros 02335 // at first. 02336 numEntriesPerRow = arcp<size_t> (numRows); 02337 std::fill (numEntriesPerRow.begin(), numEntriesPerRow.end(), 0); 02338 rowPtr = arcp<size_t> (numRows+1); 02339 std::fill (rowPtr.begin(), rowPtr.end(), 0); 02340 colInd = arcp<global_ordinal_type> (numEntries); 02341 values = arcp<scalar_type> (numEntries); 02342 02343 // Convert from array-of-structs coordinate format to CSR 02344 // (compressed sparse row) format. 
02345 global_ordinal_type prvRow = 0; 02346 size_t curPos = 0; 02347 rowPtr[0] = 0; 02348 for (curPos = 0; curPos < numEntries; ++curPos) { 02349 const element_type& curEntry = entries[curPos]; 02350 const global_ordinal_type curRow = curEntry.rowIndex(); 02351 TEUCHOS_TEST_FOR_EXCEPTION( 02352 curRow < prvRow, std::logic_error, 02353 "Row indices are out of order, even though they are supposed " 02354 "to be sorted. curRow = " << curRow << ", prvRow = " 02355 << prvRow << ", at curPos = " << curPos << ". Please report " 02356 "this bug to the Tpetra developers."); 02357 if (curRow > prvRow) { 02358 for (global_ordinal_type r = prvRow+1; r <= curRow; ++r) { 02359 rowPtr[r] = curPos; 02360 } 02361 prvRow = curRow; 02362 } 02363 numEntriesPerRow[curRow]++; 02364 colInd[curPos] = curEntry.colIndex(); 02365 values[curPos] = curEntry.value(); 02366 } 02367 // rowPtr has one more entry than numEntriesPerRow. The 02368 // last entry of rowPtr is the number of entries in 02369 // colInd and values. 02370 rowPtr[numRows] = numEntries; 02371 } // Finished conversion to CSR format 02372 catch (std::exception& e) { 02373 mergeAndConvertSucceeded = 0; 02374 errMsg << "Failed to merge sparse matrix entries and convert to " 02375 "CSR format: " << e.what(); 02376 } 02377 02378 if (debug && mergeAndConvertSucceeded) { 02379 // Number of rows in the matrix. 02380 const size_type numRows = dims[0]; 02381 const size_type maxToDisplay = 100; 02382 02383 cerr << "----- Proc 0: numEntriesPerRow[0.." 02384 << (numEntriesPerRow.size()-1) << "] "; 02385 if (numRows > maxToDisplay) { 02386 cerr << "(only showing first and last few entries) "; 02387 } 02388 cerr << "= ["; 02389 if (numRows > 0) { 02390 if (numRows > maxToDisplay) { 02391 for (size_type k = 0; k < 2; ++k) { 02392 cerr << numEntriesPerRow[k] << " "; 02393 } 02394 cerr << "... 
"; 02395 for (size_type k = numRows-2; k < numRows-1; ++k) { 02396 cerr << numEntriesPerRow[k] << " "; 02397 } 02398 } 02399 else { 02400 for (size_type k = 0; k < numRows-1; ++k) { 02401 cerr << numEntriesPerRow[k] << " "; 02402 } 02403 } 02404 cerr << numEntriesPerRow[numRows-1]; 02405 } // numRows > 0 02406 cerr << "]" << endl; 02407 02408 cerr << "----- Proc 0: rowPtr "; 02409 if (numRows > maxToDisplay) { 02410 cerr << "(only showing first and last few entries) "; 02411 } 02412 cerr << "= ["; 02413 if (numRows > maxToDisplay) { 02414 for (size_type k = 0; k < 2; ++k) { 02415 cerr << rowPtr[k] << " "; 02416 } 02417 cerr << "... "; 02418 for (size_type k = numRows-2; k < numRows; ++k) { 02419 cerr << rowPtr[k] << " "; 02420 } 02421 } 02422 else { 02423 for (size_type k = 0; k < numRows; ++k) { 02424 cerr << rowPtr[k] << " "; 02425 } 02426 } 02427 cerr << rowPtr[numRows] << "]" << endl; 02428 } 02429 } // if myRank == rootRank 02430 } // Done converting sparse matrix data to CSR format 02431 02432 // Now we're done with the Adder, so we can release the 02433 // reference ("free" it) to save space. This only actually 02434 // does anything on Rank 0, since pAdder is null on all the 02435 // other MPI processes. 02436 pAdder = null; 02437 02438 if (debug && myRank == rootRank) { 02439 cerr << "-- Making range, domain, and row maps" << endl; 02440 } 02441 02442 // Make the maps that describe the matrix's range and domain, 02443 // and the distribution of its rows. Creating a Map is a 02444 // collective operation, so we don't have to do a broadcast of 02445 // a success Boolean. 02446 map_ptr pRangeMap = makeRangeMap (pComm, pNode, dims[0]); 02447 map_ptr pDomainMap = makeDomainMap (pRangeMap, dims[0], dims[1]); 02448 map_ptr pRowMap = makeRowMap (null, pComm, pNode, dims[0]); 02449 02450 if (debug && myRank == rootRank) { 02451 cerr << "-- Distributing the matrix data" << endl; 02452 } 02453 02454 // Distribute the matrix data. 
Each processor has to add the 02455 // rows that it owns. If you try to make Proc 0 call 02456 // insertGlobalValues() for _all_ the rows, not just those it 02457 // owns, then fillComplete() will compute the number of 02458 // columns incorrectly. That's why Proc 0 has to distribute 02459 // the matrix data and why we make all the processors (not 02460 // just Proc 0) call insertGlobalValues() on their own data. 02461 // 02462 // These arrays represent each processor's part of the matrix 02463 // data, in "CSR" format (sort of, since the row indices might 02464 // not be contiguous). 02465 ArrayRCP<size_t> myNumEntriesPerRow; 02466 ArrayRCP<size_t> myRowPtr; 02467 ArrayRCP<global_ordinal_type> myColInd; 02468 ArrayRCP<scalar_type> myValues; 02469 // Distribute the matrix data. This is a collective operation. 02470 distribute (myNumEntriesPerRow, myRowPtr, myColInd, myValues, pRowMap, 02471 numEntriesPerRow, rowPtr, colInd, values, debug); 02472 02473 if (debug && myRank == rootRank) { 02474 cerr << "-- Inserting matrix entries on each process " 02475 "and calling fillComplete()" << endl; 02476 } 02477 // Each processor inserts its part of the matrix data, and 02478 // then they all call fillComplete(). This method invalidates 02479 // the my* distributed matrix data before calling 02480 // fillComplete(), in order to save space. In general, we 02481 // never store more than two copies of the matrix's entries in 02482 // memory at once, which is no worse than what Tpetra 02483 // promises. 02484 sparse_matrix_ptr pMatrix = 02485 makeMatrix (myNumEntriesPerRow, myRowPtr, myColInd, myValues, 02486 pRowMap, pRangeMap, pDomainMap, constructorParams, 02487 fillCompleteParams); 02488 // Only use a reduce-all in debug mode to check if pMatrix is 02489 // null. Otherwise, just throw an exception. We never expect 02490 // a null pointer here, so we can save a communication. 02491 if (debug) { 02492 int localIsNull = pMatrix.is_null () ? 
1 : 0; 02493 int globalIsNull = 0; 02494 reduceAll (*pComm, Teuchos::REDUCE_MAX, localIsNull, ptr (&globalIsNull)); 02495 TEUCHOS_TEST_FOR_EXCEPTION(globalIsNull != 0, std::logic_error, 02496 "Reader::makeMatrix() returned a null pointer on at least one " 02497 "process. Please report this bug to the Tpetra developers."); 02498 } 02499 else { 02500 TEUCHOS_TEST_FOR_EXCEPTION(pMatrix.is_null(), std::logic_error, 02501 "Reader::makeMatrix() returned a null pointer. " 02502 "Please report this bug to the Tpetra developers."); 02503 } 02504 02505 // Sanity check for dimensions (read from the Matrix Market 02506 // data, vs. dimensions reported by the CrsMatrix). 02507 // 02508 // Note that pMatrix->getGlobalNum{Rows,Cols}() does _not_ do 02509 // what one might think it does, so you have to ask the range 02510 // resp. domain map for the number of rows resp. columns. 02511 if (extraDebug && debug) { 02512 const int numProcs = pComm->getSize (); 02513 const global_size_t globalNumRows = 02514 pRangeMap->getGlobalNumElements(); 02515 const global_size_t globalNumCols = 02516 pDomainMap->getGlobalNumElements(); 02517 if (myRank == rootRank) { 02518 cerr << "-- Matrix is " 02519 << globalNumRows << " x " << globalNumCols 02520 << " with " << pMatrix->getGlobalNumEntries() 02521 << " entries, and index base " 02522 << pMatrix->getIndexBase() << "." << endl; 02523 } 02524 pComm->barrier (); 02525 for (int p = 0; p < numProcs; ++p) { 02526 if (myRank == p) { 02527 cerr << "-- Proc " << p << " owns " 02528 << pMatrix->getNodeNumCols() << " columns, and " 02529 << pMatrix->getNodeNumEntries() << " entries." 
<< endl; 02530 } 02531 pComm->barrier (); 02532 } 02533 } // if (extraDebug && debug) 02534 02535 if (debug && myRank == rootRank) { 02536 cerr << "-- Done creating the CrsMatrix from the Matrix Market data" 02537 << endl; 02538 } 02539 return pMatrix; 02540 } 02541 02582 static sparse_matrix_ptr 02583 readSparse (std::istream& in, 02584 const RCP<const map_type>& rowMap, 02585 RCP<const map_type>& colMap, 02586 const RCP<const map_type>& domainMap, 02587 const RCP<const map_type>& rangeMap, 02588 const bool callFillComplete=true, 02589 const bool tolerant=false, 02590 const bool debug=false) 02591 { 02592 using Teuchos::MatrixMarket::Banner; 02593 using Teuchos::as; 02594 using Teuchos::broadcast; 02595 using Teuchos::ptr; 02596 using Teuchos::reduceAll; 02597 using std::cerr; 02598 using std::endl; 02599 typedef Teuchos::ScalarTraits<scalar_type> STS; 02600 02601 RCP<const Comm<int> > pComm = rowMap->getComm (); 02602 const int myRank = pComm->getRank (); 02603 const int rootRank = 0; 02604 const bool extraDebug = false; 02605 02606 // Fast checks for invalid input. We can't check other 02607 // attributes of the Maps until we've read in the matrix 02608 // dimensions. 
02609 TEUCHOS_TEST_FOR_EXCEPTION( 02610 rowMap.is_null (), std::invalid_argument, 02611 "Row Map must be nonnull."); 02612 TEUCHOS_TEST_FOR_EXCEPTION( 02613 rangeMap.is_null (), std::invalid_argument, 02614 "Range Map must be nonnull."); 02615 TEUCHOS_TEST_FOR_EXCEPTION( 02616 domainMap.is_null (), std::invalid_argument, 02617 "Domain Map must be nonnull."); 02618 TEUCHOS_TEST_FOR_EXCEPTION( 02619 rowMap->getComm().getRawPtr() != pComm.getRawPtr(), 02620 std::invalid_argument, 02621 "The specified row Map's communicator (rowMap->getComm())" 02622 "differs from the given separately supplied communicator pComm."); 02623 TEUCHOS_TEST_FOR_EXCEPTION( 02624 domainMap->getComm().getRawPtr() != pComm.getRawPtr(), 02625 std::invalid_argument, 02626 "The specified domain Map's communicator (domainMap->getComm())" 02627 "differs from the given separately supplied communicator pComm."); 02628 TEUCHOS_TEST_FOR_EXCEPTION( 02629 rangeMap->getComm().getRawPtr() != pComm.getRawPtr(), 02630 std::invalid_argument, 02631 "The specified range Map's communicator (rangeMap->getComm())" 02632 "differs from the given separately supplied communicator pComm."); 02633 02634 // Current line number in the input stream. Various calls 02635 // will modify this depending on the number of lines that are 02636 // read from the input stream. Only Rank 0 modifies this. 02637 size_t lineNumber = 1; 02638 02639 if (debug && myRank == rootRank) { 02640 cerr << "Matrix Market reader: readSparse:" << endl 02641 << "-- Reading banner line" << endl; 02642 } 02643 02644 // The "Banner" tells you whether the input stream represents 02645 // a sparse matrix, the symmetry type of the matrix, and the 02646 // type of the data it contains. 02647 // 02648 // pBanner will only be nonnull on MPI Rank 0. It will be 02649 // null on all other MPI processes. 02650 RCP<const Banner> pBanner; 02651 { 02652 // We read and validate the Banner on Proc 0, but broadcast 02653 // the validation result to all processes. 
02654 // Teuchos::broadcast doesn't currently work with bool, so 02655 // we use int (true -> 1, false -> 0). 02656 int bannerIsCorrect = 1; 02657 std::ostringstream errMsg; 02658 02659 if (myRank == rootRank) { 02660 // Read the Banner line from the input stream. 02661 try { 02662 pBanner = readBanner (in, lineNumber, tolerant, debug); 02663 } 02664 catch (std::exception& e) { 02665 errMsg << "Attempt to read the Matrix Market file's Banner line " 02666 "threw an exception: " << e.what(); 02667 bannerIsCorrect = 0; 02668 } 02669 02670 if (bannerIsCorrect) { 02671 // Validate the Banner for the case of a sparse matrix. 02672 // We validate on Proc 0, since it reads the Banner. 02673 02674 // In intolerant mode, the matrix type must be "coordinate". 02675 if (! tolerant && pBanner->matrixType() != "coordinate") { 02676 bannerIsCorrect = 0; 02677 errMsg << "The Matrix Market input file must contain a " 02678 "\"coordinate\"-format sparse matrix in order to create a " 02679 "Tpetra::CrsMatrix object from it, but the file's matrix " 02680 "type is \"" << pBanner->matrixType() << "\" instead."; 02681 } 02682 // In tolerant mode, we allow the matrix type to be 02683 // anything other than "array" (which would mean that 02684 // the file contains a dense matrix). 02685 if (tolerant && pBanner->matrixType() == "array") { 02686 bannerIsCorrect = 0; 02687 errMsg << "Matrix Market file must contain a \"coordinate\"-" 02688 "format sparse matrix in order to create a Tpetra::CrsMatrix " 02689 "object from it, but the file's matrix type is \"array\" " 02690 "instead. That probably means the file contains dense matrix " 02691 "data."; 02692 } 02693 } 02694 } // Proc 0: Done reading the Banner, hopefully successfully. 02695 02696 // Broadcast from Proc 0 whether the Banner was read correctly. 02697 broadcast (*pComm, rootRank, ptr (&bannerIsCorrect)); 02698 02699 // If the Banner is invalid, all processes throw an 02700 // exception. 
Only Proc 0 gets the exception message, but 02701 // that's OK, since the main point is to "stop the world" 02702 // (rather than throw an exception on one process and leave 02703 // the others hanging). 02704 TEUCHOS_TEST_FOR_EXCEPTION(bannerIsCorrect == 0, 02705 std::invalid_argument, errMsg.str ()); 02706 } // Done reading the Banner line and broadcasting success. 02707 if (debug && myRank == rootRank) { 02708 cerr << "-- Reading dimensions line" << endl; 02709 } 02710 02711 // Read the matrix dimensions from the Matrix Market metadata. 02712 // dims = (numRows, numCols, numEntries). Proc 0 does the 02713 // reading, but it broadcasts the results to all MPI 02714 // processes. Thus, readCoordDims() is a collective 02715 // operation. It does a collective check for correctness too. 02716 Tuple<global_ordinal_type, 3> dims = 02717 readCoordDims (in, lineNumber, pBanner, pComm, tolerant, debug); 02718 02719 if (debug && myRank == rootRank) { 02720 cerr << "-- Making Adder for collecting matrix data" << endl; 02721 } 02722 02723 // "Adder" object for collecting all the sparse matrix entries 02724 // from the input stream. This is only nonnull on Proc 0. 02725 // The Adder internally converts the one-based indices (native 02726 // Matrix Market format) into zero-based indices. 02727 RCP<adder_type> pAdder = 02728 makeAdder (pComm, pBanner, dims, tolerant, debug); 02729 02730 if (debug && myRank == rootRank) { 02731 cerr << "-- Reading matrix data" << endl; 02732 } 02733 // 02734 // Read the matrix entries from the input stream on Proc 0. 02735 // 02736 { 02737 // We use readSuccess to broadcast the results of the read 02738 // (succeeded or not) to all MPI processes. Since 02739 // Teuchos::broadcast doesn't currently know how to send 02740 // bools, we convert to int (true -> 1, false -> 0). 
02741 int readSuccess = 1; 02742 std::ostringstream errMsg; // Exception message (only valid on Proc 0) 02743 if (myRank == rootRank) { 02744 try { 02745 // Reader for "coordinate" format sparse matrix data. 02746 typedef Teuchos::MatrixMarket::CoordDataReader<adder_type, 02747 global_ordinal_type, scalar_type, STS::isComplex> reader_type; 02748 reader_type reader (pAdder); 02749 02750 // Read the sparse matrix entries. 02751 std::pair<bool, std::vector<size_t> > results = 02752 reader.read (in, lineNumber, tolerant, debug); 02753 readSuccess = results.first ? 1 : 0; 02754 } 02755 catch (std::exception& e) { 02756 readSuccess = 0; 02757 errMsg << e.what(); 02758 } 02759 } 02760 broadcast (*pComm, rootRank, ptr (&readSuccess)); 02761 02762 // It would be nice to add a "verbose" flag, so that in 02763 // tolerant mode, we could log any bad line number(s) on 02764 // Proc 0. For now, we just throw if the read fails to 02765 // succeed. 02766 // 02767 // Question: If we're in tolerant mode, and if the read did 02768 // not succeed, should we attempt to call fillComplete()? 02769 TEUCHOS_TEST_FOR_EXCEPTION(readSuccess == 0, std::runtime_error, 02770 "Failed to read the Matrix Market sparse matrix file: " 02771 << errMsg.str()); 02772 } // Done reading the matrix entries (stored on Proc 0 for now) 02773 02774 if (debug && myRank == rootRank) { 02775 cerr << "-- Successfully read the Matrix Market data" << endl; 02776 } 02777 02778 // In tolerant mode, we need to rebroadcast the matrix 02779 // dimensions, since they may be different after reading the 02780 // actual matrix data. We only need to broadcast the number 02781 // of rows and columns. Only Rank 0 needs to know the actual 02782 // global number of entries, since (a) we need to merge 02783 // duplicates on Rank 0 first anyway, and (b) when we 02784 // distribute the entries, each rank other than Rank 0 will 02785 // only need to know how many entries it owns, not the total 02786 // number of entries. 
02787 if (tolerant) { 02788 if (debug && myRank == rootRank) { 02789 cerr << "-- Tolerant mode: rebroadcasting matrix dimensions" 02790 << endl 02791 << "----- Dimensions before: " 02792 << dims[0] << " x " << dims[1] 02793 << endl; 02794 } 02795 // Packed coordinate matrix dimensions (numRows, numCols). 02796 Tuple<global_ordinal_type, 2> updatedDims; 02797 if (myRank == rootRank) { 02798 // If one or more bottom rows of the matrix contain no 02799 // entries, then the Adder will report that the number 02800 // of rows is less than that specified in the 02801 // metadata. We allow this case, and favor the 02802 // metadata so that the zero row(s) will be included. 02803 updatedDims[0] = 02804 std::max (dims[0], pAdder->getAdder()->numRows()); 02805 updatedDims[1] = pAdder->getAdder()->numCols(); 02806 } 02807 broadcast (*pComm, rootRank, updatedDims); 02808 dims[0] = updatedDims[0]; 02809 dims[1] = updatedDims[1]; 02810 if (debug && myRank == rootRank) { 02811 cerr << "----- Dimensions after: " << dims[0] << " x " 02812 << dims[1] << endl; 02813 } 02814 } 02815 else { 02816 // In strict mode, we require that the matrix's metadata and 02817 // its actual data agree, at least somewhat. In particular, 02818 // the number of rows must agree, since otherwise we cannot 02819 // distribute the matrix correctly. 02820 02821 // Teuchos::broadcast() doesn't know how to broadcast bools, 02822 // so we use an int with the standard 1 == true, 0 == false 02823 // encoding. 02824 int dimsMatch = 1; 02825 if (myRank == rootRank) { 02826 // If one or more bottom rows of the matrix contain no 02827 // entries, then the Adder will report that the number of 02828 // rows is less than that specified in the metadata. We 02829 // allow this case, and favor the metadata, but do not 02830 // allow the Adder to think there are more rows in the 02831 // matrix than the metadata says. 
02832 if (dims[0] < pAdder->getAdder ()->numRows ()) { 02833 dimsMatch = 0; 02834 } 02835 } 02836 broadcast (*pComm, 0, ptr (&dimsMatch)); 02837 if (dimsMatch == 0) { 02838 // We're in an error state anyway, so we might as well 02839 // work a little harder to print an informative error 02840 // message. 02841 // 02842 // Broadcast the Adder's idea of the matrix dimensions 02843 // from Proc 0 to all processes. 02844 Tuple<global_ordinal_type, 2> addersDims; 02845 if (myRank == rootRank) { 02846 addersDims[0] = pAdder->getAdder()->numRows(); 02847 addersDims[1] = pAdder->getAdder()->numCols(); 02848 } 02849 broadcast (*pComm, 0, addersDims); 02850 TEUCHOS_TEST_FOR_EXCEPTION( 02851 dimsMatch == 0, std::runtime_error, 02852 "The matrix metadata says that the matrix is " << dims[0] << " x " 02853 << dims[1] << ", but the actual data says that the matrix is " 02854 << addersDims[0] << " x " << addersDims[1] << ". That means the " 02855 "data includes more rows than reported in the metadata. This " 02856 "is not allowed when parsing in strict mode. Parse the matrix in " 02857 "tolerant mode to ignore the metadata when it disagrees with the " 02858 "data."); 02859 } 02860 } // Matrix dimensions (# rows, # cols, # entries) agree. 02861 02862 if (debug && myRank == rootRank) { 02863 cerr << "-- Converting matrix data into CSR format on Proc 0" << endl; 02864 } 02865 02866 // Now that we've read in all the matrix entries from the 02867 // input stream into the adder on Proc 0, post-process them 02868 // into CSR format (still on Proc 0). This will facilitate 02869 // distributing them to all the processors. 02870 // 02871 // These arrays represent the global matrix data as a CSR 02872 // matrix (with numEntriesPerRow as redundant but convenient 02873 // metadata, since it's computable from rowPtr and vice 02874 // versa). They are valid only on Proc 0. 
02875 ArrayRCP<size_t> numEntriesPerRow; 02876 ArrayRCP<size_t> rowPtr; 02877 ArrayRCP<global_ordinal_type> colInd; 02878 ArrayRCP<scalar_type> values; 02879 02880 // Proc 0 first merges duplicate entries, and then converts 02881 // the coordinate-format matrix data to CSR. 02882 { 02883 int mergeAndConvertSucceeded = 1; 02884 std::ostringstream errMsg; 02885 02886 if (myRank == rootRank) { 02887 try { 02888 typedef Teuchos::MatrixMarket::Raw::Element<scalar_type, 02889 global_ordinal_type> element_type; 02890 02891 // Number of rows in the matrix. If we are in tolerant 02892 // mode, we've already synchronized dims with the actual 02893 // matrix data. If in strict mode, we should use dims 02894 // (as read from the file's metadata) rather than the 02895 // matrix data to determine the dimensions. (The matrix 02896 // data will claim fewer rows than the metadata, if one 02897 // or more rows have no entries stored in the file.) 02898 const size_type numRows = dims[0]; 02899 02900 // Additively merge duplicate matrix entries. 02901 pAdder->getAdder()->merge (); 02902 02903 // Get a temporary const view of the merged matrix entries. 02904 const std::vector<element_type>& entries = 02905 pAdder->getAdder()->getEntries(); 02906 02907 // Number of matrix entries (after merging). 02908 const size_t numEntries = (size_t)entries.size(); 02909 02910 if (debug) { 02911 cerr << "----- Proc 0: Matrix has numRows=" << numRows 02912 << " rows and numEntries=" << numEntries 02913 << " entries." << endl; 02914 } 02915 02916 // Make space for the CSR matrix data. Converting to 02917 // CSR is easier if we fill numEntriesPerRow with zeros 02918 // at first. 
02919 numEntriesPerRow = arcp<size_t> (numRows); 02920 std::fill (numEntriesPerRow.begin(), numEntriesPerRow.end(), 0); 02921 rowPtr = arcp<size_t> (numRows+1); 02922 std::fill (rowPtr.begin(), rowPtr.end(), 0); 02923 colInd = arcp<global_ordinal_type> (numEntries); 02924 values = arcp<scalar_type> (numEntries); 02925 02926 // Convert from array-of-structs coordinate format to CSR 02927 // (compressed sparse row) format. 02928 global_ordinal_type prvRow = 0; 02929 size_t curPos = 0; 02930 rowPtr[0] = 0; 02931 for (curPos = 0; curPos < numEntries; ++curPos) { 02932 const element_type& curEntry = entries[curPos]; 02933 const global_ordinal_type curRow = curEntry.rowIndex(); 02934 TEUCHOS_TEST_FOR_EXCEPTION( 02935 curRow < prvRow, std::logic_error, 02936 "Row indices are out of order, even though they are supposed " 02937 "to be sorted. curRow = " << curRow << ", prvRow = " 02938 << prvRow << ", at curPos = " << curPos << ". Please report " 02939 "this bug to the Tpetra developers."); 02940 if (curRow > prvRow) { 02941 for (global_ordinal_type r = prvRow+1; r <= curRow; ++r) { 02942 rowPtr[r] = curPos; 02943 } 02944 prvRow = curRow; 02945 } 02946 numEntriesPerRow[curRow]++; 02947 colInd[curPos] = curEntry.colIndex(); 02948 values[curPos] = curEntry.value(); 02949 } 02950 // rowPtr has one more entry than numEntriesPerRow. The 02951 // last entry of rowPtr is the number of entries in 02952 // colInd and values. 02953 rowPtr[numRows] = numEntries; 02954 } // Finished conversion to CSR format 02955 catch (std::exception& e) { 02956 mergeAndConvertSucceeded = 0; 02957 errMsg << "Failed to merge sparse matrix entries and convert to " 02958 "CSR format: " << e.what(); 02959 } 02960 02961 if (debug && mergeAndConvertSucceeded) { 02962 // Number of rows in the matrix. 02963 const size_type numRows = dims[0]; 02964 const size_type maxToDisplay = 100; 02965 02966 cerr << "----- Proc 0: numEntriesPerRow[0.." 
02967 << (numEntriesPerRow.size()-1) << "] "; 02968 if (numRows > maxToDisplay) { 02969 cerr << "(only showing first and last few entries) "; 02970 } 02971 cerr << "= ["; 02972 if (numRows > 0) { 02973 if (numRows > maxToDisplay) { 02974 for (size_type k = 0; k < 2; ++k) { 02975 cerr << numEntriesPerRow[k] << " "; 02976 } 02977 cerr << "... "; 02978 for (size_type k = numRows-2; k < numRows-1; ++k) { 02979 cerr << numEntriesPerRow[k] << " "; 02980 } 02981 } 02982 else { 02983 for (size_type k = 0; k < numRows-1; ++k) { 02984 cerr << numEntriesPerRow[k] << " "; 02985 } 02986 } 02987 cerr << numEntriesPerRow[numRows-1]; 02988 } // numRows > 0 02989 cerr << "]" << endl; 02990 02991 cerr << "----- Proc 0: rowPtr "; 02992 if (numRows > maxToDisplay) { 02993 cerr << "(only showing first and last few entries) "; 02994 } 02995 cerr << "= ["; 02996 if (numRows > maxToDisplay) { 02997 for (size_type k = 0; k < 2; ++k) { 02998 cerr << rowPtr[k] << " "; 02999 } 03000 cerr << "... "; 03001 for (size_type k = numRows-2; k < numRows; ++k) { 03002 cerr << rowPtr[k] << " "; 03003 } 03004 } 03005 else { 03006 for (size_type k = 0; k < numRows; ++k) { 03007 cerr << rowPtr[k] << " "; 03008 } 03009 } 03010 cerr << rowPtr[numRows] << "]" << endl; 03011 03012 cerr << "----- Proc 0: colInd = ["; 03013 for (size_t k = 0; k < rowPtr[numRows]; ++k) { 03014 cerr << colInd[k] << " "; 03015 } 03016 cerr << "]" << endl; 03017 } 03018 } // if myRank == rootRank 03019 } // Done converting sparse matrix data to CSR format 03020 03021 // Now we're done with the Adder, so we can release the 03022 // reference ("free" it) to save space. This only actually 03023 // does anything on Rank 0, since pAdder is null on all the 03024 // other MPI processes. 03025 pAdder = null; 03026 03027 // Verify details of the Maps. Don't count the global number 03028 // of entries in the row Map, since that number doesn't 03029 // correctly count overlap. 
03030 if (debug && myRank == rootRank) { 03031 cerr << "-- Verifying Maps" << endl; 03032 } 03033 TEUCHOS_TEST_FOR_EXCEPTION( 03034 as<global_size_t> (dims[0]) != rangeMap->getGlobalNumElements(), 03035 std::invalid_argument, 03036 "The range Map has " << rangeMap->getGlobalNumElements () 03037 << " entries, but the matrix has a global number of rows " << dims[0] 03038 << "."); 03039 TEUCHOS_TEST_FOR_EXCEPTION( 03040 as<global_size_t> (dims[1]) != domainMap->getGlobalNumElements (), 03041 std::invalid_argument, 03042 "The domain Map has " << domainMap->getGlobalNumElements () 03043 << " entries, but the matrix has a global number of columns " 03044 << dims[1] << "."); 03045 03046 // Create a row Map which is entirely owned on Proc 0. 03047 RCP<Teuchos::FancyOStream> err = debug ? 03048 Teuchos::getFancyOStream (Teuchos::rcpFromRef (cerr)) : null; 03049 03050 RCP<const map_type> gatherRowMap = Details::computeGatherMap (rowMap, err, debug); 03051 ArrayView<const global_ordinal_type> myRows = 03052 gatherRowMap->getNodeElementList (); 03053 const size_type myNumRows = myRows.size (); 03054 const global_ordinal_type indexBase = gatherRowMap->getIndexBase (); 03055 03056 ArrayRCP<size_t> gatherNumEntriesPerRow = arcp<size_t>(myNumRows); 03057 for (size_type i_ = 0; i_ < myNumRows; i_++) { 03058 gatherNumEntriesPerRow[i_] = numEntriesPerRow[myRows[i_]-indexBase]; 03059 } 03060 03061 // Create a matrix using this Map, and fill in on Proc 0. We 03062 // know how many entries there are in each row, so we can use 03063 // static profile. 03064 RCP<sparse_matrix_type> A_proc0 = 03065 rcp (new sparse_matrix_type (gatherRowMap, gatherNumEntriesPerRow, 03066 Tpetra::StaticProfile)); 03067 if (myRank == rootRank) { 03068 if (debug) { 03069 cerr << "-- Proc 0: Filling gather matrix" << endl; 03070 } 03071 if (debug) { 03072 cerr << "---- Rows: " << Teuchos::toString (myRows) << endl; 03073 } 03074 03075 // Add Proc 0's matrix entries to the CrsMatrix. 
03076 for (size_type i_ = 0; i_ < myNumRows; ++i_) { 03077 size_type i = myRows[i_] - indexBase; 03078 03079 const size_type curPos = as<size_type> (rowPtr[i]); 03080 const local_ordinal_type curNumEntries = numEntriesPerRow[i]; 03081 ArrayView<global_ordinal_type> curColInd = 03082 colInd.view (curPos, curNumEntries); 03083 ArrayView<scalar_type> curValues = 03084 values.view (curPos, curNumEntries); 03085 03086 // Modify the column indices in place to have the right index base. 03087 for (size_type k = 0; k < curNumEntries; ++k) { 03088 curColInd[k] += indexBase; 03089 } 03090 if (debug) { 03091 cerr << "------ Columns: " << Teuchos::toString (curColInd) << endl; 03092 cerr << "------ Values: " << Teuchos::toString (curValues) << endl; 03093 } 03094 // Avoid constructing empty views of ArrayRCP objects. 03095 if (curNumEntries > 0) { 03096 A_proc0->insertGlobalValues (myRows[i_], curColInd, curValues); 03097 } 03098 } 03099 // Now we can save space by deallocating numEntriesPerRow, 03100 // rowPtr, colInd, and values, since we've already put those 03101 // data in the matrix. 03102 numEntriesPerRow = null; 03103 rowPtr = null; 03104 colInd = null; 03105 values = null; 03106 } // if myRank == rootRank 03107 03108 RCP<sparse_matrix_type> A; 03109 if (colMap.is_null ()) { 03110 A = rcp (new sparse_matrix_type (rowMap, 0)); 03111 } else { 03112 A = rcp (new sparse_matrix_type (rowMap, colMap, 0)); 03113 } 03114 typedef Export<local_ordinal_type, global_ordinal_type, node_type> export_type; 03115 export_type exp (gatherRowMap, rowMap); 03116 A->doExport (*A_proc0, exp, INSERT); 03117 03118 if (callFillComplete) { 03119 A->fillComplete (domainMap, rangeMap); 03120 } 03121 03122 // We can't get the dimensions of the matrix until after 03123 // fillComplete() is called. Thus, we can't do the sanity 03124 // check (dimensions read from the Matrix Market data, 03125 // vs. dimensions reported by the CrsMatrix) unless the user 03126 // asked us to call fillComplete(). 
03127 // 03128 // Note that pMatrix->getGlobalNum{Rows,Cols}() does _not_ do 03129 // what one might think it does, so you have to ask the range 03130 // resp. domain map for the number of rows resp. columns. 03131 if (callFillComplete) { 03132 const int numProcs = pComm->getSize (); 03133 03134 if (extraDebug && debug) { 03135 const global_size_t globalNumRows = rangeMap->getGlobalNumElements (); 03136 const global_size_t globalNumCols = domainMap->getGlobalNumElements (); 03137 if (myRank == rootRank) { 03138 cerr << "-- Matrix is " 03139 << globalNumRows << " x " << globalNumCols 03140 << " with " << A->getGlobalNumEntries() 03141 << " entries, and index base " 03142 << A->getIndexBase() << "." << endl; 03143 } 03144 pComm->barrier (); 03145 for (int p = 0; p < numProcs; ++p) { 03146 if (myRank == p) { 03147 cerr << "-- Proc " << p << " owns " 03148 << A->getNodeNumCols() << " columns, and " 03149 << A->getNodeNumEntries() << " entries." << endl; 03150 } 03151 pComm->barrier (); 03152 } 03153 } // if (extraDebug && debug) 03154 } // if (callFillComplete) 03155 03156 if (debug && myRank == rootRank) { 03157 cerr << "-- Done creating the CrsMatrix from the Matrix Market data" 03158 << endl; 03159 } 03160 return A; 03161 } 03162 03163 03193 static RCP<multivector_type> 03194 readDenseFile (const std::string& filename, 03195 const RCP<const comm_type>& comm, 03196 const RCP<node_type>& node, 03197 RCP<const map_type>& map, 03198 const bool tolerant=false, 03199 const bool debug=false) 03200 { 03201 std::ifstream in; 03202 if (comm->getRank () == 0) { // Only open the file on Proc 0. 
03203 in.open (filename.c_str ()); // Destructor closes safely 03204 } 03205 return readDense (in, comm, node, map, tolerant, debug); 03206 } 03207 03238 static RCP<vector_type> 03239 readVectorFile (const std::string& filename, 03240 const RCP<const comm_type>& comm, 03241 const RCP<node_type>& node, 03242 RCP<const map_type>& map, 03243 const bool tolerant=false, 03244 const bool debug=false) 03245 { 03246 std::ifstream in; 03247 if (comm->getRank () == 0) { // Only open the file on Proc 0. 03248 in.open (filename.c_str ()); // Destructor closes safely 03249 } 03250 return readVector (in, comm, node, map, tolerant, debug); 03251 } 03252 03320 static RCP<multivector_type> 03321 readDense (std::istream& in, 03322 const RCP<const comm_type>& comm, 03323 const RCP<node_type>& node, 03324 RCP<const map_type>& map, 03325 const bool tolerant=false, 03326 const bool debug=false) 03327 { 03328 Teuchos::RCP<Teuchos::FancyOStream> err = 03329 Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr)); 03330 return readDenseImpl<scalar_type> (in, comm, node, map, 03331 err, tolerant, debug); 03332 } 03333 03335 static RCP<vector_type> 03336 readVector (std::istream& in, 03337 const RCP<const comm_type>& comm, 03338 const RCP<node_type>& node, 03339 RCP<const map_type>& map, 03340 const bool tolerant=false, 03341 const bool debug=false) 03342 { 03343 Teuchos::RCP<Teuchos::FancyOStream> err = 03344 Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr)); 03345 return readVectorImpl<scalar_type> (in, comm, node, map, 03346 err, tolerant, debug); 03347 } 03348 03370 static RCP<const map_type> 03371 readMapFile (const std::string& filename, 03372 const RCP<const comm_type>& comm, 03373 const RCP<node_type>& node, 03374 const bool tolerant=false, 03375 const bool debug=false) 03376 { 03377 using Teuchos::inOutArg; 03378 using Teuchos::broadcast; 03379 std::ifstream in; 03380 03381 int success = 1; 03382 if (comm->getRank () == 0) { // Only open the file on Proc 0. 
03383 in.open (filename.c_str ()); // Destructor closes safely 03384 if (! in) { 03385 success = 0; 03386 } 03387 } 03388 broadcast<int, int> (*comm, 0, inOutArg (success)); 03389 TEUCHOS_TEST_FOR_EXCEPTION( 03390 success == 0, std::runtime_error, 03391 "Tpetra::MatrixMarket::Reader::readMapFile: " 03392 "Failed to read file \"" << filename << "\" on Process 0."); 03393 return readMap (in, comm, node, tolerant, debug); 03394 } 03395 03396 private: 03397 template<class MultiVectorScalarType> 03398 static RCP<Tpetra::MultiVector<MultiVectorScalarType, 03399 local_ordinal_type, 03400 global_ordinal_type, 03401 node_type> > 03402 readDenseImpl (std::istream& in, 03403 const RCP<const comm_type>& comm, 03404 const RCP<node_type>& node, 03405 RCP<const map_type>& map, 03406 const Teuchos::RCP<Teuchos::FancyOStream>& err, 03407 const bool tolerant=false, 03408 const bool debug=false) 03409 { 03410 using Teuchos::MatrixMarket::Banner; 03411 using Teuchos::MatrixMarket::checkCommentLine; 03412 using Teuchos::as; 03413 using Teuchos::broadcast; 03414 using Teuchos::outArg; 03415 using std::endl; 03416 typedef MultiVectorScalarType ST; 03417 typedef local_ordinal_type LO; 03418 typedef global_ordinal_type GO; 03419 typedef node_type NT; 03420 typedef Teuchos::ScalarTraits<ST> STS; 03421 typedef typename STS::magnitudeType MT; 03422 typedef Teuchos::ScalarTraits<MT> STM; 03423 typedef Tpetra::MultiVector<ST, LO, GO, NT> MV; 03424 03425 // Rank 0 is the only (MPI) process allowed to read from the 03426 // input stream. 03427 const int myRank = comm->getRank (); 03428 03429 if (! err.is_null ()) { 03430 err->pushTab (); 03431 } 03432 if (debug) { 03433 *err << myRank << ": readDenseImpl" << endl; 03434 } 03435 if (! err.is_null ()) { 03436 err->pushTab (); 03437 } 03438 03439 // mfh 17 Feb 2013: It's not strictly necessary that the Comm 03440 // instances be identical and that the Node instances be 03441 // identical. 
The essential condition is more complicated to 03442 // test and isn't the same for all Node types. Thus, we just 03443 // leave it up to the user. 03444 03445 // // If map is nonnull, check the precondition that its 03446 // // communicator resp. node equal comm resp. node. Checking 03447 // // now avoids doing a lot of file reading before we detect the 03448 // // violated precondition. 03449 // TEUCHOS_TEST_FOR_EXCEPTION( 03450 // ! map.is_null() && (map->getComm() != comm || map->getNode () != node, 03451 // std::invalid_argument, "If you supply a nonnull Map, the Map's " 03452 // "communicator and node must equal the supplied communicator resp. " 03453 // "node."); 03454 03455 // Process 0 will read in the matrix dimensions from the file, 03456 // and broadcast them to all ranks in the given communicator. 03457 // There are only 2 dimensions in the matrix, but we use the 03458 // third element of the Tuple to encode the banner's reported 03459 // data type: "real" == 0, "complex" == 1, and "integer" == 0 03460 // (same as "real"). We don't allow pattern matrices (i.e., 03461 // graphs) since they only make sense for sparse data. 03462 Tuple<GO, 3> dims; 03463 dims[0] = 0; 03464 dims[1] = 0; 03465 03466 // Current line number in the input stream. Only valid on 03467 // Proc 0. Various calls will modify this depending on the 03468 // number of lines that are read from the input stream. 03469 size_t lineNumber = 1; 03470 03471 // Capture errors and their messages on Proc 0. 03472 std::ostringstream exMsg; 03473 int localBannerReadSuccess = 1; 03474 int localDimsReadSuccess = 1; 03475 03476 // Only Proc 0 gets to read matrix data from the input stream. 03477 if (myRank == 0) { 03478 if (debug) { 03479 *err << myRank << ": readDenseImpl: Reading banner line (dense)" << endl; 03480 } 03481 03482 // The "Banner" tells you whether the input stream 03483 // represents a dense matrix, the symmetry type of the 03484 // matrix, and the type of the data it contains. 
03485 RCP<const Banner> pBanner; 03486 try { 03487 pBanner = readBanner (in, lineNumber, tolerant, debug); 03488 } catch (std::exception& e) { 03489 exMsg << e.what (); 03490 localBannerReadSuccess = 0; 03491 } 03492 // Make sure the input stream is the right kind of data. 03493 if (localBannerReadSuccess) { 03494 if (pBanner->matrixType () != "array") { 03495 exMsg << "The Matrix Market file does not contain dense matrix " 03496 "data. Its banner (first) line says that its matrix type is \"" 03497 << pBanner->matrixType () << "\", rather that the required " 03498 "\"array\"."; 03499 localBannerReadSuccess = 0; 03500 } else if (pBanner->dataType() == "pattern") { 03501 exMsg << "The Matrix Market file's banner (first) " 03502 "line claims that the matrix's data type is \"pattern\". This does " 03503 "not make sense for a dense matrix, yet the file reports the matrix " 03504 "as dense. The only valid data types for a dense matrix are " 03505 "\"real\", \"complex\", and \"integer\"."; 03506 localBannerReadSuccess = 0; 03507 } else { 03508 // Encode the data type reported by the Banner as the 03509 // third element of the dimensions Tuple. 03510 dims[2] = encodeDataType (pBanner->dataType ()); 03511 } 03512 } // if we successfully read the banner line 03513 03514 // At this point, we've successfully read the banner line. 03515 // Now read the dimensions line. 03516 if (localBannerReadSuccess) { 03517 if (debug) { 03518 *err << myRank << ": readDenseImpl: Reading dimensions line (dense)" << endl; 03519 } 03520 // Keep reading lines from the input stream until we find 03521 // a non-comment line, or until we run out of lines. The 03522 // latter is an error, since every "array" format Matrix 03523 // Market file must have a dimensions line after the 03524 // banner (even if the matrix has zero rows or columns, or 03525 // zero entries). 
03526 std::string line; 03527 bool commentLine = true; 03528 03529 while (commentLine) { 03530 // Test whether it is even valid to read from the input 03531 // stream wrapping the line. 03532 if (in.eof () || in.fail ()) { 03533 exMsg << "Unable to get array dimensions line (at all) from line " 03534 << lineNumber << " of input stream. The input stream " 03535 << "claims that it is " 03536 << (in.eof() ? "at end-of-file." : "in a failed state."); 03537 localDimsReadSuccess = 0; 03538 } else { 03539 // Try to get the next line from the input stream. 03540 if (getline (in, line)) { 03541 ++lineNumber; // We did actually read a line. 03542 } 03543 // Is the current line a comment line? Ignore start 03544 // and size; they are only useful for reading the 03545 // actual matrix entries. (We could use them here as 03546 // an optimization, but we've chosen not to.) 03547 size_t start = 0, size = 0; 03548 commentLine = checkCommentLine (line, start, size, lineNumber, tolerant); 03549 } // whether we failed to read the line at all 03550 } // while the line we just read is a comment line 03551 03552 // 03553 // Get <numRows> <numCols> from the line we just read. 03554 // 03555 std::istringstream istr (line); 03556 03557 // Test whether it is even valid to read from the input 03558 // stream wrapping the line. 03559 if (istr.eof () || istr.fail ()) { 03560 exMsg << "Unable to read any data from line " << lineNumber 03561 << " of input; the line should contain the matrix dimensions " 03562 << "\"<numRows> <numCols>\"."; 03563 localDimsReadSuccess = 0; 03564 } else { // It's valid to read from the line. 03565 GO theNumRows = 0; 03566 istr >> theNumRows; // Read in the number of rows. 
03567 if (istr.fail ()) { 03568 exMsg << "Failed to get number of rows from line " 03569 << lineNumber << " of input; the line should contains the " 03570 << "matrix dimensions \"<numRows> <numCols>\"."; 03571 localDimsReadSuccess = 0; 03572 } else { // We successfully read the number of rows 03573 dims[0] = theNumRows; // Save the number of rows 03574 if (istr.eof ()) { // Do we still have data to read? 03575 exMsg << "No more data after number of rows on line " 03576 << lineNumber << " of input; the line should contain the " 03577 << "matrix dimensions \"<numRows> <numCols>\"."; 03578 localDimsReadSuccess = 0; 03579 } else { // Still data left to read; read in number of columns. 03580 GO theNumCols = 0; 03581 istr >> theNumCols; // Read in the number of columns 03582 if (istr.fail ()) { 03583 exMsg << "Failed to get number of columns from line " 03584 << lineNumber << " of input; the line should contain " 03585 << "the matrix dimensions \"<numRows> <numCols>\"."; 03586 localDimsReadSuccess = 0; 03587 } else { // We successfully read the number of columns 03588 dims[1] = theNumCols; // Save the number of columns 03589 } // if istr.fail () 03590 } // if istr.eof () 03591 } // if we read the number of rows 03592 } // if the input stream wrapping the dims line was (in)valid 03593 } // if we successfully read the banner line 03594 } // if (myRank == 0) 03595 03596 // Broadcast the matrix dimensions, the encoded data type, and 03597 // whether or not Proc 0 succeeded in reading the banner and 03598 // dimensions. 03599 Tuple<GO, 5> bannerDimsReadResult; 03600 if (myRank == 0) { 03601 bannerDimsReadResult[0] = dims[0]; // numRows 03602 bannerDimsReadResult[1] = dims[1]; // numCols 03603 bannerDimsReadResult[2] = dims[2]; // encoded data type 03604 bannerDimsReadResult[3] = localBannerReadSuccess; 03605 bannerDimsReadResult[4] = localDimsReadSuccess; 03606 } 03607 // Broadcast matrix dimensions and the encoded data type from 03608 // Proc 0 to all the MPI processes. 
03609 broadcast (*comm, 0, bannerDimsReadResult); 03610 03611 TEUCHOS_TEST_FOR_EXCEPTION( 03612 bannerDimsReadResult[3] == 0, std::runtime_error, 03613 "Failed to read banner line: " << exMsg.str ()); 03614 TEUCHOS_TEST_FOR_EXCEPTION( 03615 bannerDimsReadResult[4] == 0, std::runtime_error, 03616 "Failed to read matrix dimensions line: " << exMsg.str ()); 03617 if (myRank != 0) { 03618 dims[0] = bannerDimsReadResult[0]; 03619 dims[1] = bannerDimsReadResult[1]; 03620 dims[2] = bannerDimsReadResult[2]; 03621 } 03622 03623 // Tpetra objects want the matrix dimensions in these types. 03624 const global_size_t numRows = static_cast<global_size_t> (dims[0]); 03625 const size_t numCols = static_cast<size_t> (dims[1]); 03626 03627 // Make a "Proc 0 owns everything" Map that we will use to 03628 // read in the multivector entries in the correct order on 03629 // Proc 0. This must be a collective 03630 RCP<const map_type> proc0Map; // "Proc 0 owns everything" Map 03631 if (map.is_null ()) { 03632 // The user didn't supply a Map. Make a contiguous 03633 // distributed Map for them, using the read-in multivector 03634 // dimensions. 03635 map = createUniformContigMapWithNode<LO, GO, NT> (numRows, comm, node); 03636 const size_t localNumRows = (myRank == 0) ? numRows : 0; 03637 proc0Map = createContigMapWithNode<LO, GO, NT> (numRows, localNumRows, comm, node); 03638 } 03639 else { // The user supplied a Map. 03640 proc0Map = Details::computeGatherMap<map_type> (map, err, debug); 03641 } 03642 03643 // Make a multivector X owned entirely by Proc 0. 03644 RCP<MV> X = createMultiVector<ST, LO, GO, NT> (proc0Map, numCols); 03645 03646 // 03647 // On Proc 0, read the Matrix Market data from the input 03648 // stream into the multivector X. 
03649 // 03650 int localReadDataSuccess = 1; 03651 if (myRank == 0) { 03652 try { 03653 if (debug) { 03654 *err << myRank << ": readDenseImpl: Reading matrix data (dense)" 03655 << endl; 03656 } 03657 03658 // Make sure that we can get a 1-D view of X. 03659 TEUCHOS_TEST_FOR_EXCEPTION( 03660 ! X->isConstantStride (), std::logic_error, 03661 "Can't get a 1-D view of the entries of the MultiVector X on " 03662 "Process 0, because the stride between the columns of X is not " 03663 "constant. This shouldn't happen because we just created X and " 03664 "haven't filled it in yet. Please report this bug to the Tpetra " 03665 "developers."); 03666 03667 // Get a writeable 1-D view of the entries of X. Rank 0 03668 // owns all of them. The view will expire at the end of 03669 // scope, so (if necessary) it will be written back to X 03670 // at this time. 03671 ArrayRCP<ST> X_view = X->get1dViewNonConst (); 03672 TEUCHOS_TEST_FOR_EXCEPTION( 03673 as<global_size_t> (X_view.size ()) < numRows * numCols, 03674 std::logic_error, 03675 "The view of X has size " << X_view << " which is not enough to " 03676 "accommodate the expected number of entries numRows*numCols = " 03677 << numRows << "*" << numCols << " = " << numRows*numCols << ". " 03678 "Please report this bug to the Tpetra developers."); 03679 const size_t stride = X->getStride (); 03680 03681 // The third element of the dimensions Tuple encodes the data 03682 // type reported by the Banner: "real" == 0, "complex" == 1, 03683 // "integer" == 0 (same as "real"), "pattern" == 2. We do not 03684 // allow dense matrices to be pattern matrices, so dims[2] == 03685 // 0 or 1. We've already checked for this above. 03686 const bool isComplex = (dims[2] == 1); 03687 size_type count = 0, curRow = 0, curCol = 0; 03688 03689 std::string line; 03690 while (getline (in, line)) { 03691 ++lineNumber; 03692 // Is the current line a comment line? If it's not, 03693 // line.substr(start,size) contains the data. 
03694 size_t start = 0, size = 0; 03695 const bool commentLine = 03696 checkCommentLine (line, start, size, lineNumber, tolerant); 03697 if (! commentLine) { 03698 // Make sure we have room in which to put the new matrix 03699 // entry. We check this only after checking for a 03700 // comment line, because there may be one or more 03701 // comment lines at the end of the file. In tolerant 03702 // mode, we simply ignore any extra data. 03703 if (count >= X_view.size()) { 03704 if (tolerant) { 03705 break; 03706 } 03707 else { 03708 TEUCHOS_TEST_FOR_EXCEPTION( 03709 count >= X_view.size(), 03710 std::runtime_error, 03711 "The Matrix Market input stream has more data in it than " 03712 "its metadata reported. Current line number is " 03713 << lineNumber << "."); 03714 } 03715 } 03716 03717 // mfh 19 Dec 2012: Ignore everything up to the initial 03718 // colon. writeDense() has the option to print out the 03719 // global row index in front of each entry, followed by 03720 // a colon and space. 03721 { 03722 const size_t pos = line.substr (start, size).find (':'); 03723 if (pos != std::string::npos) { 03724 start = pos+1; 03725 } 03726 } 03727 std::istringstream istr (line.substr (start, size)); 03728 // Does the line contain anything at all? Can we 03729 // safely read from the input stream wrapping the 03730 // line? 03731 if (istr.eof() || istr.fail()) { 03732 // In tolerant mode, simply ignore the line. 03733 if (tolerant) { 03734 break; 03735 } 03736 // We repeat the full test here so the exception 03737 // message is more informative. 03738 TEUCHOS_TEST_FOR_EXCEPTION( 03739 ! tolerant && (istr.eof() || istr.fail()), 03740 std::runtime_error, 03741 "Line " << lineNumber << " of the Matrix Market file is " 03742 "empty, or we cannot read from it for some other reason."); 03743 } 03744 // Current matrix entry to read in. 03745 ST val = STS::zero(); 03746 // Real and imaginary parts of the current matrix entry. 
03747 // The imaginary part is zero if the matrix is real-valued. 03748 MT real = STM::zero(), imag = STM::zero(); 03749 03750 // isComplex refers to the input stream's data, not to 03751 // the scalar type S. It's OK to read real-valued 03752 // data into a matrix storing complex-valued data; in 03753 // that case, all entries' imaginary parts are zero. 03754 if (isComplex) { 03755 // STS::real() and STS::imag() return a copy of 03756 // their respective components, not a writeable 03757 // reference. Otherwise we could just assign to 03758 // them using the istream extraction operator (>>). 03759 // That's why we have separate magnitude type "real" 03760 // and "imag" variables. 03761 03762 // Attempt to read the real part of the current entry. 03763 istr >> real; 03764 if (istr.fail()) { 03765 TEUCHOS_TEST_FOR_EXCEPTION( 03766 ! tolerant && istr.eof(), std::runtime_error, 03767 "Failed to get the real part of a complex-valued matrix " 03768 "entry from line " << lineNumber << " of the Matrix Market " 03769 "file."); 03770 // In tolerant mode, just skip bad lines. 03771 if (tolerant) { 03772 break; 03773 } 03774 } else if (istr.eof()) { 03775 TEUCHOS_TEST_FOR_EXCEPTION( 03776 ! tolerant && istr.eof(), std::runtime_error, 03777 "Missing imaginary part of a complex-valued matrix entry " 03778 "on line " << lineNumber << " of the Matrix Market file."); 03779 // In tolerant mode, let any missing imaginary part be 0. 03780 } else { 03781 // Attempt to read the imaginary part of the current 03782 // matrix entry. 03783 istr >> imag; 03784 TEUCHOS_TEST_FOR_EXCEPTION( 03785 ! tolerant && istr.fail(), std::runtime_error, 03786 "Failed to get the imaginary part of a complex-valued " 03787 "matrix entry from line " << lineNumber << " of the " 03788 "Matrix Market file."); 03789 // In tolerant mode, let any missing or corrupted 03790 // imaginary part be 0. 03791 } 03792 } else { // Matrix Market file contains real-valued data. 
03793 // Attempt to read the current matrix entry. 03794 istr >> real; 03795 TEUCHOS_TEST_FOR_EXCEPTION( 03796 ! tolerant && istr.fail(), std::runtime_error, 03797 "Failed to get a real-valued matrix entry from line " 03798 << lineNumber << " of the Matrix Market file."); 03799 // In tolerant mode, simply ignore the line if 03800 // we failed to read a matrix entry. 03801 if (istr.fail() && tolerant) { 03802 break; 03803 } 03804 } 03805 // In tolerant mode, we simply let pass through whatever 03806 // data we got. 03807 TEUCHOS_TEST_FOR_EXCEPTION( 03808 ! tolerant && istr.fail(), std::runtime_error, 03809 "Failed to read matrix data from line " << lineNumber 03810 << " of the Matrix Market file."); 03811 03812 // Assign val = ST(real, imag). 03813 Teuchos::MatrixMarket::details::assignScalar<ST> (val, real, imag); 03814 03815 curRow = count % numRows; 03816 curCol = count / numRows; 03817 X_view[curRow + curCol*stride] = val; 03818 ++count; 03819 } // if not a comment line 03820 } // while there are still lines in the file, get the next one 03821 03822 TEUCHOS_TEST_FOR_EXCEPTION( 03823 ! tolerant && static_cast<global_size_t> (count) < numRows * numCols, 03824 std::runtime_error, 03825 "The Matrix Market metadata reports that the dense matrix is " 03826 << numRows << " x " << numCols << ", and thus has " 03827 << numRows*numCols << " total entries, but we only found " << count 03828 << " entr" << (count == 1 ? "y" : "ies") << " in the file."); 03829 } catch (std::exception& e) { 03830 exMsg << e.what (); 03831 localReadDataSuccess = 0; 03832 } 03833 } // if (myRank == 0) 03834 03835 if (debug) { 03836 *err << myRank << ": readDenseImpl: done reading data" << endl; 03837 } 03838 03839 // Synchronize on whether Proc 0 successfully read the data. 
03840 int globalReadDataSuccess = localReadDataSuccess; 03841 broadcast (*comm, 0, outArg (globalReadDataSuccess)); 03842 TEUCHOS_TEST_FOR_EXCEPTION( 03843 globalReadDataSuccess == 0, std::runtime_error, 03844 "Failed to read the multivector's data: " << exMsg.str ()); 03845 03846 // If there's only one MPI process and the user didn't supply 03847 // a Map (i.e., pMap is null), we're done. Set pMap to the 03848 // Map used to distribute X, and return X. 03849 if (comm->getSize () == 1 && map.is_null ()) { 03850 map = proc0Map; 03851 if (! err.is_null ()) { 03852 err->popTab (); 03853 } 03854 if (debug) { 03855 *err << myRank << ": readDenseImpl: done" << endl; 03856 } 03857 if (! err.is_null ()) { 03858 err->popTab (); 03859 } 03860 return X; 03861 } 03862 03863 if (debug) { 03864 *err << myRank << ": readDenseImpl: Creating target MV" << endl; 03865 } 03866 03867 // Make a multivector Y with the distributed map pMap. 03868 RCP<MV> Y = createMultiVector<ST, LO, GO, NT> (map, numCols); 03869 03870 if (debug) { 03871 *err << myRank << ": readDenseImpl: Creating Export" << endl; 03872 } 03873 03874 // Make an Export object that will export X to Y. First 03875 // argument is the source map, second argument is the target 03876 // map. 03877 Export<LO, GO, NT> exporter (proc0Map, map, err); 03878 03879 if (debug) { 03880 *err << myRank << ": readDenseImpl: Exporting" << endl; 03881 } 03882 // Export X into Y. 03883 Y->doExport (*X, exporter, INSERT); 03884 03885 if (! err.is_null ()) { 03886 err->popTab (); 03887 } 03888 if (debug) { 03889 *err << myRank << ": readDenseImpl: done" << endl; 03890 } 03891 if (! err.is_null ()) { 03892 err->popTab (); 03893 } 03894 03895 // Y is distributed over all process(es) in the communicator. 
03896 return Y; 03897 } 03898 03899 03900 template<class VectorScalarType> 03901 static RCP<Tpetra::Vector<VectorScalarType, 03902 local_ordinal_type, 03903 global_ordinal_type, 03904 node_type> > 03905 readVectorImpl (std::istream& in, 03906 const RCP<const comm_type>& comm, 03907 const RCP<node_type>& node, 03908 RCP<const map_type>& map, 03909 const Teuchos::RCP<Teuchos::FancyOStream>& err, 03910 const bool tolerant=false, 03911 const bool debug=false) 03912 { 03913 using Teuchos::MatrixMarket::Banner; 03914 using Teuchos::MatrixMarket::checkCommentLine; 03915 using Teuchos::as; 03916 using Teuchos::broadcast; 03917 using Teuchos::outArg; 03918 using std::endl; 03919 typedef VectorScalarType ST; 03920 typedef local_ordinal_type LO; 03921 typedef global_ordinal_type GO; 03922 typedef node_type NT; 03923 typedef Teuchos::ScalarTraits<ST> STS; 03924 typedef typename STS::magnitudeType MT; 03925 typedef Teuchos::ScalarTraits<MT> STM; 03926 typedef Tpetra::Vector<ST, LO, GO, NT> MV; 03927 03928 // Rank 0 is the only (MPI) process allowed to read from the 03929 // input stream. 03930 const int myRank = comm->getRank (); 03931 03932 if (! err.is_null ()) { 03933 err->pushTab (); 03934 } 03935 if (debug) { 03936 *err << myRank << ": readDenseImpl" << endl; 03937 } 03938 if (! err.is_null ()) { 03939 err->pushTab (); 03940 } 03941 03942 // mfh 17 Feb 2013: It's not strictly necessary that the Comm 03943 // instances be identical and that the Node instances be 03944 // identical. The essential condition is more complicated to 03945 // test and isn't the same for all Node types. Thus, we just 03946 // leave it up to the user. 03947 03948 // // If map is nonnull, check the precondition that its 03949 // // communicator resp. node equal comm resp. node. Checking 03950 // // now avoids doing a lot of file reading before we detect the 03951 // // violated precondition. 03952 // TEUCHOS_TEST_FOR_EXCEPTION( 03953 // ! 
map.is_null() && (map->getComm() != comm || map->getNode () != node, 03954 // std::invalid_argument, "If you supply a nonnull Map, the Map's " 03955 // "communicator and node must equal the supplied communicator resp. " 03956 // "node."); 03957 03958 // Process 0 will read in the matrix dimensions from the file, 03959 // and broadcast them to all ranks in the given communicator. 03960 // There are only 2 dimensions in the matrix, but we use the 03961 // third element of the Tuple to encode the banner's reported 03962 // data type: "real" == 0, "complex" == 1, and "integer" == 0 03963 // (same as "real"). We don't allow pattern matrices (i.e., 03964 // graphs) since they only make sense for sparse data. 03965 Tuple<GO, 3> dims; 03966 dims[0] = 0; 03967 dims[1] = 0; 03968 03969 // Current line number in the input stream. Only valid on 03970 // Proc 0. Various calls will modify this depending on the 03971 // number of lines that are read from the input stream. 03972 size_t lineNumber = 1; 03973 03974 // Capture errors and their messages on Proc 0. 03975 std::ostringstream exMsg; 03976 int localBannerReadSuccess = 1; 03977 int localDimsReadSuccess = 1; 03978 03979 // Only Proc 0 gets to read matrix data from the input stream. 03980 if (myRank == 0) { 03981 if (debug) { 03982 *err << myRank << ": readDenseImpl: Reading banner line (dense)" << endl; 03983 } 03984 03985 // The "Banner" tells you whether the input stream 03986 // represents a dense matrix, the symmetry type of the 03987 // matrix, and the type of the data it contains. 03988 RCP<const Banner> pBanner; 03989 try { 03990 pBanner = readBanner (in, lineNumber, tolerant, debug); 03991 } catch (std::exception& e) { 03992 exMsg << e.what (); 03993 localBannerReadSuccess = 0; 03994 } 03995 // Make sure the input stream is the right kind of data. 03996 if (localBannerReadSuccess) { 03997 if (pBanner->matrixType () != "array") { 03998 exMsg << "The Matrix Market file does not contain dense matrix " 03999 "data. 
Its banner (first) line says that its matrix type is \"" 04000 << pBanner->matrixType () << "\", rather that the required " 04001 "\"array\"."; 04002 localBannerReadSuccess = 0; 04003 } else if (pBanner->dataType() == "pattern") { 04004 exMsg << "The Matrix Market file's banner (first) " 04005 "line claims that the matrix's data type is \"pattern\". This does " 04006 "not make sense for a dense matrix, yet the file reports the matrix " 04007 "as dense. The only valid data types for a dense matrix are " 04008 "\"real\", \"complex\", and \"integer\"."; 04009 localBannerReadSuccess = 0; 04010 } else { 04011 // Encode the data type reported by the Banner as the 04012 // third element of the dimensions Tuple. 04013 dims[2] = encodeDataType (pBanner->dataType ()); 04014 } 04015 } // if we successfully read the banner line 04016 04017 // At this point, we've successfully read the banner line. 04018 // Now read the dimensions line. 04019 if (localBannerReadSuccess) { 04020 if (debug) { 04021 *err << myRank << ": readDenseImpl: Reading dimensions line (dense)" << endl; 04022 } 04023 // Keep reading lines from the input stream until we find 04024 // a non-comment line, or until we run out of lines. The 04025 // latter is an error, since every "array" format Matrix 04026 // Market file must have a dimensions line after the 04027 // banner (even if the matrix has zero rows or columns, or 04028 // zero entries). 04029 std::string line; 04030 bool commentLine = true; 04031 04032 while (commentLine) { 04033 // Test whether it is even valid to read from the input 04034 // stream wrapping the line. 04035 if (in.eof () || in.fail ()) { 04036 exMsg << "Unable to get array dimensions line (at all) from line " 04037 << lineNumber << " of input stream. The input stream " 04038 << "claims that it is " 04039 << (in.eof() ? "at end-of-file." : "in a failed state."); 04040 localDimsReadSuccess = 0; 04041 } else { 04042 // Try to get the next line from the input stream. 
04043 if (getline (in, line)) { 04044 ++lineNumber; // We did actually read a line. 04045 } 04046 // Is the current line a comment line? Ignore start 04047 // and size; they are only useful for reading the 04048 // actual matrix entries. (We could use them here as 04049 // an optimization, but we've chosen not to.) 04050 size_t start = 0, size = 0; 04051 commentLine = checkCommentLine (line, start, size, lineNumber, tolerant); 04052 } // whether we failed to read the line at all 04053 } // while the line we just read is a comment line 04054 04055 // 04056 // Get <numRows> <numCols> from the line we just read. 04057 // 04058 std::istringstream istr (line); 04059 04060 // Test whether it is even valid to read from the input 04061 // stream wrapping the line. 04062 if (istr.eof () || istr.fail ()) { 04063 exMsg << "Unable to read any data from line " << lineNumber 04064 << " of input; the line should contain the matrix dimensions " 04065 << "\"<numRows> <numCols>\"."; 04066 localDimsReadSuccess = 0; 04067 } else { // It's valid to read from the line. 04068 GO theNumRows = 0; 04069 istr >> theNumRows; // Read in the number of rows. 04070 if (istr.fail ()) { 04071 exMsg << "Failed to get number of rows from line " 04072 << lineNumber << " of input; the line should contains the " 04073 << "matrix dimensions \"<numRows> <numCols>\"."; 04074 localDimsReadSuccess = 0; 04075 } else { // We successfully read the number of rows 04076 dims[0] = theNumRows; // Save the number of rows 04077 if (istr.eof ()) { // Do we still have data to read? 04078 exMsg << "No more data after number of rows on line " 04079 << lineNumber << " of input; the line should contain the " 04080 << "matrix dimensions \"<numRows> <numCols>\"."; 04081 localDimsReadSuccess = 0; 04082 } else { // Still data left to read; read in number of columns. 
04083 GO theNumCols = 0; 04084 istr >> theNumCols; // Read in the number of columns 04085 if (istr.fail ()) { 04086 exMsg << "Failed to get number of columns from line " 04087 << lineNumber << " of input; the line should contain " 04088 << "the matrix dimensions \"<numRows> <numCols>\"."; 04089 localDimsReadSuccess = 0; 04090 } else { // We successfully read the number of columns 04091 dims[1] = theNumCols; // Save the number of columns 04092 } // if istr.fail () 04093 } // if istr.eof () 04094 } // if we read the number of rows 04095 } // if the input stream wrapping the dims line was (in)valid 04096 } // if we successfully read the banner line 04097 } // if (myRank == 0) 04098 04099 // Check if file has a Vector 04100 if (dims[1]!=1) { 04101 exMsg << "File does not contain a 1D Vector"; 04102 localDimsReadSuccess = 0; 04103 } 04104 04105 // Broadcast the matrix dimensions, the encoded data type, and 04106 // whether or not Proc 0 succeeded in reading the banner and 04107 // dimensions. 04108 Tuple<GO, 5> bannerDimsReadResult; 04109 if (myRank == 0) { 04110 bannerDimsReadResult[0] = dims[0]; // numRows 04111 bannerDimsReadResult[1] = dims[1]; // numCols 04112 bannerDimsReadResult[2] = dims[2]; // encoded data type 04113 bannerDimsReadResult[3] = localBannerReadSuccess; 04114 bannerDimsReadResult[4] = localDimsReadSuccess; 04115 } 04116 04117 // Broadcast matrix dimensions and the encoded data type from 04118 // Proc 0 to all the MPI processes. 
04119 broadcast (*comm, 0, bannerDimsReadResult); 04120 04121 TEUCHOS_TEST_FOR_EXCEPTION( 04122 bannerDimsReadResult[3] == 0, std::runtime_error, 04123 "Failed to read banner line: " << exMsg.str ()); 04124 TEUCHOS_TEST_FOR_EXCEPTION( 04125 bannerDimsReadResult[4] == 0, std::runtime_error, 04126 "Failed to read matrix dimensions line: " << exMsg.str ()); 04127 if (myRank != 0) { 04128 dims[0] = bannerDimsReadResult[0]; 04129 dims[1] = bannerDimsReadResult[1]; 04130 dims[2] = bannerDimsReadResult[2]; 04131 } 04132 04133 // Tpetra objects want the matrix dimensions in these types. 04134 const global_size_t numRows = static_cast<global_size_t> (dims[0]); 04135 const size_t numCols = static_cast<size_t> (dims[1]); 04136 04137 // Make a "Proc 0 owns everything" Map that we will use to 04138 // read in the multivector entries in the correct order on 04139 // Proc 0. This must be a collective 04140 RCP<const map_type> proc0Map; // "Proc 0 owns everything" Map 04141 if (map.is_null ()) { 04142 // The user didn't supply a Map. Make a contiguous 04143 // distributed Map for them, using the read-in multivector 04144 // dimensions. 04145 map = createUniformContigMapWithNode<LO, GO, NT> (numRows, comm, node); 04146 const size_t localNumRows = (myRank == 0) ? numRows : 0; 04147 proc0Map = createContigMapWithNode<LO, GO, NT> (numRows, localNumRows, comm, node); 04148 } 04149 else { // The user supplied a Map. 04150 proc0Map = Details::computeGatherMap<map_type> (map, err, debug); 04151 } 04152 04153 // Make a multivector X owned entirely by Proc 0. 04154 RCP<MV> X = createVector<ST, LO, GO, NT> (proc0Map); 04155 04156 // 04157 // On Proc 0, read the Matrix Market data from the input 04158 // stream into the multivector X. 04159 // 04160 int localReadDataSuccess = 1; 04161 if (myRank == 0) { 04162 try { 04163 if (debug) { 04164 *err << myRank << ": readDenseImpl: Reading matrix data (dense)" 04165 << endl; 04166 } 04167 04168 // Make sure that we can get a 1-D view of X. 
04169 TEUCHOS_TEST_FOR_EXCEPTION( 04170 ! X->isConstantStride (), std::logic_error, 04171 "Can't get a 1-D view of the entries of the MultiVector X on " 04172 "Process 0, because the stride between the columns of X is not " 04173 "constant. This shouldn't happen because we just created X and " 04174 "haven't filled it in yet. Please report this bug to the Tpetra " 04175 "developers."); 04176 04177 // Get a writeable 1-D view of the entries of X. Rank 0 04178 // owns all of them. The view will expire at the end of 04179 // scope, so (if necessary) it will be written back to X 04180 // at this time. 04181 ArrayRCP<ST> X_view = X->get1dViewNonConst (); 04182 TEUCHOS_TEST_FOR_EXCEPTION( 04183 as<global_size_t> (X_view.size ()) < numRows * numCols, 04184 std::logic_error, 04185 "The view of X has size " << X_view << " which is not enough to " 04186 "accommodate the expected number of entries numRows*numCols = " 04187 << numRows << "*" << numCols << " = " << numRows*numCols << ". " 04188 "Please report this bug to the Tpetra developers."); 04189 const size_t stride = X->getStride (); 04190 04191 // The third element of the dimensions Tuple encodes the data 04192 // type reported by the Banner: "real" == 0, "complex" == 1, 04193 // "integer" == 0 (same as "real"), "pattern" == 2. We do not 04194 // allow dense matrices to be pattern matrices, so dims[2] == 04195 // 0 or 1. We've already checked for this above. 04196 const bool isComplex = (dims[2] == 1); 04197 size_type count = 0, curRow = 0, curCol = 0; 04198 04199 std::string line; 04200 while (getline (in, line)) { 04201 ++lineNumber; 04202 // Is the current line a comment line? If it's not, 04203 // line.substr(start,size) contains the data. 04204 size_t start = 0, size = 0; 04205 const bool commentLine = 04206 checkCommentLine (line, start, size, lineNumber, tolerant); 04207 if (! commentLine) { 04208 // Make sure we have room in which to put the new matrix 04209 // entry. 
We check this only after checking for a 04210 // comment line, because there may be one or more 04211 // comment lines at the end of the file. In tolerant 04212 // mode, we simply ignore any extra data. 04213 if (count >= X_view.size()) { 04214 if (tolerant) { 04215 break; 04216 } 04217 else { 04218 TEUCHOS_TEST_FOR_EXCEPTION( 04219 count >= X_view.size(), 04220 std::runtime_error, 04221 "The Matrix Market input stream has more data in it than " 04222 "its metadata reported. Current line number is " 04223 << lineNumber << "."); 04224 } 04225 } 04226 04227 // mfh 19 Dec 2012: Ignore everything up to the initial 04228 // colon. writeDense() has the option to print out the 04229 // global row index in front of each entry, followed by 04230 // a colon and space. 04231 { 04232 const size_t pos = line.substr (start, size).find (':'); 04233 if (pos != std::string::npos) { 04234 start = pos+1; 04235 } 04236 } 04237 std::istringstream istr (line.substr (start, size)); 04238 // Does the line contain anything at all? Can we 04239 // safely read from the input stream wrapping the 04240 // line? 04241 if (istr.eof() || istr.fail()) { 04242 // In tolerant mode, simply ignore the line. 04243 if (tolerant) { 04244 break; 04245 } 04246 // We repeat the full test here so the exception 04247 // message is more informative. 04248 TEUCHOS_TEST_FOR_EXCEPTION( 04249 ! tolerant && (istr.eof() || istr.fail()), 04250 std::runtime_error, 04251 "Line " << lineNumber << " of the Matrix Market file is " 04252 "empty, or we cannot read from it for some other reason."); 04253 } 04254 // Current matrix entry to read in. 04255 ST val = STS::zero(); 04256 // Real and imaginary parts of the current matrix entry. 04257 // The imaginary part is zero if the matrix is real-valued. 04258 MT real = STM::zero(), imag = STM::zero(); 04259 04260 // isComplex refers to the input stream's data, not to 04261 // the scalar type S. 
It's OK to read real-valued 04262 // data into a matrix storing complex-valued data; in 04263 // that case, all entries' imaginary parts are zero. 04264 if (isComplex) { 04265 // STS::real() and STS::imag() return a copy of 04266 // their respective components, not a writeable 04267 // reference. Otherwise we could just assign to 04268 // them using the istream extraction operator (>>). 04269 // That's why we have separate magnitude type "real" 04270 // and "imag" variables. 04271 04272 // Attempt to read the real part of the current entry. 04273 istr >> real; 04274 if (istr.fail()) { 04275 TEUCHOS_TEST_FOR_EXCEPTION( 04276 ! tolerant && istr.eof(), std::runtime_error, 04277 "Failed to get the real part of a complex-valued matrix " 04278 "entry from line " << lineNumber << " of the Matrix Market " 04279 "file."); 04280 // In tolerant mode, just skip bad lines. 04281 if (tolerant) { 04282 break; 04283 } 04284 } else if (istr.eof()) { 04285 TEUCHOS_TEST_FOR_EXCEPTION( 04286 ! tolerant && istr.eof(), std::runtime_error, 04287 "Missing imaginary part of a complex-valued matrix entry " 04288 "on line " << lineNumber << " of the Matrix Market file."); 04289 // In tolerant mode, let any missing imaginary part be 0. 04290 } else { 04291 // Attempt to read the imaginary part of the current 04292 // matrix entry. 04293 istr >> imag; 04294 TEUCHOS_TEST_FOR_EXCEPTION( 04295 ! tolerant && istr.fail(), std::runtime_error, 04296 "Failed to get the imaginary part of a complex-valued " 04297 "matrix entry from line " << lineNumber << " of the " 04298 "Matrix Market file."); 04299 // In tolerant mode, let any missing or corrupted 04300 // imaginary part be 0. 04301 } 04302 } else { // Matrix Market file contains real-valued data. 04303 // Attempt to read the current matrix entry. 04304 istr >> real; 04305 TEUCHOS_TEST_FOR_EXCEPTION( 04306 ! 
tolerant && istr.fail(), std::runtime_error, 04307 "Failed to get a real-valued matrix entry from line " 04308 << lineNumber << " of the Matrix Market file."); 04309 // In tolerant mode, simply ignore the line if 04310 // we failed to read a matrix entry. 04311 if (istr.fail() && tolerant) { 04312 break; 04313 } 04314 } 04315 // In tolerant mode, we simply let pass through whatever 04316 // data we got. 04317 TEUCHOS_TEST_FOR_EXCEPTION( 04318 ! tolerant && istr.fail(), std::runtime_error, 04319 "Failed to read matrix data from line " << lineNumber 04320 << " of the Matrix Market file."); 04321 04322 // Assign val = ST(real, imag). 04323 Teuchos::MatrixMarket::details::assignScalar<ST> (val, real, imag); 04324 04325 curRow = count % numRows; 04326 curCol = count / numRows; 04327 X_view[curRow + curCol*stride] = val; 04328 ++count; 04329 } // if not a comment line 04330 } // while there are still lines in the file, get the next one 04331 04332 TEUCHOS_TEST_FOR_EXCEPTION( 04333 ! tolerant && static_cast<global_size_t> (count) < numRows * numCols, 04334 std::runtime_error, 04335 "The Matrix Market metadata reports that the dense matrix is " 04336 << numRows << " x " << numCols << ", and thus has " 04337 << numRows*numCols << " total entries, but we only found " << count 04338 << " entr" << (count == 1 ? "y" : "ies") << " in the file."); 04339 } catch (std::exception& e) { 04340 exMsg << e.what (); 04341 localReadDataSuccess = 0; 04342 } 04343 } // if (myRank == 0) 04344 04345 if (debug) { 04346 *err << myRank << ": readDenseImpl: done reading data" << endl; 04347 } 04348 04349 // Synchronize on whether Proc 0 successfully read the data. 
04350 int globalReadDataSuccess = localReadDataSuccess; 04351 broadcast (*comm, 0, outArg (globalReadDataSuccess)); 04352 TEUCHOS_TEST_FOR_EXCEPTION( 04353 globalReadDataSuccess == 0, std::runtime_error, 04354 "Failed to read the multivector's data: " << exMsg.str ()); 04355 04356 // If there's only one MPI process and the user didn't supply 04357 // a Map (i.e., pMap is null), we're done. Set pMap to the 04358 // Map used to distribute X, and return X. 04359 if (comm->getSize () == 1 && map.is_null ()) { 04360 map = proc0Map; 04361 if (! err.is_null ()) { 04362 err->popTab (); 04363 } 04364 if (debug) { 04365 *err << myRank << ": readDenseImpl: done" << endl; 04366 } 04367 if (! err.is_null ()) { 04368 err->popTab (); 04369 } 04370 return X; 04371 } 04372 04373 if (debug) { 04374 *err << myRank << ": readDenseImpl: Creating target MV" << endl; 04375 } 04376 04377 // Make a multivector Y with the distributed map pMap. 04378 RCP<MV> Y = createVector<ST, LO, GO, NT> (map); 04379 04380 if (debug) { 04381 *err << myRank << ": readDenseImpl: Creating Export" << endl; 04382 } 04383 04384 // Make an Export object that will export X to Y. First 04385 // argument is the source map, second argument is the target 04386 // map. 04387 Export<LO, GO, NT> exporter (proc0Map, map, err); 04388 04389 if (debug) { 04390 *err << myRank << ": readDenseImpl: Exporting" << endl; 04391 } 04392 // Export X into Y. 04393 Y->doExport (*X, exporter, INSERT); 04394 04395 if (! err.is_null ()) { 04396 err->popTab (); 04397 } 04398 if (debug) { 04399 *err << myRank << ": readDenseImpl: done" << endl; 04400 } 04401 if (! err.is_null ()) { 04402 err->popTab (); 04403 } 04404 04405 // Y is distributed over all process(es) in the communicator. 
04406 return Y; 04407 } 04408 04409 public: 04430 static RCP<const map_type> 04431 readMap (std::istream& in, 04432 const RCP<const comm_type>& comm, 04433 const RCP<node_type>& node, 04434 const bool tolerant=false, 04435 const bool debug=false) 04436 { 04437 Teuchos::RCP<Teuchos::FancyOStream> err = 04438 Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr)); 04439 return readMap (in, comm, node, err, tolerant, debug); 04440 } 04441 04468 static RCP<const map_type> 04469 readMap (std::istream& in, 04470 const RCP<const comm_type>& comm, 04471 const RCP<node_type>& node, 04472 const Teuchos::RCP<Teuchos::FancyOStream>& err, 04473 const bool tolerant=false, 04474 const bool debug=false) 04475 { 04476 using Teuchos::arcp; 04477 using Teuchos::Array; 04478 using Teuchos::ArrayRCP; 04479 using Teuchos::as; 04480 using Teuchos::broadcast; 04481 using Teuchos::Comm; 04482 using Teuchos::CommRequest; 04483 using Teuchos::inOutArg; 04484 using Teuchos::ireceive; 04485 using Teuchos::outArg; 04486 using Teuchos::receive; 04487 using Teuchos::reduceAll; 04488 using Teuchos::REDUCE_MIN; 04489 using Teuchos::isend; 04490 using Teuchos::SerialComm; 04491 using Teuchos::toString; 04492 using Teuchos::wait; 04493 using std::endl; 04494 typedef Tpetra::global_size_t GST; 04495 typedef ptrdiff_t int_type; // Can hold int and GO 04496 typedef local_ordinal_type LO; 04497 typedef global_ordinal_type GO; 04498 typedef node_type NT; 04499 typedef Tpetra::MultiVector<GO, LO, GO, NT> MV; 04500 04501 const int numProcs = comm->getSize (); 04502 const int myRank = comm->getRank (); 04503 04504 if (err.is_null ()) { 04505 err->pushTab (); 04506 } 04507 if (debug) { 04508 std::ostringstream os; 04509 os << myRank << ": readMap: " << endl; 04510 *err << os.str (); 04511 } 04512 if (err.is_null ()) { 04513 err->pushTab (); 04514 } 04515 04516 // Tag for receive-size / send-size messages. writeMap used 04517 // tags 1337 and 1338; we count up from there. 
04518 const int sizeTag = 1339; 04519 // Tag for receive-data / send-data messages. 04520 const int dataTag = 1340; 04521 04522 // These are for sends on Process 0, and for receives on all 04523 // other processes. sizeReq is for the {receive,send}-size 04524 // message, and dataReq is for the message containing the 04525 // actual GIDs to belong to the receiving process. 04526 RCP<CommRequest<int> > sizeReq; 04527 RCP<CommRequest<int> > dataReq; 04528 04529 // Each process will have to receive the number of GIDs to 04530 // expect. Thus, we can post the receives now, and cancel 04531 // them if something should go wrong in the meantime. 04532 ArrayRCP<int_type> numGidsToRecv (1); 04533 numGidsToRecv[0] = 0; 04534 if (myRank != 0) { 04535 sizeReq = ireceive<int, int_type> (numGidsToRecv, 0, sizeTag, *comm); 04536 } 04537 04538 int readSuccess = 1; 04539 std::ostringstream exMsg; 04540 RCP<MV> data; // Will only be valid on Proc 0 04541 if (myRank == 0) { 04542 // If we want to reuse readDenseImpl, we have to make a 04543 // communicator that only contains Proc 0. Otherwise, 04544 // readDenseImpl will redistribute the data to all 04545 // processes. While we eventually want that, neither we nor 04546 // readDenseImpl know the correct Map to use at the moment. 04547 // That depends on the second column of the multivector. 04548 RCP<const Comm<int> > proc0Comm (new SerialComm<int> ()); 04549 try { 04550 RCP<const map_type> dataMap; 04551 // This is currently the only place where we use the 04552 // 'tolerant' argument. Later, if we want to be clever, 04553 // we could have tolerant mode allow PIDs out of order. 04554 data = readDenseImpl<GO> (in, proc0Comm, node, dataMap, 04555 err, tolerant, debug); 04556 (void) dataMap; // Silence "unused" warnings 04557 if (data.is_null ()) { 04558 readSuccess = 0; 04559 exMsg << "readDenseImpl() returned null." 
<< endl; 04560 } 04561 } catch (std::exception& e) { 04562 readSuccess = 0; 04563 exMsg << e.what () << endl; 04564 } 04565 } 04566 04567 // Map from PID to all the GIDs for that PID. 04568 // Only populated on Process 0. 04569 std::map<int, Array<GO> > pid2gids; 04570 04571 // The index base must be the global minimum GID. 04572 // We will compute this on Process 0 and broadcast, 04573 // so that all processes can set up the Map. 04574 int_type globalNumGIDs = 0; 04575 04576 // The index base must be the global minimum GID. 04577 // We will compute this on Process 0 and broadcast, 04578 // so that all processes can set up the Map. 04579 GO indexBase = 0; 04580 04581 // Process 0: If the above read of the MultiVector succeeded, 04582 // extract the GIDs and PIDs into pid2gids, and find the 04583 // global min GID. 04584 if (myRank == 0 && readSuccess == 1) { 04585 if (data->getNumVectors () == 2) { // Map format 1.0 04586 ArrayRCP<const GO> GIDs = data->getData (0); 04587 ArrayRCP<const GO> PIDs = data->getData (1); // convert to int 04588 globalNumGIDs = GIDs.size (); 04589 04590 // Start computing the global min GID, while collecting 04591 // the GIDs for each PID. 04592 if (globalNumGIDs > 0) { 04593 const int pid = static_cast<int> (PIDs[0]); 04594 04595 if (pid < 0 || pid >= numProcs) { 04596 readSuccess = 0; 04597 exMsg << "Tpetra::MatrixMarket::readMap: " 04598 << "Encountered invalid PID " << pid << "." << endl; 04599 } 04600 else { 04601 const GO gid = GIDs[0]; 04602 pid2gids[pid].push_back (gid); 04603 indexBase = gid; // the current min GID 04604 } 04605 } 04606 if (readSuccess == 1) { 04607 // Collect the rest of the GIDs for each PID, and compute 04608 // the global min GID. 04609 for (size_type k = 1; k < globalNumGIDs; ++k) { 04610 const int pid = static_cast<int> (PIDs[k]); 04611 if (pid < 0 || pid >= numProcs) { 04612 readSuccess = 0; 04613 exMsg << "Tpetra::MatrixMarket::readMap: " 04614 << "Encountered invalid PID " << pid << "." 
<< endl; 04615 } 04616 else { 04617 const int_type gid = GIDs[k]; 04618 pid2gids[pid].push_back (gid); 04619 if (gid < indexBase) { 04620 indexBase = gid; // the current min GID 04621 } 04622 } 04623 } 04624 } 04625 } 04626 else if (data->getNumVectors () == 1) { // Map format 2.0 04627 if (data->getGlobalLength () % 2 != static_cast<GST> (0)) { 04628 readSuccess = 0; 04629 exMsg << "Tpetra::MatrixMarket::readMap: Input data has the " 04630 "wrong format (for Map format 2.0). The global number of rows " 04631 "in the MultiVector must be even (divisible by 2)." << endl; 04632 } 04633 else { 04634 ArrayRCP<const GO> theData = data->getData (0); 04635 globalNumGIDs = static_cast<GO> (data->getGlobalLength ()) / 04636 static_cast<GO> (2); 04637 04638 // Start computing the global min GID, while 04639 // collecting the GIDs for each PID. 04640 if (globalNumGIDs > 0) { 04641 const int pid = static_cast<int> (theData[1]); 04642 if (pid < 0 || pid >= numProcs) { 04643 readSuccess = 0; 04644 exMsg << "Tpetra::MatrixMarket::readMap: " 04645 << "Encountered invalid PID " << pid << "." << endl; 04646 } 04647 else { 04648 const GO gid = theData[0]; 04649 pid2gids[pid].push_back (gid); 04650 indexBase = gid; // the current min GID 04651 } 04652 } 04653 // Collect the rest of the GIDs for each PID, and 04654 // compute the global min GID. 04655 for (int_type k = 1; k < globalNumGIDs; ++k) { 04656 const int pid = static_cast<int> (theData[2*k + 1]); 04657 if (pid < 0 || pid >= numProcs) { 04658 readSuccess = 0; 04659 exMsg << "Tpetra::MatrixMarket::readMap: " 04660 << "Encountered invalid PID " << pid << "." 
<< endl; 04661 } 04662 else { 04663 const GO gid = theData[2*k]; 04664 pid2gids[pid].push_back (gid); 04665 if (gid < indexBase) { 04666 indexBase = gid; // the current min GID 04667 } 04668 } 04669 } // for each GID 04670 } // if the amount of data is correct 04671 } 04672 else { 04673 readSuccess = 0; 04674 exMsg << "Tpetra::MatrixMarket::readMap: Input data must have " 04675 "either 1 column (for the new Map format 2.0) or 2 columns (for " 04676 "the old Map format 1.0)."; 04677 } 04678 } // myRank is zero 04679 04680 // Broadcast the indexBase, the global number of GIDs, and the 04681 // current success status. Use int_type for all of these. 04682 { 04683 int_type readResults[3]; 04684 readResults[0] = static_cast<int_type> (indexBase); 04685 readResults[1] = static_cast<int_type> (globalNumGIDs); 04686 readResults[2] = static_cast<int_type> (readSuccess); 04687 broadcast<int, int_type> (*comm, 0, 3, readResults); 04688 04689 indexBase = static_cast<GO> (readResults[0]); 04690 globalNumGIDs = static_cast<int_type> (readResults[1]); 04691 readSuccess = static_cast<int> (readResults[2]); 04692 } 04693 04694 // Unwinding the stack will invoke sizeReq's destructor, which 04695 // will cancel the receive-size request on all processes that 04696 // posted it. 04697 TEUCHOS_TEST_FOR_EXCEPTION( 04698 readSuccess != 1, std::runtime_error, 04699 "Tpetra::MatrixMarket::readMap: Reading the Map failed with the " 04700 "following exception message: " << exMsg.str ()); 04701 04702 if (myRank == 0) { 04703 // Proc 0: Send each process' number of GIDs to that process. 
04704 for (int p = 1; p < numProcs; ++p) { 04705 ArrayRCP<int_type> numGidsToSend (1); 04706 04707 typename std::map<int, Array<GO> >::const_iterator it = pid2gids.find (p); 04708 if (it == pid2gids.end ()) { 04709 numGidsToSend[0] = 0; 04710 } else { 04711 numGidsToSend[0] = it->second.size (); 04712 } 04713 sizeReq = isend<int, int_type> (numGidsToSend, p, sizeTag, *comm); 04714 wait<int> (*comm, outArg (sizeReq)); 04715 } 04716 } 04717 else { 04718 // Wait on the receive-size message to finish. 04719 wait<int> (*comm, outArg (sizeReq)); 04720 } 04721 04722 // Allocate / get the array for my GIDs. 04723 // Only Process 0 will have its actual GIDs at this point. 04724 ArrayRCP<GO> myGids; 04725 int_type myNumGids = 0; 04726 if (myRank == 0) { 04727 GO* myGidsRaw = NULL; 04728 04729 typename std::map<int, Array<GO> >::iterator it = pid2gids.find (0); 04730 if (it != pid2gids.end ()) { 04731 myGidsRaw = it->second.getRawPtr (); 04732 myNumGids = it->second.size (); 04733 // Nonowning ArrayRCP just views the Array. 04734 myGids = arcp<GO> (myGidsRaw, 0, myNumGids, false); 04735 } 04736 } 04737 else { // myRank != 0 04738 myNumGids = numGidsToRecv[0]; 04739 myGids = arcp<GO> (myNumGids); 04740 } 04741 04742 if (myRank != 0) { 04743 // Post receive for data, now that we know how much data we 04744 // will receive. Only post receive if my process actually 04745 // has nonzero GIDs. 
04746 if (myNumGids > 0) { 04747 dataReq = ireceive<int, GO> (myGids, 0, dataTag, *comm); 04748 } 04749 } 04750 04751 for (int p = 1; p < numProcs; ++p) { 04752 if (myRank == 0) { 04753 ArrayRCP<GO> sendGids; // to send to Process p 04754 GO* sendGidsRaw = NULL; 04755 int_type numSendGids = 0; 04756 04757 typename std::map<int, Array<GO> >::iterator it = pid2gids.find (p); 04758 if (it != pid2gids.end ()) { 04759 numSendGids = it->second.size (); 04760 sendGidsRaw = it->second.getRawPtr (); 04761 sendGids = arcp<GO> (sendGidsRaw, 0, numSendGids, false); 04762 } 04763 // Only send if that process actually has nonzero GIDs. 04764 if (numSendGids > 0) { 04765 dataReq = isend<int, GO> (sendGids, p, dataTag, *comm); 04766 } 04767 wait<int> (*comm, outArg (dataReq)); 04768 } 04769 else if (myRank == p) { 04770 // Wait on my receive of GIDs to finish. 04771 wait<int> (*comm, outArg (dataReq)); 04772 } 04773 } // for each process rank p in 1, 2, ..., numProcs-1 04774 04775 if (debug) { 04776 std::ostringstream os; 04777 os << myRank << ": readMap: creating Map" << endl; 04778 *err << os.str (); 04779 } 04780 const GST INVALID = Teuchos::OrdinalTraits<GST>::invalid (); 04781 RCP<const map_type> newMap = 04782 rcp (new map_type (INVALID, myGids (), indexBase, comm, node)); 04783 04784 if (err.is_null ()) { 04785 err->popTab (); 04786 } 04787 if (debug) { 04788 std::ostringstream os; 04789 os << myRank << ": readMap: done" << endl; 04790 *err << os.str (); 04791 } 04792 if (err.is_null ()) { 04793 err->popTab (); 04794 } 04795 return newMap; 04796 } 04797 04798 private: 04799 04810 static int 04811 encodeDataType (const std::string& dataType) 04812 { 04813 if (dataType == "real" || dataType == "integer") { 04814 return 0; 04815 } else if (dataType == "complex") { 04816 return 1; 04817 } else if (dataType == "pattern") { 04818 return 2; 04819 } else { 04820 // We should never get here, since Banner validates the 04821 // reported data type and ensures it is one of the accepted 
04822 // values. 04823 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 04824 "Unrecognized Matrix Market data type \"" << dataType 04825 << "\". We should never get here. " 04826 "Please report this bug to the Tpetra developers."); 04827 } 04828 } 04829 }; 04830 04859 template<class SparseMatrixType> 04860 class Writer { 04861 public: 04862 typedef SparseMatrixType sparse_matrix_type; 04863 typedef RCP<sparse_matrix_type> sparse_matrix_ptr; 04864 04866 typedef typename SparseMatrixType::scalar_type scalar_type; 04868 typedef typename SparseMatrixType::local_ordinal_type local_ordinal_type; 04874 typedef typename SparseMatrixType::global_ordinal_type global_ordinal_type; 04876 typedef typename SparseMatrixType::node_type node_type; 04877 04879 typedef MultiVector<scalar_type, 04880 local_ordinal_type, 04881 global_ordinal_type, 04882 node_type> multivector_type; 04884 typedef Map<local_ordinal_type, global_ordinal_type, node_type> map_type; 04885 04917 static void 04918 writeSparseFile (const std::string& filename, 04919 const Teuchos::RCP<const sparse_matrix_type>& pMatrix, 04920 const std::string& matrixName, 04921 const std::string& matrixDescription, 04922 const bool debug=false) 04923 { 04924 TEUCHOS_TEST_FOR_EXCEPTION( 04925 pMatrix.is_null (), std::invalid_argument, 04926 "The input matrix is null."); 04927 Teuchos::RCP<const Teuchos::Comm<int> > comm = pMatrix->getComm (); 04928 TEUCHOS_TEST_FOR_EXCEPTION( 04929 comm.is_null (), std::invalid_argument, 04930 "The input matrix's communicator (Teuchos::Comm object) is null."); 04931 const int myRank = comm->getRank (); 04932 std::ofstream out; 04933 04934 // Only open the file on Rank 0. 04935 if (myRank == 0) { 04936 out.open (filename.c_str ()); 04937 } 04938 writeSparse (out, pMatrix, matrixName, matrixDescription, debug); 04939 // We can rely on the destructor of the output stream to close 04940 // the file on scope exit, even if writeSparse() throws an 04941 // exception. 
04942 } 04943 04964 static void 04965 writeSparseFile (const std::string& filename, 04966 const Teuchos::RCP<const sparse_matrix_type>& pMatrix, 04967 const bool debug=false) 04968 { 04969 writeSparseFile (filename, pMatrix, "", "", debug); 04970 } 04971 05002 static void 05003 writeSparse (std::ostream& out, 05004 const Teuchos::RCP<const sparse_matrix_type>& pMatrix, 05005 const std::string& matrixName, 05006 const std::string& matrixDescription, 05007 const bool debug=false) 05008 { 05009 using Teuchos::Comm; 05010 using Teuchos::FancyOStream; 05011 using Teuchos::getFancyOStream; 05012 using Teuchos::null; 05013 using Teuchos::RCP; 05014 using Teuchos::rcpFromRef; 05015 using std::cerr; 05016 using std::endl; 05017 typedef scalar_type ST; 05018 typedef local_ordinal_type LO; 05019 typedef global_ordinal_type GO; 05020 typedef typename Teuchos::ScalarTraits<ST> STS; 05021 typedef typename ArrayView<const LO>::const_iterator lo_iter; 05022 typedef typename ArrayView<const GO>::const_iterator go_iter; 05023 typedef typename ArrayView<const ST>::const_iterator st_iter; 05024 05025 TEUCHOS_TEST_FOR_EXCEPTION( 05026 pMatrix.is_null (), std::invalid_argument, 05027 "The input matrix is null."); 05028 05029 // Make the output stream write floating-point numbers in 05030 // scientific notation. It will politely put the output 05031 // stream back to its state on input, when this scope 05032 // terminates. 05033 Teuchos::MatrixMarket::details::SetScientific<ST> sci (out); 05034 05035 // Get the matrix's communicator. 05036 RCP<const Comm<int> > comm = pMatrix->getComm (); 05037 TEUCHOS_TEST_FOR_EXCEPTION( 05038 comm.is_null (), std::invalid_argument, 05039 "The input matrix's communicator (Teuchos::Comm object) is null."); 05040 const int myRank = comm->getRank (); 05041 05042 // Optionally, make a stream for debugging output. 05043 RCP<FancyOStream> err = 05044 debug ? 
getFancyOStream (rcpFromRef (std::cerr)) : null; 05045 if (debug) { 05046 std::ostringstream os; 05047 os << myRank << ": writeSparse" << endl; 05048 *err << os.str (); 05049 comm->barrier (); 05050 os << "-- " << myRank << ": past barrier" << endl; 05051 *err << os.str (); 05052 } 05053 05054 // Whether to print debugging output to stderr. 05055 const bool debugPrint = debug && myRank == 0; 05056 05057 RCP<const map_type> rowMap = pMatrix->getRowMap (); 05058 RCP<const map_type> colMap = pMatrix->getColMap (); 05059 RCP<const map_type> domainMap = pMatrix->getDomainMap (); 05060 RCP<const map_type> rangeMap = pMatrix->getRangeMap (); 05061 05062 const global_size_t numRows = rangeMap->getGlobalNumElements (); 05063 const global_size_t numCols = domainMap->getGlobalNumElements (); 05064 05065 if (debug && myRank == 0) { 05066 std::ostringstream os; 05067 os << "-- Input sparse matrix is:" 05068 << "---- " << numRows << " x " << numCols << " with " 05069 << pMatrix->getGlobalNumEntries() << " entries;" << endl 05070 << "---- " 05071 << (pMatrix->isGloballyIndexed() ? "Globally" : "Locally") 05072 << " indexed." << endl 05073 << "---- Its row map has " << rowMap->getGlobalNumElements () 05074 << " elements." << endl 05075 << "---- Its col map has " << colMap->getGlobalNumElements () 05076 << " elements." << endl; 05077 *err << os.str (); 05078 } 05079 // Make the "gather" row map, where Proc 0 owns all rows and 05080 // the other procs own no rows. 05081 const size_t localNumRows = (myRank == 0) ? numRows : 0; 05082 RCP<node_type> node = rowMap->getNode(); 05083 if (debug) { 05084 std::ostringstream os; 05085 os << "-- " << myRank << ": making gatherRowMap" << endl; 05086 *err << os.str (); 05087 } 05088 RCP<const map_type> gatherRowMap = 05089 Details::computeGatherMap (rowMap, err, debug); 05090 05091 // Since the matrix may in general be non-square, we need to 05092 // make a column map as well. 
In this case, the column map 05093 // contains all the columns of the original matrix, because we 05094 // are gathering the whole matrix onto Proc 0. We call 05095 // computeGatherMap to preserve the original order of column 05096 // indices over all the processes. 05097 const size_t localNumCols = (myRank == 0) ? numCols : 0; 05098 RCP<const map_type> gatherColMap = 05099 Details::computeGatherMap (colMap, err, debug); 05100 05101 // Current map is the source map, gather map is the target map. 05102 typedef Import<LO, GO, node_type> import_type; 05103 import_type importer (rowMap, gatherRowMap); 05104 05105 // Create a new CrsMatrix to hold the result of the import. 05106 // The constructor needs a column map as well as a row map, 05107 // for the case that the matrix is not square. 05108 RCP<sparse_matrix_type> newMatrix = 05109 rcp (new sparse_matrix_type (gatherRowMap, gatherColMap, 05110 static_cast<size_t> (0))); 05111 // Import the sparse matrix onto Proc 0. 05112 newMatrix->doImport (*pMatrix, importer, INSERT); 05113 05114 // fillComplete() needs the domain and range maps for the case 05115 // that the matrix is not square. 05116 { 05117 RCP<const map_type> gatherDomainMap = 05118 rcp (new map_type (numCols, localNumCols, 05119 domainMap->getIndexBase (), 05120 comm, node)); 05121 RCP<const map_type> gatherRangeMap = 05122 rcp (new map_type (numRows, localNumRows, 05123 rangeMap->getIndexBase (), 05124 comm, node)); 05125 newMatrix->fillComplete (gatherDomainMap, gatherRangeMap); 05126 } 05127 05128 if (debugPrint) { 05129 cerr << "-- Output sparse matrix is:" 05130 << "---- " << newMatrix->getRangeMap ()->getGlobalNumElements () 05131 << " x " 05132 << newMatrix->getDomainMap ()->getGlobalNumElements () 05133 << " with " 05134 << newMatrix->getGlobalNumEntries () << " entries;" << endl 05135 << "---- " 05136 << (newMatrix->isGloballyIndexed () ? "Globally" : "Locally") 05137 << " indexed." 
<< endl 05138 << "---- Its row map has " 05139 << newMatrix->getRowMap ()->getGlobalNumElements () 05140 << " elements, with index base " 05141 << newMatrix->getRowMap ()->getIndexBase () << "." << endl 05142 << "---- Its col map has " 05143 << newMatrix->getColMap ()->getGlobalNumElements () 05144 << " elements, with index base " 05145 << newMatrix->getColMap ()->getIndexBase () << "." << endl 05146 << "---- Element count of output matrix's column Map may differ " 05147 << "from that of the input matrix's column Map, if some columns " 05148 << "of the matrix contain no entries." << endl; 05149 } 05150 05151 // 05152 // Print the metadata and the matrix entries on Rank 0. 05153 // 05154 if (myRank == 0) { 05155 // Print the Matrix Market banner line. CrsMatrix stores 05156 // data nonsymmetrically ("general"). This implies that 05157 // readSparse() on a symmetrically stored input file, 05158 // followed by writeSparse() on the resulting sparse matrix, 05159 // will result in an output file with a different banner 05160 // line than the original input file. 05161 out << "%%MatrixMarket matrix coordinate " 05162 << (STS::isComplex ? "complex" : "real") 05163 << " general" << endl; 05164 05165 // Print comments (the matrix name and / or description). 05166 if (matrixName != "") { 05167 printAsComment (out, matrixName); 05168 } 05169 if (matrixDescription != "") { 05170 printAsComment (out, matrixDescription); 05171 } 05172 05173 // Print the Matrix Market header (# rows, # columns, # 05174 // nonzeros). Use the range resp. domain map for the number 05175 // of rows resp. columns, since Tpetra::CrsMatrix uses the 05176 // column map for the number of columns. That only 05177 // corresponds to the "linear-algebraic" number of columns 05178 // when the column map is uniquely owned (a.k.a. one-to-one), 05179 // which only happens if the matrix is (block) diagonal. 
05180 out << newMatrix->getRangeMap ()->getGlobalNumElements () << " " 05181 << newMatrix->getDomainMap ()->getGlobalNumElements () << " " 05182 << newMatrix->getGlobalNumEntries () << endl; 05183 05184 // The Matrix Market format expects one-based row and column 05185 // indices. We'll convert the indices on output from 05186 // whatever index base they use to one-based indices. 05187 const GO rowIndexBase = gatherRowMap->getIndexBase (); 05188 const GO colIndexBase = newMatrix->getColMap()->getIndexBase (); 05189 // 05190 // Print the entries of the matrix. 05191 // 05192 // newMatrix can never be globally indexed, since we called 05193 // fillComplete() on it. We include code for both cases 05194 // (globally or locally indexed) just in case that ever 05195 // changes. 05196 if (newMatrix->isGloballyIndexed()) { 05197 // We know that the "gather" row Map is contiguous, so we 05198 // don't need to get the list of GIDs. 05199 const GO minAllGlobalIndex = gatherRowMap->getMinAllGlobalIndex (); 05200 const GO maxAllGlobalIndex = gatherRowMap->getMaxAllGlobalIndex (); 05201 for (GO globalRowIndex = minAllGlobalIndex; 05202 globalRowIndex <= maxAllGlobalIndex; // inclusive range 05203 ++globalRowIndex) { 05204 ArrayView<const GO> ind; 05205 ArrayView<const ST> val; 05206 newMatrix->getGlobalRowView (globalRowIndex, ind, val); 05207 go_iter indIter = ind.begin (); 05208 st_iter valIter = val.begin (); 05209 for (; indIter != ind.end() && valIter != val.end(); 05210 ++indIter, ++valIter) { 05211 const GO globalColIndex = *indIter; 05212 // Convert row and column indices to 1-based. 05213 // This works because the global index type is signed. 
05214 out << (globalRowIndex + 1 - rowIndexBase) << " " 05215 << (globalColIndex + 1 - colIndexBase) << " "; 05216 if (STS::isComplex) { 05217 out << STS::real (*valIter) << " " << STS::imag (*valIter); 05218 } else { 05219 out << *valIter; 05220 } 05221 out << endl; 05222 } // For each entry in the current row 05223 } // For each row of the "gather" matrix 05224 } else { // newMatrix is locally indexed 05225 typedef OrdinalTraits<GO> OTG; 05226 for (LO localRowIndex = gatherRowMap->getMinLocalIndex(); 05227 localRowIndex <= gatherRowMap->getMaxLocalIndex(); 05228 ++localRowIndex) { 05229 // Convert from local to global row index. 05230 const GO globalRowIndex = 05231 gatherRowMap->getGlobalElement (localRowIndex); 05232 TEUCHOS_TEST_FOR_EXCEPTION( 05233 globalRowIndex == OTG::invalid(), std::logic_error, 05234 "Failed to convert the supposed local row index " 05235 << localRowIndex << " into a global row index. " 05236 "Please report this bug to the Tpetra developers."); 05237 ArrayView<const LO> ind; 05238 ArrayView<const ST> val; 05239 newMatrix->getLocalRowView (localRowIndex, ind, val); 05240 lo_iter indIter = ind.begin (); 05241 st_iter valIter = val.begin (); 05242 for (; indIter != ind.end() && valIter != val.end(); 05243 ++indIter, ++valIter) { 05244 // Convert the column index from local to global. 05245 const GO globalColIndex = 05246 newMatrix->getColMap()->getGlobalElement (*indIter); 05247 TEUCHOS_TEST_FOR_EXCEPTION( 05248 globalColIndex == OTG::invalid(), std::logic_error, 05249 "On local row " << localRowIndex << " of the sparse matrix: " 05250 "Failed to convert the supposed local column index " 05251 << *indIter << " into a global column index. Please report " 05252 "this bug to the Tpetra developers."); 05253 // Convert row and column indices to 1-based. 05254 // This works because the global index type is signed. 
05255 out << (globalRowIndex + 1 - rowIndexBase) << " " 05256 << (globalColIndex + 1 - colIndexBase) << " "; 05257 if (STS::isComplex) { 05258 out << STS::real (*valIter) << " " << STS::imag (*valIter); 05259 } else { 05260 out << *valIter; 05261 } 05262 out << endl; 05263 } // For each entry in the current row 05264 } // For each row of the "gather" matrix 05265 } // Whether the "gather" matrix is locally or globally indexed 05266 } // If my process' rank is 0 05267 } 05268 05291 static void 05292 writeSparse (std::ostream& out, 05293 const RCP<const sparse_matrix_type>& pMatrix, 05294 const bool debug=false) 05295 { 05296 writeSparse (out, pMatrix, "", "", debug); 05297 } 05298 05327 static void 05328 writeDenseFile (const std::string& filename, 05329 const multivector_type& X, 05330 const std::string& matrixName, 05331 const std::string& matrixDescription, 05332 const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null, 05333 const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null) 05334 { 05335 const int myRank = X.getMap ()->getComm ()->getRank (); 05336 std::ofstream out; 05337 05338 if (myRank == 0) { // Only open the file on Process 0. 05339 out.open (filename.c_str()); 05340 } 05341 05342 writeDense (out, X, matrixName, matrixDescription, err, dbg); 05343 // We can rely on the destructor of the output stream to close 05344 // the file on scope exit, even if writeDense() throws an 05345 // exception. 
// (Closing brace of writeDenseFile(filename, X, matrixName,
// matrixDescription, err, dbg), whose body begins in the preceding lines.)
    }

    /// \brief RCP overload of the six-argument writeDenseFile above.
    ///
    /// Throws std::invalid_argument if X is null; otherwise forwards
    /// to the reference overload.
    static void
    writeDenseFile (const std::string& filename,
                    const Teuchos::RCP<const multivector_type>& X,
                    const std::string& matrixName,
                    const std::string& matrixDescription,
                    const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
                    const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
    {
      TEUCHOS_TEST_FOR_EXCEPTION(
        X.is_null (), std::invalid_argument, "Tpetra::MatrixMarket::"
        "writeDenseFile: The input MultiVector X is null.");
      writeDenseFile (filename, *X, matrixName, matrixDescription, err, dbg);
    }

    /// \brief Write the dense MultiVector to the given file, without
    ///   a matrix name or description.
    static void
    writeDenseFile (const std::string& filename,
                    const multivector_type& X,
                    const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
                    const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
    {
      writeDenseFile (filename, X, "", "", err, dbg);
    }

    /// \brief RCP overload of the four-argument writeDenseFile above.
    ///
    /// Throws std::invalid_argument if X is null; otherwise forwards
    /// to the reference overload.
    static void
    writeDenseFile (const std::string& filename,
                    const RCP<const multivector_type>& X,
                    const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
                    const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
    {
      TEUCHOS_TEST_FOR_EXCEPTION(
        X.is_null (), std::invalid_argument, "Tpetra::MatrixMarket::"
        "writeDenseFile: The input MultiVector X is null.");
      writeDenseFile (filename, *X, err, dbg);
    }


    /// \brief Print the dense MultiVector in Matrix Market format,
    ///   with comments, to the given output stream.
    ///
    /// Only Process 0 in X's communicator writes to the stream; the
    /// other processes send their rows to Process 0 in a pipelined
    /// fashion (see the long algorithm comment in the body).
    ///
    /// \param out [in/out] Output stream; only examined on Process 0.
    /// \param X [in] The dense MultiVector to write.
    /// \param matrixName [in] Name of the matrix, for the comments.
    /// \param matrixDescription [in] Description of the matrix, for
    ///   the comments.
    /// \param err [in/out] Optional stream for error output.
    /// \param dbg [in/out] Optional stream for debugging output;
    ///   debugging is enabled exactly when dbg is nonnull.
    static void
    writeDense (std::ostream& out,
                const multivector_type& X,
                const std::string& matrixName,
                const std::string& matrixDescription,
                const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
                const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
    {
      using Teuchos::arcp;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::CommRequest;
      using Teuchos::ireceive;
      using Teuchos::isend;
      using Teuchos::outArg;
      using Teuchos::REDUCE_MAX;
      // (This using-declaration is completed on the following line of
      // the file: Teuchos::reduceAll.)
      using
Teuchos::reduceAll; 05445 using Teuchos::RCP; 05446 using Teuchos::TypeNameTraits; 05447 using Teuchos::wait; 05448 using std::endl; 05449 typedef typename multivector_type::scalar_type scalar_type; 05450 typedef Teuchos::ScalarTraits<scalar_type> STS; 05451 05452 const Comm<int>& comm = * (X.getMap ()->getComm ()); 05453 const int myRank = comm.getRank (); 05454 const int numProcs = comm.getSize (); 05455 int lclErr = 0; // whether this MPI process has seen an error 05456 int gblErr = 0; // whether we know if some MPI process has seen an error 05457 05458 // If the caller provides a nonnull debug output stream, we 05459 // print debugging output to it. This is a local thing; we 05460 // don't have to check across processes. 05461 const bool debug = ! dbg.is_null (); 05462 05463 if (debug) { 05464 dbg->pushTab (); 05465 std::ostringstream os; 05466 os << myRank << ": writeDense" << endl; 05467 *dbg << os.str (); 05468 dbg->pushTab (); 05469 } 05470 05471 const size_t myNumRows = X.getLocalLength (); 05472 const size_t numCols = X.getNumVectors (); 05473 // Use a different tag for the "size" messages than for the 05474 // "data" messages, in order to help us debug any mix-ups. 05475 const int sizeTag = 1337; 05476 const int dataTag = 1338; 05477 05478 // Process 0 pipelines nonblocking receives with file output. 05479 // 05480 // Constraints: 05481 // - Process 0 can't post a receive for another process' 05482 // actual data, until it posts and waits on the receive 05483 // from that process with the amount of data to receive. 05484 // (We could just post receives with a max data size, but 05485 // I feel uncomfortable about that.) 05486 // - The C++ standard library doesn't allow nonblocking 05487 // output to an std::ostream. 05488 // 05489 // Process 0: Post receive-size receives from Processes 1 and 2. 05490 // Process 1: Post send-size send to Process 0. 05491 // Process 2: Post send-size send to Process 0. 05492 // 05493 // All processes: Pack my entries. 
05494 // 05495 // Process 1: 05496 // - Post send-data send to Process 0. 05497 // - Wait on my send-size send to Process 0. 05498 // 05499 // Process 0: 05500 // - Print MatrixMarket header. 05501 // - Print my entries. 05502 // - Wait on receive-size receive from Process 1. 05503 // - Post receive-data receive from Process 1. 05504 // 05505 // For each process p = 1, 2, ... numProcs-1: 05506 // If I am Process 0: 05507 // - Post receive-size receive from Process p + 2 05508 // - Wait on receive-size receive from Process p + 1 05509 // - Post receive-data receive from Process p + 1 05510 // - Wait on receive-data receive from Process p 05511 // - Write data from Process p. 05512 // Else if I am Process p: 05513 // - Wait on my send-data send. 05514 // Else if I am Process p+1: 05515 // - Post send-data send to Process 0. 05516 // - Wait on my send-size send. 05517 // Else if I am Process p+2: 05518 // - Post send-size send to Process 0. 05519 // 05520 // Pipelining has three goals here: 05521 // 1. Overlap communication (the receives) with file I/O 05522 // 2. Give Process 0 a chance to prepost some receives, 05523 // before sends show up, by packing local data before 05524 // posting sends 05525 // 3. Don't post _all_ receives or _all_ sends, because that 05526 // wouldn't be memory scalable. (Just because we can't 05527 // see how much memory MPI consumes, doesn't mean that it 05528 // doesn't consume any!) 05529 05530 // These are used on every process. sendReqSize[0] holds the 05531 // number of rows on this process, and sendReqBuf holds this 05532 // process' data. Process 0 packs into sendReqBuf, but 05533 // doesn't send; it only uses that for printing. All other 05534 // processes send both of these to Process 0. 05535 RCP<CommRequest<int> > sendReqSize, sendReqData; 05536 05537 // These are used only on Process 0, for received data. Keep 05538 // 3 of each, and treat the arrays as circular buffers. 
When 05539 // receiving from Process p, the corresponding array index 05540 // here is p % 3. 05541 Array<ArrayRCP<size_t> > recvSizeBufs (3); 05542 Array<ArrayRCP<scalar_type> > recvDataBufs (3); 05543 Array<RCP<CommRequest<int> > > recvSizeReqs (3); 05544 Array<RCP<CommRequest<int> > > recvDataReqs (3); 05545 05546 // Buffer for nonblocking send of the "send size." 05547 ArrayRCP<size_t> sendDataSize (1); 05548 sendDataSize[0] = myNumRows; 05549 05550 if (myRank == 0) { 05551 if (debug) { 05552 std::ostringstream os; 05553 os << myRank << ": Post receive-size receives from " 05554 "Procs 1 and 2: tag = " << sizeTag << endl; 05555 *dbg << os.str (); 05556 } 05557 // Process 0: Post receive-size receives from Processes 1 and 2. 05558 recvSizeBufs[0].resize (1); 05559 // Set these three to an invalid value as a flag. If we 05560 // don't get these messages, then the invalid value will 05561 // remain, so we can test for it. 05562 (recvSizeBufs[0])[0] = Teuchos::OrdinalTraits<size_t>::invalid (); 05563 recvSizeBufs[1].resize (1); 05564 (recvSizeBufs[1])[0] = Teuchos::OrdinalTraits<size_t>::invalid (); 05565 recvSizeBufs[2].resize (1); 05566 (recvSizeBufs[2])[0] = Teuchos::OrdinalTraits<size_t>::invalid (); 05567 if (numProcs > 1) { 05568 recvSizeReqs[1] = 05569 ireceive<int, size_t> (recvSizeBufs[1], 1, sizeTag, comm); 05570 } 05571 if (numProcs > 2) { 05572 recvSizeReqs[2] = 05573 ireceive<int, size_t> (recvSizeBufs[2], 2, sizeTag, comm); 05574 } 05575 } 05576 else if (myRank == 1 || myRank == 2) { 05577 if (debug) { 05578 std::ostringstream os; 05579 os << myRank << ": Post send-size send: size = " 05580 << sendDataSize[0] << ", tag = " << sizeTag << endl; 05581 *dbg << os.str (); 05582 } 05583 // Prime the pipeline by having Processes 1 and 2 start 05584 // their send-size sends. We don't want _all_ the processes 05585 // to start their send-size sends, because that wouldn't be 05586 // memory scalable. 
05587 sendReqSize = isend<int, size_t> (sendDataSize, 0, sizeTag, comm); 05588 } 05589 else { 05590 if (debug) { 05591 std::ostringstream os; 05592 os << myRank << ": Not posting my send-size send yet" << endl; 05593 *dbg << os.str (); 05594 } 05595 } 05596 05597 // 05598 // Pack my entries, in column-major order. 05599 // 05600 if (debug) { 05601 std::ostringstream os; 05602 os << myRank << ": Pack my entries" << endl; 05603 *dbg << os.str (); 05604 } 05605 ArrayRCP<scalar_type> sendDataBuf; 05606 try { 05607 sendDataBuf = arcp<scalar_type> (myNumRows * numCols); 05608 X.get1dCopy (sendDataBuf (), myNumRows); 05609 } 05610 catch (std::exception& e) { 05611 lclErr = 1; 05612 if (! err.is_null ()) { 05613 std::ostringstream os; 05614 os << "Process " << myRank << ": Attempt to pack my MultiVector " 05615 "entries threw an exception: " << e.what () << endl; 05616 *err << os.str (); 05617 } 05618 } 05619 if (debug) { 05620 std::ostringstream os; 05621 os << myRank << ": Done packing my entries" << endl; 05622 *dbg << os.str (); 05623 } 05624 05625 // 05626 // Process 1: post send-data send to Process 0. 05627 // 05628 if (myRank == 1) { 05629 if (debug) { 05630 *dbg << myRank << ": Post send-data send: tag = " << dataTag 05631 << endl; 05632 } 05633 sendReqData = isend<int, scalar_type> (sendDataBuf, 0, dataTag, comm); 05634 } 05635 05636 // 05637 // Process 0: Write the MatrixMarket header. 05638 // 05639 if (myRank == 0) { 05640 if (debug) { 05641 *dbg << myRank << ": Write MatrixMarket header" << endl; 05642 } 05643 // Print the Matrix Market header. MultiVector stores data 05644 // nonsymmetrically, hence "general" in the banner line. 05645 // Print first to a temporary string output stream, and then 05646 // write it to the main output stream, so that at least the 05647 // header output has transactional semantics. We can't 05648 // guarantee transactional semantics for the whole output, 05649 // since that would not be memory scalable. 
(This could be 05650 // done in the file system by using a temporary file; we 05651 // don't do this, but users could.) 05652 std::ostringstream hdr; 05653 { 05654 std::string dataType; 05655 if (STS::isComplex) { 05656 dataType = "complex"; 05657 } else if (STS::isOrdinal) { 05658 dataType = "integer"; 05659 } else { 05660 dataType = "real"; 05661 } 05662 hdr << "%%MatrixMarket matrix array " << dataType << " general" 05663 << endl; 05664 } 05665 05666 // Print comments (the matrix name and / or description). 05667 if (matrixName != "") { 05668 printAsComment (hdr, matrixName); 05669 } 05670 if (matrixDescription != "") { 05671 printAsComment (hdr, matrixDescription); 05672 } 05673 // Print the Matrix Market dimensions header for dense matrices. 05674 hdr << X.getGlobalLength () << " " << X.getNumVectors () << endl; 05675 05676 // Write the MatrixMarket header to the output stream. 05677 out << hdr.str (); 05678 05679 if (debug) { 05680 std::ostringstream os; 05681 os << myRank << ": Write my entries" << endl; 05682 *dbg << os.str (); 05683 } 05684 05685 // Write Process 0's data to the output stream. 05686 // Matrix Market prints dense matrices in column-major order. 05687 const size_t printNumRows = myNumRows; 05688 ArrayView<const scalar_type> printData = sendDataBuf (); 05689 const size_t printStride = printNumRows; 05690 if (static_cast<size_t> (printData.size ()) < printStride * numCols) { 05691 lclErr = 1; 05692 if (! err.is_null ()) { 05693 std::ostringstream os; 05694 os << "Process " << myRank << ": My MultiVector data's size " 05695 << printData.size () << " does not match my local dimensions " 05696 << printStride << " x " << numCols << "." << endl; 05697 *err << os.str (); 05698 } 05699 } 05700 else { 05701 // Matrix Market dense format wants one number per line. 05702 // It wants each complex number as two real numbers (real 05703 // resp. imaginary parts) with a space between. 
05704 for (size_t col = 0; col < numCols; ++col) { 05705 for (size_t row = 0; row < printNumRows; ++row) { 05706 if (STS::isComplex) { 05707 out << STS::real (printData[row + col * printStride]) << " " 05708 << STS::imag (printData[row + col * printStride]) << endl; 05709 } else { 05710 out << printData[row + col * printStride] << endl; 05711 } 05712 } 05713 out << endl; 05714 } 05715 } 05716 } 05717 05718 if (myRank == 0) { 05719 // Wait on receive-size receive from Process 1. 05720 const int recvRank = 1; 05721 const int circBufInd = recvRank % 3; 05722 if (debug) { 05723 std::ostringstream os; 05724 os << myRank << ": Wait on receive-size receive from Process " 05725 << recvRank << endl; 05726 *dbg << os.str (); 05727 } 05728 if (numProcs > 1) { 05729 wait<int> (comm, outArg (recvSizeReqs[circBufInd])); 05730 05731 // We received the number of rows of data. (The data 05732 // come in two columns.) 05733 size_t recvNumRows = (recvSizeBufs[circBufInd])[0]; 05734 if (recvNumRows == Teuchos::OrdinalTraits<size_t>::invalid ()) { 05735 lclErr = 1; 05736 if (! err.is_null ()) { 05737 std::ostringstream os; 05738 os << myRank << ": Result of receive-size receive from Process " 05739 << recvRank << " is Teuchos::OrdinalTraits<size_t>::invalid() " 05740 << "= " << Teuchos::OrdinalTraits<size_t>::invalid () << ". " 05741 "This should never happen, and suggests that the receive never " 05742 "got posted. Please report this bug to the Tpetra developers." 05743 << endl; 05744 *err << os.str (); 05745 } 05746 05747 // If we're going to continue after error, set the 05748 // number of rows to receive to a reasonable size. This 05749 // may cause MPI_ERR_TRUNCATE if the sending process is 05750 // sending more than 0 rows, but that's better than MPI 05751 // overflowing due to the huge positive value that is 05752 // Teuchos::OrdinalTraits<size_t>::invalid(). 05753 recvNumRows = 0; 05754 } 05755 05756 // Post receive-data receive from Process 1. 
05757 recvDataBufs[circBufInd].resize (recvNumRows * numCols); 05758 if (debug) { 05759 std::ostringstream os; 05760 os << myRank << ": Post receive-data receive from Process " 05761 << recvRank << ": tag = " << dataTag << ", buffer size = " 05762 << recvDataBufs[circBufInd].size () << endl; 05763 *dbg << os.str (); 05764 } 05765 if (! recvSizeReqs[circBufInd].is_null ()) { 05766 lclErr = 1; 05767 if (! err.is_null ()) { 05768 std::ostringstream os; 05769 os << myRank << ": recvSizeReqs[" << circBufInd << "] is not " 05770 "null, before posting the receive-data receive from Process " 05771 << recvRank << ". This should never happen. Please report " 05772 "this bug to the Tpetra developers." << endl; 05773 *err << os.str (); 05774 } 05775 } 05776 recvDataReqs[circBufInd] = 05777 ireceive<int, scalar_type> (recvDataBufs[circBufInd], 05778 recvRank, dataTag, comm); 05779 } // numProcs > 1 05780 } 05781 else if (myRank == 1) { 05782 // Wait on my send-size send. 05783 if (debug) { 05784 std::ostringstream os; 05785 os << myRank << ": Wait on my send-size send" << endl; 05786 *dbg << os.str (); 05787 } 05788 wait<int> (comm, outArg (sendReqSize)); 05789 } 05790 05791 // 05792 // Pipeline loop 05793 // 05794 for (int p = 1; p < numProcs; ++p) { 05795 if (myRank == 0) { 05796 if (p + 2 < numProcs) { 05797 // Post receive-size receive from Process p + 2. 05798 const int recvRank = p + 2; 05799 const int circBufInd = recvRank % 3; 05800 if (debug) { 05801 std::ostringstream os; 05802 os << myRank << ": Post receive-size receive from Process " 05803 << recvRank << ": tag = " << sizeTag << endl; 05804 *dbg << os.str (); 05805 } 05806 if (! recvSizeReqs[circBufInd].is_null ()) { 05807 lclErr = 1; 05808 if (! err.is_null ()) { 05809 std::ostringstream os; 05810 os << myRank << ": recvSizeReqs[" << circBufInd << "] is not " 05811 << "null, for the receive-size receive from Process " 05812 << recvRank << "! 
This may mean that this process never " 05813 << "finished waiting for the receive from Process " 05814 << (recvRank - 3) << "." << endl; 05815 *err << os.str (); 05816 } 05817 } 05818 recvSizeReqs[circBufInd] = 05819 ireceive<int, size_t> (recvSizeBufs[circBufInd], 05820 recvRank, sizeTag, comm); 05821 } 05822 05823 if (p + 1 < numProcs) { 05824 const int recvRank = p + 1; 05825 const int circBufInd = recvRank % 3; 05826 05827 // Wait on receive-size receive from Process p + 1. 05828 if (debug) { 05829 std::ostringstream os; 05830 os << myRank << ": Wait on receive-size receive from Process " 05831 << recvRank << endl; 05832 *dbg << os.str (); 05833 } 05834 wait<int> (comm, outArg (recvSizeReqs[circBufInd])); 05835 05836 // We received the number of rows of data. (The data 05837 // come in two columns.) 05838 size_t recvNumRows = (recvSizeBufs[circBufInd])[0]; 05839 if (recvNumRows == Teuchos::OrdinalTraits<size_t>::invalid ()) { 05840 lclErr = 1; 05841 if (! err.is_null ()) { 05842 std::ostringstream os; 05843 os << myRank << ": Result of receive-size receive from Process " 05844 << recvRank << " is Teuchos::OrdinalTraits<size_t>::invalid() " 05845 << "= " << Teuchos::OrdinalTraits<size_t>::invalid () << ". " 05846 "This should never happen, and suggests that the receive never " 05847 "got posted. Please report this bug to the Tpetra developers." 05848 << endl; 05849 *err << os.str (); 05850 } 05851 // If we're going to continue after error, set the 05852 // number of rows to receive to a reasonable size. 05853 // This may cause MPI_ERR_TRUNCATE if the sending 05854 // process sends more than 0 rows, but that's better 05855 // than MPI overflowing due to the huge positive value 05856 // Teuchos::OrdinalTraits<size_t>::invalid(). 05857 recvNumRows = 0; 05858 } 05859 05860 // Post receive-data receive from Process p + 1. 
05861 recvDataBufs[circBufInd].resize (recvNumRows * numCols); 05862 if (debug) { 05863 std::ostringstream os; 05864 os << myRank << ": Post receive-data receive from Process " 05865 << recvRank << ": tag = " << dataTag << ", buffer size = " 05866 << recvDataBufs[circBufInd].size () << endl; 05867 *dbg << os.str (); 05868 } 05869 if (! recvDataReqs[circBufInd].is_null ()) { 05870 lclErr = 1; 05871 if (! err.is_null ()) { 05872 std::ostringstream os; 05873 os << myRank << ": recvDataReqs[" << circBufInd << "] is not " 05874 << "null, for the receive-data receive from Process " 05875 << recvRank << "! This may mean that this process never " 05876 << "finished waiting for the receive from Process " 05877 << (recvRank - 3) << "." << endl; 05878 *err << os.str (); 05879 } 05880 } 05881 recvDataReqs[circBufInd] = 05882 ireceive<int, scalar_type> (recvDataBufs[circBufInd], 05883 recvRank, dataTag, comm); 05884 } 05885 05886 // Wait on receive-data receive from Process p. 05887 const int recvRank = p; 05888 const int circBufInd = recvRank % 3; 05889 if (debug) { 05890 std::ostringstream os; 05891 os << myRank << ": Wait on receive-data receive from Process " 05892 << recvRank << endl; 05893 *dbg << os.str (); 05894 } 05895 wait<int> (comm, outArg (recvDataReqs[circBufInd])); 05896 05897 // Write Process p's data. Number of rows lives in 05898 // recvSizeBufs[circBufInd], and the actual data live in 05899 // recvDataBufs[circBufInd]. Do this after posting receives, 05900 // in order to expose overlap of comm. with file I/O. 05901 if (debug) { 05902 std::ostringstream os; 05903 os << myRank << ": Write entries from Process " << recvRank 05904 << endl; 05905 *dbg << os.str () << endl; 05906 } 05907 size_t printNumRows = (recvSizeBufs[circBufInd])[0]; 05908 if (printNumRows == Teuchos::OrdinalTraits<size_t>::invalid ()) { 05909 lclErr = 1; 05910 if (! 
err.is_null ()) { 05911 std::ostringstream os; 05912 os << myRank << ": Result of receive-size receive from Process " 05913 << recvRank << " was Teuchos::OrdinalTraits<size_t>::" 05914 "invalid() = " << Teuchos::OrdinalTraits<size_t>::invalid () 05915 << ". This should never happen, and suggests that its " 05916 "receive-size receive was never posted. " 05917 "Please report this bug to the Tpetra developers." << endl; 05918 *err << os.str (); 05919 } 05920 // If we're going to continue after error, set the 05921 // number of rows to print to a reasonable size. 05922 printNumRows = 0; 05923 } 05924 if (printNumRows > 0 && recvDataBufs[circBufInd].is_null ()) { 05925 lclErr = 1; 05926 if (! err.is_null ()) { 05927 std::ostringstream os; 05928 os << myRank << ": Result of receive-size receive from Proc " 05929 << recvRank << " was " << printNumRows << " > 0, but " 05930 "recvDataBufs[" << circBufInd << "] is null. This should " 05931 "never happen. Please report this bug to the Tpetra " 05932 "developers." << endl; 05933 *err << os.str (); 05934 } 05935 // If we're going to continue after error, set the 05936 // number of rows to print to a reasonable size. 05937 printNumRows = 0; 05938 } 05939 05940 // Write the received data to the output stream. 05941 // Matrix Market prints dense matrices in column-major order. 05942 ArrayView<const scalar_type> printData = (recvDataBufs[circBufInd]) (); 05943 const size_t printStride = printNumRows; 05944 // Matrix Market dense format wants one number per line. 05945 // It wants each complex number as two real numbers (real 05946 // resp. imaginary parts) with a space between. 
05947 for (size_t col = 0; col < numCols; ++col) { 05948 for (size_t row = 0; row < printNumRows; ++row) { 05949 if (STS::isComplex) { 05950 out << STS::real (printData[row + col * printStride]) << " " 05951 << STS::imag (printData[row + col * printStride]) << endl; 05952 } else { 05953 out << printData[row + col * printStride] << endl; 05954 } 05955 } 05956 out << endl; 05957 } 05958 } 05959 else if (myRank == p) { // Process p 05960 // Wait on my send-data send. 05961 if (debug) { 05962 std::ostringstream os; 05963 os << myRank << ": Wait on my send-data send" << endl; 05964 *dbg << os.str (); 05965 } 05966 wait<int> (comm, outArg (sendReqData)); 05967 } 05968 else if (myRank == p + 1) { // Process p + 1 05969 // Post send-data send to Process 0. 05970 if (debug) { 05971 std::ostringstream os; 05972 os << myRank << ": Post send-data send: tag = " << dataTag 05973 << endl; 05974 *dbg << os.str (); 05975 } 05976 sendReqData = isend<int, scalar_type> (sendDataBuf, 0, dataTag, comm); 05977 // Wait on my send-size send. 05978 if (debug) { 05979 std::ostringstream os; 05980 os << myRank << ": Wait on my send-size send" << endl; 05981 *dbg << os.str (); 05982 } 05983 wait<int> (comm, outArg (sendReqSize)); 05984 } 05985 else if (myRank == p + 2) { // Process p + 2 05986 // Post send-size send to Process 0. 05987 if (debug) { 05988 std::ostringstream os; 05989 os << myRank << ": Post send-size send: size = " 05990 << sendDataSize[0] << ", tag = " << sizeTag << endl; 05991 *dbg << os.str (); 05992 } 05993 sendReqSize = isend<int, size_t> (sendDataSize, 0, sizeTag, comm); 05994 } 05995 } 05996 05997 // Establish global agreement on the error state. 
// Establish global agreement on the error state.  All processes
// must agree before we can (collectively) decide whether to throw.
reduceAll<int, int> (comm, REDUCE_MAX, lclErr, outArg (gblErr));
TEUCHOS_TEST_FOR_EXCEPTION(
  gblErr == 1, std::runtime_error, "Tpetra::MatrixMarket::writeDense "
  "experienced some kind of error and was unable to complete.");

if (debug) {
  dbg->popTab ();
  // NOTE(review): this message says "writeMap: Done" but we are at the
  // end of writeDense -- looks like a copy/paste from writeMap's
  // epilogue.  Debug output only; flagged rather than changed here.
  *dbg << myRank << ": writeMap: Done" << endl;
  dbg->popTab ();
}
}

/// \brief Print the multivector (wrapped in an RCP) in Matrix Market
///   format, with the given matrix name and description, to the given
///   output stream.
///
/// Convenience overload: checks that \c X is nonnull, then forwards to
/// the \c const \c multivector_type& overload.
///
/// \param out [out] Output stream; only Process 0 needs to be able to
///   write to it (the other arguments are collective).
/// \param X [in] The multivector to write; must be nonnull.
/// \param matrixName [in] Name to print in the Matrix Market comments.
/// \param matrixDescription [in] Description to print in the comments.
/// \param err [out] Optional stream for error output.
/// \param dbg [out] Optional stream for debug output.
///
/// \throws std::invalid_argument if X is null.
static void
writeDense (std::ostream& out,
            const RCP<const multivector_type>& X,
            const std::string& matrixName,
            const std::string& matrixDescription,
            const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
            const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
{
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.is_null (), std::invalid_argument, "Tpetra::MatrixMarket::"
    "writeDense: The input MultiVector X is null.");
  writeDense (out, *X, matrixName, matrixDescription, err, dbg);
}

/// \brief Print the multivector in Matrix Market format, with no
///   matrix name or description, to the given output stream.
///
/// Convenience overload: forwards with empty name and description.
static void
writeDense (std::ostream& out,
            const multivector_type& X,
            const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
            const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
{
  writeDense (out, X, "", "", err, dbg);
}

/// \brief Print the multivector (wrapped in an RCP) in Matrix Market
///   format, with no matrix name or description, to the given output
///   stream.
///
/// Convenience overload: checks that \c X is nonnull, then forwards
/// with empty name and description.
///
/// \throws std::invalid_argument if X is null.
static void
writeDense (std::ostream& out,
            const RCP<const multivector_type>& X,
            const Teuchos::RCP<Teuchos::FancyOStream>& err = Teuchos::null,
            const Teuchos::RCP<Teuchos::FancyOStream>& dbg = Teuchos::null)
{
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.is_null (), std::invalid_argument, "Tpetra::MatrixMarket::"
    "writeDense: The input MultiVector X is null.");
  writeDense (out, *X, "", "", err, dbg);
}

/// \brief Print the Map in Matrix Market format to the given output
///   stream.
///
/// Convenience overload: error output (used when \c debug is true)
/// goes to std::cerr, wrapped in a FancyOStream.
///
/// \param out [out] Output stream; only Process 0 needs to write.
/// \param map [in] The Map to print (collective argument).
/// \param debug [in] If true, print debug output to std::cerr.
static void
writeMap (std::ostream& out, const map_type& map, const bool debug=false)
{
  Teuchos::RCP<Teuchos::FancyOStream> err =
    Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr));
  writeMap (out, map, err, debug);
}
/// \brief Print the Map in Matrix Market format to the given output
///   stream, with an explicit error/debug output stream.
///
/// This is a collective over the Map's communicator.  Process 0
/// writes the Matrix Market header, its own (GID, PID) pairs, and
/// then the pairs it receives from each other process in rank order.
/// Communication is pipelined with nonblocking point-to-point
/// messages (see the long comment in the body for the protocol).
///
/// \param out [out] Output stream; only Process 0 writes to it.
/// \param map [in] The Map to print.
/// \param err [out] Stream for debug output (only dereferenced when
///   \c debug is true; pushTab/popTab are called whenever nonnull).
/// \param debug [in] If true, print per-process debug messages.
static void
writeMap (std::ostream& out,
          const map_type& map,
          const Teuchos::RCP<Teuchos::FancyOStream>& err,
          const bool debug=false)
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::CommRequest;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::RCP;
  using Teuchos::TypeNameTraits;
  using Teuchos::wait;
  using std::endl;
  typedef global_ordinal_type GO;
  typedef int pid_type;

  // Treat the Map as a 1-column "multivector."  This differs
  // from the previous two-column format, in which column 0 held
  // the GIDs, and column 1 held the corresponding PIDs.  It
  // differs because printing that format requires knowing the
  // entire first column -- that is, all the GIDs -- in advance.
  // Sending messages from each process one at a time saves
  // memory, but it means that Process 0 doesn't ever have all
  // the GIDs at once.
  //
  // We pack the entries as ptrdiff_t, since this should be the
  // biggest signed built-in integer type that can hold any GO
  // or pid_type (= int) quantity without overflow.  Test this
  // assumption at run time.
  typedef ptrdiff_t int_type;
  TEUCHOS_TEST_FOR_EXCEPTION(
    sizeof (GO) > sizeof (int_type), std::logic_error,
    "The global ordinal type GO=" << TypeNameTraits<GO>::name ()
    << " is too big for ptrdiff_t. sizeof(GO) = " << sizeof (GO)
    << " > sizeof(ptrdiff_t) = " << sizeof (ptrdiff_t) << ".");
  TEUCHOS_TEST_FOR_EXCEPTION(
    sizeof (pid_type) > sizeof (int_type), std::logic_error,
    "The (MPI) process rank type pid_type=" <<
    TypeNameTraits<pid_type>::name () << " is too big for ptrdiff_t. "
    "sizeof(pid_type) = " << sizeof (pid_type) << " > sizeof(ptrdiff_t)"
    " = " << sizeof (ptrdiff_t) << ".");

  const Comm<int>& comm = * (map.getComm ());
  const int myRank = comm.getRank ();
  const int numProcs = comm.getSize ();

  if (! err.is_null ()) {
    err->pushTab ();
  }
  if (debug) {
    std::ostringstream os;
    os << myRank << ": writeMap" << endl;
    *err << os.str ();
  }
  if (! err.is_null ()) {
    err->pushTab ();
  }

  const size_t myNumRows = map.getNodeNumElements ();
  // Use a different tag for the "size" messages than for the
  // "data" messages, in order to help us debug any mix-ups.
  const int sizeTag = 1337;
  const int dataTag = 1338;

  // Process 0 pipelines nonblocking receives with file output.
  //
  // Constraints:
  // - Process 0 can't post a receive for another process'
  //   actual data, until it posts and waits on the receive
  //   from that process with the amount of data to receive.
  //   (We could just post receives with a max data size, but
  //   I feel uncomfortable about that.)
  // - The C++ standard library doesn't allow nonblocking
  //   output to an std::ostream.
  //
  // Process 0: Post receive-size receives from Processes 1 and 2.
  // Process 1: Post send-size send to Process 0.
  // Process 2: Post send-size send to Process 0.
  //
  // All processes: Pack my GIDs and PIDs.
  //
  // Process 1:
  // - Post send-data send to Process 0.
  // - Wait on my send-size send to Process 0.
  //
  // Process 0:
  // - Print MatrixMarket header.
  // - Print my GIDs and PIDs.
  // - Wait on receive-size receive from Process 1.
  // - Post receive-data receive from Process 1.
  //
  // For each process p = 1, 2, ... numProcs-1:
  //   If I am Process 0:
  //   - Post receive-size receive from Process p + 2
  //   - Wait on receive-size receive from Process p + 1
  //   - Post receive-data receive from Process p + 1
  //   - Wait on receive-data receive from Process p
  //   - Write data from Process p.
  //   Else if I am Process p:
  //   - Wait on my send-data send.
  //   Else if I am Process p+1:
  //   - Post send-data send to Process 0.
  //   - Wait on my send-size send.
  //   Else if I am Process p+2:
  //   - Post send-size send to Process 0.
  //
  // Pipelining has three goals here:
  //   1. Overlap communication (the receives) with file I/O
  //   2. Give Process 0 a chance to prepost some receives,
  //      before sends show up, by packing local data before
  //      posting sends
  //   3. Don't post _all_ receives or _all_ sends, because that
  //      wouldn't be memory scalable.  (Just because we can't
  //      see how much memory MPI consumes, doesn't mean that it
  //      doesn't consume any!)

  // These are used on every process.  sendReqSize[0] holds the
  // number of rows on this process, and sendReqBuf holds this
  // process' data.  Process 0 packs into sendReqBuf, but
  // doesn't send; it only uses that for printing.  All other
  // processes send both of these to Process 0.
  RCP<CommRequest<int> > sendReqSize, sendReqData;

  // These are used only on Process 0, for received data.  Keep
  // 3 of each, and treat the arrays as circular buffers.  When
  // receiving from Process p, the corresponding array index
  // here is p % 3.
  Array<ArrayRCP<int_type> > recvSizeBufs (3);
  Array<ArrayRCP<int_type> > recvDataBufs (3);
  Array<RCP<CommRequest<int> > > recvSizeReqs (3);
  Array<RCP<CommRequest<int> > > recvDataReqs (3);

  // Buffer for nonblocking send of the "send size."
  ArrayRCP<int_type> sendDataSize (1);
  sendDataSize[0] = myNumRows;

  if (myRank == 0) {
    if (debug) {
      std::ostringstream os;
      os << myRank << ": Post receive-size receives from "
        "Procs 1 and 2: tag = " << sizeTag << endl;
      *err << os.str ();
    }
    // Process 0: Post receive-size receives from Processes 1 and 2.
    recvSizeBufs[0].resize (1);
    (recvSizeBufs[0])[0] = -1; // error flag
    recvSizeBufs[1].resize (1);
    (recvSizeBufs[1])[0] = -1; // error flag
    recvSizeBufs[2].resize (1);
    (recvSizeBufs[2])[0] = -1; // error flag
    if (numProcs > 1) {
      recvSizeReqs[1] =
        ireceive<int, int_type> (recvSizeBufs[1], 1, sizeTag, comm);
    }
    if (numProcs > 2) {
      recvSizeReqs[2] =
        ireceive<int, int_type> (recvSizeBufs[2], 2, sizeTag, comm);
    }
  }
  else if (myRank == 1 || myRank == 2) {
    if (debug) {
      std::ostringstream os;
      os << myRank << ": Post send-size send: size = "
         << sendDataSize[0] << ", tag = " << sizeTag << endl;
      *err << os.str ();
    }
    // Prime the pipeline by having Processes 1 and 2 start
    // their send-size sends.  We don't want _all_ the processes
    // to start their send-size sends, because that wouldn't be
    // memory scalable.
    sendReqSize = isend<int, int_type> (sendDataSize, 0, sizeTag, comm);
  }
  else {
    if (debug) {
      std::ostringstream os;
      os << myRank << ": Not posting my send-size send yet" << endl;
      *err << os.str ();
    }
  }

  //
  // Pack my GIDs and PIDs.  Each (GID,PID) pair gets packed
  // consecutively, for better locality.
  //

  if (debug) {
    std::ostringstream os;
    os << myRank << ": Pack my GIDs and PIDs" << endl;
    *err << os.str ();
  }

  ArrayRCP<int_type> sendDataBuf (myNumRows * 2);

  if (map.isContiguous ()) {
    // Contiguous Map: GIDs are myMinGblIdx, myMinGblIdx+1, ...;
    // no need to fetch the element list.
    const int_type myMinGblIdx =
      static_cast<int_type> (map.getMinGlobalIndex ());
    for (size_t k = 0; k < myNumRows; ++k) {
      const int_type gid = myMinGblIdx + static_cast<int_type> (k);
      const int_type pid = static_cast<int_type> (myRank);
      sendDataBuf[2*k] = gid;
      sendDataBuf[2*k+1] = pid;
    }
  }
  else {
    // Noncontiguous Map: read GIDs from the local element list.
    ArrayView<const GO> myGblInds = map.getNodeElementList ();
    for (size_t k = 0; k < myNumRows; ++k) {
      const int_type gid = static_cast<int_type> (myGblInds[k]);
      const int_type pid = static_cast<int_type> (myRank);
      sendDataBuf[2*k] = gid;
      sendDataBuf[2*k+1] = pid;
    }
  }

  if (debug) {
    std::ostringstream os;
    os << myRank << ": Done packing my GIDs and PIDs" << endl;
    *err << os.str ();
  }

  if (myRank == 1) {
    // Process 1: post send-data send to Process 0.
    if (debug) {
      *err << myRank << ": Post send-data send: tag = " << dataTag
           << endl;
    }
    sendReqData = isend<int, int_type> (sendDataBuf, 0, dataTag, comm);
  }

  if (myRank == 0) {
    if (debug) {
      *err << myRank << ": Write MatrixMarket header" << endl;
    }

    // Process 0: Write the MatrixMarket header.
    // Description section explains each column.
    std::ostringstream hdr;

    // Print the Matrix Market header.  MultiVector stores data
    // nonsymmetrically, hence "general" in the banner line.
    hdr << "%%MatrixMarket matrix array integer general" << endl
        << "% Format: Version 2.0" << endl
        << "%" << endl
        << "% This file encodes a Tpetra::Map." << endl
        << "% It is stored as a dense vector, with twice as many " << endl
        << "% entries as the global number of GIDs (global indices)." << endl
        << "% (GID, PID) pairs are stored contiguously, where the PID " << endl
        << "% is the rank of the process owning that GID." << endl
        << (2 * map.getGlobalNumElements ()) << " " << 1 << endl;
    out << hdr.str ();

    if (debug) {
      std::ostringstream os;
      os << myRank << ": Write my GIDs and PIDs" << endl;
      *err << os.str ();
    }

    // Write Process 0's data to the output stream.
    // Matrix Market prints dense matrices in column-major order.
    const int_type printNumRows = myNumRows;
    ArrayView<const int_type> printData = sendDataBuf ();
    for (int_type k = 0; k < printNumRows; ++k) {
      const int_type gid = printData[2*k];
      const int_type pid = printData[2*k+1];
      out << gid << endl << pid << endl;
    }
  }

  if (myRank == 0) {
    // Wait on receive-size receive from Process 1.
    const int recvRank = 1;
    const int circBufInd = recvRank % 3;
    if (debug) {
      std::ostringstream os;
      os << myRank << ": Wait on receive-size receive from Process "
         << recvRank << endl;
      *err << os.str ();
    }
    if (numProcs > 1) {
      wait<int> (comm, outArg (recvSizeReqs[circBufInd]));

      // We received the number of rows of data.  (The data
      // come in two columns.)
      const int_type recvNumRows = (recvSizeBufs[circBufInd])[0];
      if (debug && recvNumRows == -1) {
        std::ostringstream os;
        os << myRank << ": Result of receive-size receive from Process "
           << recvRank << " is -1. This should never happen, and "
          "suggests that the receive never got posted. Please report "
          "this bug to the Tpetra developers." << endl;
        *err << os.str ();
      }

      // Post receive-data receive from Process 1.
      // NOTE(review): if recvNumRows is still the -1 error flag, this
      // resize gets a huge size_t value; unlike writeDense, the error
      // case here is only reported (in debug mode), not clamped to 0.
      recvDataBufs[circBufInd].resize (recvNumRows * 2);
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Post receive-data receive from Process "
           << recvRank << ": tag = " << dataTag << ", buffer size = "
           << recvDataBufs[circBufInd].size () << endl;
        *err << os.str ();
      }
      // Sanity check: the just-completed wait should have set the
      // size request to null.  (NOTE(review): the analogous check in
      // writeDense tests recvDataReqs instead; presumably both are
      // intentional invariant checks -- confirm with the authors.)
      if (! recvSizeReqs[circBufInd].is_null ()) {
        std::ostringstream os;
        os << myRank << ": recvSizeReqs[" << circBufInd << "] is not "
          "null, before posting the receive-data receive from Process "
           << recvRank << ". This should never happen. Please report "
          "this bug to the Tpetra developers." << endl;
        *err << os.str ();
      }
      recvDataReqs[circBufInd] =
        ireceive<int, int_type> (recvDataBufs[circBufInd],
                                 recvRank, dataTag, comm);
    } // numProcs > 1
  }
  else if (myRank == 1) {
    // Wait on my send-size send.
    if (debug) {
      std::ostringstream os;
      os << myRank << ": Wait on my send-size send" << endl;
      *err << os.str ();
    }
    wait<int> (comm, outArg (sendReqSize));
  }

  //
  // Pipeline loop
  //
  for (int p = 1; p < numProcs; ++p) {
    if (myRank == 0) {
      if (p + 2 < numProcs) {
        // Post receive-size receive from Process p + 2.
        const int recvRank = p + 2;
        const int circBufInd = recvRank % 3;
        if (debug) {
          std::ostringstream os;
          os << myRank << ": Post receive-size receive from Process "
             << recvRank << ": tag = " << sizeTag << endl;
          *err << os.str ();
        }
        // The slot being reused must be free: its previous request
        // (from Process recvRank - 3) should already have completed.
        if (! recvSizeReqs[circBufInd].is_null ()) {
          std::ostringstream os;
          os << myRank << ": recvSizeReqs[" << circBufInd << "] is not "
             << "null, for the receive-size receive from Process "
             << recvRank << "! This may mean that this process never "
             << "finished waiting for the receive from Process "
             << (recvRank - 3) << "." << endl;
          *err << os.str ();
        }
        recvSizeReqs[circBufInd] =
          ireceive<int, int_type> (recvSizeBufs[circBufInd],
                                   recvRank, sizeTag, comm);
      }

      if (p + 1 < numProcs) {
        const int recvRank = p + 1;
        const int circBufInd = recvRank % 3;

        // Wait on receive-size receive from Process p + 1.
        if (debug) {
          std::ostringstream os;
          os << myRank << ": Wait on receive-size receive from Process "
             << recvRank << endl;
          *err << os.str ();
        }
        wait<int> (comm, outArg (recvSizeReqs[circBufInd]));

        // We received the number of rows of data.  (The data
        // come in two columns.)
        const int_type recvNumRows = (recvSizeBufs[circBufInd])[0];
        if (debug && recvNumRows == -1) {
          std::ostringstream os;
          os << myRank << ": Result of receive-size receive from Process "
             << recvRank << " is -1. This should never happen, and "
            "suggests that the receive never got posted. Please report "
            "this bug to the Tpetra developers." << endl;
          *err << os.str ();
        }

        // Post receive-data receive from Process p + 1.
        // NOTE(review): as above, a lingering -1 error flag would make
        // this resize huge; only reported in debug mode, not clamped.
        recvDataBufs[circBufInd].resize (recvNumRows * 2);
        if (debug) {
          std::ostringstream os;
          os << myRank << ": Post receive-data receive from Process "
             << recvRank << ": tag = " << dataTag << ", buffer size = "
             << recvDataBufs[circBufInd].size () << endl;
          *err << os.str ();
        }
        if (! recvDataReqs[circBufInd].is_null ()) {
          std::ostringstream os;
          os << myRank << ": recvDataReqs[" << circBufInd << "] is not "
             << "null, for the receive-data receive from Process "
             << recvRank << "! This may mean that this process never "
             << "finished waiting for the receive from Process "
             << (recvRank - 3) << "." << endl;
          *err << os.str ();
        }
        recvDataReqs[circBufInd] =
          ireceive<int, int_type> (recvDataBufs[circBufInd],
                                   recvRank, dataTag, comm);
      }

      // Wait on receive-data receive from Process p.
      const int recvRank = p;
      const int circBufInd = recvRank % 3;
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Wait on receive-data receive from Process "
           << recvRank << endl;
        *err << os.str ();
      }
      wait<int> (comm, outArg (recvDataReqs[circBufInd]));

      // Write Process p's data.  Number of rows lives in
      // recvSizeBufs[circBufInd], and the actual data live in
      // recvDataBufs[circBufInd].  Do this after posting receives,
      // in order to expose overlap of comm. with file I/O.
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Write GIDs and PIDs from Process "
           << recvRank << endl;
        *err << os.str () << endl;
      }
      const int_type printNumRows = (recvSizeBufs[circBufInd])[0];
      if (debug && printNumRows == -1) {
        std::ostringstream os;
        os << myRank << ": Result of receive-size receive from Process "
           << recvRank << " was -1. This should never happen, and "
          "suggests that its receive-size receive was never posted. "
          "Please report this bug to the Tpetra developers." << endl;
        *err << os.str ();
      }
      if (debug && printNumRows > 0 && recvDataBufs[circBufInd].is_null ()) {
        std::ostringstream os;
        os << myRank << ": Result of receive-size receive from Proc "
           << recvRank << " was " << printNumRows << " > 0, but "
          "recvDataBufs[" << circBufInd << "] is null. This should "
          "never happen. Please report this bug to the Tpetra "
          "developers." << endl;
        *err << os.str ();
      }
      ArrayView<const int_type> printData = (recvDataBufs[circBufInd]) ();
      for (int_type k = 0; k < printNumRows; ++k) {
        const int_type gid = printData[2*k];
        const int_type pid = printData[2*k+1];
        out << gid << endl << pid << endl;
      }
    }
    else if (myRank == p) { // Process p
      // Wait on my send-data send.
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Wait on my send-data send" << endl;
        *err << os.str ();
      }
      wait<int> (comm, outArg (sendReqData));
    }
    else if (myRank == p + 1) { // Process p + 1
      // Post send-data send to Process 0.
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Post send-data send: tag = " << dataTag
           << endl;
        *err << os.str ();
      }
      sendReqData = isend<int, int_type> (sendDataBuf, 0, dataTag, comm);
      // Wait on my send-size send.
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Wait on my send-size send" << endl;
        *err << os.str ();
      }
      wait<int> (comm, outArg (sendReqSize));
    }
    else if (myRank == p + 2) { // Process p + 2
      // Post send-size send to Process 0.
      if (debug) {
        std::ostringstream os;
        os << myRank << ": Post send-size send: size = "
           << sendDataSize[0] << ", tag = " << sizeTag << endl;
        *err << os.str ();
      }
      sendReqSize = isend<int, int_type> (sendDataSize, 0, sizeTag, comm);
    }
  }

  if (! err.is_null ()) {
    err->popTab ();
  }
  if (debug) {
    *err << myRank << ": writeMap: Done" << endl;
  }
  if (! err.is_null ()) {
    err->popTab ();
  }
}

/// \brief Write the Map to the file with the given name, in Matrix
///   Market format.
///
/// Collective over the Map's communicator; only Process 0 opens and
/// writes the file.
///
/// \param filename [in] Name of the output file (opened on Process 0
///   only).
/// \param map [in] The Map to write.
static void
writeMapFile (const std::string& filename,
              const map_type& map)
{
  const int myRank = map.getComm ()->getRank ();
  std::ofstream out;
  if (myRank == 0) { // Only open the file on Proc 0.
    out.open (filename.c_str());
  }
  writeMap (out, map);
  // We can rely on the destructor of the output stream to close
  // the file on scope exit, even if writeMap() throws an
  // exception.
}

private:
/// \brief Print the given string to the output stream as a sequence
///   of Matrix Market comment lines.
///
/// Each nonempty line of \c str is echoed to \c out; lines that do
/// not already begin with the Matrix Market comment character '%'
/// get a "%% " prefix.  Empty lines are dropped.
///
/// \param out [out] Output stream to which to print.
/// \param str [in] Possibly multiline string to print as comments.
static void
printAsComment (std::ostream& out, const std::string& str)
{
  using std::endl;
  std::istringstream inpstream (str);
  std::string line;

  while (getline (inpstream, line)) {
    if (! line.empty()) {
      // Note that getline() doesn't store '\n', so we have to
      // append the endline ourselves.
      if (line[0] == '%') { // Line starts with a comment character.
        out << line << endl;
      }
      else { // Line doesn't start with a comment character.
        out << "%% " << line << endl;
      }
    }
  }
}
}; // class Writer

} // namespace MatrixMarket
} // namespace Tpetra

#endif // __MatrixMarket_Tpetra_hpp
1.7.6.1