|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_ROWMATRIX_DEF_HPP 00043 #define TPETRA_ROWMATRIX_DEF_HPP 00044 00045 #include <Tpetra_ConfigDefs.hpp> 00046 #include <Tpetra_CrsMatrix.hpp> 00047 #include <Tpetra_Map.hpp> 00048 00049 namespace Tpetra { 00050 00051 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00052 RowMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~RowMatrix() {} 00053 00054 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00055 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > 00056 RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>:: 00057 add (const Scalar& alpha, 00058 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A, 00059 const Scalar& beta, 00060 const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& domainMap, 00061 const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& rangeMap, 00062 const Teuchos::RCP<Teuchos::ParameterList>& params) const 00063 { 00064 using Teuchos::Array; 00065 using Teuchos::ArrayRCP; 00066 using Teuchos::as; 00067 using Teuchos::ParameterList; 00068 using Teuchos::RCP; 00069 using Teuchos::rcp; 00070 using Teuchos::rcp_implicit_cast; 00071 using Teuchos::sublist; 00072 typedef LocalOrdinal LO; 00073 typedef GlobalOrdinal GO; 00074 typedef Teuchos::ScalarTraits<Scalar> STS; 00075 typedef Map<LO, GO, Node> map_type; 00076 typedef RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> this_type; 00077 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type; 00078 00079 const this_type& B = *this; // a convenient abbreviation 00080 00081 // If the user didn't supply a domain or range Map, then try to 00082 // get one from B first (if it has them), then from A (if it has 00083 // them). If we don't have any domain or range Maps, scold the 00084 // user. 00085 RCP<const map_type> A_domainMap = A.getDomainMap (); 00086 RCP<const map_type> A_rangeMap = A.getRangeMap (); 00087 RCP<const map_type> B_domainMap = B.getDomainMap (); 00088 RCP<const map_type> B_rangeMap = B.getRangeMap (); 00089 00090 RCP<const map_type> theDomainMap = domainMap; 00091 RCP<const map_type> theRangeMap = rangeMap; 00092 00093 if (domainMap.is_null ()) { 00094 if (B_domainMap.is_null ()) { 00095 TEUCHOS_TEST_FOR_EXCEPTION( 00096 A_domainMap.is_null (), std::invalid_argument, 00097 "Tpetra::RowMatrix::add: If neither A nor B have a domain Map, " 00098 "then you must supply a nonnull domain Map to this method."); 00099 theDomainMap = A_domainMap; 00100 } else { 00101 theDomainMap = B_domainMap; 00102 } 00103 } 00104 if (rangeMap.is_null ()) { 00105 if (B_rangeMap.is_null ()) { 00106 TEUCHOS_TEST_FOR_EXCEPTION( 00107 A_rangeMap.is_null (), std::invalid_argument, 00108 "Tpetra::RowMatrix::add: If neither A nor B have a range Map, " 00109 "then you must supply a nonnull range Map to this method."); 00110 theRangeMap = A_rangeMap; 00111 } else { 00112 theRangeMap = B_rangeMap; 00113 } 00114 } 00115 00116 #ifdef HAVE_TPETRA_DEBUG 00117 // In a debug build, check that A and B have matching domain and 00118 // range Maps, if they have domain and range Maps at all. (If 00119 // they aren't fill complete, then they may not yet have them.) 00120 if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) { 00121 if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) { 00122 TEUCHOS_TEST_FOR_EXCEPTION( 00123 ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument, 00124 "Tpetra::RowMatrix::add: The input RowMatrix A must have a domain Map " 00125 "which is the same as (isSameAs) this RowMatrix's domain Map."); 00126 TEUCHOS_TEST_FOR_EXCEPTION( 00127 ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument, 00128 "Tpetra::RowMatrix::add: The input RowMatrix A must have a range Map " 00129 "which is the same as (isSameAs) this RowMatrix's range Map."); 00130 TEUCHOS_TEST_FOR_EXCEPTION( 00131 ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap), 00132 std::invalid_argument, 00133 "Tpetra::RowMatrix::add: The input domain Map must be the same as " 00134 "(isSameAs) this RowMatrix's domain Map."); 00135 TEUCHOS_TEST_FOR_EXCEPTION( 00136 ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap), 00137 std::invalid_argument, 00138 "Tpetra::RowMatrix::add: The input range Map must be the same as " 00139 "(isSameAs) this RowMatrix's range Map."); 00140 } 00141 } 00142 else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) { 00143 TEUCHOS_TEST_FOR_EXCEPTION( 00144 ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap), 00145 std::invalid_argument, 00146 "Tpetra::RowMatrix::add: The input domain Map must be the same as " 00147 "(isSameAs) this RowMatrix's domain Map."); 00148 TEUCHOS_TEST_FOR_EXCEPTION( 00149 ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap), 00150 std::invalid_argument, 00151 "Tpetra::RowMatrix::add: The input range Map must be the same as " 00152 "(isSameAs) this RowMatrix's range Map."); 00153 } 00154 else { 00155 TEUCHOS_TEST_FOR_EXCEPTION( 00156 domainMap.is_null () || rangeMap.is_null (), std::invalid_argument, 00157 "Tpetra::RowMatrix::add: If neither A nor B have a domain and range " 00158 "Map, then you must supply a nonnull domain and range Map to this " 00159 "method."); 00160 } 00161 #endif // HAVE_TPETRA_DEBUG 00162 00163 // What parameters do we pass to C's constructor? Do we call 00164 // fillComplete on C after filling it? And if so, what parameters 00165 // do we pass to C's fillComplete call? 00166 bool callFillComplete = true; 00167 RCP<ParameterList> constructorSublist; 00168 RCP<ParameterList> fillCompleteSublist; 00169 if (! params.is_null ()) { 00170 callFillComplete = params->get ("Call fillComplete", callFillComplete); 00171 constructorSublist = sublist (params, "Constructor parameters"); 00172 fillCompleteSublist = sublist (params, "fillComplete parameters"); 00173 } 00174 00175 RCP<const map_type> A_rowMap = A.getRowMap (); 00176 RCP<const map_type> B_rowMap = B.getRowMap (); 00177 RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation 00178 RCP<crs_matrix_type> C; // The result matrix. 00179 00180 // If A and B's row Maps are the same, we can compute an upper 00181 // bound on the number of entries in each row of C, before 00182 // actually computing the sum. A reasonable upper bound is the 00183 // sum of the two entry counts in each row. If we choose this as 00184 // the actual per-row upper bound, we can use static profile. 00185 if (A_rowMap->isSameAs (*B_rowMap)) { 00186 const LO localNumRows = as<LO> (A_rowMap->getNodeNumElements ()); 00187 ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0); 00188 00189 // Get the number of entries in each row of A. 00190 if (alpha != STS::zero ()) { 00191 for (LO localRow = 0; localRow < localNumRows; ++localRow) { 00192 const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow); 00193 C_maxNumEntriesPerRow[localRow] += A_numEntries; 00194 } 00195 } 00196 // Get the number of entries in each row of B. 00197 if (beta != STS::zero ()) { 00198 for (LO localRow = 0; localRow < localNumRows; ++localRow) { 00199 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow); 00200 C_maxNumEntriesPerRow[localRow] += B_numEntries; 00201 } 00202 } 00203 // Construct the result matrix C. 00204 if (constructorSublist.is_null ()) { 00205 C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow, 00206 StaticProfile)); 00207 } else { 00208 C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow, 00209 StaticProfile, constructorSublist)); 00210 } 00211 // Since A and B have the same row Maps, we could add them 00212 // together all at once and merge values before we call 00213 // insertGlobalValues. However, we don't really need to, since 00214 // we've already allocated enough space in each row of C for C 00215 // to do the merge itself. 00216 } 00217 else { // the row Maps of A and B are not the same 00218 // Construct the result matrix C. 00219 if (constructorSublist.is_null ()) { 00220 C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile)); 00221 } else { 00222 C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile, 00223 constructorSublist)); 00224 } 00225 } 00226 00227 #ifdef HAVE_TPETRA_DEBUG 00228 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error, 00229 "Tpetra::RowMatrix::add: C should not be null at this point. " 00230 "Please report this bug to the Tpetra developers."); 00231 #endif // HAVE_TPETRA_DEBUG 00232 // 00233 // Compute C = alpha*A + beta*B. 00234 // 00235 Array<GO> ind; 00236 Array<Scalar> val; 00237 00238 if (alpha != STS::zero ()) { 00239 const LO A_localNumRows = as<LO> (A_rowMap->getNodeNumElements ()); 00240 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) { 00241 size_t A_numEntries = A.getNumEntriesInLocalRow (localRow); 00242 const GO globalRow = A_rowMap->getGlobalElement (localRow); 00243 if (A_numEntries > as<size_t> (ind.size ())) { 00244 ind.resize (A_numEntries); 00245 val.resize (A_numEntries); 00246 } 00247 ArrayView<GO> indView = ind (0, A_numEntries); 00248 ArrayView<Scalar> valView = val (0, A_numEntries); 00249 A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries); 00250 00251 if (alpha != STS::one ()) { 00252 for (size_t k = 0; k < A_numEntries; ++k) { 00253 valView[k] *= alpha; 00254 } 00255 } 00256 C->insertGlobalValues (globalRow, indView, valView); 00257 } 00258 } 00259 00260 if (beta != STS::zero ()) { 00261 const LO B_localNumRows = as<LO> (B_rowMap->getNodeNumElements ()); 00262 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) { 00263 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow); 00264 const GO globalRow = B_rowMap->getGlobalElement (localRow); 00265 if (B_numEntries > as<size_t> (ind.size ())) { 00266 ind.resize (B_numEntries); 00267 val.resize (B_numEntries); 00268 } 00269 ArrayView<GO> indView = ind (0, B_numEntries); 00270 ArrayView<Scalar> valView = val (0, B_numEntries); 00271 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries); 00272 00273 if (beta != STS::one ()) { 00274 for (size_t k = 0; k < B_numEntries; ++k) { 00275 valView[k] *= beta; 00276 } 00277 } 00278 C->insertGlobalValues (globalRow, indView, valView); 00279 } 00280 } 00281 00282 if (callFillComplete) { 00283 if (fillCompleteSublist.is_null ()) { 00284 C->fillComplete (theDomainMap, theRangeMap); 00285 } else { 00286 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist); 00287 } 00288 } 00289 00290 return rcp_implicit_cast<this_type> (C); 00291 } 00292 00293 00294 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node> 00295 void 00296 RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>:: 00297 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 00298 Teuchos::Array<char>& exports, 00299 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00300 size_t& constantNumPackets, 00301 Distributor &distor) const 00302 { 00303 using Teuchos::Array; 00304 using Teuchos::ArrayView; 00305 using Teuchos::as; 00306 using Teuchos::av_reinterpret_cast; 00307 using Teuchos::RCP; 00308 typedef LocalOrdinal LO; 00309 typedef GlobalOrdinal GO; 00310 typedef typename ArrayView<const LO>::size_type size_type; 00311 typedef Map<LO, GO, Node> map_type; 00312 const char tfecfFuncName[] = "pack"; 00313 00314 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00315 exportLIDs.size() != numPacketsPerLID.size(), 00316 std::invalid_argument, "exportLIDs.size() = " << exportLIDs.size() 00317 << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << "."); 00318 00319 // Row Map of the source matrix. 00320 RCP<const map_type> rowMapPtr = this->getRowMap (); 00321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00322 rowMapPtr.is_null (), std::runtime_error, 00323 "The source object's row Map is null."); 00324 const map_type& rowMap = *rowMapPtr; 00325 00326 // Each input LID corresponds to a different row of the sparse 00327 // matrix. In general, a RowMatrix doesn't have the same number 00328 // of entries in each row. 00329 constantNumPackets = 0; 00330 00331 // Get the GIDs of the rows we want to pack. 00332 Array<GO> exportGIDs (exportLIDs.size ()); 00333 const size_type numExportGIDs = exportGIDs.size (); 00334 for (size_type i = 0; i < numExportGIDs; ++i) { 00335 exportGIDs[i] = rowMap.getGlobalElement (exportLIDs[i]); 00336 } 00337 00338 // We say "Packet" is char (really a "byte"), but the actual unit 00339 // of packing is a (GID, value) pair. The GID is the column index 00340 // in that row of the sparse matrix, and the value is the value at 00341 // that entry of the sparse matrix. Thus, we have to scale 00342 // numPacketsPerLID by the number of bytes in a _packed_ (GID, 00343 // value) pair. (We pack the GID and value in each pair 00344 // separately, so the number of bytes in a packed pair is actually 00345 // sizeof(GO) + sizeof(Scalar).) 00346 // 00347 // FIXME (mfh 24 Feb 2013) This code is only correct if 00348 // sizeof(Scalar) is a meaningful representation of the amount of 00349 // data in a Scalar instance. (GO is always a built-in integer 00350 // type.) 00351 // 00352 // Compute the number of packets per export LID, and accumulate 00353 // the total number of packages. While doing so, find the max 00354 // number of entries in each row owned by this process; we will 00355 // use that to size temporary arrays below. 00356 const size_t sizeOfOrdValPair = sizeof (GO) + sizeof (Scalar); 00357 size_t totalNumEntries = 0; 00358 size_t maxRowLength = 0; 00359 for (size_type i = 0; i < exportGIDs.size(); ++i) { 00360 const size_t curNumEntries = 00361 this->getNumEntriesInGlobalRow (exportGIDs[i]); 00362 numPacketsPerLID[i] = curNumEntries * sizeOfOrdValPair; 00363 totalNumEntries += curNumEntries; 00364 maxRowLength = std::max (curNumEntries, maxRowLength); 00365 } 00366 00367 // Pack export data by interleaving rows' indices and values in 00368 // the following way: 00369 // 00370 // [inds_row0 vals_row0 inds_row1 vals_row1 ... ] 00371 if (totalNumEntries > 0) { 00372 // exports is an array of char (bytes), so scale the total 00373 // number of entries by the number of bytes per entry (where 00374 // "entry" includes both the column index and the value). 00375 const size_t totalNumBytes = totalNumEntries * sizeOfOrdValPair; 00376 exports.resize (totalNumBytes); 00377 00378 // Temporary buffers for a copy of the entries in each row. 00379 Array<GO> inds (as<size_type> (maxRowLength)); 00380 Array<Scalar> vals (as<size_type> (maxRowLength)); 00381 // Current position in the 'exports' output array. 00382 size_t curOffsetInBytes = 0; 00383 00384 // For each row of the matrix owned by the calling process, pack 00385 // that row's column indices and values into the exports array. 00386 // We'll use getGlobalRowCopy, since it will always work, no 00387 // matter how the subclass stores its data. Subclasses (like 00388 // CrsMatrix) should reimplement this method to use (e.g.,) 00389 // getGlobalRowView if appropriate, since that will likely be 00390 // more efficient. 00391 // 00392 // FIXME (mfh 28 Jun 2013) This could be made a (shared-memory) 00393 // parallel kernel, by using the CSR data layout to calculate 00394 // positions in the output buffer. 00395 for (size_type i = 0; i < exportGIDs.size(); ++i) { 00396 // Get a copy of the current row's data. 00397 size_t curNumEntries = 0; 00398 this->getGlobalRowCopy (exportGIDs[i], inds (), vals (), curNumEntries); 00399 // inds and vals arrays might have more entries than the row. 00400 // curNumEntries is the number of valid entries of these arrays. 00401 ArrayView<const GO> curInds = inds (0, as<size_type> (curNumEntries)); 00402 ArrayView<const Scalar> curVals = vals (0, as<size_type> (curNumEntries)); 00403 00404 // Get views of the spots in the exports array in which to 00405 // put the indices resp. values. See notes and FIXME above. 00406 ArrayView<char> outIndsChar = 00407 exports (curOffsetInBytes, curNumEntries * sizeof (GO)); 00408 ArrayView<char> outValsChar = 00409 exports (curOffsetInBytes + curNumEntries * sizeof (GO), 00410 curNumEntries * sizeof (Scalar)); 00411 00412 // Cast the above views of char as views of GO resp. Scalar. 00413 ArrayView<GO> outInds = av_reinterpret_cast<GO> (outIndsChar); 00414 ArrayView<Scalar> outVals = av_reinterpret_cast<Scalar> (outValsChar); 00415 00416 // Copy the source matrix's row data into the views of the 00417 // exports array for indices resp. values. 00418 std::copy (curInds.begin(), curInds.end(), outInds.begin()); 00419 std::copy (curVals.begin(), curVals.end(), outVals.begin()); 00420 curOffsetInBytes += sizeOfOrdValPair * curNumEntries; 00421 } 00422 00423 #ifdef HAVE_TPETRA_DEBUG 00424 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curOffsetInBytes != totalNumBytes, 00425 std::logic_error, ": At end of method, the final offset bytes count " 00426 "curOffsetInBytes=" << curOffsetInBytes << " does not equal the total " 00427 "number of bytes packed totalNumBytes=" << totalNumBytes << ". Please " 00428 "report this bug to the Tpetra developers."); 00429 #endif // HAVE_TPETRA_DEBUG 00430 } 00431 } 00432 00433 } // namespace Tpetra 00434 00435 // 00436 // Explicit instantiation macro 00437 // 00438 // Must be expanded from within the Tpetra namespace! 00439 // 00440 00441 #define TPETRA_ROWMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 00442 \ 00443 template class RowMatrix< SCALAR , LO , GO , NODE >; 00444 00445 00446 #endif // TPETRA_ROWMATRIX_DEF_HPP 00447
1.7.6.1