Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_RowMatrix_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_ROWMATRIX_DEF_HPP
00043 #define TPETRA_ROWMATRIX_DEF_HPP
00044 
00045 #include <Tpetra_ConfigDefs.hpp>
00046 #include <Tpetra_CrsMatrix.hpp>
00047 #include <Tpetra_Map.hpp>
00048 
00049 namespace Tpetra {
00050 
00051   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00052   RowMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~RowMatrix() {}
00053 
00054   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00055   Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
00056   RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
00057   add (const Scalar& alpha,
00058        const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
00059        const Scalar& beta,
00060        const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& domainMap,
00061        const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& rangeMap,
00062        const Teuchos::RCP<Teuchos::ParameterList>& params) const
00063   {
00064     using Teuchos::Array;
00065     using Teuchos::ArrayRCP;
00066     using Teuchos::as;
00067     using Teuchos::ParameterList;
00068     using Teuchos::RCP;
00069     using Teuchos::rcp;
00070     using Teuchos::rcp_implicit_cast;
00071     using Teuchos::sublist;
00072     typedef LocalOrdinal LO;
00073     typedef GlobalOrdinal GO;
00074     typedef Teuchos::ScalarTraits<Scalar> STS;
00075     typedef Map<LO, GO, Node> map_type;
00076     typedef RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> this_type;
00077     typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crs_matrix_type;
00078 
00079     const this_type& B = *this; // a convenient abbreviation
00080 
00081     // If the user didn't supply a domain or range Map, then try to
00082     // get one from B first (if it has them), then from A (if it has
00083     // them).  If we don't have any domain or range Maps, scold the
00084     // user.
00085     RCP<const map_type> A_domainMap = A.getDomainMap ();
00086     RCP<const map_type> A_rangeMap = A.getRangeMap ();
00087     RCP<const map_type> B_domainMap = B.getDomainMap ();
00088     RCP<const map_type> B_rangeMap = B.getRangeMap ();
00089 
00090     RCP<const map_type> theDomainMap = domainMap;
00091     RCP<const map_type> theRangeMap = rangeMap;
00092 
00093     if (domainMap.is_null ()) {
00094       if (B_domainMap.is_null ()) {
00095         TEUCHOS_TEST_FOR_EXCEPTION(
00096           A_domainMap.is_null (), std::invalid_argument,
00097           "Tpetra::RowMatrix::add: If neither A nor B have a domain Map, "
00098           "then you must supply a nonnull domain Map to this method.");
00099         theDomainMap = A_domainMap;
00100       } else {
00101         theDomainMap = B_domainMap;
00102       }
00103     }
00104     if (rangeMap.is_null ()) {
00105       if (B_rangeMap.is_null ()) {
00106         TEUCHOS_TEST_FOR_EXCEPTION(
00107           A_rangeMap.is_null (), std::invalid_argument,
00108           "Tpetra::RowMatrix::add: If neither A nor B have a range Map, "
00109           "then you must supply a nonnull range Map to this method.");
00110         theRangeMap = A_rangeMap;
00111       } else {
00112         theRangeMap = B_rangeMap;
00113       }
00114     }
00115 
00116 #ifdef HAVE_TPETRA_DEBUG
00117     // In a debug build, check that A and B have matching domain and
00118     // range Maps, if they have domain and range Maps at all.  (If
00119     // they aren't fill complete, then they may not yet have them.)
00120     if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
00121       if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
00122         TEUCHOS_TEST_FOR_EXCEPTION(
00123           ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
00124           "Tpetra::RowMatrix::add: The input RowMatrix A must have a domain Map "
00125           "which is the same as (isSameAs) this RowMatrix's domain Map.");
00126         TEUCHOS_TEST_FOR_EXCEPTION(
00127           ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
00128           "Tpetra::RowMatrix::add: The input RowMatrix A must have a range Map "
00129           "which is the same as (isSameAs) this RowMatrix's range Map.");
00130         TEUCHOS_TEST_FOR_EXCEPTION(
00131           ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
00132           std::invalid_argument,
00133           "Tpetra::RowMatrix::add: The input domain Map must be the same as "
00134           "(isSameAs) this RowMatrix's domain Map.");
00135         TEUCHOS_TEST_FOR_EXCEPTION(
00136           ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
00137           std::invalid_argument,
00138           "Tpetra::RowMatrix::add: The input range Map must be the same as "
00139           "(isSameAs) this RowMatrix's range Map.");
00140       }
00141     }
00142     else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
00143       TEUCHOS_TEST_FOR_EXCEPTION(
00144         ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
00145         std::invalid_argument,
00146         "Tpetra::RowMatrix::add: The input domain Map must be the same as "
00147         "(isSameAs) this RowMatrix's domain Map.");
00148       TEUCHOS_TEST_FOR_EXCEPTION(
00149         ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
00150         std::invalid_argument,
00151         "Tpetra::RowMatrix::add: The input range Map must be the same as "
00152         "(isSameAs) this RowMatrix's range Map.");
00153     }
00154     else {
00155       TEUCHOS_TEST_FOR_EXCEPTION(
00156         domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
00157         "Tpetra::RowMatrix::add: If neither A nor B have a domain and range "
00158         "Map, then you must supply a nonnull domain and range Map to this "
00159         "method.");
00160     }
00161 #endif // HAVE_TPETRA_DEBUG
00162 
00163     // What parameters do we pass to C's constructor?  Do we call
00164     // fillComplete on C after filling it?  And if so, what parameters
00165     // do we pass to C's fillComplete call?
00166     bool callFillComplete = true;
00167     RCP<ParameterList> constructorSublist;
00168     RCP<ParameterList> fillCompleteSublist;
00169     if (! params.is_null ()) {
00170       callFillComplete = params->get ("Call fillComplete", callFillComplete);
00171       constructorSublist = sublist (params, "Constructor parameters");
00172       fillCompleteSublist = sublist (params, "fillComplete parameters");
00173     }
00174 
00175     RCP<const map_type> A_rowMap = A.getRowMap ();
00176     RCP<const map_type> B_rowMap = B.getRowMap ();
00177     RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
00178     RCP<crs_matrix_type> C; // The result matrix.
00179 
00180     // If A and B's row Maps are the same, we can compute an upper
00181     // bound on the number of entries in each row of C, before
00182     // actually computing the sum.  A reasonable upper bound is the
00183     // sum of the two entry counts in each row.  If we choose this as
00184     // the actual per-row upper bound, we can use static profile.
00185     if (A_rowMap->isSameAs (*B_rowMap)) {
00186       const LO localNumRows = as<LO> (A_rowMap->getNodeNumElements ());
00187       ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
00188 
00189       // Get the number of entries in each row of A.
00190       if (alpha != STS::zero ()) {
00191         for (LO localRow = 0; localRow < localNumRows; ++localRow) {
00192           const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
00193           C_maxNumEntriesPerRow[localRow] += A_numEntries;
00194         }
00195       }
00196       // Get the number of entries in each row of B.
00197       if (beta != STS::zero ()) {
00198         for (LO localRow = 0; localRow < localNumRows; ++localRow) {
00199           const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
00200           C_maxNumEntriesPerRow[localRow] += B_numEntries;
00201         }
00202       }
00203       // Construct the result matrix C.
00204       if (constructorSublist.is_null ()) {
00205         C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
00206                                       StaticProfile));
00207       } else {
00208         C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
00209                                       StaticProfile, constructorSublist));
00210       }
00211       // Since A and B have the same row Maps, we could add them
00212       // together all at once and merge values before we call
00213       // insertGlobalValues.  However, we don't really need to, since
00214       // we've already allocated enough space in each row of C for C
00215       // to do the merge itself.
00216     }
00217     else { // the row Maps of A and B are not the same
00218       // Construct the result matrix C.
00219       if (constructorSublist.is_null ()) {
00220         C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
00221       } else {
00222         C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
00223                                       constructorSublist));
00224       }
00225     }
00226 
00227 #ifdef HAVE_TPETRA_DEBUG
00228     TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
00229       "Tpetra::RowMatrix::add: C should not be null at this point.  "
00230       "Please report this bug to the Tpetra developers.");
00231 #endif // HAVE_TPETRA_DEBUG
00232     //
00233     // Compute C = alpha*A + beta*B.
00234     //
00235     Array<GO> ind;
00236     Array<Scalar> val;
00237 
00238     if (alpha != STS::zero ()) {
00239       const LO A_localNumRows = as<LO> (A_rowMap->getNodeNumElements ());
00240       for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
00241         size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
00242         const GO globalRow = A_rowMap->getGlobalElement (localRow);
00243         if (A_numEntries > as<size_t> (ind.size ())) {
00244           ind.resize (A_numEntries);
00245           val.resize (A_numEntries);
00246         }
00247         ArrayView<GO> indView = ind (0, A_numEntries);
00248         ArrayView<Scalar> valView = val (0, A_numEntries);
00249         A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
00250 
00251         if (alpha != STS::one ()) {
00252           for (size_t k = 0; k < A_numEntries; ++k) {
00253             valView[k] *= alpha;
00254           }
00255         }
00256         C->insertGlobalValues (globalRow, indView, valView);
00257       }
00258     }
00259 
00260     if (beta != STS::zero ()) {
00261       const LO B_localNumRows = as<LO> (B_rowMap->getNodeNumElements ());
00262       for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
00263         size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
00264         const GO globalRow = B_rowMap->getGlobalElement (localRow);
00265         if (B_numEntries > as<size_t> (ind.size ())) {
00266           ind.resize (B_numEntries);
00267           val.resize (B_numEntries);
00268         }
00269         ArrayView<GO> indView = ind (0, B_numEntries);
00270         ArrayView<Scalar> valView = val (0, B_numEntries);
00271         B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
00272 
00273         if (beta != STS::one ()) {
00274           for (size_t k = 0; k < B_numEntries; ++k) {
00275             valView[k] *= beta;
00276           }
00277         }
00278         C->insertGlobalValues (globalRow, indView, valView);
00279       }
00280     }
00281 
00282     if (callFillComplete) {
00283       if (fillCompleteSublist.is_null ()) {
00284         C->fillComplete (theDomainMap, theRangeMap);
00285       } else {
00286         C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
00287       }
00288     }
00289 
00290     return rcp_implicit_cast<this_type> (C);
00291   }
00292 
00293 
00294   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00295   void
00296   RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
00297   pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00298   Teuchos::Array<char>& exports,
00299   const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00300   size_t& constantNumPackets,
00301   Distributor &distor) const
00302   {
00303     using Teuchos::Array;
00304     using Teuchos::ArrayView;
00305     using Teuchos::as;
00306     using Teuchos::av_reinterpret_cast;
00307     using Teuchos::RCP;
00308     typedef LocalOrdinal LO;
00309     typedef GlobalOrdinal GO;
00310     typedef typename ArrayView<const LO>::size_type size_type;
00311     typedef Map<LO, GO, Node> map_type;
00312     const char tfecfFuncName[] = "pack";
00313 
00314     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00315       exportLIDs.size() != numPacketsPerLID.size(),
00316       std::invalid_argument, "exportLIDs.size() = " << exportLIDs.size()
00317       << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << ".");
00318 
00319     // Row Map of the source matrix.
00320     RCP<const map_type> rowMapPtr = this->getRowMap ();
00321     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00322       rowMapPtr.is_null (), std::runtime_error,
00323       "The source object's row Map is null.");
00324     const map_type& rowMap = *rowMapPtr;
00325 
00326     // Each input LID corresponds to a different row of the sparse
00327     // matrix.  In general, a RowMatrix doesn't have the same number
00328     // of entries in each row.
00329     constantNumPackets = 0;
00330 
00331     // Get the GIDs of the rows we want to pack.
00332     Array<GO> exportGIDs (exportLIDs.size ());
00333     const size_type numExportGIDs = exportGIDs.size ();
00334     for (size_type i = 0; i < numExportGIDs; ++i) {
00335       exportGIDs[i] = rowMap.getGlobalElement (exportLIDs[i]);
00336     }
00337 
00338     // We say "Packet" is char (really a "byte"), but the actual unit
00339     // of packing is a (GID, value) pair.  The GID is the column index
00340     // in that row of the sparse matrix, and the value is the value at
00341     // that entry of the sparse matrix.  Thus, we have to scale
00342     // numPacketsPerLID by the number of bytes in a _packed_ (GID,
00343     // value) pair.  (We pack the GID and value in each pair
00344     // separately, so the number of bytes in a packed pair is actually
00345     // sizeof(GO) + sizeof(Scalar).)
00346     //
00347     // FIXME (mfh 24 Feb 2013) This code is only correct if
00348     // sizeof(Scalar) is a meaningful representation of the amount of
00349     // data in a Scalar instance.  (GO is always a built-in integer
00350     // type.)
00351     //
00352     // Compute the number of packets per export LID, and accumulate
00353     // the total number of packages.  While doing so, find the max
00354     // number of entries in each row owned by this process; we will
00355     // use that to size temporary arrays below.
00356     const size_t sizeOfOrdValPair = sizeof (GO) + sizeof (Scalar);
00357     size_t totalNumEntries = 0;
00358     size_t maxRowLength = 0;
00359     for (size_type i = 0; i < exportGIDs.size(); ++i) {
00360       const size_t curNumEntries =
00361         this->getNumEntriesInGlobalRow (exportGIDs[i]);
00362       numPacketsPerLID[i] = curNumEntries * sizeOfOrdValPair;
00363       totalNumEntries += curNumEntries;
00364       maxRowLength = std::max (curNumEntries, maxRowLength);
00365     }
00366 
00367     // Pack export data by interleaving rows' indices and values in
00368     // the following way:
00369     //
00370     // [inds_row0 vals_row0 inds_row1 vals_row1 ... ]
00371     if (totalNumEntries > 0) {
00372       // exports is an array of char (bytes), so scale the total
00373       // number of entries by the number of bytes per entry (where
00374       // "entry" includes both the column index and the value).
00375       const size_t totalNumBytes = totalNumEntries * sizeOfOrdValPair;
00376       exports.resize (totalNumBytes);
00377 
00378       // Temporary buffers for a copy of the entries in each row.
00379       Array<GO> inds (as<size_type> (maxRowLength));
00380       Array<Scalar> vals (as<size_type> (maxRowLength));
00381       // Current position in the 'exports' output array.
00382       size_t curOffsetInBytes = 0;
00383 
00384       // For each row of the matrix owned by the calling process, pack
00385       // that row's column indices and values into the exports array.
00386       // We'll use getGlobalRowCopy, since it will always work, no
00387       // matter how the subclass stores its data.  Subclasses (like
00388       // CrsMatrix) should reimplement this method to use (e.g.,)
00389       // getGlobalRowView if appropriate, since that will likely be
00390       // more efficient.
00391       //
00392       // FIXME (mfh 28 Jun 2013) This could be made a (shared-memory)
00393       // parallel kernel, by using the CSR data layout to calculate
00394       // positions in the output buffer.
00395       for (size_type i = 0; i < exportGIDs.size(); ++i) {
00396   // Get a copy of the current row's data.
00397   size_t curNumEntries = 0;
00398   this->getGlobalRowCopy (exportGIDs[i], inds (), vals (), curNumEntries);
00399   // inds and vals arrays might have more entries than the row.
00400   // curNumEntries is the number of valid entries of these arrays.
00401   ArrayView<const GO> curInds = inds (0, as<size_type> (curNumEntries));
00402   ArrayView<const Scalar> curVals = vals (0, as<size_type> (curNumEntries));
00403 
00404   // Get views of the spots in the exports array in which to
00405   // put the indices resp. values.  See notes and FIXME above.
00406   ArrayView<char> outIndsChar = 
00407     exports (curOffsetInBytes, curNumEntries * sizeof (GO));
00408   ArrayView<char> outValsChar = 
00409     exports (curOffsetInBytes + curNumEntries * sizeof (GO),
00410        curNumEntries * sizeof (Scalar));
00411 
00412   // Cast the above views of char as views of GO resp. Scalar.
00413   ArrayView<GO> outInds = av_reinterpret_cast<GO> (outIndsChar);
00414   ArrayView<Scalar> outVals = av_reinterpret_cast<Scalar> (outValsChar);
00415 
00416   // Copy the source matrix's row data into the views of the
00417   // exports array for indices resp. values.
00418   std::copy (curInds.begin(), curInds.end(), outInds.begin());
00419   std::copy (curVals.begin(), curVals.end(), outVals.begin());
00420   curOffsetInBytes += sizeOfOrdValPair * curNumEntries;
00421       }
00422 
00423 #ifdef HAVE_TPETRA_DEBUG
00424       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curOffsetInBytes != totalNumBytes,
00425         std::logic_error, ": At end of method, the final offset bytes count "
00426         "curOffsetInBytes=" << curOffsetInBytes << " does not equal the total "
00427         "number of bytes packed totalNumBytes=" << totalNumBytes << ".  Please "
00428         "report this bug to the Tpetra developers.");
00429 #endif //  HAVE_TPETRA_DEBUG
00430     }
00431   }
00432 
00433 } // namespace Tpetra
00434 
00435 //
00436 // Explicit instantiation macro
00437 //
00438 // Must be expanded from within the Tpetra namespace!
00439 //
00440 
00441 #define TPETRA_ROWMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
00442   \
00443   template class RowMatrix< SCALAR , LO , GO , NODE >;
00444 
00445 
00446 #endif // TPETRA_ROWMATRIX_DEF_HPP
00447 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines