Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_MultiVector_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
00043 #define TPETRA_MULTIVECTOR_DEF_HPP
00044 
00045 #include <Kokkos_DefaultArithmetic.hpp>
00046 #include <Tpetra_Util.hpp>
00047 #include <Tpetra_Vector.hpp>
00048 
00049 #if TPETRA_USE_KOKKOS_DISTOBJECT
00050 #  include <Tpetra_Details_MultiVectorDistObjectKernels.hpp>
00051 #endif
00052 
00053 #ifdef DOXYGEN_USE_ONLY
00054 #  include "Tpetra_MultiVector_decl.hpp"
00055 #endif
00056 
00057 namespace Tpetra {
00058 
00059   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00060   bool
00061   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00062   vectorIndexOutOfRange(size_t VectorIndex) const {
00063     return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
00064   }
00065 
00066   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00067   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00068   MultiVector () :
00069     DO (Teuchos::rcp (new Map<LocalOrdinal, GlobalOrdinal, Node> ())),
00070     lclMV_ (this->getMap ()->getNode ()),
00071     hasViewSemantics_ (false) // for now, not for long though
00072   {}
00073 
00074   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00075   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00076   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00077                size_t NumVectors,
00078                bool zeroOut) : /* default is true */
00079     DO (map),
00080     lclMV_ (map->getNode ()),
00081     hasViewSemantics_ (false) // for now, not for long though
00082   {
00083     using Teuchos::ArrayRCP;
00084     using Teuchos::RCP;
00085 
00086     TEUCHOS_TEST_FOR_EXCEPTION(NumVectors < 1, std::invalid_argument,
00087       "Tpetra::MultiVector::MultiVector(): NumVectors must be strictly positive.");
00088     const size_t myLen = getLocalLength();
00089     if (myLen > 0) {
00090       RCP<Node> node = map->getNode();
00091       // On host-type Kokkos Nodes, allocBuffer() just calls the
00092       // one-argument version of arcp to allocate memory.  This should
00093       // not fill the memory by default, otherwise we would lose the
00094       // first-touch allocation optimization.
00095       ArrayRCP<Scalar> data = node->template allocBuffer<Scalar>(myLen*NumVectors);
00096       MVT::initializeValues(lclMV_,myLen,NumVectors,data,myLen);
00097       if (zeroOut) {
00098         // MVT uses the Kokkos Node's parallel_for in this case, for
00099         // first-touch allocation (across rows).
00100         MVT::Init(lclMV_, Teuchos::ScalarTraits<Scalar>::zero());
00101       }
00102     }
00103     else {
00104       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00105     }
00106   }
00107 
00108   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00109   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00110   MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source) :
00111     DO (source),
00112     lclMV_ (MVT::getNode (source.lclMV_)),
00113     hasViewSemantics_ (source.hasViewSemantics_)
00114   {
00115     if (source.hasViewSemantics_) {
00116       // Shallow copy.  The const_cast is ugly but does no harm:
00117       // ArrayRCP is a pointer, so there's no reason why a method that
00118       // returns it can't be marked const.  (If you're not changing
00119       // the pointer itself, e.g., by reallocating, the pointer is
00120       // const.  It doesn't matter whether or not you can change the
00121       // values to which the pointer points.)
00122       MVT::initializeValues (lclMV_, source.lclMV_.getNumRows (),
00123                              source.lclMV_.getNumCols (),
00124                              const_cast<KMV&> (source.lclMV_).getValuesNonConst (),
00125                              source.lclMV_.getStride (),
00126                              source.lclMV_.getOrigNumRows (),
00127                              source.lclMV_.getOrigNumCols ());
00128       // Don't forget to copy whichVectors_ (for non-constant-stride view).
00129       if (source.whichVectors_.size () > 0) {
00130         whichVectors_ = source.whichVectors_; // Array::op= does a deep copy
00131       }
00132     }
00133     else {
00134       using Teuchos::ArrayRCP;
00135       using Teuchos::RCP;
00136 
00137       // copy data from the source MultiVector into this multivector
00138       RCP<Node> node = MVT::getNode(source.lclMV_);
00139       const LocalOrdinal myLen = source.getLocalLength();
00140       const size_t numVecs = source.getNumVectors();
00141 
00142       // On host-type Kokkos Nodes, allocBuffer() just calls the
00143       // one-argument version of arcp to allocate memory.  This should
00144       // not fill the memory by default, otherwise we would lose the
00145       // first-touch allocation optimization.
00146       ArrayRCP<Scalar> data = (myLen > 0) ?
00147         node->template allocBuffer<Scalar> (myLen * numVecs) :
00148         Teuchos::null;
00149       const size_t stride = (myLen > 0) ? myLen : size_t (0);
00150 
00151       // This just sets the dimensions, pointer, and stride of lclMV_.
00152       MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00153       // This actually copies the data.  It uses the Node's
00154       // parallel_for to copy, which should ensure first-touch
00155       // allocation on systems that support it.
00156       if (source.isConstantStride ()) {
00157         MVT::Assign (lclMV_, source.lclMV_);
00158       }
00159       else {
00160         MVT::Assign (lclMV_, source.lclMV_, source.whichVectors_);
00161       }
00162     }
00163   }
00164 
00165 
00166   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00167   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00168   MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source,
00169                const Teuchos::DataAccess copyOrView) :
00170     DO (source),
00171     lclMV_ (MVT::getNode (source.lclMV_)),
00172     hasViewSemantics_ (copyOrView == Teuchos::View)
00173   {
00174     if (copyOrView == Teuchos::View) {
00175       // Shallow copy.  The const_cast is ugly but does no harm:
00176       // ArrayRCP is a pointer, so there's no reason why a method that
00177       // returns it can't be marked const.  (If you're not changing
00178       // the pointer itself, e.g., by reallocating, the pointer is
00179       // const.  It doesn't matter whether or not you can change the
00180       // values to which the pointer points.)
00181       MVT::initializeValues (lclMV_, source.lclMV_.getNumRows (),
00182                              source.lclMV_.getNumCols (),
00183                              const_cast<KMV&> (source.lclMV_).getValuesNonConst (),
00184                              source.lclMV_.getStride (),
00185                              source.lclMV_.getOrigNumRows (),
00186                              source.lclMV_.getOrigNumCols ());
00187       // Don't forget to copy whichVectors_ (for non-constant-stride view).
00188       if (source.whichVectors_.size () > 0) {
00189         whichVectors_ = source.whichVectors_; // Array::op= does a deep copy
00190       }
00191     }
00192     else if (copyOrView == Teuchos::Copy) {
00193       using Teuchos::ArrayRCP;
00194       using Teuchos::RCP;
00195 
00196       // copy data from the source MultiVector into this multivector
00197       RCP<Node> node = MVT::getNode(source.lclMV_);
00198       const LocalOrdinal myLen = source.getLocalLength();
00199       const size_t numVecs = source.getNumVectors();
00200 
00201       ArrayRCP<Scalar> data = (myLen > 0) ?
00202         node->template allocBuffer<Scalar> (myLen * numVecs) :
00203         Teuchos::null;
00204       const size_t stride = (myLen > 0) ? myLen : size_t (0);
00205 
00206       // This just sets the dimensions, pointer, and stride of lclMV_.
00207       MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00208       // This actually copies the data.  It uses the Node's
00209       // parallel_for to copy, which should ensure first-touch
00210       // allocation on systems that support it.
00211       if (source.isConstantStride ()) {
00212         MVT::Assign (lclMV_, source.lclMV_);
00213       }
00214       else {
00215         MVT::Assign (lclMV_, source.lclMV_, source.whichVectors_);
00216       }
00217     } else {
00218       TEUCHOS_TEST_FOR_EXCEPTION(
00219         true, std::invalid_argument, "Tpetra::MultiVector copy constructor: "
00220         "The second argument 'copyOrView' has an invalid value " << copyOrView
00221         << ".  Valid values include Teuchos::Copy = " << Teuchos::Copy <<
00222         " and Teuchos::View = " << Teuchos::View << ".");
00223     }
00224   }
00225 
00226 
00227   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00228   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00229   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00230                const KokkosClassic::MultiVector<Scalar,Node>& localMultiVector,
00231                EPrivateComputeViewConstructor /* dummy */) :
00232     DO (map),
00233     lclMV_ (localMultiVector),
00234     hasViewSemantics_ (false) // for now, not for long though
00235   {
00236     const size_t localNumElts = map->getNodeNumElements ();
00237     TEUCHOS_TEST_FOR_EXCEPTION(
00238       localMultiVector.getNumRows () < localNumElts,
00239       std::invalid_argument,
00240       "Tpetra::MultiVector constructor: The Map contains " << localNumElts
00241       << " on process " << map->getComm ()->getRank () << ", but the given "
00242       "KokkosClassic::MultiVector only has " << localMultiVector.getNumRows ()
00243       << " rows.");
00244   }
00245 
00246   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00247   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00248   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00249                const Teuchos::ArrayRCP<Scalar>& view,
00250                size_t LDA,
00251                size_t NumVectors,
00252                EPrivateHostViewConstructor /* dummy */) :
00253     DO (map),
00254     lclMV_ (map->getNode ()),
00255     hasViewSemantics_ (false) // for now, not for long though
00256   {
00257     using Teuchos::ArrayRCP;
00258     using Teuchos::RCP;
00259 
00260     const char tfecfFuncName[] = "MultiVector(view,LDA,NumVector)";
00261     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00262       ": NumVectors must be strictly positive, but you specified NumVectors = "
00263       << NumVectors << ".");
00264     const size_t myLen = getLocalLength();
00265     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument,
00266       ": LDA must be large enough to accomodate the local entries.");
00267     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00268       static_cast<size_t> (view.size ()) < LDA*(NumVectors - 1) + myLen,
00269       std::invalid_argument,
00270       ": view does not contain enough data to specify the entries in this.");
00271     MVT::initializeValues (lclMV_, myLen, NumVectors, view, myLen);
00272   }
00273 
00274   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00275   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00276   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00277                Teuchos::ArrayRCP<Scalar> data,
00278                size_t LDA,
00279                size_t NumVectors,
00280                EPrivateComputeViewConstructor /* dummy */) :
00281     DO (map),
00282     lclMV_ (map->getNode ()),
00283     hasViewSemantics_ (false) // for now, not for long though
00284   {
00285     using Teuchos::ArrayRCP;
00286     using Teuchos::RCP;
00287 
00288     const char tfecfFuncName[] = "MultiVector(data,LDA,NumVector)";
00289     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00290       ": NumVectors must be strictly positive.");
00291     const size_t myLen = getLocalLength();
00292 #ifdef HAVE_TPETRA_DEBUG
00293     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00294       static_cast<size_t> (data.size ()) < LDA*(NumVectors - 1) + myLen,
00295       std::runtime_error,
00296       ": data does not contain enough data to specify the entries in this.");
00297 #endif
00298     MVT::initializeValues(lclMV_,myLen,NumVectors,data,LDA);
00299   }
00300 
00301   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00302   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00303   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00304                Teuchos::ArrayRCP<Scalar> data,
00305                size_t LDA,
00306                Teuchos::ArrayView<const size_t> WhichVectors,
00307                EPrivateComputeViewConstructor /* dummy */) :
00308     DO (map),
00309     lclMV_ (map->getNode ()),
00310     whichVectors_ (WhichVectors),
00311     hasViewSemantics_ (false) // for now, not for long though
00312   {
00313     using Teuchos::ArrayRCP;
00314     using Teuchos::RCP;
00315 
00316     const char tfecfFuncName[] = "MultiVector(data,LDA,WhichVectors)";
00317     const size_t myLen = getLocalLength();
00318     size_t maxVector = *std::max_element(WhichVectors.begin(), WhichVectors.end());
00319     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00320       ": LDA must be large enough to accomodate the local entries.");
00321 #ifdef HAVE_TPETRA_DEBUG
00322     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00323       static_cast<size_t> (data.size ()) < LDA * maxVector + myLen, std::runtime_error,
00324       ": data does not contain enough data to specify the entries in this.");
00325 #endif
00326     if (WhichVectors.size () == 1) {
00327       // shift data so that desired vector is vector 0
00328       maxVector = 0;
00329       data += LDA * WhichVectors[0];
00330       // kill whichVectors_; we are constant stride
00331       whichVectors_.clear ();
00332     }
00333     if (myLen > 0) {
00334       MVT::initializeValues(lclMV_,myLen,maxVector+1,data,LDA);
00335     }
00336     else {
00337       MVT::initializeValues(lclMV_,0,WhichVectors.size(),Teuchos::null,0);
00338     }
00339   }
00340 
00341   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00342   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00343   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00344                const Teuchos::ArrayRCP<Scalar>& data,
00345                const size_t LDA,
00346                const size_t numVecs) :
00347     DO (map),
00348     lclMV_ (map->getNode ()),
00349     hasViewSemantics_ (false) // for now, not for long though
00350   {
00351     const char tfecfFuncName[] = "MultiVector(map,data,LDA,numVecs)";
00352     const size_t numRows = this->getLocalLength ();
00353     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < numRows, std::runtime_error,
00354       ": LDA = " << LDA << " < numRows = " << numRows << ".");
00355     MVT::initializeValues (lclMV_, numRows, numVecs, data, LDA);
00356   }
00357 
00358   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00359   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00360   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00361                const KokkosClassic::MultiVector<Scalar,Node>& localMultiVector,
00362                Teuchos::ArrayView<const size_t> WhichVectors,
00363                EPrivateComputeViewConstructor /* dummy */) :
00364     DO (map),
00365     lclMV_ (localMultiVector),
00366     whichVectors_ (WhichVectors),
00367     hasViewSemantics_ (false) // for now, not for long though
00368   {
00369     const size_t localNumElts = map->getNodeNumElements ();
00370     TEUCHOS_TEST_FOR_EXCEPTION(
00371       localMultiVector.getNumRows () < localNumElts,
00372       std::invalid_argument,
00373       "Tpetra::MultiVector constructor: The Map contains " << localNumElts
00374       << " on process " << map->getComm ()->getRank () << ", but the given "
00375       "KokkosClassic::MultiVector only has " << localMultiVector.getNumRows ()
00376       << " rows.");
00377     TEUCHOS_TEST_FOR_EXCEPTION(
00378       WhichVectors.size () == 0,
00379       std::invalid_argument,
00380       "Tpetra::MultiVector constructor: WhichVectors.size() == 0.  "
00381       "The noncontiguous column view constructor only makes sense for a "
00382       "nonzero number of columns to view.");
00383 
00384     // Optimization for the case of a single column: just make a
00385     // contiguous ("constant stride") view of the one column.
00386     if (WhichVectors.size () == 1) {
00387       const size_t offsetRow = 0;
00388       const size_t offsetCol = WhichVectors[0];
00389       lclMV_ = lclMV_.offsetViewNonConst (localNumElts, 1, offsetRow, offsetCol);
00390       whichVectors_.clear (); // This being empty defines "constant stride."
00391     }
00392   }
00393 
00394 
00395   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00396   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00397   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00398                const Teuchos::ArrayView<const Scalar>& A,
00399                size_t LDA,
00400                size_t NumVectors) :
00401     DO (map),
00402     lclMV_ (map->getNode ()),
00403     hasViewSemantics_ (false) // for now, not for long though
00404   {
00405     using Teuchos::ArrayRCP;
00406     using Teuchos::ArrayView;
00407     using Teuchos::RCP;
00408 
00409     const char tfecfFuncName[] = "MultiVector(A,LDA)";
00410     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00411       ": NumVectors must be strictly positive.");
00412     const size_t myLen = getLocalLength();
00413     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00414       ": LDA must be large enough to accomodate the local entries.");
00415 #ifdef HAVE_TPETRA_DEBUG
00416     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00417       static_cast<size_t> (A.size ()) < LDA*(NumVectors - 1) + myLen,
00418       std::runtime_error,
00419       ": A does not contain enough data to specify the entries in this.");
00420 #endif
00421     if (myLen > 0) {
00422       RCP<Node> node = MVT::getNode(lclMV_);
00423       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00424       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00425       // FIXME (mfh 13 Sep 2012) It would be better to use the Kokkos
00426       // Node's copyToBuffer method to push data directly to the
00427       // device pointer, rather than using an intermediate host
00428       // buffer.  Also, we should have an optimization for the
00429       // contiguous storage case (constant stride, and the stride
00430       // equals the local number of rows).
00431       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly,myLen*NumVectors,mydata);
00432       typename ArrayView<const Scalar>::iterator srcit = A.begin();
00433       for (size_t j = 0; j < NumVectors; ++j) {
00434         std::copy(srcit,srcit+myLen,myview);
00435         srcit += LDA;
00436         myview += myLen;
00437       }
00438       mydata = Teuchos::null;
00439       myview = Teuchos::null;
00440     }
00441     else {
00442       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00443     }
00444   }
00445 
00446   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00447   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00448   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00449                const Teuchos::ArrayView<const ArrayView<const Scalar> >& ArrayOfPtrs,
00450                size_t NumVectors) :
00451     DO (map),
00452     lclMV_ (map->getNode ()),
00453     hasViewSemantics_ (false) // for now, not for long though
00454   {
00455     using Teuchos::ArrayRCP;
00456     using Teuchos::ArrayView;
00457     using Teuchos::RCP;
00458 
00459     const char tfecfFuncName[] = "MultiVector(ArrayOfPtrs)";
00460     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00461       NumVectors < 1 || NumVectors != static_cast<size_t>(ArrayOfPtrs.size()),
00462       std::runtime_error,
00463       ": ArrayOfPtrs.size() must be strictly positive and as large as ArrayOfPtrs.");
00464     const size_t myLen = getLocalLength();
00465     if (myLen > 0) {
00466       RCP<Node> node = MVT::getNode(lclMV_);
00467       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00468       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00469       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly,myLen*NumVectors,mydata);
00470       for (size_t j = 0; j < NumVectors; ++j) {
00471 #ifdef HAVE_TPETRA_DEBUG
00472         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00473           static_cast<size_t>(ArrayOfPtrs[j].size()) != getLocalLength(),
00474           std::runtime_error,
00475           ": ArrayOfPtrs[" << j << "].size() (== " << ArrayOfPtrs[j].size()
00476           << ") is not equal to getLocalLength() (== " << getLocalLength());
00477 #endif
00478         typename ArrayView<const Scalar>::iterator src = ArrayOfPtrs[j].begin();
00479         std::copy(src,src+myLen,myview);
00480         myview += myLen;
00481       }
00482       myview = Teuchos::null;
00483       mydata = Teuchos::null;
00484     }
00485     else {
00486       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00487     }
00488   }
00489 
00490   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00491   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~MultiVector() {
00492   }
00493 
00494 
00495   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00496   bool MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::isConstantStride() const {
00497     return whichVectors_.empty();
00498   }
00499 
00500 
00501   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00502   size_t
00503   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalLength() const
00504   {
00505     if (this->getMap ().is_null ()) { // possible, due to replaceMap().
00506       return static_cast<size_t> (0);
00507     } else {
00508       return this->getMap ()->getNodeNumElements ();
00509     }
00510   }
00511 
00512 
00513   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00514   global_size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalLength() const
00515   {
00516     if (this->getMap ().is_null ()) { // possible, due to replaceMap().
00517       return static_cast<size_t> (0);
00518     } else {
00519       return this->getMap ()->getGlobalNumElements ();
00520     }
00521   }
00522 
00523 
00524   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00525   size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getStride() const {
00526     if (isConstantStride()) {
00527       return MVT::getStride(lclMV_);
00528     }
00529     return 0;
00530   }
00531 
00532 
00533   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00534   bool
00535   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00536   checkSizes (const SrcDistObject& sourceObj)
00537   {
00538     // Check whether the source object is a MultiVector.  If not, then
00539     // we can't even compare sizes, so it's definitely not OK to
00540     // Import or Export from it.
00541     typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> this_type;
00542     const this_type* src = dynamic_cast<const this_type*> (&sourceObj);
00543     if (src == NULL) {
00544       return false;
00545     } else {
00546       // The target of the Import or Export calls checkSizes() in
00547       // DistObject::doTransfer().  By that point, we've already
00548       // constructed an Import or Export object using the two
00549       // multivectors' Maps, which means that (hopefully) we've
00550       // already checked other attributes of the multivectors.  Thus,
00551       // all we need to do here is check the number of columns.
00552       return src->getNumVectors () == this->getNumVectors ();
00553     }
00554   }
00555 
00556   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00557   size_t
00558   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00559   constantNumberOfPackets () const {
00560     return this->getNumVectors ();
00561   }
00562 
00563 #if TPETRA_USE_KOKKOS_DISTOBJECT
00564 
00565   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00566   void
00567   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00568   copyAndPermute (
00569     const SrcDistObject& sourceObj,
00570     size_t numSameIDs,
00571     const Kokkos::View<const LocalOrdinal*, device_type> &permuteToLIDs,
00572     const Kokkos::View<const LocalOrdinal*, device_type> &permuteFromLIDs)
00573   {
00574     using Teuchos::ArrayRCP;
00575     using Teuchos::ArrayView;
00576     using Teuchos::RCP;
00577     using Kokkos::Compat::getKokkosViewDeepCopy;
00578     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00579     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00580     const char tfecfFuncName[] = "copyAndPermute";
00581 
00582     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00583       permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00584       ": permuteToLIDs and permuteFromLIDs must have the same size."
00585       << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
00586       << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
00587 
00588     // We've already called checkSizes(), so this cast must succeed.
00589     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00590 
00591     const size_t numCols = this->getNumVectors ();
00592     const size_t stride = MVT::getStride (lclMV_);
00593 
00594     // Copy rows [0, numSameIDs-1] of the local multivectors.
00595     //
00596     // For GPU Nodes: All of this happens using device pointers; this
00597     // does not require host views of either source or destination.
00598     if (isConstantStride ()) {
00599       const size_t numSrcCols = MVT::getNumCols (sourceMV.lclMV_);
00600       const size_t numDestCols = MVT::getNumCols (lclMV_);
00601 
00602       const KMV src = sourceMV.lclMV_.offsetView (numSameIDs, numSrcCols, 0, 0);
00603       KMV dest = lclMV_.offsetViewNonConst (numSameIDs, numDestCols, 0, 0);
00604       if (sourceMV.isConstantStride ()) {
00605         MVT::Assign (dest, src);
00606       }
00607       else {
00608         MVT::Assign (dest, src, sourceMV.whichVectors_ ());
00609       }
00610     }
00611     else {
00612       // Copy the columns one at a time, since MVT doesn't have an
00613       // Assign method for noncontiguous access to the destination
00614       // multivector.
00615       for (size_t j = 0; j < numCols; ++j) {
00616         const size_t destCol = isConstantStride () ? j : whichVectors_[j];
00617         const size_t srcCol = sourceMV.isConstantStride () ?
00618           j : sourceMV.whichVectors_[j];
00619         KMV dest_j = lclMV_.offsetViewNonConst (numSameIDs, 1, 0, destCol);
00620         const KMV src_j = sourceMV.lclMV_.offsetView (numSameIDs, 1, 0, srcCol);
00621         MVT::Assign (dest_j, src_j); // Copy src_j into dest_j
00622       }
00623     }
00624 
00625     // For the remaining GIDs, execute the permutations.  This may
00626     // involve noncontiguous access of both source and destination
00627     // vectors, depending on the LID lists.
00628     //
00629     // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
00630     // the same process, this merges their values by replacement of
00631     // the last encountered GID, not by the specified merge rule
00632     // (such as ADD).
00633 
00634     // If there are no permutations, we are done
00635     if (permuteFromLIDs.size() == 0 || permuteToLIDs.size() == 0)
00636       return;
00637 
00638     if (this->isConstantStride ()) {
00639       Details::PermuteArrayMultiColumnConstantStride<Scalar,LocalOrdinal,device_type> perm;
00640       perm.permuteToLIDs = permuteToLIDs;
00641       perm.permuteFromLIDs = permuteFromLIDs;
00642       perm.src = sourceMV.getKokkosView();
00643       perm.dest = getKokkosViewNonConst();
00644       perm.src_stride = MVT::getStride (sourceMV.lclMV_);
00645       perm.dest_stride = stride;
00646       perm.numCols = numCols;
00647 
00648       perm.permute();
00649     }
00650     else {
00651       Details::PermuteArrayMultiColumnVariableStride<Scalar,LocalOrdinal,device_type> perm;
00652       perm.permuteToLIDs = permuteToLIDs;
00653       perm.permuteFromLIDs = permuteFromLIDs;
00654       perm.src = sourceMV.getKokkosView();
00655       perm.dest = getKokkosViewNonConst();
00656       perm.src_whichVectors =
00657         getKokkosViewDeepCopy<device_type>(sourceMV.whichVectors_ ());
00658       perm.dest_whichVectors =
00659         getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00660       perm.src_stride = MVT::getStride (sourceMV.lclMV_);
00661       perm.dest_stride = stride;
00662       perm.numCols = numCols;
00663 
00664       perm.permute();
00665     }
00666   }
00667 
00668   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00669   void
00670   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00671   packAndPrepare (
00672     const SrcDistObject& sourceObj,
00673     const Kokkos::View<const LocalOrdinal*, device_type> &exportLIDs,
00674     Kokkos::View<Scalar*, device_type> &exports,
00675     const Kokkos::View<size_t*, device_type> &numExportPacketsPerLID,
00676     size_t& constantNumPackets,
00677     Distributor & /* distor */ )
00678   {
00679     using Teuchos::Array;
00680     using Teuchos::ArrayView;
00681     using Kokkos::Compat::getKokkosViewDeepCopy;
00682     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00683     typedef Array<size_t>::size_type size_type;
00684 
00685     // If we have no exports, there is nothing to do
00686     if (exportLIDs.size() == 0)
00687       return;
00688 
00689     // We've already called checkSizes(), so this cast must succeed.
00690     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00691 
00692     // We don't need numExportPacketsPerLID; forestall "unused
00693     // variable" compile warnings.
00694     (void) numExportPacketsPerLID;
00695 
00696     /* The layout in the export for MultiVectors is as follows:
00697        exports = { all of the data from row exportLIDs.front() ;
00698                    ....
00699                    all of the data from row exportLIDs.back() }
00700       This doesn't have the best locality, but is necessary because
00701       the data for a Packet (all data associated with an LID) is
00702       required to be contiguous. */
00703 
00704     const size_t numCols = sourceMV.getNumVectors ();
00705     const size_t stride = MVT::getStride (sourceMV.lclMV_);
00706     // This spares us from needing to fill numExportPacketsPerLID.
00707     // Setting constantNumPackets to a nonzero value signals that
00708     // all packets have the same number of entries.
00709     constantNumPackets = numCols;
00710 
00711     const size_t numExportLIDs = exportLIDs.size ();
00712     const size_t newExportsSize = numCols * numExportLIDs;
00713     if (exports.size () != newExportsSize) {
00714       Kokkos::Compat::realloc(exports, newExportsSize);
00715     }
00716 
00717     if (numCols == 1) { // special case for one column only
00718       // MultiVector always represents a single column with constant
00719       // stride, but it doesn't hurt to implement both cases anyway.
00720       if (sourceMV.isConstantStride ()) {
00721         Details::PackArraySingleColumnConstantStride<Scalar,LocalOrdinal,device_type> pack;
00722         pack.exportLIDs = exportLIDs;
00723         pack.exports = exports;
00724         pack.src = sourceMV.getKokkosView();
00725         pack.pack();
00726       }
00727       else {
00728         Details::PackArraySingleColumnOffset<Scalar,LocalOrdinal,device_type> pack;
00729         pack.exportLIDs = exportLIDs;
00730         pack.exports = exports;
00731         pack.src = sourceMV.getKokkosView();
00732         pack.offset = sourceMV.whichVectors_[0] * stride;
00733         pack.pack();
00734       }
00735     }
00736     else { // the source MultiVector has multiple columns
00737       if (sourceMV.isConstantStride ()) {
00738         Details::PackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,device_type> pack;
00739         pack.exportLIDs = exportLIDs;
00740         pack.exports = exports;
00741         pack.src = sourceMV.getKokkosView();
00742         pack.stride = stride;
00743         pack.numCols = numCols;
00744         pack.pack ();
00745       }
00746       else {
00747         Details::PackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,device_type> pack;
00748         pack.exportLIDs = exportLIDs;
00749         pack.exports = exports;
00750         pack.src = sourceMV.getKokkosView();
00751         pack.srcWhichVectors =
00752           getKokkosViewDeepCopy<device_type>(sourceMV.whichVectors_ ());
00753         pack.stride = stride;
00754         pack.numCols = numCols;
00755         pack.pack ();
00756       }
00757     }
00758   }
00759 
00760 
00761   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00762   void
00763   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00764   unpackAndCombine (
00765     const Kokkos::View<const LocalOrdinal*, device_type> &importLIDs,
00766     const Kokkos::View<const Scalar*, device_type> &imports,
00767     const Kokkos::View<size_t*, device_type> &numPacketsPerLID,
00768     size_t constantNumPackets,
00769     Distributor & /* distor */,
00770     CombineMode CM)
00771   {
00772     using Teuchos::ArrayView;
00773     using Kokkos::Compat::getKokkosViewDeepCopy;
00774     typedef Teuchos::ScalarTraits<Scalar> SCT;
00775     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00776     const char tfecfFuncName[] = "unpackAndCombine";
00777 
00778     // If we have no imports, there is nothing to do
00779     if (importLIDs.size() == 0)
00780       return;
00781 
00782     // We don't need numPacketsPerLID; forestall "unused variable"
00783     // compile warnings.
00784     (void) numPacketsPerLID;
00785 
00786     /* The layout in the export for MultiVectors is as follows:
00787        imports = { all of the data from row exportLIDs.front() ;
00788                    ....
00789                    all of the data from row exportLIDs.back() }
00790       This doesn't have the best locality, but is necessary because
00791       the data for a Packet (all data associated with an LID) is
00792       required to be contiguous. */
00793 
00794     const size_t numVecs = getNumVectors ();
00795 
00796 #ifdef HAVE_TPETRA_DEBUG
00797     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00798       static_cast<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
00799       std::runtime_error,
00800       ": 'imports' buffer size must be consistent with the amount of data to "
00801       "be sent.  " << std::endl << "imports.size() = " << imports.size()
00802       << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
00803       << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
00804       << ".");
00805 
00806     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00807       constantNumPackets == static_cast<size_t> (0), std::runtime_error,
00808       ": constantNumPackets input argument must be nonzero.");
00809 
00810     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00811       static_cast<size_t> (numVecs) != static_cast<size_t> (constantNumPackets),
00812       std::runtime_error, ": constantNumPackets must equal numVecs.");
00813 #endif // HAVE_TPETRA_DEBUG
00814 
00815     if (numVecs > 0 && importLIDs.size () > 0) {
00816       const size_t myStride = MVT::getStride (lclMV_);
00817 
00818       // NOTE (mfh 10 Mar 2012) If you want to implement custom
00819       // combine modes, start editing here.  Also, if you trust
00820       // inlining, it would be nice to condense this code by using a
00821       // binary function object f:
00822       //
00823       // ncview_[...] = f (ncview_[...], *impptr++);
00824       if (CM == INSERT || CM == REPLACE) {
00825         if (isConstantStride()) {
00826           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::InsertOp,device_type> unpack;
00827           unpack.importLIDs = importLIDs;
00828           unpack.imports = imports;
00829           unpack.dest = getKokkosViewNonConst();
00830           unpack.stride = myStride;
00831           unpack.numCols = numVecs;
00832           unpack.op = Details::InsertOp();
00833 
00834           unpack.unpack();
00835         }
00836         else {
00837           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::InsertOp,device_type> unpack;
00838           unpack.importLIDs = importLIDs;
00839           unpack.imports = imports;
00840           unpack.dest = getKokkosViewNonConst();
00841           unpack.whichVectors =
00842             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00843           unpack.stride = myStride;
00844           unpack.numCols = numVecs;
00845           unpack.op = Details::InsertOp();
00846 
00847           unpack.unpack();
00848         }
00849       }
00850       else if (CM == ADD) {
00851         if (isConstantStride()) {
00852           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::AddOp,device_type> unpack;
00853           unpack.importLIDs = importLIDs;
00854           unpack.imports = imports;
00855           unpack.dest = getKokkosViewNonConst();
00856           unpack.stride = myStride;
00857           unpack.numCols = numVecs;
00858           unpack.op = Details::AddOp();
00859 
00860           unpack.unpack();
00861         }
00862         else {
00863           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::AddOp,device_type> unpack;
00864           unpack.importLIDs = importLIDs;
00865           unpack.imports = imports;
00866           unpack.dest = getKokkosViewNonConst();
00867           unpack.whichVectors =
00868             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00869           unpack.stride = myStride;
00870           unpack.numCols = numVecs;
00871           unpack.op = Details::AddOp();
00872 
00873           unpack.unpack();
00874         }
00875       }
00876       else if (CM == ABSMAX) {
00877         if (isConstantStride()) {
00878           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::AbsMaxOp,device_type> unpack;
00879           unpack.importLIDs = importLIDs;
00880           unpack.imports = imports;
00881           unpack.dest = getKokkosViewNonConst();
00882           unpack.stride = myStride;
00883           unpack.numCols = numVecs;
00884           unpack.op = Details::AbsMaxOp();
00885 
00886           unpack.unpack();
00887         }
00888         else {
00889           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::AbsMaxOp,device_type> unpack;
00890           unpack.importLIDs = importLIDs;
00891           unpack.imports = imports;
00892           unpack.dest = getKokkosViewNonConst();
00893           unpack.whichVectors =
00894             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00895           unpack.stride = myStride;
00896           unpack.numCols = numVecs;
00897           unpack.op = Details::AbsMaxOp();
00898 
00899           unpack.unpack();
00900         }
00901       }
00902       else {
00903         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00904           CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
00905           std::invalid_argument, ": Invalid CombineMode: " << CM << ".  Valid "
00906           "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
00907       }
00908     }
00909   }
00910 
00911 #else
00912 
00913   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00914   void
00915   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00916   copyAndPermute (const SrcDistObject& sourceObj,
00917                   size_t numSameIDs,
00918                   const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
00919                   const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
00920   {
00921     using Teuchos::ArrayRCP;
00922     using Teuchos::ArrayView;
00923     using Teuchos::RCP;
00924     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00925     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00926     const char tfecfFuncName[] = "copyAndPermute";
00927 
00928     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00929       permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00930       ": permuteToLIDs and permuteFromLIDs must have the same size."
00931       << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
00932       << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
00933 
00934     // We've already called checkSizes(), so this cast must succeed.
00935     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00936 
00937     const size_t numCols = this->getNumVectors ();
00938 
00939     // Copy rows [0, numSameIDs-1] of the local multivectors.
00940     //
00941     // For GPU Nodes: All of this happens using device pointers; this
00942     // does not require host views of either source or destination.
00943     //
00944     // mfh 10 Jan 2013: In the current implementation, device data has
00945     // already been copied to the host views.  If we do a
00946     // device-device copy here, it won't synch the host views
00947     // correctly.  Thus, we replace this step with a host copy, if
00948     // Node is a GPU Node.  That ensures correctness for now, though
00949     // it's really something we need to fix if we plan to run
00950     // efficiently on GPUs.
00951     if (Node::isHostNode) {
00952       if (isConstantStride ()) {
00953         const size_t numSrcCols = MVT::getNumCols (sourceMV.lclMV_);
00954         const size_t numDestCols = MVT::getNumCols (lclMV_);
00955 
00956         const KMV src = sourceMV.lclMV_.offsetView (numSameIDs, numSrcCols, 0, 0);
00957         KMV dest = lclMV_.offsetViewNonConst (numSameIDs, numDestCols, 0, 0);
00958         if (sourceMV.isConstantStride ()) {
00959           MVT::Assign (dest, src);
00960         }
00961         else {
00962           MVT::Assign (dest, src, sourceMV.whichVectors_ ());
00963         }
00964       }
00965       else {
00966         // Copy the columns one at a time, since MVT doesn't have an
00967         // Assign method for noncontiguous access to the destination
00968         // multivector.
00969         for (size_t j = 0; j < numCols; ++j) {
00970           const size_t destCol = isConstantStride () ? j : whichVectors_[j];
00971           const size_t srcCol = sourceMV.isConstantStride () ?
00972             j : sourceMV.whichVectors_[j];
00973           KMV dest_j = lclMV_.offsetViewNonConst (numSameIDs, 1, 0, destCol);
00974           const KMV src_j = sourceMV.lclMV_.offsetView (numSameIDs, 1, 0, srcCol);
00975           MVT::Assign (dest_j, src_j); // Copy src_j into dest_j
00976         }
00977       }
00978     }
00979 
00980     // Copy the vectors one at a time.  This works whether or not the
00981     // multivectors have constant stride.
00982     for (size_t j = 0; j < numCols; ++j) {
00983       // Get host views of the current source and destination column.
00984       //
00985       // FIXME (mfh 10 Mar 2012) Copying should really be done on the
00986       // device.  There's no need to bring everything back to the
00987       // host, copy there, and then copy back.
00988       ArrayView<const Scalar> srcView =
00989         sourceMV.getSubArrayRCP (sourceMV.cview_, j) ();
00990       ArrayView<Scalar> dstView = getSubArrayRCP (ncview_, j) ();
00991 
00992       if (! Node::isHostNode) {
00993         // The first numSameIDs IDs are the same between source and
00994         // target, so we can just copy the data.  (This favors faster
00995         // contiguous access whenever we can do it.)
00996         std::copy (srcView.begin (), srcView.begin () + numSameIDs,
00997                    dstView.begin ());
00998       }
00999 
01000       // For the remaining GIDs, execute the permutations.  This may
01001       // involve noncontiguous access of both source and destination
01002       // vectors, depending on the LID lists.
01003       //
01004       // FIXME (mfh 09 Apr 2012) Permutation should be done on the
01005       // device, not on the host.
01006       //
01007       // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
01008       // the same process, this merges their values by replacement of
01009       // the last encountered GID, not by the specified merge rule
01010       // (such as ADD).
01011       const size_type numPermuteLIDs =
01012         std::min (permuteToLIDs.size (), permuteFromLIDs.size ());
01013       for (size_type k = 0; k < numPermuteLIDs; ++k) {
01014         dstView[permuteToLIDs[k]] = srcView[permuteFromLIDs[k]];
01015       }
01016     }
01017   }
01018 
01019   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01020   void
01021   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01022   packAndPrepare (const SrcDistObject& sourceObj,
01023                   const ArrayView<const LocalOrdinal> &exportLIDs,
01024                   Array<Scalar> &exports,
01025                   const ArrayView<size_t> &numExportPacketsPerLID,
01026                   size_t& constantNumPackets,
01027                   Distributor & /* distor */ )
01028   {
01029     using Teuchos::Array;
01030     using Teuchos::ArrayView;
01031     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01032     typedef Array<size_t>::size_type size_type;
01033 
01034     // We've already called checkSizes(), so this cast must succeed.
01035     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
01036 
01037     // We don't need numExportPacketsPerLID; forestall "unused
01038     // variable" compile warnings.
01039     (void) numExportPacketsPerLID;
01040 
01041     /* The layout in the export for MultiVectors is as follows:
01042        exports = { all of the data from row exportLIDs.front() ;
01043                    ....
01044                    all of the data from row exportLIDs.back() }
01045       This doesn't have the best locality, but is necessary because
01046       the data for a Packet (all data associated with an LID) is
01047       required to be contiguous. */
01048 
01049     const size_t numCols = sourceMV.getNumVectors ();
01050     const size_t stride = MVT::getStride (sourceMV.lclMV_);
01051     // This spares us from needing to fill numExportPacketsPerLID.
01052     // Setting constantNumPackets to a nonzero value signals that
01053     // all packets have the same number of entries.
01054     constantNumPackets = numCols;
01055 
01056     const size_type numExportLIDs = exportLIDs.size ();
01057     const size_type newExportsSize = numCols * numExportLIDs;
01058     if (exports.size () != newExportsSize) {
01059       exports.resize (newExportsSize);
01060     }
01061 
01062     ArrayView<const Scalar> srcView = sourceMV.cview_ ();
01063     if (numCols == 1) { // special case for one column only
01064       // MultiVector always represents a single column with constant
01065       // stride, but it doesn't hurt to implement both cases anyway.
01066       if (sourceMV.isConstantStride ()) {
01067         for (size_type k = 0; k < numExportLIDs; ++k) {
01068           exports[k] = srcView[exportLIDs[k]];
01069         }
01070       }
01071       else {
01072         const size_t offset = sourceMV.whichVectors_[0] * stride;
01073         for (size_type k = 0; k < numExportLIDs; ++k) {
01074           exports[k] = srcView[exportLIDs[k] + offset];
01075         }
01076       }
01077     }
01078     else { // the source MultiVector has multiple columns
01079       typename Array<Scalar>::iterator expptr = exports.begin ();
01080       if (sourceMV.isConstantStride ()) {
01081         for (size_type k = 0; k < numExportLIDs; ++k) {
01082           const size_t localRow = static_cast<size_t> (exportLIDs[k]);
01083           for (size_t j = 0; j < numCols; ++j) {
01084             *expptr++ = srcView[localRow + j*stride];
01085           }
01086         }
01087       }
01088       else {
01089         ArrayView<const size_t> srcWhichVectors = sourceMV.whichVectors_ ();
01090         for (size_type k = 0; k < numExportLIDs; ++k) {
01091           const size_t localRow = static_cast<size_t> (exportLIDs[k]);
01092           for (size_t j = 0; j < numCols; ++j) {
01093             *expptr++ = srcView[localRow + srcWhichVectors[j]*stride];
01094           }
01095         }
01096       }
01097     }
01098   }
01099 
01100 
01101   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01102   void
01103   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01104   unpackAndCombine (const ArrayView<const LocalOrdinal> &importLIDs,
01105                     const ArrayView<const Scalar> &imports,
01106                     const ArrayView<size_t> &numPacketsPerLID,
01107                     size_t constantNumPackets,
01108                     Distributor & /* distor */,
01109                     CombineMode CM)
01110   {
01111     using Teuchos::ArrayView;
01112     typedef Teuchos::ScalarTraits<Scalar> SCT;
01113     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
01114     const char tfecfFuncName[] = "unpackAndCombine";
01115 
01116     // We don't need numPacketsPerLID; forestall "unused variable"
01117     // compile warnings.
01118     (void) numPacketsPerLID;
01119 
01120     /* The layout in the export for MultiVectors is as follows:
01121        imports = { all of the data from row exportLIDs.front() ;
01122                    ....
01123                    all of the data from row exportLIDs.back() }
01124       This doesn't have the best locality, but is necessary because
01125       the data for a Packet (all data associated with an LID) is
01126       required to be contiguous. */
01127 
01128     const size_t numVecs = getNumVectors ();
01129 
01130 #ifdef HAVE_TPETRA_DEBUG
01131     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01132       static_cast<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
01133       std::runtime_error,
01134       ": 'imports' buffer size must be consistent with the amount of data to "
01135       "be sent.  " << std::endl << "imports.size() = " << imports.size()
01136       << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
01137       << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
01138       << ".");
01139 
01140     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01141       constantNumPackets == static_cast<size_t> (0), std::runtime_error,
01142       ": constantNumPackets input argument must be nonzero.");
01143 
01144     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01145       static_cast<size_t> (numVecs) != static_cast<size_t> (constantNumPackets),
01146       std::runtime_error, ": constantNumPackets must equal numVecs.");
01147 #endif // HAVE_TPETRA_DEBUG
01148 
01149     if (numVecs > 0 && importLIDs.size () > 0) {
01150       typename ArrayView<const Scalar>::iterator impptr = imports.begin ();
01151       ArrayView<Scalar> destView = ncview_ ();
01152       const size_type numImportLIDs = importLIDs.size ();
01153       const size_t myStride = MVT::getStride (lclMV_);
01154 
01155       // NOTE (mfh 10 Mar 2012) If you want to implement custom
01156       // combine modes, start editing here.  Also, if you trust
01157       // inlining, it would be nice to condense this code by using a
01158       // binary function object f:
01159       //
01160       // ncview_[...] = f (ncview_[...], *impptr++);
01161       if (CM == INSERT || CM == REPLACE) {
01162         if (isConstantStride()) {
01163           for (size_type k = 0; k < numImportLIDs; ++k) {
01164             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01165             for (size_t j = 0; j < numVecs; ++j) {
01166               destView[localRow + myStride*j] = *impptr++;
01167             }
01168           }
01169         }
01170         else {
01171           for (size_type k = 0; k < numImportLIDs; ++k) {
01172             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01173             for (size_t j = 0; j < numVecs; ++j) {
01174               destView[localRow + myStride*whichVectors_[j]] = *impptr++;
01175             }
01176           }
01177         }
01178       }
01179       else if (CM == ADD) {
01180         if (isConstantStride()) {
01181           for (size_type k = 0; k < numImportLIDs; ++k) {
01182             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01183             for (size_t j = 0; j < numVecs; ++j) {
01184               destView[localRow + myStride*j] += *impptr++;
01185             }
01186           }
01187         }
01188         else {
01189           for (size_type k = 0; k < numImportLIDs; ++k) {
01190             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01191             for (size_t j = 0; j < numVecs; ++j) {
01192               destView[localRow + myStride*whichVectors_[j]] += *impptr++;
01193             }
01194           }
01195         }
01196       }
01197       else if (CM == ABSMAX) {
01198         if (isConstantStride()) {
01199           for (size_type k = 0; k < numImportLIDs; ++k) {
01200             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01201             for (size_t j = 0; j < numVecs; ++j) {
01202               Scalar &curval       = destView[localRow + myStride*j];
01203               const Scalar &newval = *impptr++;
01204               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
01205             }
01206           }
01207         }
01208         else {
01209           for (size_type k = 0; k < numImportLIDs; ++k) {
01210             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01211             for (size_t j = 0; j < numVecs; ++j) {
01212               Scalar &curval       = destView[localRow + myStride*whichVectors_[j]];
01213               const Scalar &newval = *impptr++;
01214               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
01215             }
01216           }
01217         }
01218       }
01219       else {
01220         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01221           CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
01222           std::invalid_argument, ": Invalid CombineMode: " << CM << ".  Valid "
01223           "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
01224       }
01225     }
01226   }
01227 
01228 #endif
01229 
01230   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01231   inline size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getNumVectors() const {
01232     if (isConstantStride()) {
01233       return MVT::getNumCols(lclMV_);
01234     }
01235     else {
01236       return whichVectors_.size();
01237     }
01238   }
01239 
01240 
01241   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01242   void
01243   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01244   dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01245        const Teuchos::ArrayView<Scalar> &dots) const
01246   {
01247     using Teuchos::Array;
01248     using Teuchos::ArrayRCP;
01249     using Teuchos::arcp_const_cast;
01250 
01251     const char tfecfFuncName[] = "dot()";
01252     const size_t myLen   = getLocalLength();
01253     const size_t numVecs = getNumVectors();
01254 #ifdef HAVE_TPETRA_DEBUG
01255     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01256         ": MultiVectors do not have compatible Maps:" << std::endl
01257         << "this->getMap(): " << std::endl << *this->getMap()
01258         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01259 #else
01260     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01261         ": MultiVectors do not have the same local length.");
01262 #endif
01263     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01264         ": MultiVectors must have the same number of vectors.");
01265     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01266       static_cast<size_t> (dots.size ()) != numVecs, std::runtime_error,
01267       ": dots.size() must be as large as the number of vectors in *this and A.");
01268     if (isConstantStride() && A.isConstantStride()) {
01269       MVT::Dot(lclMV_,A.lclMV_,dots);
01270     }
01271     else {
01272       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01273       ArrayRCP<Scalar> vptr  = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
01274       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01275       for (size_t j=0; j < numVecs; ++j) {
01276         ArrayRCP<Scalar> vj  =   getSubArrayRCP( vptr,j);
01277         ArrayRCP<Scalar> avj = A.getSubArrayRCP(avptr,j);
01278         MVT::initializeValues(a,myLen, 1, avj, myLen);
01279         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01280         dots[j] = MVT::Dot((const KMV&)v,(const KMV &)a);
01281       }
01282     }
01283     if (this->isDistributed ()) {
01284       Array<Scalar> ldots(dots);
01285       Teuchos::reduceAll (* (this->getMap ()->getComm ()), Teuchos::REDUCE_SUM,
01286                           static_cast<int> (numVecs), ldots.getRawPtr (),
01287                           dots.getRawPtr ());
01288     }
01289   }
01290 
01291 
01292   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01293   void
01294   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01295   norm2 (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>& norms) const
01296   {
01297     using Teuchos::arcp_const_cast;
01298     using Teuchos::Array;
01299     using Teuchos::ArrayRCP;
01300     using Teuchos::Comm;
01301     using Teuchos::null;
01302     using Teuchos::RCP;
01303     using Teuchos::reduceAll;
01304     using Teuchos::REDUCE_SUM;
01305     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
01306     typedef Teuchos::ScalarTraits<MT> STM;
01307 
01308     const size_t numNorms = static_cast<size_t> (norms.size ());
01309     const size_t numVecs = this->getNumVectors ();
01310     TEUCHOS_TEST_FOR_EXCEPTION(
01311       numNorms < numVecs, std::runtime_error, "Tpetra::MultiVector::norm2: "
01312       "'norms' must have at least as many entries as the number of vectors in "
01313       "*this.  norms.size() = " << numNorms << " < this->getNumVectors() = "
01314       << numVecs << ".");
01315 
01316     if (isConstantStride ()) {
01317       MVT::Norm2Squared (lclMV_, norms);
01318     }
01319     else {
01320       KMV v (MVT::getNode (lclMV_));
01321       ArrayRCP<Scalar> vi;
01322       for (size_t j = 0; j < numVecs; ++j) {
01323         vi = arcp_const_cast<Scalar> (MVT::getValues (lclMV_, whichVectors_[j]));
01324         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vi,
01325                                MVT::getStride (lclMV_));
01326         norms[j] = MVT::Norm2Squared (v);
01327       }
01328     }
01329     if (this->isDistributed ()) {
01330       RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
01331         this->getMap ()->getComm ();
01332       // The calling process only participates in the collective if
01333       // both the Map and its Comm on that process are nonnull.
01334       if (! comm.is_null ()) {
01335         Array<MT> lnorms (norms);
01336         reduceAll<int, MT> (*comm, REDUCE_SUM, static_cast<int> (numVecs),
01337                             lnorms.getRawPtr (), norms.getRawPtr ());
01338       }
01339     }
01340 
01341     for (typename Teuchos::ArrayView<MT>::iterator n = norms.begin ();
01342          n != norms.begin () + numVecs; ++n) {
01343       (*n) = STM::squareroot (*n);
01344     }
01345   }
01346 
01347 
01348   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01349   void
01350   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01351   normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& weights,
01352                 const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01353   {
01354     using Teuchos::arcp_const_cast;
01355     using Teuchos::Array;
01356     using Teuchos::ArrayRCP;
01357     using Teuchos::ArrayView;
01358     using Teuchos::reduceAll;
01359     using Teuchos::REDUCE_SUM;
01360     typedef Teuchos::ScalarTraits<Scalar> STS;
01361     typedef typename STS::magnitudeType MT;
01362     typedef Teuchos::ScalarTraits<MT> STM;
01363 
01364     const char tfecfFuncName[] = "normWeighted: ";
01365 
01366     const MT OneOverN =
01367       STM::one () / static_cast<MT> (this->getGlobalLength ());
01368     const size_t numVecs = this->getNumVectors ();
01369     const size_t myLen = this->getLocalLength ();
01370     const bool OneW = (weights.getNumVectors () == 1);
01371 
01372     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01373       ! OneW && weights.getNumVectors () != numVecs, std::runtime_error,
01374       "The input MultiVector of weights must either have only one vector "
01375       "(column), or it must have the same number of vectors as this.  "
01376       "weights.getNumVectors() = " << weights.getNumVectors () << ", and "
01377       "this->getNumVectors() = " << numVecs << ".");
01378     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01379       static_cast<size_t> (norms.size ()) != numVecs, std::invalid_argument,
01380       "norms.size() must be as large as the number of vectors in *this.  "
01381       "norms.size() = " << norms.size () << " != this->getNumVectors() = "
01382       << numVecs << ".");
01383 
01384 #ifdef HAVE_TPETRA_DEBUG
01385     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01386       ! this->getMap ()->isCompatible (* (weights.getMap ())),
01387       std::runtime_error, "MultiVectors do not have compatible Maps.");
01388 #else
01389     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01390       myLen != weights.getLocalLength (), std::runtime_error,
01391       "This MultiVector and the input argument 'weights' do not have the same "
01392       "local length.  this->getLocalLength() = " << myLen << " != weights."
01393       "getLocalLength() = " << weights.getLocalLength () << ".");
01394 #endif // HAVE_TPETRA_DEBUG
01395 
01396     for (size_t j = 0; j < numVecs; ++j) {
01397       ArrayRCP<const Scalar> X_j = this->getData (j);
01398       ArrayRCP<const Scalar> W_j =
01399         OneW ? weights.getData (0) : weights.getData (j);
01400       MT norm_j = STM::zero ();
01401       for (size_t i = 0; i < myLen; ++i) {
01402         const Scalar tmp = X_j[i] / W_j[i];
01403         const MT tmp_real = STS::real (tmp);
01404         const MT tmp_imag = STS::imag (tmp);
01405         norm_j += tmp_real * tmp_real + tmp_imag * tmp_imag;
01406       }
01407       norms[j] = norm_j;
01408     }
01409 
01410     if (this->isDistributed ()) {
01411       Array<MT> lnorms (norms);
01412       reduceAll<int, MT> (* (this->getMap ()->getComm ()), REDUCE_SUM,
01413                           static_cast<int> (numVecs), lnorms.getRawPtr (),
01414                           norms.getRawPtr ());
01415     }
01416     for (size_t k = 0; k < numVecs; ++k) {
01417       norms[k] = STM::squareroot (norms[k] * OneOverN);
01418     }
01419   }
01420 
01421 
01422   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01423   void
01424   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01425   norm1 (const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01426   {
01427     using Teuchos::Array;
01428     using Teuchos::ArrayRCP;
01429     using Teuchos::arcp_const_cast;
01430     using Teuchos::Comm;
01431     using Teuchos::null;
01432     using Teuchos::RCP;
01433     using Teuchos::REDUCE_SUM;
01434     using Teuchos::reduceAll;
01435     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
01436 
01437     const size_t numNorms = static_cast<size_t> (norms.size ());
01438     const size_t numVecs = this->getNumVectors ();
01439     TEUCHOS_TEST_FOR_EXCEPTION(
01440       numNorms < numVecs, std::runtime_error, "Tpetra::MultiVector::norm1: "
01441       "'norms' must have at least as many entries as the number of vectors in "
01442       "*this.  norms.size() = " << numNorms << " < this->getNumVectors() = "
01443       << numVecs << ".");
01444 
01445     if (isConstantStride ()) {
01446       MVT::Norm1 (lclMV_, norms);
01447     }
01448     else {
01449       KMV v (MVT::getNode (lclMV_));
01450       ArrayRCP<Scalar> vj;
01451       for (size_t j = 0; j < numVecs; ++j) {
01452         vj = arcp_const_cast<Scalar> (MVT::getValues (lclMV_, whichVectors_[j]));
01453         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj,
01454                                MVT::getStride (lclMV_));
01455         norms[j] = MVT::Norm1 (v);
01456       }
01457     }
01458     if (this->isDistributed ()) {
01459       RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
01460         this->getMap ()->getComm ();
01461       // The calling process only participates in the collective if
01462       // both the Map and its Comm on that process are nonnull.
01463       if (! comm.is_null ()) {
01464         Array<MT> lnorms (norms);
01465         reduceAll<int, MT> (*comm, REDUCE_SUM, static_cast<int> (numVecs),
01466                             lnorms.getRawPtr (), norms.getRawPtr ());
01467       }
01468     }
01469   }
01470 
01471 
01472   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01473   void
01474   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01475   normInf (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01476   {
01477     using Teuchos::Array;
01478     using Teuchos::ArrayRCP;
01479     using Teuchos::arcp_const_cast;
01480     using Teuchos::Comm;
01481     using Teuchos::null;
01482     using Teuchos::RCP;
01483     using Teuchos::reduceAll;
01484     using Teuchos::REDUCE_MAX;
01485     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
01486 
01487     const size_t numVecs = this->getNumVectors();
01488     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(norms.size()) != numVecs, std::runtime_error,
01489       "Tpetra::MultiVector::normInf(norms): norms.size() must be as large as the number of vectors in *this.");
01490     if (isConstantStride ()) {
01491       MVT::NormInf(lclMV_,norms);
01492     }
01493     else {
01494       KMV v (MVT::getNode (lclMV_));
01495       ArrayRCP<Scalar> vj;
01496       for (size_t j = 0; j < numVecs; ++j) {
01497         vj = arcp_const_cast<Scalar> (MVT::getValues (lclMV_, whichVectors_[j]));
01498         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj,
01499                                MVT::getStride (lclMV_));
01500         norms[j] = MVT::NormInf (v);
01501       }
01502     }
01503     if (this->isDistributed ()) {
01504       RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
01505         this->getMap ()->getComm ();
01506       // The calling process only participates in the collective if
01507       // both the Map and its Comm on that process are nonnull.
01508       if (! comm.is_null ()) {
01509         Array<MT> lnorms (norms);
01510         reduceAll<int, MT> (*comm, REDUCE_MAX, static_cast<int> (numVecs),
01511                             lnorms.getRawPtr (), norms.getRawPtr ());
01512       }
01513     }
01514   }
01515 
01516 
01517   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01518   void
01519   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01520   meanValue (const Teuchos::ArrayView<Scalar> &means) const
01521   {
01522     using Teuchos::Array;
01523     using Teuchos::ArrayRCP;
01524     using Teuchos::arcp_const_cast;
01525     using Teuchos::reduceAll;
01526     using Teuchos::REDUCE_SUM;
01527     typedef Teuchos::ScalarTraits<Scalar> SCT;
01528 
01529     const size_t numVecs = getNumVectors();
01530     const size_t myLen   = getLocalLength();
01531     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(means.size()) != numVecs, std::runtime_error,
01532       "Tpetra::MultiVector::meanValue(): means.size() must be as large as the number of vectors in *this.");
01533     // compute local components of the means
01534     // sum these across all nodes
01535     if (isConstantStride()) {
01536       MVT::Sum(lclMV_,means);
01537     }
01538     else {
01539       KMV v(MVT::getNode(lclMV_));
01540       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
01541       for (size_t j=0; j < numVecs; ++j) {
01542         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j);
01543         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01544         means[j] = MVT::Sum((const KMV &)v);
01545       }
01546     }
01547     if (this->isDistributed ()) {
01548       Array<Scalar> lmeans(means);
01549       // only combine if we are a distributed MV
01550       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM,
01551                  static_cast<int> (numVecs), lmeans.getRawPtr (),
01552                  means.getRawPtr ());
01553     }
01554     // mfh 12 Apr 2012: Don't take out the cast from the ordinal type
01555     // to the magnitude type, since operator/ (std::complex<T>, int)
01556     // isn't necessarily defined.
01557     const Scalar OneOverN =
01558       SCT::one() / static_cast<typename SCT::magnitudeType> (getGlobalLength ());
01559     for (typename ArrayView<Scalar>::iterator i = means.begin();
01560          i != means.begin() + numVecs;
01561          ++i)
01562     {
01563       (*i) = (*i) * OneOverN;
01564     }
01565   }
01566 
01567 
01568   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01569   void
01570   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01571   randomize()
01572   {
01573     if (isConstantStride ()) {
01574       MVT::Random (lclMV_);
01575     }
01576     else {
01577       const size_t numVecs = this->getNumVectors ();
01578       KMV v (MVT::getNode (lclMV_));
01579       Teuchos::ArrayRCP<Scalar> vj;
01580       for (size_t j = 0; j < numVecs; ++j) {
01581         vj = MVT::getValuesNonConst (lclMV_, whichVectors_[j]);
01582         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01583         MVT::Random (v);
01584       }
01585     }
01586   }
01587 
01588 
01589   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01590   void
01591   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01592   putScalar (const Scalar &alpha)
01593   {
01594     const size_t numVecs = getNumVectors();
01595     if (isConstantStride()) {
01596       MVT::Init(lclMV_,alpha);
01597     }
01598     else {
01599       KMV v(MVT::getNode(lclMV_));
01600       Teuchos::ArrayRCP<Scalar> vj;
01601       for (size_t j=0; j < numVecs; ++j) {
01602         vj = MVT::getValuesNonConst(lclMV_,whichVectors_[j]);
01603         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
01604         MVT::Init(v,alpha);
01605       }
01606     }
01607   }
01608 
01609 
01610   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01611   void
01612   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01613   replaceMap (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& newMap)
01614   {
01615     using Teuchos::ArrayRCP;
01616     using Teuchos::Comm;
01617     using Teuchos::RCP;
01618 
01619     // mfh 28 Mar 2013: This method doesn't forget whichVectors_, so
01620     // it might work if the MV is a column view of another MV.
01621     // However, things might go wrong when restoring the original
01622     // Map, so we don't allow this case for now.
01623     TEUCHOS_TEST_FOR_EXCEPTION(
01624       ! this->isConstantStride (), std::logic_error,
01625       "Tpetra::MultiVector::replaceMap: This method does not currently work "
01626       "if the MultiVector is a column view of another MultiVector (that is, if "
01627       "isConstantStride() == false).");
01628 
01629     // Case 1: current Map and new Map are both nonnull on this process.
01630     // Case 2: current Map is nonnull, new Map is null.
01631     // Case 3: current Map is null, new Map is nonnull.
01632     // Case 4: both Maps are null: forbidden.
01633     //
01634     // Case 1 means that we don't have to do anything on this process,
01635     // other than assign the new Map.  (We always have to do that.)
01636     // It's an error for the user to supply a Map that requires
01637     // resizing in this case.
01638     //
01639     // Case 2 means that the calling process is in the current Map's
01640     // communicator, but will be excluded from the new Map's
01641     // communicator.  We don't have to do anything on the calling
01642     // process; just leave whatever data it may have alone.
01643     //
01644     // Case 3 means that the calling process is excluded from the
01645     // current Map's communicator, but will be included in the new
01646     // Map's communicator.  This means we need to (re)allocate the
01647     // local (KokkosClassic::)MultiVector if it does not have the right
01648     // number of rows.  If the new number of rows is nonzero, we'll
01649     // fill the newly allocated local data with zeros, as befits a
01650     // projection operation.
01651     //
01652     // The typical use case for Case 3 is that the MultiVector was
01653     // first created with the Map with more processes, then that Map
01654     // was replaced with a Map with fewer processes, and finally the
01655     // original Map was restored on this call to replaceMap.
01656 
01657 #ifdef HAVE_TEUCHOS_DEBUG
01658     // mfh 28 Mar 2013: We can't check for compatibility across the
01659     // whole communicator, unless we know that the current and new
01660     // Maps are nonnull on _all_ participating processes.
01661     // TEUCHOS_TEST_FOR_EXCEPTION(
01662     //   origNumProcs == newNumProcs && ! this->getMap ()->isCompatible (*map),
01663     //   std::invalid_argument, "Tpetra::MultiVector::project: "
01664     //   "If the input Map's communicator is compatible (has the same number of "
01665     //   "processes as) the current Map's communicator, then the two Maps must be "
01666     //   "compatible.  The replaceMap() method is not for data redistribution; "
01667     //   "use Import or Export for that purpose.");
01668 
01669     // TODO (mfh 28 Mar 2013) Add compatibility checks for projections
01670     // of the Map, in case the process counts don't match.
01671 #endif // HAVE_TEUCHOS_DEBUG
01672 
01673     if (this->getMap ().is_null ()) { // current Map is null
01674       TEUCHOS_TEST_FOR_EXCEPTION(
01675         newMap.is_null (), std::invalid_argument,
01676         "Tpetra::MultiVector::replaceMap: both current and new Maps are null.  "
01677         "This probably means that the input Map is incorrect.");
01678       // Case 3: current Map is null, new Map is nonnull.
01679       const size_t newNumRows = newMap->getNodeNumElements ();
01680       const size_t origNumRows = lclMV_.getNumRows ();
01681       const size_t numCols = getNumVectors ();
01682 
01683       if (origNumRows != newNumRows) {
01684         RCP<Node> node = newMap->getNode ();
01685         ArrayRCP<Scalar> data = newNumRows == 0 ? Teuchos::null :
01686           node->template allocBuffer<Scalar> (newNumRows * numCols);
01687         const size_t stride = newNumRows;
01688         MVT::initializeValues (lclMV_, newNumRows, numCols, data, stride);
01689         if (newNumRows > 0) {
01690           MVT::Init (lclMV_, Teuchos::ScalarTraits<Scalar>::zero ());
01691         }
01692       }
01693     }
01694     else if (newMap.is_null ()) { // Case 2: current Map is nonnull, new Map is null
01695       // I am an excluded process.  Reinitialize my data so that I
01696       // have 0 rows.  Keep the number of columns as before.
01697       const size_t numVecs = getNumVectors ();
01698       MVT::initializeValues (lclMV_, 0, numVecs, Teuchos::null, 0);
01699     }
01700     this->map_ = newMap;
01701   }
01702 
01703 
01704   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01705   void
01706   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01707   scale (const Scalar &alpha)
01708   {
01709     using Teuchos::arcp_const_cast;
01710     using Teuchos::ArrayRCP;
01711 
01712     // NOTE: can't substitute putScalar(0.0) for scale(0.0), because
01713     //       the former will overwrite NaNs present in the MultiVector, while the
01714     //       semantics of this call require multiplying them by 0, which IEEE requires to be NaN
01715     const size_t numVecs = getNumVectors();
01716     if (alpha == Teuchos::ScalarTraits<Scalar>::one()) {
01717       // do nothing
01718     }
01719     else if (isConstantStride()) {
01720       MVT::Scale(lclMV_,alpha);
01721     }
01722     else {
01723       KMV v(MVT::getNode(lclMV_));
01724       ArrayRCP<Scalar> vj;
01725       for (size_t j=0; j < numVecs; ++j) {
01726         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_, whichVectors_[j]));
01727         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01728         MVT::Scale (v, alpha);
01729       }
01730     }
01731   }
01732 
01733 
01734   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01735   void
01736   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01737   scale (Teuchos::ArrayView<const Scalar> alphas)
01738   {
01739     using Teuchos::arcp_const_cast;
01740     using Teuchos::ArrayRCP;
01741 
01742     const size_t numVecs = this->getNumVectors();
01743     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(alphas.size()) != numVecs, std::runtime_error,
01744       "Tpetra::MultiVector::scale(alphas): alphas.size() must be as large as the number of vectors in *this.");
01745 
01746     // Unfortunately, MVT doesn't have a Scale method that works on a
01747     // whole MultiVector.  Thus, we have to scale a column at a time.
01748     if (this->isConstantStride ()) {
01749       for (size_t j = 0; j < numVecs; ++j) {
01750         const Scalar alpha_j = alphas[j];
01751         const size_t curCol = j;
01752 
01753         // If alphas[j] is zero, don't scale; just fill with zeros.
01754         // This follows the BLAS convention.  (zero*NaN is NaN.)
01755         //
01756         // Similarly, if alphas[j] is one, don't scale; just do nothing.
01757         // This follows the BLAS convention.  (one*NaN is NaN.)
01758         if (alpha_j == Teuchos::ScalarTraits<Scalar>::zero ()) {
01759           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01760           MVT::Init (MV_j, alpha_j);
01761         }
01762         else if (alpha_j != Teuchos::ScalarTraits<Scalar>::one ()) {
01763           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01764           MVT::Scale (MV_j, alpha_j);
01765         }
01766       }
01767     } else { // not constant stride
01768       for (size_t j = 0; j < numVecs; ++j) {
01769         const Scalar alpha_j = alphas[j];
01770         const size_t curCol = whichVectors_[j];
01771 
01772         // If alphas[j] is zero, don't scale; just fill with zeros.
01773         // This follows the BLAS convention.  (zero*NaN is NaN.)
01774         //
01775         // Similarly, if alphas[j] is one, don't scale; just do nothing.
01776         // This follows the BLAS convention.  (one*NaN is NaN.)
01777         if (alpha_j == Teuchos::ScalarTraits<Scalar>::zero ()) {
01778           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01779           MVT::Init (MV_j, alpha_j);
01780         }
01781         else if (alpha_j != Teuchos::ScalarTraits<Scalar>::one ()) {
01782           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01783           MVT::Scale (MV_j, alpha_j);
01784         }
01785       }
01786     }
01787 
01788     // KMV vec(MVT::getNode(lclMV_));
01789     // const size_t myLen = MVT::getNumRows(lclMV_);
01790     // if (myLen == 0) return;
01791     // ArrayRCP<Scalar> mybuf = MVT::getValuesNonConst(lclMV_);
01792     // for (size_t j = 0; j < numVecs; ++j) {
01793     //   if (alphas[j] == Teuchos::ScalarTraits<Scalar>::one()) {
01794     //     // do nothing: NaN * 1.0 == NaN, Number*1.0 == Number
01795     //   }
01796     //   else {
01797     //     ArrayRCP<Scalar> mybufj = getSubArrayRCP(mybuf,j);
01798     //     MVT::initializeValues(vec,myLen,1,mybufj,myLen);
01799     //     MVT::Scale(vec,alphas[j]);
01800     //   }
01801     // }
01802   }
01803 
01804 
01805   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01806   void
01807   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01808   scale (const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01809   {
01810     using Teuchos::arcp_const_cast;
01811     using Teuchos::ArrayRCP;
01812 
01813     const char tfecfFuncName[] = "scale(alpha,A)";
01814 
01815     const size_t numVecs = getNumVectors(),
01816                  myLen   = getLocalLength();
01817 #ifdef HAVE_TPETRA_DEBUG
01818     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! this->getMap ()->isCompatible (*A.getMap ()),
01819       std::runtime_error, ": MultiVectors do not have compatible Maps:" << std::endl
01820         << "this->getMap(): " << std::endl << *(this->getMap ())
01821         << "A.getMap(): " << std::endl << *(A.getMap ()) << std::endl);
01822 #else
01823     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01824       ": MultiVectors do not have the same local length.");
01825 #endif
01826     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01827       ": MultiVectors must have the same number of vectors.");
01828     if (isConstantStride() && A.isConstantStride()) {
01829       // set me == alpha*A
01830       MVT::Scale(lclMV_,alpha,(const KMV&)A.lclMV_);
01831     }
01832     else {
01833       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01834       ArrayRCP<Scalar> vptr  = MVT::getValuesNonConst (lclMV_);
01835       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar> (MVT::getValues (A.lclMV_));
01836       for (size_t j=0; j < numVecs; ++j) {
01837         ArrayRCP<Scalar>  vj =   getSubArrayRCP (vptr,  j);
01838         ArrayRCP<Scalar> avj = A.getSubArrayRCP (avptr, j);
01839         MVT::initializeValues (a,myLen, 1, avj, myLen);
01840         MVT::initializeValues (v,myLen, 1,  vj, myLen);
01841         MVT::Scale (v, alpha, (const KMV &)a);
01842       }
01843     }
01844   }
01845 
01846 
01847   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01848   void
01849   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01850   reciprocal (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01851   {
01852     using Teuchos::arcp_const_cast;
01853     using Teuchos::ArrayRCP;
01854 
01855     const char tfecfFuncName[] = "reciprocal()";
01856 
01857 #ifdef HAVE_TPETRA_DEBUG
01858     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01859         ": MultiVectors do not have compatible Maps:" << std::endl
01860         << "this->getMap(): " << std::endl << *this->getMap()
01861         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01862 #else
01863     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01864         ": MultiVectors do not have the same local length.");
01865 #endif
01866     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01867         ": MultiVectors must have the same number of vectors.");
01868     const size_t numVecs = getNumVectors();
01869     const size_t myLen = getLocalLength();
01870     try {
01871       if (isConstantStride() && A.isConstantStride()) {
01872         MVT::Recip(lclMV_,(const KMV&)A.lclMV_);
01873       }
01874       else {
01875         KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01876         ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01877                         avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01878         for (size_t j=0; j < numVecs; ++j) {
01879           ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01880                           avj = A.getSubArrayRCP(avptr,j);
01881           MVT::initializeValues(a,myLen, 1, avj, myLen);
01882           MVT::initializeValues(v,myLen, 1,  vj, myLen);
01883           MVT::Recip(v,(const KMV &)a);
01884         }
01885       }
01886     }
01887     catch (std::runtime_error &e) {
01888       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
01889           ": caught exception from Kokkos:" << std::endl
01890           << e.what() << std::endl);
01891     }
01892   }
01893 
01894 
01895   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01896   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::abs(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A) {
01897     using Teuchos::arcp_const_cast;
01898     using Teuchos::ArrayRCP;
01899 
01900     const char tfecfFuncName[] = "abs()";
01901 #ifdef HAVE_TPETRA_DEBUG
01902     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01903         ": MultiVectors do not have compatible Maps:" << std::endl
01904         << "this->getMap(): " << std::endl << *this->getMap()
01905         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01906 #else
01907     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01908         ": MultiVectors do not have the same local length.");
01909 #endif
01910     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01911         ": MultiVectors must have the same number of vectors.");
01912     const size_t myLen = getLocalLength();
01913     const size_t numVecs = getNumVectors();
01914     if (isConstantStride() && A.isConstantStride()) {
01915       MVT::Abs(lclMV_,(const KMV&)A.lclMV_);
01916     }
01917     else {
01918       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01919       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01920                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01921       for (size_t j=0; j < numVecs; ++j) {
01922         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01923                         avj = A.getSubArrayRCP(avptr,j);
01924         MVT::initializeValues(a,myLen, 1, avj, myLen);
01925         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01926         MVT::Abs(v,(const KMV &)a);
01927       }
01928     }
01929   }
01930 
01931 
01932   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01933   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01934                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01935                       const Scalar &beta)
01936   {
01937     using Teuchos::arcp_const_cast;
01938     using Teuchos::ArrayRCP;
01939 
01940     const char tfecfFuncName[] = "update()";
01941     // this = beta*this + alpha*A
01942     // must support case where &this == &A
01943     // can't short circuit on alpha==0.0 or beta==0.0, because 0.0*NaN != 0.0
01944 #ifdef HAVE_TPETRA_DEBUG
01945     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01946         ": MultiVectors do not have compatible Maps:" << std::endl
01947         << "this->getMap(): " << std::endl << this->getMap()
01948         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01949 #else
01950     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01951         ": MultiVectors do not have the same local length.");
01952 #endif
01953     const size_t myLen = getLocalLength();
01954     const size_t numVecs = getNumVectors();
01955     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01956         ": MultiVectors must have the same number of vectors.");
01957     if (isConstantStride() && A.isConstantStride()) {
01958       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta);
01959     }
01960     else {
01961       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01962       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01963                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01964       for (size_t j=0; j < numVecs; ++j) {
01965         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01966                         avj = A.getSubArrayRCP(avptr,j);
01967         MVT::initializeValues(a,myLen, 1, avj, myLen);
01968         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01969         MVT::GESUM(v,alpha,(const KMV &)a,beta);
01970       }
01971     }
01972   }
01973 
01974 
01975   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01976   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01977                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01978                       const Scalar &beta,  const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &B,
01979                       const Scalar &gamma)
01980   {
01981     using Teuchos::arcp_const_cast;
01982     using Teuchos::ArrayRCP;
01983 
01984     const char tfecfFuncName[] = "update()";
01985     // this = alpha*A + beta*B + gamma*this
01986     // must support case where &this == &A or &this == &B
01987     // can't short circuit on alpha==0.0 or beta==0.0 or gamma==0.0, because 0.0*NaN != 0.0
01988 #ifdef HAVE_TPETRA_DEBUG
01989     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()) || !this->getMap()->isCompatible(*B.getMap()),
01990         std::runtime_error,
01991         ": MultiVectors do not have compatible Maps:" << std::endl
01992         << "this->getMap(): " << std::endl << *this->getMap()
01993         << "A.getMap(): " << std::endl << *A.getMap() << std::endl
01994         << "B.getMap(): " << std::endl << *B.getMap() << std::endl);
01995 #else
01996     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength() || getLocalLength() != B.getLocalLength(), std::runtime_error,
01997         ": MultiVectors do not have the same local length.");
01998 #endif
01999     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors() || B.getNumVectors() != this->getNumVectors(), std::runtime_error,
02000         ": MultiVectors must have the same number of vectors.");
02001     const size_t myLen = getLocalLength();
02002     const size_t numVecs = getNumVectors();
02003     if (isConstantStride() && A.isConstantStride() && B.isConstantStride()) {
02004       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta,(const KMV&)B.lclMV_,gamma);
02005     }
02006     else {
02007       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_)), b(MVT::getNode(lclMV_));
02008       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
02009                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_)),
02010                       bvptr = arcp_const_cast<Scalar>(MVT::getValues(B.lclMV_));
02011       for (size_t j=0; j < numVecs; ++j) {
02012         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
02013                         avj = A.getSubArrayRCP(avptr,j),
02014                         bvj = B.getSubArrayRCP(bvptr,j);
02015         MVT::initializeValues(b,myLen, 1, bvj, myLen);
02016         MVT::initializeValues(a,myLen, 1, avj, myLen);
02017         MVT::initializeValues(v,myLen, 1,  vj, myLen);
02018         MVT::GESUM(v,alpha,(const KMV&)a,beta,(const KMV&)b,gamma);
02019       }
02020     }
02021   }
02022 
02023 
02024   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02025   Teuchos::ArrayRCP<const Scalar>
02026   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02027   getData (size_t j) const
02028   {
02029     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
02030     return node->template viewBuffer<Scalar> (getLocalLength (), getSubArrayRCP (MVT::getValues(lclMV_), j));
02031   }
02032 
02033 
02034   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02035   Teuchos::ArrayRCP<Scalar>
02036   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02037   getDataNonConst(size_t j)
02038   {
02039     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
02040     return node->template viewBufferNonConst<Scalar> (KokkosClassic::ReadWrite, getLocalLength (), getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j));
02041   }
02042 
02043 
02044   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02045   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>&
02046   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02047   operator= (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source)
02048   {
02049     if (source.hasViewSemantics_) {
02050       this->map_ = source.map_; // "op=" for DistObject
02051       lclMV_ = source.lclMV_; // Shallow copy
02052 
02053       // Don't forget to copy whichVectors_ (for non-constant-stride view).
02054       if (source.whichVectors_.size () > 0) {
02055         whichVectors_ = source.whichVectors_; // Array::op= does a deep copy
02056       }
02057       // View semantics are "contagious" for the "classic" version of
02058       // MultiVector.  That is, the right-hand side of the assignment
02059       // transmits view semantics to the left-hand side.  (The Kokkos
02060       // refactor version of MultiVector always has view semantics.)
02061       hasViewSemantics_ = true;
02062     }
02063     else {
02064 #ifdef HAVE_TPETRA_DEBUG
02065       using Teuchos::outArg;
02066       using Teuchos::REDUCE_MIN;
02067       using Teuchos::reduceAll;
02068       using std::endl;
02069 #endif // HAVE_TPETRA_DEBUG
02070 
02071       const char tfecfFuncName[] = "operator=";
02072       // Check for special case of this=Source, in which case we do nothing.
02073       if (this != &source) {
02074         // Whether the input and *this are compatible on the calling process.
02075         const int locallyCompat =
02076           (this->getLocalLength () == source.getLocalLength ()) ? 1 : 0;
02077 #ifdef HAVE_TPETRA_DEBUG
02078         int globallyCompat = 1; // innocent until proven guilty
02079         reduceAll<int, int> (* (this->getMap ()->getComm ()), REDUCE_MIN,
02080                              locallyCompat, outArg (globallyCompat));
02081         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02082           globallyCompat == 0, std::invalid_argument,
02083           ": MultiVectors do not have the same local length on all processes.");
02084 #else
02085         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02086           locallyCompat == 0, std::invalid_argument,
02087           ": MultiVectors do not have the same local length on the calling "
02088           "process " << this->getMap ()->getComm ()->getSize () << ".  *this "
02089           "has " << this->getLocalLength () << " local rows, and source has "
02090           << source.getLocalLength () << " rows.");
02091 #endif // HAVE_TPETRA_DEBUG
02092 
02093         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02094           source.getNumVectors() != getNumVectors(), std::invalid_argument,
02095           ": MultiVectors must have the same number of vectors.");
02096 
02097         Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
02098         const size_t numVecs = getNumVectors();
02099         if (isConstantStride () && source.isConstantStride () &&
02100             getLocalLength () == getStride () &&
02101             source.getLocalLength ()== source.getStride ()) {
02102           // Both multivectors' data are stored contiguously, so we can
02103           // copy in one call.
02104           node->template copyBuffers<Scalar> (getLocalLength () * numVecs,
02105                                               MVT::getValues (source.lclMV_),
02106                                               MVT::getValuesNonConst (lclMV_));
02107         }
02108         else {
02109           // We have to copy the columns one at a time.
02110           for (size_t j=0; j < numVecs; ++j) {
02111             node->template copyBuffers<Scalar> (getLocalLength (),
02112                                                 source.getSubArrayRCP (MVT::getValues (source.lclMV_), j),
02113                                                 getSubArrayRCP (MVT::getValuesNonConst(lclMV_), j));
02114           }
02115         }
02116       }
02117     }
02118     return *this;
02119   }
02120 
02121 
02122   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02123   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02124   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02125   subCopy (const Teuchos::ArrayView<const size_t>& cols) const
02126   {
02127     using Teuchos::RCP;
02128     using Teuchos::rcp;
02129     typedef Teuchos::ArrayView<const size_t>::size_type size_type;
02130     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02131 
02132     const size_type numCopyVecs = static_cast<size_type> (cols.size ());
02133     TEUCHOS_TEST_FOR_EXCEPTION(
02134       numCopyVecs < 1, std::runtime_error, "Tpetra::MultiVector::subCopy"
02135       "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
02136       "contain at least one entry, but cols.size() = " << cols.size ()
02137       << " == 0.");
02138 
02139     // Check whether the index set in cols is contiguous.  If it is,
02140     // use the more efficient Range1D version of subCopy.
02141     bool contiguous = true;
02142     for (size_type j = 1; j < numCopyVecs; ++j) {
02143       if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
02144         contiguous = false;
02145         break;
02146       }
02147     }
02148 
02149     const bool zeroData = false;
02150     if (contiguous) {
02151       if (numCopyVecs == 0) {
02152         // The output MV has no columns, so there is nothing to copy.
02153         return rcp (new MV (this->getMap (), numCopyVecs, zeroData));
02154       } else {
02155         // Use the more efficient contiguous-index-range version.
02156         return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
02157       }
02158     }
02159     else { // Copy data one column at a time.
02160       RCP<MV> mv = rcp (new MV (this->getMap (), numCopyVecs, zeroData));
02161       RCP<Node> node = this->getMap ()->getNode ();
02162       for (size_type j = 0; j < numCopyVecs; ++j) {
02163         node->template copyBuffers<Scalar> (getLocalLength (),
02164                                             getSubArrayRCP (MVT::getValues (lclMV_), cols[j]),
02165                                             MVT::getValuesNonConst (mv->lclMV_, j));
02166       }
02167       return mv;
02168     }
02169   }
02170 
02171 
02172   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02173   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02174   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02175   subCopy (const Teuchos::Range1D& colRng) const
02176   {
02177     using Teuchos::RCP;
02178     using Teuchos::rcp;
02179     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02180 
02181     TEUCHOS_TEST_FOR_EXCEPTION(
02182       colRng.size () == 0, std::runtime_error, "Tpetra::MultiVector::"
02183       "subCopy(Range1D): Range must include at least one vector.");
02184 
02185     const size_t numCopyVecs = colRng.size ();
02186     const bool zeroData = false;
02187     RCP<Node> node = MVT::getNode (lclMV_);
02188     RCP<MV> mv;
02189     // mv is allocated with constant stride
02190     mv = rcp (new MV (this->getMap (), numCopyVecs, zeroData));
02191     // copy data from *this into mv
02192     for (size_t js = colRng.lbound (), jd = 0; jd < numCopyVecs; ++jd, ++js) {
02193       node->template copyBuffers<Scalar> (getLocalLength (),
02194                                           getSubArrayRCP (MVT::getValues (lclMV_), js),
02195                                           MVT::getValuesNonConst (mv->lclMV_, jd));
02196     }
02197     return mv;
02198   }
02199 
02200 
02201   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02202   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02203   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02204   offsetView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
02205               size_t offset) const
02206   {
02207     using Teuchos::RCP;
02208     using Teuchos::rcp;
02209     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02210 
02211     const size_t newNumRows = subMap->getNodeNumElements();
02212     const bool tooManyElts = newNumRows + offset > lclMV_.getOrigNumRows ();
02213     if (tooManyElts) {
02214       const int myRank = this->getMap ()->getComm ()->getRank ();
02215       TEUCHOS_TEST_FOR_EXCEPTION(
02216         newNumRows + offset > MVT::getNumRows (lclMV_),
02217         std::runtime_error,
02218         "Tpetra::MultiVector::offsetView: Invalid input Map.  Input Map owns "
02219         << subMap->getNodeNumElements() << " elements on process " << myRank
02220         << ".  offset = " << offset << ".  Yet, the MultiVector contains only "
02221         << lclMV_.getOrigNumRows () << " on this process.");
02222     }
02223     RCP<const MV> constViewMV;
02224     if (isConstantStride()) {
02225       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02226         lclMV_.offsetView (newNumRows, lclMV_.getNumCols (), offset, 0);
02227       constViewMV = rcp (new MV (subMap, newLocalMV, COMPUTE_VIEW_CONSTRUCTOR));
02228     }
02229     else {
02230       // Compute the max column being viewed.  This tells us where to stop the view.
02231       const size_t maxCol = *std::max_element (whichVectors_.begin(), whichVectors_.end());
02232       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02233         lclMV_.offsetView (newNumRows, maxCol+1, offset, 0);
02234       constViewMV = rcp (new MV (subMap, newLocalMV, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
02235     }
02236     return constViewMV;
02237   }
02238 
02239 
02240   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02241   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02242   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02243   offsetViewNonConst (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
02244                       size_t offset)
02245   {
02246     using Teuchos::RCP;
02247     using Teuchos::rcp;
02248     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02249 
02250     const size_t newNumRows = subMap->getNodeNumElements ();
02251     const bool tooManyElts = newNumRows + offset > lclMV_.getOrigNumRows ();
02252     if (tooManyElts) {
02253       const int myRank = this->getMap ()->getComm ()->getRank ();
02254       TEUCHOS_TEST_FOR_EXCEPTION(
02255         newNumRows + offset > MVT::getNumRows (lclMV_),
02256         std::runtime_error,
02257         "Tpetra::MultiVector::offsetViewNonconst: Invalid input Map.  Input Map owns "
02258         << subMap->getNodeNumElements() << " elements on process " << myRank
02259         << ".  offset = " << offset << ".  Yet, the MultiVector contains only "
02260         << lclMV_.getOrigNumRows () << " on this process.");
02261     }
02262     RCP<MV> subViewMV;
02263     if (isConstantStride()) {
02264       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02265         lclMV_.offsetViewNonConst (newNumRows, lclMV_.getNumCols (), offset, 0);
02266       subViewMV = rcp (new MV (subMap, newLocalMV, COMPUTE_VIEW_CONSTRUCTOR));
02267     }
02268     else {
02269       // Compute the max column being viewed.  This tells us where to stop the view.
02270       const size_t maxCol = *std::max_element (whichVectors_.begin(), whichVectors_.end());
02271       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02272         lclMV_.offsetViewNonConst (newNumRows, maxCol+1, offset, 0);
02273       subViewMV = rcp (new MV (subMap, newLocalMV, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
02274     }
02275     return subViewMV;
02276   }
02277 
02278 
02279   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02280   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02281   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02282   subView (const ArrayView<const size_t>& cols) const
02283   {
02284     using Teuchos::arcp_const_cast;
02285     using Teuchos::Array;
02286     using Teuchos::ArrayRCP;
02287     using Teuchos::RCP;
02288     using Teuchos::rcp;
02289     using Teuchos::rcp_const_cast;
02290     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02291     typedef Teuchos::ArrayView<size_t>::size_type size_type;
02292 
02293     const size_type numViewCols = static_cast<size_type> (cols.size ());
02294     TEUCHOS_TEST_FOR_EXCEPTION(
02295       numViewCols < 1, std::runtime_error, "Tpetra::MultiVector::subView"
02296       "(const Teuchos::ArrayView<const size_t>&): The input array 'cols' "
02297       "must contain at least one entry, but cols.size() = " << cols.size ()
02298       << " == 0.");
02299 
02300     // Check whether the index set in cols is contiguous.  If it is,
02301     // use the more efficient Range1D version of subView.
02302     bool contiguous = true;
02303     for (size_type j = 1; j < numViewCols; ++j) {
02304       if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
02305         contiguous = false;
02306         break;
02307       }
02308     }
02309     if (contiguous) {
02310       if (numViewCols == 0) {
02311         // The output MV has no columns, so there is nothing to view.
02312         return rcp (new MV (this->getMap (), numViewCols));
02313       } else {
02314         // Use the more efficient contiguous-index-range version.
02315         return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
02316       }
02317     }
02318 
02319     // this is const, so the lclMV_ is const, so that we can only get const buffers
02320     // we will cast away the const; this is okay, because
02321     //   a) the constructor doesn't modify the data, and
02322     //   b) we are encapsulating in a const MV before returning
02323     const size_t myStride = MVT::getStride (lclMV_);
02324     const size_t myLen = MVT::getNumRows (lclMV_);
02325     // use the smallest view possible of the buffer: from the first
02326     // element of the minInd vector to the last element of the maxInd
02327     // vector.  this minimizes overlap between views, and keeps view
02328     // of the minimum amount necessary, in order to allow node to
02329     // achieve maximum efficiency.  adjust the indices appropriately;
02330     // shift so that smallest index is 0
02331     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
02332     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar> (cbuf);
02333     Array<size_t> newCols (numViewCols);
02334     size_t minInd = Teuchos::OrdinalTraits<size_t>::max();
02335     size_t maxInd = Teuchos::OrdinalTraits<size_t>::zero();
02336     if (isConstantStride()) {
02337       for (size_type j = 0; j < numViewCols; ++j) {
02338         newCols[j] = cols[j];
02339         if (newCols[j] < minInd) minInd = newCols[j];
02340         if (maxInd < newCols[j]) maxInd = newCols[j];
02341       }
02342     }
02343     else {
02344       for (size_type j = 0; j < numViewCols; ++j) {
02345         newCols[j] = whichVectors_[cols[j]];
02346         if (newCols[j] < minInd) minInd = newCols[j];
02347         if (maxInd < newCols[j]) maxInd = newCols[j];
02348       }
02349     }
02350     ArrayRCP<Scalar> minbuf =
02351       ncbuf.persistingView (minInd * myStride,
02352                             myStride * (maxInd - minInd) + myLen);
02353     for (size_type j = 0; j < numViewCols; ++j) {
02354       newCols[j] -= minInd;
02355     }
02356     RCP<const MV> constViewMV;
02357     return rcp_const_cast<const MV> (rcp (new MV (this->getMap (), minbuf,
02358                                                   myStride, newCols (),
02359                                                   COMPUTE_VIEW_CONSTRUCTOR)));
02360   }
02361 
02362 
02363   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02364   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02365   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02366   subView (const Teuchos::Range1D &colRng) const
02367   {
02368     using Teuchos::arcp_const_cast;
02369     using Teuchos::Array;
02370     using Teuchos::ArrayRCP;
02371     using Teuchos::RCP;
02372     using Teuchos::rcp;
02373     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02374 
02375     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02376       "Tpetra::MultiVector::subView(Range1D): range must include at least one vector.");
02377     size_t numViewVecs = colRng.size();
02378     // this is const, so the lclMV_ is const, so that we can only get const buffers
02379     // we will cast away the const; this is okay, because
02380     //   a) the constructor doesn't modify the data, and
02381     //   b) we are encapsulating in a const MV before returning
02382     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
02383     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar>(cbuf);
02384     // resulting MultiVector is constant stride only if *this is
02385     if (isConstantStride()) {
02386       // view goes from first entry of first vector to last entry of last vector
02387       ArrayRCP<Scalar> subdata = ncbuf.persistingView( MVT::getStride(lclMV_) * colRng.lbound(),
02388                                                        MVT::getStride(lclMV_) * (numViewVecs-1) + getLocalLength() );
02389       return rcp (new MV (this->getMap (), subdata, MVT::getStride (lclMV_), numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
02390     }
02391     // otherwise, use a subset of this whichVectors_ to construct new multivector
02392     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
02393     return rcp (new MV (this->getMap (), ncbuf, MVT::getStride (lclMV_), whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
02394   }
02395 
02396 
02397   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02398   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02399   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02400   subViewNonConst (const ArrayView<const size_t> &cols)
02401   {
02402     using Teuchos::arcp_const_cast;
02403     using Teuchos::Array;
02404     using Teuchos::ArrayRCP;
02405     using Teuchos::Range1D;
02406     using Teuchos::RCP;
02407     using Teuchos::rcp;
02408     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02409 
02410     const size_t numViewCols = static_cast<size_t> (cols.size ());
02411     TEUCHOS_TEST_FOR_EXCEPTION(
02412       numViewCols < 1, std::runtime_error, "Tpetra::MultiVector::"
02413       "subViewNonConst(const Teuchos::ArrayView<const size_t>&): The input "
02414       "array 'cols' must contain at least one entry, but cols.size() = "
02415       << cols.size () << " == 0.");
02416 
02417     // Check whether the index set in cols is contiguous.  If it is,
02418     // use the more efficient Range1D version of subViewNonConst.
02419     bool contiguous = true;
02420     for (size_t j = 1; j < numViewCols; ++j) {
02421       if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
02422         contiguous = false;
02423         break;
02424       }
02425     }
02426     if (contiguous) {
02427       if (numViewCols == 0) {
02428         // The output MV has no columns, so there is nothing to view.
02429         return rcp (new MV (this->getMap (), numViewCols));
02430       } else {
02431         // Use the more efficient contiguous-index-range version.
02432         return this->subViewNonConst (Range1D (cols[0], cols[numViewCols-1]));
02433       }
02434     }
02435 
02436     if (isConstantStride ()) {
02437       return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_),
02438                           MVT::getStride (lclMV_), cols,
02439                           COMPUTE_VIEW_CONSTRUCTOR));
02440     }
02441     // else, lookup current whichVectors_ using cols
02442     Array<size_t> newcols (numViewCols);
02443     for (size_t j = 0; j < numViewCols; ++j) {
02444       newcols[j] = whichVectors_[cols[j]];
02445     }
02446     return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_),
02447                         MVT::getStride (lclMV_), newcols (),
02448                         COMPUTE_VIEW_CONSTRUCTOR));
02449   }
02450 
02451 
02452   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02453   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02454   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02455   subViewNonConst (const Teuchos::Range1D &colRng)
02456   {
02457     using Teuchos::arcp_const_cast;
02458     using Teuchos::Array;
02459     using Teuchos::ArrayRCP;
02460     using Teuchos::RCP;
02461     using Teuchos::rcp;
02462     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02463 
02464     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02465       "Tpetra::MultiVector::subViewNonConst(Range1D): range must include at least one vector.");
02466     size_t numViewVecs = colRng.size();
02467     // resulting MultiVector is constant stride only if *this is
02468     if (isConstantStride ()) {
02469       // view goes from first entry of first vector to last entry of last vector
02470       const size_t stride = MVT::getStride(lclMV_);
02471       ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
02472       ArrayRCP<Scalar> subdata = data.persistingView( stride * colRng.lbound(),
02473                                                       stride * (numViewVecs-1) + getLocalLength() );
02474       return rcp (new MV (this->getMap (), subdata, stride, numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
02475     }
02476     // otherwise, use a subset of this whichVectors_ to construct new multivector
02477     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
02478     const size_t stride = MVT::getStride(lclMV_);
02479     ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
02480     return rcp (new MV (this->getMap (), data, stride, whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
02481   }
02482 
02483 
02484   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02485   Teuchos::RCP<const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02486   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02487   getVector (size_t j) const
02488   {
02489     using Teuchos::arcp_const_cast;
02490     using Teuchos::ArrayRCP;
02491     using Teuchos::rcp;
02492     using Teuchos::rcp_const_cast;
02493     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
02494 
02495 #ifdef HAVE_TPETRA_DEBUG
02496     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
02497         "Tpetra::MultiVector::getVector(j): index j (== " << j << ") exceeds valid column range for this multivector.");
02498 #endif
02499     // this is const, so lclMV_ is const, so we get const buff
02500     // it is safe to cast away the const because we will wrap it in a const Vector below
02501     ArrayRCP<Scalar> ncbuff;
02502     if (getLocalLength() > 0) {
02503       ArrayRCP<const Scalar> cbuff = getSubArrayRCP(MVT::getValues(lclMV_),j);
02504       ncbuff = arcp_const_cast<Scalar>(cbuff);
02505     }
02506     return rcp_const_cast<const V> (rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR)));
02507   }
02508 
02509 
02510   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02511   Teuchos::RCP<Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02512   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getVectorNonConst(size_t j)
02513   {
02514     using Teuchos::ArrayRCP;
02515     using Teuchos::rcp;
02516     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
02517 
02518 #ifdef HAVE_TPETRA_DEBUG
02519     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
02520         "Tpetra::MultiVector::getVectorNonConst(j): index j (== " << j << ") exceeds valid column range for this multivector.");
02521 #endif
02522     ArrayRCP<Scalar> ncbuff;
02523     if (getLocalLength() > 0) {
02524       ncbuff = getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j);
02525     }
02526     return rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR));
02527   }
02528 
02529 
02530   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02531   void
02532   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02533   get1dCopy (Teuchos::ArrayView<Scalar> A, size_t LDA) const
02534   {
02535     using Teuchos::ArrayRCP;
02536     using Teuchos::ArrayView;
02537     using Teuchos::RCP;
02538     using Teuchos::rcp;
02539 
02540     const char tfecfFuncName[] = "get1dCopy(A,LDA)";
02541     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < getLocalLength(), std::runtime_error,
02542       ": specified stride is not large enough for local vector length.");
02543     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02544       static_cast<size_t> (A.size ()) < LDA * (getNumVectors() - 1) + getLocalLength (),
02545       std::runtime_error, ": specified stride/storage is not large enough for "
02546       "the number of vectors.");
02547     RCP<Node> node = MVT::getNode(lclMV_);
02548     const size_t myStride = MVT::getStride(lclMV_),
02549                   numCols = getNumVectors(),
02550                   myLen   = getLocalLength();
02551     if (myLen > 0) {
02552       ArrayRCP<const Scalar> mydata = MVT::getValues(lclMV_);
02553       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,mydata);
02554       typename ArrayView<Scalar>::iterator Aptr = A.begin();
02555       for (size_t j=0; j<numCols; j++) {
02556         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
02557         std::copy(myviewj,myviewj+myLen,Aptr);
02558         Aptr += LDA;
02559       }
02560       myview = Teuchos::null;
02561       mydata = Teuchos::null;
02562     }
02563   }
02564 
02565 
02566   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02567   void
02568   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02569   get2dCopy (Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const
02570   {
02571     using Teuchos::ArrayRCP;
02572     using Teuchos::ArrayView;
02573     using Teuchos::RCP;
02574     using Teuchos::rcp;
02575 
02576     const char tfecfFuncName[] = "get2dCopy(ArrayOfPtrs)";
02577     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02578       static_cast<size_t> (ArrayOfPtrs.size ()) != getNumVectors(), std::runtime_error,
02579       ": Array of pointers must contain as many pointers as the MultiVector has rows.");
02580     RCP<Node> node = MVT::getNode(lclMV_);
02581     const size_t numCols = getNumVectors(),
02582                    myLen = getLocalLength();
02583     if (myLen > 0) {
02584       ArrayRCP<const Scalar> mybuff = MVT::getValues(lclMV_);
02585       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(mybuff.size(), mybuff);
02586       for (size_t j=0; j<numCols; ++j) {
02587 #ifdef HAVE_TPETRA_DEBUG
02588         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02589           static_cast<size_t> (ArrayOfPtrs[j].size ()) != getLocalLength (),
02590           std::runtime_error, ": The ArrayView provided in ArrayOfPtrs[" << j
02591           << "] was not large enough to contain the local entries.");
02592 #endif
02593         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
02594         std::copy(myviewj,myviewj+myLen,ArrayOfPtrs[j].begin());
02595       }
02596       myview = Teuchos::null;
02597       mybuff = Teuchos::null;
02598     }
02599   }
02600 
02601 
02602   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02603   Teuchos::ArrayRCP<const Scalar>
02604   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dView () const
02605   {
02606     if (getLocalLength () == 0 || getNumVectors () == 0) {
02607       return Teuchos::null;
02608     } else {
02609       TEUCHOS_TEST_FOR_EXCEPTION(
02610         ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::"
02611         "get1dView: This MultiVector does not have constant stride, so it is "
02612         "not possible to view its data as a single array.  You may check "
02613         "whether a MultiVector has constant stride by calling "
02614         "isConstantStride().");
02615       Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
02616       return node->template viewBuffer<Scalar> (getStride () * (getNumVectors () - 1) +
02617                                                 getLocalLength (),
02618                                                 MVT::getValues (lclMV_));
02619     }
02620   }
02621 
02622 
02623   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02624   Teuchos::ArrayRCP<Scalar>
02625   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dViewNonConst ()
02626   {
02627     if (getLocalLength () == 0 || getNumVectors () == 0) {
02628       return Teuchos::null;
02629     } else {
02630       TEUCHOS_TEST_FOR_EXCEPTION(
02631         ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::"
02632         "get1dViewNonConst: This MultiVector does not have constant stride, so "
02633         "it is not possible to view its data as a single array.  You may check "
02634         "whether a MultiVector has constant stride by calling "
02635         "isConstantStride().");
02636       Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
02637       return node->template viewBufferNonConst<Scalar> (KokkosClassic::ReadWrite,
02638                                                         getStride () * (getNumVectors () - 1) +
02639                                                         getLocalLength (),
02640                                                         MVT::getValuesNonConst (lclMV_));
02641     }
02642   }
02643 
02644 
02645   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02646   Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
02647   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dViewNonConst()
02648   {
02649     using Teuchos::arcp;
02650     using Teuchos::ArrayRCP;
02651     using Teuchos::RCP;
02652 
02653     RCP<Node> node = MVT::getNode(lclMV_);
02654     ArrayRCP<ArrayRCP<Scalar> > views = arcp<ArrayRCP<Scalar> > (getNumVectors ());
02655     if (isConstantStride ()) {
02656       const size_t myStride = MVT::getStride(lclMV_),
02657                     numCols = getNumVectors(),
02658                     myLen   = getLocalLength();
02659       if (myLen > 0) {
02660         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
02661         for (size_t j=0; j<numCols; ++j) {
02662           views[j] = myview.persistingView(0,myLen);
02663           myview += myStride;
02664         }
02665       }
02666     }
02667     else {
02668       const size_t myStride = MVT::getStride(lclMV_),
02669                     numCols = MVT::getNumCols(lclMV_),
02670                      myCols = getNumVectors(),
02671                      myLen  = MVT::getNumRows(lclMV_);
02672       if (myLen > 0) {
02673         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
02674         for (size_t j=0; j<myCols; ++j) {
02675           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
02676         }
02677       }
02678     }
02679     return views;
02680   }
02681 
02682 
02683   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02684   Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
02685   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dView() const
02686   {
02687     using Teuchos::arcp;
02688     using Teuchos::ArrayRCP;
02689     using Teuchos::RCP;
02690 
02691     RCP<Node> node = MVT::getNode(lclMV_);
02692     ArrayRCP< ArrayRCP<const Scalar> > views = arcp<ArrayRCP<const Scalar> >(getNumVectors());
02693     if (isConstantStride()) {
02694       const size_t myStride = MVT::getStride(lclMV_),
02695                     numCols = getNumVectors(),
02696                     myLen   = getLocalLength();
02697       if (myLen > 0) {
02698         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
02699         for (size_t j=0; j<numCols; ++j) {
02700           views[j] = myview.persistingView(0,myLen);
02701           myview += myStride;
02702         }
02703       }
02704     }
02705     else {
02706       const size_t myStride = MVT::getStride(lclMV_),
02707                     numCols = MVT::getNumCols(lclMV_),
02708                      myCols = getNumVectors(),
02709                      myLen  = MVT::getNumRows(lclMV_);
02710       if (myLen > 0) {
02711         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
02712         for (size_t j=0; j<myCols; ++j) {
02713           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
02714         }
02715       }
02716     }
02717     return views;
02718   }
02719 
02720 
02721   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02722   void
02723   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02724   multiply (Teuchos::ETransp transA,
02725             Teuchos::ETransp transB,
02726             const Scalar &alpha,
02727             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
02728             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
02729             const Scalar &beta)
02730   {
02731     using Teuchos::NO_TRANS;      // enums
02732     using Teuchos::TRANS;
02733     using Teuchos::CONJ_TRANS;
02734     using Teuchos::null;
02735     using Teuchos::ScalarTraits;  // traits
02736     using Teuchos::RCP;
02737     using Teuchos::rcp;
02738     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02739 
02740     // This routine performs a variety of matrix-matrix multiply operations, interpreting
02741     // the MultiVector (this-aka C , A and B) as 2D matrices.  Variations are due to
02742     // the fact that A, B and C can be local replicated or global distributed
02743     // MultiVectors and that we may or may not operate with the transpose of
02744     // A and B.  Possible cases are:
02745     //                                       Num
02746     //      OPERATIONS                        cases  Notes
02747     //  1) C(local) = A^X(local) * B^X(local)  4    (X=Trans or Not, No comm needed)
02748     //  2) C(local) = A^T(distr) * B  (distr)  1    (2D dot product, replicate C)
02749     //  3) C(distr) = A  (distr) * B^X(local)  2    (2D vector update, no comm needed)
02750     //
02751     // The following operations are not meaningful for 1D distributions:
02752     //
02753     // u1) C(local) = A^T(distr) * B^T(distr)  1
02754     // u2) C(local) = A  (distr) * B^X(distr)  2
02755     // u3) C(distr) = A^X(local) * B^X(local)  4
02756     // u4) C(distr) = A^X(local) * B^X(distr)  4
02757     // u5) C(distr) = A^T(distr) * B^X(local)  2
02758     // u6) C(local) = A^X(distr) * B^X(local)  4
02759     // u7) C(distr) = A^X(distr) * B^X(local)  4
02760     // u8) C(local) = A^X(local) * B^X(distr)  4
02761     //
02762     // Total of 32 case (2^5).
02763 
02764     const char errPrefix[] = "Tpetra::MultiVector::multiply(transOpA,transOpB,alpha,A,B,beta): ";
02765 
02766     TEUCHOS_TEST_FOR_EXCEPTION( ScalarTraits<Scalar>::isComplex && (transA == TRANS || transB == TRANS), std::invalid_argument,
02767         errPrefix << "non-conjugate transpose not supported for complex types.");
02768     transA = (transA == NO_TRANS ? NO_TRANS : CONJ_TRANS);
02769     transB = (transB == NO_TRANS ? NO_TRANS : CONJ_TRANS);
02770 
02771     // Compute effective dimensions, w.r.t. transpose operations on
02772     size_t A_nrows = (transA==CONJ_TRANS) ? A.getNumVectors() : A.getLocalLength();
02773     size_t A_ncols = (transA==CONJ_TRANS) ? A.getLocalLength() : A.getNumVectors();
02774     size_t B_nrows = (transB==CONJ_TRANS) ? B.getNumVectors() : B.getLocalLength();
02775     size_t B_ncols = (transB==CONJ_TRANS) ? B.getLocalLength() : B.getNumVectors();
02776 
02777     Scalar beta_local = beta; // local copy of beta; might be reassigned below
02778 
02779     TEUCHOS_TEST_FOR_EXCEPTION(
02780       getLocalLength() != A_nrows || getNumVectors() != B_ncols || A_ncols != B_nrows,
02781       std::runtime_error,
02782       errPrefix << "dimension of *this, op(A) and op(B) must be consistent.  "
02783       << std::endl << "The local part of *this is "
02784       << getLocalLength() << " x " << getNumVectors()
02785       << ", A is " << A_nrows << " x " << A_ncols
02786       << ", and B is " << B_nrows << " x " << B_ncols << ".");
02787 
02788     bool A_is_local = ! A.isDistributed ();
02789     bool B_is_local = ! B.isDistributed ();
02790     bool C_is_local = ! this->isDistributed ();
02791     // Case 1: C(local) = A^X(local) * B^X(local)
02792     bool Case1 = (C_is_local &&  A_is_local && B_is_local);
02793     // Case 2: C(local) = A^T(distr) * B  (distr)
02794     bool Case2 = (C_is_local && ! A_is_local && ! B_is_local &&
02795                   transA == CONJ_TRANS && transB == NO_TRANS);
02796     // Case 3: C(distr) = A  (distr) * B^X(local)
02797     bool Case3 = (! C_is_local && ! A_is_local && B_is_local &&
02798                   transA == NO_TRANS);
02799 
02800     // Test that we are considering a meaningful cases
02801     TEUCHOS_TEST_FOR_EXCEPTION(
02802       ! Case1 && ! Case2 && ! Case3, std::runtime_error,
02803       errPrefix << "multiplication of op(A) and op(B) into *this is not a "
02804       "supported use case.");
02805 
02806     if (beta != ScalarTraits<Scalar>::zero() && Case2) {
02807       // If Case2, then C is local and contributions must be summed
02808       // across all processes.  However, if beta != 0, then accumulate
02809       // beta*C into the sum.  When summing across all processes, we
02810       // only want to accumulate this once, so set beta == 0 on all
02811       // processes except Process 0.
02812       const int MyPID = this->getMap ()->getComm ()->getRank ();
02813       if (MyPID!=0) beta_local = ScalarTraits<Scalar>::zero();
02814     }
02815 
02816     // Check if A, B, C have constant stride, if not then make temp copy (strided)
02817     RCP<const MV> Atmp, Btmp;
02818     RCP<MV>       Ctmp;
02819     if (isConstantStride() == false) Ctmp = rcp (new MV (*this, Teuchos::Copy));
02820     else Ctmp = rcp(this,false);
02821 
02822     if (A.isConstantStride() == false) Atmp = rcp (new MV (A, Teuchos::Copy));
02823     else Atmp = rcp(&A,false);
02824 
02825     if (B.isConstantStride() == false) Btmp = rcp (new MV (B, Teuchos::Copy));
02826     else Btmp = rcp(&B,false);
02827 
02828 #ifdef HAVE_TEUCHOS_DEBUG
02829     TEUCHOS_TEST_FOR_EXCEPTION(!Ctmp->isConstantStride() || !Btmp->isConstantStride() || !Atmp->isConstantStride(), std::logic_error,
02830         errPrefix << "failed making temporary strided copies of input multivectors.");
02831 #endif
02832 
02833     KMV &C_mv = Ctmp->lclMV_;
02834     {
02835       // get local multivectors
02836       const KMV &A_mv = Atmp->lclMV_;
02837       const KMV &B_mv = Btmp->lclMV_;
02838       // do the multiply (GEMM)
02839       MVT::GEMM(C_mv,transA,transB,alpha,A_mv,B_mv,beta_local);
02840     }
02841 
02842     // Dispose of (possibly) extra copies of A, B
02843     Atmp = null;
02844     Btmp = null;
02845 
02846     RCP<Node> node = MVT::getNode(lclMV_);
02847     // If *this was not strided, copy the data from the strided version and then delete it
02848     if (! isConstantStride ()) {
02849       // *this is not strided, we must put data from Ctmp into *this
02850       TEUCHOS_TEST_FOR_EXCEPT(&C_mv != &lclMV_);
02851       const size_t numVecs = MVT::getNumCols(lclMV_);
02852       for (size_t j=0; j < numVecs; ++j) {
02853         node->template copyBuffers<Scalar>(getLocalLength(),MVT::getValues(C_mv,j),MVT::getValuesNonConst(lclMV_,whichVectors_[j]));
02854       }
02855     }
02856 
02857     // If Case 2 then sum up *this and distribute it to all processors.
02858     if (Case2) {
02859       this->reduce();
02860     }
02861   }
02862 
02863   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02864   void
02865   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02866   elementWiseMultiply (Scalar scalarAB,
02867                        const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
02868                        const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
02869                        Scalar scalarThis)
02870   {
02871     using Teuchos::arcp_const_cast;
02872     const char tfecfFuncName[] = "elementWiseMultiply()";
02873 
02874 #ifdef HAVE_TPETRA_DEBUG
02875     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02876       getLocalLength() != A.getLocalLength() ||
02877       getLocalLength() != B.getLocalLength(), std::runtime_error,
02878       ": MultiVectors do not have the same local length.");
02879 #endif // HAVE_TPETRA_DEBUG
02880     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02881       B.getNumVectors() != this->getNumVectors(), std::runtime_error,
02882       ": MultiVectors 'this' and B must have the same number of vectors.");
02883     try {
02884       MVT::ElemMult (lclMV_, scalarThis, scalarAB, (const KMV&) A.lclMV_,
02885                      (const KMV&) B.lclMV_);
02886     }
02887     catch (std::runtime_error &e) {
02888       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
02889           ": caught exception from Kokkos:" << std::endl
02890           << e.what() << std::endl);
02891     }
02892   }
02893 
02894   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02895   void
02896   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::reduce()
02897   {
02898     using Teuchos::Array;
02899     using Teuchos::ArrayView;
02900     using Teuchos::ArrayRCP;
02901     using Teuchos::RCP;
02902     using Teuchos::reduceAll;
02903     using Teuchos::REDUCE_SUM;
02904 
02905     // This method should be called only for locally replicated
02906     // MultiVectors (! isDistributed ()).
02907     TEUCHOS_TEST_FOR_EXCEPTION(this->isDistributed (), std::runtime_error,
02908       "Tpetra::MultiVector::reduce() should only be called with locally "
02909       "replicated or otherwise not distributed MultiVector objects.");
02910     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
02911     if (comm->getSize() == 1) return;
02912     RCP<Node> node = MVT::getNode(lclMV_);
02913     // sum the data across all multivectors
02914     // need to have separate packed buffers for send and receive
02915     // if we're packed, we'll just set the receive buffer as our data, the send as a copy
02916     // if we're not packed, we'll use allocated buffers for both.
02917     ArrayView<Scalar> target;
02918     const size_t myStride = MVT::getStride(lclMV_),
02919                  numCols  = MVT::getNumCols(lclMV_),
02920                  myLen    = MVT::getNumRows(lclMV_);
02921     Array<Scalar> sourceBuffer(numCols*myLen), tmparr(0);
02922     bool packed = isConstantStride() && (myStride == myLen);
02923     ArrayRCP<Scalar> bufView = node->template viewBufferNonConst<Scalar>(
02924                                           KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,
02925                                           MVT::getValuesNonConst(lclMV_) );
02926     if (packed) {
02927       // copy data from view to sourceBuffer, reduce into view below
02928       target = bufView(0,myLen*numCols);
02929       std::copy(target.begin(),target.end(),sourceBuffer.begin());
02930     }
02931     else {
02932       // copy data into sourceBuffer, reduce from sourceBuffer into tmparr below, copy back to view after that
02933       tmparr.resize(myLen*numCols);
02934       Scalar *sptr = sourceBuffer.getRawPtr();
02935       ArrayRCP<const Scalar> vptr = bufView;
02936       for (size_t j=0; j<numCols; ++j) {
02937         std::copy(vptr,vptr+myLen,sptr);
02938         sptr += myLen;
02939         vptr += myStride;
02940       }
02941       target = tmparr();
02942     }
02943     // reduce
02944     reduceAll<int,Scalar> (*comm, REDUCE_SUM, static_cast<int> (numCols*myLen),
02945                            sourceBuffer.getRawPtr (), target.getRawPtr ());
02946     if (! packed) {
02947       // copy tmparr back into view
02948       const Scalar *sptr = tmparr.getRawPtr();
02949       ArrayRCP<Scalar> vptr = bufView;
02950       for (size_t j=0; j<numCols; ++j) {
02951         std::copy (sptr, sptr+myLen, vptr);
02952         sptr += myLen;
02953         vptr += myStride;
02954       }
02955     }
02956     bufView = Teuchos::null;
02957   }
02958 
02959 
02960   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02961   void
02962   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02963   replaceLocalValue (LocalOrdinal MyRow,
02964                      size_t VectorIndex,
02965                      const Scalar &ScalarValue)
02966   {
02967 #ifdef HAVE_TPETRA_DEBUG
02968     const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
02969     const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
02970     TEUCHOS_TEST_FOR_EXCEPTION(
02971       MyRow < minLocalIndex || MyRow > maxLocalIndex,
02972       std::runtime_error,
02973       "Tpetra::MultiVector::replaceLocalValue: row index " << MyRow
02974       << " is invalid.  The range of valid row indices on this process "
02975       << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
02976       << ", " << maxLocalIndex << "].");
02977     TEUCHOS_TEST_FOR_EXCEPTION(
02978       vectorIndexOutOfRange(VectorIndex),
02979       std::runtime_error,
02980       "Tpetra::MultiVector::replaceLocalValue: vector index " << VectorIndex
02981       << " of the multivector is invalid.");
02982 #endif
02983     const size_t colInd = isConstantStride () ?
02984       VectorIndex : whichVectors_[VectorIndex];
02985     lclMV_.replaceLocalValue (static_cast<size_t> (MyRow), colInd, ScalarValue);
02986   }
02987 
02988 
02989   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02990   void
02991   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02992   sumIntoLocalValue (LocalOrdinal MyRow,
02993                      size_t VectorIndex,
02994                      const Scalar &ScalarValue)
02995   {
02996 #ifdef HAVE_TPETRA_DEBUG
02997     const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
02998     const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
02999     TEUCHOS_TEST_FOR_EXCEPTION(
03000       MyRow < minLocalIndex || MyRow > maxLocalIndex,
03001       std::runtime_error,
03002       "Tpetra::MultiVector::sumIntoLocalValue: row index " << MyRow
03003       << " is invalid.  The range of valid row indices on this process "
03004       << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
03005       << ", " << maxLocalIndex << "].");
03006     TEUCHOS_TEST_FOR_EXCEPTION(
03007       vectorIndexOutOfRange(VectorIndex),
03008       std::runtime_error,
03009       "Tpetra::MultiVector::sumIntoLocalValue: vector index " << VectorIndex
03010       << " of the multivector is invalid.");
03011 #endif
03012     const size_t colInd = isConstantStride () ?
03013       VectorIndex : whichVectors_[VectorIndex];
03014     lclMV_.sumIntoLocalValue (static_cast<size_t> (MyRow), colInd, ScalarValue);
03015   }
03016 
03017 
03018   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03019   void
03020   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03021   replaceGlobalValue (GlobalOrdinal GlobalRow,
03022                       size_t VectorIndex,
03023                       const Scalar &ScalarValue)
03024   {
03025     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
03026 #ifdef HAVE_TPETRA_DEBUG
03027     TEUCHOS_TEST_FOR_EXCEPTION(
03028       MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
03029       std::runtime_error,
03030       "Tpetra::MultiVector::replaceGlobalValue: global row index " << GlobalRow
03031       << "is not present on this process " << this->getMap()->getComm()->getRank()
03032       << ".");
03033     TEUCHOS_TEST_FOR_EXCEPTION(
03034       vectorIndexOutOfRange(VectorIndex),
03035       std::runtime_error,
03036       "Tpetra::MultiVector::replaceGlobalValue: vector index " << VectorIndex
03037       << " of the multivector is invalid.");
03038 #endif
03039     replaceLocalValue (MyRow, VectorIndex, ScalarValue);
03040   }
03041 
03042   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03043   void
03044   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03045   sumIntoGlobalValue (GlobalOrdinal GlobalRow,
03046                       size_t VectorIndex,
03047                       const Scalar &ScalarValue)
03048   {
03049     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
03050 #ifdef HAVE_TEUCHOS_DEBUG
03051     TEUCHOS_TEST_FOR_EXCEPTION(
03052       MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
03053       std::runtime_error,
03054       "Tpetra::MultiVector::sumIntoGlobalValue: global row index " << GlobalRow
03055       << "is not present on this process " << this->getMap()->getComm()->getRank()
03056       << ".");
03057     TEUCHOS_TEST_FOR_EXCEPTION(
03058       vectorIndexOutOfRange(VectorIndex),
03059       std::runtime_error,
03060       "Tpetra::MultiVector::sumIntoGlobalValue: vector index " << VectorIndex
03061       << " of the multivector is invalid.");
03062 #endif
03063     sumIntoLocalValue (MyRow, VectorIndex, ScalarValue);
03064   }
03065 
03066   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03067   template <class T>
03068   Teuchos::ArrayRCP<T>
03069   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03070   getSubArrayRCP (Teuchos::ArrayRCP<T> arr,
03071                   size_t j) const
03072   {
03073     const size_t stride = MVT::getStride (lclMV_);
03074     const size_t myLen = getLocalLength ();
03075     Teuchos::ArrayRCP<T> ret;
03076     if (isConstantStride()) {
03077       ret = arr.persistingView (j*stride, myLen);
03078     }
03079     else {
03080       ret = arr.persistingView (whichVectors_[j]*stride, myLen);
03081     }
03082     return ret;
03083   }
03084 
03085   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03086   KokkosClassic::MultiVector<Scalar,Node>
03087   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMV() const {
03088     return lclMV_;
03089   }
03090 
03091   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03092   TEUCHOS_DEPRECATED
03093   KokkosClassic::MultiVector<Scalar,Node>&
03094   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMVNonConst() {
03095     return lclMV_;
03096   }
03097 
03098   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03099   std::string
03100   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::description() const
03101   {
03102     using std::endl;
03103     std::ostringstream oss;
03104     oss << Teuchos::typeName (*this) << " {" << endl
03105         << "  label: \"" << this->getObjectLabel () << "\"" << endl
03106         << "  numRows: " << getGlobalLength () << endl
03107         << "  numCols: " << getNumVectors () << endl
03108         << "  isConstantStride: " << isConstantStride () << endl;
03109     if (isConstantStride ()) {
03110       oss << "  columnStride: " << getStride () << endl;
03111     }
03112     oss << "}" << endl;
03113     return oss.str();
03114   }
03115 
03116   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03117   void
03118   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03119   describe (Teuchos::FancyOStream &out,
03120             const Teuchos::EVerbosityLevel verbLevel) const
03121   {
03122     using Teuchos::ArrayRCP;
03123     using Teuchos::RCP;
03124     using Teuchos::VERB_DEFAULT;
03125     using Teuchos::VERB_NONE;
03126     using Teuchos::VERB_LOW;
03127     using Teuchos::VERB_MEDIUM;
03128     using Teuchos::VERB_HIGH;
03129     using Teuchos::VERB_EXTREME;
03130     using std::endl;
03131     using std::setw;
03132 
03133     // Set default verbosity if applicable.
03134     const Teuchos::EVerbosityLevel vl =
03135       (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
03136 
03137     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
03138     const int myImageID = comm->getRank();
03139     const int numImages = comm->getSize();
03140 
03141     if (vl != VERB_NONE) {
03142       // Don't set the tab level unless we're printing something.
03143       Teuchos::OSTab tab (out);
03144 
03145       if (myImageID == 0) { // >= VERB_LOW prints description()
03146         out << this->description() << endl;
03147       }
03148       for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
03149         if (myImageID == imageCtr) {
03150           if (vl != VERB_LOW) {
03151             // At verbosity > VERB_LOW, each process prints something.
03152             out << "Process " << myImageID << ":" << endl;
03153 
03154             Teuchos::OSTab procTab (out);
03155             // >= VERB_MEDIUM: print the local vector length.
03156             out << "local length=" << getLocalLength();
03157             if (vl != VERB_MEDIUM) {
03158               // >= VERB_HIGH: print isConstantStride() and getStride()
03159               if (isConstantStride()) {
03160                 out << ", constant stride=" << getStride() << endl;
03161               }
03162               else {
03163                 out << ", not constant stride" << endl;
03164               }
03165               if (vl == VERB_EXTREME) {
03166                 // VERB_EXTREME: print all the values in the multivector.
03167                 out << "Values:" << endl;
03168                 ArrayRCP<ArrayRCP<const Scalar> > X = this->get2dView();
03169                 for (size_t i = 0; i < getLocalLength(); ++i) {
03170                   for (size_t j = 0; j < getNumVectors(); ++j) {
03171                     out << X[j][i];
03172                     if (j < getNumVectors() - 1) {
03173                       out << " ";
03174                     }
03175                   } // for each column
03176                   out << endl;
03177                 } // for each row
03178               } // if vl == VERB_EXTREME
03179             } // if (vl != VERB_MEDIUM)
03180             else { // vl == VERB_LOW
03181               out << endl;
03182             }
03183           } // if vl != VERB_LOW
03184         } // if it is my process' turn to print
03185         comm->barrier();
03186       } // for each process in the communicator
03187     } // if vl != VERB_NONE
03188   }
03189 
03190 #if TPETRA_USE_KOKKOS_DISTOBJECT
03191 
03192   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03193   void
03194   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03195   createViews() const
03196   {
03197     // Do nothing in Kokkos::View implementation
03198   }
03199 
03200   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03201   void
03202   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03203   createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
03204   {
03205     // Do nothing in Kokkos::View implementation
03206   }
03207 
03208   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03209   void
03210   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
03211   {
03212     // Do nothing in Kokkos::View implementation
03213   }
03214 
03215 #else
03216 
03217   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03218   void
03219   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03220   createViews() const
03221   {
03222     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
03223     if (cview_.is_null () && getLocalLength () > 0) {
03224       Teuchos::ArrayRCP<const Scalar> buff = MVT::getValues (lclMV_);
03225       cview_ = node->template viewBuffer<Scalar> (buff.size (), buff);
03226     }
03227   }
03228 
03229   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03230   void
03231   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03232   createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
03233   {
03234     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
03235     if (ncview_.is_null () && getLocalLength () > 0) {
03236       Teuchos::ArrayRCP<Scalar> buff = MVT::getValuesNonConst (lclMV_);
03237       ncview_ = node->template viewBufferNonConst<Scalar> (rwo, buff.size (), buff);
03238     }
03239   }
03240 
03241   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03242   void
03243   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
03244   {
03245     const int constViewCount = cview_.total_count ();
03246     const int nonconstViewCount = ncview_.total_count ();
03247 
03248     // This prevents unused variable compiler warnings, in case Tpetra
03249     // efficiency warnings aren't enabled.
03250     (void) constViewCount;
03251     (void) nonconstViewCount;
03252 
03253     // Release the views.
03254     cview_ = Teuchos::null;
03255     ncview_ = Teuchos::null;
03256   }
03257 
03258 #endif
03259 
03260   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03261   void
03262   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03263   removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap)
03264   {
03265     replaceMap (newMap);
03266   }
03267 
03268   template <class ST, class LO, class GO, class NT>
03269   MultiVector<ST, LO, GO, NT>
03270   createCopy (const MultiVector<ST, LO, GO, NT>& src)
03271   {
03272     // The 2-argument copy constructor with second argument =
03273     // Teuchos::Copy does a deep copy of its input.
03274     MultiVector<ST, LO, GO, NT> dst (src, Teuchos::Copy);
03275 
03276     // Returning by value will invoke the copy constructor, so we need
03277     // to set the result to have view semantics, so that the copy
03278     // constructor only does a shallow copy.
03279     dst.setCopyOrView (Teuchos::View);
03280     return dst;
03281   }
03282 
03283   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03284   void
03285   MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
03286   assign (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& src)
03287   {
03288     const char prefix[] = "Tpetra::MultiVector::assign (called by deep_copy): ";
03289     TEUCHOS_TEST_FOR_EXCEPTION(
03290       this->getGlobalLength () != src.getGlobalLength (), std::invalid_argument,
03291       prefix << "this->getGlobalLength() = " << this->getGlobalLength ()
03292       << " != src.getGlobalLength() = " << src.getGlobalLength () << ".");
03293     TEUCHOS_TEST_FOR_EXCEPTION(
03294       this->getNumVectors () != src.getNumVectors(), std::invalid_argument,
03295       prefix << "this->getNumVectors() = " << this->getNumVectors () << " != "
03296       "src.getNumVectors() = " << src.getNumVectors () << ".");
03297     // FIXME (mfh 11 Sep 2014) This may cause deadlock on error.
03298     TEUCHOS_TEST_FOR_EXCEPTION(
03299       this->getMap ().is_null () || src.getMap ().is_null (), std::runtime_error,
03300       prefix << "You may not call this method if either the source or target "
03301       "(this) MultiVector's Map is null.  That means that the calling process "
03302       "is not participating in the operation.")
03303     // FIXME (mfh 11 Sep 2014) This may cause deadlock on error.
03304     TEUCHOS_TEST_FOR_EXCEPTION(
03305       this->getLocalLength () != src.getLocalLength (), std::invalid_argument,
03306       prefix << "this->getLocalLength() = " << this->getLocalLength () << " != "
03307       "src.getLocalLength() = " << src.getLocalLength () << ".");
03308 
03309     if (this != &src) {
03310       if (this->isConstantStride ()) {
03311         if (src.isConstantStride ()) {
03312           MVT::Assign (this->lclMV_, src.lclMV_);
03313         }
03314         else { // src is not constant stride
03315           MVT::Assign (lclMV_, src.lclMV_, src.whichVectors_);
03316         }
03317       }
03318       else { // we have to copy the columns one at a time
03319         Teuchos::RCP<Node> node = MVT::getNode (this->lclMV_);
03320         const size_t lclNumRows = this->getLocalLength ();
03321         const size_t numVecs = this->getNumVectors ();
03322         for (size_t j = 0; j < numVecs; ++j) {
03323           node->template copyBuffers<Scalar> (lclNumRows,
03324                                               src.getSubArrayRCP (MVT::getValues (src.lclMV_), j),
03325                                               this->getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j));
03326         }
03327       }
03328     }
03329   }
03330 } // namespace Tpetra
03331 
03332 // Include KokkosRefactor partial specialization if enabled
03333 #if defined(TPETRA_HAVE_KOKKOS_REFACTOR)
03334 #include "Tpetra_KokkosRefactor_MultiVector_def.hpp"
03335 #endif
03336 
03337 //
03338 // Explicit instantiation macro
03339 //
03340 // Must be expanded from within the Tpetra namespace!
03341 //
03342 
03343 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
03344   \
03345   template class MultiVector< SCALAR , LO , GO , NODE >; \
03346   template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
03347 
03348 
03349 
03350 #endif // TPETRA_MULTIVECTOR_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines