|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_KOKKOS_REFACTOR_MULTIVECTOR_DECL_HPP 00043 #define TPETRA_KOKKOS_REFACTOR_MULTIVECTOR_DECL_HPP 00044 00045 #include <Kokkos_DualView.hpp> 00046 #include <KokkosCompat_ClassicNodeAPI_Wrapper.hpp> 00047 #include <Kokkos_InnerProductSpaceTraits.hpp> 00048 #include <Kokkos_ArithTraits.hpp> 00049 #include <Tpetra_KokkosRefactor_DistObject.hpp> 00050 00051 namespace KokkosClassic { 00052 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00053 // forward declaration of DefaultArithmetic 00054 template<class KokkosMultiVectorType> 00055 class DefaultArithmetic; 00056 #endif // DOXYGEN_SHOULD_SKIP_THIS 00057 } // namespace KokkosClassic 00058 00059 namespace Tpetra { 00060 00061 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00062 // forward declaration of Vector, needed to prevent circular inclusions 00063 template<class S, class LO, class GO, class N> class Vector; 00064 00065 // forward declaration of Map 00066 template<class LO, class GO, class N> class Map; 00067 #endif // DOXYGEN_SHOULD_SKIP_THIS 00068 00295 template<class Scalar, 00296 class LocalOrdinal, 00297 class GlobalOrdinal, 00298 class DeviceType> 00299 class MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > : 00300 public DistObject<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 00301 { 00302 private: 00303 typedef Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> Node; 00304 typedef DistObject<Scalar, LocalOrdinal, GlobalOrdinal, Node> base_type; 00305 00306 public: 00308 00309 00311 typedef Scalar scalar_type; 00313 typedef LocalOrdinal local_ordinal_type; 00315 typedef GlobalOrdinal global_ordinal_type; 00317 typedef Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> node_type; 00318 00324 typedef typename Kokkos::Details::InnerProductSpaceTraits<Scalar>::dot_type dot_type; 00325 00332 typedef typename Kokkos::Details::ArithTraits<Scalar>::mag_type mag_type; 00333 00335 typedef DeviceType device_type; 00336 00338 typedef typename DeviceType::host_mirror_device_type host_mirror_device_type; 00339 00341 typedef Kokkos::DualView<scalar_type**, Kokkos::LayoutLeft, device_type> dual_view_type; 00342 00344 00345 00346 00348 MultiVector (); 00349 00360 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00361 size_t NumVectors, 00362 bool zeroOut=true); 00363 00365 MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &source); 00366 00375 MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source, 00376 const Teuchos::DataAccess copyOrView); 00377 00393 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00394 const Teuchos::ArrayView<const Scalar>& A, 00395 const size_t LDA, 00396 const size_t NumVectors); 00397 00411 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00412 const Teuchos::ArrayView<const Teuchos::ArrayView<const Scalar> >&ArrayOfPtrs, 00413 const size_t NumVectors); 00414 00425 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00426 const dual_view_type& view); 00427 00448 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00449 const dual_view_type& view, 00450 const dual_view_type& origView); 00451 00467 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00468 const dual_view_type& view, 00469 const Teuchos::ArrayView<const size_t>& whichVectors); 00470 00496 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00497 const dual_view_type& view, 00498 const dual_view_type& origView, 00499 const Teuchos::ArrayView<const size_t>& whichVectors); 00500 00502 template <class Node2> 00503 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node2> > 00504 clone (const Teuchos::RCP<Node2> &node2) const; 00505 00507 virtual ~MultiVector(); 00508 00510 00511 00512 00521 void 00522 replaceGlobalValue (GlobalOrdinal globalRow, 00523 size_t vectorIndex, 00524 const Scalar &value); 00525 00534 void 00535 sumIntoGlobalValue (GlobalOrdinal globalRow, 00536 size_t vectorIndex, 00537 const Scalar &value); 00538 00547 void 00548 replaceLocalValue (LocalOrdinal myRow, 00549 size_t vectorIndex, 00550 const Scalar &value); 00551 00560 void 00561 sumIntoLocalValue (LocalOrdinal myRow, 00562 size_t vectorIndex, 00563 const Scalar &value); 00564 00566 void putScalar (const Scalar &value); 00567 00583 void randomize(); 00584 00650 void replaceMap (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map); 00651 00658 void reduce(); 00659 00674 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& 00675 operator= (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source); 00676 00678 00680 00688 00690 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00691 subCopy (const Teuchos::Range1D &colRng) const; 00692 00694 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00695 subCopy (const Teuchos::ArrayView<const size_t> &cols) const; 00696 00698 Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00699 subView (const Teuchos::Range1D &colRng) const; 00700 00702 Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00703 subView (const Teuchos::ArrayView<const size_t> &cols) const; 00704 00706 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00707 subViewNonConst (const Teuchos::Range1D &colRng); 00708 00710 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00711 subViewNonConst (const Teuchos::ArrayView<const size_t> &cols); 00712 00775 Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00776 offsetView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap, 00777 size_t offset) const; 00778 00796 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00797 offsetViewNonConst (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &subMap, 00798 size_t offset); 00799 00801 Teuchos::RCP<const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00802 getVector (size_t j) const; 00803 00805 Teuchos::RCP<Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > 00806 getVectorNonConst (size_t j); 00807 00809 Teuchos::ArrayRCP<const Scalar> getData(size_t j) const; 00810 00812 Teuchos::ArrayRCP<Scalar> getDataNonConst(size_t j); 00813 00820 void get1dCopy (Teuchos::ArrayView<Scalar> A, size_t LDA) const; 00821 00827 void get2dCopy (Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const; 00828 00834 Teuchos::ArrayRCP<const Scalar> get1dView() const; 00835 00837 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > get2dView() const; 00838 00844 Teuchos::ArrayRCP<Scalar> get1dViewNonConst(); 00845 00847 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > get2dViewNonConst(); 00848 00853 KokkosClassic::MultiVector<Scalar,Node> getLocalMV() const; 00854 00865 TEUCHOS_DEPRECATED KokkosClassic::MultiVector<Scalar,Node> 00866 getLocalMVNonConst (); 00867 00872 dual_view_type getDualView () const; 00873 00892 template<class TargetDeviceType> 00893 void sync () { 00894 getDualView ().template sync<TargetDeviceType> (); 00895 } 00896 00902 template<class TargetDeviceType> 00903 void modify () { 00904 getDualView ().template modify<TargetDeviceType> (); 00905 } 00906 00938 template<class TargetDeviceType> 00939 typename Kokkos::Impl::if_c< 00940 Kokkos::Impl::is_same< 00941 typename device_type::memory_space, 00942 typename TargetDeviceType::memory_space>::value, 00943 typename dual_view_type::t_dev, 00944 typename dual_view_type::t_host>::type 00945 getLocalView () const { 00946 return getDualView ().template view<TargetDeviceType> (); 00947 } 00948 00950 00951 00952 00966 void 00967 dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 00968 const Teuchos::ArrayView<dot_type>& dots) const; 00969 00981 template <typename T> 00982 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<dot_type, T>::value), void >::type 00983 dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 00984 const Teuchos::ArrayView<T> &dots) const 00985 { 00986 const size_t sz = dots.size (); 00987 Teuchos::Array<dot_type> dts (sz); 00988 this->dot (A, dts); 00989 for (size_t i = 0; i < sz; ++i) { 00990 dots[i] = dts[i]; 00991 } 00992 } 00993 01011 void 01012 dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 01013 const Kokkos::View<dot_type*, device_type>& dots) const; 01014 01027 template <typename T> 01028 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<dot_type, T>::value), void >::type 01029 dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 01030 const Kokkos::View<T*, device_type>& dots) const 01031 { 01032 const size_t numDots = dots.dimension_0 (); 01033 Kokkos::View<dot_type*, device_type> dts ("MV::dot tmp", numDots); 01034 // Call overload that takes a Kokkos::View<dot_type*, device_type>. 01035 this->dot (A, dts); 01036 // FIXME (mfh 14 Jul 2014) Does this actually work if dot_type 01037 // and T differ? We would need a test for this, but only the 01038 // Sacado and Stokhos packages are likely to care about this use 01039 // case. 01040 Kokkos::deep_copy (dots, dts); 01041 } 01042 01044 void abs(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A); 01045 01047 void reciprocal(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A); 01048 01056 void scale (const Scalar &alpha); 01057 01066 void scale (Teuchos::ArrayView<const Scalar> alpha); 01067 01076 void scale (const Kokkos::View<const Scalar*, device_type> alpha); 01077 01086 void 01087 scale (const Scalar& alpha, 01088 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A); 01089 01096 void 01097 update (const Scalar& alpha, 01098 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 01099 const Scalar& beta); 01100 01107 void 01108 update (const Scalar& alpha, 01109 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 01110 const Scalar& beta, 01111 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B, 01112 const Scalar& gamma); 01113 01125 void norm1 (const Kokkos::View<mag_type*, device_type>& norms) const; 01126 01140 template <typename T> 01141 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type, T>::value), void >::type 01142 norm1 (const Kokkos::View<T*, device_type>& norms) const 01143 { 01144 const size_t numNorms = norms.dimension_0 (); 01145 Kokkos::View<mag_type*, device_type> tmpNorms ("MV::norm1 tmp", numNorms); 01146 // Call overload that takes a Kokkos::View<mag_type*, device_type>. 01147 this->norm1 (tmpNorms); 01148 // FIXME (mfh 15 Jul 2014) Does this actually work if mag_type 01149 // and T differ? We would need a test for this, but only the 01150 // Sacado and Stokhos packages are likely to care about this use 01151 // case. 01152 Kokkos::deep_copy (norms, tmpNorms); 01153 } 01154 01155 01161 void norm1 (const Teuchos::ArrayView<mag_type>& norms) const; 01162 01175 template <typename T> 01176 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type,T>::value), void >::type 01177 norm1 (const Teuchos::ArrayView<T>& norms) const 01178 { 01179 typedef typename Teuchos::ArrayView<T>::size_type size_type; 01180 const size_type sz = norms.size (); 01181 Teuchos::Array<mag_type> theNorms (sz); 01182 this->norm1 (theNorms); 01183 for (size_type i = 0; i < sz; ++i) { 01184 norms[i] = theNorms[i]; 01185 } 01186 } 01187 01200 void norm2 (const Kokkos::View<mag_type*, device_type>& norms) const; 01201 01213 template<typename T> 01214 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type, T>::value), void >::type 01215 norm2 (const Kokkos::View<T*, device_type>& norms) const 01216 { 01217 const size_t numNorms = norms.dimension_0 (); 01218 Kokkos::View<mag_type*, device_type> theNorms ("MV::norm2 tmp", numNorms); 01219 // Call overload that takes a Kokkos::View<mag_type*, device_type>. 01220 this->norm2 (theNorms); 01221 // FIXME (mfh 14 Jul 2014) Does this actually work if mag_type 01222 // and T differ? We would need a test for this, but only the 01223 // Sacado and Stokhos packages are likely to care about this use 01224 // case. 01225 Kokkos::deep_copy (norms, theNorms); 01226 } 01227 01234 void norm2 (const Teuchos::ArrayView<mag_type>& norms) const; 01235 01248 template <typename T> 01249 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type,T>::value), void >::type 01250 norm2 (const Teuchos::ArrayView<T>& norms) const 01251 { 01252 typedef typename Teuchos::ArrayView<T>::size_type size_type; 01253 const size_type sz = norms.size (); 01254 Teuchos::Array<mag_type> theNorms (sz); 01255 this->norm2 (theNorms); 01256 for (size_type i = 0; i < sz; ++i) { 01257 norms[i] = theNorms[i]; 01258 } 01259 } 01260 01267 void normInf (const Kokkos::View<mag_type*, device_type>& norms) const; 01268 01280 template<typename T> 01281 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type, T>::value), void >::type 01282 normInf (const Kokkos::View<T*, device_type>& norms) const 01283 { 01284 const size_t numNorms = norms.dimension_0 (); 01285 Kokkos::View<mag_type*, device_type> theNorms ("MV::normInf tmp", numNorms); 01286 // Call overload that takes a Kokkos::View<mag_type*, device_type>. 01287 this->normInf (theNorms); 01288 // FIXME (mfh 15 Jul 2014) Does this actually work if mag_type 01289 // and T differ? We would need a test for this, but only the 01290 // Sacado and Stokhos packages are likely to care about this use 01291 // case. 01292 Kokkos::deep_copy (norms, theNorms); 01293 } 01294 01300 void normInf (const Teuchos::ArrayView<mag_type>& norms) const; 01301 01314 template <typename T> 01315 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type,T>::value), void >::type 01316 normInf (const Teuchos::ArrayView<T>& norms) const 01317 { 01318 typedef typename Teuchos::ArrayView<T>::size_type size_type; 01319 const size_type sz = norms.size (); 01320 Teuchos::Array<mag_type> theNorms (sz); 01321 this->norm2 (theNorms); 01322 for (size_type i = 0; i < sz; ++i) { 01323 norms[i] = theNorms[i]; 01324 } 01325 } 01326 01329 void 01330 normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& weights, 01331 const Teuchos::ArrayView<mag_type>& norms) const; 01332 01345 template <typename T> 01346 typename Kokkos::Impl::enable_if< !(Kokkos::Impl::is_same<mag_type,T>::value), void >::type 01347 normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& weights, 01348 const Teuchos::ArrayView<T>& norms) const 01349 { 01350 typedef typename Teuchos::ArrayView<T>::size_type size_type; 01351 const size_type sz = norms.size (); 01352 Teuchos::Array<mag_type> theNorms (sz); 01353 this->normWeighted (weights, theNorms); 01354 for (size_type i = 0; i < sz; ++i) { 01355 norms[i] = theNorms[i]; 01356 } 01357 } 01358 01361 void meanValue(const Teuchos::ArrayView<Scalar> &means) const; 01362 01368 void 01369 multiply (Teuchos::ETransp transA, 01370 Teuchos::ETransp transB, 01371 const Scalar& alpha, 01372 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 01373 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B, 01374 const Scalar& beta); 01375 01396 void 01397 elementWiseMultiply (Scalar scalarAB, 01398 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, 01399 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B, 01400 Scalar scalarThis); 01402 01403 01404 01406 size_t getNumVectors() const; 01407 01409 size_t getLocalLength() const; 01410 01412 global_size_t getGlobalLength() const; 01413 01419 size_t getStride() const; 01420 01424 bool isConstantStride() const; 01425 01427 01429 01430 01432 virtual std::string description() const; 01433 01462 virtual void 01463 describe (Teuchos::FancyOStream& out, 01464 const Teuchos::EVerbosityLevel verbLevel = 01465 Teuchos::Describable::verbLevel_default) const; 01467 01481 virtual void 01482 removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap); 01483 01494 void setCopyOrView (const Teuchos::DataAccess copyOrView) { 01495 TEUCHOS_TEST_FOR_EXCEPTION( 01496 copyOrView == Teuchos::Copy, std::invalid_argument, 01497 "Tpetra::MultiVector::setCopyOrView: The Kokkos refactor version of " 01498 "MultiVector _only_ implements view semantics. You may not call this " 01499 "method with copyOrView = Teuchos::Copy. The only valid argument is " 01500 "Teuchos::View."); 01501 } 01502 01513 Teuchos::DataAccess getCopyOrView () const { 01514 return Teuchos::View; 01515 } 01516 01531 void 01532 assign (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& src); 01533 01534 protected: 01535 template <class S, class LO, class GO, class D> 01536 friend MultiVector<S,LO,GO,Kokkos::Compat::KokkosDeviceWrapperNode<D> > 01537 createCopy (const MultiVector<S,LO,GO,Kokkos::Compat::KokkosDeviceWrapperNode<D> >& src); 01538 01539 template <class DS, class DL, class DG, class DD, class SS, class SL, class SG, class SD> 01540 friend void 01541 deep_copy (MultiVector<DS,DL,DG,Kokkos::Compat::KokkosDeviceWrapperNode<DD> >& dst, 01542 const MultiVector<SS,SL,SG,Kokkos::Compat::KokkosDeviceWrapperNode<SD> >& src); 01543 01544 typedef KokkosClassic::MultiVector<Scalar,Node> KMV; 01545 typedef KokkosClassic::DefaultArithmetic<KMV> MVT; 01546 01551 //KMV lclMV_; 01552 01559 mutable dual_view_type view_; 01560 01590 mutable dual_view_type origView_; 01591 01604 Array<size_t> whichVectors_; 01605 01607 01608 01609 template <class S,class LO,class GO,class N> 01610 friend Teuchos::RCP<MultiVector<S,LO,GO,N> > 01611 createMultiVectorFromView (const Teuchos::RCP<const Map<LO,GO,N> >&, const Teuchos::ArrayRCP<S>&, const size_t, const size_t); 01612 01613 bool vectorIndexOutOfRange (size_t VectorIndex) const; 01614 01619 template <class T> 01620 ArrayRCP<T> getSubArrayRCP(ArrayRCP<T> arr, size_t j) const; 01621 01623 size_t getOrigNumLocalRows () const; 01624 01626 size_t getOrigNumLocalCols () const; 01627 01629 01630 01631 01636 virtual bool 01637 checkSizes (const SrcDistObject& sourceObj); 01638 01640 virtual size_t constantNumberOfPackets () const; 01641 01643 virtual bool useNewInterface () { return true; } 01644 01645 virtual void 01646 copyAndPermuteNew ( 01647 const SrcDistObject& sourceObj, 01648 size_t numSameIDs, 01649 const Kokkos::View<const LocalOrdinal*, device_type>& permuteToLIDs, 01650 const Kokkos::View<const LocalOrdinal*, device_type>& permuteFromLIDs); 01651 01652 virtual void 01653 packAndPrepareNew ( 01654 const SrcDistObject& sourceObj, 01655 const Kokkos::View<const LocalOrdinal*, device_type>& exportLIDs, 01656 Kokkos::View<Scalar*, device_type>& exports, 01657 const Kokkos::View<size_t*, device_type>& numPacketsPerLID, 01658 size_t& constantNumPackets, 01659 Distributor &distor); 01660 01661 virtual void 01662 unpackAndCombineNew ( 01663 const Kokkos::View<const LocalOrdinal*, device_type>& importLIDs, 01664 const Kokkos::View<const Scalar*, device_type>& imports, 01665 const Kokkos::View<size_t*, device_type>& numPacketsPerLID, 01666 size_t constantNumPackets, 01667 Distributor &distor, 01668 CombineMode CM); 01669 01670 void createViews () const; 01671 void createViewsNonConst (KokkosClassic::ReadWriteOption rwo); 01672 void releaseViews () const; 01674 01675 typename dual_view_type::t_dev getKokkosView() const { return view_.d_view; } 01676 01677 }; // class MultiVector 01678 01679 namespace Details { 01680 01681 // Partial specialization of MultiVectorCloner, for when the 01682 // source and destination MultiVector types are both Kokkos 01683 // refactor types, and when they both have the same Scalar type, 01684 // but all their other template parameters might be different. 01685 template<class ScalarType, 01686 class DstLocalOrdinalType, class DstGlobalOrdinalType, class DstDeviceType, 01687 class SrcLocalOrdinalType, class SrcGlobalOrdinalType, class SrcDeviceType> 01688 struct MultiVectorCloner< ::Tpetra::MultiVector<ScalarType, 01689 DstLocalOrdinalType, 01690 DstGlobalOrdinalType, 01691 Kokkos::Compat::KokkosDeviceWrapperNode<DstDeviceType> >, 01692 ::Tpetra::MultiVector<ScalarType, 01693 SrcLocalOrdinalType, 01694 SrcGlobalOrdinalType, 01695 Kokkos::Compat::KokkosDeviceWrapperNode<SrcDeviceType> > > 01696 { 01697 typedef Kokkos::Compat::KokkosDeviceWrapperNode<DstDeviceType> dst_node_type; 01698 typedef Kokkos::Compat::KokkosDeviceWrapperNode<SrcDeviceType> src_node_type; 01699 typedef ::Tpetra::MultiVector<ScalarType, DstLocalOrdinalType, 01700 DstGlobalOrdinalType, 01701 dst_node_type> dst_mv_type; 01702 typedef ::Tpetra::MultiVector<ScalarType, SrcLocalOrdinalType, 01703 SrcGlobalOrdinalType, 01704 src_node_type> src_mv_type; 01705 01706 static Teuchos::RCP<dst_mv_type> 01707 clone (const src_mv_type& X, const Teuchos::RCP<dst_node_type>& node2) 01708 { 01709 typedef typename src_mv_type::map_type src_map_type; 01710 typedef typename dst_mv_type::map_type dst_map_type; 01711 typedef typename dst_mv_type::node_type dst_node_type; 01712 typedef typename src_mv_type::dual_view_type src_dual_view_type; 01713 typedef typename dst_mv_type::dual_view_type dst_dual_view_type; 01714 01715 // Clone X's Map to have the new Node type. 01716 RCP<const src_map_type> map1 = X.getMap (); 01717 RCP<const dst_map_type> map2 = map1.is_null () ? 01718 Teuchos::null : map1->template clone<dst_node_type> (node2); 01719 01720 const size_t lclNumRows = X.getLocalLength (); 01721 const size_t numCols = X.getNumVectors (); 01722 src_dual_view_type X_view = X.getDualView (); 01723 dst_dual_view_type Y_view ("MV::dual_view", lclNumRows, numCols); 01724 01725 RCP<dst_mv_type> Y = rcp (new dst_mv_type (map2, Y_view)); 01726 // Let deep_copy do the work for us, to avoid code duplication. 01727 ::Tpetra::deep_copy (Y, X); 01728 return Y ; 01729 } 01730 }; 01731 01732 // Partial specialization for the Kokkos refactor specialization 01733 // of Tpetra::MultiVector. 01734 template<class S, class LO, class GO, class DeviceType> 01735 struct CreateMultiVectorFromView<MultiVector<S, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > { 01736 typedef S scalar_type; 01737 typedef LO local_ordinal_type; 01738 typedef GO global_ordinal_type; 01739 typedef Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> node_type; 01740 typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type; 01741 typedef MultiVector<S, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MultiVectorType; 01742 01743 static Teuchos::RCP<MultiVectorType> 01744 create (const Teuchos::RCP<const map_type>& map, 01745 const Teuchos::ArrayRCP<scalar_type>& view, 01746 const size_t LDA, 01747 const size_t numVectors) 01748 { 01749 (void) map; 01750 (void) view; 01751 (void) LDA; 01752 (void) numVectors; 01753 01754 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Not implemented"); 01755 } 01756 }; 01757 } // namespace Details 01758 01759 template <class ST, class LO, class GO, class DT> 01760 MultiVector<ST, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DT> > 01761 createCopy (const MultiVector<ST, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DT> >& src); 01762 01763 namespace { // (anonymous) 01764 template<class DstType, class SrcType, class IndexType, class DeviceType, 01765 const bool DstConstStride, const bool SrcConstStride> 01766 struct DeepCopySelectedVectors { 01767 typedef DeviceType device_type; 01768 DstType dst_; 01769 SrcType src_; 01770 Kokkos::View<const IndexType*, DeviceType> whichVectorDst_; 01771 Kokkos::View<const IndexType*, DeviceType> whichVectorSrc_; 01772 const IndexType numVecs_; 01773 01774 DeepCopySelectedVectors (DstType dst, 01775 SrcType src, 01776 const Kokkos::View<const IndexType*, DeviceType>& whichVectorDst, 01777 const Kokkos::View<const IndexType*, DeviceType>& whichVectorSrc) : 01778 dst_ (dst), 01779 src_ (src), 01780 whichVectorDst_ (whichVectorDst), 01781 whichVectorSrc_ (whichVectorSrc), 01782 numVecs_ (whichVectorSrc_.dimension_0 ()) 01783 {} 01784 01785 DeepCopySelectedVectors (DstType dst, SrcType src) : 01786 dst_ (dst), 01787 src_ (src), 01788 numVecs_ (dst.dimension_1 ()) 01789 { 01790 TEUCHOS_TEST_FOR_EXCEPTION( 01791 ! DstConstStride || ! SrcConstStride, std::logic_error, 01792 "Tpetra::DeepCopySelectedVectors: You may not use the constant-stride " 01793 "constructor if either of the Boolean template parameters is false."); 01794 } 01795 01796 void KOKKOS_INLINE_FUNCTION operator () (const IndexType i) const { 01797 if (DstConstStride) { 01798 if (SrcConstStride) { 01799 for (IndexType j = 0; j < numVecs_; ++j) { 01800 dst_(i,j) = src_(i,j); 01801 } 01802 } else { 01803 for (IndexType j = 0; j < numVecs_; ++j) { 01804 dst_(i,j) = src_(i,whichVectorSrc_(j)); 01805 } 01806 } 01807 } else { 01808 if (SrcConstStride) { 01809 for (IndexType j = 0; j < numVecs_; ++j) { 01810 dst_(i,whichVectorDst_(j)) = src_(i,j); 01811 } 01812 } else { 01813 for (IndexType j = 0; j < numVecs_; ++j) { 01814 dst_(i,whichVectorDst_(j)) = src_(i,whichVectorSrc_(j)); 01815 } 01816 } 01817 } 01818 } 01819 }; 01820 } // namespace (anonymous) 01821 01822 01823 // NOTE (mfh 11 Sep 2014) Even though this partial specialization 01824 // looks redundant with the one in Tpetra_MultiVector_decl.hpp, it 01825 // needs to be here, else GCC 4.8.2 gives a compiler error saying 01826 // that calls to Tpetra::deep_copy are ambiguous. 01827 template <class ST, class LO, class GO, class DT> 01828 void 01829 deep_copy (MultiVector<ST, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DT> >& dst, 01830 const MultiVector<ST, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DT> >& src) 01831 { 01832 // NOTE (mfh 11 Sep 2014) We can't implement deep_copy with 01833 // shallow-copy operator=, because that would invalidate existing 01834 // views of dst! 01835 dst.assign (src); 01836 } 01837 01838 01839 template <class DS, class DL, class DG, class DD, 01840 class SS, class SL, class SG, class SD> 01841 void 01842 deep_copy (MultiVector<DS, DL, DG, Kokkos::Compat::KokkosDeviceWrapperNode<DD> >& dst, 01843 const MultiVector<SS, SL, SG, Kokkos::Compat::KokkosDeviceWrapperNode<SD> >& src) 01844 { 01845 using Kokkos::parallel_for; 01846 typedef MultiVector<DS,DL,DG,Kokkos::Compat::KokkosDeviceWrapperNode<DD> > MVD; 01847 typedef const MultiVector<SS,SL,SG,Kokkos::Compat::KokkosDeviceWrapperNode<SD> > MVS; 01848 01849 TEUCHOS_TEST_FOR_EXCEPTION( 01850 dst.getGlobalLength () != src.getGlobalLength () || 01851 dst.getNumVectors () != src.getNumVectors (), std::invalid_argument, 01852 "Tpetra::deep_copy: Global dimensions of the two Tpetra::MultiVector " 01853 "objects do not match. src has dimensions [" << src.getGlobalLength () 01854 << "," << src.getNumVectors () << "], and dst has dimensions [" 01855 << dst.getGlobalLength () << "," << dst.getNumVectors () << "]."); 01856 01857 // FIXME (mfh 28 Jul 2014) Don't throw; just set a local error flag. 01858 TEUCHOS_TEST_FOR_EXCEPTION( 01859 dst.getLocalLength () != src.getLocalLength (), std::invalid_argument, 01860 "Tpetra::deep_copy: The local row counts of the two Tpetra::MultiVector " 01861 "objects do not match. src has " << src.getLocalLength () << " row(s) " 01862 << " and dst has " << dst.getLocalLength () << " row(s)."); 01863 01864 if (src.isConstantStride () && dst.isConstantStride ()) { 01865 Kokkos::deep_copy (dst.getDualView (), src.getDualView ()); 01866 } 01867 else { 01868 if (dst.isConstantStride ()) { 01869 const SL numWhichVecs = static_cast<SL> (src.whichVectors_.size ()); 01870 const std::string whichVecsLabel ("MV::deep_copy::whichVecs"); 01871 01872 // We can't sync src, since it is only an input argument. 01873 // Thus, we have to use the most recently modified version of 01874 // src, device or host. 01875 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 01876 // Copy from the device version of src. 01877 // 01878 // whichVecs tells the kernel which vectors (columns) of src 01879 // to copy. Fill whichVecs on the host, and sync to device. 01880 typedef Kokkos::DualView<SL*, DD> whichvecs_type; 01881 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs); 01882 whichVecs.template modify<typename DD::host_mirror_device_type> (); 01883 for (SL i = 0; i < numWhichVecs; ++i) { 01884 whichVecs.h_view(i) = static_cast<SL> (src.whichVectors_[i]); 01885 } 01886 // Sync the host version of whichVecs to the device. 01887 whichVecs.template sync<DD> (); 01888 01889 // Mark the device version of dst's DualView as modified. 01890 dst.template modify<DD> (); 01891 // Copy from the selected vectors of src to dst, on the 01892 // device. The functor ignores its 3rd arg in this case. 01893 typedef DeepCopySelectedVectors<typename MVD::dual_view_type::t_dev, 01894 typename MVS::dual_view_type::t_dev, SL, DD, true, false> functor_type; 01895 functor_type f (dst.getDualView ().template view<DD> (), 01896 src.getDualView ().template view<DD> (), 01897 whichVecs.d_view, whichVecs.d_view); 01898 Kokkos::parallel_for (src.getLocalLength (), f); 01899 // Sync dst's DualView to the host. This is cheaper than 01900 // repeating the above copy from src to dst on the host. 01901 dst.template sync<typename DD::host_mirror_device_type> (); 01902 } 01903 else { // host version of src was the most recently modified 01904 // Copy from the host version of src. 01905 // 01906 // whichVecs tells the kernel which vectors (columns) of src 01907 // to copy. Fill whichVecs on the host, and use it there. 01908 typedef typename DD::host_mirror_device_type host_dev_type; 01909 typedef Kokkos::View<SL*, host_dev_type> whichvecs_type; 01910 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs); 01911 for (SL i = 0; i < numWhichVecs; ++i) { 01912 whichVecs(i) = static_cast<SL> (src.whichVectors_[i]); 01913 } 01914 // Copy from the selected vectors of src to dst, on the host. 01915 // The functor ignores its 3rd arg in this case. 01916 typedef DeepCopySelectedVectors<typename MVD::dual_view_type::t_host, 01917 typename MVS::dual_view_type::t_host, SL, host_dev_type, 01918 true, false> functor_type; 01919 functor_type f (dst.getDualView ().template view<host_dev_type> (), 01920 src.getDualView ().template view<host_dev_type> (), 01921 whichVecs, whichVecs); 01922 Kokkos::parallel_for (src.getLocalLength (), f); 01923 // Sync dst back to the device, since we only copied on the host. 01924 dst.template sync<DD> (); 01925 } 01926 } 01927 else { // dst is NOT constant stride 01928 if (src.isConstantStride ()) { 01929 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 01930 // Copy from the device version of src. 01931 // 01932 // whichVecs tells the kernel which vectors (columns) of dst 01933 // to copy. Fill whichVecs on the host, and sync to device. 01934 typedef Kokkos::DualView<DL*, DD> whichvecs_type; 01935 const std::string whichVecsLabel ("MV::deep_copy::whichVecs"); 01936 const DL numWhichVecs = static_cast<DL> (dst.whichVectors_.size ()); 01937 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs); 01938 whichVecs.template modify<typename DD::host_mirror_device_type> (); 01939 for (DL i = 0; i < numWhichVecs; ++i) { 01940 whichVecs.h_view(i) = dst.whichVectors_[i]; 01941 } 01942 // Sync the host version of whichVecs to the device. 01943 whichVecs.template sync<DD> (); 01944 01945 // Copy src to the selected vectors of dst, on the device. 01946 // The functor ignores its 4th arg in this case. 01947 typedef DeepCopySelectedVectors<typename MVD::dual_view_type::t_dev, 01948 typename MVS::dual_view_type::t_dev, DL, DD, false, true> functor_type; 01949 functor_type f (dst.getDualView ().template view<DD> (), 01950 src.getDualView ().template view<DD> (), 01951 whichVecs.d_view, whichVecs.d_view); 01952 Kokkos::parallel_for (src.getLocalLength (), f); 01953 // We can't sync src and repeat the above copy on the 01954 // host, so sync dst back to the host. 01955 // 01956 // FIXME (mfh 29 Jul 2014) This may overwrite columns that 01957 // don't actually belong to dst's view. 01958 dst.template sync<typename DD::host_mirror_device_type> (); 01959 } 01960 else { // host version of src was the most recently modified 01961 // Copy from the host version of src. 01962 // 01963 // whichVecs tells the kernel which vectors (columns) of src 01964 // to copy. Fill whichVecs on the host, and use it there. 01965 typedef typename DD::host_mirror_device_type host_dev_type; 01966 typedef Kokkos::View<DL*, host_dev_type> whichvecs_type; 01967 const DL numWhichVecs = static_cast<DL> (dst.whichVectors_.size ()); 01968 whichvecs_type whichVecs ("MV::deep_copy::whichVecs", numWhichVecs); 01969 for (DL i = 0; i < numWhichVecs; ++i) { 01970 whichVecs(i) = static_cast<DL> (dst.whichVectors_[i]); 01971 } 01972 // Copy from src to the selected vectors of dst, on the 01973 // host. The functor ignores its 4th arg in this case. 01974 typedef DeepCopySelectedVectors<typename MVD::dual_view_type::t_host, 01975 typename MVS::dual_view_type::t_host, DL, host_dev_type, 01976 false, true> functor_type; 01977 functor_type f (dst.getDualView ().template view<host_dev_type> (), 01978 src.getDualView ().template view<host_dev_type> (), 01979 whichVecs, whichVecs); 01980 Kokkos::parallel_for (src.getLocalLength (), f); 01981 // Sync dst back to the device, since we only copied on the host. 01982 // 01983 // FIXME (mfh 29 Jul 2014) This may overwrite columns that 01984 // don't actually belong to dst's view. 01985 dst.template sync<DD> (); 01986 } 01987 } 01988 else { // neither src nor dst have constant stride 01989 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 01990 // Copy from the device version of src. 01991 // 01992 // whichVectorsDst tells the kernel which vectors 01993 // (columns) of dst to copy. Fill it on the host, and 01994 // sync to device. 01995 const DL dstNumWhichVecs = static_cast<DL> (dst.whichVectors_.size ()); 01996 Kokkos::DualView<DL*, DD> whichVecsDst ("MV::deep_copy::whichVecsDst", 01997 dstNumWhichVecs); 01998 whichVecsDst.template modify<typename DD::host_mirror_device_type> (); 01999 for (DL i = 0; i < dstNumWhichVecs; ++i) { 02000 whichVecsDst.h_view(i) = static_cast<DL> (dst.whichVectors_[i]); 02001 } 02002 // Sync the host version of whichVecsDst to the device. 02003 whichVecsDst.template sync<DD> (); 02004 02005 // whichVectorsSrc tells the kernel which vectors 02006 // (columns) of src to copy. Fill it on the host, and 02007 // sync to device. Use the destination MultiVector's 02008 // LocalOrdinal type here. 02009 const DL srcNumWhichVecs = static_cast<DL> (src.whichVectors_.size ()); 02010 Kokkos::DualView<DL*, DD> whichVecsSrc ("MV::deep_copy::whichVecsSrc", 02011 srcNumWhichVecs); 02012 whichVecsSrc.template modify<typename DD::host_mirror_device_type> (); 02013 for (DL i = 0; i < srcNumWhichVecs; ++i) { 02014 whichVecsSrc.h_view(i) = static_cast<DL> (src.whichVectors_[i]); 02015 } 02016 // Sync the host version of whichVecsSrc to the device. 02017 whichVecsSrc.template sync<DD> (); 02018 02019 // Copy from the selected vectors of src to the selected 02020 // vectors of dst, on the device. 02021 typedef DeepCopySelectedVectors<typename MVD::dual_view_type::t_dev, 02022 typename MVS::dual_view_type::t_dev, DL, DD, false, false> 02023 functor_type; 02024 functor_type f (dst.getDualView ().template view<DD> (), 02025 src.getDualView ().template view<DD> (), 02026 whichVecsDst.d_view, whichVecsSrc.d_view); 02027 Kokkos::parallel_for (src.getLocalLength (), f); 02028 } 02029 else { 02030 typedef typename DD::host_mirror_device_type host_dev_type; 02031 02032 const DL dstNumWhichVecs = static_cast<DL> (dst.whichVectors_.size ()); 02033 Kokkos::View<DL*, host_dev_type> whichVectorsDst ("dstWhichVecs", dstNumWhichVecs); 02034 for (DL i = 0; i < dstNumWhichVecs; ++i) { 02035 whichVectorsDst(i) = dst.whichVectors_[i]; 02036 } 02037 02038 // Use the destination MultiVector's LocalOrdinal type here. 02039 const DL srcNumWhichVecs = static_cast<DL> (src.whichVectors_.size ()); 02040 Kokkos::View<DL*, host_dev_type> whichVectorsSrc ("srcWhichVecs", srcNumWhichVecs); 02041 for (DL i = 0; i < srcNumWhichVecs; ++i) { 02042 whichVectorsSrc(i) = src.whichVectors_[i]; 02043 } 02044 02045 typedef DeepCopySelectedVectors<typename MVD::dual_view_type::t_host, 02046 typename MVS::dual_view_type::t_host, 02047 DL, host_dev_type, false, false> functor_type; 02048 functor_type f (dst.getDualView ().template view<host_dev_type> (), 02049 src.getDualView ().template view<host_dev_type> (), 02050 whichVectorsDst, whichVectorsSrc); 02051 Kokkos::parallel_for (src.getLocalLength (), f); 02052 02053 // We can't sync src and repeat the above copy on the 02054 // host, so sync dst back to the host. 02055 // 02056 // FIXME (mfh 29 Jul 2014) This may overwrite columns that 02057 // don't actually belong to dst's view. 02058 dst.template sync<typename DD::host_mirror_device_type> (); 02059 } 02060 } 02061 } 02062 } 02063 } 02064 } // namespace Tpetra 02065 02066 #endif // TPETRA_KOKKOS_REFACTOR_MULTIVECTOR_DECL_HPP
1.7.6.1