|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_KOKKOS_REFACTOR_MULTIVECTOR_DEF_HPP 00043 #define TPETRA_KOKKOS_REFACTOR_MULTIVECTOR_DEF_HPP 00044 00045 #include <Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp> 00046 00047 #ifdef DOXYGEN_USE_ONLY 00048 # include "Tpetra_KokkosRefactor_MultiVector_decl.hpp" 00049 #endif 00050 00051 #include <KokkosCompat_View.hpp> 00052 #include <Kokkos_MV.hpp> 00053 #include <Kokkos_MV_GEMM.hpp> 00054 #include <Kokkos_Random.hpp> 00055 00056 namespace Tpetra { 00057 00058 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00059 bool 00060 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00061 vectorIndexOutOfRange(size_t VectorIndex) const { 00062 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors(); 00063 } 00064 00065 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00066 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00067 MultiVector () : 00068 base_type (Teuchos::rcp (new Map<LocalOrdinal, GlobalOrdinal, node_type> ())) 00069 {} 00070 00071 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00072 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00073 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00074 size_t NumVectors, 00075 bool zeroOut) : /* default is true */ 00076 base_type (map) 00077 { 00078 using Teuchos::ArrayRCP; 00079 using Teuchos::RCP; 00080 00081 (void) zeroOut; // View allocation does first touch automatically. 00082 00083 TEUCHOS_TEST_FOR_EXCEPTION(NumVectors < 1, std::invalid_argument, 00084 "Tpetra::MultiVector::MultiVector(): NumVectors must be strictly positive."); 00085 const size_t myLen = getLocalLength(); 00086 if (myLen > 0) { 00087 RCP<Node> node = map->getNode(); 00088 // On host-type Kokkos Nodes, allocBuffer() just calls the 00089 // one-argument version of arcp to allocate memory. This should 00090 // not fill the memory by default, otherwise we would lose the 00091 // first-touch allocation optimization. 00092 00093 // Allocate a DualView from new Kokkos, wrap its device data into an ArrayRCP 00094 view_ = dual_view_type("MV::dual_view",myLen,NumVectors); 00095 } 00096 else { 00097 view_ = dual_view_type("MV::dual_view",0,NumVectors); 00098 } 00099 00100 origView_ = view_; 00101 } 00102 00103 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00104 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00105 MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& source) : 00106 base_type (source), 00107 view_ (source.view_), 00108 origView_ (source.origView_), 00109 whichVectors_ (source.whichVectors_) 00110 {} 00111 00112 00113 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00114 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00115 MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& source, 00116 const Teuchos::DataAccess copyOrView) : 00117 base_type (source), 00118 view_ (source.view_), 00119 origView_ (source.origView_), 00120 whichVectors_ (source.whichVectors_) 00121 { 00122 if (copyOrView == Teuchos::Copy) { 00123 // Reuse the conveniently already existing function that creates 00124 // a deep copy. 00125 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,node_type> cpy = 00126 createCopy (source); 00127 this->view_ = cpy.view_; 00128 this->origView_ = cpy.origView_; 00129 this->whichVectors_ = cpy.whichVectors_; 00130 } 00131 else if (copyOrView == Teuchos::View) { 00132 } 00133 else { 00134 TEUCHOS_TEST_FOR_EXCEPTION( 00135 true, std::invalid_argument, "Tpetra::MultiVector copy constructor: " 00136 "The second argument 'copyOrView' has an invalid value " << copyOrView 00137 << ". Valid values include Teuchos::Copy = " << Teuchos::Copy << 00138 " and Teuchos::View = " << Teuchos::View << "."); 00139 } 00140 } 00141 00142 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00143 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00144 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00145 const dual_view_type& view) : 00146 base_type (map), 00147 view_ (view), 00148 origView_ (view) 00149 { 00150 using Teuchos::ArrayRCP; 00151 using Teuchos::RCP; 00152 const char tfecfFuncName[] = "Tpetra::MultiVector(map,view)"; 00153 00154 // Get stride of view: if second dimension is 0, the 00155 // stride might be 0, so take view_dimension instead. 00156 size_t stride[8]; 00157 origView_.stride (stride); 00158 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] : origView_.dimension_0 (); 00159 const size_t numVecs = view_.dimension_1 (); 00160 00161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numVecs < 1, std::invalid_argument, 00162 ": numVecs must be strictly positive, but you specified numVecs = " 00163 << numVecs << "."); 00164 const size_t myLen = getLocalLength (); 00165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument, 00166 ": LDA must be large enough to accomodate the local entries."); 00167 } 00168 00169 00170 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00171 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00172 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00173 const dual_view_type& view, 00174 const dual_view_type& origView) : 00175 base_type (map), 00176 view_ (view), 00177 origView_ (origView) 00178 { 00179 using Teuchos::ArrayRCP; 00180 using Teuchos::RCP; 00181 const char tfecfFuncName[] = "Tpetra::MultiVector(map,view,origView)"; 00182 00183 // Get stride of view: if second dimension is 0, the 00184 // stride might be 0, so take view_dimension instead. 00185 size_t stride[8]; 00186 origView_.stride (stride); 00187 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] : origView_.dimension_0 (); 00188 const size_t numVecs = view_.dimension_1 (); 00189 00190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numVecs < 1, std::invalid_argument, 00191 ": numVecs must be strictly positive, but you specified numVecs = " 00192 << numVecs << "."); 00193 const size_t myLen = getLocalLength (); 00194 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument, 00195 ": LDA must be large enough to accomodate the local entries."); 00196 } 00197 00198 00199 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00200 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00201 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00202 const dual_view_type& view, 00203 const Teuchos::ArrayView<const size_t>& whichVectors) : 00204 base_type (map), 00205 view_ (view), 00206 origView_ (view), 00207 whichVectors_ (whichVectors.begin (), whichVectors.end ()) 00208 { 00209 using Kokkos::ALL; 00210 using Kokkos::subview; 00211 using Teuchos::ArrayRCP; 00212 using Teuchos::RCP; 00213 const char tfecfFuncName[] = "MultiVector(map,view,whichVectors)"; 00214 00215 // Get stride of view: if second dimension is 0, the 00216 // stride might be 0, so take view_dimension instead. 00217 size_t stride[8]; 00218 origView_.stride (stride); 00219 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] : origView_.dimension_0 (); 00220 size_t numVecs = view_.dimension_1 (); 00221 00222 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numVecs < 1, std::invalid_argument, 00223 ": numVecs must be strictly positive, but you specified numVecs = " 00224 << numVecs << "."); 00225 const size_t myLen = getLocalLength(); 00226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument, 00227 ": LDA must be large enough to accomodate the local entries."); 00228 00229 if (whichVectors.size () == 1) { 00230 // If whichVectors has only one entry, we don't need to bother 00231 // with nonconstant stride. Just shift the view over so it 00232 // points to the desired column. 00233 // 00234 // NOTE (mfh 10 May 2014) This is a special case where we set 00235 // origView_ just to view that one column, not all of the 00236 // original columns. This ensures that the use of origView_ in 00237 // offsetView works correctly. 00238 view_ = subview<dual_view_type> (view_, ALL (), whichVectors[0]); 00239 origView_ = subview<dual_view_type> (origView_, ALL (), whichVectors[0]); 00240 numVecs = 1; 00241 // whichVectors_.size() == 0 means "constant stride." 00242 whichVectors_.clear (); 00243 } 00244 } 00245 00246 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00247 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00248 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00249 const dual_view_type& view, 00250 const dual_view_type& origView, 00251 const Teuchos::ArrayView<const size_t>& whichVectors) : 00252 base_type (map), 00253 view_ (view), 00254 origView_ (origView), 00255 whichVectors_ (whichVectors.begin (), whichVectors.end ()) 00256 { 00257 using Kokkos::ALL; 00258 using Kokkos::subview; 00259 using Teuchos::ArrayRCP; 00260 using Teuchos::RCP; 00261 const char tfecfFuncName[] = "MultiVector(map,view,whichVectors)"; 00262 00263 // Get stride of view: if second dimension is 0, the 00264 // stride might be 0, so take view_dimension instead. 00265 size_t stride[8]; 00266 origView_.stride (stride); 00267 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] : origView_.dimension_0 (); 00268 size_t numVecs = view_.dimension_1 (); 00269 00270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numVecs < 1, std::invalid_argument, 00271 ": numVecs must be strictly positive, but you specified numVecs = " 00272 << numVecs << "."); 00273 const size_t myLen = getLocalLength(); 00274 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument, 00275 ": LDA must be large enough to accomodate the local entries."); 00276 00277 if (whichVectors.size () == 1) { 00278 // If whichVectors has only one entry, we don't need to bother 00279 // with nonconstant stride. Just shift the view over so it 00280 // points to the desired column. 00281 // 00282 // NOTE (mfh 10 May 2014) This is a special case where we set 00283 // origView_ just to view that one column, not all of the 00284 // original columns. This ensures that the use of origView_ in 00285 // offsetView works correctly. 00286 view_ = subview<dual_view_type> (view_, ALL (), whichVectors[0]); 00287 origView_ = subview<dual_view_type> (origView_, ALL (), whichVectors[0]); 00288 numVecs = 1; 00289 // whichVectors_.size() == 0 means "constant stride." 00290 whichVectors_.clear (); 00291 } 00292 } 00293 00294 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00295 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00296 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00297 const Teuchos::ArrayView<const Scalar>& data, 00298 const size_t LDA, 00299 const size_t numVecs) : 00300 base_type (map) 00301 { 00302 // Deep copy constructor, constant stride (NO whichVectors_). 00303 // There is no need for a deep copy constructor with nonconstant stride. 00304 00305 const char tfecfFuncName[] = "MultiVector(map,data,LDA,numVecs)"; 00306 const size_t numRows = this->getLocalLength (); 00307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < numRows, std::runtime_error, 00308 ": LDA = " << LDA << " < numRows = " << numRows << "."); 00309 00310 view_ = dual_view_type ("MV::view_", numRows, numVecs); 00311 for (size_t i = 0; i < numRows; ++i) { 00312 for (size_t j = 0; j < numVecs; ++j) { 00313 view_.h_view(i,j) = data[j*LDA+i]; 00314 } 00315 } 00316 view_.template modify<typename dual_view_type::host_mirror_device_type> (); 00317 origView_ = view_; 00318 } 00319 00320 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00321 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00322 MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map, 00323 const Teuchos::ArrayView<const ArrayView<const Scalar> >& ArrayOfPtrs, 00324 const size_t NumVectors) : 00325 base_type (map) 00326 { 00327 using Teuchos::ArrayRCP; 00328 using Teuchos::ArrayView; 00329 using Teuchos::RCP; 00330 const char tfecfFuncName[] = "MultiVector(map,ArrayOfPtrs,NumVectors)"; 00331 00332 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00333 NumVectors < 1 || NumVectors != static_cast<size_t>(ArrayOfPtrs.size()), 00334 std::runtime_error, 00335 ": ArrayOfPtrs.size() must be strictly positive and as large as ArrayOfPtrs."); 00336 const size_t myLen = getLocalLength (); 00337 view_ = dual_view_type ("MV::view_", myLen, NumVectors); 00338 00339 // TODO: write a functor and use parallel_for. 00340 00341 for (size_t i = 0; i < myLen; ++i) { 00342 for (size_t j = 0; j < NumVectors; ++j) { 00343 view_.h_view(i,j) = ArrayOfPtrs[j][i]; 00344 } 00345 } 00346 view_.template modify<typename dual_view_type::t_host::device_type> (); 00347 00348 origView_ = view_; 00349 } 00350 00351 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00352 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00353 ~MultiVector () {} 00354 00355 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00356 bool MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00357 isConstantStride () const { 00358 return whichVectors_.empty (); 00359 } 00360 00361 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00362 size_t 00363 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00364 getLocalLength () const 00365 { 00366 if (this->getMap ().is_null ()) { // possible, due to replaceMap(). 00367 return static_cast<size_t> (0); 00368 } else { 00369 return this->getMap ()->getNodeNumElements (); 00370 } 00371 } 00372 00373 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00374 global_size_t 00375 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00376 getGlobalLength () const 00377 { 00378 if (this->getMap ().is_null ()) { // possible, due to replaceMap(). 00379 return static_cast<size_t> (0); 00380 } else { 00381 return this->getMap ()->getGlobalNumElements (); 00382 } 00383 } 00384 00385 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00386 size_t 00387 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00388 getStride () const 00389 { 00390 if (isConstantStride ()) { 00391 // Get stride of view: if second dimension is 0, the 00392 // stride might be 0, so take view_dimension instead. 00393 size_t stride[8]; 00394 origView_.stride (stride); 00395 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] : origView_.dimension_0 (); 00396 return LDA; 00397 } 00398 else { 00399 return static_cast<size_t> (0); 00400 } 00401 } 00402 00403 00404 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00405 bool 00406 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00407 checkSizes (const SrcDistObject& sourceObj) 00408 { 00409 // Check whether the source object is a MultiVector. If not, then 00410 // we can't even compare sizes, so it's definitely not OK to 00411 // Import or Export from it. 00412 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> this_type; 00413 const this_type* src = dynamic_cast<const this_type*> (&sourceObj); 00414 if (src == NULL) { 00415 return false; 00416 } else { 00417 // The target of the Import or Export calls checkSizes() in 00418 // DistObject::doTransfer(). By that point, we've already 00419 // constructed an Import or Export object using the two 00420 // multivectors' Maps, which means that (hopefully) we've 00421 // already checked other attributes of the multivectors. Thus, 00422 // all we need to do here is check the number of columns. 00423 return src->getNumVectors () == this->getNumVectors (); 00424 } 00425 } 00426 00427 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00428 size_t 00429 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00430 constantNumberOfPackets () const { 00431 return this->getNumVectors (); 00432 } 00433 00434 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00435 void 00436 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00437 copyAndPermuteNew ( 00438 const SrcDistObject& sourceObj, 00439 size_t numSameIDs, 00440 const Kokkos::View<const LocalOrdinal*, device_type> &permuteToLIDs, 00441 const Kokkos::View<const LocalOrdinal*, device_type> &permuteFromLIDs) 00442 { 00443 using Teuchos::ArrayRCP; 00444 using Teuchos::ArrayView; 00445 using Teuchos::RCP; 00446 using Kokkos::Compat::getKokkosViewDeepCopy; 00447 typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MV; 00448 //typedef typename ArrayView<const LocalOrdinal>::size_type size_type; // unused 00449 const char tfecfFuncName[] = "copyAndPermute"; 00450 00451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00452 permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error, 00453 ": permuteToLIDs and permuteFromLIDs must have the same size." 00454 << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size () 00455 << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << "."); 00456 00457 // We've already called checkSizes(), so this cast must succeed. 00458 const MV& sourceMV = dynamic_cast<const MV&> (sourceObj); 00459 00460 const size_t numCols = this->getNumVectors (); 00461 00462 // TODO (mfh 15 Sep 2013) When we replace 00463 // KokkosClassic::MultiVector with a Kokkos::View, there are two 00464 // ways to copy the data: 00465 // 00466 // 1. Get a (sub)view of each column and call deep_copy on that. 00467 // 2. Write a custom kernel to copy the data. 00468 // 00469 // The first is easier, but the second might be more performant in 00470 // case we decide to use layouts other than LayoutLeft. It might 00471 // even make sense to hide whichVectors_ in an entirely new layout 00472 // for Kokkos Views. 00473 00474 // Copy rows [0, numSameIDs-1] of the local multivectors. 00475 // 00476 // For GPU Nodes: All of this happens using device pointers; this 00477 // does not require host views of either source or destination. 00478 // 00479 // Note (ETP 2 Jul 2014) We need to always copy one column at a 00480 // time, even when both multivectors are constant-stride, since 00481 // deep_copy between strided subviews with more than one column 00482 // doesn't currently work. 00483 if (numSameIDs > 0) { 00484 const std::pair<size_t, size_t> rows( 0, numSameIDs ); 00485 for (size_t j = 0; j < numCols; ++j) { 00486 const size_t dstCol = 00487 isConstantStride() ? j : whichVectors_[j]; 00488 const size_t srcCol = 00489 sourceMV.isConstantStride() ? j : sourceMV.whichVectors_[j]; 00490 dual_view_type dst_j = 00491 Kokkos::subview<dual_view_type>( view_, rows, dstCol ); 00492 dual_view_type src_j = 00493 Kokkos::subview<dual_view_type>( sourceMV.view_, rows, srcCol ); 00494 Kokkos::deep_copy( dst_j, src_j ); // Copy src_j into dest_j 00495 } 00496 } 00497 00498 // For the remaining GIDs, execute the permutations. This may 00499 // involve noncontiguous access of both source and destination 00500 // vectors, depending on the LID lists. 00501 // 00502 // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on 00503 // the same process, this merges their values by replacement of 00504 // the last encountered GID, not by the specified merge rule 00505 // (such as ADD). 00506 00507 // If there are no permutations, we are done 00508 if (permuteFromLIDs.size() == 0 || permuteToLIDs.size() == 0) 00509 return; 00510 00511 if (this->isConstantStride ()) { 00512 KokkosRefactor::Details::permute_array_multi_column( 00513 getKokkosView(), 00514 sourceMV.getKokkosView(), 00515 permuteToLIDs, 00516 permuteFromLIDs, 00517 numCols); 00518 } 00519 else { 00520 KokkosRefactor::Details::permute_array_multi_column_variable_stride( 00521 getKokkosView(), 00522 sourceMV.getKokkosView(), 00523 permuteToLIDs, 00524 permuteFromLIDs, 00525 getKokkosViewDeepCopy<device_type> (whichVectors_ ()), 00526 getKokkosViewDeepCopy<device_type> (sourceMV.whichVectors_ ()), 00527 numCols); 00528 } 00529 } 00530 00531 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00532 void 00533 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00534 packAndPrepareNew ( 00535 const SrcDistObject& sourceObj, 00536 const Kokkos::View<const LocalOrdinal*, device_type> &exportLIDs, 00537 Kokkos::View<Scalar*, device_type> &exports, 00538 const Kokkos::View<size_t*, device_type> &numExportPacketsPerLID, 00539 size_t& constantNumPackets, 00540 Distributor & /* distor */ ) 00541 { 00542 using Teuchos::Array; 00543 using Teuchos::ArrayView; 00544 using Kokkos::Compat::getKokkosViewDeepCopy; 00545 typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MV; 00546 //typedef Array<size_t>::size_type size_type; // unused 00547 00548 // If we have no exports, there is nothing to do 00549 if (exportLIDs.size () == 0) { 00550 return; 00551 } 00552 00553 // We've already called checkSizes(), so this cast must succeed. 00554 const MV& sourceMV = dynamic_cast<const MV&> (sourceObj); 00555 00556 // We don't need numExportPacketsPerLID; forestall "unused 00557 // variable" compile warnings. 00558 (void) numExportPacketsPerLID; 00559 00560 /* The layout in the export for MultiVectors is as follows: 00561 exports = { all of the data from row exportLIDs.front() ; 00562 .... 00563 all of the data from row exportLIDs.back() } 00564 This doesn't have the best locality, but is necessary because 00565 the data for a Packet (all data associated with an LID) is 00566 required to be contiguous. */ 00567 00568 // FIXME (mfh 15 Sep 2013) Would it make sense to rethink the 00569 // packing scheme in the above comment? The data going to a 00570 // particular process must be contiguous, of course, but those 00571 // data could include entries from multiple LIDs. DistObject just 00572 // needs to know how to index into that data. Kokkos is good at 00573 // decoupling storage intent from data layout choice. 00574 00575 const size_t numCols = sourceMV.getNumVectors (); 00576 00577 // This spares us from needing to fill numExportPacketsPerLID. 00578 // Setting constantNumPackets to a nonzero value signals that 00579 // all packets have the same number of entries. 00580 constantNumPackets = numCols; 00581 00582 const size_t numExportLIDs = exportLIDs.size (); 00583 const size_t newExportsSize = numCols * numExportLIDs; 00584 if (exports.size () != newExportsSize) { 00585 Kokkos::Compat::realloc (exports, newExportsSize); 00586 } 00587 00588 if (numCols == 1) { // special case for one column only 00589 // MultiVector always represents a single column with constant 00590 // stride, but it doesn't hurt to implement both cases anyway. 00591 // 00592 // ETP: I'm not sure I agree with the above statement. Can't a single- 00593 // column multivector be a subview of another multi-vector, in which case 00594 // sourceMV.whichVectors_[0] != 0 ? I think we have to handle that case 00595 // separately. 00596 if (sourceMV.isConstantStride ()) { 00597 KokkosRefactor::Details::pack_array_single_column( 00598 exports, 00599 sourceMV.getKokkosView (), 00600 exportLIDs, 00601 0); 00602 } 00603 else { 00604 KokkosRefactor::Details::pack_array_single_column( 00605 exports, 00606 sourceMV.getKokkosView (), 00607 exportLIDs, 00608 sourceMV.whichVectors_[0]); 00609 } 00610 } 00611 else { // the source MultiVector has multiple columns 00612 if (sourceMV.isConstantStride ()) { 00613 KokkosRefactor::Details::pack_array_multi_column( 00614 exports, 00615 sourceMV.getKokkosView (), 00616 exportLIDs, 00617 numCols); 00618 } 00619 else { 00620 KokkosRefactor::Details::pack_array_multi_column_variable_stride( 00621 exports, 00622 sourceMV.getKokkosView (), 00623 exportLIDs, 00624 getKokkosViewDeepCopy<device_type> (sourceMV.whichVectors_ ()), 00625 numCols); 00626 } 00627 } 00628 } 00629 00630 00631 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00632 void 00633 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00634 unpackAndCombineNew ( 00635 const Kokkos::View<const LocalOrdinal*, device_type> &importLIDs, 00636 const Kokkos::View<const Scalar*, device_type> &imports, 00637 const Kokkos::View<size_t*, device_type> &numPacketsPerLID, 00638 size_t constantNumPackets, 00639 Distributor & /* distor */, 00640 CombineMode CM) 00641 { 00642 using Teuchos::ArrayView; 00643 using Kokkos::Compat::getKokkosViewDeepCopy; 00644 const char tfecfFuncName[] = "unpackAndCombine"; 00645 00646 // If we have no imports, there is nothing to do 00647 if (importLIDs.size () == 0) { 00648 return; 00649 } 00650 00651 // We don't need numPacketsPerLID; forestall "unused variable" 00652 // compile warnings. 00653 (void) numPacketsPerLID; 00654 00655 /* The layout in the export for MultiVectors is as follows: 00656 imports = { all of the data from row exportLIDs.front() ; 00657 .... 00658 all of the data from row exportLIDs.back() } 00659 This doesn't have the best locality, but is necessary because 00660 the data for a Packet (all data associated with an LID) is 00661 required to be contiguous. */ 00662 00663 const size_t numVecs = getNumVectors (); 00664 00665 #ifdef HAVE_TPETRA_DEBUG 00666 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00667 static_cast<size_t> (imports.size()) != getNumVectors()*importLIDs.size(), 00668 std::runtime_error, 00669 ": 'imports' buffer size must be consistent with the amount of data to " 00670 "be sent. " << std::endl << "imports.size() = " << imports.size() 00671 << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*" 00672 << importLIDs.size() << " = " << getNumVectors() * importLIDs.size() 00673 << "."); 00674 00675 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00676 constantNumPackets == static_cast<size_t> (0), std::runtime_error, 00677 ": constantNumPackets input argument must be nonzero."); 00678 00679 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00680 static_cast<size_t> (numVecs) != static_cast<size_t> (constantNumPackets), 00681 std::runtime_error, ": constantNumPackets must equal numVecs."); 00682 #endif // HAVE_TPETRA_DEBUG 00683 00684 if (numVecs > 0 && importLIDs.size () > 0) { 00685 00686 // NOTE (mfh 10 Mar 2012, 24 Mar 2014) If you want to implement 00687 // custom combine modes, start editing here. Also, if you trust 00688 // inlining, it would be nice to condense this code by using a 00689 // binary function object f in the pack functors. 00690 if (CM == INSERT || CM == REPLACE) { 00691 if (isConstantStride()) { 00692 KokkosRefactor::Details::unpack_array_multi_column( 00693 getKokkosView(), 00694 imports, 00695 importLIDs, 00696 KokkosRefactor::Details::InsertOp(), 00697 numVecs); 00698 } 00699 else { 00700 KokkosRefactor::Details::unpack_array_multi_column_variable_stride( 00701 getKokkosView(), 00702 imports, 00703 importLIDs, 00704 getKokkosViewDeepCopy<device_type>(whichVectors_ ()), 00705 KokkosRefactor::Details::InsertOp(), 00706 numVecs); 00707 } 00708 } 00709 else if (CM == ADD) { 00710 if (isConstantStride()) { 00711 KokkosRefactor::Details::unpack_array_multi_column( 00712 getKokkosView(), 00713 imports, 00714 importLIDs, 00715 KokkosRefactor::Details::AddOp(), 00716 numVecs); 00717 } 00718 else { 00719 KokkosRefactor::Details::unpack_array_multi_column_variable_stride( 00720 getKokkosView(), 00721 imports, 00722 importLIDs, 00723 getKokkosViewDeepCopy<device_type>(whichVectors_ ()), 00724 KokkosRefactor::Details::AddOp(), 00725 numVecs); 00726 } 00727 } 00728 else if (CM == ABSMAX) { 00729 if (isConstantStride()) { 00730 KokkosRefactor::Details::unpack_array_multi_column( 00731 getKokkosView(), 00732 imports, 00733 importLIDs, 00734 KokkosRefactor::Details::AbsMaxOp(), 00735 numVecs); 00736 } 00737 else { 00738 KokkosRefactor::Details::unpack_array_multi_column_variable_stride( 00739 getKokkosView(), 00740 imports, 00741 importLIDs, 00742 getKokkosViewDeepCopy<device_type>(whichVectors_ ()), 00743 KokkosRefactor::Details::AbsMaxOp(), 00744 numVecs); 00745 } 00746 } 00747 else { 00748 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00749 CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX, 00750 std::invalid_argument, ": Invalid CombineMode: " << CM << ". Valid " 00751 "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX."); 00752 } 00753 } 00754 } 00755 00756 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00757 inline size_t 00758 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00759 getNumVectors () const 00760 { 00761 if (isConstantStride ()) { 00762 return static_cast<size_t> (view_.dimension_1 ()); 00763 } 00764 else { 00765 return static_cast<size_t> (whichVectors_.size ()); 00766 } 00767 } 00768 00769 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00770 void 00771 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00772 dot (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& A, 00773 const Teuchos::ArrayView<dot_type>& dots) const 00774 { 00775 using Kokkos::ALL; 00776 using Kokkos::subview; 00777 using Teuchos::REDUCE_SUM; 00778 using Teuchos::reduceAll; 00779 // View of a MultiVector's local data (all columns). 00780 typedef typename dual_view_type::t_dev mv_view_type; 00781 // View of a single column of a MultiVector's local data. 00782 // 00783 // FIXME (mfh 14 Jul 2014) It would be better to get this typedef 00784 // from mv_view_type itself, in case the layout changes. 00785 typedef Kokkos::View<scalar_type*, Kokkos::LayoutLeft, device_type> vec_view_type; 00786 typedef typename dual_view_type::host_mirror_device_type host_mirror_device_type; 00787 // View of all the dot product results. 00788 typedef Kokkos::View<dot_type*, Kokkos::LayoutLeft, 00789 host_mirror_device_type, Kokkos::MemoryUnmanaged> host_dots_view_type; 00790 typedef Kokkos::View<dot_type*, Kokkos::LayoutLeft, 00791 host_mirror_device_type> host_dots_managed_view_type; 00792 const char tfecfFuncName[] = "Tpetra::MultiVector::dot"; 00793 00794 #ifdef HAVE_TPETRA_DEBUG 00795 { 00796 const bool compat = this->getMap ()->isCompatible (* (A.getMap ())); 00797 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00798 ! compat, std::invalid_argument, "Tpetra::MultiVector::dot: *this is " 00799 "not compatible with the input MultiVector A. We only test for this " 00800 "in a debug build."); 00801 } 00802 #endif // HAVE_TPETRA_DEBUG 00803 00804 // FIXME (mfh 11 Jul 2014) These exception tests may not 00805 // necessarily be thrown on all processes consistently. We should 00806 // instead pass along error state with the inner product. We 00807 // could do this by setting an extra slot to 00808 // Kokkos::Details::ArithTraits<dot_type>::one() on error. The 00809 // final sum should be 00810 // Kokkos::Details::ArithTraits<dot_type>::zero() if not error. 00811 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00812 getLocalLength () != A.getLocalLength (), std::runtime_error, 00813 ": MultiVectors do not have the same local length. " 00814 "this->getLocalLength() = " << getLocalLength () << " != " 00815 "A.getLocalLength() = " << A.getLocalLength () << "."); 00816 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00817 getNumVectors () != A.getNumVectors (), std::runtime_error, 00818 ": MultiVectors must have the same number of columns (vectors). " 00819 "this->getNumVectors() = " << getNumVectors () << " != " 00820 "A.getNumVectors() = " << A.getNumVectors () << "."); 00821 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00822 static_cast<size_t> (dots.size () ) != getNumVectors (), std::runtime_error, ": The output " 00823 "array 'dots' must have the same number of entries as the number of " 00824 "columns (vectors) in *this and A. dots.size() = " << dots.size () 00825 << " != this->getNumVectors() = " << getNumVectors () << "."); 00826 00827 // const size_t numVecs = getNumVectors (); // not used 00828 const size_t lclNumRows = getLocalLength (); 00829 const size_t numDots = static_cast<size_t> (dots.size ()); 00830 00831 // We're computing using the device's data, so we need to make 00832 // sure first that the device is in sync with the host. 00833 A.view_.template sync<DeviceType> (); 00834 view_.template sync<DeviceType> (); 00835 00836 // In case the input dimensions don't match, make sure that we 00837 // don't overwrite memory that doesn't belong to us, by using 00838 // subset views with the minimum dimensions over all input. 00839 const std::pair<size_t, size_t> rowRng (0, lclNumRows); 00840 const std::pair<size_t, size_t> colRng (0, numDots); 00841 mv_view_type X = subview<mv_view_type> (view_.d_view, rowRng, colRng); 00842 mv_view_type Y = subview<mv_view_type> (A.view_.d_view, rowRng, colRng); 00843 00844 if (numDots == 1) { 00845 // Special case 1: Both MultiVectors only have a single column. 00846 // The single-vector dot product kernel may be more efficient. 00847 const size_t ZERO = static_cast<size_t> (0); 00848 vec_view_type X_k = subview<vec_view_type> (X, ALL (), ZERO); 00849 vec_view_type Y_k = subview<vec_view_type> (Y, ALL (), ZERO); 00850 00851 TEUCHOS_TEST_FOR_EXCEPTION( 00852 X_k.dimension_0 () != lclNumRows, std::logic_error, "Tpetra::Multi" 00853 "Vector::dot: In the special single-vector case, X_k.dimension_0() = " 00854 << X_k.dimension_0 () << " != lclNumRows = " << lclNumRows 00855 << ". Please report this bug to the Tpetra developers."); 00856 TEUCHOS_TEST_FOR_EXCEPTION( 00857 Y_k.dimension_0 () != lclNumRows, std::logic_error, "Tpetra::Multi" 00858 "Vector::dot: In the special single-vector case, Y_k.dimension_0() = " 00859 << Y_k.dimension_0 () << " != lclNumRows = " << lclNumRows 00860 << ". Please report this bug to the Tpetra developers."); 00861 00862 dots[0] = Kokkos::V_Dot (X_k, Y_k, -1); 00863 } 00864 else if (isConstantStride () && A.isConstantStride ()) { 00865 // Special case 2: Both MultiVectors have constant stride. 00866 (void) Kokkos::MV_Dot (dots.getRawPtr (), X, Y, -1); 00867 } 00868 else { 00869 // FIXME (mfh 14 Jul 2014) This does a kernel launch for every 00870 // column. It might be better to have a kernel that does the 00871 // work all at once. On the other hand, we don't prioritize 00872 // performance of MultiVector views of noncontiguous columns. 00873 for (size_t k = 0; k < numDots; ++k) { 00874 const size_t X_col = isConstantStride () ? k : whichVectors_[k]; 00875 const size_t Y_col = A.isConstantStride () ? k : A.whichVectors_[k]; 00876 vec_view_type X_k = subview<vec_view_type> (X, ALL (), X_col); 00877 vec_view_type Y_k = subview<vec_view_type> (Y, ALL (), Y_col); 00878 dots[k] = Kokkos::V_Dot (X_k, Y_k, -1); 00879 } 00880 } 00881 00882 // If the MultiVectors are distributed over multiple processes, 00883 // sum the results across processes. We assume that the MPI 00884 // implementation can read from and write to device memory. 00885 // 00886 // replaceMap() may have removed some processes. Those processes 00887 // have a null Map. They must not participate in any collective 00888 // operations. We ask first whether the Map is null, because 00889 // isDistributed() defers that question to the Map. We still 00890 // compute and return local dot products for processes not 00891 // participating in collective operations; those probably don't 00892 // make any sense, but it doesn't hurt to do them, since it's 00893 // illegal to call dot() on those processes anyway. 00894 if (! this->getMap ().is_null () && this->isDistributed ()) { 00895 host_dots_view_type theDots (dots.getRawPtr (), numDots); 00896 // MPI doesn't allow aliasing of arguments, so we have to make a 00897 // copy of the local sum. 00898 host_dots_managed_view_type lclDots ("MV::dot lcl", numDots); 00899 00900 TEUCHOS_TEST_FOR_EXCEPTION( 00901 lclDots.dimension_0 () != theDots.dimension_0 (), std::logic_error, 00902 "Tpetra::MultiVector::dot: lclDots and theDots have different sizes. " 00903 "lclDots.dimension_0 () = " << lclDots.dimension_0 () << " != " 00904 "theDots.dimension_0 () = " << theDots.dimension_0 () << ". " 00905 "Please report this bug to the Tpetra developers."); 00906 00907 Kokkos::deep_copy (lclDots, theDots); 00908 const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ()); 00909 const dot_type* const lclSum = lclDots.ptr_on_device (); 00910 dot_type* const gblSum = theDots.ptr_on_device (); 00911 reduceAll<int, dot_type> (comm, REDUCE_SUM, static_cast<int> (numDots), 00912 lclSum, gblSum); 00913 } 00914 } 00915 00916 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 00917 void 00918 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 00919 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 00920 dot (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type>& A, 00921 const Kokkos::View<dot_type*, device_type>& dots) const 00922 { 00923 using Kokkos::ALL; 00924 using Kokkos::subview; 00925 using Teuchos::REDUCE_SUM; 00926 using Teuchos::reduceAll; 00927 // View of a MultiVector's local data (all columns). 00928 typedef typename dual_view_type::t_dev mv_view_type; 00929 // View of a single column of a MultiVector's local data. 00930 // 00931 // FIXME (mfh 14 Jul 2014) It would be better to get this typedef 00932 // from mv_view_type itself, in case the layout changes. 00933 typedef Kokkos::View<scalar_type*, Kokkos::LayoutLeft, device_type> vec_view_type; 00934 // View of all the dot product results. 00935 typedef Kokkos::View<dot_type*, device_type> dots_view_type; 00936 // Scalar view; view of a single dot product result. 00937 typedef Kokkos::View<dot_type, device_type> dot_view_type; 00938 const char tfecfFuncName[] = "Tpetra::MultiVector::dot"; 00939 00940 #ifdef HAVE_TPETRA_DEBUG 00941 { 00942 const bool compat = this->getMap ()->isCompatible (* (A.getMap ())); 00943 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00944 ! compat, std::invalid_argument, "Tpetra::MultiVector::dot: *this is " 00945 "not compatible with the input MultiVector A. We only test for this " 00946 "in a debug build."); 00947 } 00948 #endif // HAVE_TPETRA_DEBUG 00949 00950 // FIXME (mfh 11 Jul 2014) These exception tests may not 00951 // necessarily be thrown on all processes consistently. We should 00952 // instead pass along error state with the inner product. We 00953 // could do this by setting an extra slot to 00954 // Kokkos::Details::ArithTraits<dot_type>::one() on error. The 00955 // final sum should be 00956 // Kokkos::Details::ArithTraits<dot_type>::zero() if not error. 00957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00958 getLocalLength () != A.getLocalLength (), std::runtime_error, 00959 ": MultiVectors do not have the same local length. " 00960 "this->getLocalLength() = " << getLocalLength () << " != " 00961 "A.getLocalLength() = " << A.getLocalLength () << "."); 00962 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00963 getNumVectors () != A.getNumVectors (), std::runtime_error, 00964 ": MultiVectors must have the same number of columns (vectors). " 00965 "this->getNumVectors() = " << getNumVectors () << " != " 00966 "A.getNumVectors() = " << A.getNumVectors () << "."); 00967 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 00968 dots.dimension_0 () != getNumVectors (), std::runtime_error, 00969 ": The output array 'dots' must have the same number of entries as the " 00970 "number of columns (vectors) in *this and A. dots.dimension_0() = " << 00971 dots.dimension_0 () << " != this->getNumVectors() = " << getNumVectors () 00972 << "."); 00973 00974 // We're computing using the device's data, so we need to make 00975 // sure first that the device is in sync with the host. 00976 A.view_.template sync<DeviceType> (); 00977 view_.template sync<DeviceType> (); 00978 00979 // const size_t numVecs = getNumVectors (); // not used 00980 const size_t lclNumRows = getLocalLength (); 00981 const size_t numDots = dots.dimension_0 (); 00982 00983 // In case the input dimensions don't match, make sure that we 00984 // don't overwrite memory that doesn't belong to us, by using 00985 // subset views with the minimum dimensions over all input. 00986 const std::pair<size_t, size_t> rowRng (0, lclNumRows); 00987 const std::pair<size_t, size_t> colRng (0, numDots); 00988 dots_view_type theDots = subview<dots_view_type> (dots, colRng); 00989 mv_view_type X = subview<mv_view_type> (view_.d_view, rowRng, colRng); 00990 mv_view_type Y = subview<mv_view_type> (A.view_.d_view, rowRng, colRng); 00991 00992 // FIXME (mfh 14 Jul 2014) How come ALL() works as the first 00993 // argument, but not a row range? The first line below doesn't 00994 // compile, but the second line does. See 00995 // kokkos/core/unit_test/TestViewAPI.hpp, in particular 00996 // run_test_vector(), for an example of allowed subview arguments. 00997 // 00998 //vec_view_type X_0 = subview<vec_view_type> (X, rowRng, static_cast<size_t> (0)); 00999 //vec_view_type X_0 = subview<vec_view_type> (X, ALL (), static_cast<size_t> (0)); 01000 01001 if (numDots == 1) { 01002 // Special case 1: Both MultiVectors only have a single column. 01003 // The single-vector dot product kernel may be more efficient. 01004 const size_t ZERO = static_cast<size_t> (0); 01005 vec_view_type X_k = subview<vec_view_type> (X, ALL (), ZERO); 01006 vec_view_type Y_k = subview<vec_view_type> (Y, ALL (), ZERO); 01007 dot_view_type dot_k = subview<dot_view_type> (theDots, ZERO); 01008 01009 TEUCHOS_TEST_FOR_EXCEPTION( 01010 X_k.dimension_0 () != lclNumRows, std::logic_error, "Tpetra::Multi" 01011 "Vector::dot: In the special single-vector case, X_k.dimension_0() = " 01012 << X_k.dimension_0 () << " != lclNumRows = " << lclNumRows 01013 << ". Please report this bug to the Tpetra developers."); 01014 TEUCHOS_TEST_FOR_EXCEPTION( 01015 Y_k.dimension_0 () != lclNumRows, std::logic_error, "Tpetra::Multi" 01016 "Vector::dot: In the special single-vector case, Y_k.dimension_0() = " 01017 << Y_k.dimension_0 () << " != lclNumRows = " << lclNumRows 01018 << ". Please report this bug to the Tpetra developers."); 01019 01020 Kokkos::VecDotFunctor<vec_view_type> f (X_k, Y_k, dot_k); 01021 Kokkos::parallel_reduce (lclNumRows, f); 01022 } 01023 else if (isConstantStride () && A.isConstantStride ()) { 01024 // Special case 2: Both MultiVectors have constant stride. 01025 Kokkos::MultiVecDotFunctor<mv_view_type> f (X, Y, theDots); 01026 Kokkos::parallel_reduce (lclNumRows, f); 01027 } 01028 else { 01029 // FIXME (mfh 14 Jul 2014) This does a kernel launch for every 01030 // column. It might be better to have a kernel that does the 01031 // work all at once. On the other hand, we don't prioritize 01032 // performance of MultiVector views of noncontiguous columns. 01033 for (size_t k = 0; k < numDots; ++k) { 01034 const size_t X_col = isConstantStride () ? k : whichVectors_[k]; 01035 const size_t Y_col = A.isConstantStride () ? k : A.whichVectors_[k]; 01036 vec_view_type X_k = subview<vec_view_type> (X, ALL (), X_col); 01037 vec_view_type Y_k = subview<vec_view_type> (Y, ALL (), Y_col); 01038 dot_view_type dot_k = subview<dot_view_type> (theDots, k); 01039 Kokkos::VecDotFunctor<vec_view_type> f (X_k, Y_k, dot_k); 01040 Kokkos::parallel_reduce (lclNumRows, f); 01041 } 01042 } 01043 01044 // If the MultiVectors are distributed over multiple processes, 01045 // sum the results across processes. We assume that the MPI 01046 // implementation can read from and write to device memory. 01047 // 01048 // replaceMap() may have removed some processes. Those processes 01049 // have a null Map. They must not participate in any collective 01050 // operations. We ask first whether the Map is null, because 01051 // isDistributed() defers that question to the Map. We still 01052 // compute and return local dot products for processes not 01053 // participating in collective operations; those probably don't 01054 // make any sense, but it doesn't hurt to do them, since it's 01055 // illegal to call dot() on those processes anyway. 01056 if (! this->getMap ().is_null () && this->isDistributed ()) { 01057 // MPI doesn't allow aliasing of arguments, so we have to make a 01058 // copy of the local sum. 01059 dots_view_type lclDots ("MV::dot lcl", numDots); 01060 01061 TEUCHOS_TEST_FOR_EXCEPTION( 01062 lclDots.dimension_0 () != theDots.dimension_0 (), std::logic_error, 01063 "Tpetra::MultiVector::dot: lclDots and theDots have different sizes. " 01064 "lclDots.dimension_0 () = " << lclDots.dimension_0 () << " != " 01065 "theDots.dimension_0 () = " << theDots.dimension_0 () << ". " 01066 "Please report this bug to the Tpetra developers."); 01067 01068 Kokkos::deep_copy (lclDots, theDots); 01069 const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ()); 01070 const dot_type* const lclSum = lclDots.ptr_on_device (); 01071 dot_type* const gblSum = theDots.ptr_on_device (); 01072 reduceAll<int, dot_type> (comm, REDUCE_SUM, static_cast<int> (numDots), 01073 lclSum, gblSum); 01074 } 01075 } 01076 01077 01078 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01079 void 01080 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01081 norm2 (const Teuchos::ArrayView<mag_type>& norms) const 01082 { 01083 typedef typename dual_view_type::host_mirror_device_type host_mirror_device_type; 01084 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type; 01085 typedef Kokkos::View<mag_type*, typename dev_norms_view_type::array_layout, 01086 host_mirror_device_type, Kokkos::MemoryUnmanaged> host_norms_view_type; 01087 01088 const size_t numNorms = static_cast<size_t> (norms.size ()); 01089 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms); 01090 dev_norms_view_type normsDevView ("MV::norm2 tmp", numNorms); 01091 this->norm2 (normsDevView); // Do the computation on the device. 01092 Kokkos::deep_copy (normsHostView, normsDevView); // Bring back result to host 01093 } 01094 01095 01096 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01097 void 01098 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 01099 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01100 norm2 (const Kokkos::View<mag_type*, device_type>& norms) const 01101 { 01102 using Kokkos::ALL; 01103 using Kokkos::subview; 01104 using Teuchos::Comm; 01105 using Teuchos::null; 01106 using Teuchos::RCP; 01107 using Teuchos::REDUCE_SUM; 01108 using Teuchos::reduceAll; 01109 // View of a MultiVector's local data (all columns). 01110 typedef typename dual_view_type::t_dev mv_view_type; 01111 // View of a single column of a MultiVector's local data. 01112 // 01113 // FIXME (mfh 14 Jul 2014) It would be better to get this typedef 01114 // from mv_view_type itself, in case the layout changes. 01115 typedef Kokkos::View<scalar_type*, Kokkos::LayoutLeft, device_type> vec_view_type; 01116 // View of all the norm results. 01117 typedef Kokkos::View<mag_type*, device_type> norms_view_type; 01118 // Scalar view; view of a single norm result. 01119 typedef Kokkos::View<mag_type, device_type> norm_view_type; 01120 const char tfecfFuncName[] = "Tpetra::MultiVector::norm2"; 01121 01122 const size_t numVecs = getNumVectors (); 01123 const size_t lclNumRows = getLocalLength (); 01124 const size_t numNorms = static_cast<size_t> (norms.dimension_0 ()); 01125 01126 // FIXME (mfh 11 Jul 2014) These exception tests may not 01127 // necessarily be thrown on all processes consistently. We should 01128 // instead pass along error state with the inner product. We 01129 // could do this by setting an extra slot to 01130 // Kokkos::Details::ArithTraits<mag_type>::one() on error. The 01131 // final sum should be 01132 // Kokkos::Details::ArithTraits<mag_type>::zero() if not error. 01133 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01134 numNorms < numVecs, std::runtime_error, "Tpetra::MultiVector::norm2: " 01135 "'norms' must have at least as many entries as the number of vectors in " 01136 "*this. norms.dimension_0() = " << numVecs << " < this->getNumVectors()" 01137 " = " << numVecs << "."); 01138 01139 // We're computing using the device's data, so we need to make 01140 // sure first that the device is in sync with the host. 01141 view_.template sync<DeviceType> (); 01142 01143 // In case the input dimensions don't match, make sure that we 01144 // don't overwrite memory that doesn't belong to us, by using 01145 // subset views with the minimum dimensions over all input. 01146 const std::pair<size_t, size_t> rowRng (0, lclNumRows); 01147 const std::pair<size_t, size_t> colRng (0, numVecs); 01148 norms_view_type theNorms = subview<norms_view_type> (norms, colRng); 01149 mv_view_type X = subview<mv_view_type> (view_.d_view, rowRng, colRng); 01150 01151 if (numVecs == 1) { 01152 // Special case 1: The MultiVector only has a single column. 01153 // The single-vector norm kernel may be more efficient. 01154 const size_t ZERO = static_cast<size_t> (0); 01155 vec_view_type X_k = subview<vec_view_type> (X, ALL (), ZERO); 01156 norm_view_type norm_k = subview<norm_view_type> (theNorms, ZERO); 01157 Kokkos::VecNorm2SquaredFunctor<vec_view_type> f (X_k, norm_k); 01158 Kokkos::parallel_reduce (lclNumRows, f); 01159 } 01160 else if (isConstantStride ()) { 01161 // Special case 2: The MultiVector has constant stride. 01162 Kokkos::MultiVecNorm2SquaredFunctor<mv_view_type> f (X, theNorms); 01163 Kokkos::parallel_reduce (lclNumRows, f); 01164 } 01165 else { 01166 // FIXME (mfh 14 Jul 2014) This does a kernel launch for every 01167 // column. It might be better to have a kernel that does the 01168 // work all at once. On the other hand, we don't prioritize 01169 // performance of MultiVector views of noncontiguous columns. 01170 for (size_t k = 0; k < numVecs; ++k) { 01171 const size_t X_col = isConstantStride () ? k : whichVectors_[k]; 01172 vec_view_type X_k = subview<vec_view_type> (X, ALL (), X_col); 01173 norm_view_type norm_k = subview<norm_view_type> (theNorms, k); 01174 Kokkos::VecNorm2SquaredFunctor<vec_view_type> f (X_k, norm_k); 01175 Kokkos::parallel_reduce (lclNumRows, f); 01176 } 01177 } 01178 01179 // If the MultiVectors are distributed over multiple processes, 01180 // sum the results across processes. We assume that the MPI 01181 // implementation can read from and write to device memory. 01182 // 01183 // replaceMap() may have removed some processes. Those processes 01184 // have a null Map. They must not participate in any collective 01185 // operations. We ask first whether the Map is null, because 01186 // isDistributed() defers that question to the Map. We still 01187 // compute and return local norms for processes not participating 01188 // in collective operations; those probably don't make any sense, 01189 // but it doesn't hurt to do them, since it's illegal to call 01190 // norm2() on those processes anyway. 01191 if (this->isDistributed ()) { 01192 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null : 01193 this->getMap ()->getComm (); 01194 // The calling process only participates in the collective if 01195 // both the Map and its Comm on that process are nonnull. 01196 if (! comm.is_null ()) { 01197 // MPI doesn't allow aliasing of arguments, so we have to make 01198 // a copy of the local sum. 01199 norms_view_type lclNorms ("MV::norm2 lcl", numNorms); 01200 Kokkos::deep_copy (lclNorms, theNorms); 01201 const mag_type* const lclSum = lclNorms.ptr_on_device (); 01202 mag_type* const gblSum = theNorms.ptr_on_device (); 01203 reduceAll<int, mag_type> (*comm, REDUCE_SUM, static_cast<int> (numVecs), 01204 lclSum, gblSum); 01205 } 01206 } 01207 01208 // Replace the norm-squared results with their square roots in 01209 // place, to get the final output. If the device memory and the 01210 // host memory are the same, it probably doesn't pay to launch a 01211 // parallel kernel for that, since there isn't enough 01212 // parallelism for the typical MultiVector case. 01213 typedef typename device_type::host_mirror_device_type host_mirror_device_type; 01214 const bool inHostMemory = Kokkos::Impl::is_same<typename device_type::memory_space, 01215 typename host_mirror_device_type::memory_space>::value; 01216 if (inHostMemory) { 01217 for (size_t j = 0; j < numVecs; ++j) { 01218 theNorms(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (theNorms(j)); 01219 } 01220 } 01221 else { 01222 // There's not as much parallelism now, but that's OK. The 01223 // point of doing parallel dispatch here is to keep the norm 01224 // results on the device, thus avoiding a copy to the host and 01225 // back again. 01226 Kokkos::SquareRootFunctor<norms_view_type> f (theNorms); 01227 Kokkos::parallel_for (numVecs, f); 01228 } 01229 } 01230 01231 01232 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01233 void 01234 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01235 normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& weights, 01236 const Teuchos::ArrayView<mag_type> &norms) const 01237 { 01238 // KR FIXME MVT::WeightedNorm (might already exist). 01239 // 01240 // KR FIXME Overload this method to take a View. 01241 01242 using Kokkos::ALL; 01243 using Kokkos::subview; 01244 using Teuchos::arcp_const_cast; 01245 using Teuchos::Array; 01246 using Teuchos::ArrayRCP; 01247 using Teuchos::ArrayView; 01248 using Teuchos::reduceAll; 01249 using Teuchos::REDUCE_SUM; 01250 using Teuchos::ScalarTraits; 01251 typedef mag_type Mag; 01252 const char tfecfFuncName[] = "normWeighted"; 01253 01254 const Mag OneOverN = Teuchos::ScalarTraits<Mag>::one () / static_cast<Mag> (getGlobalLength ()); 01255 bool OneW = false; 01256 const size_t numVecs = this->getNumVectors(); 01257 if (weights.getNumVectors() == 1) { 01258 OneW = true; 01259 } 01260 else { 01261 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(weights.getNumVectors() != numVecs, std::runtime_error, 01262 ": MultiVector of weights must contain either one vector or the same number of vectors as this."); 01263 } 01264 #ifdef HAVE_TPETRA_DEBUG 01265 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*weights.getMap()), std::runtime_error, 01266 ": MultiVectors do not have compatible Maps:" << std::endl 01267 << "this->getMap(): " << std::endl << *this->getMap() 01268 << "weights.getMap(): " << std::endl << *weights.getMap() << std::endl); 01269 #else 01270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != weights.getLocalLength(), std::runtime_error, 01271 ": MultiVectors do not have the same local length."); 01272 #endif 01273 01274 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01275 static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error, 01276 ": norms.size() must be as large as the number of vectors in *this."); 01277 typedef Kokkos::View<Scalar*,DeviceType> view_type; 01278 view_.template sync<DeviceType>(); 01279 weights.view_.template sync<DeviceType>(); 01280 if (isConstantStride ()) { 01281 if(OneW) { 01282 view_type weights_0 = subview<view_type> (weights.view_.d_view, ALL (), 0); 01283 Kokkos::MV_DotWeighted (&norms[0], weights_0, view_.d_view, getLocalLength ()); 01284 } else 01285 Kokkos::MV_DotWeighted (&norms[0], weights.view_.d_view , view_.d_view, getLocalLength ()); 01286 } 01287 else { 01288 // FIXME (mfh 11 Mar 2014) Once we have strided Views, we won't 01289 // have to write the explicit for loop over columns any more. 01290 if(OneW) { 01291 view_type weights_0 = subview<view_type> (weights.view_.d_view, ALL (), 0); 01292 for (size_t k = 0; k < numVecs; ++k) { 01293 const size_t curCol = whichVectors_[k]; 01294 view_type vector_k = subview<view_type> (view_.d_view, ALL (), curCol); 01295 norms[k] = Kokkos::V_DotWeighted (weights_0, vector_k,getLocalLength()); 01296 } 01297 } else { 01298 for (size_t k = 0; k < numVecs; ++k) { 01299 const size_t curCol = whichVectors_[k]; 01300 view_type weights_k = subview<view_type> (weights.view_.d_view, ALL (), curCol); 01301 view_type vector_k = subview<view_type> (view_.d_view, ALL (), curCol); 01302 norms[k] = Kokkos::V_DotWeighted (weights_k, vector_k,getLocalLength()); 01303 } 01304 } 01305 } 01306 if (this->isDistributed ()) { 01307 Array<Mag> lnorms(norms); 01308 reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, 01309 static_cast<int> (numVecs), lnorms.getRawPtr (), 01310 norms.getRawPtr ()); 01311 } 01312 for (typename ArrayView<Mag>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) { 01313 *n = ScalarTraits<Mag>::squareroot(*n * OneOverN); 01314 } 01315 } 01316 01317 01318 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01319 void 01320 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01321 norm1 (const Teuchos::ArrayView<mag_type>& norms) const 01322 { 01323 typedef typename dual_view_type::host_mirror_device_type host_mirror_device_type; 01324 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type; 01325 typedef Kokkos::View<mag_type*, typename dev_norms_view_type::array_layout, 01326 host_mirror_device_type, Kokkos::MemoryUnmanaged> host_norms_view_type; 01327 01328 const size_t numNorms = static_cast<size_t> (norms.size ()); 01329 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms); 01330 dev_norms_view_type normsDevView ("MV::norm1 tmp", numNorms); 01331 this->norm1 (normsDevView); // Do the computation on the device. 01332 Kokkos::deep_copy (normsHostView, normsDevView); // Bring back result to host 01333 } 01334 01335 01336 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01337 void 01338 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 01339 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01340 norm1 (const Kokkos::View<mag_type*, device_type>& norms) const 01341 { 01342 using Kokkos::ALL; 01343 using Kokkos::subview; 01344 using Teuchos::Comm; 01345 using Teuchos::null; 01346 using Teuchos::RCP; 01347 using Teuchos::REDUCE_SUM; 01348 using Teuchos::reduceAll; 01349 01350 // View of a MultiVector's local data (all columns). 01351 typedef typename dual_view_type::t_dev mv_view_type; 01352 // View of a single column of a MultiVector's local data. 01353 // 01354 // FIXME (mfh 14 Jul 2014) It would be better to get this typedef 01355 // from mv_view_type itself, in case the layout changes. 01356 typedef Kokkos::View<scalar_type*, Kokkos::LayoutLeft, device_type> vec_view_type; 01357 // View of all the norm results. 01358 typedef Kokkos::View<mag_type*, device_type> norms_view_type; 01359 // Scalar view; view of a single norm result. 01360 typedef Kokkos::View<mag_type, device_type> norm_view_type; 01361 01362 const size_t numVecs = this->getNumVectors (); 01363 const size_t lclNumRows = this->getLocalLength (); 01364 const size_t numNorms = static_cast<size_t> (norms.dimension_0 ()); 01365 01366 // FIXME (mfh 11 Jul 2014) These exception tests may not 01367 // necessarily be thrown on all processes consistently. We should 01368 // instead pass along error state with the inner product. We 01369 // could do this by setting an extra slot to 01370 // Kokkos::Details::ArithTraits<mag_type>::one() on error. The 01371 // final sum should be 01372 // Kokkos::Details::ArithTraits<mag_type>::zero() if not error. 01373 TEUCHOS_TEST_FOR_EXCEPTION( 01374 numNorms < numVecs, std::runtime_error, "Tpetra::MultiVector::norm1: " 01375 "'norms' must have at least as many entries as the number of vectors in " 01376 "*this. norms.dimension_0() = " << numVecs << " < this->getNumVectors()" 01377 " = " << numVecs << "."); 01378 01379 // We're computing using the device's data, so we need to make 01380 // sure first that the device is in sync with the host. 01381 view_.template sync<DeviceType> (); 01382 01383 // In case the input dimensions don't match, make sure that we 01384 // don't overwrite memory that doesn't belong to us, by using 01385 // subset views with the minimum dimensions over all input. 01386 const std::pair<size_t, size_t> rowRng (0, lclNumRows); 01387 const std::pair<size_t, size_t> colRng (0, numVecs); 01388 norms_view_type theNorms = subview<norms_view_type> (norms, colRng); 01389 mv_view_type X = subview<mv_view_type> (view_.d_view, rowRng, colRng); 01390 01391 if (numVecs == 1) { 01392 // Special case 1: The MultiVector only has a single column. 01393 // The single-vector norm kernel may be more efficient. 01394 const size_t ZERO = static_cast<size_t> (0); 01395 vec_view_type X_k = subview<vec_view_type> (X, ALL (), ZERO); 01396 norm_view_type norm_k = subview<norm_view_type> (theNorms, ZERO); 01397 Kokkos::VecNorm1Functor<vec_view_type> f (X_k, norm_k); 01398 Kokkos::parallel_reduce (lclNumRows, f); 01399 } 01400 else if (isConstantStride ()) { 01401 // Special case 2: The MultiVector has constant stride. 01402 Kokkos::MultiVecNorm1Functor<mv_view_type> f (X, theNorms); 01403 Kokkos::parallel_reduce (lclNumRows, f); 01404 } 01405 else { 01406 // FIXME (mfh 14 Jul 2014) This does a kernel launch for every 01407 // column. It might be better to have a kernel that does the 01408 // work all at once. On the other hand, we don't prioritize 01409 // performance of MultiVector views of noncontiguous columns. 01410 for (size_t k = 0; k < numVecs; ++k) { 01411 const size_t X_col = isConstantStride () ? k : whichVectors_[k]; 01412 vec_view_type X_k = subview<vec_view_type> (X, ALL (), X_col); 01413 norm_view_type norm_k = subview<norm_view_type> (theNorms, k); 01414 Kokkos::VecNorm1Functor<vec_view_type> f (X_k, norm_k); 01415 Kokkos::parallel_reduce (lclNumRows, f); 01416 } 01417 } 01418 01419 // If the MultiVectors are distributed over multiple processes, 01420 // sum the results across processes. We assume that the MPI 01421 // implementation can read from and write to device memory. 01422 // 01423 // replaceMap() may have removed some processes. Those processes 01424 // have a null Map. They must not participate in any collective 01425 // operations. We ask first whether the Map is null, because 01426 // isDistributed() defers that question to the Map. We still 01427 // compute and return local norms for processes not participating 01428 // in collective operations; those probably don't make any sense, 01429 // but it doesn't hurt to do them, since it's illegal to call 01430 // norm1() on those processes anyway. 01431 if (this->isDistributed ()) { 01432 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null : 01433 this->getMap ()->getComm (); 01434 // The calling process only participates in the collective if 01435 // both the Map and its Comm on that process are nonnull. 01436 if (! comm.is_null ()) { 01437 // MPI doesn't allow aliasing of arguments, so we have to make 01438 // a copy of the local sum. 01439 norms_view_type lclNorms ("MV::norm1 lcl", numNorms); 01440 Kokkos::deep_copy (lclNorms, theNorms); 01441 const mag_type* const lclSum = lclNorms.ptr_on_device (); 01442 mag_type* const gblSum = theNorms.ptr_on_device (); 01443 reduceAll<int, mag_type> (*comm, REDUCE_SUM, static_cast<int> (numVecs), 01444 lclSum, gblSum); 01445 } 01446 } 01447 } 01448 01449 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01450 void 01451 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01452 normInf (const Teuchos::ArrayView<mag_type>& norms) const 01453 { 01454 typedef typename dual_view_type::host_mirror_device_type host_mirror_device_type; 01455 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type; 01456 typedef Kokkos::View<mag_type*, typename dev_norms_view_type::array_layout, host_mirror_device_type, Kokkos::MemoryUnmanaged> host_norms_view_type; 01457 01458 const size_t numNorms = static_cast<size_t> (norms.size ()); 01459 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms); 01460 dev_norms_view_type normsDevView ("MV::normInf tmp", numNorms); 01461 this->normInf (normsDevView); // Do the computation on the device. 01462 Kokkos::deep_copy (normsHostView, normsDevView); // Bring back result to host 01463 } 01464 01465 01466 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01467 void 01468 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 01469 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01470 normInf (const Kokkos::View<mag_type*, device_type>& norms) const 01471 { 01472 using Kokkos::ALL; 01473 using Kokkos::subview; 01474 using Teuchos::Comm; 01475 using Teuchos::null; 01476 using Teuchos::RCP; 01477 using Teuchos::REDUCE_MAX; 01478 using Teuchos::reduceAll; 01479 // View of a MultiVector's local data (all columns). 01480 typedef typename dual_view_type::t_dev mv_view_type; 01481 // View of a single column of a MultiVector's local data. 01482 // 01483 // FIXME (mfh 14 Jul 2014) It would be better to get this typedef 01484 // from mv_view_type itself, in case the layout changes. 01485 typedef Kokkos::View<scalar_type*, Kokkos::LayoutLeft, device_type> vec_view_type; 01486 // View of all the norm results. 01487 typedef Kokkos::View<mag_type*, device_type> norms_view_type; 01488 // Scalar view; view of a single norm result. 01489 typedef Kokkos::View<mag_type, device_type> norm_view_type; 01490 01491 const size_t numVecs = this->getNumVectors (); 01492 const size_t lclNumRows = this->getLocalLength (); 01493 const size_t numNorms = static_cast<size_t> (norms.dimension_0 ()); 01494 01495 // FIXME (mfh 11 Jul 2014) These exception tests may not 01496 // necessarily be thrown on all processes consistently. We should 01497 // instead pass along error state with the inner product. We 01498 // could do this by setting an extra slot to 01499 // Kokkos::Details::ArithTraits<mag_type>::one() on error. The 01500 // final sum should be 01501 // Kokkos::Details::ArithTraits<mag_type>::zero() if not error. 01502 TEUCHOS_TEST_FOR_EXCEPTION( 01503 numNorms < numVecs, std::runtime_error, "Tpetra::MultiVector::normInf: " 01504 "'norms' must have at least as many entries as the number of vectors in " 01505 "*this. norms.dimension_0() = " << numVecs << " < this->getNumVectors()" 01506 " = " << numVecs << "."); 01507 01508 // We're computing using the device's data, so we need to make 01509 // sure first that the device is in sync with the host. 01510 view_.template sync<DeviceType> (); 01511 01512 // In case the input dimensions don't match, make sure that we 01513 // don't overwrite memory that doesn't belong to us, by using 01514 // subset views with the minimum dimensions over all input. 01515 const std::pair<size_t, size_t> rowRng (0, lclNumRows); 01516 const std::pair<size_t, size_t> colRng (0, numVecs); 01517 norms_view_type theNorms = subview<norms_view_type> (norms, colRng); 01518 mv_view_type X = subview<mv_view_type> (view_.d_view, rowRng, colRng); 01519 01520 if (numVecs == 1) { 01521 // Special case 1: The MultiVector only has a single column. 01522 // The single-vector norm kernel may be more efficient. 01523 const size_t ZERO = static_cast<size_t> (0); 01524 vec_view_type X_k = subview<vec_view_type> (X, ALL (), ZERO); 01525 norm_view_type norm_k = subview<norm_view_type> (theNorms, ZERO); 01526 Kokkos::VecNormInfFunctor<vec_view_type> f (X_k, norm_k); 01527 Kokkos::parallel_reduce (lclNumRows, f); 01528 } 01529 else if (isConstantStride ()) { 01530 // Special case 2: The MultiVector has constant stride. 01531 Kokkos::MultiVecNormInfFunctor<mv_view_type> f (X, theNorms); 01532 Kokkos::parallel_reduce (lclNumRows, f); 01533 } 01534 else { 01535 // FIXME (mfh 15 Jul 2014) This does a kernel launch for every 01536 // column. It might be better to have a kernel that does the 01537 // work all at once. On the other hand, we don't prioritize 01538 // performance of MultiVector views of noncontiguous columns. 01539 for (size_t k = 0; k < numVecs; ++k) { 01540 const size_t X_col = isConstantStride () ? k : whichVectors_[k]; 01541 vec_view_type X_k = subview<vec_view_type> (X, ALL (), X_col); 01542 norm_view_type norm_k = subview<norm_view_type> (theNorms, k); 01543 Kokkos::VecNormInfFunctor<vec_view_type> f (X_k, norm_k); 01544 Kokkos::parallel_reduce (lclNumRows, f); 01545 } 01546 } 01547 01548 // If the MultiVectors are distributed over multiple processes, 01549 // compute the max of the results across processes. We assume 01550 // that the MPI implementation can read from and write to device 01551 // memory. 01552 // 01553 // replaceMap() may have removed some processes. Those processes 01554 // have a null Map. They must not participate in any collective 01555 // operations. We ask first whether the Map is null, because 01556 // isDistributed() defers that question to the Map. We still 01557 // compute and return local norms for processes not participating 01558 // in collective operations; those probably don't make any sense, 01559 // but it doesn't hurt to do them, since it's illegal to call 01560 // normInf() on those processes anyway. 01561 if (this->isDistributed ()) { 01562 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null : 01563 this->getMap ()->getComm (); 01564 // The calling process only participates in the collective if 01565 // both the Map and its Comm on that process are nonnull. 01566 if (! comm.is_null ()) { 01567 // MPI doesn't allow aliasing of arguments, so we have to make 01568 // a copy of the local sum. 01569 norms_view_type lclNorms ("MV::normInf lcl", numNorms); 01570 Kokkos::deep_copy (lclNorms, theNorms); 01571 const mag_type* const lclSum = lclNorms.ptr_on_device (); 01572 mag_type* const gblSum = theNorms.ptr_on_device (); 01573 reduceAll<int, mag_type> (*comm, REDUCE_MAX, static_cast<int> (numVecs), 01574 lclSum, gblSum); 01575 } 01576 } 01577 } 01578 01579 01580 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01581 void 01582 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01583 meanValue (const Teuchos::ArrayView<Scalar> &means) const 01584 { 01585 // KR FIXME MVT::Sum (might already exist). 01586 // 01587 // KR FIXME Overload this method to take a View. 01588 01589 using Teuchos::Array; 01590 using Teuchos::ArrayRCP; 01591 using Teuchos::arcp_const_cast; 01592 using Teuchos::reduceAll; 01593 using Teuchos::REDUCE_SUM; 01594 01595 using Kokkos::ALL; 01596 using Kokkos::subview; 01597 01598 typedef Teuchos::ScalarTraits<Scalar> SCT; 01599 01600 const size_t numVecs = getNumVectors(); 01601 01602 TEUCHOS_TEST_FOR_EXCEPTION( 01603 static_cast<size_t> (means.size ()) != numVecs, std::runtime_error, 01604 "Tpetra::MultiVector::meanValue: means.size() must be as large as " 01605 "the number of vectors in *this."); 01606 // compute local components of the means 01607 // sum these across all nodes 01608 view_.template sync<DeviceType>(); 01609 if (isConstantStride ()) { 01610 Kokkos::MV_Sum (&means[0], view_.d_view, getLocalLength ()); 01611 } 01612 else { 01613 // FIXME (mfh 11 Mar 2014) Once we have strided Views, we won't 01614 // have to write the explicit for loop over columns any more. 01615 for (size_t k = 0; k < numVecs; ++k) { 01616 typedef Kokkos::View<Scalar*,DeviceType> view_type; 01617 const size_t curCol = whichVectors_[k]; 01618 view_type vector_k = subview<view_type> (view_.d_view, ALL (), curCol); 01619 means[k] = Kokkos::V_Sum (vector_k); 01620 } 01621 } 01622 if (this->isDistributed()) { 01623 Array<Scalar> lmeans(means); 01624 // only combine if we are a distributed MV 01625 reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, static_cast<int> (numVecs), 01626 lmeans.getRawPtr (), means.getRawPtr ()); 01627 } 01628 // mfh 12 Apr 2012: Don't take out the cast from the ordinal type 01629 // to the magnitude type, since operator/ (std::complex<T>, int) 01630 // isn't necessarily defined. 01631 const Scalar OneOverN = 01632 SCT::one() / static_cast<typename SCT::magnitudeType> (getGlobalLength ()); 01633 for (typename ArrayView<Scalar>::iterator i = means.begin(); 01634 i != means.begin() + numVecs; 01635 ++i) 01636 { 01637 (*i) = (*i) * OneOverN; 01638 } 01639 } 01640 01641 01642 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01643 void 01644 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01645 randomize () 01646 { 01647 const uint64_t myRank = 01648 static_cast<uint64_t> (this->getMap ()->getComm ()->getRank ()); 01649 uint64_t seed64 = static_cast<uint64_t> (std::rand ()) + myRank + 17311uLL; 01650 unsigned int seed = static_cast<unsigned int> (seed64&0xffffffff); 01651 01652 Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool(seed); 01653 01654 Scalar max = Kokkos::rand<typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type, Scalar>::max (); 01655 Scalar min = Kokkos::Details::ArithTraits<Scalar>::is_signed ? Scalar(-max) : Kokkos::Details::ArithTraits<Scalar>::zero (); 01656 01657 01658 if (isConstantStride ()) { 01659 Kokkos::fill_random(view_.d_view,rand_pool,min,max); 01660 view_.template modify<DeviceType>(); 01661 } 01662 else { 01663 const size_t numVecs = getNumVectors(); 01664 view_.template sync<DeviceType>(); 01665 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01666 for (size_t k = 0; k < numVecs; ++k) { 01667 view_type vector_k = Kokkos::subview<view_type> (view_.d_view, Kokkos::ALL (), whichVectors_[k]); 01668 Kokkos::fill_random(vector_k,rand_pool,min,max); 01669 } 01670 view_.template modify<DeviceType>(); 01671 } 01672 } 01673 01674 01675 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01676 void 01677 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01678 putScalar (const Scalar& alpha) 01679 { 01680 const size_t numVecs = getNumVectors(); 01681 if (isConstantStride ()) { 01682 Kokkos::Impl::ViewFill<typename dual_view_type::t_dev> (view_.d_view, alpha); 01683 } 01684 else { 01685 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01686 for (size_t k = 0; k < numVecs; ++k) { 01687 view_type vector_k = Kokkos::subview<view_type> (view_.d_view, Kokkos::ALL (), whichVectors_[k]); 01688 Kokkos::Impl::ViewFill<view_type> (vector_k, alpha); 01689 } 01690 } 01691 } 01692 01693 01694 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01695 void 01696 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01697 replaceMap (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& newMap) 01698 { 01699 using Teuchos::ArrayRCP; 01700 using Teuchos::Comm; 01701 using Teuchos::RCP; 01702 01703 // mfh 28 Mar 2013: This method doesn't forget whichVectors_, so 01704 // it might work if the MV is a column view of another MV. 01705 // However, things might go wrong when restoring the original 01706 // Map, so we don't allow this case for now. 01707 TEUCHOS_TEST_FOR_EXCEPTION( 01708 ! this->isConstantStride (), std::logic_error, 01709 "Tpetra::MultiVector::replaceMap: This method does not currently work " 01710 "if the MultiVector is a column view of another MultiVector (that is, if " 01711 "isConstantStride() == false)."); 01712 01713 // Case 1: current Map and new Map are both nonnull on this process. 01714 // Case 2: current Map is nonnull, new Map is null. 01715 // Case 3: current Map is null, new Map is nonnull. 01716 // Case 4: both Maps are null: forbidden. 01717 // 01718 // Case 1 means that we don't have to do anything on this process, 01719 // other than assign the new Map. (We always have to do that.) 01720 // It's an error for the user to supply a Map that requires 01721 // resizing in this case. 01722 // 01723 // Case 2 means that the calling process is in the current Map's 01724 // communicator, but will be excluded from the new Map's 01725 // communicator. We don't have to do anything on the calling 01726 // process; just leave whatever data it may have alone. 01727 // 01728 // Case 3 means that the calling process is excluded from the 01729 // current Map's communicator, but will be included in the new 01730 // Map's communicator. This means we need to (re)allocate the 01731 // local (KokkosClassic::)MultiVector if it does not have the right 01732 // number of rows. If the new number of rows is nonzero, we'll 01733 // fill the newly allocated local data with zeros, as befits a 01734 // projection operation. 01735 // 01736 // The typical use case for Case 3 is that the MultiVector was 01737 // first created with the Map with more processes, then that Map 01738 // was replaced with a Map with fewer processes, and finally the 01739 // original Map was restored on this call to replaceMap. 01740 01741 #ifdef HAVE_TEUCHOS_DEBUG 01742 // mfh 28 Mar 2013: We can't check for compatibility across the 01743 // whole communicator, unless we know that the current and new 01744 // Maps are nonnull on _all_ participating processes. 01745 // TEUCHOS_TEST_FOR_EXCEPTION( 01746 // origNumProcs == newNumProcs && ! this->getMap ()->isCompatible (*map), 01747 // std::invalid_argument, "Tpetra::MultiVector::project: " 01748 // "If the input Map's communicator is compatible (has the same number of " 01749 // "processes as) the current Map's communicator, then the two Maps must be " 01750 // "compatible. The replaceMap() method is not for data redistribution; " 01751 // "use Import or Export for that purpose."); 01752 01753 // TODO (mfh 28 Mar 2013) Add compatibility checks for projections 01754 // of the Map, in case the process counts don't match. 01755 #endif // HAVE_TEUCHOS_DEBUG 01756 01757 if (this->getMap ().is_null ()) { // current Map is null 01758 // If this->getMap() is null, that means that this MultiVector 01759 // has already had replaceMap happen to it. In that case, just 01760 // reallocate the DualView with the right size. 01761 01762 TEUCHOS_TEST_FOR_EXCEPTION( 01763 newMap.is_null (), std::invalid_argument, 01764 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. " 01765 "This probably means that the input Map is incorrect."); 01766 01767 // Case 3: current Map is null, new Map is nonnull. 01768 // Reallocate the DualView with the right dimensions. 01769 const size_t newNumRows = newMap->getNodeNumElements (); 01770 const size_t origNumRows = view_.dimension_0 (); 01771 const size_t numCols = this->getNumVectors (); 01772 01773 if (origNumRows != newNumRows || view_.dimension_1 () != numCols) { 01774 view_ = dual_view_type ("MV::dual_view", newNumRows, numCols); 01775 } 01776 } 01777 else if (newMap.is_null ()) { // Case 2: current Map is nonnull, new Map is null 01778 // I am an excluded process. Reinitialize my data so that I 01779 // have 0 rows. Keep the number of columns as before. 01780 const size_t newNumRows = static_cast<size_t> (0); 01781 const size_t numCols = this->getNumVectors (); 01782 view_ = dual_view_type ("MV::dual_view", newNumRows, numCols); 01783 } 01784 01785 this->map_ = newMap; 01786 } 01787 01788 01789 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01790 void 01791 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01792 scale (const Scalar &alpha) 01793 { 01794 using Kokkos::ALL; 01795 using Teuchos::arcp_const_cast; 01796 using Teuchos::ArrayRCP; 01797 01798 // NOTE: can't substitute putScalar(0.0) for scale(0.0), because 01799 // the former will overwrite NaNs present in the MultiVector, while the 01800 // semantics of this call require multiplying them by 0, which IEEE requires to be NaN 01801 const size_t numVecs = getNumVectors(); 01802 if (alpha == Teuchos::ScalarTraits<Scalar>::one()) { 01803 // do nothing 01804 } 01805 else if (isConstantStride ()) { 01806 view_.template sync<DeviceType>(); 01807 view_.template modify<DeviceType>(); 01808 Kokkos::MV_MulScalar (view_.d_view, alpha, view_.d_view); 01809 } 01810 else { 01811 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01812 01813 for (size_t k = 0; k < numVecs; ++k) { 01814 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 01815 view_type vector_k = Kokkos::subview<view_type> (view_.d_view, ALL (), this_col); 01816 Kokkos::V_MulScalar (vector_k, alpha, vector_k); 01817 } 01818 } 01819 } 01820 01821 01822 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01823 void 01824 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01825 scale (Teuchos::ArrayView<const Scalar> alphas) 01826 { 01827 using Kokkos::ALL; 01828 using Teuchos::arcp_const_cast; 01829 using Teuchos::ArrayRCP; 01830 01831 const size_t numVecs = this->getNumVectors(); 01832 TEUCHOS_TEST_FOR_EXCEPTION( 01833 static_cast<size_t> (alphas.size ()) != numVecs, std::invalid_argument, 01834 "Tpetra::MultiVector::scale(alphas): alphas.size() must be as large as " 01835 "the number of vectors in *this."); 01836 01837 const size_t myLen = view_.dimension_0(); 01838 if (myLen == 0) { 01839 return; 01840 } 01841 01842 if (isConstantStride ()) { 01843 Kokkos::DualView<Scalar*,device_type> k_alphas("Alphas::tmp",alphas.size()); 01844 for(int i=0; i<alphas.size(); i++) 01845 k_alphas.h_view(i) = alphas[i]; 01846 k_alphas.template modify<host_mirror_device_type>(); 01847 k_alphas.template sync<device_type>(); 01848 view_.template sync<DeviceType>(); 01849 view_.template modify<DeviceType>(); 01850 Kokkos::MV_MulScalar (view_.d_view, k_alphas.d_view, view_.d_view); 01851 } 01852 else { 01853 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01854 01855 for (size_t k = 0; k < numVecs; ++k) { 01856 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 01857 view_type vector_k = Kokkos::subview<view_type> (view_.d_view, ALL (), this_col); 01858 Kokkos::V_MulScalar (vector_k, alphas[k], vector_k); 01859 } 01860 } 01861 } 01862 01863 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01864 void 01865 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01866 scale (const Kokkos::View<const Scalar*, device_type> alphas) 01867 { 01868 using Kokkos::ALL; 01869 using Teuchos::arcp_const_cast; 01870 using Teuchos::ArrayRCP; 01871 01872 const size_t numVecs = this->getNumVectors(); 01873 TEUCHOS_TEST_FOR_EXCEPTION( 01874 static_cast<size_t> (alphas.dimension_0()) != numVecs, std::invalid_argument, 01875 "Tpetra::MultiVector::scale(alphas): alphas.size() must be as large as " 01876 "the number of vectors in *this."); 01877 01878 const size_t myLen = view_.dimension_0 (); 01879 if (myLen == 0) { 01880 return; 01881 } 01882 01883 if (isConstantStride ()) { 01884 view_.template sync<DeviceType>(); 01885 view_.template modify<DeviceType>(); 01886 Kokkos::MV_MulScalar (view_.d_view, alphas, view_.d_view); 01887 } 01888 else { 01889 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01890 typename Kokkos::View<const Scalar*, device_type>::HostMirror h_alphas = 01891 Kokkos::create_mirror_view (alphas); 01892 Kokkos::deep_copy (h_alphas, alphas); 01893 for (size_t k = 0; k < numVecs; ++k) { 01894 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 01895 view_type vector_k = Kokkos::subview<view_type> (view_.d_view, ALL (), this_col); 01896 Kokkos::V_MulScalar (vector_k, alphas(k), vector_k); 01897 } 01898 } 01899 } 01900 01901 01902 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01903 void 01904 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01905 scale (const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > &A) 01906 { 01907 const char tfecfFuncName[] = "scale(alpha,A)"; 01908 const size_t numVecs = getNumVectors (); 01909 01910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01911 getLocalLength () != A.getLocalLength (), std::runtime_error, 01912 ": MultiVectors do not have the same local length. " 01913 "this->getLocalLength() = " << getLocalLength () 01914 << " != A.getLocalLength() = " << A.getLocalLength () << "."); 01915 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01916 A.getNumVectors () != numVecs, std::runtime_error, 01917 ": MultiVectors do not have the same number of columns (vectors). " 01918 "this->getNumVectors() = " << getNumVectors () 01919 << " != A.getNumVectors() = " << A.getNumVectors () << "."); 01920 01921 if (isConstantStride () && A.isConstantStride ()) { 01922 view_.template sync<DeviceType>(); 01923 view_.template modify<DeviceType>(); 01924 Kokkos::MV_MulScalar (view_.d_view, alpha, A.view_.d_view); 01925 } 01926 else { 01927 using Kokkos::ALL; 01928 using Kokkos::subview; 01929 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01930 01931 view_.template sync<DeviceType>(); 01932 view_.template modify<DeviceType>(); 01933 A.view_.template sync<DeviceType>(); 01934 A.view_.template modify<DeviceType>(); 01935 for (size_t k = 0; k < numVecs; ++k) { 01936 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 01937 view_type vector_k = subview<view_type> (view_.d_view, ALL (), this_col); 01938 const size_t A_col = isConstantStride () ? k : A.whichVectors_[k]; 01939 view_type vector_Ak = subview<view_type> (A.view_.d_view, ALL (), A_col); 01940 Kokkos::V_MulScalar (vector_k, alpha, vector_Ak); 01941 } 01942 } 01943 } 01944 01945 01946 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 01947 void 01948 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 01949 reciprocal (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > &A) 01950 { 01951 const char tfecfFuncName[] = "reciprocal"; 01952 01953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01954 getLocalLength () != A.getLocalLength (), std::runtime_error, 01955 ": MultiVectors do not have the same local length. " 01956 "this->getLocalLength() = " << getLocalLength () 01957 << " != A.getLocalLength() = " << A.getLocalLength () << "."); 01958 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 01959 A.getNumVectors () != this->getNumVectors (), std::runtime_error, 01960 ": MultiVectors do not have the same number of columns (vectors). " 01961 "this->getNumVectors() = " << getNumVectors () 01962 << " != A.getNumVectors() = " << A.getNumVectors () << "."); 01963 01964 const size_t numVecs = getNumVectors (); 01965 try { 01966 if (isConstantStride () && A.isConstantStride ()) { 01967 view_.template sync<DeviceType> (); 01968 view_.template modify<DeviceType> (); 01969 Kokkos::MV_Reciprocal (view_.d_view, A.view_.d_view); 01970 } 01971 else { 01972 using Kokkos::ALL; 01973 using Kokkos::subview; 01974 typedef Kokkos::View<Scalar*, DeviceType> view_type; 01975 01976 view_.template sync<DeviceType> (); 01977 view_.template modify<DeviceType> (); 01978 01979 // FIXME (mfh 23 Jul 2014) I'm not sure if it should be our 01980 // responsibility to sync A. 01981 A.view_.template sync<DeviceType> (); 01982 A.view_.template modify<DeviceType> (); 01983 01984 for (size_t k = 0; k < numVecs; ++k) { 01985 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 01986 view_type vector_k = subview<view_type> (view_.d_view, ALL (), this_col); 01987 const size_t A_col = isConstantStride () ? k : A.whichVectors_[k]; 01988 view_type vector_Ak = subview<view_type> (A.view_.d_view, ALL (), A_col); 01989 Kokkos::V_Reciprocal(vector_k, vector_Ak); 01990 } 01991 } 01992 } 01993 catch (std::runtime_error &e) { 01994 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, 01995 ": Caught exception from Kokkos: " << e.what () << std::endl); 01996 } 01997 } 01998 01999 02000 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02001 void 02002 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02003 abs (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& A) 02004 { 02005 const char tfecfFuncName[] = "abs"; 02006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02007 getLocalLength () != A.getLocalLength (), std::runtime_error, 02008 ": MultiVectors do not have the same local length. " 02009 "this->getLocalLength() = " << getLocalLength () 02010 << " != A.getLocalLength() = " << A.getLocalLength () << "."); 02011 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02012 A.getNumVectors () != this->getNumVectors (), std::runtime_error, 02013 ": MultiVectors do not have the same number of columns (vectors). " 02014 "this->getNumVectors() = " << getNumVectors () 02015 << " != A.getNumVectors() = " << A.getNumVectors () << "."); 02016 02017 const size_t numVecs = getNumVectors (); 02018 if (isConstantStride () && A.isConstantStride ()) { 02019 view_.template sync<DeviceType>(); 02020 view_.template modify<DeviceType>(); 02021 Kokkos::MV_Abs(view_.d_view,A.view_.d_view); 02022 } 02023 else { 02024 using Kokkos::ALL; 02025 using Kokkos::subview; 02026 typedef Kokkos::View<Scalar*, DeviceType> view_type; 02027 02028 view_.template sync<DeviceType> (); 02029 view_.template modify<DeviceType> (); 02030 02031 // FIXME (mfh 23 Jul 2014) I'm not sure if it should be our 02032 // responsibility to sync A. 02033 A.view_.template sync<DeviceType> (); 02034 A.view_.template modify<DeviceType> (); 02035 02036 for (size_t k=0; k < numVecs; ++k) { 02037 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 02038 view_type vector_k = subview<view_type> (view_.d_view, ALL (), this_col); 02039 const size_t A_col = isConstantStride () ? k : A.whichVectors_[k]; 02040 view_type vector_Ak = subview<view_type> (A.view_.d_view, ALL (), A_col); 02041 Kokkos::V_Abs(vector_k, vector_Ak); 02042 } 02043 } 02044 } 02045 02046 02047 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02048 void 02049 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02050 update (const Scalar& alpha, 02051 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& A, 02052 const Scalar& beta) 02053 { 02054 using Kokkos::ALL; 02055 using Kokkos::subview; 02056 typedef Kokkos::View<Scalar*, DeviceType> view_type; 02057 const char tfecfFuncName[] = "update"; 02058 02059 // this = beta*this + alpha*A 02060 // must support case where &this == &A 02061 // can't short circuit on alpha==0.0 or beta==0.0, because 0.0*NaN != 0.0 02062 02063 // FIXME (mfh 23 Jul 2014) Actually, the BLAS' AXPY allows short 02064 // circuiting for alpha == 0. This is the convention for both the 02065 // dense and sparse BLAS. 02066 02067 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02068 getLocalLength () != A.getLocalLength (), std::invalid_argument, 02069 ": The input MultiVector A has " << A.getLocalLength () << " local " 02070 "row(s), but this MultiVector has " << getLocalLength () << " local " 02071 "row(s)."); 02072 02073 const size_t numVecs = getNumVectors (); 02074 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02075 A.getNumVectors () != numVecs, std::invalid_argument, 02076 ": The input MultiVector A has " << A.getNumVectors () << " column(s), " 02077 "but this MultiVector has " << numVecs << " column(s)."); 02078 02079 const size_t lclNumRows = getLocalLength (); 02080 if (isConstantStride () && A.isConstantStride ()) { 02081 Kokkos::MV_Add (view_.d_view, alpha, A.view_.d_view, beta, 02082 view_.d_view, lclNumRows); 02083 } 02084 else { 02085 // Make sure that Kokkos only uses the local length for add. 02086 const std::pair<size_t, size_t> rowRng (0, lclNumRows); 02087 for (size_t k = 0; k < numVecs; ++k) { 02088 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 02089 const size_t A_col = A.isConstantStride () ? k : A.whichVectors_[k]; 02090 view_type this_colView = 02091 subview<view_type> (view_.d_view, rowRng, this_col); 02092 view_type A_colView = 02093 subview<view_type> (A.view_.d_view, rowRng, A_col); 02094 Kokkos::V_Add (this_colView, alpha, A_colView, beta, this_colView); 02095 } 02096 } 02097 } 02098 02099 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02100 void 02101 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02102 update (const Scalar& alpha, 02103 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& A, 02104 const Scalar& beta, 02105 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& B, 02106 const Scalar& gamma) 02107 { 02108 using Kokkos::ALL; 02109 using Kokkos::V_Add; 02110 using Kokkos::subview; 02111 typedef Kokkos::View<Scalar*, DeviceType> view_type; 02112 const char tfecfFuncName[] = "update"; 02113 02114 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02115 getLocalLength () != A.getLocalLength (), std::invalid_argument, 02116 ": The input MultiVector A has " << A.getLocalLength () << " local " 02117 "row(s), but this MultiVector has " << getLocalLength () << " local " 02118 "row(s)."); 02119 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02120 getLocalLength () != B.getLocalLength (), std::invalid_argument, 02121 ": The input MultiVector B has " << B.getLocalLength () << " local " 02122 "row(s), but this MultiVector has " << getLocalLength () << " local " 02123 "row(s)."); 02124 const size_t numVecs = getNumVectors (); 02125 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02126 A.getNumVectors () != numVecs, std::invalid_argument, 02127 ": The input MultiVector A has " << A.getNumVectors () << " column(s), " 02128 "but this MultiVector has " << numVecs << " column(s)."); 02129 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02130 B.getNumVectors () != numVecs, std::invalid_argument, 02131 ": The input MultiVector B has " << B.getNumVectors () << " column(s), " 02132 "but this MultiVector has " << numVecs << " column(s)."); 02133 02134 const Scalar zero = Teuchos::ScalarTraits<Scalar>::zero (); 02135 const Scalar one = Teuchos::ScalarTraits<Scalar>::one (); 02136 02137 if (isConstantStride() && A.isConstantStride () && B.isConstantStride ()) { 02138 if (gamma == zero) { 02139 Kokkos::MV_Add (view_.d_view, alpha, A.view_.d_view, beta, 02140 B.view_.d_view); 02141 } else { 02142 Kokkos::MV_Add (view_.d_view, alpha, A.view_.d_view, gamma, view_.d_view); 02143 Kokkos::MV_Add (view_.d_view, beta, B.view_.d_view, one, view_.d_view); 02144 } 02145 } else { // some input (or *this) is not constant stride 02146 02147 // Make sure that Kokkos only uses the local length for add. 02148 const std::pair<size_t, size_t> rowRng (0, getLocalLength ()); 02149 02150 for (size_t k = 0; k < numVecs; ++k) { 02151 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 02152 const size_t A_col = A.isConstantStride () ? k : A.whichVectors_[k]; 02153 const size_t B_col = B.isConstantStride () ? k : B.whichVectors_[k]; 02154 if (gamma == zero) { 02155 // TODO: make sure it only uses LocalLength for add. 02156 V_Add (subview<view_type> (view_.d_view, rowRng, this_col), 02157 alpha, 02158 subview<view_type> (A.view_.d_view, rowRng, A_col), 02159 beta, 02160 subview<view_type> (B.view_.d_view, rowRng, B_col)); 02161 } else { 02162 V_Add (subview<view_type> (view_.d_view, rowRng, this_col), 02163 alpha, 02164 subview<view_type> (A.view_.d_view, rowRng, A_col), 02165 gamma, 02166 subview<view_type> (view_.d_view, rowRng, this_col)); 02167 V_Add (subview<view_type> (view_.d_view, rowRng, this_col), 02168 beta, 02169 subview<view_type> (B.view_.d_view, rowRng, B_col), 02170 one, 02171 subview<view_type> (view_.d_view, rowRng, this_col)); 02172 } 02173 } 02174 } 02175 } 02176 02177 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02178 Teuchos::ArrayRCP<const Scalar> 02179 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02180 getData (size_t j) const 02181 { 02182 using Kokkos::ALL; 02183 using Kokkos::subview; 02184 typedef typename dual_view_type::host_mirror_device_type host_type; 02185 typedef typename dual_view_type::t_host host_view_type; 02186 02187 // Any MultiVector method that called the (classic) Kokkos Node's 02188 // viewBuffer or viewBufferNonConst methods always implied a 02189 // device->host synchronization. Thus, we synchronize here as 02190 // well. 02191 view_.template sync<host_type> (); 02192 02193 // Get a host view of the entire MultiVector's data. 02194 host_view_type hostView = view_.template view<host_type> (); 02195 // Get a subview of column j. 02196 host_view_type hostView_j; 02197 if (isConstantStride ()) { 02198 hostView_j = subview<host_view_type> (hostView, ALL (), j); 02199 } else { 02200 hostView_j = subview<host_view_type> (hostView, ALL (), whichVectors_[j]); 02201 } 02202 02203 // Wrap up the subview of column j in an ArrayRCP<const scalar_type>. 02204 Teuchos::ArrayRCP<scalar_type> dataAsArcp = 02205 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ()); 02206 02207 #ifdef HAVE_TPETRA_DEBUG 02208 TEUCHOS_TEST_FOR_EXCEPTION( 02209 hostView_j.dimension_0 () < dataAsArcp.size (), std::logic_error, 02210 "Tpetra::MultiVector::getData: hostView_j.dimension_0() = " 02211 << hostView_j.dimension_0 () << " < dataAsArcp.size() = " 02212 << dataAsArcp.size () << ". " 02213 "Please report this bug to the Tpetra developers."); 02214 TEUCHOS_TEST_FOR_EXCEPTION( 02215 getLocalLength () != 0 && getNumVectors () != 0 && 02216 hostView_j(0,0) != dataAsArcp[0], std::logic_error, "Tpetra::MultiVector" 02217 "::getData: hostView_j(0,0) = " << hostView_j(0,0) << " != dataAsArcp[0]" 02218 " = " << dataAsArcp[0] << ". Please report this bug to the Tpetra " 02219 "developers."); 02220 #endif // HAVE_TPETRA_DEBUG 02221 02222 return Teuchos::arcp_const_cast<const scalar_type> (dataAsArcp); 02223 } 02224 02225 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02226 Teuchos::ArrayRCP<Scalar> 02227 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02228 getDataNonConst (size_t j) 02229 { 02230 using Kokkos::ALL; 02231 using Kokkos::subview; 02232 typedef typename dual_view_type::host_mirror_device_type host_type; 02233 typedef typename dual_view_type::t_host host_view_type; 02234 02235 // Any MultiVector method that called the (classic) Kokkos Node's 02236 // viewBuffer or viewBufferNonConst methods always implied a 02237 // device->host synchronization. Thus, we synchronize here as 02238 // well. 02239 view_.template sync<host_type> (); 02240 02241 // Get a host view of the entire MultiVector's data. 02242 host_view_type hostView = view_.template view<host_type> (); 02243 // Get a subview of column j. 02244 host_view_type hostView_j; 02245 if (isConstantStride ()) { 02246 hostView_j = subview<host_view_type> (hostView, ALL (), j); 02247 } else { 02248 hostView_j = subview<host_view_type> (hostView, ALL (), whichVectors_[j]); 02249 } 02250 02251 // Calling getDataNonConst() implies that the user plans to modify 02252 // the values in the MultiVector, so we call modify on the view 02253 // here. 02254 view_.template modify<host_type> (); 02255 02256 // Wrap up the subview of column j in an ArrayRCP<const scalar_type>. 02257 Teuchos::ArrayRCP<scalar_type> dataAsArcp = 02258 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ()); 02259 02260 #ifdef HAVE_TPETRA_DEBUG 02261 TEUCHOS_TEST_FOR_EXCEPTION( 02262 hostView_j.dimension_0 () < dataAsArcp.size (), std::logic_error, 02263 "Tpetra::MultiVector::getDataNonConst: hostView_j.dimension_0() = " 02264 << hostView_j.dimension_0 () << " < dataAsArcp.size() = " 02265 << dataAsArcp.size () << ". " 02266 "Please report this bug to the Tpetra developers."); 02267 TEUCHOS_TEST_FOR_EXCEPTION( 02268 getLocalLength () != 0 && getNumVectors () != 0 && 02269 hostView_j(0,0) != dataAsArcp[0], std::logic_error, "Tpetra::MultiVector" 02270 "::getDataNonConst: hostView_j(0,0) = " << hostView_j(0,0) << " != " 02271 "dataAsArcp[0] = " << dataAsArcp[0] << ". Please report this bug to the " 02272 "Tpetra developers."); 02273 #endif // HAVE_TPETRA_DEBUG 02274 02275 return dataAsArcp; 02276 } 02277 02278 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02279 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& 02280 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02281 operator= (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& source) 02282 { 02283 // KR FIXME This is just the assignment operator. 02284 02285 if (this != &source) { 02286 base_type::operator= (source); 02287 // 02288 // operator= implements view semantics (shallow copy). 02289 // 02290 02291 // OK: Kokkos::View operator= also implements view semantics. 02292 view_ = source.view_; 02293 origView_ = source.origView_; 02294 02295 // NOTE (mfh 24 Mar 2014) Christian wrote here that assigning 02296 // whichVectors_ is "probably not ok" (probably constitutes deep 02297 // copy). I would say that it's OK, because whichVectors_ is 02298 // immutable (from the user's perspective); it's analogous to 02299 // the dimensions or stride. Once we make whichVectors_ a 02300 // Kokkos::View instead of a Teuchos::Array, all debate will go 02301 // away and we will unquestionably have view semantics. 02302 whichVectors_ = source.whichVectors_; 02303 } 02304 return *this; 02305 } 02306 02307 02308 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02309 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02310 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02311 subCopy (const Teuchos::ArrayView<const size_t>& cols) const 02312 { 02313 using Teuchos::RCP; 02314 using Teuchos::rcp; 02315 typedef typename dual_view_type::host_mirror_device_type 02316 host_mirror_device_type; 02317 typedef typename dual_view_type::t_host host_view_type; 02318 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> MV; 02319 02320 // Check whether the index set in cols is contiguous. If it is, 02321 // use the more efficient Range1D version of subCopy. 02322 { 02323 bool contiguous = true; 02324 const size_t numCopyVecs = static_cast<size_t> (cols.size ()); 02325 for (size_t j = 1; j < numCopyVecs; ++j) { 02326 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) { 02327 contiguous = false; 02328 break; 02329 } 02330 } 02331 if (contiguous && numCopyVecs > 0) { 02332 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1])); 02333 } 02334 } 02335 02336 // Sync the source MultiVector (*this) to host first. Copy it to 02337 // the output View on host, then sync the output View (only) to 02338 // device. Doing copies on host saves us the trouble of copying 02339 // whichVecsSrc and whichVecsDst over to the device. 02340 view_.template sync<host_mirror_device_type> (); 02341 02342 const size_t numRows = this->getLocalLength (); 02343 const size_t numCols = this->getNumVectors (); 02344 const std::pair<size_t, size_t> rowRange (0, numRows); 02345 const std::pair<size_t, size_t> colRange (0, numCols); 02346 const size_t numColsToCopy = static_cast<size_t> (cols.size ()); 02347 02348 // Create a DualView which will be a contiguously stored deep copy of this MV's view. 02349 dual_view_type dstView ("MV::dual_view", numRows, numColsToCopy); 02350 Kokkos::View<LocalOrdinal*, host_mirror_device_type> whichVecsDst ("whichVecsDst", numColsToCopy); 02351 Kokkos::View<LocalOrdinal*, host_mirror_device_type> whichVecsSrc ("whichVecsSrc", numColsToCopy); 02352 02353 if (! this->isConstantStride ()) { 02354 for (size_t j = 0; j < numColsToCopy; ++j) { 02355 whichVecsSrc(j) = static_cast<LocalOrdinal> (this->whichVectors_[cols[j]]); 02356 } 02357 } 02358 else { 02359 for (size_t j = 0; j < numColsToCopy; ++j) { 02360 whichVecsSrc(j) = static_cast<LocalOrdinal> (cols[j]); 02361 } 02362 } 02363 for (size_t j = 0; j < numColsToCopy; ++j) { 02364 whichVecsDst(j) = static_cast<LocalOrdinal> (j); 02365 } 02366 02367 // 02368 // Do the copy on host first. 02369 // 02370 host_view_type srcView = 02371 Kokkos::subview<host_view_type> (view_.h_view, rowRange, colRange); 02372 DeepCopySelectedVectors<host_view_type, host_view_type, LocalOrdinal, 02373 host_mirror_device_type, false, false> f (dstView.h_view, srcView, 02374 whichVecsDst, whichVecsSrc); 02375 Kokkos::parallel_for (numRows, f); 02376 02377 // Sync the output DualView (only) back to device. 02378 dstView.template sync<device_type> (); 02379 02380 // Create and return a MultiVector using the new DualView. 02381 return rcp (new MV (this->getMap (), dstView)); 02382 } 02383 02384 02385 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02386 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02387 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02388 subCopy (const Teuchos::Range1D &colRng) const 02389 { 02390 using Teuchos::RCP; 02391 using Teuchos::rcp; 02392 typedef typename dual_view_type::host_mirror_device_type 02393 host_mirror_device_type; 02394 typedef typename dual_view_type::t_host host_view_type; 02395 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> MV; 02396 02397 // Sync the source MultiVector (*this) to host first. Copy it to 02398 // the output View on host, then sync the output View (only) to 02399 // device. Doing copies on host saves us the trouble of copying 02400 // whichVecsSrc and whichVecsDst over to the device. 02401 view_.template sync<host_mirror_device_type> (); 02402 02403 const size_t numRows = this->getLocalLength (); 02404 const size_t numCols = this->getNumVectors (); 02405 const std::pair<size_t, size_t> rowRange (0, numRows); 02406 const std::pair<size_t, size_t> colRange (0, numCols); 02407 02408 // Range1D is an inclusive range. If the upper bound is less then 02409 // the lower bound, that signifies an invalid range, which we 02410 // interpret as "copy zero columns." 02411 const size_t numColsToCopy = (colRng.ubound () >= colRng.lbound ()) ? 02412 static_cast<size_t> (colRng.size ()) : 02413 static_cast<size_t> (0); 02414 02415 // Create a DualView which will be a contiguously stored deep copy of this MV's view. 02416 dual_view_type dstView ("MV::dual_view", numRows, numColsToCopy); 02417 Kokkos::View<LocalOrdinal*, host_mirror_device_type> whichVecsDst ("whichVecsDst", numColsToCopy); 02418 Kokkos::View<LocalOrdinal*, host_mirror_device_type> whichVecsSrc ("whichVecsSrc", numColsToCopy); 02419 02420 if (! this->isConstantStride ()) { 02421 for (size_t j = 0; j < numColsToCopy; ++j) { 02422 const size_t col = static_cast<size_t> (colRng.lbound ()) + j; 02423 whichVecsSrc(j) = static_cast<LocalOrdinal> (this->whichVectors_[col]); 02424 } 02425 } 02426 else { 02427 for (size_t j = 0; j < numColsToCopy; ++j) { 02428 const size_t col = static_cast<size_t> (colRng.lbound ()) + j; 02429 whichVecsSrc(j) = static_cast<LocalOrdinal> (col); 02430 } 02431 } 02432 for (size_t j = 0; j < numColsToCopy; ++j) { 02433 whichVecsDst(j) = static_cast<LocalOrdinal> (j); 02434 } 02435 02436 // 02437 // Do the copy on host first. 02438 // 02439 // FIXME (mfh 10 Jul 2014) Exploit contiguity of the desired columns. 02440 host_view_type srcView = 02441 Kokkos::subview<host_view_type> (view_.h_view, rowRange, colRange); 02442 DeepCopySelectedVectors<host_view_type, host_view_type, LocalOrdinal, 02443 host_mirror_device_type, false, false> f (dstView.h_view, srcView, 02444 whichVecsDst, whichVecsSrc); 02445 Kokkos::parallel_for (numRows, f); 02446 02447 // Sync the output DualView (only) back to device. 02448 dstView.template sync<device_type> (); 02449 02450 // Create and return a MultiVector using the new DualView. 02451 return rcp (new MV (this->getMap (), dstView)); 02452 } 02453 02454 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02455 size_t 02456 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02457 getOrigNumLocalRows () const { 02458 return origView_.dimension_0 (); 02459 } 02460 02461 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02462 size_t 02463 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02464 getOrigNumLocalCols () const { 02465 return origView_.dimension_1 (); 02466 } 02467 02468 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02469 Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02470 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02471 offsetView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap, 02472 size_t offset) const 02473 { 02474 using Kokkos::ALL; 02475 using Kokkos::subview; 02476 using Teuchos::RCP; 02477 using Teuchos::rcp; 02478 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 02479 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MV; 02480 02481 const size_t newNumRows = subMap->getNodeNumElements (); 02482 const bool tooManyElts = newNumRows + offset > this->getOrigNumLocalRows (); 02483 if (tooManyElts) { 02484 const int myRank = this->getMap ()->getComm ()->getRank (); 02485 TEUCHOS_TEST_FOR_EXCEPTION( 02486 newNumRows + offset > this->getLocalLength (), std::runtime_error, 02487 "Tpetra::MultiVector::offsetView(NonConst): Invalid input Map. The " 02488 "input Map owns " << newNumRows << " entries on process " << myRank << 02489 ". offset = " << offset << ". Yet, the MultiVector contains only " 02490 << this->getOrigNumLocalRows () << " rows on this process."); 02491 } 02492 02493 #ifdef HAVE_TPETRA_DEBUG 02494 // Ensure that the returned MultiVector has the same stride. 02495 const size_t strideBefore = isConstantStride () ? getStride () : static_cast<size_t> (0); 02496 #endif // HAVE_TPETRA_DEBUG 02497 02498 const std::pair<size_t, size_t> offsetPair (offset, offset + newNumRows); 02499 // FIXME (mfh 10 May 2014) Use of origView_ instead of view_ for 02500 // the second argument may be wrong, if view_ resulted from a 02501 // previous call to offsetView with offset != 0. 02502 dual_view_type newView = 02503 subview<dual_view_type> (origView_, offsetPair, ALL ()); 02504 RCP<const MV> subViewMV; 02505 if (isConstantStride ()) { 02506 subViewMV = rcp (new MV (subMap, newView, origView_)); 02507 02508 } 02509 else { 02510 subViewMV = rcp (new MV (subMap, newView, origView_, whichVectors_ ())); 02511 } 02512 02513 #ifdef HAVE_TPETRA_DEBUG 02514 const size_t strideAfter = subViewMV->isConstantStride () ? 02515 subViewMV->getStride () : static_cast<size_t> (0); 02516 02517 KokkosClassic::MultiVector<Scalar, node_type> X_lcl = this->getLocalMV (); 02518 KokkosClassic::MultiVector<Scalar, node_type> Y_lcl = subViewMV->getLocalMV (); 02519 02520 if (strideBefore != strideAfter || 02521 (offset == 0 && X_lcl.getValues ().getRawPtr () != Y_lcl.getValues ().getRawPtr ()) || 02522 X_lcl.getNumCols () != Y_lcl.getNumCols () || 02523 X_lcl.getStride () != Y_lcl.getStride () || 02524 X_lcl.getNumRows () != this->getMap ()->getNodeNumElements () || 02525 Y_lcl.getNumRows () != subMap->getNodeNumElements ()) { 02526 using std::endl; 02527 std::ostringstream os; 02528 os << "Tpetra::MultiVector::offsetView: Something is wrong." << endl 02529 << "strideBefore = " << strideBefore 02530 << ", strideAfter = " << strideAfter 02531 << endl 02532 << "Raw pointers of KokkosClassic::MultiVector objects equal? " 02533 << ((X_lcl.getValues ().getRawPtr () != Y_lcl.getValues ().getRawPtr ()) ? "true" : "false") 02534 << endl 02535 << "Dimensions of KokkosClassic::MultiVector objects: " 02536 << "[" << X_lcl.getNumRows () << ", " << X_lcl.getNumCols () << "], " 02537 << "[" << Y_lcl.getNumRows () << ", " << Y_lcl.getNumCols () << "]" 02538 << endl 02539 << "Strides of KokkosClassic::MultiVector objects: " 02540 << X_lcl.getStride () << ", " << Y_lcl.getStride () << endl 02541 << "this->getMap()->getNodeNumElements() = " 02542 << this->getMap ()->getNodeNumElements () << endl 02543 << "subMap->getNodeNumElements() = " 02544 << subMap->getNodeNumElements () << endl 02545 << "Please report this bug to the Tpetra developers."; 02546 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, os.str ()); 02547 } 02548 #endif // HAVE_TPETRA_DEBUG 02549 02550 return subViewMV; 02551 } 02552 02553 02554 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02555 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02556 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02557 offsetViewNonConst (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap, 02558 size_t offset) 02559 { 02560 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 02561 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MV; 02562 return Teuchos::rcp_const_cast<MV> (this->offsetView (subMap, offset)); 02563 } 02564 02565 02566 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02567 Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02568 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02569 subView (const ArrayView<const size_t>& cols) const 02570 { 02571 using Teuchos::Array; 02572 using Teuchos::rcp; 02573 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> MV; 02574 02575 const size_t numViewCols = static_cast<size_t> (cols.size ()); 02576 TEUCHOS_TEST_FOR_EXCEPTION( 02577 numViewCols < 1, std::runtime_error, "Tpetra::MultiVector::subView" 02578 "(const Teuchos::ArrayView<const size_t>&): The input array cols must " 02579 "contain at least one entry, but cols.size() = " << cols.size () 02580 << " == 0."); 02581 02582 // Check whether the index set in cols is contiguous. If it is, 02583 // use the more efficient Range1D version of subView. 02584 bool contiguous = true; 02585 for (size_t j = 1; j < numViewCols; ++j) { 02586 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) { 02587 contiguous = false; 02588 break; 02589 } 02590 } 02591 if (contiguous) { 02592 if (numViewCols == 0) { 02593 // The output MV has no columns, so there is nothing to view. 02594 return rcp (new MV (this->getMap (), numViewCols)); 02595 } else { 02596 // Use the more efficient contiguous-index-range version. 02597 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1])); 02598 } 02599 } 02600 02601 if (isConstantStride ()) { 02602 return rcp (new MV (this->getMap (), view_, origView_, cols)); 02603 } 02604 else { 02605 Array<size_t> newcols (cols.size ()); 02606 for (size_t j = 0; j < numViewCols; ++j) { 02607 newcols[j] = whichVectors_[cols[j]]; 02608 } 02609 return rcp (new MV (this->getMap (), view_, origView_, newcols ())); 02610 } 02611 } 02612 02613 02614 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02615 Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02616 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02617 subView (const Teuchos::Range1D& colRng) const 02618 { 02619 using Kokkos::ALL; 02620 using Kokkos::subview; 02621 using Teuchos::Array; 02622 using Teuchos::rcp; 02623 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> MV; 02624 const char prefix[] = "Tpetra::MultiVector::subView(Range1D): "; 02625 02626 TEUCHOS_TEST_FOR_EXCEPTION( 02627 colRng.size() == 0, std::runtime_error, prefix << "Range must include " 02628 "at least one vector."); 02629 TEUCHOS_TEST_FOR_EXCEPTION( 02630 static_cast<size_t> (colRng.size ()) > this->getNumVectors (), 02631 std::runtime_error, prefix << "colRng.size() = " << colRng.size () 02632 << " > this->getNumVectors() = " << this->getNumVectors () << "."); 02633 02634 // resulting MultiVector is constant stride only if *this is 02635 if (isConstantStride ()) { 02636 // view goes from first entry of first vector to last entry of last vector 02637 std::pair<size_t, size_t> cols (colRng.lbound (), colRng.ubound () + 1); 02638 return rcp (new MV (this->getMap (), 02639 subview<dual_view_type> (view_, ALL (), cols), 02640 origView_)); 02641 } 02642 else { 02643 // otherwise, use a subset of this whichVectors_ to construct new multivector 02644 Array<size_t> which (whichVectors_.begin () + colRng.lbound (), 02645 whichVectors_.begin () + colRng.ubound () + 1); 02646 return rcp (new MV (this->getMap (), view_, origView_, which)); 02647 } 02648 } 02649 02650 02651 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02652 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02653 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02654 subViewNonConst (const ArrayView<const size_t> &cols) 02655 { 02656 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 02657 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MV; 02658 return Teuchos::rcp_const_cast<MV> (this->subView (cols)); 02659 } 02660 02661 02662 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02663 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02664 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02665 subViewNonConst (const Teuchos::Range1D &colRng) 02666 { 02667 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, 02668 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > MV; 02669 return Teuchos::rcp_const_cast<MV> (this->subView (colRng)); 02670 } 02671 02672 02673 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02674 Teuchos::RCP<const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02675 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02676 getVector (size_t j) const 02677 { 02678 using Kokkos::ALL; 02679 using Kokkos::subview; 02680 using Teuchos::rcp; 02681 // using Teuchos::rcp_const_cast; 02682 typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, 02683 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > V; 02684 02685 #ifdef HAVE_TPETRA_DEBUG 02686 TEUCHOS_TEST_FOR_EXCEPTION( 02687 vectorIndexOutOfRange(j), std::runtime_error, "Tpetra::MultiVector::" 02688 "getVector(NonConst): index j (== " << j << ") exceeds valid column " 02689 "range for this multivector."); 02690 #endif // HAVE_TPETRA_DEBUG 02691 02692 // FIXME (mfh 10 May 2014) Why can't Kokkos take size_t instead of 02693 // unsigned int? 02694 const unsigned int jj = isConstantStride () ? 02695 static_cast<unsigned int> (j) : 02696 static_cast<unsigned int> (whichVectors_[j]); 02697 // FIXME (mfh 10 May 2014) The same issue shows up here that shows 02698 // up for offsetView. 02699 return rcp (new V (this->getMap (), 02700 subview<dual_view_type> (view_, ALL (), jj), 02701 origView_)); 02702 } 02703 02704 02705 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02706 Teuchos::RCP<Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 02707 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02708 getVectorNonConst (size_t j) 02709 { 02710 typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, 02711 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > V; 02712 return Teuchos::rcp_const_cast<V> (this->getVector (j)); 02713 } 02714 02715 02716 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02717 void 02718 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02719 get1dCopy (Teuchos::ArrayView<Scalar> A, size_t LDA) const 02720 { 02721 using Kokkos::ALL; 02722 using Kokkos::subview; 02723 typedef typename dual_view_type::host_mirror_device_type host_mirror_device_type; 02724 // The user's array is column major ("LayoutLeft"). 02725 // typedef Kokkos::View<Scalar**, Kokkos::LayoutLeft, 02726 // host_mirror_device_type, Kokkos::MemoryUnmanaged> input_view_type; 02727 typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, 02728 host_mirror_device_type, Kokkos::MemoryUnmanaged> input_col_type; 02729 typedef typename dual_view_type::t_host host_view_type; 02730 typedef Kokkos::View< Scalar* 02731 , typename host_view_type::array_layout 02732 , typename host_view_type::device_type 02733 , Kokkos::MemoryUnmanaged > host_col_type ; 02734 const char tfecfFuncName[] = "get1dCopy"; 02735 02736 const size_t numRows = this->getLocalLength (); 02737 const size_t numCols = this->getNumVectors (); 02738 const std::pair<size_t, size_t> rowRange (0, numRows); 02739 const std::pair<size_t, size_t> colRange (0, numCols); 02740 02741 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02742 LDA < numRows, std::runtime_error, 02743 ": LDA = " << LDA << " < numRows = " << numRows << "."); 02744 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02745 numRows > static_cast<size_t> (0) && 02746 numCols > static_cast<size_t> (0) && 02747 static_cast<size_t> (A.size ()) < LDA * (numCols - 1) + numRows, 02748 std::runtime_error, 02749 ": A.size() = " << A.size () << ", but its size must be at least " 02750 << LDA * (numCols - 1) * numRows << " to hold all the entries."); 02751 02752 // This does a deep copy into A, which is column major with 02753 // leading dimension LDA. We assume A is big enough to hold 02754 // everything. Copy directly from host mirror of the data, if it 02755 // exists. 02756 02757 // Start by sync'ing to host. 02758 view_.template sync<host_mirror_device_type> (); 02759 02760 // FIXME (mfh 22 Jul 2014) These actually should be strided views. 02761 // This causes a run-time error with deep copy. The temporary fix 02762 // is to copy one column at a time. 02763 02764 //input_view_type dstWholeView (A.getRawPtr (), LDA, numCols); 02765 // input_view_type dstView = 02766 // Kokkos::subview<input_view_type> (dstWholeView, rowRange, Kokkos::ALL ()); 02767 // host_view_type srcView = 02768 // Kokkos::subview<host_view_type> (view_.h_view, rowRange, Kokkos::ALL ()); 02769 02770 // if (this->isConstantStride ()) { 02771 // Kokkos::deep_copy (dstView, srcView); 02772 // } 02773 // else { 02774 // Kokkos::View<LocalOrdinal*, host_mirror_device_type> whichVecsDst ("whichVecsDst", numCols); 02775 // Kokkos::View<LocalOrdinal*, host_mirror_device_type> whichVecsSrc ("whichVecsSrc", numCols); 02776 // for (size_t j = 0; j < numCols; ++j) { 02777 // whichVecsSrc(j) = static_cast<LocalOrdinal> (this->whichVectors_[j]); 02778 // whichVecsDst(j) = static_cast<LocalOrdinal> (j); 02779 // } 02780 // DeepCopySelectedVectors<input_view_type, host_view_type, LocalOrdinal, 02781 // host_mirror_device_type, false, false> f (dstView, srcView, whichVecsDst, whichVecsSrc); 02782 // Kokkos::parallel_for (numRows, f); 02783 // } 02784 02785 for (size_t j = 0; j < numCols; ++j) { 02786 const size_t srcCol = this->isConstantStride () ? j : this->whichVectors_[j]; 02787 const size_t dstCol = j; 02788 Scalar* const dstColRaw = A.getRawPtr () + LDA * dstCol; 02789 02790 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02791 static_cast<size_t> (A.size ()) < LDA * dstCol + numRows, 02792 std::logic_error, ": The input ArrayView A is not big enough to hold " 02793 "all the data. Please report this bug to the Tpetra developers."); 02794 02795 input_col_type dstColView (dstColRaw, numRows); 02796 host_col_type srcColView = subview<host_col_type> (view_.h_view, ALL (), srcCol); 02797 02798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02799 dstColView.dimension_0 () != srcColView.dimension_0 (), std::logic_error, 02800 ": srcColView and dstColView have different dimensions. " 02801 "Please report this bug to the Tpetra developers."); 02802 02803 Kokkos::deep_copy (dstColView, srcColView); 02804 } 02805 } 02806 02807 02808 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02809 void 02810 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02811 get2dCopy (Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const 02812 { 02813 typedef typename dual_view_type::host_mirror_device_type 02814 host_mirror_device_type; 02815 typedef typename dual_view_type::t_host host_view_type; 02816 typedef Kokkos::View<Scalar**, 02817 typename host_view_type::array_layout, 02818 typename dual_view_type::host_mirror_device_type, 02819 Kokkos::MemoryUnmanaged> unmanaged_host_view_type; 02820 02821 const char tfecfFuncName[] = "get2dCopy"; 02822 const size_t numRows = this->getLocalLength (); 02823 const size_t numCols = this->getNumVectors (); 02824 02825 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02826 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols, 02827 std::runtime_error, ": Input array of pointers must contain as many " 02828 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = " 02829 << ArrayOfPtrs.size () << " != getNumVectors() = " << numCols << "."); 02830 02831 if (numRows != 0 && numCols != 0) { 02832 // Start by sync'ing to host. 02833 view_.template sync<host_mirror_device_type> (); 02834 02835 // No side effects until we've validated the input. 02836 for (size_t j = 0; j < numCols; ++j) { 02837 const size_t dstLen = static_cast<size_t> (ArrayOfPtrs[j].size ()); 02838 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 02839 dstLen < numRows, std::invalid_argument, ": Array j = " << j << " of " 02840 "the input array of arrays is not long enough to fit all entries in " 02841 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen 02842 << " < getLocalLength() = " << numRows << "."); 02843 } 02844 02845 // We've validated the input, so it's safe to start copying. 02846 for (size_t j = 0; j < numCols; ++j) { 02847 const size_t col = isConstantStride () ? j : whichVectors_[j]; 02848 host_view_type src = 02849 Kokkos::subview<host_view_type> (view_.h_view, Kokkos::ALL (), col); 02850 unmanaged_host_view_type dst (ArrayOfPtrs[j].getRawPtr (), 02851 ArrayOfPtrs[j].size (), 02852 static_cast<size_t> (1)); 02853 Kokkos::deep_copy (dst, src); 02854 } 02855 } 02856 } 02857 02858 02859 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02860 Teuchos::ArrayRCP<const Scalar> 02861 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02862 get1dView () const 02863 { 02864 if (getLocalLength () == 0 || getNumVectors () == 0) { 02865 return Teuchos::null; 02866 } else { 02867 TEUCHOS_TEST_FOR_EXCEPTION( 02868 ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::" 02869 "get1dView: This MultiVector does not have constant stride, so it is " 02870 "not possible to view its data as a single array. You may check " 02871 "whether a MultiVector has constant stride by calling " 02872 "isConstantStride()."); 02873 // NOTE (mfh 09 2014) get1dView() and get1dViewNonConst() have 02874 // always been device->host synchronization points. We might 02875 // want to change this in the future. 02876 typedef typename dual_view_type::host_mirror_device_type host_type; 02877 view_.template sync<host_type> (); 02878 // Both get1dView() and get1dViewNonConst() return a host view 02879 // of the data. 02880 Teuchos::ArrayRCP<scalar_type> dataAsArcp = 02881 Kokkos::Compat::persistingView (view_.template view<host_type> ()); 02882 return Teuchos::arcp_const_cast<const scalar_type> (dataAsArcp); 02883 } 02884 } 02885 02886 02887 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02888 Teuchos::ArrayRCP<Scalar> 02889 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02890 get1dViewNonConst () 02891 { 02892 if (getLocalLength () == 0 || getNumVectors () == 0) { 02893 return Teuchos::null; 02894 } else { 02895 TEUCHOS_TEST_FOR_EXCEPTION( 02896 ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::" 02897 "get1dViewNonConst: This MultiVector does not have constant stride, so " 02898 "it is not possible to view its data as a single array. You may check " 02899 "whether a MultiVector has constant stride by calling " 02900 "isConstantStride()."); 02901 // NOTE (mfh 09 May 2014) get1dView() and get1dViewNonConst() 02902 // have always been device->host synchronization points. We 02903 // might want to change this in the future. 02904 typedef typename dual_view_type::host_mirror_device_type host_type; 02905 view_.template sync<host_type> (); 02906 // Both get1dView() and get1dViewNonConst() return a host view 02907 // of the data. 02908 Teuchos::ArrayRCP<scalar_type> dataAsArcp = 02909 Kokkos::Compat::persistingView (view_.template view<host_type> ()); 02910 return dataAsArcp; 02911 } 02912 } 02913 02914 02915 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02916 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > 02917 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02918 get2dViewNonConst () 02919 { 02920 const size_t numCols = getNumVectors (); 02921 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols); 02922 for (size_t j = 0; j < numCols; ++j) { 02923 const size_t col = isConstantStride () ? j : whichVectors_[j]; 02924 dual_view_type X_col = Kokkos::subview<dual_view_type> (view_, Kokkos::ALL (), col); 02925 views[j] = Kokkos::Compat::persistingView (X_col.d_view); 02926 } 02927 return views; 02928 } 02929 02930 02931 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02932 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > 02933 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02934 get2dView () const 02935 { 02936 const size_t numCols = getNumVectors (); 02937 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols); 02938 for (size_t j = 0; j < numCols; ++j) { 02939 const size_t col = isConstantStride () ? j : whichVectors_[j]; 02940 dual_view_type X_col = Kokkos::subview<dual_view_type> (view_, Kokkos::ALL (), col); 02941 views[j] = Kokkos::Compat::persistingView (X_col.d_view); 02942 } 02943 return views; 02944 } 02945 02946 02947 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 02948 void 02949 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 02950 multiply (Teuchos::ETransp transA, 02951 Teuchos::ETransp transB, 02952 const Scalar &alpha, 02953 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& A, 02954 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& B, 02955 const Scalar &beta) 02956 { 02957 using Teuchos::CONJ_TRANS; 02958 using Teuchos::NO_TRANS; 02959 using Teuchos::TRANS; 02960 using Teuchos::RCP; 02961 using Teuchos::rcp; 02962 using Teuchos::rcpFromRef; 02963 typedef Teuchos::ScalarTraits<Scalar> STS; 02964 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, node_type> MV; 02965 const char errPrefix[] = "Tpetra::MultiVector::multiply: "; 02966 02967 // This routine performs a variety of matrix-matrix multiply 02968 // operations, interpreting the MultiVector (this-aka C , A and B) 02969 // as 2D matrices. Variations are due to the fact that A, B and C 02970 // can be local replicated or global distributed MultiVectors and 02971 // that we may or may not operate with the transpose of A and B. 02972 // Possible cases are: 02973 // 02974 // Operations # Cases Notes 02975 // 1) C(local) = A^X(local) * B^X(local) 4 X=Trans or Not, no comm needed 02976 // 2) C(local) = A^T(distr) * B (distr) 1 2-D dot product, replicate C 02977 // 3) C(distr) = A (distr) * B^X(local) 2 2-D vector update, no comm needed 02978 // 02979 // The following operations are not meaningful for 1-D 02980 // distributions: 02981 // 02982 // u1) C(local) = A^T(distr) * B^T(distr) 1 02983 // u2) C(local) = A (distr) * B^X(distr) 2 02984 // u3) C(distr) = A^X(local) * B^X(local) 4 02985 // u4) C(distr) = A^X(local) * B^X(distr) 4 02986 // u5) C(distr) = A^T(distr) * B^X(local) 2 02987 // u6) C(local) = A^X(distr) * B^X(local) 4 02988 // u7) C(distr) = A^X(distr) * B^X(local) 4 02989 // u8) C(local) = A^X(local) * B^X(distr) 4 02990 // 02991 // Total number of cases: 32 (= 2^5). 02992 02993 TEUCHOS_TEST_FOR_EXCEPTION( 02994 STS::isComplex && (transA == TRANS || transB == TRANS), 02995 std::invalid_argument, errPrefix << "Transpose without conjugation " 02996 "(transA == TRANS || transB == TRANS) not supported for complex Scalar " 02997 "types."); 02998 02999 transA = (transA == NO_TRANS ? NO_TRANS : CONJ_TRANS); 03000 transB = (transB == NO_TRANS ? NO_TRANS : CONJ_TRANS); 03001 03002 // Compute effective dimensions, w.r.t. transpose operations on 03003 size_t A_nrows = (transA==CONJ_TRANS) ? A.getNumVectors() : A.getLocalLength(); 03004 size_t A_ncols = (transA==CONJ_TRANS) ? A.getLocalLength() : A.getNumVectors(); 03005 size_t B_nrows = (transB==CONJ_TRANS) ? B.getNumVectors() : B.getLocalLength(); 03006 size_t B_ncols = (transB==CONJ_TRANS) ? B.getLocalLength() : B.getNumVectors(); 03007 03008 Scalar beta_local = beta; // local copy of beta; might be reassigned below 03009 03010 TEUCHOS_TEST_FOR_EXCEPTION( 03011 getLocalLength () != A_nrows || getNumVectors () != B_ncols || 03012 A_ncols != B_nrows, std::runtime_error, errPrefix << "Dimensions of " 03013 "*this, op(A), and op(B) must be consistent. Local part of *this is " 03014 << getLocalLength() << " x " << getNumVectors() 03015 << ", A is " << A_nrows << " x " << A_ncols 03016 << ", and B is " << B_nrows << " x " << B_ncols << "."); 03017 03018 const bool A_is_local = ! A.isDistributed (); 03019 const bool B_is_local = ! B.isDistributed (); 03020 const bool C_is_local = ! this->isDistributed (); 03021 // Case 1: C(local) = A^X(local) * B^X(local) 03022 const bool Case1 = C_is_local && A_is_local && B_is_local; 03023 // Case 2: C(local) = A^T(distr) * B (distr) 03024 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local && 03025 transA == CONJ_TRANS && transB == NO_TRANS; 03026 // Case 3: C(distr) = A (distr) * B^X(local) 03027 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local && 03028 transA == NO_TRANS; 03029 03030 // Test that we are considering a meaningful case 03031 TEUCHOS_TEST_FOR_EXCEPTION( 03032 ! Case1 && ! Case2 && ! Case3, std::runtime_error, errPrefix 03033 << "Multiplication of op(A) and op(B) into *this is not a " 03034 "supported use case."); 03035 03036 if (beta != STS::zero () && Case2) { 03037 // If Case2, then C is local and contributions must be summed 03038 // across all processes. However, if beta != 0, then accumulate 03039 // beta*C into the sum. When summing across all processes, we 03040 // only want to accumulate this once, so set beta == 0 on all 03041 // processes except Process 0. 03042 const int myRank = this->getMap ()->getComm ()->getRank (); 03043 if (myRank != 0) { 03044 beta_local = STS::zero (); 03045 } 03046 } 03047 03048 // We only know how to do matrix-matrix multiplies if all the 03049 // MultiVectors have constant stride. If not, we have to make 03050 // temporary copies of those MultiVectors (including possibly 03051 // *this) that don't have constant stride. 03052 RCP<MV> C_tmp; 03053 if (! isConstantStride ()) { 03054 C_tmp = rcp (new MV (*this, Teuchos::Copy)); // deep copy 03055 } else { 03056 C_tmp = rcp (this, false); 03057 } 03058 03059 RCP<const MV> A_tmp; 03060 if (! A.isConstantStride ()) { 03061 A_tmp = rcp (new MV (A, Teuchos::Copy)); // deep copy 03062 } else { 03063 A_tmp = rcpFromRef (A); 03064 } 03065 03066 RCP<const MV> B_tmp; 03067 if (! B.isConstantStride ()) { 03068 B_tmp = rcp (new MV (B, Teuchos::Copy)); // deep copy 03069 } else { 03070 B_tmp = rcpFromRef (B); 03071 } 03072 03073 TEUCHOS_TEST_FOR_EXCEPTION( 03074 ! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () || 03075 ! A_tmp->isConstantStride (), std::logic_error, errPrefix 03076 << "Failed to make temporary constant-stride copies of MultiVectors."); 03077 03078 typedef Kokkos::DeviceGEMM<Scalar,DeviceType> gemm_type; 03079 03080 gemm_type::GEMM (transA, transB, alpha, 03081 A_tmp->getDualView ().d_view, B_tmp->getDualView ().d_view, 03082 beta_local, C_tmp->getDualView ().d_view); 03083 if (! isConstantStride ()) { 03084 deep_copy (*this, *C_tmp); // Copy the result back into *this. 03085 } 03086 03087 // Dispose of (possibly) extra copies of A and B. 03088 A_tmp = Teuchos::null; 03089 B_tmp = Teuchos::null; 03090 03091 // If Case 2 then sum up *this and distribute it to all processes. 03092 if (Case2) { 03093 this->reduce (); 03094 } 03095 } 03096 03097 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03098 void 03099 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03100 elementWiseMultiply (Scalar scalarAB, 03101 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, node_type>& A, 03102 const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal, node_type>& B, 03103 Scalar scalarThis) 03104 { 03105 using Kokkos::ALL; 03106 using Kokkos::subview; 03107 using Teuchos::arcp_const_cast; 03108 typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, DeviceType> view_type; 03109 const char tfecfFuncName[] = "elementWiseMultiply: "; 03110 03111 #ifdef HAVE_TPETRA_DEBUG 03112 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03113 getLocalLength() != A.getLocalLength() || 03114 getLocalLength() != B.getLocalLength(), std::runtime_error, 03115 "MultiVectors do not have the same local length."); 03116 #endif // HAVE_TPETRA_DEBUG 03117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( 03118 B.getNumVectors() != this->getNumVectors(), std::runtime_error, 03119 "MultiVectors 'this' and B must have the same number of vectors."); 03120 03121 const size_t numVecs = this->getNumVectors (); 03122 if (isConstantStride () && A.isConstantStride ()) { 03123 // FIXME (mfh 02 Oct 2014) Shouldn't it be asking if B has 03124 // constant stride? A is just a Vector; it only has one column, 03125 // so it always has constant stride. 03126 // 03127 // If both *this and A have constant stride, we can do an 03128 // element-wise multiply on all columns at once. 03129 view_.template sync<DeviceType>(); 03130 view_.template modify<DeviceType>(); 03131 A.view_.template sync<DeviceType>(); 03132 A.view_.template modify<DeviceType>(); 03133 B.view_.template sync<DeviceType>(); 03134 B.view_.template modify<DeviceType>(); 03135 view_type vector_A = subview<view_type> (A.view_.d_view, ALL (), 0); 03136 Kokkos::MV_ElementWiseMultiply (scalarThis, view_.d_view, 03137 scalarAB, vector_A, B.view_.d_view); 03138 } 03139 else { 03140 view_.template sync<DeviceType>(); 03141 view_.template modify<DeviceType>(); 03142 A.view_.template sync<DeviceType>(); 03143 A.view_.template modify<DeviceType>(); 03144 B.view_.template sync<DeviceType>(); 03145 B.view_.template modify<DeviceType>(); 03146 view_type vector_A = subview<view_type> (A.view_.d_view, ALL (), 0); 03147 for (size_t k = 0; k < numVecs; ++k) { 03148 const size_t this_col = isConstantStride () ? k : whichVectors_[k]; 03149 view_type vector_k = subview<view_type> (view_.d_view, ALL (), this_col); 03150 const size_t B_col = isConstantStride () ? k : B.whichVectors_[k]; 03151 view_type vector_Bk = subview<view_type> (B.view_.d_view, ALL (), B_col); 03152 Kokkos::V_ElementWiseMultiply (scalarThis, vector_k, scalarAB, vector_A, vector_Bk); 03153 } 03154 } 03155 } 03156 03157 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03158 void 03159 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::reduce() 03160 { 03161 using Kokkos::ALL; 03162 using Kokkos::subview; 03163 using Teuchos::reduceAll; 03164 using Teuchos::REDUCE_SUM; 03165 typedef typename dual_view_type::t_dev device_view_type; 03166 typedef typename device_type::host_mirror_device_type host_mirror_device_type; 03167 03168 TEUCHOS_TEST_FOR_EXCEPTION( 03169 this->isDistributed (), std::runtime_error, 03170 "Tpetra::MultiVector::reduce() should only be called with locally " 03171 "replicated or otherwise not distributed MultiVector objects."); 03172 const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ()); 03173 if (comm.getSize () == 1) { 03174 return; 03175 } 03176 03177 const size_t numLclRows = getLocalLength (); 03178 const size_t numCols = getNumVectors (); 03179 03180 // FIXME (mfh 16 June 2014) This exception will cause deadlock if 03181 // it triggers on only some processes. We don't have a good way 03182 // to pack this result into the all-reduce below, but this would 03183 // be a good reason to set a "local error flag" and find other 03184 // opportunities to let it propagate. 03185 TEUCHOS_TEST_FOR_EXCEPTION( 03186 numLclRows > static_cast<size_t> (std::numeric_limits<int>::max ()), 03187 std::runtime_error, "Tpetra::MultiVector::reduce: On Process " << 03188 comm.getRank () << ", the number of local rows " << numLclRows << 03189 " does not fit in int."); 03190 03191 // 03192 // Use MPI to sum the entries across all local blocks. 03193 // 03194 // If this MultiVector's local data are stored contiguously, we 03195 // can use the local View as the source buffer in the 03196 // MPI_Allreduce. Otherwise, we have to allocate a temporary 03197 // source buffer and pack. 03198 const bool contig = isConstantStride () && getStride () == numLclRows; 03199 device_view_type srcBuf; 03200 if (contig) { 03201 srcBuf = view_.d_view; 03202 } 03203 else { 03204 srcBuf = device_view_type ("srcBuf", numLclRows, numCols); 03205 Kokkos::deep_copy (srcBuf, view_.d_view); 03206 } 03207 03208 // MPI requires that the send and receive buffers don't alias one 03209 // another, so we have to copy temporary storage for the result. 03210 // 03211 // We expect that MPI implementations will know how to read device 03212 // pointers. 03213 device_view_type tgtBuf ("tgtBuf", numLclRows, numCols); 03214 03215 const int reduceCount = static_cast<int> (numLclRows * numCols); 03216 reduceAll<int, Scalar> (comm, REDUCE_SUM, reduceCount, 03217 srcBuf.ptr_on_device (), tgtBuf.ptr_on_device ()); 03218 03219 // Tell the DualView that we plan to modify the device data. 03220 view_.template modify<device_type> (); 03221 03222 const std::pair<size_t, size_t> lclRowRange (0, numLclRows); 03223 device_view_type d_view = 03224 subview<device_view_type> (view_.d_view, lclRowRange, ALL ()); 03225 03226 if (contig || isConstantStride ()) { 03227 Kokkos::deep_copy (d_view, tgtBuf); 03228 } 03229 else { 03230 for (size_t j = 0; j < numCols; ++j) { 03231 device_view_type d_view_j = 03232 subview<device_view_type> (d_view, ALL (), j); 03233 device_view_type tgtBuf_j = 03234 subview<device_view_type> (tgtBuf, ALL (), j); 03235 Kokkos::deep_copy (d_view_j, tgtBuf_j); 03236 } 03237 } 03238 03239 // Synchronize the host with changes on the device. 03240 // 03241 // FIXME (mfh 16 June 2014) This raises the question of whether we 03242 // want to synchronize always. Users will find it reassuring if 03243 // MultiVector methods always leave the MultiVector in a 03244 // synchronized state, but it seems silly to synchronize to host 03245 // if they hardly ever need host data. 03246 view_.template sync<host_mirror_device_type> (); 03247 } 03248 03249 03250 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03251 void 03252 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03253 replaceLocalValue (LocalOrdinal MyRow, 03254 size_t VectorIndex, 03255 const Scalar &ScalarValue) 03256 { 03257 #ifdef HAVE_TPETRA_DEBUG 03258 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex(); 03259 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex(); 03260 TEUCHOS_TEST_FOR_EXCEPTION( 03261 MyRow < minLocalIndex || MyRow > maxLocalIndex, 03262 std::runtime_error, 03263 "Tpetra::MultiVector::replaceLocalValue: row index " << MyRow 03264 << " is invalid. The range of valid row indices on this process " 03265 << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex 03266 << ", " << maxLocalIndex << "]."); 03267 TEUCHOS_TEST_FOR_EXCEPTION( 03268 vectorIndexOutOfRange(VectorIndex), 03269 std::runtime_error, 03270 "Tpetra::MultiVector::replaceLocalValue: vector index " << VectorIndex 03271 << " of the multivector is invalid."); 03272 #endif 03273 const size_t colInd = isConstantStride () ? 03274 VectorIndex : whichVectors_[VectorIndex]; 03275 view_.d_view (static_cast<size_t> (MyRow), colInd) = ScalarValue; 03276 } 03277 03278 03279 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03280 void 03281 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03282 sumIntoLocalValue (LocalOrdinal MyRow, 03283 size_t VectorIndex, 03284 const Scalar &ScalarValue) 03285 { 03286 #ifdef HAVE_TPETRA_DEBUG 03287 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex(); 03288 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex(); 03289 TEUCHOS_TEST_FOR_EXCEPTION( 03290 MyRow < minLocalIndex || MyRow > maxLocalIndex, 03291 std::runtime_error, 03292 "Tpetra::MultiVector::sumIntoLocalValue: row index " << MyRow 03293 << " is invalid. The range of valid row indices on this process " 03294 << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex 03295 << ", " << maxLocalIndex << "]."); 03296 TEUCHOS_TEST_FOR_EXCEPTION( 03297 vectorIndexOutOfRange(VectorIndex), 03298 std::runtime_error, 03299 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << VectorIndex 03300 << " of the multivector is invalid."); 03301 #endif 03302 const size_t colInd = isConstantStride () ? 03303 VectorIndex : whichVectors_[VectorIndex]; 03304 view_.d_view (static_cast<size_t> (MyRow), colInd) += ScalarValue; 03305 } 03306 03307 03308 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03309 void 03310 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03311 replaceGlobalValue (GlobalOrdinal GlobalRow, 03312 size_t VectorIndex, 03313 const Scalar &ScalarValue) 03314 { 03315 LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow); 03316 #ifdef HAVE_TPETRA_DEBUG 03317 TEUCHOS_TEST_FOR_EXCEPTION( 03318 MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), 03319 std::runtime_error, 03320 "Tpetra::MultiVector::replaceGlobalValue: global row index " << GlobalRow 03321 << "is not present on this process " << this->getMap()->getComm()->getRank() 03322 << "."); 03323 TEUCHOS_TEST_FOR_EXCEPTION( 03324 vectorIndexOutOfRange(VectorIndex), 03325 std::runtime_error, 03326 "Tpetra::MultiVector::replaceGlobalValue: vector index " << VectorIndex 03327 << " of the multivector is invalid."); 03328 #endif 03329 replaceLocalValue (MyRow, VectorIndex, ScalarValue); 03330 } 03331 03332 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03333 void 03334 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03335 sumIntoGlobalValue (GlobalOrdinal GlobalRow, 03336 size_t VectorIndex, 03337 const Scalar &ScalarValue) 03338 { 03339 LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow); 03340 #ifdef HAVE_TEUCHOS_DEBUG 03341 TEUCHOS_TEST_FOR_EXCEPTION( 03342 MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), 03343 std::runtime_error, 03344 "Tpetra::MultiVector::sumIntoGlobalValue: global row index " << GlobalRow 03345 << "is not present on this process " << this->getMap()->getComm()->getRank() 03346 << "."); 03347 TEUCHOS_TEST_FOR_EXCEPTION( 03348 vectorIndexOutOfRange(VectorIndex), 03349 std::runtime_error, 03350 "Tpetra::MultiVector::sumIntoGlobalValue: vector index " << VectorIndex 03351 << " of the multivector is invalid."); 03352 #endif 03353 sumIntoLocalValue (MyRow, VectorIndex, ScalarValue); 03354 } 03355 03356 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03357 template <class T> 03358 Teuchos::ArrayRCP<T> 03359 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03360 getSubArrayRCP (Teuchos::ArrayRCP<T> arr, 03361 size_t j) const 03362 { 03363 const size_t col = isConstantStride () ? j : whichVectors_[j]; 03364 dual_view_type X_col = Kokkos::subview<dual_view_type> (view_, Kokkos::ALL (), col); 03365 return Kokkos::Compat::persistingView (X_col.d_view); // ????? host or device??? 03366 } 03367 03368 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03369 KokkosClassic::MultiVector<Scalar,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 03370 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getLocalMV () const 03371 { 03372 // KR Creates the KokkosClassic thing on the fly. 03373 // OK to leave this in place as backwards compat. 03374 03375 KMV kmv (this->getMap ()->getNode ()); 03376 ArrayRCP<Scalar> data = (getLocalLength() > 0) ? 03377 Kokkos::Compat::persistingView (view_.d_view) : 03378 Teuchos::null; 03379 03380 size_t stride[8]; 03381 origView_.stride (stride); 03382 const size_t LDA = origView_.dimension_1 () > 1 ? stride[1] : origView_.dimension_0 (); 03383 MVT::initializeValues (kmv, getLocalLength (), getNumVectors (), 03384 data, 03385 LDA, 03386 getOrigNumLocalRows (), 03387 getOrigNumLocalCols ()); 03388 return kmv; 03389 } 03390 03391 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03392 typename MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::dual_view_type 03393 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getDualView() const { 03394 return view_; 03395 } 03396 03397 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03398 TEUCHOS_DEPRECATED 03399 KokkosClassic::MultiVector<Scalar,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 03400 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::getLocalMVNonConst() { 03401 03402 KMV kmv (this->getMap ()->getNode ()); 03403 ArrayRCP<Scalar> data = (getLocalLength() > 0) ? 03404 Kokkos::Compat::persistingView (view_.d_view) : 03405 Teuchos::null; 03406 03407 size_t stride[8]; 03408 origView_.stride (stride); 03409 const size_t LDA = origView_.dimension_1 () > 1 ? stride[1] : origView_.dimension_0 (); 03410 MVT::initializeValues (kmv, getLocalLength (), getNumVectors (), 03411 data, 03412 LDA, 03413 getOrigNumLocalRows (), 03414 getOrigNumLocalCols ()); 03415 return kmv; 03416 } 03417 03418 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03419 std::string 03420 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::description() const 03421 { 03422 using std::endl; 03423 std::ostringstream oss; 03424 oss << Teuchos::typeName (*this) << " {" 03425 << "label: \"" << this->getObjectLabel () << "\"" 03426 << ", numRows: " << getGlobalLength () 03427 << ", numCols: " << getNumVectors () 03428 << ", isConstantStride: " << isConstantStride (); 03429 if (isConstantStride ()) { 03430 oss << ", columnStride: " << getStride (); 03431 } 03432 oss << "}"; 03433 return oss.str(); 03434 } 03435 03436 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03437 void 03438 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03439 describe (Teuchos::FancyOStream &out, 03440 const Teuchos::EVerbosityLevel verbLevel) const 03441 { 03442 using Teuchos::ArrayRCP; 03443 using Teuchos::RCP; 03444 using Teuchos::VERB_DEFAULT; 03445 using Teuchos::VERB_NONE; 03446 using Teuchos::VERB_LOW; 03447 using Teuchos::VERB_MEDIUM; 03448 using Teuchos::VERB_HIGH; 03449 using Teuchos::VERB_EXTREME; 03450 using std::endl; 03451 using std::setw; 03452 03453 // Set default verbosity if applicable. 03454 const Teuchos::EVerbosityLevel vl = 03455 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel; 03456 03457 RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm(); 03458 const int myImageID = comm->getRank(); 03459 const int numImages = comm->getSize(); 03460 03461 if (vl != VERB_NONE) { 03462 // Don't set the tab level unless we're printing something. 03463 Teuchos::OSTab tab0 (out); 03464 03465 if (myImageID == 0) { // >= VERB_LOW prints description() 03466 out << "Tpetra::MultiVector:" << endl; 03467 Teuchos::OSTab tab1 (out); 03468 out << "Template parameters:" << endl; 03469 { 03470 Teuchos::OSTab tab2 (out); 03471 out << "Scalar: " << Teuchos::TypeNameTraits<Scalar>::name () << endl 03472 << "LocalOrdinal: " << Teuchos::TypeNameTraits<LocalOrdinal>::name () << endl 03473 << "GlobalOrdinal: " << Teuchos::TypeNameTraits<GlobalOrdinal>::name () << endl 03474 << "Node: " << Teuchos::TypeNameTraits<Node>::name () << endl; 03475 } 03476 out << "label: \"" << this->getObjectLabel () << "\"" << endl 03477 << "numRows: " << getGlobalLength () << endl 03478 << "numCols: " << getNumVectors () << endl 03479 << "isConstantStride: " << isConstantStride () << endl; 03480 if (isConstantStride ()) { 03481 out << "columnStride: " << getStride () << endl; 03482 } 03483 } 03484 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) { 03485 if (myImageID == imageCtr) { 03486 if (vl != VERB_LOW) { 03487 // At verbosity > VERB_LOW, each process prints something. 03488 out << "Process " << myImageID << ":" << endl; 03489 Teuchos::OSTab tab2 (out); 03490 03491 // >= VERB_MEDIUM: print the local vector length. 03492 out << "local length: " << getLocalLength(); 03493 if (vl != VERB_MEDIUM) { 03494 // >= VERB_HIGH: print isConstantStride() and getStride() 03495 if (isConstantStride()) { 03496 out << "constant stride: " << getStride() << endl; 03497 } 03498 else { 03499 out << "not constant stride" << endl; 03500 } 03501 if (vl == VERB_EXTREME) { 03502 // VERB_EXTREME: print all the values in the multivector. 03503 out << "values: " << endl; 03504 ArrayRCP<ArrayRCP<const Scalar> > X = this->get2dView(); 03505 out << "["; 03506 for (size_t i = 0; i < getLocalLength(); ++i) { 03507 for (size_t j = 0; j < getNumVectors(); ++j) { 03508 out << X[j][i]; 03509 if (j + 1 < getNumVectors()) { 03510 out << ", "; 03511 } 03512 } // for each column 03513 if (i + 1 < getLocalLength ()) { 03514 out << "; "; 03515 } else { 03516 out << endl; 03517 } 03518 } // for each row 03519 out << "]" << endl; 03520 } // if vl == VERB_EXTREME 03521 } // if (vl != VERB_MEDIUM) 03522 else { // vl == VERB_LOW 03523 out << endl; 03524 } 03525 } // if vl != VERB_LOW 03526 } // if it is my process' turn to print 03527 comm->barrier(); 03528 } // for each process in the communicator 03529 } // if vl != VERB_NONE 03530 } 03531 03532 #if TPETRA_USE_KOKKOS_DISTOBJECT 03533 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03534 void 03535 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03536 createViews() const 03537 { 03538 // Do nothing in Kokkos::View implementation 03539 } 03540 03541 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03542 void 03543 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03544 createViewsNonConst (KokkosClassic::ReadWriteOption rwo) 03545 { 03546 // Do nothing in Kokkos::View implementation 03547 } 03548 03549 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03550 void 03551 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::releaseViews () const 03552 { 03553 // Do nothing in Kokkos::View implementation 03554 } 03555 03556 #else // NOT TPETRA_USE_KOKKOS_DISTOBJECT 03557 03558 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03559 void 03560 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03561 createViews() const 03562 {} 03563 03564 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03565 void 03566 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03567 createViewsNonConst (KokkosClassic::ReadWriteOption /* rwo */ ) 03568 {} 03569 03570 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03571 void 03572 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03573 releaseViews () const 03574 {} 03575 03576 #endif // TPETRA_USE_KOKKOS_DISTOBJECT 03577 03578 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03579 void 03580 MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03581 removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap) 03582 { 03583 replaceMap (newMap); 03584 } 03585 03586 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03587 void 03588 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >:: 03589 assign (const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& src) 03590 { 03591 using Kokkos::parallel_for; 03592 typedef LocalOrdinal LO; 03593 typedef DeviceType DT; 03594 typedef typename DT::host_mirror_device_type HMDT; 03595 typedef typename dual_view_type::t_host host_view_type; 03596 typedef typename dual_view_type::t_dev dev_view_type; 03597 03598 TEUCHOS_TEST_FOR_EXCEPTION( 03599 this->getGlobalLength () != src.getGlobalLength () || 03600 this->getNumVectors () != src.getNumVectors (), std::invalid_argument, 03601 "Tpetra::deep_copy: Global dimensions of the two Tpetra::MultiVector " 03602 "objects do not match. src has dimensions [" << src.getGlobalLength () 03603 << "," << src.getNumVectors () << "], and *this has dimensions [" 03604 << this->getGlobalLength () << "," << this->getNumVectors () << "]."); 03605 // FIXME (mfh 28 Jul 2014) Don't throw; just set a local error flag. 03606 TEUCHOS_TEST_FOR_EXCEPTION( 03607 this->getLocalLength () != src.getLocalLength (), std::invalid_argument, 03608 "Tpetra::deep_copy: The local row counts of the two Tpetra::MultiVector " 03609 "objects do not match. src has " << src.getLocalLength () << " row(s) " 03610 << " and *this has " << this->getLocalLength () << " row(s)."); 03611 03612 if (src.isConstantStride () && this->isConstantStride ()) { 03613 Kokkos::deep_copy (this->getDualView (), src.getDualView ()); 03614 } 03615 else { 03616 if (this->isConstantStride ()) { 03617 const LO numWhichVecs = static_cast<LO> (src.whichVectors_.size ()); 03618 const std::string whichVecsLabel ("MV::deep_copy::whichVecs"); 03619 03620 // We can't sync src, since it is only an input argument. 03621 // Thus, we have to use the most recently modified version of 03622 // src, device or host. 03623 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 03624 // Copy from the device version of src. 03625 // 03626 // whichVecs tells the kernel which vectors (columns) of src 03627 // to copy. Fill whichVecs on the host, and sync to device. 03628 typedef Kokkos::DualView<LO*, DT> whichvecs_type; 03629 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs); 03630 whichVecs.template modify<HMDT> (); 03631 for (LO i = 0; i < numWhichVecs; ++i) { 03632 whichVecs.h_view(i) = static_cast<LO> (src.whichVectors_[i]); 03633 } 03634 // Sync the host version of whichVecs to the device. 03635 whichVecs.template sync<DT> (); 03636 03637 // Mark the device version of dst's DualView as modified. 03638 this->template modify<DT> (); 03639 // Copy from the selected vectors of src to dst, on the 03640 // device. The functor ignores its 3rd arg in this case. 03641 typedef DeepCopySelectedVectors<dev_view_type, dev_view_type, 03642 LO, DT, true, false> functor_type; 03643 functor_type f (this->getDualView ().template view<DT> (), 03644 src.getDualView ().template view<DT> (), 03645 whichVecs.d_view, whichVecs.d_view); 03646 Kokkos::parallel_for (src.getLocalLength (), f); 03647 // Sync *this' DualView to the host. This is cheaper than 03648 // repeating the above copy from src to *this on the host. 03649 this->template sync<HMDT> (); 03650 } 03651 else { // host version of src was the most recently modified 03652 // Copy from the host version of src. 03653 // 03654 // whichVecs tells the kernel which vectors (columns) of src 03655 // to copy. Fill whichVecs on the host, and use it there. 03656 typedef Kokkos::View<LO*, HMDT> whichvecs_type; 03657 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs); 03658 for (LO i = 0; i < numWhichVecs; ++i) { 03659 whichVecs(i) = static_cast<LO> (src.whichVectors_[i]); 03660 } 03661 // Copy from the selected vectors of src to dst, on the host. 03662 // The functor ignores its 3rd arg in this case. 03663 typedef DeepCopySelectedVectors<host_view_type, host_view_type, 03664 LO, HMDT, true, false> functor_type; 03665 functor_type f (this->getDualView ().template view<HMDT> (), 03666 src.getDualView ().template view<HMDT> (), 03667 whichVecs, whichVecs); 03668 Kokkos::parallel_for (src.getLocalLength (), f); 03669 // Sync dst back to the device, since we only copied on the host. 03670 this->template sync<DT> (); 03671 } 03672 } 03673 else { // dst is NOT constant stride 03674 if (src.isConstantStride ()) { 03675 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 03676 // Copy from the device version of src. 03677 // 03678 // whichVecs tells the kernel which vectors (columns) of dst 03679 // to copy. Fill whichVecs on the host, and sync to device. 03680 typedef Kokkos::DualView<LO*, DT> whichvecs_type; 03681 const std::string whichVecsLabel ("MV::deep_copy::whichVecs"); 03682 const LO numWhichVecs = static_cast<LO> (this->whichVectors_.size ()); 03683 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs); 03684 whichVecs.template modify<HMDT> (); 03685 for (LO i = 0; i < numWhichVecs; ++i) { 03686 whichVecs.h_view(i) = this->whichVectors_[i]; 03687 } 03688 // Sync the host version of whichVecs to the device. 03689 whichVecs.template sync<DT> (); 03690 03691 // Copy src to the selected vectors of dst, on the device. 03692 // The functor ignores its 4th arg in this case. 03693 typedef DeepCopySelectedVectors<dev_view_type, dev_view_type, 03694 LO, DT, false, true> functor_type; 03695 functor_type f (this->getDualView ().template view<DT> (), 03696 src.getDualView ().template view<DT> (), 03697 whichVecs.d_view, whichVecs.d_view); 03698 Kokkos::parallel_for (src.getLocalLength (), f); 03699 // We can't sync src and repeat the above copy on the 03700 // host, so sync dst back to the host. 03701 // 03702 // FIXME (mfh 29 Jul 2014) This may overwrite columns that 03703 // don't actually belong to dst's view. 03704 this->template sync<HMDT> (); 03705 } 03706 else { // host version of src was the most recently modified 03707 // Copy from the host version of src. 03708 // 03709 // whichVecs tells the kernel which vectors (columns) of src 03710 // to copy. Fill whichVecs on the host, and use it there. 03711 typedef Kokkos::View<LO*, HMDT> whichvecs_type; 03712 const LO numWhichVecs = static_cast<LO> (this->whichVectors_.size ()); 03713 whichvecs_type whichVecs ("MV::deep_copy::whichVecs", numWhichVecs); 03714 for (LO i = 0; i < numWhichVecs; ++i) { 03715 whichVecs(i) = static_cast<LO> (this->whichVectors_[i]); 03716 } 03717 // Copy from src to the selected vectors of dst, on the 03718 // host. The functor ignores its 4th arg in this case. 03719 typedef DeepCopySelectedVectors<host_view_type, host_view_type, 03720 LO, HMDT, false, true> functor_type; 03721 functor_type f (this->getDualView ().template view<HMDT> (), 03722 src.getDualView ().template view<HMDT> (), 03723 whichVecs, whichVecs); 03724 Kokkos::parallel_for (src.getLocalLength (), f); 03725 // Sync dst back to the device, since we only copied on the host. 03726 // 03727 // FIXME (mfh 29 Jul 2014) This may overwrite columns that 03728 // don't actually belong to dst's view. 03729 this->template sync<DT> (); 03730 } 03731 } 03732 else { // neither src nor dst have constant stride 03733 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 03734 // Copy from the device version of src. 03735 // 03736 // whichVectorsDst tells the kernel which vectors 03737 // (columns) of dst to copy. Fill it on the host, and 03738 // sync to device. 03739 const LO dstNumWhichVecs = static_cast<LO> (this->whichVectors_.size ()); 03740 Kokkos::DualView<LO*, DT> whichVecsDst ("MV::deep_copy::whichVecsDst", 03741 dstNumWhichVecs); 03742 whichVecsDst.template modify<HMDT> (); 03743 for (LO i = 0; i < dstNumWhichVecs; ++i) { 03744 whichVecsDst.h_view(i) = static_cast<LO> (this->whichVectors_[i]); 03745 } 03746 // Sync the host version of whichVecsDst to the device. 03747 whichVecsDst.template sync<DT> (); 03748 03749 // whichVectorsSrc tells the kernel which vectors 03750 // (columns) of src to copy. Fill it on the host, and 03751 // sync to device. Use the destination MultiVector's 03752 // LocalOrdinal type here. 03753 const LO srcNumWhichVecs = static_cast<LO> (src.whichVectors_.size ()); 03754 Kokkos::DualView<LO*, DT> whichVecsSrc ("MV::deep_copy::whichVecsSrc", 03755 srcNumWhichVecs); 03756 whichVecsSrc.template modify<HMDT> (); 03757 for (LO i = 0; i < srcNumWhichVecs; ++i) { 03758 whichVecsSrc.h_view(i) = static_cast<LO> (src.whichVectors_[i]); 03759 } 03760 // Sync the host version of whichVecsSrc to the device. 03761 whichVecsSrc.template sync<DT> (); 03762 03763 // Copy from the selected vectors of src to the selected 03764 // vectors of dst, on the device. 03765 typedef DeepCopySelectedVectors<dev_view_type, dev_view_type, 03766 LO, DT, false, false> functor_type; 03767 functor_type f (this->getDualView ().template view<DT> (), 03768 src.getDualView ().template view<DT> (), 03769 whichVecsDst.d_view, whichVecsSrc.d_view); 03770 Kokkos::parallel_for (src.getLocalLength (), f); 03771 } 03772 else { 03773 const LO dstNumWhichVecs = static_cast<LO> (this->whichVectors_.size ()); 03774 Kokkos::View<LO*, HMDT> whichVectorsDst ("dstWhichVecs", dstNumWhichVecs); 03775 for (LO i = 0; i < dstNumWhichVecs; ++i) { 03776 whichVectorsDst(i) = this->whichVectors_[i]; 03777 } 03778 03779 // Use the destination MultiVector's LocalOrdinal type here. 03780 const LO srcNumWhichVecs = static_cast<LO> (src.whichVectors_.size ()); 03781 Kokkos::View<LO*, HMDT> whichVectorsSrc ("srcWhichVecs", srcNumWhichVecs); 03782 for (LO i = 0; i < srcNumWhichVecs; ++i) { 03783 whichVectorsSrc(i) = src.whichVectors_[i]; 03784 } 03785 03786 typedef DeepCopySelectedVectors<host_view_type, host_view_type, 03787 LO, HMDT, false, false> functor_type; 03788 functor_type f (this->getDualView ().template view<HMDT> (), 03789 src.getDualView ().template view<HMDT> (), 03790 whichVectorsDst, whichVectorsSrc); 03791 Kokkos::parallel_for (src.getLocalLength (), f); 03792 03793 // We can't sync src and repeat the above copy on the 03794 // host, so sync dst back to the host. 03795 // 03796 // FIXME (mfh 29 Jul 2014) This may overwrite columns that 03797 // don't actually belong to dst's view. 03798 this->template sync<HMDT> (); 03799 } 03800 } 03801 } 03802 } 03803 } 03804 03805 03806 template <class Scalar, class LO, class GO, class DeviceType> 03807 Teuchos::RCP<MultiVector<Scalar, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 03808 createMultiVector (const Teuchos::RCP<const Map<LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >& map, 03809 size_t numVectors) 03810 { 03811 typedef Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> node_type; 03812 typedef MultiVector<Scalar, LO, GO, node_type> MV; 03813 return Teuchos::rcp (new MV (map, numVectors)); 03814 } 03815 03841 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType> 03842 Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > > 03843 createMultiVectorFromView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >& map, 03844 const Teuchos::ArrayRCP<Scalar>& view, 03845 const size_t LDA, 03846 const size_t numVectors) 03847 { 03848 (void) map; 03849 (void) view; 03850 (void) LDA; 03851 (void) numVectors; 03852 03853 TEUCHOS_TEST_FOR_EXCEPTION( 03854 true, std::logic_error, "Tpetra::createMultiVectorFromView: " 03855 "Not implemented for Node = KokkosDeviceWrapperNode."); 03856 } 03857 03858 template <class ST, class LO, class GO, class DeviceType> 03859 MultiVector<ST, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > 03860 createCopy (const MultiVector<ST, LO, GO, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >& src) 03861 { 03862 typedef Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> node_type; 03863 typedef MultiVector<ST, LO, GO, node_type> MV; 03864 03865 MV cpy (src.getMap (), src.getNumVectors ()); 03866 if (src.isConstantStride ()) { 03867 Kokkos::deep_copy (cpy.getDualView (), src.getDualView ()); 03868 } 03869 else { 03870 const char viewName[] = "MV::createCopy::whichVecs"; 03871 const LO numWhichVecs = static_cast<LO> (src.whichVectors_.size ()); 03872 03873 if (src.getDualView ().modified_device >= src.getDualView ().modified_host) { 03874 typedef typename MV::dual_view_type::t_dev dev_view_type; 03875 typedef DeepCopySelectedVectors<dev_view_type, dev_view_type, 03876 LO, DeviceType, true, false> functor_type; 03877 03878 Kokkos::View<LO*, DeviceType> whichVectors (viewName, numWhichVecs); 03879 // FIXME (mfh 23 Jul 2014) The following loop assumes CUDA UVM. 03880 for (LO i = 0; i < numWhichVecs; ++i) { 03881 whichVectors(i) = static_cast<LO> (src.whichVectors_[i]); 03882 } 03883 functor_type f (cpy.getDualView ().template view<DeviceType> (), 03884 src.getDualView ().template view<DeviceType> (), 03885 whichVectors, whichVectors); 03886 Kokkos::parallel_for (src.getLocalLength (), f); 03887 } 03888 else { 03889 typedef typename DeviceType::host_mirror_device_type host_dev_type; 03890 typedef typename MV::dual_view_type::t_host host_view_type; 03891 typedef DeepCopySelectedVectors<host_view_type, host_view_type, 03892 LO, host_dev_type, true, false> functor_type; 03893 Kokkos::View<LO*, host_dev_type> whichVectors (viewName, numWhichVecs); 03894 for (LO i = 0; i < numWhichVecs; ++i) { 03895 whichVectors(i) = static_cast<LO> (src.whichVectors_[i]); 03896 } 03897 functor_type f (cpy.getDualView ().template view<host_dev_type> (), 03898 src.getDualView ().template view<host_dev_type> (), 03899 whichVectors, whichVectors); 03900 Kokkos::parallel_for (src.getLocalLength (), f); 03901 } 03902 } 03903 return cpy; 03904 } 03905 03906 } // namespace Tpetra 03907 03908 03909 #endif // TPETRA_KOKKOS_REFACTOR_MULTIVECTOR_DEF_HPP
1.7.6.1