Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
00001 /*
00002 // @HEADER
00003 // ***********************************************************************
00004 //
00005 //          Tpetra: Templated Linear Algebra Services Package
00006 //                 Copyright (2008) Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ************************************************************************
00041 // @HEADER
00042 */
00043 
00044 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
00045 // incorrect (as() is not a device function) and usually irrelevant
00046 // (it would only matter if LocalOrdinal were bigger than size_t on a
00047 // particular platform, which is unlikely).
00048 
00049 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
00050 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
00051 
00052 #include "Kokkos_Core.hpp"
00053 #include "Kokkos_ArithTraits.hpp"
00054 
00055 namespace Tpetra {
00056 namespace KokkosRefactor {
00057 namespace Details {
00058 
00059   // Functors for implementing packAndPrepare and unpackAndCombine
00060   // through parallel_for
00061 
00062   template <typename DstView, typename SrcView, typename IdxView>
00063   struct PackArraySingleColumn {
00064     typedef typename DstView::device_type device_type;
00065     typedef typename device_type::size_type size_type;
00066 
00067     DstView dst;
00068     SrcView src;
00069     IdxView idx;
00070     size_t col;
00071 
00072     PackArraySingleColumn(const DstView& dst_,
00073                           const SrcView& src_,
00074                           const IdxView& idx_,
00075                           size_t col_) :
00076       dst(dst_), src(src_), idx(idx_), col(col_) {}
00077 
00078     KOKKOS_INLINE_FUNCTION
00079     void operator()( const size_type k ) const {
00080       dst(k) = src(idx(k), col);
00081     }
00082 
00083     static void pack(const DstView& dst,
00084                      const SrcView& src,
00085                      const IdxView& idx,
00086                      size_t col) {
00087       Kokkos::parallel_for( idx.size(),
00088                             PackArraySingleColumn(dst,src,idx,col) );
00089     }
00090   };
00091 
00092   // To do:  Add enable_if<> restrictions on DstView::Rank == 1,
00093   // SrcView::Rank == 2
00094   template <typename DstView, typename SrcView, typename IdxView>
00095   void pack_array_single_column(const DstView& dst,
00096                                 const SrcView& src,
00097                                 const IdxView& idx,
00098                                 size_t col) {
00099     PackArraySingleColumn<DstView,SrcView,IdxView>::pack(
00100       dst, src, idx, col);
00101   }
00102 
00103   template <typename DstView, typename SrcView, typename IdxView>
00104   struct PackArrayMultiColumn {
00105     typedef typename DstView::device_type device_type;
00106     typedef typename device_type::size_type size_type;
00107 
00108     DstView dst;
00109     SrcView src;
00110     IdxView idx;
00111     size_t numCols;
00112 
00113     PackArrayMultiColumn(const DstView& dst_,
00114                          const SrcView& src_,
00115                          const IdxView& idx_,
00116                          size_t numCols_) :
00117       dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
00118 
00119     KOKKOS_INLINE_FUNCTION
00120     void operator()( const size_type k ) const {
00121       const typename IdxView::value_type localRow = idx(k);
00122       const size_t offset = k*numCols;
00123       for (size_t j = 0; j < numCols; ++j)
00124         dst(offset + j) = src(localRow, j);
00125     }
00126 
00127     static void pack(const DstView& dst,
00128                      const SrcView& src,
00129                      const IdxView& idx,
00130                      size_t numCols) {
00131       Kokkos::parallel_for( idx.size(),
00132                             PackArrayMultiColumn(dst,src,idx,numCols) );
00133     }
00134   };
00135 
00136   // To do:  Add enable_if<> restrictions on DstView::Rank == 1,
00137   // SrcView::Rank == 2
00138   template <typename DstView, typename SrcView, typename IdxView>
00139   void pack_array_multi_column(const DstView& dst,
00140                                const SrcView& src,
00141                                const IdxView& idx,
00142                                size_t numCols) {
00143     PackArrayMultiColumn<DstView,SrcView,IdxView>::pack(
00144       dst, src, idx, numCols);
00145   }
00146 
00147   template <typename DstView, typename SrcView, typename IdxView,
00148             typename ColView>
00149   struct PackArrayMultiColumnVariableStride {
00150     typedef typename DstView::device_type device_type;
00151     typedef typename device_type::size_type size_type;
00152 
00153     DstView dst;
00154     SrcView src;
00155     IdxView idx;
00156     ColView col;
00157     size_t numCols;
00158 
00159     PackArrayMultiColumnVariableStride(const DstView& dst_,
00160                                        const SrcView& src_,
00161                                        const IdxView& idx_,
00162                                        const ColView& col_,
00163                                        size_t numCols_) :
00164       dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
00165 
00166     KOKKOS_INLINE_FUNCTION
00167     void operator()( const size_type k ) const {
00168       const typename IdxView::value_type localRow = idx(k);
00169       const size_t offset = k*numCols;
00170       for (size_t j = 0; j < numCols; ++j)
00171         dst(offset + j) = src(localRow, col(j));
00172     }
00173 
00174     static void pack(const DstView& dst,
00175                      const SrcView& src,
00176                      const IdxView& idx,
00177                      const ColView& col,
00178                      size_t numCols) {
00179       Kokkos::parallel_for( idx.size(),
00180                             PackArrayMultiColumnVariableStride(
00181                               dst,src,idx,col,numCols) );
00182     }
00183   };
00184 
00185   // To do:  Add enable_if<> restrictions on DstView::Rank == 1,
00186   // SrcView::Rank == 2
00187   template <typename DstView, typename SrcView, typename IdxView,
00188             typename ColView>
00189   void pack_array_multi_column_variable_stride(const DstView& dst,
00190                                                const SrcView& src,
00191                                                const IdxView& idx,
00192                                                const ColView& col,
00193                                                size_t numCols) {
00194     PackArrayMultiColumnVariableStride<DstView,SrcView,IdxView,ColView>::pack(
00195       dst, src, idx, col, numCols);
00196   }
00197 
00198   struct InsertOp {
00199     template <typename Scalar>
00200     KOKKOS_INLINE_FUNCTION
00201     void operator() (Scalar& dest, const Scalar& src) const {
00202       Kokkos::atomic_assign(&dest, src);
00203     }
00204   };
00205   struct AddOp {
00206     template <typename Scalar>
00207     KOKKOS_INLINE_FUNCTION
00208     void operator() (Scalar& dest, const Scalar& src) const {
00209       Kokkos::atomic_add(&dest, src);
00210     }
00211   };
00212   struct AbsMaxOp {
00213     // ETP:  Is this really what we want?  This seems very odd if
00214     // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
00215     template <typename T>
00216     KOKKOS_INLINE_FUNCTION
00217     T max(const T& a, const T& b) const { return a > b ? a : b; }
00218 
00219     template <typename Scalar>
00220     KOKKOS_INLINE_FUNCTION
00221     void operator() (Scalar& dest, const Scalar& src) const {
00222       typedef Kokkos::Details::ArithTraits<Scalar> SCT;
00223       Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
00224     }
00225   };
00226 
00227   template <typename DstView, typename SrcView, typename IdxView, typename Op>
00228   struct UnpackArrayMultiColumn {
00229     typedef typename DstView::device_type device_type;
00230     typedef typename device_type::size_type size_type;
00231 
00232     DstView dst;
00233     SrcView src;
00234     IdxView idx;
00235     Op op;
00236     size_t numCols;
00237 
00238     UnpackArrayMultiColumn(const DstView& dst_,
00239                            const SrcView& src_,
00240                            const IdxView& idx_,
00241                            const Op& op_,
00242                            size_t numCols_) :
00243       dst(dst_), src(src_), idx(idx_), op(op_), numCols(numCols_) {}
00244 
00245     KOKKOS_INLINE_FUNCTION
00246     void operator()( const size_type k ) const {
00247       const typename IdxView::value_type localRow = idx(k);
00248       const size_t offset = k*numCols;
00249       for (size_t j = 0; j < numCols; ++j)
00250         op( dst(localRow,j), src(offset+j) );
00251     }
00252 
00253     static void unpack(const DstView& dst,
00254                        const SrcView& src,
00255                        const IdxView& idx,
00256                        const Op& op,
00257                        size_t numCols) {
00258       Kokkos::parallel_for( idx.size(),
00259                             UnpackArrayMultiColumn(dst,src,idx,op,numCols) );
00260     }
00261   };
00262 
00263   // To do:  Add enable_if<> restrictions on DstView::Rank == 2,
00264   // SrcView::Rank == 1
00265   template <typename DstView, typename SrcView, typename IdxView, typename Op>
00266   void unpack_array_multi_column(const DstView& dst,
00267                                  const SrcView& src,
00268                                  const IdxView& idx,
00269                                  const Op& op,
00270                                  size_t numCols) {
00271     UnpackArrayMultiColumn<DstView,SrcView,IdxView,Op>::unpack(
00272       dst, src, idx, op, numCols);
00273   }
00274 
00275   template <typename DstView, typename SrcView, typename IdxView,
00276             typename ColView, typename Op>
00277   struct UnpackArrayMultiColumnVariableStride {
00278     typedef typename DstView::device_type device_type;
00279     typedef typename device_type::size_type size_type;
00280 
00281     DstView dst;
00282     SrcView src;
00283     IdxView idx;
00284     ColView col;
00285     Op op;
00286     size_t numCols;
00287 
00288     UnpackArrayMultiColumnVariableStride(const DstView& dst_,
00289                                          const SrcView& src_,
00290                                          const IdxView& idx_,
00291                                          const ColView& col_,
00292                                          const Op& op_,
00293                                          size_t numCols_) :
00294       dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
00295 
00296     KOKKOS_INLINE_FUNCTION
00297     void operator()( const size_type k ) const {
00298       const typename IdxView::value_type localRow = idx(k);
00299       const size_t offset = k*numCols;
00300       for (size_t j = 0; j < numCols; ++j)
00301         op( dst(localRow,col(j)), src(offset+j) );
00302     }
00303 
00304     static void unpack(const DstView& dst,
00305                        const SrcView& src,
00306                        const IdxView& idx,
00307                        const ColView& col,
00308                        const Op& op,
00309                        size_t numCols) {
00310       Kokkos::parallel_for( idx.size(),
00311                             UnpackArrayMultiColumnVariableStride(
00312                               dst,src,idx,col,op,numCols) );
00313     }
00314   };
00315 
00316   // To do:  Add enable_if<> restrictions on DstView::Rank == 2,
00317   // SrcView::Rank == 1
00318   template <typename DstView, typename SrcView,typename IdxView,
00319             typename ColView, typename Op>
00320   void unpack_array_multi_column_variable_stride(const DstView& dst,
00321                                                  const SrcView& src,
00322                                                  const IdxView& idx,
00323                                                  const ColView& col,
00324                                                  const Op& op,
00325                                                  size_t numCols) {
00326     UnpackArrayMultiColumnVariableStride<DstView,SrcView,IdxView,ColView,Op>::unpack(
00327       dst, src, idx, col, op, numCols);
00328   }
00329 
00330   template <typename DstView, typename SrcView,
00331             typename DstIdxView, typename SrcIdxView>
00332   struct PermuteArrayMultiColumn {
00333     typedef typename DstView::device_type device_type;
00334     typedef typename device_type::size_type size_type;
00335 
00336     DstView dst;
00337     SrcView src;
00338     DstIdxView dst_idx;
00339     SrcIdxView src_idx;
00340     size_t numCols;
00341 
00342     PermuteArrayMultiColumn(const DstView& dst_,
00343                             const SrcView& src_,
00344                             const DstIdxView& dst_idx_,
00345                             const SrcIdxView& src_idx_,
00346                             size_t numCols_) :
00347       dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
00348       numCols(numCols_) {}
00349 
00350     KOKKOS_INLINE_FUNCTION
00351     void operator()( const size_type k ) const {
00352       const typename DstIdxView::value_type toRow = dst_idx(k);
00353       const typename SrcIdxView::value_type fromRow = src_idx(k);
00354       for (size_t j = 0; j < numCols; ++j)
00355         dst(toRow, j) = src(fromRow, j);
00356     }
00357 
00358     static void permute(const DstView& dst,
00359                         const SrcView& src,
00360                         const DstIdxView& dst_idx,
00361                         const SrcIdxView& src_idx,
00362                         size_t numCols) {
00363       const size_type n = std::min( dst_idx.size(), src_idx.size() );
00364       Kokkos::parallel_for(
00365         n, PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) );
00366     }
00367   };
00368 
00369   // To do:  Add enable_if<> restrictions on DstView::Rank == 1,
00370   // SrcView::Rank == 2
00371   template <typename DstView, typename SrcView,
00372             typename DstIdxView, typename SrcIdxView>
00373   void permute_array_multi_column(const DstView& dst,
00374                                   const SrcView& src,
00375                                   const DstIdxView& dst_idx,
00376                                   const SrcIdxView& src_idx,
00377                                   size_t numCols) {
00378     PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
00379       dst, src, dst_idx, src_idx, numCols);
00380   }
00381 
00382   template <typename DstView, typename SrcView,
00383             typename DstIdxView, typename SrcIdxView,
00384             typename DstColView, typename SrcColView>
00385   struct PermuteArrayMultiColumnVariableStride {
00386     typedef typename DstView::device_type device_type;
00387     typedef typename device_type::size_type size_type;
00388 
00389     DstView dst;
00390     SrcView src;
00391     DstIdxView dst_idx;
00392     SrcIdxView src_idx;
00393     DstColView dst_col;
00394     SrcColView src_col;
00395     size_t numCols;
00396 
00397     PermuteArrayMultiColumnVariableStride(const DstView& dst_,
00398                                           const SrcView& src_,
00399                                           const DstIdxView& dst_idx_,
00400                                           const SrcIdxView& src_idx_,
00401                                           const DstColView& dst_col_,
00402                                           const SrcColView& src_col_,
00403                                           size_t numCols_) :
00404       dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
00405       dst_col(dst_col_), src_col(src_col_),
00406       numCols(numCols_) {}
00407 
00408     KOKKOS_INLINE_FUNCTION
00409     void operator()( const size_type k ) const {
00410       const typename DstIdxView::value_type toRow = dst_idx(k);
00411       const typename SrcIdxView::value_type fromRow = src_idx(k);
00412       for (size_t j = 0; j < numCols; ++j)
00413         dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
00414     }
00415 
00416     static void permute(const DstView& dst,
00417                         const SrcView& src,
00418                         const DstIdxView& dst_idx,
00419                         const SrcIdxView& src_idx,
00420                         const DstColView& dst_col,
00421                         const SrcColView& src_col,
00422                         size_t numCols) {
00423       const size_type n = std::min( dst_idx.size(), src_idx.size() );
00424       Kokkos::parallel_for(
00425         n, PermuteArrayMultiColumnVariableStride(
00426           dst,src,dst_idx,src_idx,dst_col,src_col,numCols) );
00427     }
00428   };
00429 
00430   // To do:  Add enable_if<> restrictions on DstView::Rank == 1,
00431   // SrcView::Rank == 2
00432   template <typename DstView, typename SrcView,
00433             typename DstIdxView, typename SrcIdxView,
00434             typename DstColView, typename SrcColView>
00435   void permute_array_multi_column_variable_stride(const DstView& dst,
00436                                                   const SrcView& src,
00437                                                   const DstIdxView& dst_idx,
00438                                                   const SrcIdxView& src_idx,
00439                                                   const DstColView& dst_col,
00440                                                   const SrcColView& src_col,
00441                                                   size_t numCols) {
00442     PermuteArrayMultiColumnVariableStride<DstView,SrcView,
00443       DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
00444       dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
00445   }
00446 
00447 } // Details namespace
00448 } // KokkosRefactor namespace
00449 } // Tpetra namespace
00450 
00451 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines