|
Tpetra Matrix/Vector Services
Version of the Day
|
00001 /* 00002 // @HEADER 00003 // *********************************************************************** 00004 // 00005 // Tpetra: Templated Linear Algebra Services Package 00006 // Copyright (2008) Sandia Corporation 00007 // 00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00009 // the U.S. Government retains certain rights in this software. 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are 00013 // met: 00014 // 00015 // 1. Redistributions of source code must retain the above copyright 00016 // notice, this list of conditions and the following disclaimer. 00017 // 00018 // 2. Redistributions in binary form must reproduce the above copyright 00019 // notice, this list of conditions and the following disclaimer in the 00020 // documentation and/or other materials provided with the distribution. 00021 // 00022 // 3. Neither the name of the Corporation nor the names of the 00023 // contributors may be used to endorse or promote products derived from 00024 // this software without specific prior written permission. 00025 // 00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00037 // 00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00039 // 00040 // ************************************************************************ 00041 // @HEADER 00042 */ 00043 00044 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both 00045 // incorrect (as() is not a device function) and usually irrelevant 00046 // (it would only matter if LocalOrdinal were bigger than size_t on a 00047 // particular platform, which is unlikely). 00048 00049 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP 00050 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP 00051 00052 #include "Kokkos_Core.hpp" 00053 #include "Kokkos_ArithTraits.hpp" 00054 00055 namespace Tpetra { 00056 namespace KokkosRefactor { 00057 namespace Details { 00058 00059 // Functors for implementing packAndPrepare and unpackAndCombine 00060 // through parallel_for 00061 00062 template <typename DstView, typename SrcView, typename IdxView> 00063 struct PackArraySingleColumn { 00064 typedef typename DstView::device_type device_type; 00065 typedef typename device_type::size_type size_type; 00066 00067 DstView dst; 00068 SrcView src; 00069 IdxView idx; 00070 size_t col; 00071 00072 PackArraySingleColumn(const DstView& dst_, 00073 const SrcView& src_, 00074 const IdxView& idx_, 00075 size_t col_) : 00076 dst(dst_), src(src_), idx(idx_), col(col_) {} 00077 00078 KOKKOS_INLINE_FUNCTION 00079 void operator()( const size_type k ) const { 00080 dst(k) = src(idx(k), col); 00081 } 00082 00083 static void pack(const DstView& dst, 00084 const SrcView& src, 00085 const IdxView& idx, 00086 size_t col) { 00087 Kokkos::parallel_for( idx.size(), 00088 PackArraySingleColumn(dst,src,idx,col) ); 00089 } 00090 }; 00091 00092 // To do: Add enable_if<> restrictions on DstView::Rank == 1, 00093 // SrcView::Rank == 2 00094 template <typename DstView, typename SrcView, typename IdxView> 00095 void pack_array_single_column(const DstView& dst, 00096 const SrcView& src, 00097 const IdxView& idx, 00098 size_t col) { 00099 PackArraySingleColumn<DstView,SrcView,IdxView>::pack( 00100 dst, src, idx, col); 00101 } 00102 00103 template <typename DstView, typename SrcView, typename IdxView> 00104 struct PackArrayMultiColumn { 00105 typedef typename DstView::device_type device_type; 00106 typedef typename device_type::size_type size_type; 00107 00108 DstView dst; 00109 SrcView src; 00110 IdxView idx; 00111 size_t numCols; 00112 00113 PackArrayMultiColumn(const DstView& dst_, 00114 const SrcView& src_, 00115 const IdxView& idx_, 00116 size_t numCols_) : 00117 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {} 00118 00119 KOKKOS_INLINE_FUNCTION 00120 void operator()( const size_type k ) const { 00121 const typename IdxView::value_type localRow = idx(k); 00122 const size_t offset = k*numCols; 00123 for (size_t j = 0; j < numCols; ++j) 00124 dst(offset + j) = src(localRow, j); 00125 } 00126 00127 static void pack(const DstView& dst, 00128 const SrcView& src, 00129 const IdxView& idx, 00130 size_t numCols) { 00131 Kokkos::parallel_for( idx.size(), 00132 PackArrayMultiColumn(dst,src,idx,numCols) ); 00133 } 00134 }; 00135 00136 // To do: Add enable_if<> restrictions on DstView::Rank == 1, 00137 // SrcView::Rank == 2 00138 template <typename DstView, typename SrcView, typename IdxView> 00139 void pack_array_multi_column(const DstView& dst, 00140 const SrcView& src, 00141 const IdxView& idx, 00142 size_t numCols) { 00143 PackArrayMultiColumn<DstView,SrcView,IdxView>::pack( 00144 dst, src, idx, numCols); 00145 } 00146 00147 template <typename DstView, typename SrcView, typename IdxView, 00148 typename ColView> 00149 struct PackArrayMultiColumnVariableStride { 00150 typedef typename DstView::device_type device_type; 00151 typedef typename device_type::size_type size_type; 00152 00153 DstView dst; 00154 SrcView src; 00155 IdxView idx; 00156 ColView col; 00157 size_t numCols; 00158 00159 PackArrayMultiColumnVariableStride(const DstView& dst_, 00160 const SrcView& src_, 00161 const IdxView& idx_, 00162 const ColView& col_, 00163 size_t numCols_) : 00164 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {} 00165 00166 KOKKOS_INLINE_FUNCTION 00167 void operator()( const size_type k ) const { 00168 const typename IdxView::value_type localRow = idx(k); 00169 const size_t offset = k*numCols; 00170 for (size_t j = 0; j < numCols; ++j) 00171 dst(offset + j) = src(localRow, col(j)); 00172 } 00173 00174 static void pack(const DstView& dst, 00175 const SrcView& src, 00176 const IdxView& idx, 00177 const ColView& col, 00178 size_t numCols) { 00179 Kokkos::parallel_for( idx.size(), 00180 PackArrayMultiColumnVariableStride( 00181 dst,src,idx,col,numCols) ); 00182 } 00183 }; 00184 00185 // To do: Add enable_if<> restrictions on DstView::Rank == 1, 00186 // SrcView::Rank == 2 00187 template <typename DstView, typename SrcView, typename IdxView, 00188 typename ColView> 00189 void pack_array_multi_column_variable_stride(const DstView& dst, 00190 const SrcView& src, 00191 const IdxView& idx, 00192 const ColView& col, 00193 size_t numCols) { 00194 PackArrayMultiColumnVariableStride<DstView,SrcView,IdxView,ColView>::pack( 00195 dst, src, idx, col, numCols); 00196 } 00197 00198 struct InsertOp { 00199 template <typename Scalar> 00200 KOKKOS_INLINE_FUNCTION 00201 void operator() (Scalar& dest, const Scalar& src) const { 00202 Kokkos::atomic_assign(&dest, src); 00203 } 00204 }; 00205 struct AddOp { 00206 template <typename Scalar> 00207 KOKKOS_INLINE_FUNCTION 00208 void operator() (Scalar& dest, const Scalar& src) const { 00209 Kokkos::atomic_add(&dest, src); 00210 } 00211 }; 00212 struct AbsMaxOp { 00213 // ETP: Is this really what we want? This seems very odd if 00214 // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>) 00215 template <typename T> 00216 KOKKOS_INLINE_FUNCTION 00217 T max(const T& a, const T& b) const { return a > b ? a : b; } 00218 00219 template <typename Scalar> 00220 KOKKOS_INLINE_FUNCTION 00221 void operator() (Scalar& dest, const Scalar& src) const { 00222 typedef Kokkos::Details::ArithTraits<Scalar> SCT; 00223 Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src)))); 00224 } 00225 }; 00226 00227 template <typename DstView, typename SrcView, typename IdxView, typename Op> 00228 struct UnpackArrayMultiColumn { 00229 typedef typename DstView::device_type device_type; 00230 typedef typename device_type::size_type size_type; 00231 00232 DstView dst; 00233 SrcView src; 00234 IdxView idx; 00235 Op op; 00236 size_t numCols; 00237 00238 UnpackArrayMultiColumn(const DstView& dst_, 00239 const SrcView& src_, 00240 const IdxView& idx_, 00241 const Op& op_, 00242 size_t numCols_) : 00243 dst(dst_), src(src_), idx(idx_), op(op_), numCols(numCols_) {} 00244 00245 KOKKOS_INLINE_FUNCTION 00246 void operator()( const size_type k ) const { 00247 const typename IdxView::value_type localRow = idx(k); 00248 const size_t offset = k*numCols; 00249 for (size_t j = 0; j < numCols; ++j) 00250 op( dst(localRow,j), src(offset+j) ); 00251 } 00252 00253 static void unpack(const DstView& dst, 00254 const SrcView& src, 00255 const IdxView& idx, 00256 const Op& op, 00257 size_t numCols) { 00258 Kokkos::parallel_for( idx.size(), 00259 UnpackArrayMultiColumn(dst,src,idx,op,numCols) ); 00260 } 00261 }; 00262 00263 // To do: Add enable_if<> restrictions on DstView::Rank == 2, 00264 // SrcView::Rank == 1 00265 template <typename DstView, typename SrcView, typename IdxView, typename Op> 00266 void unpack_array_multi_column(const DstView& dst, 00267 const SrcView& src, 00268 const IdxView& idx, 00269 const Op& op, 00270 size_t numCols) { 00271 UnpackArrayMultiColumn<DstView,SrcView,IdxView,Op>::unpack( 00272 dst, src, idx, op, numCols); 00273 } 00274 00275 template <typename DstView, typename SrcView, typename IdxView, 00276 typename ColView, typename Op> 00277 struct UnpackArrayMultiColumnVariableStride { 00278 typedef typename DstView::device_type device_type; 00279 typedef typename device_type::size_type size_type; 00280 00281 DstView dst; 00282 SrcView src; 00283 IdxView idx; 00284 ColView col; 00285 Op op; 00286 size_t numCols; 00287 00288 UnpackArrayMultiColumnVariableStride(const DstView& dst_, 00289 const SrcView& src_, 00290 const IdxView& idx_, 00291 const ColView& col_, 00292 const Op& op_, 00293 size_t numCols_) : 00294 dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {} 00295 00296 KOKKOS_INLINE_FUNCTION 00297 void operator()( const size_type k ) const { 00298 const typename IdxView::value_type localRow = idx(k); 00299 const size_t offset = k*numCols; 00300 for (size_t j = 0; j < numCols; ++j) 00301 op( dst(localRow,col(j)), src(offset+j) ); 00302 } 00303 00304 static void unpack(const DstView& dst, 00305 const SrcView& src, 00306 const IdxView& idx, 00307 const ColView& col, 00308 const Op& op, 00309 size_t numCols) { 00310 Kokkos::parallel_for( idx.size(), 00311 UnpackArrayMultiColumnVariableStride( 00312 dst,src,idx,col,op,numCols) ); 00313 } 00314 }; 00315 00316 // To do: Add enable_if<> restrictions on DstView::Rank == 2, 00317 // SrcView::Rank == 1 00318 template <typename DstView, typename SrcView,typename IdxView, 00319 typename ColView, typename Op> 00320 void unpack_array_multi_column_variable_stride(const DstView& dst, 00321 const SrcView& src, 00322 const IdxView& idx, 00323 const ColView& col, 00324 const Op& op, 00325 size_t numCols) { 00326 UnpackArrayMultiColumnVariableStride<DstView,SrcView,IdxView,ColView,Op>::unpack( 00327 dst, src, idx, col, op, numCols); 00328 } 00329 00330 template <typename DstView, typename SrcView, 00331 typename DstIdxView, typename SrcIdxView> 00332 struct PermuteArrayMultiColumn { 00333 typedef typename DstView::device_type device_type; 00334 typedef typename device_type::size_type size_type; 00335 00336 DstView dst; 00337 SrcView src; 00338 DstIdxView dst_idx; 00339 SrcIdxView src_idx; 00340 size_t numCols; 00341 00342 PermuteArrayMultiColumn(const DstView& dst_, 00343 const SrcView& src_, 00344 const DstIdxView& dst_idx_, 00345 const SrcIdxView& src_idx_, 00346 size_t numCols_) : 00347 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_), 00348 numCols(numCols_) {} 00349 00350 KOKKOS_INLINE_FUNCTION 00351 void operator()( const size_type k ) const { 00352 const typename DstIdxView::value_type toRow = dst_idx(k); 00353 const typename SrcIdxView::value_type fromRow = src_idx(k); 00354 for (size_t j = 0; j < numCols; ++j) 00355 dst(toRow, j) = src(fromRow, j); 00356 } 00357 00358 static void permute(const DstView& dst, 00359 const SrcView& src, 00360 const DstIdxView& dst_idx, 00361 const SrcIdxView& src_idx, 00362 size_t numCols) { 00363 const size_type n = std::min( dst_idx.size(), src_idx.size() ); 00364 Kokkos::parallel_for( 00365 n, PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) ); 00366 } 00367 }; 00368 00369 // To do: Add enable_if<> restrictions on DstView::Rank == 1, 00370 // SrcView::Rank == 2 00371 template <typename DstView, typename SrcView, 00372 typename DstIdxView, typename SrcIdxView> 00373 void permute_array_multi_column(const DstView& dst, 00374 const SrcView& src, 00375 const DstIdxView& dst_idx, 00376 const SrcIdxView& src_idx, 00377 size_t numCols) { 00378 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute( 00379 dst, src, dst_idx, src_idx, numCols); 00380 } 00381 00382 template <typename DstView, typename SrcView, 00383 typename DstIdxView, typename SrcIdxView, 00384 typename DstColView, typename SrcColView> 00385 struct PermuteArrayMultiColumnVariableStride { 00386 typedef typename DstView::device_type device_type; 00387 typedef typename device_type::size_type size_type; 00388 00389 DstView dst; 00390 SrcView src; 00391 DstIdxView dst_idx; 00392 SrcIdxView src_idx; 00393 DstColView dst_col; 00394 SrcColView src_col; 00395 size_t numCols; 00396 00397 PermuteArrayMultiColumnVariableStride(const DstView& dst_, 00398 const SrcView& src_, 00399 const DstIdxView& dst_idx_, 00400 const SrcIdxView& src_idx_, 00401 const DstColView& dst_col_, 00402 const SrcColView& src_col_, 00403 size_t numCols_) : 00404 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_), 00405 dst_col(dst_col_), src_col(src_col_), 00406 numCols(numCols_) {} 00407 00408 KOKKOS_INLINE_FUNCTION 00409 void operator()( const size_type k ) const { 00410 const typename DstIdxView::value_type toRow = dst_idx(k); 00411 const typename SrcIdxView::value_type fromRow = src_idx(k); 00412 for (size_t j = 0; j < numCols; ++j) 00413 dst(toRow, dst_col(j)) = src(fromRow, src_col(j)); 00414 } 00415 00416 static void permute(const DstView& dst, 00417 const SrcView& src, 00418 const DstIdxView& dst_idx, 00419 const SrcIdxView& src_idx, 00420 const DstColView& dst_col, 00421 const SrcColView& src_col, 00422 size_t numCols) { 00423 const size_type n = std::min( dst_idx.size(), src_idx.size() ); 00424 Kokkos::parallel_for( 00425 n, PermuteArrayMultiColumnVariableStride( 00426 dst,src,dst_idx,src_idx,dst_col,src_col,numCols) ); 00427 } 00428 }; 00429 00430 // To do: Add enable_if<> restrictions on DstView::Rank == 1, 00431 // SrcView::Rank == 2 00432 template <typename DstView, typename SrcView, 00433 typename DstIdxView, typename SrcIdxView, 00434 typename DstColView, typename SrcColView> 00435 void permute_array_multi_column_variable_stride(const DstView& dst, 00436 const SrcView& src, 00437 const DstIdxView& dst_idx, 00438 const SrcIdxView& src_idx, 00439 const DstColView& dst_col, 00440 const SrcColView& src_col, 00441 size_t numCols) { 00442 PermuteArrayMultiColumnVariableStride<DstView,SrcView, 00443 DstIdxView,SrcIdxView,DstColView,SrcColView>::permute( 00444 dst, src, dst_idx, src_idx, dst_col, src_col, numCols); 00445 } 00446 00447 } // Details namespace 00448 } // KokkosRefactor namespace 00449 } // Tpetra namespace 00450 00451 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
1.7.6.1