|
Kokkos Core Kernels Package
Version of the Day
|
00001 #ifndef KOKKOS_MULTIVECTOR_H_ 00002 #define KOKKOS_MULTIVECTOR_H_ 00003 00004 #include <Kokkos_Core.hpp> 00005 #include <Kokkos_InnerProductSpaceTraits.hpp> 00006 #include <ctime> 00007 00008 #include <iostream> 00009 #include <stdexcept> // For some recently added error handling 00010 00011 #define MAX(a,b) (a<b?b:a) 00012 00013 namespace Kokkos { 00014 00015 00016 template<typename Scalar, class device> 00017 struct MultiVectorDynamic{ 00018 #ifdef KOKKOS_USE_CUSPARSE 00019 typedef typename Kokkos::LayoutLeft layout; 00020 #else 00021 #ifdef KOKKOS_USE_MKL 00022 typedef typename Kokkos::LayoutRight layout; 00023 #else 00024 typedef typename device::array_layout layout; 00025 #endif 00026 #endif 00027 typedef typename Kokkos::View<Scalar** , layout, device> type ; 00028 typedef typename Kokkos::View<const Scalar** , layout, device> const_type ; 00029 typedef typename Kokkos::View<const Scalar** , layout, device, Kokkos::MemoryRandomAccess> random_read_type ; 00030 MultiVectorDynamic() {} 00031 ~MultiVectorDynamic() {} 00032 }; 00033 00034 template<typename Scalar, class device, int n> 00035 struct MultiVectorStatic{ 00036 typedef Scalar scalar; 00037 typedef typename device::array_layout layout; 00038 typedef typename Kokkos::View<Scalar*[n] , layout, device> type ; 00039 typedef typename Kokkos::View<const Scalar*[n] , layout, device> const_type ; 00040 typedef typename Kokkos::View<const Scalar*[n] , layout, device, Kokkos::MemoryRandomAccess> random_read_type ; 00041 MultiVectorStatic() {} 00042 ~MultiVectorStatic() {} 00043 }; 00044 00045 00046 00047 /*------------------------------------------------------------------------------------------ 00048 *-------------------------- Multiply with scalar: y = a * x ------------------------------- 00049 *------------------------------------------------------------------------------------------*/ 00050 template<class RVector, class aVector, class XVector> 00051 struct MV_MulScalarFunctor 00052 { 00053 typedef typename XVector::device_type device_type; 00054 typedef typename XVector::size_type size_type; 00055 00056 RVector m_r; 00057 typename XVector::const_type m_x ; 00058 typename aVector::const_type m_a ; 00059 size_type n; 00060 MV_MulScalarFunctor() {n=1;} 00061 //-------------------------------------------------------------------------- 00062 00063 KOKKOS_INLINE_FUNCTION 00064 void operator()( const size_type i) const 00065 { 00066 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00067 #pragma ivdep 00068 #endif 00069 for(size_type k=0;k<n;k++) 00070 m_r(i,k) = m_a[k]*m_x(i,k); 00071 } 00072 }; 00073 00074 template<class aVector, class XVector> 00075 struct MV_MulScalarFunctorSelf 00076 { 00077 typedef typename XVector::device_type device_type; 00078 typedef typename XVector::size_type size_type; 00079 00080 XVector m_x; 00081 typename aVector::const_type m_a ; 00082 size_type n; 00083 //-------------------------------------------------------------------------- 00084 00085 KOKKOS_INLINE_FUNCTION 00086 void operator()( const size_type i) const 00087 { 00088 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00089 #pragma ivdep 00090 #endif 00091 for(size_type k=0;k<n;k++) 00092 m_x(i,k) *= m_a[k]; 00093 } 00094 }; 00095 00096 template<class RVector, class DataType,class Layout,class Device, class MemoryManagement,class Specialisation, class XVector> 00097 RVector MV_MulScalar (const RVector& r, const typename Kokkos::View<DataType,Layout,Device,MemoryManagement,Specialisation>& a, const XVector& x) 00098 { 00099 typedef typename Kokkos::View<DataType,Layout,Device,MemoryManagement> aVector; 00100 if (r == x) { 00101 MV_MulScalarFunctorSelf<aVector,XVector> op ; 00102 op.m_x = x ; 00103 op.m_a = a ; 00104 op.n = x.dimension(1); 00105 Kokkos::parallel_for (x.dimension (0), op); 00106 return r; 00107 } 00108 00109 MV_MulScalarFunctor<RVector,aVector,XVector> op ; 00110 op.m_r = r ; 00111 op.m_x = x ; 00112 op.m_a = a ; 00113 op.n = x.dimension(1); 00114 Kokkos::parallel_for( x.dimension(0) , op ); 00115 return r; 00116 } 00117 00118 template<class RVector, class XVector> 00119 struct MV_MulScalarFunctor<RVector,typename RVector::value_type,XVector> 00120 { 00121 typedef typename XVector::device_type device_type; 00122 typedef typename XVector::size_type size_type; 00123 00124 RVector m_r; 00125 typename XVector::const_type m_x ; 00126 typename RVector::value_type m_a ; 00127 size_type n; 00128 MV_MulScalarFunctor() {n=1;} 00129 //-------------------------------------------------------------------------- 00130 00131 KOKKOS_INLINE_FUNCTION 00132 void operator()( const size_type i) const 00133 { 00134 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00135 #pragma ivdep 00136 #endif 00137 for(size_type k=0;k<n;k++) 00138 m_r(i,k) = m_a*m_x(i,k); 00139 } 00140 }; 00141 00142 template<class XVector> 00143 struct MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> 00144 { 00145 typedef typename XVector::device_type device_type; 00146 typedef typename XVector::size_type size_type; 00147 00148 XVector m_x; 00149 typename XVector::non_const_value_type m_a ; 00150 size_type n; 00151 //-------------------------------------------------------------------------- 00152 00153 KOKKOS_INLINE_FUNCTION 00154 void operator()( const size_type i) const 00155 { 00156 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00157 #pragma ivdep 00158 #endif 00159 for(size_type k=0;k<n;k++) 00160 m_x(i,k) *= m_a; 00161 } 00162 }; 00163 00164 template<class RVector, class XVector> 00165 RVector MV_MulScalar( const RVector & r, const typename XVector::non_const_value_type &a, const XVector & x) 00166 { 00167 /*if(r.dimension_1()==1) { 00168 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 00169 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 00170 00171 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 00172 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 00173 return V_MulScalar(r_1d,a,x_1d); 00174 }*/ 00175 if (r == x) { 00176 MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> op ; 00177 op.m_x = x ; 00178 op.m_a = a ; 00179 op.n = x.dimension(1); 00180 Kokkos::parallel_for (x.dimension (0), op); 00181 return r; 00182 } 00183 00184 MV_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ; 00185 op.m_r = r ; 00186 op.m_x = x ; 00187 op.m_a = a ; 00188 op.n = x.dimension(1); 00189 Kokkos::parallel_for( x.dimension(0) , op ); 00190 return r; 00191 } 00192 00193 /*------------------------------------------------------------------------------------------ 00194 *-------------------------- Reciprocal element wise: y[i] = 1/x[i] ------------------------ 00195 *------------------------------------------------------------------------------------------*/ 00196 template<class RVector, class XVector> 00197 struct MV_ReciprocalFunctor 00198 { 00199 typedef typename XVector::device_type device_type; 00200 typedef typename XVector::size_type size_type; 00201 00202 RVector m_r; 00203 typename XVector::const_type m_x ; 00204 00205 const size_type m_n; 00206 MV_ReciprocalFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {} 00207 //-------------------------------------------------------------------------- 00208 00209 KOKKOS_INLINE_FUNCTION 00210 void operator()( const size_type i) const 00211 { 00212 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00213 #pragma ivdep 00214 #endif 00215 for(size_type k=0;k<m_n;k++) 00216 m_r(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i,k); 00217 } 00218 }; 00219 00220 template<class XVector> 00221 struct MV_ReciprocalSelfFunctor 00222 { 00223 typedef typename XVector::device_type device_type; 00224 typedef typename XVector::size_type size_type; 00225 00226 XVector m_x ; 00227 00228 const size_type m_n; 00229 MV_ReciprocalSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {} 00230 //-------------------------------------------------------------------------- 00231 00232 KOKKOS_INLINE_FUNCTION 00233 void operator()( const size_type i) const 00234 { 00235 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00236 #pragma ivdep 00237 #endif 00238 for(size_type k=0;k<m_n;k++) 00239 m_x(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i,k); 00240 } 00241 }; 00242 00243 template<class RVector, class XVector> 00244 RVector MV_Reciprocal( const RVector & r, const XVector & x) 00245 { 00246 // TODO: Add error check (didn't link for some reason?) 00247 /*if(r.dimension_0() != x.dimension_0()) 00248 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(0) of r and x don't match"); 00249 if(r.dimension_1() != x.dimension_1()) 00250 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(1) of r and x don't match");*/ 00251 00252 //TODO: Get 1D version done 00253 /*if(r.dimension_1()==1) { 00254 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 00255 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 00256 00257 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 00258 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 00259 return V_MulScalar(r_1d,a,x_1d); 00260 }*/ 00261 if(r==x) { 00262 MV_ReciprocalSelfFunctor<XVector> op(x,x.dimension_1()) ; 00263 Kokkos::parallel_for( x.dimension_0() , op ); 00264 return r; 00265 } 00266 00267 MV_ReciprocalFunctor<RVector,XVector> op(r,x,x.dimension_1()) ; 00268 Kokkos::parallel_for( x.dimension_0() , op ); 00269 return r; 00270 } 00271 00272 /*------------------------------------------------------------------------------------------ 00273 *------------------- Reciprocal element wise with threshold: x[i] = 1/x[i] ---------------- 00274 *------------------------------------------------------------------------------------------*/ 00275 template<class XVector> 00276 struct MV_ReciprocalThresholdSelfFunctor 00277 { 00278 typedef typename XVector::device_type device_type; 00279 typedef typename XVector::size_type size_type; 00280 typedef typename XVector::non_const_value_type value_type; 00281 typedef Kokkos::Details::ArithTraits<value_type> KAT; 00282 typedef typename KAT::mag_type mag_type; 00283 00284 const XVector m_x; 00285 const value_type m_min_val; 00286 const value_type m_min_val_mag; 00287 const size_type m_n; 00288 00289 MV_ReciprocalThresholdSelfFunctor(const XVector& x, const value_type& min_val, const size_type n) : 00290 m_x(x), m_min_val(min_val), m_min_val_mag(KAT::abs(min_val)), m_n(n) {} 00291 //-------------------------------------------------------------------------- 00292 00293 KOKKOS_INLINE_FUNCTION 00294 void operator()( const size_type i) const 00295 { 00296 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00297 #pragma ivdep 00298 #endif 00299 for(size_type k=0;k<m_n;k++) { 00300 if (KAT::abs(m_x(i,k)) < m_min_val_mag) 00301 m_x(i,k) = m_min_val; 00302 else 00303 m_x(i,k) = KAT::one() / m_x(i,k); 00304 } 00305 } 00306 }; 00307 00308 template<class XVector> 00309 XVector MV_ReciprocalThreshold( const XVector & x, const typename XVector::non_const_value_type& min_val ) 00310 { 00311 MV_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val,x.dimension_1()) ; 00312 Kokkos::parallel_for( x.dimension_0() , op ); 00313 return x; 00314 } 00315 00316 /*------------------------------------------------------------------------------------------ 00317 *-------------------------- Abs element wise: y[i] = abs(x[i]) ------------------------ 00318 *------------------------------------------------------------------------------------------*/ 00319 template<class RVector, class XVector> 00320 struct MV_AbsFunctor 00321 { 00322 typedef typename XVector::device_type device_type; 00323 typedef typename XVector::size_type size_type; 00324 00325 RVector m_r; 00326 typename XVector::const_type m_x ; 00327 00328 const size_type m_n; 00329 MV_AbsFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {} 00330 //-------------------------------------------------------------------------- 00331 00332 KOKKOS_INLINE_FUNCTION 00333 void operator()( const size_type i) const 00334 { 00335 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00336 #pragma ivdep 00337 #endif 00338 for(size_type k=0;k<m_n;k++) 00339 m_r(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k)); 00340 } 00341 }; 00342 00343 template<class XVector> 00344 struct MV_AbsSelfFunctor 00345 { 00346 typedef typename XVector::device_type device_type; 00347 typedef typename XVector::size_type size_type; 00348 00349 XVector m_x ; 00350 00351 const size_type m_n; 00352 MV_AbsSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {} 00353 //-------------------------------------------------------------------------- 00354 00355 KOKKOS_INLINE_FUNCTION 00356 void operator()( const size_type i) const 00357 { 00358 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00359 #pragma ivdep 00360 #endif 00361 for(size_type k=0;k<m_n;k++) 00362 m_x(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k)); 00363 } 00364 }; 00365 00366 template<class RVector, class XVector> 00367 RVector MV_Abs( const RVector & r, const XVector & x) 00368 { 00369 // TODO: Add error check (didn't link for some reason?) 00370 /*if(r.dimension_0() != x.dimension_0()) 00371 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(0) of r and x don't match"); 00372 if(r.dimension_1() != x.dimension_1()) 00373 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(1) of r and x don't match");*/ 00374 00375 //TODO: Get 1D version done 00376 /*if(r.dimension_1()==1) { 00377 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 00378 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 00379 00380 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 00381 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 00382 return V_Abs(r_1d,x_1d); 00383 }*/ 00384 if(r==x) { 00385 MV_AbsSelfFunctor<XVector> op(x,x.dimension_1()) ; 00386 Kokkos::parallel_for( x.dimension_0() , op ); 00387 return r; 00388 } 00389 00390 MV_AbsFunctor<RVector,XVector> op(r,x,x.dimension_1()) ; 00391 Kokkos::parallel_for( x.dimension_0() , op ); 00392 return r; 00393 } 00394 00395 /*------------------------------------------------------------------------------------------ 00396 *------ ElementWiseMultiply element wise: C(i,j) = c*C(i,j) + ab*A(i)*B(i,j) -------------- 00397 *------------------------------------------------------------------------------------------*/ 00398 template<class CVector, class AVector, class BVector> 00399 struct MV_ElementWiseMultiplyFunctor 00400 { 00401 typedef typename CVector::device_type device_type; 00402 typedef typename CVector::size_type size_type; 00403 00404 typename CVector::const_value_type m_c; 00405 CVector m_C; 00406 typename AVector::const_value_type m_ab; 00407 typename AVector::const_type m_A ; 00408 typename BVector::const_type m_B ; 00409 00410 const size_type m_n; 00411 MV_ElementWiseMultiplyFunctor( 00412 typename CVector::const_value_type c, 00413 CVector C, 00414 typename AVector::const_value_type ab, 00415 typename AVector::const_type A, 00416 typename BVector::const_type B, 00417 const size_type n): 00418 m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B),m_n(n) 00419 {} 00420 //-------------------------------------------------------------------------- 00421 00422 KOKKOS_INLINE_FUNCTION 00423 void operator()( const size_type i) const 00424 { 00425 typename AVector::const_value_type Ai = m_A(i); 00426 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00427 #pragma ivdep 00428 #endif 00429 for(size_type k=0;k<m_n;k++) 00430 m_C(i,k) = m_c*m_C(i,k) + m_ab*Ai*m_B(i,k); 00431 } 00432 }; 00433 00434 00435 template<class CVector, class AVector, class BVector> 00436 CVector MV_ElementWiseMultiply( 00437 typename CVector::const_value_type c, 00438 CVector C, 00439 typename AVector::const_value_type ab, 00440 AVector A, 00441 BVector B 00442 ) 00443 { 00444 // TODO: Add error check (didn't link for some reason?) 00445 /*if(r.dimension_0() != x.dimension_0()) 00446 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(0) of r and x don't match"); 00447 if(r.dimension_1() != x.dimension_1()) 00448 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(1) of r and x don't match");*/ 00449 00450 //TODO: Get 1D version done 00451 /*if(r.dimension_1()==1) { 00452 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 00453 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 00454 00455 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 00456 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 00457 return V_ElementWiseMultiply(r_1d,x_1d); 00458 }*/ 00459 00460 MV_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B,C.dimension_1()) ; 00461 Kokkos::parallel_for( C.dimension_0() , op ); 00462 return C; 00463 } 00464 00465 /*------------------------------------------------------------------------------------------ 00466 *-------------------------- Vector Add: r = a*x + b*y ------------------------------------- 00467 *------------------------------------------------------------------------------------------*/ 00468 00469 /* Variants of Functors with a and b being vectors. */ 00470 00471 //Unroll for n<=16 00472 template<class RVector,class aVector, class XVector, class bVector, class YVector, int scalar_x, int scalar_y,int UNROLL> 00473 struct MV_AddUnrollFunctor 00474 { 00475 typedef typename RVector::device_type device_type; 00476 typedef typename RVector::size_type size_type; 00477 00478 RVector m_r ; 00479 XVector m_x ; 00480 YVector m_y ; 00481 aVector m_a; 00482 bVector m_b; 00483 size_type n; 00484 size_type start; 00485 00486 MV_AddUnrollFunctor() {n=UNROLL;} 00487 //-------------------------------------------------------------------------- 00488 00489 KOKKOS_INLINE_FUNCTION 00490 void operator()( const size_type i ) const 00491 { 00492 if((scalar_x==1)&&(scalar_y==1)){ 00493 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00494 #pragma unroll 00495 #endif 00496 for(size_type k=0;k<UNROLL;k++) 00497 m_r(i,k) = m_x(i,k) + m_y(i,k); 00498 } 00499 if((scalar_x==1)&&(scalar_y==-1)){ 00500 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00501 #pragma unroll 00502 #endif 00503 for(size_type k=0;k<UNROLL;k++) 00504 m_r(i,k) = m_x(i,k) - m_y(i,k); 00505 } 00506 if((scalar_x==-1)&&(scalar_y==-1)){ 00507 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00508 #pragma unroll 00509 #endif 00510 for(size_type k=0;k<UNROLL;k++) 00511 m_r(i,k) = -m_x(i,k) - m_y(i,k); 00512 } 00513 if((scalar_x==-1)&&(scalar_y==1)){ 00514 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00515 #pragma unroll 00516 #endif 00517 for(size_type k=0;k<UNROLL;k++) 00518 m_r(i,k) = -m_x(i,k) + m_y(i,k); 00519 } 00520 if((scalar_x==2)&&(scalar_y==1)){ 00521 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00522 #pragma unroll 00523 #endif 00524 for(size_type k=0;k<UNROLL;k++) 00525 m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k); 00526 } 00527 if((scalar_x==2)&&(scalar_y==-1)){ 00528 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00529 #pragma unroll 00530 #endif 00531 for(size_type k=0;k<UNROLL;k++) 00532 m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k); 00533 } 00534 if((scalar_x==1)&&(scalar_y==2)){ 00535 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00536 #pragma unroll 00537 #endif 00538 for(size_type k=0;k<UNROLL;k++) 00539 m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k); 00540 } 00541 if((scalar_x==-1)&&(scalar_y==2)){ 00542 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00543 #pragma unroll 00544 #endif 00545 for(size_type k=0;k<UNROLL;k++) 00546 m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k); 00547 } 00548 if((scalar_x==2)&&(scalar_y==2)){ 00549 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00550 #pragma unroll 00551 #endif 00552 for(size_type k=0;k<UNROLL;k++) 00553 m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k); 00554 } 00555 } 00556 }; 00557 00558 template<class RVector,class aVector, class XVector, class bVector, class YVector, int scalar_x, int scalar_y> 00559 struct MV_AddVectorFunctor 00560 { 00561 typedef typename RVector::device_type device_type; 00562 typedef typename RVector::size_type size_type; 00563 00564 RVector m_r ; 00565 XVector m_x ; 00566 YVector m_y ; 00567 aVector m_a; 00568 bVector m_b; 00569 size_type n; 00570 00571 MV_AddVectorFunctor() {n=1;} 00572 //-------------------------------------------------------------------------- 00573 00574 KOKKOS_INLINE_FUNCTION 00575 void operator()( const size_type i ) const 00576 { 00577 if((scalar_x==1)&&(scalar_y==1)) 00578 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00579 #pragma ivdep 00580 #endif 00581 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00582 #pragma vector always 00583 #endif 00584 for(size_type k=0;k<n;k++) 00585 m_r(i,k) = m_x(i,k) + m_y(i,k); 00586 if((scalar_x==1)&&(scalar_y==-1)) 00587 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00588 #pragma ivdep 00589 #endif 00590 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00591 #pragma vector always 00592 #endif 00593 for(size_type k=0;k<n;k++) 00594 m_r(i,k) = m_x(i,k) - m_y(i,k); 00595 if((scalar_x==-1)&&(scalar_y==-1)) 00596 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00597 #pragma ivdep 00598 #endif 00599 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00600 #pragma vector always 00601 #endif 00602 for(size_type k=0;k<n;k++) 00603 m_r(i,k) = -m_x(i,k) - m_y(i,k); 00604 if((scalar_x==-1)&&(scalar_y==1)) 00605 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00606 #pragma ivdep 00607 #endif 00608 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00609 #pragma vector always 00610 #endif 00611 for(size_type k=0;k<n;k++) 00612 m_r(i,k) = -m_x(i,k) + m_y(i,k); 00613 if((scalar_x==2)&&(scalar_y==1)) 00614 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00615 #pragma ivdep 00616 #endif 00617 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00618 #pragma vector always 00619 #endif 00620 for(size_type k=0;k<n;k++) 00621 m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k); 00622 if((scalar_x==2)&&(scalar_y==-1)) 00623 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00624 #pragma ivdep 00625 #endif 00626 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00627 #pragma vector always 00628 #endif 00629 for(size_type k=0;k<n;k++) 00630 m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k); 00631 if((scalar_x==1)&&(scalar_y==2)) 00632 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00633 #pragma ivdep 00634 #endif 00635 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00636 #pragma vector always 00637 #endif 00638 for(size_type k=0;k<n;k++) 00639 m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k); 00640 if((scalar_x==-1)&&(scalar_y==2)) 00641 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00642 #pragma ivdep 00643 #endif 00644 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00645 #pragma vector always 00646 #endif 00647 for(size_type k=0;k<n;k++) 00648 m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k); 00649 if((scalar_x==2)&&(scalar_y==2)) 00650 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00651 #pragma ivdep 00652 #endif 00653 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00654 #pragma vector always 00655 #endif 00656 for(size_type k=0;k<n;k++) 00657 m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k); 00658 00659 } 00660 }; 00661 00662 /* Variants of Functors with a and b being scalars. */ 00663 00664 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y,int UNROLL> 00665 struct MV_AddUnrollFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y,UNROLL> 00666 { 00667 typedef typename RVector::device_type device_type; 00668 typedef typename RVector::size_type size_type; 00669 00670 RVector m_r ; 00671 XVector m_x ; 00672 YVector m_y ; 00673 typename XVector::non_const_value_type m_a; 00674 typename YVector::non_const_value_type m_b; 00675 size_type n; 00676 size_type start; 00677 00678 MV_AddUnrollFunctor() {n=UNROLL;} 00679 //-------------------------------------------------------------------------- 00680 00681 KOKKOS_INLINE_FUNCTION 00682 void operator()( const size_type i ) const 00683 { 00684 if((scalar_x==1)&&(scalar_y==1)){ 00685 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00686 #pragma unroll 00687 #endif 00688 for(size_type k=0;k<UNROLL;k++) 00689 m_r(i,k) = m_x(i,k) + m_y(i,k); 00690 } 00691 if((scalar_x==1)&&(scalar_y==-1)){ 00692 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00693 #pragma unroll 00694 #endif 00695 for(size_type k=0;k<UNROLL;k++) 00696 m_r(i,k) = m_x(i,k) - m_y(i,k); 00697 } 00698 if((scalar_x==-1)&&(scalar_y==-1)){ 00699 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00700 #pragma unroll 00701 #endif 00702 for(size_type k=0;k<UNROLL;k++) 00703 m_r(i,k) = -m_x(i,k) - m_y(i,k); 00704 } 00705 if((scalar_x==-1)&&(scalar_y==1)){ 00706 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00707 #pragma unroll 00708 #endif 00709 for(size_type k=0;k<UNROLL;k++) 00710 m_r(i,k) = -m_x(i,k) + m_y(i,k); 00711 } 00712 if((scalar_x==2)&&(scalar_y==1)){ 00713 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00714 #pragma unroll 00715 #endif 00716 for(size_type k=0;k<UNROLL;k++) 00717 m_r(i,k) = m_a*m_x(i,k) + m_y(i,k); 00718 } 00719 if((scalar_x==2)&&(scalar_y==-1)){ 00720 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00721 #pragma unroll 00722 #endif 00723 for(size_type k=0;k<UNROLL;k++) 00724 m_r(i,k) = m_a*m_x(i,k) - m_y(i,k); 00725 } 00726 if((scalar_x==1)&&(scalar_y==2)){ 00727 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00728 #pragma unroll 00729 #endif 00730 for(size_type k=0;k<UNROLL;k++) 00731 m_r(i,k) = m_x(i,k) + m_b*m_y(i,k); 00732 } 00733 if((scalar_x==-1)&&(scalar_y==2)){ 00734 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00735 #pragma unroll 00736 #endif 00737 for(size_type k=0;k<UNROLL;k++) 00738 m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k); 00739 } 00740 if((scalar_x==2)&&(scalar_y==2)){ 00741 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 00742 #pragma unroll 00743 #endif 00744 for(size_type k=0;k<UNROLL;k++) 00745 m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k); 00746 } 00747 } 00748 }; 00749 00750 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y> 00751 struct MV_AddVectorFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y> 00752 { 00753 typedef typename RVector::device_type device_type; 00754 typedef typename RVector::size_type size_type; 00755 00756 RVector m_r ; 00757 XVector m_x ; 00758 YVector m_y ; 00759 typename XVector::non_const_value_type m_a; 00760 typename YVector::non_const_value_type m_b; 00761 size_type n; 00762 00763 MV_AddVectorFunctor() {n=1;} 00764 //-------------------------------------------------------------------------- 00765 00766 KOKKOS_INLINE_FUNCTION 00767 void operator()( const size_type i ) const 00768 { 00769 if((scalar_x==1)&&(scalar_y==1)) 00770 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00771 #pragma ivdep 00772 #endif 00773 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00774 #pragma vector always 00775 #endif 00776 for(size_type k=0;k<n;k++) 00777 m_r(i,k) = m_x(i,k) + m_y(i,k); 00778 if((scalar_x==1)&&(scalar_y==-1)) 00779 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00780 #pragma ivdep 00781 #endif 00782 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00783 #pragma vector always 00784 #endif 00785 for(size_type k=0;k<n;k++) 00786 m_r(i,k) = m_x(i,k) - m_y(i,k); 00787 if((scalar_x==-1)&&(scalar_y==-1)) 00788 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00789 #pragma ivdep 00790 #endif 00791 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00792 #pragma vector always 00793 #endif 00794 for(size_type k=0;k<n;k++) 00795 m_r(i,k) = -m_x(i,k) - m_y(i,k); 00796 if((scalar_x==-1)&&(scalar_y==1)) 00797 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00798 #pragma ivdep 00799 #endif 00800 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00801 #pragma vector always 00802 #endif 00803 for(size_type k=0;k<n;k++) 00804 m_r(i,k) = -m_x(i,k) + m_y(i,k); 00805 if((scalar_x==2)&&(scalar_y==1)) 00806 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00807 #pragma ivdep 00808 #endif 00809 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00810 #pragma vector always 00811 #endif 00812 for(size_type k=0;k<n;k++) 00813 m_r(i,k) = m_a*m_x(i,k) + m_y(i,k); 00814 if((scalar_x==2)&&(scalar_y==-1)) 00815 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00816 #pragma ivdep 00817 #endif 00818 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00819 #pragma vector always 00820 #endif 00821 for(size_type k=0;k<n;k++) 00822 m_r(i,k) = m_a*m_x(i,k) - m_y(i,k); 00823 if((scalar_x==1)&&(scalar_y==2)) 00824 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00825 #pragma ivdep 00826 #endif 00827 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00828 #pragma vector always 00829 #endif 00830 for(size_type k=0;k<n;k++) 00831 m_r(i,k) = m_x(i,k) + m_b*m_y(i,k); 00832 if((scalar_x==-1)&&(scalar_y==2)) 00833 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00834 #pragma ivdep 00835 #endif 00836 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00837 #pragma vector always 00838 #endif 00839 for(size_type k=0;k<n;k++) 00840 m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k); 00841 if((scalar_x==2)&&(scalar_y==2)) 00842 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 00843 #pragma ivdep 00844 #endif 00845 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 00846 #pragma vector always 00847 #endif 00848 for(size_type k=0;k<n;k++) 00849 m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k); 00850 00851 } 00852 }; 00853 00854 template<class RVector,class aVector, class XVector, class bVector, class YVector,int UNROLL> 00855 RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, 00856 const bVector &bv, const YVector & y, int n, 00857 int a=2,int b=2) 00858 { 00859 if(a==1&&b==1) { 00860 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,1,UNROLL> op ; 00861 op.m_r = r ; 00862 op.m_x = x ; 00863 op.m_y = y ; 00864 op.m_a = av ; 00865 op.m_b = bv ; 00866 op.n = x.dimension(1); 00867 Kokkos::parallel_for( n , op ); 00868 return r; 00869 } 00870 if(a==1&&b==-1) { 00871 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,-1,UNROLL> op ; 00872 op.m_r = r ; 00873 op.m_x = x ; 00874 op.m_y = y ; 00875 op.m_a = av ; 00876 op.m_b = bv ; 00877 op.n = x.dimension(1); 00878 Kokkos::parallel_for( n , op ); 00879 return r; 00880 } 00881 if(a==-1&&b==1) { 00882 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,1,UNROLL> op ; 00883 op.m_r = r ; 00884 op.m_x = x ; 00885 op.m_y = y ; 00886 op.m_a = av ; 00887 op.m_b = bv ; 00888 op.n = x.dimension(1); 00889 Kokkos::parallel_for( n , op ); 00890 return r; 00891 } 00892 if(a==-1&&b==-1) { 00893 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1,UNROLL> op ; 00894 op.m_r = r ; 00895 op.m_x = x ; 00896 op.m_y = y ; 00897 op.m_a = av ; 00898 op.m_b = bv ; 00899 op.n = x.dimension(1); 00900 Kokkos::parallel_for( n , op ); 00901 return r; 00902 } 00903 if(a*a!=1&&b==1) { 00904 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,1,UNROLL> op ; 00905 op.m_r = r ; 00906 op.m_x = x ; 00907 op.m_y = y ; 00908 op.m_a = av ; 00909 op.m_b = bv ; 00910 op.n = x.dimension(1); 00911 Kokkos::parallel_for( n , op ); 00912 return r; 00913 } 00914 if(a*a!=1&&b==-1) { 00915 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,-1,UNROLL> op ; 00916 op.m_r = r ; 00917 op.m_x = x ; 00918 op.m_y = y ; 00919 op.m_a = av ; 00920 op.m_b = bv ; 00921 op.n = x.dimension(1); 00922 Kokkos::parallel_for( n , op ); 00923 return r; 00924 } 00925 if(a==1&&b*b!=1) { 00926 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,2,UNROLL> op ; 00927 op.m_r = r ; 00928 op.m_x = x ; 00929 op.m_y = y ; 00930 op.m_a = av ; 00931 op.m_b = bv ; 00932 op.n = x.dimension(1); 00933 Kokkos::parallel_for( n , op ); 00934 return r; 00935 } 00936 if(a==-1&&b*b!=1) { 00937 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,2,UNROLL> op ; 00938 op.m_r = r ; 00939 op.m_x = x ; 00940 op.m_y = y ; 00941 op.m_a = av ; 00942 op.m_b = bv ; 00943 op.n = x.dimension(1); 00944 Kokkos::parallel_for( n , op ); 00945 return r; 00946 } 00947 MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,2,UNROLL> op ; 00948 op.m_r = r ; 00949 op.m_x = x ; 00950 op.m_y = y ; 00951 op.m_a = av ; 00952 op.m_b = bv ; 00953 op.n = x.dimension(1); 00954 Kokkos::parallel_for( n , op ); 00955 00956 return r; 00957 } 00958 00959 template<class RVector,class aVector, class XVector, class bVector, class YVector> 00960 RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, 00961 const bVector &bv, const YVector & y, int n, 00962 int a=2,int b=2) 00963 { 00964 switch (x.dimension(1)){ 00965 case 1: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 1>( r,av,x,bv,y,n,a,b); 00966 break; 00967 case 2: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 2>( r,av,x,bv,y,n,a,b); 00968 break; 00969 case 3: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 3>( r,av,x,bv,y,n,a,b); 00970 break; 00971 case 4: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 4>( r,av,x,bv,y,n,a,b); 00972 break; 00973 case 5: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 5>( r,av,x,bv,y,n,a,b); 00974 break; 00975 case 6: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 6>( r,av,x,bv,y,n,a,b); 00976 break; 00977 case 7: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 7>( r,av,x,bv,y,n,a,b); 00978 break; 00979 case 8: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 8>( r,av,x,bv,y,n,a,b); 00980 break; 00981 case 9: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 9>( r,av,x,bv,y,n,a,b); 00982 break; 00983 case 10: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 10>( r,av,x,bv,y,n,a,b); 00984 break; 00985 case 11: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 11>( r,av,x,bv,y,n,a,b); 00986 break; 00987 case 12: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 12>( r,av,x,bv,y,n,a,b); 00988 break; 00989 case 13: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 13>( r,av,x,bv,y,n,a,b); 00990 break; 00991 case 14: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 14>( r,av,x,bv,y,n,a,b); 00992 break; 00993 case 15: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 15>( r,av,x,bv,y,n,a,b); 00994 break; 00995 case 16: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 16>( r,av,x,bv,y,n,a,b); 00996 break; 00997 } 00998 return r; 00999 } 01000 01001 01002 template<class RVector,class aVector, class XVector, class bVector, class YVector> 01003 RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, 01004 const bVector &bv, const YVector & y, int n, 01005 int a=2,int b=2) 01006 { 01007 if(a==1&&b==1) { 01008 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,1> op ; 01009 op.m_r = r ; 01010 op.m_x = x ; 01011 op.m_y = y ; 01012 op.m_a = av ; 01013 op.m_b = bv ; 01014 op.n = x.dimension(1); 01015 Kokkos::parallel_for( n , op ); 01016 return r; 01017 } 01018 if(a==1&&b==-1) { 01019 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,-1> op ; 01020 op.m_r = r ; 01021 op.m_x = x ; 01022 op.m_y = y ; 01023 op.m_a = av ; 01024 op.m_b = bv ; 01025 op.n = x.dimension(1); 01026 Kokkos::parallel_for( n , op ); 01027 return r; 01028 } 01029 if(a==-1&&b==1) { 01030 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,1> op ; 01031 op.m_r = r ; 01032 op.m_x = x ; 01033 op.m_y = y ; 01034 op.m_a = av ; 01035 op.m_b = bv ; 01036 op.n = x.dimension(1); 01037 Kokkos::parallel_for( n , op ); 01038 return r; 01039 } 01040 if(a==-1&&b==-1) { 01041 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1> op ; 01042 op.m_r = r ; 01043 op.m_x = x ; 01044 op.m_y = y ; 01045 op.m_a = av ; 01046 op.m_b = bv ; 01047 op.n = x.dimension(1); 01048 Kokkos::parallel_for( n , op ); 01049 return r; 01050 } 01051 if(a*a!=1&&b==1) { 01052 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,1> op ; 01053 op.m_r = r ; 01054 op.m_x = x ; 01055 op.m_y = y ; 01056 op.m_a = av ; 01057 op.m_b = bv ; 01058 op.n = x.dimension(1); 01059 Kokkos::parallel_for( n , op ); 01060 return r; 01061 } 01062 if(a*a!=1&&b==-1) { 01063 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,-1> op ; 01064 op.m_r = r ; 01065 op.m_x = x ; 01066 op.m_y = y ; 01067 op.m_a = av ; 01068 op.m_b = bv ; 01069 op.n = x.dimension(1); 01070 Kokkos::parallel_for( n , op ); 01071 return r; 01072 } 01073 if(a==1&&b*b!=1) { 01074 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,2> op ; 01075 op.m_r = r ; 01076 op.m_x = x ; 01077 op.m_y = y ; 01078 op.m_a = av ; 01079 op.m_b = bv ; 01080 op.n = x.dimension(1); 01081 Kokkos::parallel_for( n , op ); 01082 return r; 01083 } 01084 if(a==-1&&b*b!=1) { 01085 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,2> op ; 01086 op.m_r = r ; 01087 op.m_x = x ; 01088 op.m_y = y ; 01089 op.m_a = av ; 01090 op.m_b = bv ; 01091 op.n = x.dimension(1); 01092 Kokkos::parallel_for( n , op ); 01093 return r; 01094 } 01095 MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,2> op ; 01096 op.m_r = r ; 01097 op.m_x = x ; 01098 op.m_y = y ; 01099 op.m_a = av ; 01100 op.m_b = bv ; 01101 op.n = x.dimension(1); 01102 Kokkos::parallel_for( n , op ); 01103 01104 return r; 01105 } 01106 01107 template<class RVector,class aVector, class XVector, class bVector, class YVector> 01108 RVector MV_AddSpecialise( const RVector & r,const aVector &av,const XVector & x, 01109 const bVector &bv, const YVector & y, unsigned int n, 01110 int a=2,int b=2) 01111 { 01112 01113 if(x.dimension(1)>16) 01114 return MV_AddVector( r,av,x,bv,y,a,b); 01115 01116 if(x.dimension_1()==1) { 01117 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 01118 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 01119 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D; 01120 01121 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 01122 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 01123 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 ); 01124 01125 V_Add(r_1d,av,x_1d,bv,y_1d,n); 01126 return r; 01127 } else 01128 return MV_AddUnroll( r,av,x,bv,y,a,b); 01129 } 01130 01131 template<class RVector,class aVector, class XVector, class bVector, class YVector> 01132 RVector MV_Add( const RVector & r,const aVector &av,const XVector & x, 01133 const bVector &bv, const YVector & y, int n = -1) 01134 { 01135 if(n==-1) n = x.dimension_0(); 01136 if(x.dimension(1)>16) 01137 return MV_AddVector( r,av,x,bv,y,n,2,2); 01138 01139 if(x.dimension_1()==1) { 01140 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 01141 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 01142 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D; 01143 01144 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 01145 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 01146 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 ); 01147 01148 V_Add(r_1d,av,x_1d,bv,y_1d,n); 01149 return r; 01150 } else 01151 return MV_AddUnroll( r,av,x,bv,y,n,2,2); 01152 } 01153 01154 template<class RVector,class XVector,class YVector> 01155 RVector MV_Add( const RVector & r, const XVector & x, const YVector & y, int n = -1) 01156 { 01157 if(n==-1) n = x.dimension_0(); 01158 if(x.dimension_1()==1) { 01159 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 01160 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 01161 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D; 01162 01163 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 01164 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 01165 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 ); 01166 01167 V_Add(r_1d,x_1d,y_1d,n); 01168 return r; 01169 } else { 01170 typename XVector::const_value_type a = 1.0; 01171 return MV_AddSpecialise(r,a,x,a,y,n,1,1); 01172 } 01173 } 01174 01175 template<class RVector,class XVector,class bVector, class YVector> 01176 RVector MV_Add( const RVector & r, const XVector & x, const bVector & bv, const YVector & y, int n = -1 ) 01177 { 01178 if(n==-1) n = x.dimension_0(); 01179 if(x.dimension_1()==1) { 01180 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 01181 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 01182 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D; 01183 01184 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 01185 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 01186 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 ); 01187 01188 V_Add(r_1d,x_1d,bv,y_1d,n); 01189 return r; 01190 } else { 01191 MV_AddSpecialise(r,bv,x,bv,y,n,1,2); 01192 } 01193 } 01194 01195 01196 template<class XVector,class YVector> 01197 struct MV_DotProduct_Right_FunctorVector 01198 { 01199 typedef typename XVector::device_type device_type; 01200 typedef typename XVector::size_type size_type; 01201 typedef typename XVector::non_const_value_type xvalue_type; 01202 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 01203 typedef typename IPT::dot_type value_type[]; 01204 size_type value_count; 01205 01206 01207 typedef typename XVector::const_type x_const_type; 01208 typedef typename YVector::const_type y_const_type; 01209 x_const_type m_x ; 01210 y_const_type m_y ; 01211 01212 //-------------------------------------------------------------------------- 01213 01214 KOKKOS_INLINE_FUNCTION 01215 void operator()( const size_type i, value_type sum ) const 01216 { 01217 const size_type numVecs=value_count; 01218 01219 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01220 #pragma ivdep 01221 #endif 01222 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01223 #pragma vector always 01224 #endif 01225 for(size_type k=0;k<numVecs;k++) 01226 sum[k]+=IPT::dot( m_x(i,k), m_y(i,k) ); // m_x(i,k) * m_y(i,k) 01227 } 01228 KOKKOS_INLINE_FUNCTION void init( value_type update) const 01229 { 01230 const size_type numVecs = value_count; 01231 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01232 #pragma ivdep 01233 #endif 01234 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01235 #pragma vector always 01236 #endif 01237 for(size_type k=0;k<numVecs;k++) 01238 update[k] = 0; 01239 } 01240 KOKKOS_INLINE_FUNCTION void join( volatile value_type update , 01241 const volatile value_type source ) const 01242 { 01243 const size_type numVecs = value_count; 01244 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01245 #pragma ivdep 01246 #endif 01247 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01248 #pragma vector always 01249 #endif 01250 for(size_type k=0;k<numVecs;k++){ 01251 update[k] += source[k]; 01252 } 01253 } 01254 }; 01255 01256 01257 // Implementation detail of Tpetra::MultiVector::dot, when both 01258 // MultiVectors in the dot product have constant stride. Compute the 01259 // dot product of the local part of each corresponding vector (column) 01260 // in two MultiVectors. 01261 template<class MultiVecViewType> 01262 struct MultiVecDotFunctor { 01263 typedef typename MultiVecViewType::device_type device_type; 01264 typedef typename MultiVecViewType::size_type size_type; 01265 typedef typename MultiVecViewType::value_type mv_value_type; 01266 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01267 typedef typename IPT::dot_type value_type[]; 01268 01269 typedef MultiVecViewType mv_view_type; 01270 typedef typename MultiVecViewType::const_type mv_const_view_type; 01271 typedef Kokkos::View<typename IPT::dot_type*, device_type> dot_view_type; 01272 01273 mv_const_view_type X_, Y_; 01274 dot_view_type dots_; 01275 // Kokkos::parallel_reduce wants this, so it needs to be public. 01276 size_type value_count; 01277 01278 MultiVecDotFunctor (const mv_const_view_type& X, 01279 const mv_const_view_type& Y, 01280 const dot_view_type& dots) : 01281 X_ (X), Y_ (Y), dots_ (dots), value_count (X.dimension_1 ()) 01282 { 01283 if (value_count != dots.dimension_0 ()) { 01284 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 01285 cuda_abort("Kokkos::MultiVecDotFunctor: value_count does not match the length of 'dots'"); 01286 #else 01287 std::ostringstream os; 01288 os << "Kokkos::MultiVecDotFunctor: value_count does not match the length " 01289 "of 'dots'. X is " << X.dimension_0 () << " x " << X.dimension_1 () << 01290 ", Y is " << Y.dimension_0 () << " x " << Y.dimension_1 () << ", " 01291 "dots has length " << dots.dimension_0 () << ", and value_count = " << 01292 value_count << "."; 01293 throw std::invalid_argument (os.str ()); 01294 #endif 01295 } 01296 01297 } 01298 01299 KOKKOS_INLINE_FUNCTION void 01300 operator() (const size_type i, value_type sum) const 01301 { 01302 const size_type numVecs = value_count; 01303 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01304 #pragma ivdep 01305 #endif 01306 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01307 #pragma vector always 01308 #endif 01309 for (size_type k = 0; k < numVecs; ++k) { 01310 sum[k] += IPT::dot (X_(i,k), Y_(i,k)); 01311 } 01312 } 01313 01314 KOKKOS_INLINE_FUNCTION void 01315 init (value_type update) const 01316 { 01317 const size_type numVecs = value_count; 01318 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01319 #pragma ivdep 01320 #endif 01321 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01322 #pragma vector always 01323 #endif 01324 for (size_type k = 0; k < numVecs; ++k) { 01325 update[k] = Kokkos::Details::ArithTraits<typename IPT::dot_type>::zero (); 01326 } 01327 } 01328 01329 KOKKOS_INLINE_FUNCTION void 01330 join (volatile value_type update, 01331 const volatile value_type source) const 01332 { 01333 const size_type numVecs = value_count; 01334 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01335 #pragma ivdep 01336 #endif 01337 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01338 #pragma vector always 01339 #endif 01340 for (size_type k = 0; k < numVecs; ++k) { 01341 update[k] += source[k]; 01342 } 01343 } 01344 01345 // On device, write the reduction result to the output View. 01346 KOKKOS_INLINE_FUNCTION void 01347 final (const value_type dst) const 01348 { 01349 const size_type numVecs = value_count; 01350 01351 #if !defined(__CUDA_ARCH__) 01352 // DEBUGGING ONLY 01353 { 01354 std::ostringstream os; 01355 os << "numVecs: " << numVecs << ", dst: ["; 01356 for (size_t j = 0; j < numVecs; ++j) { 01357 os << dst[j]; 01358 if (j + 1 < numVecs) { 01359 os << ", "; 01360 } 01361 } 01362 os << "]" << std::endl; 01363 std::cerr << os.str (); 01364 } 01365 #endif 01366 01367 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01368 #pragma ivdep 01369 #endif 01370 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01371 #pragma vector always 01372 #endif 01373 for (size_type k = 0; k < numVecs; ++k) { 01374 dots_(k) = dst[k]; 01375 } 01376 01377 #if !defined(__CUDA_ARCH__) 01378 // DEBUGGING ONLY 01379 { 01380 std::ostringstream os; 01381 os << "numVecs: " << numVecs << ", dots_: ["; 01382 for (size_t j = 0; j < numVecs; ++j) { 01383 os << dots_(j); 01384 if (j + 1 < numVecs) { 01385 os << ", "; 01386 } 01387 } 01388 os << "]" << std::endl; 01389 std::cerr << os.str (); 01390 } 01391 #endif 01392 } 01393 }; 01394 01395 01396 // Implementation detail of Tpetra::MultiVector::norm2, when the 01397 // MultiVector has constant stride. Compute the square of the 01398 // two-norm of each column of a multivector. 01399 template<class MultiVecViewType> 01400 struct MultiVecNorm2SquaredFunctor { 01401 typedef typename MultiVecViewType::device_type device_type; 01402 typedef typename MultiVecViewType::size_type size_type; 01403 typedef typename MultiVecViewType::value_type mv_value_type; 01404 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01405 typedef typename IPT::mag_type value_type[]; 01406 01407 typedef MultiVecViewType mv_view_type; 01408 typedef typename MultiVecViewType::const_type mv_const_view_type; 01409 typedef Kokkos::View<typename IPT::mag_type*, device_type> norms_view_type; 01410 01411 mv_const_view_type X_; 01412 norms_view_type norms_; 01413 // Kokkos::parallel_reduce wants this, so it needs to be public. 01414 size_type value_count; 01415 01416 MultiVecNorm2SquaredFunctor (const mv_const_view_type& X, 01417 const norms_view_type& norms) : 01418 X_ (X), norms_ (norms), value_count (X.dimension_1 ()) 01419 { 01420 if (value_count != norms.dimension_0 ()) { 01421 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 01422 cuda_abort("Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match the length of 'norms'"); 01423 #else 01424 std::ostringstream os; 01425 os << "Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match " 01426 "the length of 'norms'. X is " << X.dimension_0 () << " x " << 01427 X.dimension_1 () << ", norms has length " << norms.dimension_0 () << 01428 ", and value_count = " << value_count << "."; 01429 throw std::invalid_argument (os.str ()); 01430 #endif 01431 } 01432 } 01433 01434 KOKKOS_INLINE_FUNCTION void 01435 operator() (const size_type i, value_type sum) const 01436 { 01437 const size_type numVecs = value_count; 01438 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01439 #pragma ivdep 01440 #endif 01441 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01442 #pragma vector always 01443 #endif 01444 for (size_type k = 0; k < numVecs; ++k) { 01445 const typename IPT::mag_type tmp = IPT::norm (X_(i,k)); 01446 sum[k] += tmp * tmp; 01447 } 01448 } 01449 01450 KOKKOS_INLINE_FUNCTION void 01451 init (value_type update) const 01452 { 01453 const size_type numVecs = value_count; 01454 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01455 #pragma ivdep 01456 #endif 01457 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01458 #pragma vector always 01459 #endif 01460 for (size_type k = 0; k < numVecs; ++k) { 01461 update[k] = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero (); 01462 } 01463 } 01464 01465 KOKKOS_INLINE_FUNCTION void 01466 join (volatile value_type update, 01467 const volatile value_type source) const 01468 { 01469 const size_type numVecs = value_count; 01470 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01471 #pragma ivdep 01472 #endif 01473 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01474 #pragma vector always 01475 #endif 01476 for (size_type k = 0; k < numVecs; ++k) { 01477 update[k] += source[k]; 01478 } 01479 } 01480 01481 // On device, write the reduction result to the output View. 01482 KOKKOS_INLINE_FUNCTION void 01483 final (const value_type dst) const 01484 { 01485 const size_type numVecs = value_count; 01486 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01487 #pragma ivdep 01488 #endif 01489 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01490 #pragma vector always 01491 #endif 01492 for (size_type k = 0; k < numVecs; ++k) { 01493 norms_(k) = dst[k]; 01494 } 01495 } 01496 }; 01497 01498 01499 // Implementation detail of Tpetra::MultiVector::norm1, when the 01500 // MultiVector has constant stride. Compute the one-norm of each 01501 // column of a multivector. 01502 template<class MultiVecViewType> 01503 struct MultiVecNorm1Functor { 01504 typedef typename MultiVecViewType::device_type device_type; 01505 typedef typename MultiVecViewType::size_type size_type; 01506 typedef typename MultiVecViewType::value_type mv_value_type; 01507 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01508 typedef typename IPT::mag_type value_type[]; 01509 01510 typedef MultiVecViewType mv_view_type; 01511 typedef typename MultiVecViewType::const_type mv_const_view_type; 01512 typedef Kokkos::View<typename IPT::mag_type*, device_type> norms_view_type; 01513 01514 mv_const_view_type X_; 01515 norms_view_type norms_; 01516 // Kokkos::parallel_reduce wants this, so it needs to be public. 01517 size_type value_count; 01518 01519 MultiVecNorm1Functor (const mv_const_view_type& X, 01520 const norms_view_type& norms) : 01521 X_ (X), norms_ (norms), value_count (X.dimension_1 ()) 01522 { 01523 if (value_count != norms.dimension_0 ()) { 01524 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 01525 cuda_abort("Kokkos::MultiVecNorm1Functor: value_count does not match the length of 'norms'"); 01526 #else 01527 std::ostringstream os; 01528 os << "Kokkos::MultiVecNorm1Functor: value_count does not match the " 01529 << "length of 'norms'. X is " << X.dimension_0 () << " x " 01530 << X.dimension_1 () << ", norms has length " << norms.dimension_0 () 01531 << ", and value_count = " << value_count << "."; 01532 throw std::invalid_argument (os.str ()); 01533 #endif 01534 } 01535 } 01536 01537 KOKKOS_INLINE_FUNCTION void 01538 operator() (const size_type i, value_type sum) const 01539 { 01540 const size_type numVecs = value_count; 01541 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01542 #pragma ivdep 01543 #endif 01544 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01545 #pragma vector always 01546 #endif 01547 for (size_type k = 0; k < numVecs; ++k) { 01548 sum[k] += IPT::norm (X_(i,k)); // absolute value 01549 } 01550 } 01551 01552 KOKKOS_INLINE_FUNCTION void 01553 init (value_type update) const 01554 { 01555 const size_type numVecs = value_count; 01556 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01557 #pragma ivdep 01558 #endif 01559 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01560 #pragma vector always 01561 #endif 01562 for (size_type k = 0; k < numVecs; ++k) { 01563 update[k] = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero (); 01564 } 01565 } 01566 01567 KOKKOS_INLINE_FUNCTION void 01568 join (volatile value_type update, 01569 const volatile value_type source) const 01570 { 01571 const size_type numVecs = value_count; 01572 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01573 #pragma ivdep 01574 #endif 01575 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01576 #pragma vector always 01577 #endif 01578 for (size_type k = 0; k < numVecs; ++k) { 01579 update[k] += source[k]; 01580 } 01581 } 01582 01583 // On device, write the reduction result to the output View. 01584 KOKKOS_INLINE_FUNCTION void 01585 final (const value_type dst) const 01586 { 01587 const size_type numVecs = value_count; 01588 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01589 #pragma ivdep 01590 #endif 01591 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01592 #pragma vector always 01593 #endif 01594 for (size_type k = 0; k < numVecs; ++k) { 01595 norms_(k) = dst[k]; 01596 } 01597 } 01598 }; 01599 01600 01601 // Implementation detail of Tpetra::MultiVector::normInf, when the 01602 // MultiVector has constant stride. Compute the infinity-norm of each 01603 // column of a multivector. 01604 template<class MultiVecViewType> 01605 struct MultiVecNormInfFunctor { 01606 typedef typename MultiVecViewType::device_type device_type; 01607 typedef typename MultiVecViewType::size_type size_type; 01608 typedef typename MultiVecViewType::value_type mv_value_type; 01609 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01610 typedef typename IPT::mag_type value_type[]; 01611 01612 typedef MultiVecViewType mv_view_type; 01613 typedef typename MultiVecViewType::const_type mv_const_view_type; 01614 typedef Kokkos::View<typename IPT::mag_type*, device_type> norms_view_type; 01615 01616 mv_const_view_type X_; 01617 norms_view_type norms_; 01618 // Kokkos::parallel_reduce wants this, so it needs to be public. 01619 size_type value_count; 01620 01621 MultiVecNormInfFunctor (const mv_const_view_type& X, 01622 const norms_view_type& norms) : 01623 X_ (X), norms_ (norms), value_count (X.dimension_1 ()) 01624 { 01625 if (value_count != norms.dimension_0 ()) { 01626 #if defined(__CUDACC__) && defined(__CUDA_ARCH__) 01627 cuda_abort("Kokkos::MultiVecNormInfFunctor: value_count does not match the length of 'norms'"); 01628 #else 01629 std::ostringstream os; 01630 os << "Kokkos::MultiVecNormInfFunctor: value_count does not match the " 01631 << "length of 'norms'. X is " << X.dimension_0 () << " x " 01632 << X.dimension_1 () << ", norms has length " << norms.dimension_0 () 01633 << ", and value_count = " << value_count << "."; 01634 throw std::invalid_argument (os.str ()); 01635 #endif 01636 } 01637 } 01638 01639 KOKKOS_INLINE_FUNCTION void 01640 operator() (const size_type i, value_type maxes) const 01641 { 01642 const size_type numVecs = value_count; 01643 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01644 #pragma ivdep 01645 #endif 01646 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01647 #pragma vector always 01648 #endif 01649 for (size_type k = 0; k < numVecs; ++k) { 01650 const typename IPT::mag_type curVal = maxes[k]; 01651 const typename IPT::mag_type newVal = IPT::norm (X_(i,k)); 01652 // max(curVal, newVal). Any comparison predicate involving NaN 01653 // evaluates to false. Thus, this will never assign NaN to 01654 // update[k], unless it contains NaN already. The initial value 01655 // is zero, so NaNs won't propagate. (This definition makes NaN 01656 // into an "invalid value," which is useful for statistics and 01657 // other applications that use NaN to indicate a "hole.") 01658 if (curVal < newVal) { 01659 maxes[k] = newVal; 01660 } 01661 } 01662 } 01663 01664 KOKKOS_INLINE_FUNCTION void 01665 init (value_type update) const 01666 { 01667 const size_type numVecs = value_count; 01668 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01669 #pragma ivdep 01670 #endif 01671 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01672 #pragma vector always 01673 #endif 01674 for (size_type k = 0; k < numVecs; ++k) { 01675 // Zero is a good default value for magnitudes (which are 01676 // nonnegative by definition). That way, MPI processes with 01677 // zero rows won't affect the global maximum. 01678 update[k] = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero (); 01679 } 01680 } 01681 01682 KOKKOS_INLINE_FUNCTION void 01683 join (volatile value_type update, 01684 const volatile value_type source) const 01685 { 01686 const size_type numVecs = value_count; 01687 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01688 #pragma ivdep 01689 #endif 01690 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01691 #pragma vector always 01692 #endif 01693 for (size_type k = 0; k < numVecs; ++k) { 01694 const typename IPT::mag_type curVal = update[k]; 01695 const typename IPT::mag_type newVal = source[k]; 01696 // max(curVal, newVal). Any comparison predicate involving NaN 01697 // evaluates to false. Thus, this will never assign NaN to 01698 // update[k], unless it contains NaN already. The initial value 01699 // is zero, so NaNs won't propagate. (This definition makes NaN 01700 // into an "invalid value," which is useful for statistics and 01701 // other applications that use NaN to indicate a "hole.") 01702 if (curVal < newVal) { 01703 update[k] = newVal; 01704 } 01705 } 01706 } 01707 01708 // On device, write the reduction result to the output View. 01709 KOKKOS_INLINE_FUNCTION void 01710 final (const value_type dst) const 01711 { 01712 const size_type numVecs = value_count; 01713 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 01714 #pragma ivdep 01715 #endif 01716 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 01717 #pragma vector always 01718 #endif 01719 for (size_type k = 0; k < numVecs; ++k) { 01720 norms_(k) = dst[k]; 01721 } 01722 } 01723 }; 01724 01725 01726 // Implementation detail of Tpetra::MultiVector::dot, for single 01727 // vectors (columns). 01728 template<class VecViewType> 01729 struct VecDotFunctor { 01730 typedef typename VecViewType::device_type device_type; 01731 typedef typename VecViewType::size_type size_type; 01732 typedef typename VecViewType::value_type mv_value_type; 01733 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01734 typedef typename IPT::dot_type value_type; 01735 typedef typename VecViewType::const_type vec_const_view_type; 01736 // This is a nonconst scalar view. It holds one dot_type instance. 01737 typedef Kokkos::View<typename IPT::dot_type, device_type> dot_view_type; 01738 typedef Kokkos::View<typename IPT::dot_type*, device_type> dots_view_type; 01739 01740 vec_const_view_type x_, y_; 01741 dot_view_type dot_; 01742 01743 VecDotFunctor (const vec_const_view_type& x, 01744 const vec_const_view_type& y, 01745 const dot_view_type& dot) : 01746 x_ (x), y_ (y), dot_ (dot) 01747 { 01748 if (x.dimension_0 () != y.dimension_0 ()) { 01749 std::ostringstream os; 01750 os << "Kokkos::VecDotFunctor: The dimensions of x and y do not match. " 01751 "x.dimension_0() = " << x.dimension_0 () 01752 << " != y.dimension_0() = " << y.dimension_0 () << "."; 01753 throw std::invalid_argument (os.str ()); 01754 } 01755 } 01756 01757 KOKKOS_INLINE_FUNCTION void 01758 operator() (const size_type i, value_type& sum) const { 01759 sum += IPT::dot (x_(i), y_(i)); 01760 } 01761 01762 KOKKOS_INLINE_FUNCTION void 01763 init (value_type& update) const { 01764 update = Kokkos::Details::ArithTraits<typename IPT::dot_type>::zero (); 01765 } 01766 01767 KOKKOS_INLINE_FUNCTION void 01768 join (volatile value_type& update, 01769 const volatile value_type& source) const { 01770 update += source; 01771 } 01772 01773 // On device, write the reduction result to the output View. 01774 KOKKOS_INLINE_FUNCTION void final (value_type& dst) const { 01775 // BADNESS HERE 01776 dot_() = dst; 01777 } 01778 }; 01779 01780 01781 // Compute the square of the two-norm of a single vector. 01782 template<class VecViewType> 01783 struct VecNorm2SquaredFunctor { 01784 typedef typename VecViewType::device_type device_type; 01785 typedef typename VecViewType::size_type size_type; 01786 typedef typename VecViewType::value_type mv_value_type; 01787 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01788 typedef typename IPT::mag_type value_type; 01789 typedef typename VecViewType::const_type vec_const_view_type; 01790 // This is a nonconst scalar view. It holds one mag_type instance. 01791 typedef Kokkos::View<typename IPT::mag_type, device_type> norm_view_type; 01792 01793 vec_const_view_type x_; 01794 norm_view_type norm_; 01795 01796 // Constructor 01797 // 01798 // x: the vector for which to compute the square of the two-norm. 01799 // norm: scalar View into which to put the result. 01800 VecNorm2SquaredFunctor (const vec_const_view_type& x, 01801 const norm_view_type& norm) : 01802 x_ (x), norm_ (norm) 01803 {} 01804 01805 KOKKOS_INLINE_FUNCTION void 01806 operator() (const size_type i, value_type& sum) const { 01807 const typename IPT::mag_type tmp = IPT::norm (x_(i)); 01808 sum += tmp * tmp; 01809 } 01810 01811 KOKKOS_INLINE_FUNCTION void 01812 init (value_type& update) const { 01813 update = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero (); 01814 } 01815 01816 KOKKOS_INLINE_FUNCTION void 01817 join (volatile value_type& update, 01818 const volatile value_type& source) const { 01819 update += source; 01820 } 01821 01822 // On device, write the reduction result to the output View. 01823 KOKKOS_INLINE_FUNCTION void final (value_type& dst) const { 01824 norm_ () = dst; 01825 } 01826 }; 01827 01828 01829 // Compute the square of the one-norm of a single vector. 01830 template<class VecViewType> 01831 struct VecNorm1Functor { 01832 typedef typename VecViewType::device_type device_type; 01833 typedef typename VecViewType::size_type size_type; 01834 typedef typename VecViewType::value_type mv_value_type; 01835 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01836 typedef typename IPT::mag_type value_type; 01837 typedef typename VecViewType::const_type vec_const_view_type; 01838 // This is a nonconst scalar view. It holds one mag_type instance. 01839 typedef Kokkos::View<typename IPT::mag_type, device_type> norm_view_type; 01840 01841 vec_const_view_type x_; 01842 norm_view_type norm_; 01843 01844 // Constructor 01845 // 01846 // x: the vector for which to compute the one-norm. 01847 // norm: scalar View into which to put the result. 01848 VecNorm1Functor (const vec_const_view_type& x, 01849 const norm_view_type& norm) : 01850 x_ (x), norm_ (norm) 01851 {} 01852 01853 KOKKOS_INLINE_FUNCTION void 01854 operator() (const size_type i, value_type& sum) const { 01855 sum += IPT::norm (x_(i)); // absolute value 01856 } 01857 01858 KOKKOS_INLINE_FUNCTION void 01859 init (value_type& update) const { 01860 update = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero (); 01861 } 01862 01863 KOKKOS_INLINE_FUNCTION void 01864 join (volatile value_type& update, 01865 const volatile value_type& source) const { 01866 update += source; 01867 } 01868 01869 // On device, write the reduction result to the output View. 01870 KOKKOS_INLINE_FUNCTION void final (value_type& dst) const { 01871 norm_ () = dst; 01872 } 01873 }; 01874 01875 01876 // Compute the square of the infinity-norm of a single vector. 01877 template<class VecViewType> 01878 struct VecNormInfFunctor { 01879 typedef typename VecViewType::device_type device_type; 01880 typedef typename VecViewType::size_type size_type; 01881 typedef typename VecViewType::value_type mv_value_type; 01882 typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT; 01883 typedef typename IPT::mag_type value_type; 01884 typedef typename VecViewType::const_type vec_const_view_type; 01885 // This is a nonconst scalar view. It holds one mag_type instance. 01886 typedef Kokkos::View<typename IPT::mag_type, device_type> norm_view_type; 01887 01888 vec_const_view_type x_; 01889 norm_view_type norm_; 01890 01891 // Constructor 01892 // 01893 // x: the vector for which to compute the infinity-norm. 01894 // norm: scalar View into which to put the result. 01895 VecNormInfFunctor (const vec_const_view_type& x, 01896 const norm_view_type& norm) : 01897 x_ (x), norm_ (norm) 01898 {} 01899 01900 KOKKOS_INLINE_FUNCTION void 01901 operator() (const size_type i, value_type& curVal) const { 01902 const typename IPT::mag_type newVal = IPT::norm (x_(i)); 01903 // max(curVal, newVal). Any comparison predicate involving NaN 01904 // evaluates to false. Thus, this will never assign NaN to 01905 // update[k], unless it contains NaN already. The initial value 01906 // is zero, so NaNs won't propagate. (This definition makes NaN 01907 // into an "invalid value," which is useful for statistics and 01908 // other applications that use NaN to indicate a "hole.") 01909 if (curVal < newVal) { 01910 curVal = newVal; 01911 } 01912 } 01913 01914 KOKKOS_INLINE_FUNCTION void 01915 init (value_type& update) const { 01916 // Zero is a good default value for magnitudes (which are 01917 // nonnegative by definition). That way, MPI processes with 01918 // zero rows won't affect the global maximum. 01919 update = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero (); 01920 } 01921 01922 KOKKOS_INLINE_FUNCTION void 01923 join (volatile value_type& update, 01924 const volatile value_type& source) const { 01925 // max(update, source). Any comparison predicate involving NaN 01926 // evaluates to false. Thus, this will never assign NaN to 01927 // update, unless it contains NaN already. The initial value is 01928 // zero, so NaNs won't propagate. (This definition makes NaN into 01929 // an "invalid value," which is useful for statistics and other 01930 // applications that use NaN to indicate a "hole.") 01931 if (update < source) { 01932 update = source; 01933 } 01934 } 01935 01936 // On device, write the reduction result to the output View. 01937 KOKKOS_INLINE_FUNCTION void final (value_type& dst) const { 01938 norm_ () = dst; 01939 } 01940 }; 01941 01942 01943 01944 // parallel_for functor for computing the square root, in place, of a 01945 // one-dimensional View. This is useful for following the MPI 01946 // all-reduce that computes the square of the two-norms of the local 01947 // columns of a Tpetra::MultiVector. 01948 // 01949 // mfh 14 Jul 2014: Carter says that, for now, the standard idiom for 01950 // operating on a single scalar value on the device, is to run in a 01951 // parallel_for with N = 1. 01952 // 01953 // FIXME (mfh 14 Jul 2014): If we could assume C++11, this functor 01954 // would go away. 01955 template<class ViewType> 01956 class SquareRootFunctor { 01957 public: 01958 typedef typename ViewType::device_type device_type; 01959 typedef typename ViewType::size_type size_type; 01960 01961 SquareRootFunctor (const ViewType& theView) : theView_ (theView) {} 01962 01963 KOKKOS_INLINE_FUNCTION void operator() (const size_type i) const { 01964 typedef typename ViewType::value_type value_type; 01965 theView_(i) = Kokkos::Details::ArithTraits<value_type>::sqrt (theView_(i)); 01966 } 01967 01968 private: 01969 ViewType theView_; 01970 }; 01971 01972 01973 template<class XVector,class YVector,int UNROLL> 01974 struct MV_DotProduct_Right_FunctorUnroll 01975 { 01976 typedef typename XVector::device_type device_type; 01977 typedef typename XVector::size_type size_type; 01978 typedef typename XVector::non_const_value_type xvalue_type; 01979 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 01980 typedef typename IPT::dot_type value_type[]; 01981 size_type value_count; 01982 01983 typedef typename XVector::const_type x_const_type; 01984 typedef typename YVector::const_type y_const_type; 01985 01986 x_const_type m_x ; 01987 y_const_type m_y ; 01988 01989 //-------------------------------------------------------------------------- 01990 01991 KOKKOS_INLINE_FUNCTION 01992 void operator()( const size_type i, value_type sum ) const 01993 { 01994 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 01995 #pragma unroll 01996 #endif 01997 for(size_type k=0;k<UNROLL;k++) 01998 sum[k]+= IPT::dot( m_x(i,k), m_y(i,k) ); // m_x(i,k) * m_y(i,k) 01999 } 02000 KOKKOS_INLINE_FUNCTION void init( volatile value_type update) const 02001 { 02002 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 02003 #pragma unroll 02004 #endif 02005 for(size_type k=0;k<UNROLL;k++) 02006 update[k] = 0; 02007 } 02008 KOKKOS_INLINE_FUNCTION void join( volatile value_type update , 02009 const volatile value_type source) const 02010 { 02011 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL 02012 #pragma unroll 02013 #endif 02014 for(size_type k=0;k<UNROLL;k++) 02015 update[k] += source[k] ; 02016 } 02017 }; 02018 02019 template<class rVector, class XVector, class YVector> 02020 rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = -1) 02021 { 02022 typedef typename XVector::size_type size_type; 02023 const size_type numVecs = x.dimension(1); 02024 02025 if(n<0) n = x.dimension_0(); 02026 if(numVecs>16){ 02027 02028 MV_DotProduct_Right_FunctorVector<XVector,YVector> op; 02029 op.m_x = x; 02030 op.m_y = y; 02031 op.value_count = numVecs; 02032 02033 Kokkos::parallel_reduce( n , op, r ); 02034 return r; 02035 } 02036 else 02037 switch(numVecs) { 02038 case 16: { 02039 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,16> op; 02040 op.m_x = x; 02041 op.m_y = y; 02042 op.value_count = numVecs; 02043 Kokkos::parallel_reduce( n , op, r ); 02044 break; 02045 } 02046 case 15: { 02047 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,15> op; 02048 op.m_x = x; 02049 op.m_y = y; 02050 op.value_count = numVecs; 02051 Kokkos::parallel_reduce( n , op, r ); 02052 break; 02053 } 02054 case 14: { 02055 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,14> op; 02056 op.m_x = x; 02057 op.m_y = y; 02058 op.value_count = numVecs; 02059 Kokkos::parallel_reduce( n , op, r ); 02060 break; 02061 } 02062 case 13: { 02063 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,13> op; 02064 op.m_x = x; 02065 op.m_y = y; 02066 op.value_count = numVecs; 02067 Kokkos::parallel_reduce( n , op, r ); 02068 break; 02069 } 02070 case 12: { 02071 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,12> op; 02072 op.m_x = x; 02073 op.m_y = y; 02074 op.value_count = numVecs; 02075 Kokkos::parallel_reduce( n , op, r ); 02076 break; 02077 } 02078 case 11: { 02079 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,11> op; 02080 op.m_x = x; 02081 op.m_y = y; 02082 op.value_count = numVecs; 02083 Kokkos::parallel_reduce( n , op, r ); 02084 break; 02085 } 02086 case 10: { 02087 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,10> op; 02088 op.m_x = x; 02089 op.m_y = y; 02090 op.value_count = numVecs; 02091 Kokkos::parallel_reduce( n , op, r ); 02092 break; 02093 } 02094 case 9: { 02095 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,9> op; 02096 op.m_x = x; 02097 op.m_y = y; 02098 op.value_count = numVecs; 02099 Kokkos::parallel_reduce( n , op, r ); 02100 break; 02101 } 02102 case 8: { 02103 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,8> op; 02104 op.m_x = x; 02105 op.m_y = y; 02106 op.value_count = numVecs; 02107 Kokkos::parallel_reduce( n , op, r ); 02108 break; 02109 } 02110 case 7: { 02111 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,7> op; 02112 op.m_x = x; 02113 op.m_y = y; 02114 op.value_count = numVecs; 02115 Kokkos::parallel_reduce( n , op, r ); 02116 break; 02117 } 02118 case 6: { 02119 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,6> op; 02120 op.m_x = x; 02121 op.m_y = y; 02122 op.value_count = numVecs; 02123 Kokkos::parallel_reduce( n , op, r ); 02124 break; 02125 } 02126 case 5: { 02127 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,5> op; 02128 op.m_x = x; 02129 op.m_y = y; 02130 op.value_count = numVecs; 02131 Kokkos::parallel_reduce( n , op, r ); 02132 break; 02133 } 02134 case 4: { 02135 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,4> op; 02136 op.m_x = x; 02137 op.m_y = y; 02138 op.value_count = numVecs; 02139 Kokkos::parallel_reduce( n , op, r ); 02140 02141 break; 02142 } 02143 case 3: { 02144 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,3> op; 02145 op.m_x = x; 02146 op.m_y = y; 02147 op.value_count = numVecs; 02148 Kokkos::parallel_reduce( n , op, r ); 02149 break; 02150 } 02151 case 2: { 02152 MV_DotProduct_Right_FunctorUnroll<XVector,YVector,2> op; 02153 op.m_x = x; 02154 op.m_y = y; 02155 op.value_count = numVecs; 02156 Kokkos::parallel_reduce( n , op, r ); 02157 break; 02158 } 02159 case 1: { 02160 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 02161 typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D; 02162 02163 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 02164 YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 ); 02165 r[0] = V_Dot(x_1d,y_1d,n); 02166 break; 02167 } 02168 } 02169 02170 return r; 02171 } 02172 02173 /*------------------------------------------------------------------------------------------ 02174 *-------------------------- Compute Sum ------------------------------------------------- 02175 *------------------------------------------------------------------------------------------*/ 02176 template<class XVector> 02177 struct MV_Sum_Functor 02178 { 02179 typedef typename XVector::device_type device_type; 02180 typedef typename XVector::size_type size_type; 02181 typedef typename XVector::non_const_value_type xvalue_type; 02182 typedef xvalue_type value_type[]; 02183 02184 typename XVector::const_type m_x ; 02185 size_type value_count; 02186 02187 MV_Sum_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {} 02188 //-------------------------------------------------------------------------- 02189 02190 KOKKOS_INLINE_FUNCTION 02191 void operator()( const size_type i, value_type sum ) const 02192 { 02193 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02194 #pragma ivdep 02195 #endif 02196 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02197 #pragma vector always 02198 #endif 02199 for(size_type k=0;k<value_count;k++){ 02200 sum[k] += m_x(i,k); 02201 } 02202 } 02203 02204 KOKKOS_INLINE_FUNCTION 02205 void init( value_type update) const 02206 { 02207 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02208 #pragma ivdep 02209 #endif 02210 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02211 #pragma vector always 02212 #endif 02213 for(size_type k=0;k<value_count;k++) 02214 update[k] = 0; 02215 } 02216 02217 KOKKOS_INLINE_FUNCTION 02218 void join( volatile value_type update , 02219 const volatile value_type source ) const 02220 { 02221 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02222 #pragma ivdep 02223 #endif 02224 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02225 #pragma vector always 02226 #endif 02227 for(size_type k=0;k<value_count;k++){ 02228 update[k] += source[k]; 02229 } 02230 } 02231 }; 02232 02233 02234 template<class normVector, class VectorType> 02235 normVector MV_Sum(const normVector &r, const VectorType & x, int n = -1) 02236 { 02237 if (n < 0) { 02238 n = x.dimension_0 (); 02239 } 02240 02241 Kokkos::parallel_reduce (n , MV_Sum_Functor<VectorType> (x), r); 02242 return r; 02243 } 02244 02245 /*------------------------------------------------------------------------------------------ 02246 *-------------------------- Compute Norm1-------------------------------------------------- 02247 *------------------------------------------------------------------------------------------*/ 02248 template<class XVector> 02249 struct MV_Norm1_Functor 02250 { 02251 typedef typename XVector::device_type device_type; 02252 typedef typename XVector::size_type size_type; 02253 typedef typename XVector::non_const_value_type xvalue_type; 02254 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 02255 typedef typename IPT::dot_type value_type[]; 02256 02257 typename XVector::const_type m_x ; 02258 size_type value_count; 02259 02260 MV_Norm1_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {} 02261 //-------------------------------------------------------------------------- 02262 02263 KOKKOS_INLINE_FUNCTION 02264 void operator()( const size_type i, value_type sum ) const 02265 { 02266 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02267 #pragma ivdep 02268 #endif 02269 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02270 #pragma vector always 02271 #endif 02272 for(size_type k=0;k<value_count;k++){ 02273 sum[k] += Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k)); 02274 } 02275 } 02276 02277 KOKKOS_INLINE_FUNCTION 02278 void init( value_type update) const 02279 { 02280 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02281 #pragma ivdep 02282 #endif 02283 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02284 #pragma vector always 02285 #endif 02286 for(size_type k=0;k<value_count;k++) 02287 update[k] = 0; 02288 } 02289 02290 KOKKOS_INLINE_FUNCTION 02291 void join( volatile value_type update , 02292 const volatile value_type source ) const 02293 { 02294 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02295 #pragma ivdep 02296 #endif 02297 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02298 #pragma vector always 02299 #endif 02300 for(size_type k=0;k<value_count;k++){ 02301 update[k] += source[k]; 02302 } 02303 } 02304 }; 02305 02306 template<class normVector, class VectorType> 02307 normVector MV_Norm1(const normVector &r, const VectorType & x, int n = -1) 02308 { 02309 if (n < 0) { 02310 n = x.dimension_0 (); 02311 } 02312 02313 Kokkos::parallel_reduce (n , MV_Norm1_Functor<VectorType> (x), r); 02314 return r; 02315 } 02316 02317 /*------------------------------------------------------------------------------------------ 02318 *-------------------------- Compute NormInf-------------------------------------------------- 02319 *------------------------------------------------------------------------------------------*/ 02320 template<class XVector> 02321 struct MV_NormInf_Functor 02322 { 02323 typedef typename XVector::device_type device_type; 02324 typedef typename XVector::size_type size_type; 02325 typedef typename XVector::non_const_value_type xvalue_type; 02326 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 02327 typedef typename IPT::dot_type value_type[]; 02328 02329 typename XVector::const_type m_x ; 02330 size_type value_count; 02331 02332 MV_NormInf_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {} 02333 //-------------------------------------------------------------------------- 02334 KOKKOS_INLINE_FUNCTION 02335 void operator()( const size_type i, value_type sum ) const 02336 { 02337 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02338 #pragma ivdep 02339 #endif 02340 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02341 #pragma vector always 02342 #endif 02343 for(size_type k=0;k<value_count;k++){ 02344 sum[k] = MAX(sum[k],Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k))); 02345 } 02346 } 02347 02348 KOKKOS_INLINE_FUNCTION void init( value_type update) const 02349 { 02350 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02351 #pragma ivdep 02352 #endif 02353 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02354 #pragma vector always 02355 #endif 02356 for(size_type k=0;k<value_count;k++) 02357 update[k] = 0; 02358 } 02359 KOKKOS_INLINE_FUNCTION void join( volatile value_type update , 02360 const volatile value_type source ) const 02361 { 02362 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02363 #pragma ivdep 02364 #endif 02365 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02366 #pragma vector always 02367 #endif 02368 for(size_type k=0;k<value_count;k++){ 02369 update[k] = MAX(update[k],source[k]); 02370 } 02371 } 02372 }; 02373 02374 02375 template<class normVector, class VectorType> 02376 normVector MV_NormInf(const normVector &r, const VectorType & x, int n = -1) 02377 { 02378 if (n < 0) { 02379 n = x.dimension_0 (); 02380 } 02381 02382 Kokkos::parallel_reduce (n , MV_NormInf_Functor<VectorType> (x), r); 02383 return r; 02384 } 02385 02386 /*------------------------------------------------------------------------------------------ 02387 *-------------------------- Compute Weighted Dot-product (sum(x_i/w_i)^2)---------------------------------- 02388 *------------------------------------------------------------------------------------------*/ 02389 template<class WeightVector, class XVector,int WeightsRanks> 02390 struct MV_DotWeighted_Functor{}; 02391 02392 template<class WeightVector, class XVector> 02393 struct MV_DotWeighted_Functor<WeightVector,XVector,1> 02394 { 02395 typedef typename XVector::device_type device_type; 02396 typedef typename XVector::size_type size_type; 02397 typedef typename XVector::non_const_value_type xvalue_type; 02398 typedef typename WeightVector::non_const_value_type wvalue_type; 02399 typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT; 02400 typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT; 02401 typedef typename XIPT::dot_type value_type[]; 02402 02403 typename WeightVector::const_type m_w ; 02404 typename XVector::const_type m_x ; 02405 size_type value_count; 02406 02407 MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {} 02408 //-------------------------------------------------------------------------- 02409 KOKKOS_INLINE_FUNCTION 02410 void operator()( const size_type i, value_type sum ) const 02411 { 02412 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02413 #pragma ivdep 02414 #endif 02415 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02416 #pragma vector always 02417 #endif 02418 for(size_type k=0;k<value_count;k++){ 02419 sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i), m_w(i) ); 02420 } 02421 } 02422 02423 KOKKOS_INLINE_FUNCTION void init( value_type update) const 02424 { 02425 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02426 #pragma ivdep 02427 #endif 02428 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02429 #pragma vector always 02430 #endif 02431 for(size_type k=0;k<value_count;k++) 02432 update[k] = 0; 02433 } 02434 KOKKOS_INLINE_FUNCTION void join( volatile value_type update , 02435 const volatile value_type source ) const 02436 { 02437 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02438 #pragma ivdep 02439 #endif 02440 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02441 #pragma vector always 02442 #endif 02443 for(size_type k=0;k<value_count;k++){ 02444 update[k] += source[k]; 02445 } 02446 } 02447 }; 02448 02449 template<class WeightVector, class XVector> 02450 struct MV_DotWeighted_Functor<WeightVector,XVector,2> 02451 { 02452 typedef typename XVector::device_type device_type; 02453 typedef typename XVector::size_type size_type; 02454 typedef typename XVector::non_const_value_type xvalue_type; 02455 typedef typename WeightVector::non_const_value_type wvalue_type; 02456 typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT; 02457 typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT; 02458 typedef typename XIPT::dot_type value_type[]; 02459 02460 typename WeightVector::const_type m_w ; 02461 typename XVector::const_type m_x ; 02462 size_type value_count; 02463 02464 MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {} 02465 //-------------------------------------------------------------------------- 02466 KOKKOS_INLINE_FUNCTION 02467 void operator()( const size_type i, value_type sum ) const 02468 { 02469 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02470 #pragma ivdep 02471 #endif 02472 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02473 #pragma vector always 02474 #endif 02475 for(size_type k=0;k<value_count;k++){ 02476 sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i,k), m_w(i,k) ); 02477 } 02478 } 02479 02480 KOKKOS_INLINE_FUNCTION void init( value_type update) const 02481 { 02482 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02483 #pragma ivdep 02484 #endif 02485 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02486 #pragma vector always 02487 #endif 02488 for(size_type k=0;k<value_count;k++) 02489 update[k] = 0; 02490 } 02491 KOKKOS_INLINE_FUNCTION void join( volatile value_type update , 02492 const volatile value_type source ) const 02493 { 02494 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 02495 #pragma ivdep 02496 #endif 02497 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR 02498 #pragma vector always 02499 #endif 02500 for(size_type k=0;k<value_count;k++){ 02501 update[k] += source[k]; 02502 } 02503 } 02504 }; 02505 02506 template<class rVector, class WeightVector, class XVector> 02507 rVector MV_DotWeighted(const rVector &r, const WeightVector & w, const XVector & x, int n = -1) 02508 { 02509 if (n < 0) { 02510 n = x.dimension_0 (); 02511 } 02512 02513 typedef MV_DotWeighted_Functor<WeightVector, XVector, WeightVector::Rank> functor_type; 02514 Kokkos::parallel_reduce (n , functor_type (w, x), r); 02515 return r; 02516 } 02517 02518 /*------------------------------------------------------------------------------------------ 02519 *-------------------------- Multiply with scalar: y = a * x ------------------------------- 02520 *------------------------------------------------------------------------------------------*/ 02521 template<class RVector, class aVector, class XVector> 02522 struct V_MulScalarFunctor 02523 { 02524 typedef typename XVector::device_type device_type; 02525 typedef typename XVector::size_type size_type; 02526 02527 RVector m_r; 02528 typename XVector::const_type m_x ; 02529 typename aVector::const_type m_a ; 02530 //-------------------------------------------------------------------------- 02531 02532 KOKKOS_INLINE_FUNCTION 02533 void operator()( const size_type i) const 02534 { 02535 m_r(i) = m_a[0]*m_x(i); 02536 } 02537 }; 02538 02539 template<class aVector, class XVector> 02540 struct V_MulScalarFunctorSelf 02541 { 02542 typedef typename XVector::device_type device_type; 02543 typedef typename XVector::size_type size_type; 02544 02545 XVector m_x; 02546 typename aVector::const_type m_a ; 02547 //-------------------------------------------------------------------------- 02548 02549 KOKKOS_INLINE_FUNCTION 02550 void operator()( const size_type i) const 02551 { 02552 m_x(i) *= m_a(0); 02553 } 02554 }; 02555 02556 template<class RVector, class DataType,class Layout,class Device, class MemoryManagement,class Specialisation, class XVector> 02557 RVector V_MulScalar( const RVector & r, const typename Kokkos::View<DataType,Layout,Device,MemoryManagement,Specialisation> & a, const XVector & x) 02558 { 02559 typedef typename Kokkos::View<DataType,Layout,Device,MemoryManagement> aVector; 02560 if(r==x) { 02561 V_MulScalarFunctorSelf<aVector,XVector> op ; 02562 op.m_x = x ; 02563 op.m_a = a ; 02564 Kokkos::parallel_for( x.dimension(0) , op ); 02565 return r; 02566 } 02567 02568 V_MulScalarFunctor<RVector,aVector,XVector> op ; 02569 op.m_r = r ; 02570 op.m_x = x ; 02571 op.m_a = a ; 02572 Kokkos::parallel_for( x.dimension(0) , op ); 02573 return r; 02574 } 02575 02576 template<class RVector, class XVector> 02577 struct V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> 02578 { 02579 typedef typename XVector::device_type device_type; 02580 typedef typename XVector::size_type size_type; 02581 02582 RVector m_r; 02583 typename XVector::const_type m_x ; 02584 typename XVector::value_type m_a ; 02585 //-------------------------------------------------------------------------- 02586 02587 KOKKOS_INLINE_FUNCTION 02588 void operator()( const size_type i) const 02589 { 02590 m_r(i) = m_a*m_x(i); 02591 } 02592 }; 02593 02594 template<class XVector> 02595 struct V_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> 02596 { 02597 typedef typename XVector::device_type device_type; 02598 typedef typename XVector::size_type size_type; 02599 02600 XVector m_x; 02601 typename XVector::non_const_value_type m_a ; 02602 //-------------------------------------------------------------------------- 02603 02604 KOKKOS_INLINE_FUNCTION 02605 void operator()( const size_type i) const 02606 { 02607 m_x(i) *= m_a; 02608 } 02609 }; 02610 02611 02612 template<class RVector, class XVector> 02613 RVector V_MulScalar( const RVector & r, const typename XVector::non_const_value_type &a, const XVector & x) 02614 { 02615 if(r==x) { 02616 V_MulScalarFunctorSelf<typename RVector::value_type,RVector> op ; 02617 op.m_x = r ; 02618 op.m_a = a ; 02619 Kokkos::parallel_for( x.dimension(0) , op ); 02620 return r; 02621 } 02622 02623 V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ; 02624 op.m_r = r ; 02625 op.m_x = x ; 02626 op.m_a = a ; 02627 Kokkos::parallel_for( x.dimension(0) , op ); 02628 return r; 02629 } 02630 02631 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y> 02632 struct V_AddVectorFunctor 02633 { 02634 typedef typename RVector::device_type device_type; 02635 typedef typename RVector::size_type size_type; 02636 typedef typename XVector::non_const_value_type value_type; 02637 RVector m_r ; 02638 typename XVector::const_type m_x ; 02639 typename YVector::const_type m_y ; 02640 const value_type m_a; 02641 const value_type m_b; 02642 02643 //-------------------------------------------------------------------------- 02644 V_AddVectorFunctor(const RVector& r, const value_type& a,const XVector& x,const value_type& b,const YVector& y): 02645 m_r(r),m_x(x),m_y(y),m_a(a),m_b(b) 02646 { } 02647 02648 KOKKOS_INLINE_FUNCTION 02649 void operator()( const size_type i ) const 02650 { 02651 if((scalar_x==1)&&(scalar_y==1)) 02652 m_r(i) = m_x(i) + m_y(i); 02653 if((scalar_x==1)&&(scalar_y==-1)) 02654 m_r(i) = m_x(i) - m_y(i); 02655 if((scalar_x==-1)&&(scalar_y==-1)) 02656 m_r(i) = -m_x(i) - m_y(i); 02657 if((scalar_x==-1)&&(scalar_y==1)) 02658 m_r(i) = -m_x(i) + m_y(i); 02659 if((scalar_x==2)&&(scalar_y==1)) 02660 m_r(i) = m_a*m_x(i) + m_y(i); 02661 if((scalar_x==2)&&(scalar_y==-1)) 02662 m_r(i) = m_a*m_x(i) - m_y(i); 02663 if((scalar_x==1)&&(scalar_y==2)) 02664 m_r(i) = m_x(i) + m_b*m_y(i); 02665 if((scalar_x==-1)&&(scalar_y==2)) 02666 m_r(i) = -m_x(i) + m_b*m_y(i); 02667 if((scalar_x==2)&&(scalar_y==2)) 02668 m_r(i) = m_a*m_x(i) + m_b*m_y(i); 02669 } 02670 }; 02671 02672 template<class RVector, class XVector, int scalar_x> 02673 struct V_AddVectorSelfFunctor 02674 { 02675 typedef typename RVector::device_type device_type; 02676 typedef typename RVector::size_type size_type; 02677 typedef typename XVector::non_const_value_type value_type; 02678 RVector m_r ; 02679 typename XVector::const_type m_x ; 02680 const value_type m_a; 02681 02682 V_AddVectorSelfFunctor(const RVector& r, const value_type& a,const XVector& x): 02683 m_r(r),m_x(x),m_a(a) 02684 { } 02685 02686 KOKKOS_INLINE_FUNCTION 02687 void operator()( const size_type i ) const 02688 { 02689 if((scalar_x==1)) 02690 m_r(i) += m_x(i); 02691 if((scalar_x==-1)) 02692 m_r(i) -= m_x(i); 02693 if((scalar_x==2)) 02694 m_r(i) += m_a*m_x(i); 02695 } 02696 }; 02697 template<class RVector, class XVector, class YVector, int doalpha, int dobeta> 02698 RVector V_AddVector( const RVector & r,const typename XVector::non_const_value_type &av,const XVector & x, 02699 const typename XVector::non_const_value_type &bv, const YVector & y,int n=-1) 02700 { 02701 if(n == -1) n = x.dimension_0(); 02702 if(r.ptr_on_device()==x.ptr_on_device() && doalpha == 1) { 02703 V_AddVectorSelfFunctor<RVector,YVector,dobeta> f(r,bv,y); 02704 parallel_for(n,f); 02705 } else if(r.ptr_on_device()==y.ptr_on_device() && dobeta == 1) { 02706 V_AddVectorSelfFunctor<RVector,XVector,doalpha> f(r,av,x); 02707 parallel_for(n,f); 02708 } else { 02709 V_AddVectorFunctor<RVector,XVector,YVector,doalpha,dobeta> f(r,av,x,bv,y); 02710 parallel_for(n,f); 02711 } 02712 return r; 02713 } 02714 02715 template<class RVector, class XVector, class YVector> 02716 RVector V_AddVector( const RVector & r,const typename XVector::non_const_value_type &av,const XVector & x, 02717 const typename YVector::non_const_value_type &bv, const YVector & y, int n = -1, 02718 int a=2,int b=2) 02719 { 02720 if(a==-1) { 02721 if(b==-1) 02722 V_AddVector<RVector,XVector,YVector,-1,-1>(r,av,x,bv,y,n); 02723 else if(b==0) 02724 V_AddVector<RVector,XVector,YVector,-1,0>(r,av,x,bv,y,n); 02725 else if(b==1) 02726 V_AddVector<RVector,XVector,YVector,-1,1>(r,av,x,bv,y,n); 02727 else 02728 V_AddVector<RVector,XVector,YVector,-1,2>(r,av,x,bv,y,n); 02729 } else if (a==0) { 02730 if(b==-1) 02731 V_AddVector<RVector,XVector,YVector,0,-1>(r,av,x,bv,y,n); 02732 else if(b==0) 02733 V_AddVector<RVector,XVector,YVector,0,0>(r,av,x,bv,y,n); 02734 else if(b==1) 02735 V_AddVector<RVector,XVector,YVector,0,1>(r,av,x,bv,y,n); 02736 else 02737 V_AddVector<RVector,XVector,YVector,0,2>(r,av,x,bv,y,n); 02738 } else if (a==1) { 02739 if(b==-1) 02740 V_AddVector<RVector,XVector,YVector,1,-1>(r,av,x,bv,y,n); 02741 else if(b==0) 02742 V_AddVector<RVector,XVector,YVector,1,0>(r,av,x,bv,y,n); 02743 else if(b==1) 02744 V_AddVector<RVector,XVector,YVector,1,1>(r,av,x,bv,y,n); 02745 else 02746 V_AddVector<RVector,XVector,YVector,1,2>(r,av,x,bv,y,n); 02747 } else if (a==2) { 02748 if(b==-1) 02749 V_AddVector<RVector,XVector,YVector,2,-1>(r,av,x,bv,y,n); 02750 else if(b==0) 02751 V_AddVector<RVector,XVector,YVector,2,0>(r,av,x,bv,y,n); 02752 else if(b==1) 02753 V_AddVector<RVector,XVector,YVector,2,1>(r,av,x,bv,y,n); 02754 else 02755 V_AddVector<RVector,XVector,YVector,2,2>(r,av,x,bv,y,n); 02756 } 02757 return r; 02758 } 02759 02760 template<class RVector,class XVector,class YVector> 02761 RVector V_Add( const RVector & r, const XVector & x, const YVector & y, int n=-1) 02762 { 02763 return V_AddVector( r,1,x,1,y,n,1,1); 02764 } 02765 02766 template<class RVector,class XVector,class YVector> 02767 RVector V_Add( const RVector & r, const XVector & x, const typename XVector::non_const_value_type & bv, const YVector & y,int n=-1 ) 02768 { 02769 int b = 2; 02770 //if(bv == 0) b = 0; 02771 //if(bv == 1) b = 1; 02772 //if(bv == -1) b = -1; 02773 return V_AddVector(r,bv,x,bv,y,n,1,b); 02774 } 02775 02776 template<class RVector,class XVector,class YVector> 02777 RVector V_Add( const RVector & r, const typename XVector::non_const_value_type & av, const XVector & x, const typename XVector::non_const_value_type & bv, const YVector & y,int n=-1 ) 02778 { 02779 int a = 2; 02780 int b = 2; 02781 //if(av == 0) a = 0; 02782 //if(av == 1) a = 1; 02783 //if(av == -1) a = -1; 02784 //if(bv == 0) b = 0; 02785 //if(bv == 1) b = 1; 02786 //if(bv == -1) b = -1; 02787 02788 return V_AddVector(r,av,x,bv,y,n,a,b); 02789 } 02790 02791 template<class XVector, class YVector> 02792 struct V_DotFunctor 02793 { 02794 typedef typename XVector::device_type device_type; 02795 typedef typename XVector::size_type size_type; 02796 typedef typename XVector::non_const_value_type xvalue_type; 02797 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 02798 typedef typename IPT::dot_type value_type; 02799 XVector m_x ; 02800 YVector m_y ; 02801 02802 //-------------------------------------------------------------------------- 02803 V_DotFunctor(const XVector& x,const YVector& y): 02804 m_x(x),m_y(y) 02805 { } 02806 02807 KOKKOS_INLINE_FUNCTION 02808 void operator()( const size_type &i, value_type &sum ) const 02809 { 02810 sum += IPT::dot( m_x(i), m_y(i) ); // m_x(i) * m_y(i) 02811 } 02812 02813 KOKKOS_INLINE_FUNCTION 02814 void init( volatile value_type &update) const 02815 { 02816 update = 0; 02817 } 02818 02819 KOKKOS_INLINE_FUNCTION 02820 void join( volatile value_type &update , 02821 const volatile value_type &source ) const 02822 { 02823 update += source ; 02824 } 02825 }; 02826 02827 template<class XVector, class YVector> 02828 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type 02829 V_Dot( const XVector & x, const YVector & y, int n = -1) 02830 { 02831 typedef V_DotFunctor<XVector,YVector> Functor; 02832 Functor f(x,y); 02833 if (n<0) n = x.dimension_0(); 02834 typename Functor::value_type ret_val; 02835 parallel_reduce(n,f,ret_val); 02836 return ret_val; 02837 } 02838 02839 template<class WeightVector, class XVector> 02840 struct V_DotWeighted_Functor 02841 { 02842 typedef typename XVector::device_type device_type; 02843 typedef typename XVector::size_type size_type; 02844 typedef typename XVector::non_const_value_type xvalue_type; 02845 typedef typename WeightVector::non_const_value_type wvalue_type; 02846 typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT; 02847 typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT; 02848 typedef typename XIPT::dot_type value_type; 02849 02850 typename WeightVector::const_type m_w ; 02851 typename XVector::const_type m_x ; 02852 02853 V_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x) {} 02854 //-------------------------------------------------------------------------- 02855 KOKKOS_INLINE_FUNCTION 02856 void operator()( const size_type i, value_type& sum ) const 02857 { 02858 sum += XIPT::dot( m_x(i), m_x(i) )/ WIPT::dot( m_w(i), m_w(i) ); 02859 } 02860 02861 02862 KOKKOS_INLINE_FUNCTION void init( value_type & update) const 02863 { 02864 update = 0; 02865 } 02866 KOKKOS_INLINE_FUNCTION void join( volatile value_type & update , 02867 const volatile value_type & source ) const 02868 { 02869 update += source; 02870 } 02871 }; 02872 02873 template<class WeightVector, class XVector> 02874 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type 02875 V_DotWeighted(const WeightVector & w, const XVector & x, int n = -1) 02876 { 02877 if (n < 0) { 02878 n = x.dimension_0 (); 02879 } 02880 02881 typedef Details::InnerProductSpaceTraits<typename XVector::non_const_value_type> IPT; 02882 typedef typename IPT::dot_type value_type; 02883 value_type ret_val; 02884 02885 typedef V_DotWeighted_Functor<WeightVector,XVector> functor_type; 02886 Kokkos::parallel_reduce (n , functor_type (w, x), ret_val); 02887 return ret_val; 02888 } 02889 02890 /*------------------------------------------------------------------------------------------ 02891 *-------------------------- Compute Sum ------------------------------------------------- 02892 *------------------------------------------------------------------------------------------*/ 02893 template<class XVector> 02894 struct V_Sum_Functor 02895 { 02896 typedef typename XVector::device_type device_type; 02897 typedef typename XVector::size_type size_type; 02898 typedef typename XVector::non_const_value_type xvalue_type; 02899 typedef xvalue_type value_type; 02900 02901 typename XVector::const_type m_x ; 02902 02903 V_Sum_Functor(XVector x):m_x(x) {} 02904 //-------------------------------------------------------------------------- 02905 02906 KOKKOS_INLINE_FUNCTION 02907 void operator()( const size_type i, value_type& sum ) const 02908 { 02909 sum += m_x(i); 02910 } 02911 02912 KOKKOS_INLINE_FUNCTION 02913 void init( value_type& update) const 02914 { 02915 update = 0; 02916 } 02917 02918 KOKKOS_INLINE_FUNCTION 02919 void join( volatile value_type& update , 02920 const volatile value_type& source ) const 02921 { 02922 update += source; 02923 } 02924 }; 02925 02926 02927 template<class VectorType> 02928 typename VectorType::non_const_value_type 02929 V_Sum (const VectorType& x, int n = -1) 02930 { 02931 if (n < 0) { 02932 n = x.dimension_0 (); 02933 } 02934 02935 typedef typename VectorType::non_const_value_type value_type; 02936 value_type ret_val; 02937 Kokkos::parallel_reduce (n, V_Sum_Functor<VectorType> (x), ret_val); 02938 return ret_val; 02939 } 02940 02941 /*------------------------------------------------------------------------------------------ 02942 *-------------------------- Compute Norm1-------------------------------------------------- 02943 *------------------------------------------------------------------------------------------*/ 02944 template<class XVector> 02945 struct V_Norm1_Functor 02946 { 02947 typedef typename XVector::device_type device_type; 02948 typedef typename XVector::size_type size_type; 02949 typedef typename XVector::non_const_value_type xvalue_type; 02950 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 02951 typedef typename IPT::dot_type value_type; 02952 02953 typename XVector::const_type m_x ; 02954 02955 V_Norm1_Functor(XVector x):m_x(x) {} 02956 //-------------------------------------------------------------------------- 02957 KOKKOS_INLINE_FUNCTION 02958 void operator()( const size_type i, value_type& sum ) const 02959 { 02960 sum += Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i)); 02961 } 02962 KOKKOS_INLINE_FUNCTION void init( value_type& update) const 02963 { 02964 update = 0; 02965 } 02966 KOKKOS_INLINE_FUNCTION void join( volatile value_type& update , 02967 const volatile value_type& source ) const 02968 { 02969 update += source; 02970 } 02971 }; 02972 02973 template<class VectorType> 02974 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type 02975 V_Norm1( const VectorType & x, int n = -1) 02976 { 02977 if (n < 0) { 02978 n = x.dimension_0 (); 02979 } 02980 02981 typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT; 02982 typedef typename IPT::dot_type value_type; 02983 value_type ret_val; 02984 Kokkos::parallel_reduce (n, V_Norm1_Functor<VectorType> (x), ret_val); 02985 return ret_val; 02986 } 02987 /*------------------------------------------------------------------------------------------ 02988 *-------------------------- Compute NormInf-------------------------------------------------- 02989 *------------------------------------------------------------------------------------------*/ 02990 template<class XVector> 02991 struct V_NormInf_Functor 02992 { 02993 typedef typename XVector::device_type device_type; 02994 typedef typename XVector::size_type size_type; 02995 typedef typename XVector::non_const_value_type xvalue_type; 02996 typedef Details::InnerProductSpaceTraits<xvalue_type> IPT; 02997 typedef typename IPT::dot_type value_type; 02998 02999 typename XVector::const_type m_x ; 03000 03001 V_NormInf_Functor(XVector x):m_x(x) {} 03002 //-------------------------------------------------------------------------- 03003 KOKKOS_INLINE_FUNCTION 03004 void operator()( const size_type i, value_type& sum ) const 03005 { 03006 sum = MAX(sum,Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i))); 03007 } 03008 03009 KOKKOS_INLINE_FUNCTION void init( value_type& update) const 03010 { 03011 update = 0; 03012 } 03013 KOKKOS_INLINE_FUNCTION void join( volatile value_type& update , 03014 const volatile value_type& source ) const 03015 { 03016 update = MAX(update,source); 03017 } 03018 }; 03019 03020 template<class VectorType> 03021 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type 03022 V_NormInf (const VectorType& x, int n = -1) 03023 { 03024 if (n < 0) { 03025 n = x.dimension_0 (); 03026 } 03027 03028 typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT; 03029 typedef typename IPT::dot_type value_type; 03030 value_type ret_val; 03031 Kokkos::parallel_reduce (n, V_NormInf_Functor<VectorType> (x), ret_val); 03032 return ret_val; 03033 } 03034 03035 /*------------------------------------------------------------------------------------------ 03036 *-------------------------- Reciprocal element wise: y[i] = 1/x[i] ------------------------ 03037 *------------------------------------------------------------------------------------------*/ 03038 template<class RVector, class XVector> 03039 struct V_ReciprocalFunctor 03040 { 03041 typedef typename XVector::device_type device_type; 03042 typedef typename XVector::size_type size_type; 03043 03044 RVector m_r; 03045 typename XVector::const_type m_x ; 03046 03047 V_ReciprocalFunctor(RVector r, XVector x):m_r(r),m_x(x) {} 03048 //-------------------------------------------------------------------------- 03049 03050 KOKKOS_INLINE_FUNCTION 03051 void operator()( const size_type i) const 03052 { 03053 m_r(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i); 03054 } 03055 }; 03056 03057 template<class XVector> 03058 struct V_ReciprocalSelfFunctor 03059 { 03060 typedef typename XVector::device_type device_type; 03061 typedef typename XVector::size_type size_type; 03062 03063 XVector m_x ; 03064 03065 V_ReciprocalSelfFunctor(XVector x):m_x(x) {} 03066 //-------------------------------------------------------------------------- 03067 03068 KOKKOS_INLINE_FUNCTION 03069 void operator()( const size_type i) const 03070 { 03071 m_x(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i); 03072 } 03073 }; 03074 03075 template<class RVector, class XVector> 03076 RVector V_Reciprocal( const RVector & r, const XVector & x) 03077 { 03078 // TODO: Add error check (didn't link for some reason?) 03079 /*if(r.dimension_0() != x.dimension_0()) 03080 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(0) of r and x don't match"); 03081 */ 03082 03083 03084 if(r==x) { 03085 V_ReciprocalSelfFunctor<XVector> op(x) ; 03086 Kokkos::parallel_for( x.dimension_0() , op ); 03087 return r; 03088 } 03089 03090 V_ReciprocalFunctor<RVector,XVector> op(r,x) ; 03091 Kokkos::parallel_for( x.dimension_0() , op ); 03092 return r; 03093 } 03094 03095 /*------------------------------------------------------------------------------------------ 03096 *------------------- Reciprocal element wise with threshold: x[i] = 1/x[i] ---------------- 03097 *------------------------------------------------------------------------------------------*/ 03098 template<class XVector> 03099 struct V_ReciprocalThresholdSelfFunctor 03100 { 03101 typedef typename XVector::device_type device_type; 03102 typedef typename XVector::size_type size_type; 03103 typedef typename XVector::non_const_value_type value_type; 03104 typedef Kokkos::Details::ArithTraits<value_type> KAT; 03105 typedef typename KAT::mag_type mag_type; 03106 03107 const XVector m_x; 03108 const value_type m_min_val; 03109 const value_type m_min_val_mag; 03110 03111 V_ReciprocalThresholdSelfFunctor(const XVector& x, const value_type& min_val) : 03112 m_x(x), m_min_val(min_val), m_min_val_mag(KAT::abs(min_val)) {} 03113 //-------------------------------------------------------------------------- 03114 03115 KOKKOS_INLINE_FUNCTION 03116 void operator()( const size_type i) const 03117 { 03118 if (KAT::abs(m_x(i)) < m_min_val_mag) 03119 m_x(i) = m_min_val; 03120 else 03121 m_x(i) = KAT::one() / m_x(i); 03122 } 03123 }; 03124 03125 template<class XVector> 03126 XVector V_ReciprocalThreshold( const XVector & x, const typename XVector::non_const_value_type& min_val ) 03127 { 03128 V_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val) ; 03129 Kokkos::parallel_for( x.dimension_0() , op ); 03130 return x; 03131 } 03132 03133 /*------------------------------------------------------------------------------------------ 03134 *-------------------------- Abs element wise: y[i] = abs(x[i]) ------------------------ 03135 *------------------------------------------------------------------------------------------*/ 03136 template<class RVector, class XVector> 03137 struct V_AbsFunctor 03138 { 03139 typedef typename XVector::device_type device_type; 03140 typedef typename XVector::size_type size_type; 03141 03142 RVector m_r; 03143 typename XVector::const_type m_x ; 03144 03145 V_AbsFunctor(RVector r, XVector x):m_r(r),m_x(x) {} 03146 //-------------------------------------------------------------------------- 03147 03148 KOKKOS_INLINE_FUNCTION 03149 void operator()( const size_type i) const 03150 { 03151 m_r(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i)); 03152 } 03153 }; 03154 03155 template<class XVector> 03156 struct V_AbsSelfFunctor 03157 { 03158 typedef typename XVector::device_type device_type; 03159 typedef typename XVector::size_type size_type; 03160 03161 XVector m_x ; 03162 03163 V_AbsSelfFunctor(XVector x):m_x(x) {} 03164 //-------------------------------------------------------------------------- 03165 03166 KOKKOS_INLINE_FUNCTION 03167 void operator()( const size_type i) const 03168 { 03169 m_x(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i)); 03170 } 03171 }; 03172 03173 template<class RVector, class XVector> 03174 RVector V_Abs( const RVector & r, const XVector & x) 03175 { 03176 // TODO: Add error check (didn't link for some reason?) 03177 /*if(r.dimension_0() != x.dimension_0()) 03178 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(0) of r and x don't match"); 03179 */ 03180 03181 03182 if(r==x) { 03183 V_AbsSelfFunctor<XVector> op(x) ; 03184 Kokkos::parallel_for( x.dimension_0() , op ); 03185 return r; 03186 } 03187 03188 V_AbsFunctor<RVector,XVector> op(r,x) ; 03189 Kokkos::parallel_for( x.dimension_0() , op ); 03190 return r; 03191 } 03192 03193 /*------------------------------------------------------------------------------------------ 03194 *------ ElementWiseMultiply element wise: C(i) = c*C(i) + ab*A(i)*B(i) -------------- 03195 *------------------------------------------------------------------------------------------*/ 03196 template<class CVector, class AVector, class BVector> 03197 struct V_ElementWiseMultiplyFunctor 03198 { 03199 typedef typename CVector::device_type device_type; 03200 typedef typename CVector::size_type size_type; 03201 03202 typename CVector::const_value_type m_c; 03203 CVector m_C; 03204 typename AVector::const_value_type m_ab; 03205 typename AVector::const_type m_A ; 03206 typename BVector::const_type m_B ; 03207 03208 V_ElementWiseMultiplyFunctor( 03209 typename CVector::const_value_type c, 03210 CVector C, 03211 typename AVector::const_value_type ab, 03212 typename AVector::const_type A, 03213 typename BVector::const_type B): 03214 m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B) 03215 {} 03216 //-------------------------------------------------------------------------- 03217 03218 KOKKOS_INLINE_FUNCTION 03219 void operator()( const size_type i) const 03220 { 03221 m_C(i) = m_c*m_C(i) + m_ab*m_A(i)*m_B(i); 03222 } 03223 }; 03224 03225 03226 template<class CVector, class AVector, class BVector> 03227 CVector V_ElementWiseMultiply( 03228 typename CVector::const_value_type c, 03229 CVector C, 03230 typename AVector::const_value_type ab, 03231 AVector A, 03232 BVector B 03233 ) 03234 { 03235 // TODO: Add error check (didn't link for some reason?) 03236 /*if(r.dimension_0() != x.dimension_0()) 03237 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(0) of r and x don't match"); 03238 if(r.dimension_1() != x.dimension_1()) 03239 Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(1) of r and x don't match");*/ 03240 03241 //TODO: Get 1D version done 03242 /*if(r.dimension_1()==1) { 03243 typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D; 03244 typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D; 03245 03246 RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 ); 03247 XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 ); 03248 return V_ElementWiseMultiply(r_1d,x_1d); 03249 }*/ 03250 03251 V_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B) ; 03252 Kokkos::parallel_for( C.dimension_0() , op ); 03253 return C; 03254 } 03255 }//end namespace Kokkos 03256 #endif /* KOKKOS_MULTIVECTOR_H_ */
1.7.6.1