Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends
Kokkos_MV.hpp
00001 #ifndef KOKKOS_MULTIVECTOR_H_
00002 #define KOKKOS_MULTIVECTOR_H_
00003 
00004 #include <Kokkos_Core.hpp>
00005 #include <Kokkos_InnerProductSpaceTraits.hpp>
00006 #include <ctime>
00007 
00008 #include <iostream>
00009 #include <stdexcept> // For some recently added error handling
00010 
00011 #define MAX(a,b) (a<b?b:a)
00012 
00013 namespace Kokkos {
00014 
00015 
00016 template<typename Scalar, class device>
00017 struct MultiVectorDynamic{
00018 #ifdef KOKKOS_USE_CUSPARSE
00019   typedef typename Kokkos::LayoutLeft layout;
00020 #else
00021 #ifdef KOKKOS_USE_MKL
00022   typedef typename Kokkos::LayoutRight layout;
00023 #else
00024   typedef typename device::array_layout layout;
00025 #endif
00026 #endif
00027   typedef typename Kokkos::View<Scalar**  , layout, device>  type ;
00028   typedef typename Kokkos::View<const Scalar**  , layout, device>  const_type ;
00029   typedef typename Kokkos::View<const Scalar**  , layout, device, Kokkos::MemoryRandomAccess>  random_read_type ;
00030   MultiVectorDynamic() {}
00031   ~MultiVectorDynamic() {}
00032 };
00033 
00034 template<typename Scalar, class device, int n>
00035 struct MultiVectorStatic{
00036   typedef Scalar scalar;
00037   typedef typename device::array_layout layout;
00038   typedef typename Kokkos::View<Scalar*[n]  , layout, device>  type ;
00039   typedef typename Kokkos::View<const Scalar*[n]  , layout, device>  const_type ;
00040   typedef typename Kokkos::View<const Scalar*[n]  , layout, device, Kokkos::MemoryRandomAccess>  random_read_type ;
00041   MultiVectorStatic() {}
00042   ~MultiVectorStatic() {}
00043 };
00044 
00045 
00046 
00047 /*------------------------------------------------------------------------------------------
00048  *-------------------------- Multiply with scalar: y = a * x -------------------------------
00049  *------------------------------------------------------------------------------------------*/
00050 template<class RVector, class aVector, class XVector>
00051 struct MV_MulScalarFunctor
00052 {
00053   typedef typename XVector::device_type        device_type;
00054   typedef typename XVector::size_type            size_type;
00055 
00056   RVector m_r;
00057   typename XVector::const_type m_x ;
00058   typename aVector::const_type m_a ;
00059   size_type n;
00060   MV_MulScalarFunctor() {n=1;}
00061   //--------------------------------------------------------------------------
00062 
00063   KOKKOS_INLINE_FUNCTION
00064   void operator()( const size_type i) const
00065   {
00066 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00067 #pragma ivdep
00068 #endif
00069         for(size_type k=0;k<n;k++)
00070            m_r(i,k) = m_a[k]*m_x(i,k);
00071   }
00072 };
00073 
00074 template<class aVector, class XVector>
00075 struct MV_MulScalarFunctorSelf
00076 {
00077   typedef typename XVector::device_type        device_type;
00078   typedef typename XVector::size_type            size_type;
00079 
00080   XVector m_x;
00081   typename aVector::const_type   m_a ;
00082   size_type n;
00083   //--------------------------------------------------------------------------
00084 
00085   KOKKOS_INLINE_FUNCTION
00086   void operator()( const size_type i) const
00087   {
00088 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00089 #pragma ivdep
00090 #endif
00091         for(size_type k=0;k<n;k++)
00092            m_x(i,k) *= m_a[k];
00093   }
00094 };
00095 
00096 template<class RVector, class DataType,class Layout,class Device, class MemoryManagement,class Specialisation, class XVector>
00097 RVector MV_MulScalar (const RVector& r, const typename Kokkos::View<DataType,Layout,Device,MemoryManagement,Specialisation>& a, const XVector& x)
00098 {
00099   typedef typename Kokkos::View<DataType,Layout,Device,MemoryManagement> aVector;
00100   if (r == x) {
00101     MV_MulScalarFunctorSelf<aVector,XVector> op ;
00102     op.m_x = x ;
00103     op.m_a = a ;
00104     op.n = x.dimension(1);
00105     Kokkos::parallel_for (x.dimension (0), op);
00106     return r;
00107   }
00108 
00109   MV_MulScalarFunctor<RVector,aVector,XVector> op ;
00110   op.m_r = r ;
00111   op.m_x = x ;
00112   op.m_a = a ;
00113   op.n = x.dimension(1);
00114   Kokkos::parallel_for( x.dimension(0) , op );
00115   return r;
00116 }
00117 
00118 template<class RVector, class XVector>
00119 struct MV_MulScalarFunctor<RVector,typename RVector::value_type,XVector>
00120 {
00121   typedef typename XVector::device_type        device_type;
00122   typedef typename XVector::size_type            size_type;
00123 
00124   RVector m_r;
00125   typename XVector::const_type m_x ;
00126   typename RVector::value_type m_a ;
00127   size_type n;
00128   MV_MulScalarFunctor() {n=1;}
00129   //--------------------------------------------------------------------------
00130 
00131   KOKKOS_INLINE_FUNCTION
00132   void operator()( const size_type i) const
00133   {
00134 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00135 #pragma ivdep
00136 #endif
00137         for(size_type k=0;k<n;k++)
00138            m_r(i,k) = m_a*m_x(i,k);
00139   }
00140 };
00141 
00142 template<class XVector>
00143 struct MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
00144 {
00145   typedef typename XVector::device_type        device_type;
00146   typedef typename XVector::size_type            size_type;
00147 
00148   XVector m_x;
00149   typename XVector::non_const_value_type m_a ;
00150   size_type n;
00151   //--------------------------------------------------------------------------
00152 
00153   KOKKOS_INLINE_FUNCTION
00154   void operator()( const size_type i) const
00155   {
00156 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00157 #pragma ivdep
00158 #endif
00159         for(size_type k=0;k<n;k++)
00160            m_x(i,k) *= m_a;
00161   }
00162 };
00163 
00164 template<class RVector, class XVector>
00165 RVector MV_MulScalar( const RVector & r, const typename XVector::non_const_value_type &a, const XVector & x)
00166 {
00167   /*if(r.dimension_1()==1) {
00168     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
00169     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
00170 
00171     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
00172     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
00173     return V_MulScalar(r_1d,a,x_1d);
00174   }*/
00175   if (r == x) {
00176     MV_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector> op ;
00177     op.m_x = x ;
00178     op.m_a = a ;
00179     op.n = x.dimension(1);
00180     Kokkos::parallel_for (x.dimension (0), op);
00181     return r;
00182   }
00183 
00184   MV_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
00185   op.m_r = r ;
00186   op.m_x = x ;
00187   op.m_a = a ;
00188   op.n = x.dimension(1);
00189   Kokkos::parallel_for( x.dimension(0) , op );
00190   return r;
00191 }
00192 
00193 /*------------------------------------------------------------------------------------------
00194  *-------------------------- Reciprocal element wise: y[i] = 1/x[i] ------------------------
00195  *------------------------------------------------------------------------------------------*/
00196 template<class RVector, class XVector>
00197 struct MV_ReciprocalFunctor
00198 {
00199   typedef typename XVector::device_type        device_type;
00200   typedef typename XVector::size_type            size_type;
00201 
00202   RVector m_r;
00203   typename XVector::const_type m_x ;
00204 
00205   const size_type m_n;
00206   MV_ReciprocalFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
00207   //--------------------------------------------------------------------------
00208 
00209   KOKKOS_INLINE_FUNCTION
00210   void operator()( const size_type i) const
00211   {
00212 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00213 #pragma ivdep
00214 #endif
00215   for(size_type k=0;k<m_n;k++)
00216      m_r(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i,k);
00217   }
00218 };
00219 
00220 template<class XVector>
00221 struct MV_ReciprocalSelfFunctor
00222 {
00223   typedef typename XVector::device_type        device_type;
00224   typedef typename XVector::size_type            size_type;
00225 
00226   XVector m_x ;
00227 
00228   const size_type m_n;
00229   MV_ReciprocalSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
00230   //--------------------------------------------------------------------------
00231 
00232   KOKKOS_INLINE_FUNCTION
00233   void operator()( const size_type i) const
00234   {
00235 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00236 #pragma ivdep
00237 #endif
00238   for(size_type k=0;k<m_n;k++)
00239      m_x(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i,k);
00240   }
00241 };
00242 
00243 template<class RVector, class XVector>
00244 RVector MV_Reciprocal( const RVector & r, const XVector & x)
00245 {
00246   // TODO: Add error check (didn't link for some reason?)
00247   /*if(r.dimension_0() != x.dimension_0())
00248     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(0) of r and x don't match");
00249   if(r.dimension_1() != x.dimension_1())
00250     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(1) of r and x don't match");*/
00251 
00252   //TODO: Get 1D version done
00253   /*if(r.dimension_1()==1) {
00254     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
00255     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
00256 
00257     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
00258     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
00259     return V_MulScalar(r_1d,a,x_1d);
00260   }*/
00261   if(r==x) {
00262     MV_ReciprocalSelfFunctor<XVector> op(x,x.dimension_1()) ;
00263     Kokkos::parallel_for( x.dimension_0() , op );
00264     return r;
00265   }
00266 
00267   MV_ReciprocalFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
00268   Kokkos::parallel_for( x.dimension_0() , op );
00269   return r;
00270 }
00271 
00272 /*------------------------------------------------------------------------------------------
00273  *------------------- Reciprocal element wise with threshold: x[i] = 1/x[i] ----------------
00274  *------------------------------------------------------------------------------------------*/
00275 template<class XVector>
00276 struct MV_ReciprocalThresholdSelfFunctor
00277 {
00278   typedef typename XVector::device_type           device_type;
00279   typedef typename XVector::size_type               size_type;
00280   typedef typename XVector::non_const_value_type   value_type;
00281   typedef Kokkos::Details::ArithTraits<value_type>        KAT;
00282   typedef typename KAT::mag_type                     mag_type;
00283 
00284   const XVector    m_x;
00285   const value_type m_min_val;
00286   const value_type m_min_val_mag;
00287   const size_type  m_n;
00288 
00289   MV_ReciprocalThresholdSelfFunctor(const XVector& x, const value_type& min_val, const size_type n) :
00290     m_x(x), m_min_val(min_val), m_min_val_mag(KAT::abs(min_val)), m_n(n) {}
00291   //--------------------------------------------------------------------------
00292 
00293   KOKKOS_INLINE_FUNCTION
00294   void operator()( const size_type i) const
00295   {
00296 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00297 #pragma ivdep
00298 #endif
00299     for(size_type k=0;k<m_n;k++) {
00300       if (KAT::abs(m_x(i,k)) < m_min_val_mag)
00301         m_x(i,k) = m_min_val;
00302       else
00303         m_x(i,k) = KAT::one() / m_x(i,k);
00304     }
00305   }
00306 };
00307 
00308 template<class XVector>
00309 XVector MV_ReciprocalThreshold( const XVector & x, const typename XVector::non_const_value_type& min_val )
00310 {
00311   MV_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val,x.dimension_1()) ;
00312   Kokkos::parallel_for( x.dimension_0() , op );
00313   return x;
00314 }
00315 
00316 /*------------------------------------------------------------------------------------------
00317  *-------------------------- Abs element wise: y[i] = abs(x[i]) ------------------------
00318  *------------------------------------------------------------------------------------------*/
00319 template<class RVector, class XVector>
00320 struct MV_AbsFunctor
00321 {
00322   typedef typename XVector::device_type        device_type;
00323   typedef typename XVector::size_type            size_type;
00324 
00325   RVector m_r;
00326   typename XVector::const_type m_x ;
00327 
00328   const size_type m_n;
00329   MV_AbsFunctor(RVector r, XVector x, size_type n):m_r(r),m_x(x),m_n(n) {}
00330   //--------------------------------------------------------------------------
00331 
00332   KOKKOS_INLINE_FUNCTION
00333   void operator()( const size_type i) const
00334   {
00335 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00336 #pragma ivdep
00337 #endif
00338   for(size_type k=0;k<m_n;k++)
00339      m_r(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k));
00340   }
00341 };
00342 
00343 template<class XVector>
00344 struct MV_AbsSelfFunctor
00345 {
00346   typedef typename XVector::device_type        device_type;
00347   typedef typename XVector::size_type            size_type;
00348 
00349   XVector m_x ;
00350 
00351   const size_type m_n;
00352   MV_AbsSelfFunctor(XVector x, size_type n):m_x(x),m_n(n) {}
00353   //--------------------------------------------------------------------------
00354 
00355   KOKKOS_INLINE_FUNCTION
00356   void operator()( const size_type i) const
00357   {
00358 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00359 #pragma ivdep
00360 #endif
00361   for(size_type k=0;k<m_n;k++)
00362      m_x(i,k) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k));
00363   }
00364 };
00365 
00366 template<class RVector, class XVector>
00367 RVector MV_Abs( const RVector & r, const XVector & x)
00368 {
00369   // TODO: Add error check (didn't link for some reason?)
00370   /*if(r.dimension_0() != x.dimension_0())
00371     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(0) of r and x don't match");
00372   if(r.dimension_1() != x.dimension_1())
00373     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(1) of r and x don't match");*/
00374 
00375   //TODO: Get 1D version done
00376   /*if(r.dimension_1()==1) {
00377     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
00378     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
00379 
00380     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
00381     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
00382     return V_Abs(r_1d,x_1d);
00383   }*/
00384   if(r==x) {
00385     MV_AbsSelfFunctor<XVector> op(x,x.dimension_1()) ;
00386     Kokkos::parallel_for( x.dimension_0() , op );
00387     return r;
00388   }
00389 
00390   MV_AbsFunctor<RVector,XVector> op(r,x,x.dimension_1()) ;
00391   Kokkos::parallel_for( x.dimension_0() , op );
00392   return r;
00393 }
00394 
00395 /*------------------------------------------------------------------------------------------
00396  *------ ElementWiseMultiply element wise: C(i,j) = c*C(i,j) + ab*A(i)*B(i,j) --------------
00397  *------------------------------------------------------------------------------------------*/
00398 template<class CVector, class AVector, class BVector>
00399 struct MV_ElementWiseMultiplyFunctor
00400 {
00401   typedef typename CVector::device_type        device_type;
00402   typedef typename CVector::size_type            size_type;
00403 
00404   typename CVector::const_value_type m_c;
00405   CVector m_C;
00406   typename AVector::const_value_type m_ab;
00407   typename AVector::const_type m_A ;
00408   typename BVector::const_type m_B ;
00409 
00410   const size_type m_n;
00411   MV_ElementWiseMultiplyFunctor(
00412       typename CVector::const_value_type c,
00413       CVector C,
00414       typename AVector::const_value_type ab,
00415       typename AVector::const_type A,
00416       typename BVector::const_type B,
00417       const size_type n):
00418       m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B),m_n(n)
00419       {}
00420   //--------------------------------------------------------------------------
00421 
00422   KOKKOS_INLINE_FUNCTION
00423   void operator()( const size_type i) const
00424   {
00425     typename AVector::const_value_type Ai = m_A(i);
00426 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00427 #pragma ivdep
00428 #endif
00429   for(size_type k=0;k<m_n;k++)
00430      m_C(i,k) = m_c*m_C(i,k) + m_ab*Ai*m_B(i,k);
00431   }
00432 };
00433 
00434 
00435 template<class CVector, class AVector, class BVector>
00436 CVector MV_ElementWiseMultiply(
00437       typename CVector::const_value_type c,
00438       CVector C,
00439       typename AVector::const_value_type ab,
00440       AVector A,
00441       BVector B
00442     )
00443 {
00444   // TODO: Add error check (didn't link for some reason?)
00445   /*if(r.dimension_0() != x.dimension_0())
00446     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(0) of r and x don't match");
00447   if(r.dimension_1() != x.dimension_1())
00448     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(1) of r and x don't match");*/
00449 
00450   //TODO: Get 1D version done
00451   /*if(r.dimension_1()==1) {
00452     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
00453     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
00454 
00455     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
00456     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
00457     return V_ElementWiseMultiply(r_1d,x_1d);
00458   }*/
00459 
00460   MV_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B,C.dimension_1()) ;
00461   Kokkos::parallel_for( C.dimension_0() , op );
00462   return C;
00463 }
00464 
00465 /*------------------------------------------------------------------------------------------
00466  *-------------------------- Vector Add: r = a*x + b*y -------------------------------------
00467  *------------------------------------------------------------------------------------------*/
00468 
00469 /* Variants of Functors with a and b being vectors. */
00470 
00471 //Unroll for n<=16
00472 template<class RVector,class aVector, class XVector, class bVector, class YVector, int scalar_x, int scalar_y,int UNROLL>
00473 struct MV_AddUnrollFunctor
00474 {
00475   typedef typename RVector::device_type        device_type;
00476   typedef typename RVector::size_type            size_type;
00477 
00478   RVector   m_r ;
00479   XVector  m_x ;
00480   YVector   m_y ;
00481   aVector m_a;
00482   bVector m_b;
00483   size_type n;
00484   size_type start;
00485 
00486   MV_AddUnrollFunctor() {n=UNROLL;}
00487   //--------------------------------------------------------------------------
00488 
00489   KOKKOS_INLINE_FUNCTION
00490   void operator()( const size_type i ) const
00491   {
00492         if((scalar_x==1)&&(scalar_y==1)){
00493 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00494 #pragma unroll
00495 #endif
00496     for(size_type k=0;k<UNROLL;k++)
00497       m_r(i,k) = m_x(i,k) + m_y(i,k);
00498         }
00499         if((scalar_x==1)&&(scalar_y==-1)){
00500 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00501 #pragma unroll
00502 #endif
00503           for(size_type k=0;k<UNROLL;k++)
00504       m_r(i,k) = m_x(i,k) - m_y(i,k);
00505         }
00506         if((scalar_x==-1)&&(scalar_y==-1)){
00507 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00508 #pragma unroll
00509 #endif
00510 for(size_type k=0;k<UNROLL;k++)
00511       m_r(i,k) = -m_x(i,k) - m_y(i,k);
00512         }
00513         if((scalar_x==-1)&&(scalar_y==1)){
00514 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00515 #pragma unroll
00516 #endif
00517 for(size_type k=0;k<UNROLL;k++)
00518       m_r(i,k) = -m_x(i,k) + m_y(i,k);
00519         }
00520         if((scalar_x==2)&&(scalar_y==1)){
00521 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00522 #pragma unroll
00523 #endif
00524 for(size_type k=0;k<UNROLL;k++)
00525       m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
00526         }
00527         if((scalar_x==2)&&(scalar_y==-1)){
00528 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00529 #pragma unroll
00530 #endif
00531 for(size_type k=0;k<UNROLL;k++)
00532       m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
00533         }
00534         if((scalar_x==1)&&(scalar_y==2)){
00535 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00536 #pragma unroll
00537 #endif
00538 for(size_type k=0;k<UNROLL;k++)
00539       m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
00540         }
00541         if((scalar_x==-1)&&(scalar_y==2)){
00542 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00543 #pragma unroll
00544 #endif
00545 for(size_type k=0;k<UNROLL;k++)
00546       m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
00547         }
00548         if((scalar_x==2)&&(scalar_y==2)){
00549 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00550 #pragma unroll
00551 #endif
00552 for(size_type k=0;k<UNROLL;k++)
00553       m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
00554         }
00555   }
00556 };
00557 
00558 template<class RVector,class aVector, class XVector, class bVector, class YVector, int scalar_x, int scalar_y>
00559 struct MV_AddVectorFunctor
00560 {
00561   typedef typename RVector::device_type        device_type;
00562   typedef typename RVector::size_type            size_type;
00563 
00564   RVector   m_r ;
00565   XVector  m_x ;
00566   YVector   m_y ;
00567   aVector m_a;
00568   bVector m_b;
00569   size_type n;
00570 
00571   MV_AddVectorFunctor() {n=1;}
00572   //--------------------------------------------------------------------------
00573 
00574   KOKKOS_INLINE_FUNCTION
00575   void operator()( const size_type i ) const
00576   {
00577         if((scalar_x==1)&&(scalar_y==1))
00578 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00579 #pragma ivdep
00580 #endif
00581 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00582 #pragma vector always
00583 #endif
00584       for(size_type k=0;k<n;k++)
00585             m_r(i,k) = m_x(i,k) + m_y(i,k);
00586         if((scalar_x==1)&&(scalar_y==-1))
00587 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00588 #pragma ivdep
00589 #endif
00590 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00591 #pragma vector always
00592 #endif
00593       for(size_type k=0;k<n;k++)
00594             m_r(i,k) = m_x(i,k) - m_y(i,k);
00595         if((scalar_x==-1)&&(scalar_y==-1))
00596 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00597 #pragma ivdep
00598 #endif
00599 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00600 #pragma vector always
00601 #endif
00602       for(size_type k=0;k<n;k++)
00603             m_r(i,k) = -m_x(i,k) - m_y(i,k);
00604         if((scalar_x==-1)&&(scalar_y==1))
00605 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00606 #pragma ivdep
00607 #endif
00608 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00609 #pragma vector always
00610 #endif
00611       for(size_type k=0;k<n;k++)
00612             m_r(i,k) = -m_x(i,k) + m_y(i,k);
00613         if((scalar_x==2)&&(scalar_y==1))
00614 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00615 #pragma ivdep
00616 #endif
00617 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00618 #pragma vector always
00619 #endif
00620       for(size_type k=0;k<n;k++)
00621             m_r(i,k) = m_a(k)*m_x(i,k) + m_y(i,k);
00622         if((scalar_x==2)&&(scalar_y==-1))
00623 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00624 #pragma ivdep
00625 #endif
00626 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00627 #pragma vector always
00628 #endif
00629       for(size_type k=0;k<n;k++)
00630             m_r(i,k) = m_a(k)*m_x(i,k) - m_y(i,k);
00631         if((scalar_x==1)&&(scalar_y==2))
00632 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00633 #pragma ivdep
00634 #endif
00635 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00636 #pragma vector always
00637 #endif
00638       for(size_type k=0;k<n;k++)
00639             m_r(i,k) = m_x(i,k) + m_b(k)*m_y(i,k);
00640         if((scalar_x==-1)&&(scalar_y==2))
00641 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00642 #pragma ivdep
00643 #endif
00644 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00645 #pragma vector always
00646 #endif
00647       for(size_type k=0;k<n;k++)
00648             m_r(i,k) = -m_x(i,k) + m_b(k)*m_y(i,k);
00649         if((scalar_x==2)&&(scalar_y==2))
00650 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00651 #pragma ivdep
00652 #endif
00653 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00654 #pragma vector always
00655 #endif
00656       for(size_type k=0;k<n;k++)
00657             m_r(i,k) = m_a(k)*m_x(i,k) + m_b(k)*m_y(i,k);
00658 
00659   }
00660 };
00661 
00662 /* Variants of Functors with a and b being scalars. */
00663 
00664 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y,int UNROLL>
00665 struct MV_AddUnrollFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y,UNROLL>
00666 {
00667   typedef typename RVector::device_type        device_type;
00668   typedef typename RVector::size_type            size_type;
00669 
00670   RVector   m_r ;
00671   XVector  m_x ;
00672   YVector   m_y ;
00673   typename XVector::non_const_value_type m_a;
00674   typename YVector::non_const_value_type m_b;
00675   size_type n;
00676   size_type start;
00677 
00678   MV_AddUnrollFunctor() {n=UNROLL;}
00679   //--------------------------------------------------------------------------
00680 
00681   KOKKOS_INLINE_FUNCTION
00682   void operator()( const size_type i ) const
00683   {
00684   if((scalar_x==1)&&(scalar_y==1)){
00685 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00686 #pragma unroll
00687 #endif
00688     for(size_type k=0;k<UNROLL;k++)
00689       m_r(i,k) = m_x(i,k) + m_y(i,k);
00690   }
00691   if((scalar_x==1)&&(scalar_y==-1)){
00692 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00693 #pragma unroll
00694 #endif
00695     for(size_type k=0;k<UNROLL;k++)
00696       m_r(i,k) = m_x(i,k) - m_y(i,k);
00697   }
00698   if((scalar_x==-1)&&(scalar_y==-1)){
00699 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00700 #pragma unroll
00701 #endif
00702 for(size_type k=0;k<UNROLL;k++)
00703       m_r(i,k) = -m_x(i,k) - m_y(i,k);
00704   }
00705   if((scalar_x==-1)&&(scalar_y==1)){
00706 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00707 #pragma unroll
00708 #endif
00709 for(size_type k=0;k<UNROLL;k++)
00710       m_r(i,k) = -m_x(i,k) + m_y(i,k);
00711   }
00712   if((scalar_x==2)&&(scalar_y==1)){
00713 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00714 #pragma unroll
00715 #endif
00716 for(size_type k=0;k<UNROLL;k++)
00717       m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
00718   }
00719   if((scalar_x==2)&&(scalar_y==-1)){
00720 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00721 #pragma unroll
00722 #endif
00723 for(size_type k=0;k<UNROLL;k++)
00724       m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
00725   }
00726   if((scalar_x==1)&&(scalar_y==2)){
00727 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00728 #pragma unroll
00729 #endif
00730 for(size_type k=0;k<UNROLL;k++)
00731       m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
00732   }
00733   if((scalar_x==-1)&&(scalar_y==2)){
00734 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00735 #pragma unroll
00736 #endif
00737 for(size_type k=0;k<UNROLL;k++)
00738       m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
00739   }
00740   if((scalar_x==2)&&(scalar_y==2)){
00741 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
00742 #pragma unroll
00743 #endif
00744 for(size_type k=0;k<UNROLL;k++)
00745       m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
00746   }
00747   }
00748 };
00749 
00750 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y>
00751 struct MV_AddVectorFunctor<RVector,typename XVector::non_const_value_type, XVector, typename YVector::non_const_value_type,YVector,scalar_x,scalar_y>
00752 {
00753   typedef typename RVector::device_type        device_type;
00754   typedef typename RVector::size_type            size_type;
00755 
00756   RVector   m_r ;
00757   XVector  m_x ;
00758   YVector   m_y ;
00759   typename XVector::non_const_value_type m_a;
00760   typename YVector::non_const_value_type m_b;
00761   size_type n;
00762 
00763   MV_AddVectorFunctor() {n=1;}
00764   //--------------------------------------------------------------------------
00765 
00766   KOKKOS_INLINE_FUNCTION
00767   void operator()( const size_type i ) const
00768   {
00769   if((scalar_x==1)&&(scalar_y==1))
00770 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00771 #pragma ivdep
00772 #endif
00773 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00774 #pragma vector always
00775 #endif
00776       for(size_type k=0;k<n;k++)
00777       m_r(i,k) = m_x(i,k) + m_y(i,k);
00778   if((scalar_x==1)&&(scalar_y==-1))
00779 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00780 #pragma ivdep
00781 #endif
00782 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00783 #pragma vector always
00784 #endif
00785       for(size_type k=0;k<n;k++)
00786       m_r(i,k) = m_x(i,k) - m_y(i,k);
00787   if((scalar_x==-1)&&(scalar_y==-1))
00788 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00789 #pragma ivdep
00790 #endif
00791 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00792 #pragma vector always
00793 #endif
00794       for(size_type k=0;k<n;k++)
00795       m_r(i,k) = -m_x(i,k) - m_y(i,k);
00796   if((scalar_x==-1)&&(scalar_y==1))
00797 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00798 #pragma ivdep
00799 #endif
00800 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00801 #pragma vector always
00802 #endif
00803       for(size_type k=0;k<n;k++)
00804       m_r(i,k) = -m_x(i,k) + m_y(i,k);
00805   if((scalar_x==2)&&(scalar_y==1))
00806 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00807 #pragma ivdep
00808 #endif
00809 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00810 #pragma vector always
00811 #endif
00812       for(size_type k=0;k<n;k++)
00813       m_r(i,k) = m_a*m_x(i,k) + m_y(i,k);
00814   if((scalar_x==2)&&(scalar_y==-1))
00815 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00816 #pragma ivdep
00817 #endif
00818 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00819 #pragma vector always
00820 #endif
00821       for(size_type k=0;k<n;k++)
00822       m_r(i,k) = m_a*m_x(i,k) - m_y(i,k);
00823   if((scalar_x==1)&&(scalar_y==2))
00824 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00825 #pragma ivdep
00826 #endif
00827 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00828 #pragma vector always
00829 #endif
00830       for(size_type k=0;k<n;k++)
00831       m_r(i,k) = m_x(i,k) + m_b*m_y(i,k);
00832   if((scalar_x==-1)&&(scalar_y==2))
00833 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00834 #pragma ivdep
00835 #endif
00836 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00837 #pragma vector always
00838 #endif
00839       for(size_type k=0;k<n;k++)
00840       m_r(i,k) = -m_x(i,k) + m_b*m_y(i,k);
00841   if((scalar_x==2)&&(scalar_y==2))
00842 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
00843 #pragma ivdep
00844 #endif
00845 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
00846 #pragma vector always
00847 #endif
00848       for(size_type k=0;k<n;k++)
00849       m_r(i,k) = m_a*m_x(i,k) + m_b*m_y(i,k);
00850 
00851   }
00852 };
00853 
00854 template<class RVector,class aVector, class XVector, class bVector, class YVector,int UNROLL>
00855 RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x,
00856                 const bVector &bv, const YVector & y, int n,
00857                 int a=2,int b=2)
00858 {
00859    if(a==1&&b==1) {
00860      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,1,UNROLL> op ;
00861      op.m_r = r ;
00862      op.m_x = x ;
00863      op.m_y = y ;
00864      op.m_a = av ;
00865      op.m_b = bv ;
00866      op.n = x.dimension(1);
00867      Kokkos::parallel_for( n , op );
00868      return r;
00869    }
00870    if(a==1&&b==-1) {
00871      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,-1,UNROLL> op ;
00872      op.m_r = r ;
00873      op.m_x = x ;
00874      op.m_y = y ;
00875      op.m_a = av ;
00876      op.m_b = bv ;
00877      op.n = x.dimension(1);
00878      Kokkos::parallel_for( n , op );
00879      return r;
00880    }
00881    if(a==-1&&b==1) {
00882      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,1,UNROLL> op ;
00883      op.m_r = r ;
00884      op.m_x = x ;
00885      op.m_y = y ;
00886      op.m_a = av ;
00887      op.m_b = bv ;
00888      op.n = x.dimension(1);
00889      Kokkos::parallel_for( n , op );
00890      return r;
00891    }
00892    if(a==-1&&b==-1) {
00893      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1,UNROLL> op ;
00894      op.m_r = r ;
00895      op.m_x = x ;
00896      op.m_y = y ;
00897      op.m_a = av ;
00898      op.m_b = bv ;
00899      op.n = x.dimension(1);
00900      Kokkos::parallel_for( n , op );
00901      return r;
00902    }
00903    if(a*a!=1&&b==1) {
00904      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,1,UNROLL> op ;
00905      op.m_r = r ;
00906      op.m_x = x ;
00907      op.m_y = y ;
00908      op.m_a = av ;
00909      op.m_b = bv ;
00910      op.n = x.dimension(1);
00911      Kokkos::parallel_for( n , op );
00912      return r;
00913    }
00914    if(a*a!=1&&b==-1) {
00915      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,-1,UNROLL> op ;
00916      op.m_r = r ;
00917      op.m_x = x ;
00918      op.m_y = y ;
00919      op.m_a = av ;
00920      op.m_b = bv ;
00921      op.n = x.dimension(1);
00922      Kokkos::parallel_for( n , op );
00923      return r;
00924    }
00925    if(a==1&&b*b!=1) {
00926      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,1,2,UNROLL> op ;
00927      op.m_r = r ;
00928      op.m_x = x ;
00929      op.m_y = y ;
00930      op.m_a = av ;
00931      op.m_b = bv ;
00932      op.n = x.dimension(1);
00933      Kokkos::parallel_for( n , op );
00934      return r;
00935    }
00936    if(a==-1&&b*b!=1) {
00937      MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,-1,2,UNROLL> op ;
00938      op.m_r = r ;
00939      op.m_x = x ;
00940      op.m_y = y ;
00941      op.m_a = av ;
00942      op.m_b = bv ;
00943      op.n = x.dimension(1);
00944      Kokkos::parallel_for( n , op );
00945      return r;
00946    }
00947    MV_AddUnrollFunctor<RVector,aVector,XVector,bVector,YVector,2,2,UNROLL> op ;
00948    op.m_r = r ;
00949    op.m_x = x ;
00950    op.m_y = y ;
00951    op.m_a = av ;
00952    op.m_b = bv ;
00953    op.n = x.dimension(1);
00954    Kokkos::parallel_for( n , op );
00955 
00956    return r;
00957 }
00958 
00959 template<class RVector,class aVector, class XVector, class bVector, class YVector>
00960 RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x,
00961                 const bVector &bv, const YVector & y, int n,
00962                 int a=2,int b=2)
00963 {
00964         switch (x.dimension(1)){
00965       case 1: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 1>( r,av,x,bv,y,n,a,b);
00966                   break;
00967       case 2: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 2>( r,av,x,bv,y,n,a,b);
00968                   break;
00969       case 3: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 3>( r,av,x,bv,y,n,a,b);
00970                   break;
00971       case 4: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 4>( r,av,x,bv,y,n,a,b);
00972                   break;
00973       case 5: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 5>( r,av,x,bv,y,n,a,b);
00974                   break;
00975       case 6: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 6>( r,av,x,bv,y,n,a,b);
00976                   break;
00977       case 7: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 7>( r,av,x,bv,y,n,a,b);
00978                   break;
00979       case 8: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 8>( r,av,x,bv,y,n,a,b);
00980                   break;
00981       case 9: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 9>( r,av,x,bv,y,n,a,b);
00982                   break;
00983       case 10: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 10>( r,av,x,bv,y,n,a,b);
00984                   break;
00985       case 11: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 11>( r,av,x,bv,y,n,a,b);
00986                   break;
00987       case 12: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 12>( r,av,x,bv,y,n,a,b);
00988                   break;
00989       case 13: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 13>( r,av,x,bv,y,n,a,b);
00990                   break;
00991       case 14: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 14>( r,av,x,bv,y,n,a,b);
00992                   break;
00993       case 15: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 15>( r,av,x,bv,y,n,a,b);
00994                   break;
00995       case 16: MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 16>( r,av,x,bv,y,n,a,b);
00996                   break;
00997         }
00998         return r;
00999 }
01000 
01001 
01002 template<class RVector,class aVector, class XVector, class bVector, class YVector>
01003 RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x,
01004                 const bVector &bv, const YVector & y, int n,
01005                 int a=2,int b=2)
01006 {
01007    if(a==1&&b==1) {
01008      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,1> op ;
01009      op.m_r = r ;
01010      op.m_x = x ;
01011      op.m_y = y ;
01012      op.m_a = av ;
01013      op.m_b = bv ;
01014      op.n = x.dimension(1);
01015      Kokkos::parallel_for( n , op );
01016      return r;
01017    }
01018    if(a==1&&b==-1) {
01019      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,-1> op ;
01020      op.m_r = r ;
01021      op.m_x = x ;
01022      op.m_y = y ;
01023      op.m_a = av ;
01024      op.m_b = bv ;
01025      op.n = x.dimension(1);
01026      Kokkos::parallel_for( n , op );
01027      return r;
01028    }
01029    if(a==-1&&b==1) {
01030      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,1> op ;
01031      op.m_r = r ;
01032      op.m_x = x ;
01033      op.m_y = y ;
01034      op.m_a = av ;
01035      op.m_b = bv ;
01036      op.n = x.dimension(1);
01037      Kokkos::parallel_for( n , op );
01038      return r;
01039    }
01040    if(a==-1&&b==-1) {
01041      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1> op ;
01042      op.m_r = r ;
01043      op.m_x = x ;
01044      op.m_y = y ;
01045      op.m_a = av ;
01046      op.m_b = bv ;
01047      op.n = x.dimension(1);
01048      Kokkos::parallel_for( n , op );
01049      return r;
01050    }
01051    if(a*a!=1&&b==1) {
01052      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,1> op ;
01053      op.m_r = r ;
01054      op.m_x = x ;
01055      op.m_y = y ;
01056      op.m_a = av ;
01057      op.m_b = bv ;
01058      op.n = x.dimension(1);
01059      Kokkos::parallel_for( n , op );
01060      return r;
01061    }
01062    if(a*a!=1&&b==-1) {
01063      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,-1> op ;
01064      op.m_r = r ;
01065      op.m_x = x ;
01066      op.m_y = y ;
01067      op.m_a = av ;
01068      op.m_b = bv ;
01069      op.n = x.dimension(1);
01070      Kokkos::parallel_for( n , op );
01071      return r;
01072    }
01073    if(a==1&&b*b!=1) {
01074      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,2> op ;
01075      op.m_r = r ;
01076      op.m_x = x ;
01077      op.m_y = y ;
01078      op.m_a = av ;
01079      op.m_b = bv ;
01080      op.n = x.dimension(1);
01081      Kokkos::parallel_for( n , op );
01082      return r;
01083    }
01084    if(a==-1&&b*b!=1) {
01085      MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,2> op ;
01086      op.m_r = r ;
01087      op.m_x = x ;
01088      op.m_y = y ;
01089      op.m_a = av ;
01090      op.m_b = bv ;
01091      op.n = x.dimension(1);
01092      Kokkos::parallel_for( n , op );
01093      return r;
01094    }
01095    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,2> op ;
01096    op.m_r = r ;
01097    op.m_x = x ;
01098    op.m_y = y ;
01099    op.m_a = av ;
01100    op.m_b = bv ;
01101    op.n = x.dimension(1);
01102    Kokkos::parallel_for( n , op );
01103 
01104    return r;
01105 }
01106 
01107 template<class RVector,class aVector, class XVector, class bVector, class YVector>
01108 RVector MV_AddSpecialise( const RVector & r,const aVector &av,const XVector & x,
01109                 const bVector &bv, const YVector & y, unsigned int n,
01110                 int a=2,int b=2)
01111 {
01112 
01113         if(x.dimension(1)>16)
01114                 return MV_AddVector( r,av,x,bv,y,a,b);
01115 
01116         if(x.dimension_1()==1) {
01117     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
01118     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
01119     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
01120 
01121     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
01122     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
01123     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
01124 
01125     V_Add(r_1d,av,x_1d,bv,y_1d,n);
01126     return r;
01127   } else
01128         return MV_AddUnroll( r,av,x,bv,y,a,b);
01129 }
01130 
01131 template<class RVector,class aVector, class XVector, class bVector, class YVector>
01132 RVector MV_Add( const RVector & r,const aVector &av,const XVector & x,
01133     const bVector &bv, const YVector & y, int n = -1)
01134 {
01135   if(n==-1) n = x.dimension_0();
01136   if(x.dimension(1)>16)
01137     return MV_AddVector( r,av,x,bv,y,n,2,2);
01138 
01139   if(x.dimension_1()==1) {
01140     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
01141     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
01142     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
01143 
01144     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
01145     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
01146     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
01147 
01148     V_Add(r_1d,av,x_1d,bv,y_1d,n);
01149     return r;
01150   } else
01151   return MV_AddUnroll( r,av,x,bv,y,n,2,2);
01152 }
01153 
01154 template<class RVector,class XVector,class YVector>
01155 RVector MV_Add( const RVector & r, const XVector & x, const YVector & y, int n = -1)
01156 {
01157   if(n==-1) n = x.dimension_0();
01158   if(x.dimension_1()==1) {
01159     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
01160     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
01161     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
01162 
01163     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
01164     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
01165     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
01166 
01167     V_Add(r_1d,x_1d,y_1d,n);
01168     return r;
01169   } else {
01170     typename XVector::const_value_type a = 1.0;
01171     return MV_AddSpecialise(r,a,x,a,y,n,1,1);
01172   }
01173 }
01174 
01175 template<class RVector,class XVector,class bVector, class YVector>
01176 RVector MV_Add( const RVector & r, const XVector & x, const bVector & bv, const YVector & y, int n = -1 )
01177 {
01178   if(n==-1) n = x.dimension_0();
01179   if(x.dimension_1()==1) {
01180     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
01181     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
01182     typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
01183 
01184     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
01185     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
01186     YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
01187 
01188     V_Add(r_1d,x_1d,bv,y_1d,n);
01189     return r;
01190   } else {
01191     MV_AddSpecialise(r,bv,x,bv,y,n,1,2);
01192   }
01193 }
01194 
01195 
01196 template<class XVector,class YVector>
01197 struct MV_DotProduct_Right_FunctorVector
01198 {
01199   typedef typename XVector::device_type         device_type;
01200   typedef typename XVector::size_type             size_type;
01201   typedef typename XVector::non_const_value_type          xvalue_type;
01202   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
01203   typedef typename IPT::dot_type               value_type[];
01204   size_type value_count;
01205 
01206 
01207   typedef typename XVector::const_type        x_const_type;
01208   typedef typename YVector::const_type        y_const_type;
01209   x_const_type  m_x ;
01210   y_const_type  m_y ;
01211 
01212   //--------------------------------------------------------------------------
01213 
01214   KOKKOS_INLINE_FUNCTION
01215   void operator()( const size_type i, value_type sum ) const
01216   {
01217     const size_type numVecs=value_count;
01218 
01219 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01220 #pragma ivdep
01221 #endif
01222 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01223 #pragma vector always
01224 #endif
01225     for(size_type k=0;k<numVecs;k++)
01226       sum[k]+=IPT::dot( m_x(i,k), m_y(i,k) );  // m_x(i,k) * m_y(i,k)
01227   }
01228   KOKKOS_INLINE_FUNCTION void init( value_type update) const
01229   {
01230     const size_type numVecs = value_count;
01231 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01232 #pragma ivdep
01233 #endif
01234 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01235 #pragma vector always
01236 #endif
01237     for(size_type k=0;k<numVecs;k++)
01238       update[k] = 0;
01239   }
01240   KOKKOS_INLINE_FUNCTION void join( volatile value_type  update ,
01241                                     const volatile value_type  source ) const
01242   {
01243     const size_type numVecs = value_count;
01244 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01245 #pragma ivdep
01246 #endif
01247 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01248 #pragma vector always
01249 #endif
01250     for(size_type k=0;k<numVecs;k++){
01251       update[k] += source[k];
01252     }
01253   }
01254 };
01255 
01256 
01257 // Implementation detail of Tpetra::MultiVector::dot, when both
01258 // MultiVectors in the dot product have constant stride.  Compute the
01259 // dot product of the local part of each corresponding vector (column)
01260 // in two MultiVectors.
01261 template<class MultiVecViewType>
01262 struct MultiVecDotFunctor {
01263   typedef typename MultiVecViewType::device_type device_type;
01264   typedef typename MultiVecViewType::size_type size_type;
01265   typedef typename MultiVecViewType::value_type mv_value_type;
01266   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01267   typedef typename IPT::dot_type value_type[];
01268 
01269   typedef MultiVecViewType mv_view_type;
01270   typedef typename MultiVecViewType::const_type mv_const_view_type;
01271   typedef Kokkos::View<typename IPT::dot_type*, device_type> dot_view_type;
01272 
01273   mv_const_view_type X_, Y_;
01274   dot_view_type dots_;
01275   // Kokkos::parallel_reduce wants this, so it needs to be public.
01276   size_type value_count;
01277 
01278   MultiVecDotFunctor (const mv_const_view_type& X,
01279                       const mv_const_view_type& Y,
01280                       const dot_view_type& dots) :
01281     X_ (X), Y_ (Y), dots_ (dots), value_count (X.dimension_1 ())
01282   {
01283     if (value_count != dots.dimension_0 ()) {
01284 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
01285       cuda_abort("Kokkos::MultiVecDotFunctor: value_count does not match the length of 'dots'");
01286 #else
01287       std::ostringstream os;
01288       os << "Kokkos::MultiVecDotFunctor: value_count does not match the length "
01289         "of 'dots'.  X is " << X.dimension_0 () << " x " << X.dimension_1 () <<
01290         ", Y is " << Y.dimension_0 () << " x " << Y.dimension_1 () << ", "
01291         "dots has length " << dots.dimension_0 () << ", and value_count = " <<
01292         value_count << ".";
01293       throw std::invalid_argument (os.str ());
01294 #endif
01295     }
01296 
01297   }
01298 
01299   KOKKOS_INLINE_FUNCTION void
01300   operator() (const size_type i, value_type sum) const
01301   {
01302     const size_type numVecs = value_count;
01303 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01304 #pragma ivdep
01305 #endif
01306 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01307 #pragma vector always
01308 #endif
01309     for (size_type k = 0; k < numVecs; ++k) {
01310       sum[k] += IPT::dot (X_(i,k), Y_(i,k));
01311     }
01312   }
01313 
01314   KOKKOS_INLINE_FUNCTION void
01315   init (value_type update) const
01316   {
01317     const size_type numVecs = value_count;
01318 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01319 #pragma ivdep
01320 #endif
01321 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01322 #pragma vector always
01323 #endif
01324     for (size_type k = 0; k < numVecs; ++k) {
01325       update[k] = Kokkos::Details::ArithTraits<typename IPT::dot_type>::zero ();
01326     }
01327   }
01328 
01329   KOKKOS_INLINE_FUNCTION void
01330   join (volatile value_type update,
01331         const volatile value_type source) const
01332   {
01333     const size_type numVecs = value_count;
01334 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01335 #pragma ivdep
01336 #endif
01337 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01338 #pragma vector always
01339 #endif
01340     for (size_type k = 0; k < numVecs; ++k) {
01341       update[k] += source[k];
01342     }
01343   }
01344 
01345   // On device, write the reduction result to the output View.
01346   KOKKOS_INLINE_FUNCTION void
01347   final (const value_type dst) const
01348   {
01349     const size_type numVecs = value_count;
01350 
01351 #if !defined(__CUDA_ARCH__)
01352     // DEBUGGING ONLY
01353     {
01354       std::ostringstream os;
01355       os << "numVecs: " << numVecs << ", dst: [";
01356       for (size_t j = 0; j < numVecs; ++j) {
01357         os << dst[j];
01358         if (j + 1 < numVecs) {
01359           os << ", ";
01360         }
01361       }
01362       os << "]" << std::endl;
01363       std::cerr << os.str ();
01364     }
01365 #endif
01366 
01367 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01368 #pragma ivdep
01369 #endif
01370 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01371 #pragma vector always
01372 #endif
01373     for (size_type k = 0; k < numVecs; ++k) {
01374       dots_(k) = dst[k];
01375     }
01376 
01377 #if !defined(__CUDA_ARCH__)
01378     // DEBUGGING ONLY
01379     {
01380       std::ostringstream os;
01381       os << "numVecs: " << numVecs << ", dots_: [";
01382       for (size_t j = 0; j < numVecs; ++j) {
01383         os << dots_(j);
01384         if (j + 1 < numVecs) {
01385           os << ", ";
01386         }
01387       }
01388       os << "]" << std::endl;
01389       std::cerr << os.str ();
01390     }
01391 #endif
01392   }
01393 };
01394 
01395 
01396 // Implementation detail of Tpetra::MultiVector::norm2, when the
01397 // MultiVector has constant stride.  Compute the square of the
01398 // two-norm of each column of a multivector.
01399 template<class MultiVecViewType>
01400 struct MultiVecNorm2SquaredFunctor {
01401   typedef typename MultiVecViewType::device_type device_type;
01402   typedef typename MultiVecViewType::size_type size_type;
01403   typedef typename MultiVecViewType::value_type mv_value_type;
01404   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01405   typedef typename IPT::mag_type value_type[];
01406 
01407   typedef MultiVecViewType mv_view_type;
01408   typedef typename MultiVecViewType::const_type mv_const_view_type;
01409   typedef Kokkos::View<typename IPT::mag_type*, device_type> norms_view_type;
01410 
01411   mv_const_view_type X_;
01412   norms_view_type norms_;
01413   // Kokkos::parallel_reduce wants this, so it needs to be public.
01414   size_type value_count;
01415 
01416   MultiVecNorm2SquaredFunctor (const mv_const_view_type& X,
01417                                const norms_view_type& norms) :
01418     X_ (X), norms_ (norms), value_count (X.dimension_1 ())
01419   {
01420     if (value_count != norms.dimension_0 ()) {
01421 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
01422       cuda_abort("Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match the length of 'norms'");
01423 #else
01424       std::ostringstream os;
01425       os << "Kokkos::MultiVecNorm2SquaredFunctor: value_count does not match "
01426         "the length of 'norms'.  X is " << X.dimension_0 () << " x " <<
01427         X.dimension_1 () << ", norms has length " << norms.dimension_0 () <<
01428         ", and value_count = " << value_count << ".";
01429       throw std::invalid_argument (os.str ());
01430 #endif
01431     }
01432   }
01433 
01434   KOKKOS_INLINE_FUNCTION void
01435   operator() (const size_type i, value_type sum) const
01436   {
01437     const size_type numVecs = value_count;
01438 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01439 #pragma ivdep
01440 #endif
01441 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01442 #pragma vector always
01443 #endif
01444     for (size_type k = 0; k < numVecs; ++k) {
01445       const typename IPT::mag_type tmp = IPT::norm (X_(i,k));
01446       sum[k] += tmp * tmp;
01447     }
01448   }
01449 
01450   KOKKOS_INLINE_FUNCTION void
01451   init (value_type update) const
01452   {
01453     const size_type numVecs = value_count;
01454 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01455 #pragma ivdep
01456 #endif
01457 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01458 #pragma vector always
01459 #endif
01460     for (size_type k = 0; k < numVecs; ++k) {
01461       update[k] = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero ();
01462     }
01463   }
01464 
01465   KOKKOS_INLINE_FUNCTION void
01466   join (volatile value_type update,
01467         const volatile value_type source) const
01468   {
01469     const size_type numVecs = value_count;
01470 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01471 #pragma ivdep
01472 #endif
01473 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01474 #pragma vector always
01475 #endif
01476     for (size_type k = 0; k < numVecs; ++k) {
01477       update[k] += source[k];
01478     }
01479   }
01480 
01481   // On device, write the reduction result to the output View.
01482   KOKKOS_INLINE_FUNCTION void
01483   final (const value_type dst) const
01484   {
01485     const size_type numVecs = value_count;
01486 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01487 #pragma ivdep
01488 #endif
01489 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01490 #pragma vector always
01491 #endif
01492     for (size_type k = 0; k < numVecs; ++k) {
01493       norms_(k) = dst[k];
01494     }
01495   }
01496 };
01497 
01498 
01499 // Implementation detail of Tpetra::MultiVector::norm1, when the
01500 // MultiVector has constant stride.  Compute the one-norm of each
01501 // column of a multivector.
01502 template<class MultiVecViewType>
01503 struct MultiVecNorm1Functor {
01504   typedef typename MultiVecViewType::device_type device_type;
01505   typedef typename MultiVecViewType::size_type size_type;
01506   typedef typename MultiVecViewType::value_type mv_value_type;
01507   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01508   typedef typename IPT::mag_type value_type[];
01509 
01510   typedef MultiVecViewType mv_view_type;
01511   typedef typename MultiVecViewType::const_type mv_const_view_type;
01512   typedef Kokkos::View<typename IPT::mag_type*, device_type> norms_view_type;
01513 
01514   mv_const_view_type X_;
01515   norms_view_type norms_;
01516   // Kokkos::parallel_reduce wants this, so it needs to be public.
01517   size_type value_count;
01518 
01519   MultiVecNorm1Functor (const mv_const_view_type& X,
01520                         const norms_view_type& norms) :
01521     X_ (X), norms_ (norms), value_count (X.dimension_1 ())
01522   {
01523     if (value_count != norms.dimension_0 ()) {
01524 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
01525       cuda_abort("Kokkos::MultiVecNorm1Functor: value_count does not match the length of 'norms'");
01526 #else
01527       std::ostringstream os;
01528       os << "Kokkos::MultiVecNorm1Functor: value_count does not match the "
01529          << "length of 'norms'.  X is " << X.dimension_0 () << " x "
01530          << X.dimension_1 () << ", norms has length " << norms.dimension_0 ()
01531          << ", and value_count = " << value_count << ".";
01532       throw std::invalid_argument (os.str ());
01533 #endif
01534     }
01535   }
01536 
01537   KOKKOS_INLINE_FUNCTION void
01538   operator() (const size_type i, value_type sum) const
01539   {
01540     const size_type numVecs = value_count;
01541 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01542 #pragma ivdep
01543 #endif
01544 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01545 #pragma vector always
01546 #endif
01547     for (size_type k = 0; k < numVecs; ++k) {
01548       sum[k] += IPT::norm (X_(i,k)); // absolute value
01549     }
01550   }
01551 
01552   KOKKOS_INLINE_FUNCTION void
01553   init (value_type update) const
01554   {
01555     const size_type numVecs = value_count;
01556 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01557 #pragma ivdep
01558 #endif
01559 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01560 #pragma vector always
01561 #endif
01562     for (size_type k = 0; k < numVecs; ++k) {
01563       update[k] = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero ();
01564     }
01565   }
01566 
01567   KOKKOS_INLINE_FUNCTION void
01568   join (volatile value_type update,
01569         const volatile value_type source) const
01570   {
01571     const size_type numVecs = value_count;
01572 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01573 #pragma ivdep
01574 #endif
01575 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01576 #pragma vector always
01577 #endif
01578     for (size_type k = 0; k < numVecs; ++k) {
01579       update[k] += source[k];
01580     }
01581   }
01582 
01583   // On device, write the reduction result to the output View.
01584   KOKKOS_INLINE_FUNCTION void
01585   final (const value_type dst) const
01586   {
01587     const size_type numVecs = value_count;
01588 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01589 #pragma ivdep
01590 #endif
01591 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01592 #pragma vector always
01593 #endif
01594     for (size_type k = 0; k < numVecs; ++k) {
01595       norms_(k) = dst[k];
01596     }
01597   }
01598 };
01599 
01600 
01601 // Implementation detail of Tpetra::MultiVector::normInf, when the
01602 // MultiVector has constant stride.  Compute the infinity-norm of each
01603 // column of a multivector.
01604 template<class MultiVecViewType>
01605 struct MultiVecNormInfFunctor {
01606   typedef typename MultiVecViewType::device_type device_type;
01607   typedef typename MultiVecViewType::size_type size_type;
01608   typedef typename MultiVecViewType::value_type mv_value_type;
01609   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01610   typedef typename IPT::mag_type value_type[];
01611 
01612   typedef MultiVecViewType mv_view_type;
01613   typedef typename MultiVecViewType::const_type mv_const_view_type;
01614   typedef Kokkos::View<typename IPT::mag_type*, device_type> norms_view_type;
01615 
01616   mv_const_view_type X_;
01617   norms_view_type norms_;
01618   // Kokkos::parallel_reduce wants this, so it needs to be public.
01619   size_type value_count;
01620 
01621   MultiVecNormInfFunctor (const mv_const_view_type& X,
01622                           const norms_view_type& norms) :
01623     X_ (X), norms_ (norms), value_count (X.dimension_1 ())
01624   {
01625     if (value_count != norms.dimension_0 ()) {
01626 #if defined(__CUDACC__) && defined(__CUDA_ARCH__)
01627       cuda_abort("Kokkos::MultiVecNormInfFunctor: value_count does not match the length of 'norms'");
01628 #else
01629       std::ostringstream os;
01630       os << "Kokkos::MultiVecNormInfFunctor: value_count does not match the "
01631          << "length of 'norms'.  X is " << X.dimension_0 () << " x "
01632          << X.dimension_1 () << ", norms has length " << norms.dimension_0 ()
01633          << ", and value_count = " << value_count << ".";
01634       throw std::invalid_argument (os.str ());
01635 #endif
01636     }
01637   }
01638 
01639   KOKKOS_INLINE_FUNCTION void
01640   operator() (const size_type i, value_type maxes) const
01641   {
01642     const size_type numVecs = value_count;
01643 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01644 #pragma ivdep
01645 #endif
01646 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01647 #pragma vector always
01648 #endif
01649     for (size_type k = 0; k < numVecs; ++k) {
01650       const typename IPT::mag_type curVal = maxes[k];
01651       const typename IPT::mag_type newVal = IPT::norm (X_(i,k));
01652       // max(curVal, newVal).  Any comparison predicate involving NaN
01653       // evaluates to false.  Thus, this will never assign NaN to
01654       // update[k], unless it contains NaN already.  The initial value
01655       // is zero, so NaNs won't propagate.  (This definition makes NaN
01656       // into an "invalid value," which is useful for statistics and
01657       // other applications that use NaN to indicate a "hole.")
01658       if (curVal < newVal) {
01659         maxes[k] = newVal;
01660       }
01661     }
01662   }
01663 
01664   KOKKOS_INLINE_FUNCTION void
01665   init (value_type update) const
01666   {
01667     const size_type numVecs = value_count;
01668 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01669 #pragma ivdep
01670 #endif
01671 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01672 #pragma vector always
01673 #endif
01674     for (size_type k = 0; k < numVecs; ++k) {
01675       // Zero is a good default value for magnitudes (which are
01676       // nonnegative by definition).  That way, MPI processes with
01677       // zero rows won't affect the global maximum.
01678       update[k] = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero ();
01679     }
01680   }
01681 
01682   KOKKOS_INLINE_FUNCTION void
01683   join (volatile value_type update,
01684         const volatile value_type source) const
01685   {
01686     const size_type numVecs = value_count;
01687 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01688 #pragma ivdep
01689 #endif
01690 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01691 #pragma vector always
01692 #endif
01693     for (size_type k = 0; k < numVecs; ++k) {
01694       const typename IPT::mag_type curVal = update[k];
01695       const typename IPT::mag_type newVal = source[k];
01696       // max(curVal, newVal).  Any comparison predicate involving NaN
01697       // evaluates to false.  Thus, this will never assign NaN to
01698       // update[k], unless it contains NaN already.  The initial value
01699       // is zero, so NaNs won't propagate.  (This definition makes NaN
01700       // into an "invalid value," which is useful for statistics and
01701       // other applications that use NaN to indicate a "hole.")
01702       if (curVal < newVal) {
01703         update[k] = newVal;
01704       }
01705     }
01706   }
01707 
01708   // On device, write the reduction result to the output View.
01709   KOKKOS_INLINE_FUNCTION void
01710   final (const value_type dst) const
01711   {
01712     const size_type numVecs = value_count;
01713 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
01714 #pragma ivdep
01715 #endif
01716 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
01717 #pragma vector always
01718 #endif
01719     for (size_type k = 0; k < numVecs; ++k) {
01720       norms_(k) = dst[k];
01721     }
01722   }
01723 };
01724 
01725 
01726 // Implementation detail of Tpetra::MultiVector::dot, for single
01727 // vectors (columns).
01728 template<class VecViewType>
01729 struct VecDotFunctor {
01730   typedef typename VecViewType::device_type device_type;
01731   typedef typename VecViewType::size_type size_type;
01732   typedef typename VecViewType::value_type mv_value_type;
01733   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01734   typedef typename IPT::dot_type value_type;
01735   typedef typename VecViewType::const_type vec_const_view_type;
01736   // This is a nonconst scalar view.  It holds one dot_type instance.
01737   typedef Kokkos::View<typename IPT::dot_type, device_type> dot_view_type;
01738   typedef Kokkos::View<typename IPT::dot_type*, device_type> dots_view_type;
01739 
01740   vec_const_view_type x_, y_;
01741   dot_view_type dot_;
01742 
01743   VecDotFunctor (const vec_const_view_type& x,
01744                  const vec_const_view_type& y,
01745                  const dot_view_type& dot) :
01746     x_ (x), y_ (y), dot_ (dot)
01747   {
01748     if (x.dimension_0 () != y.dimension_0 ()) {
01749       std::ostringstream os;
01750       os << "Kokkos::VecDotFunctor: The dimensions of x and y do not match.  "
01751         "x.dimension_0() = " << x.dimension_0 ()
01752          << " != y.dimension_0() = " << y.dimension_0 () << ".";
01753       throw std::invalid_argument (os.str ());
01754     }
01755   }
01756 
01757   KOKKOS_INLINE_FUNCTION void
01758   operator() (const size_type i, value_type& sum) const {
01759     sum += IPT::dot (x_(i), y_(i));
01760   }
01761 
01762   KOKKOS_INLINE_FUNCTION void
01763   init (value_type& update) const {
01764     update = Kokkos::Details::ArithTraits<typename IPT::dot_type>::zero ();
01765   }
01766 
01767   KOKKOS_INLINE_FUNCTION void
01768   join (volatile value_type& update,
01769         const volatile value_type& source) const {
01770     update += source;
01771   }
01772 
01773   // On device, write the reduction result to the output View.
01774   KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
01775     // BADNESS HERE
01776     dot_() = dst;
01777   }
01778 };
01779 
01780 
01781 // Compute the square of the two-norm of a single vector.
01782 template<class VecViewType>
01783 struct VecNorm2SquaredFunctor {
01784   typedef typename VecViewType::device_type device_type;
01785   typedef typename VecViewType::size_type size_type;
01786   typedef typename VecViewType::value_type mv_value_type;
01787   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01788   typedef typename IPT::mag_type value_type;
01789   typedef typename VecViewType::const_type vec_const_view_type;
01790   // This is a nonconst scalar view.  It holds one mag_type instance.
01791   typedef Kokkos::View<typename IPT::mag_type, device_type> norm_view_type;
01792 
01793   vec_const_view_type x_;
01794   norm_view_type norm_;
01795 
01796   // Constructor
01797   //
01798   // x: the vector for which to compute the square of the two-norm.
01799   // norm: scalar View into which to put the result.
01800   VecNorm2SquaredFunctor (const vec_const_view_type& x,
01801                           const norm_view_type& norm) :
01802     x_ (x), norm_ (norm)
01803   {}
01804 
01805   KOKKOS_INLINE_FUNCTION void
01806   operator() (const size_type i, value_type& sum) const {
01807     const typename IPT::mag_type tmp = IPT::norm (x_(i));
01808     sum += tmp * tmp;
01809   }
01810 
01811   KOKKOS_INLINE_FUNCTION void
01812   init (value_type& update) const {
01813     update = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero ();
01814   }
01815 
01816   KOKKOS_INLINE_FUNCTION void
01817   join (volatile value_type& update,
01818         const volatile value_type& source) const {
01819     update += source;
01820   }
01821 
01822   // On device, write the reduction result to the output View.
01823   KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
01824     norm_ () = dst;
01825   }
01826 };
01827 
01828 
01829 // Compute the square of the one-norm of a single vector.
01830 template<class VecViewType>
01831 struct VecNorm1Functor {
01832   typedef typename VecViewType::device_type device_type;
01833   typedef typename VecViewType::size_type size_type;
01834   typedef typename VecViewType::value_type mv_value_type;
01835   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01836   typedef typename IPT::mag_type value_type;
01837   typedef typename VecViewType::const_type vec_const_view_type;
01838   // This is a nonconst scalar view.  It holds one mag_type instance.
01839   typedef Kokkos::View<typename IPT::mag_type, device_type> norm_view_type;
01840 
01841   vec_const_view_type x_;
01842   norm_view_type norm_;
01843 
01844   // Constructor
01845   //
01846   // x: the vector for which to compute the one-norm.
01847   // norm: scalar View into which to put the result.
01848   VecNorm1Functor (const vec_const_view_type& x,
01849                    const norm_view_type& norm) :
01850     x_ (x), norm_ (norm)
01851   {}
01852 
01853   KOKKOS_INLINE_FUNCTION void
01854   operator() (const size_type i, value_type& sum) const {
01855     sum += IPT::norm (x_(i)); // absolute value
01856   }
01857 
01858   KOKKOS_INLINE_FUNCTION void
01859   init (value_type& update) const {
01860     update = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero ();
01861   }
01862 
01863   KOKKOS_INLINE_FUNCTION void
01864   join (volatile value_type& update,
01865         const volatile value_type& source) const {
01866     update += source;
01867   }
01868 
01869   // On device, write the reduction result to the output View.
01870   KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
01871     norm_ () = dst;
01872   }
01873 };
01874 
01875 
01876 // Compute the square of the infinity-norm of a single vector.
01877 template<class VecViewType>
01878 struct VecNormInfFunctor {
01879   typedef typename VecViewType::device_type device_type;
01880   typedef typename VecViewType::size_type size_type;
01881   typedef typename VecViewType::value_type mv_value_type;
01882   typedef Kokkos::Details::InnerProductSpaceTraits<mv_value_type> IPT;
01883   typedef typename IPT::mag_type value_type;
01884   typedef typename VecViewType::const_type vec_const_view_type;
01885   // This is a nonconst scalar view.  It holds one mag_type instance.
01886   typedef Kokkos::View<typename IPT::mag_type, device_type> norm_view_type;
01887 
01888   vec_const_view_type x_;
01889   norm_view_type norm_;
01890 
01891   // Constructor
01892   //
01893   // x: the vector for which to compute the infinity-norm.
01894   // norm: scalar View into which to put the result.
01895   VecNormInfFunctor (const vec_const_view_type& x,
01896                      const norm_view_type& norm) :
01897     x_ (x), norm_ (norm)
01898   {}
01899 
01900   KOKKOS_INLINE_FUNCTION void
01901   operator() (const size_type i, value_type& curVal) const {
01902     const typename IPT::mag_type newVal = IPT::norm (x_(i));
01903     // max(curVal, newVal).  Any comparison predicate involving NaN
01904     // evaluates to false.  Thus, this will never assign NaN to
01905     // update[k], unless it contains NaN already.  The initial value
01906     // is zero, so NaNs won't propagate.  (This definition makes NaN
01907     // into an "invalid value," which is useful for statistics and
01908     // other applications that use NaN to indicate a "hole.")
01909     if (curVal < newVal) {
01910       curVal = newVal;
01911     }
01912   }
01913 
01914   KOKKOS_INLINE_FUNCTION void
01915   init (value_type& update) const {
01916     // Zero is a good default value for magnitudes (which are
01917     // nonnegative by definition).  That way, MPI processes with
01918     // zero rows won't affect the global maximum.
01919     update = Kokkos::Details::ArithTraits<typename IPT::mag_type>::zero ();
01920   }
01921 
01922   KOKKOS_INLINE_FUNCTION void
01923   join (volatile value_type& update,
01924         const volatile value_type& source) const {
01925     // max(update, source).  Any comparison predicate involving NaN
01926     // evaluates to false.  Thus, this will never assign NaN to
01927     // update, unless it contains NaN already.  The initial value is
01928     // zero, so NaNs won't propagate.  (This definition makes NaN into
01929     // an "invalid value," which is useful for statistics and other
01930     // applications that use NaN to indicate a "hole.")
01931     if (update < source) {
01932       update = source;
01933     }
01934   }
01935 
01936   // On device, write the reduction result to the output View.
01937   KOKKOS_INLINE_FUNCTION void final (value_type& dst) const {
01938     norm_ () = dst;
01939   }
01940 };
01941 
01942 
01943 
01944 // parallel_for functor for computing the square root, in place, of a
01945 // one-dimensional View.  This is useful for following the MPI
01946 // all-reduce that computes the square of the two-norms of the local
01947 // columns of a Tpetra::MultiVector.
01948 //
01949 // mfh 14 Jul 2014: Carter says that, for now, the standard idiom for
01950 // operating on a single scalar value on the device, is to run in a
01951 // parallel_for with N = 1.
01952 //
01953 // FIXME (mfh 14 Jul 2014): If we could assume C++11, this functor
01954 // would go away.
01955 template<class ViewType>
01956 class SquareRootFunctor {
01957 public:
01958   typedef typename ViewType::device_type device_type;
01959   typedef typename ViewType::size_type size_type;
01960 
01961   SquareRootFunctor (const ViewType& theView) : theView_ (theView) {}
01962 
01963   KOKKOS_INLINE_FUNCTION void operator() (const size_type i) const {
01964     typedef typename ViewType::value_type value_type;
01965     theView_(i) = Kokkos::Details::ArithTraits<value_type>::sqrt (theView_(i));
01966   }
01967 
01968 private:
01969   ViewType theView_;
01970 };
01971 
01972 
01973 template<class XVector,class YVector,int UNROLL>
01974 struct MV_DotProduct_Right_FunctorUnroll
01975 {
01976   typedef typename XVector::device_type         device_type;
01977   typedef typename XVector::size_type             size_type;
01978   typedef typename XVector::non_const_value_type          xvalue_type;
01979   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
01980   typedef typename IPT::dot_type               value_type[];
01981   size_type value_count;
01982 
01983   typedef typename XVector::const_type        x_const_type;
01984   typedef typename YVector::const_type        y_const_type;
01985 
01986   x_const_type  m_x ;
01987   y_const_type  m_y ;
01988 
01989   //--------------------------------------------------------------------------
01990 
01991   KOKKOS_INLINE_FUNCTION
01992   void operator()( const size_type i, value_type sum ) const
01993   {
01994 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
01995 #pragma unroll
01996 #endif
01997     for(size_type k=0;k<UNROLL;k++)
01998       sum[k]+= IPT::dot( m_x(i,k), m_y(i,k) );  // m_x(i,k) * m_y(i,k)
01999   }
02000   KOKKOS_INLINE_FUNCTION void init( volatile value_type update) const
02001   {
02002 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
02003 #pragma unroll
02004 #endif
02005     for(size_type k=0;k<UNROLL;k++)
02006       update[k] = 0;
02007   }
02008   KOKKOS_INLINE_FUNCTION void join( volatile value_type update ,
02009                                     const volatile value_type source) const
02010   {
02011 #ifdef KOKKOS_HAVE_PRAGMA_UNROLL
02012 #pragma unroll
02013 #endif
02014     for(size_type k=0;k<UNROLL;k++)
02015       update[k] += source[k] ;
02016   }
02017 };
02018 
02019 template<class rVector, class XVector, class YVector>
02020 rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y, int n = -1)
02021 {
02022     typedef typename XVector::size_type            size_type;
02023     const size_type numVecs = x.dimension(1);
02024 
02025     if(n<0) n = x.dimension_0();
02026     if(numVecs>16){
02027 
02028         MV_DotProduct_Right_FunctorVector<XVector,YVector> op;
02029         op.m_x = x;
02030         op.m_y = y;
02031         op.value_count = numVecs;
02032 
02033         Kokkos::parallel_reduce( n , op, r );
02034         return r;
02035      }
02036      else
02037      switch(numVecs) {
02038        case 16: {
02039            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,16> op;
02040            op.m_x = x;
02041            op.m_y = y;
02042            op.value_count = numVecs;
02043            Kokkos::parallel_reduce( n , op, r );
02044            break;
02045        }
02046        case 15: {
02047            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,15> op;
02048            op.m_x = x;
02049            op.m_y = y;
02050            op.value_count = numVecs;
02051            Kokkos::parallel_reduce( n , op, r );
02052            break;
02053        }
02054        case 14: {
02055            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,14> op;
02056            op.m_x = x;
02057            op.m_y = y;
02058            op.value_count = numVecs;
02059            Kokkos::parallel_reduce( n , op, r );
02060            break;
02061        }
02062        case 13: {
02063            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,13> op;
02064            op.m_x = x;
02065            op.m_y = y;
02066            op.value_count = numVecs;
02067            Kokkos::parallel_reduce( n , op, r );
02068            break;
02069        }
02070        case 12: {
02071            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,12> op;
02072            op.m_x = x;
02073            op.m_y = y;
02074            op.value_count = numVecs;
02075            Kokkos::parallel_reduce( n , op, r );
02076            break;
02077        }
02078        case 11: {
02079            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,11> op;
02080            op.m_x = x;
02081            op.m_y = y;
02082            op.value_count = numVecs;
02083            Kokkos::parallel_reduce( n , op, r );
02084            break;
02085        }
02086        case 10: {
02087            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,10> op;
02088            op.m_x = x;
02089            op.m_y = y;
02090            op.value_count = numVecs;
02091            Kokkos::parallel_reduce( n , op, r );
02092            break;
02093        }
02094        case 9: {
02095            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,9> op;
02096            op.m_x = x;
02097            op.m_y = y;
02098            op.value_count = numVecs;
02099            Kokkos::parallel_reduce( n , op, r );
02100            break;
02101        }
02102        case 8: {
02103            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,8> op;
02104            op.m_x = x;
02105            op.m_y = y;
02106            op.value_count = numVecs;
02107            Kokkos::parallel_reduce( n , op, r );
02108            break;
02109        }
02110        case 7: {
02111            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,7> op;
02112            op.m_x = x;
02113            op.m_y = y;
02114            op.value_count = numVecs;
02115            Kokkos::parallel_reduce( n , op, r );
02116            break;
02117        }
02118        case 6: {
02119            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,6> op;
02120            op.m_x = x;
02121            op.m_y = y;
02122            op.value_count = numVecs;
02123            Kokkos::parallel_reduce( n , op, r );
02124            break;
02125        }
02126        case 5: {
02127            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,5> op;
02128            op.m_x = x;
02129            op.m_y = y;
02130            op.value_count = numVecs;
02131            Kokkos::parallel_reduce( n , op, r );
02132            break;
02133        }
02134        case 4: {
02135            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,4> op;
02136            op.m_x = x;
02137            op.m_y = y;
02138            op.value_count = numVecs;
02139            Kokkos::parallel_reduce( n , op, r );
02140 
02141            break;
02142        }
02143        case 3: {
02144            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,3> op;
02145            op.m_x = x;
02146            op.m_y = y;
02147            op.value_count = numVecs;
02148            Kokkos::parallel_reduce( n , op, r );
02149            break;
02150        }
02151        case 2: {
02152            MV_DotProduct_Right_FunctorUnroll<XVector,YVector,2> op;
02153            op.m_x = x;
02154            op.m_y = y;
02155            op.value_count = numVecs;
02156            Kokkos::parallel_reduce( n , op, r );
02157            break;
02158        }
02159        case 1: {
02160          typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
02161          typedef View<typename YVector::const_value_type*,typename YVector::device_type> YVector1D;
02162 
02163          XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
02164          YVector1D y_1d = Kokkos::subview< YVector1D >( y , ALL(),0 );
02165          r[0] = V_Dot(x_1d,y_1d,n);
02166            break;
02167        }
02168      }
02169 
02170     return r;
02171 }
02172 
02173 /*------------------------------------------------------------------------------------------
02174  *-------------------------- Compute Sum -------------------------------------------------
02175  *------------------------------------------------------------------------------------------*/
02176 template<class XVector>
02177 struct MV_Sum_Functor
02178 {
02179   typedef typename XVector::device_type        device_type;
02180   typedef typename XVector::size_type            size_type;
02181   typedef typename XVector::non_const_value_type          xvalue_type;
02182   typedef xvalue_type                         value_type[];
02183 
02184   typename XVector::const_type m_x ;
02185   size_type value_count;
02186 
02187   MV_Sum_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
02188   //--------------------------------------------------------------------------
02189 
02190   KOKKOS_INLINE_FUNCTION
02191   void operator()( const size_type i, value_type sum ) const
02192   {
02193 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02194 #pragma ivdep
02195 #endif
02196 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02197 #pragma vector always
02198 #endif
02199     for(size_type k=0;k<value_count;k++){
02200       sum[k] += m_x(i,k);
02201     }
02202   }
02203 
02204   KOKKOS_INLINE_FUNCTION
02205   void init( value_type update) const
02206   {
02207 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02208 #pragma ivdep
02209 #endif
02210 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02211 #pragma vector always
02212 #endif
02213     for(size_type k=0;k<value_count;k++)
02214       update[k] = 0;
02215   }
02216 
02217   KOKKOS_INLINE_FUNCTION
02218   void join( volatile value_type  update ,
02219                                     const volatile value_type  source ) const
02220   {
02221 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02222 #pragma ivdep
02223 #endif
02224 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02225 #pragma vector always
02226 #endif
02227     for(size_type k=0;k<value_count;k++){
02228       update[k] += source[k];
02229     }
02230   }
02231 };
02232 
02233 
02234 template<class normVector, class VectorType>
02235 normVector MV_Sum(const normVector &r, const VectorType & x, int n = -1)
02236 {
02237   if (n < 0) {
02238     n = x.dimension_0 ();
02239   }
02240 
02241   Kokkos::parallel_reduce (n , MV_Sum_Functor<VectorType> (x), r);
02242   return r;
02243 }
02244 
02245 /*------------------------------------------------------------------------------------------
02246  *-------------------------- Compute Norm1--------------------------------------------------
02247  *------------------------------------------------------------------------------------------*/
02248 template<class XVector>
02249 struct MV_Norm1_Functor
02250 {
02251   typedef typename XVector::device_type        device_type;
02252   typedef typename XVector::size_type            size_type;
02253   typedef typename XVector::non_const_value_type          xvalue_type;
02254   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
02255   typedef typename IPT::dot_type               value_type[];
02256 
02257   typename XVector::const_type m_x ;
02258   size_type value_count;
02259 
02260   MV_Norm1_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
02261   //--------------------------------------------------------------------------
02262 
02263   KOKKOS_INLINE_FUNCTION
02264   void operator()( const size_type i, value_type sum ) const
02265   {
02266 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02267 #pragma ivdep
02268 #endif
02269 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02270 #pragma vector always
02271 #endif
02272     for(size_type k=0;k<value_count;k++){
02273       sum[k] += Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k));
02274     }
02275   }
02276 
02277   KOKKOS_INLINE_FUNCTION
02278   void init( value_type update) const
02279   {
02280 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02281 #pragma ivdep
02282 #endif
02283 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02284 #pragma vector always
02285 #endif
02286     for(size_type k=0;k<value_count;k++)
02287       update[k] = 0;
02288   }
02289 
02290   KOKKOS_INLINE_FUNCTION
02291   void join( volatile value_type  update ,
02292                                     const volatile value_type  source ) const
02293   {
02294 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02295 #pragma ivdep
02296 #endif
02297 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02298 #pragma vector always
02299 #endif
02300     for(size_type k=0;k<value_count;k++){
02301       update[k] += source[k];
02302     }
02303   }
02304 };
02305 
02306 template<class normVector, class VectorType>
02307 normVector MV_Norm1(const normVector &r, const VectorType & x, int n = -1)
02308 {
02309   if (n < 0) {
02310     n = x.dimension_0 ();
02311   }
02312 
02313   Kokkos::parallel_reduce (n , MV_Norm1_Functor<VectorType> (x), r);
02314   return r;
02315 }
02316 
02317 /*------------------------------------------------------------------------------------------
02318  *-------------------------- Compute NormInf--------------------------------------------------
02319  *------------------------------------------------------------------------------------------*/
02320 template<class XVector>
02321 struct MV_NormInf_Functor
02322 {
02323   typedef typename XVector::device_type        device_type;
02324   typedef typename XVector::size_type            size_type;
02325   typedef typename XVector::non_const_value_type          xvalue_type;
02326   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
02327   typedef typename IPT::dot_type               value_type[];
02328 
02329   typename XVector::const_type m_x ;
02330   size_type value_count;
02331 
02332   MV_NormInf_Functor(XVector x):m_x(x),value_count(x.dimension_1()) {}
02333   //--------------------------------------------------------------------------
02334   KOKKOS_INLINE_FUNCTION
02335   void operator()( const size_type i, value_type sum ) const
02336   {
02337 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02338 #pragma ivdep
02339 #endif
02340 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02341 #pragma vector always
02342 #endif
02343     for(size_type k=0;k<value_count;k++){
02344       sum[k] = MAX(sum[k],Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i,k)));
02345     }
02346   }
02347 
02348   KOKKOS_INLINE_FUNCTION void init( value_type update) const
02349   {
02350 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02351 #pragma ivdep
02352 #endif
02353 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02354 #pragma vector always
02355 #endif
02356     for(size_type k=0;k<value_count;k++)
02357       update[k] = 0;
02358   }
02359   KOKKOS_INLINE_FUNCTION void join( volatile value_type  update ,
02360                                     const volatile value_type  source ) const
02361   {
02362 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02363 #pragma ivdep
02364 #endif
02365 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02366 #pragma vector always
02367 #endif
02368     for(size_type k=0;k<value_count;k++){
02369       update[k] = MAX(update[k],source[k]);
02370     }
02371   }
02372 };
02373 
02374 
02375 template<class normVector, class VectorType>
02376 normVector MV_NormInf(const normVector &r, const VectorType & x, int n = -1)
02377 {
02378   if (n < 0) {
02379     n = x.dimension_0 ();
02380   }
02381 
02382   Kokkos::parallel_reduce (n , MV_NormInf_Functor<VectorType> (x), r);
02383   return r;
02384 }
02385 
02386 /*------------------------------------------------------------------------------------------
02387  *-------------------------- Compute Weighted Dot-product (sum(x_i/w_i)^2)----------------------------------
02388  *------------------------------------------------------------------------------------------*/
02389 template<class WeightVector, class XVector,int WeightsRanks>
02390 struct MV_DotWeighted_Functor{};
02391 
02392 template<class WeightVector, class XVector>
02393 struct MV_DotWeighted_Functor<WeightVector,XVector,1>
02394 {
02395   typedef typename XVector::device_type        device_type;
02396   typedef typename XVector::size_type            size_type;
02397   typedef typename XVector::non_const_value_type          xvalue_type;
02398   typedef typename WeightVector::non_const_value_type     wvalue_type;
02399   typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
02400   typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
02401   typedef typename XIPT::dot_type               value_type[];
02402 
02403   typename WeightVector::const_type m_w ;
02404   typename XVector::const_type m_x ;
02405   size_type value_count;
02406 
02407   MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
02408   //--------------------------------------------------------------------------
02409   KOKKOS_INLINE_FUNCTION
02410   void operator()( const size_type i, value_type sum ) const
02411   {
02412 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02413 #pragma ivdep
02414 #endif
02415 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02416 #pragma vector always
02417 #endif
02418     for(size_type k=0;k<value_count;k++){
02419       sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i), m_w(i) );
02420     }
02421   }
02422 
02423   KOKKOS_INLINE_FUNCTION void init( value_type update) const
02424   {
02425 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02426 #pragma ivdep
02427 #endif
02428 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02429 #pragma vector always
02430 #endif
02431     for(size_type k=0;k<value_count;k++)
02432       update[k] = 0;
02433   }
02434   KOKKOS_INLINE_FUNCTION void join( volatile value_type  update ,
02435                                     const volatile value_type  source ) const
02436   {
02437 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02438 #pragma ivdep
02439 #endif
02440 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02441 #pragma vector always
02442 #endif
02443     for(size_type k=0;k<value_count;k++){
02444       update[k] += source[k];
02445     }
02446   }
02447 };
02448 
02449 template<class WeightVector, class XVector>
02450 struct MV_DotWeighted_Functor<WeightVector,XVector,2>
02451 {
02452   typedef typename XVector::device_type        device_type;
02453   typedef typename XVector::size_type            size_type;
02454   typedef typename XVector::non_const_value_type          xvalue_type;
02455   typedef typename WeightVector::non_const_value_type     wvalue_type;
02456   typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
02457   typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
02458   typedef typename XIPT::dot_type               value_type[];
02459 
02460   typename WeightVector::const_type m_w ;
02461   typename XVector::const_type m_x ;
02462   size_type value_count;
02463 
02464   MV_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x),value_count(x.dimension_1()) {}
02465   //--------------------------------------------------------------------------
02466   KOKKOS_INLINE_FUNCTION
02467   void operator()( const size_type i, value_type sum ) const
02468   {
02469 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02470 #pragma ivdep
02471 #endif
02472 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02473 #pragma vector always
02474 #endif
02475     for(size_type k=0;k<value_count;k++){
02476       sum[k] += XIPT::dot( m_x(i,k), m_x(i,k) ) / WIPT::dot( m_w(i,k), m_w(i,k) );
02477     }
02478   }
02479 
02480   KOKKOS_INLINE_FUNCTION void init( value_type update) const
02481   {
02482 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02483 #pragma ivdep
02484 #endif
02485 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02486 #pragma vector always
02487 #endif
02488     for(size_type k=0;k<value_count;k++)
02489       update[k] = 0;
02490   }
02491   KOKKOS_INLINE_FUNCTION void join( volatile value_type  update ,
02492                                     const volatile value_type  source ) const
02493   {
02494 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
02495 #pragma ivdep
02496 #endif
02497 #ifdef KOKKOS_HAVE_PRAGMA_VECTOR
02498 #pragma vector always
02499 #endif
02500     for(size_type k=0;k<value_count;k++){
02501       update[k] += source[k];
02502     }
02503   }
02504 };
02505 
02506 template<class rVector, class WeightVector, class XVector>
02507 rVector MV_DotWeighted(const rVector &r, const WeightVector & w, const XVector & x, int n = -1)
02508 {
02509   if (n < 0) {
02510     n = x.dimension_0 ();
02511   }
02512 
02513   typedef MV_DotWeighted_Functor<WeightVector, XVector, WeightVector::Rank> functor_type;
02514   Kokkos::parallel_reduce (n , functor_type (w, x), r);
02515   return r;
02516 }
02517 
02518 /*------------------------------------------------------------------------------------------
02519  *-------------------------- Multiply with scalar: y = a * x -------------------------------
02520  *------------------------------------------------------------------------------------------*/
02521 template<class RVector, class aVector, class XVector>
02522 struct V_MulScalarFunctor
02523 {
02524   typedef typename XVector::device_type        device_type;
02525   typedef typename XVector::size_type            size_type;
02526 
02527   RVector m_r;
02528   typename XVector::const_type m_x ;
02529   typename aVector::const_type m_a ;
02530   //--------------------------------------------------------------------------
02531 
02532   KOKKOS_INLINE_FUNCTION
02533   void operator()( const size_type i) const
02534   {
02535     m_r(i) = m_a[0]*m_x(i);
02536   }
02537 };
02538 
02539 template<class aVector, class XVector>
02540 struct V_MulScalarFunctorSelf
02541 {
02542   typedef typename XVector::device_type        device_type;
02543   typedef typename XVector::size_type            size_type;
02544 
02545   XVector m_x;
02546   typename aVector::const_type   m_a ;
02547   //--------------------------------------------------------------------------
02548 
02549   KOKKOS_INLINE_FUNCTION
02550   void operator()( const size_type i) const
02551   {
02552     m_x(i) *= m_a(0);
02553   }
02554 };
02555 
02556 template<class RVector, class DataType,class Layout,class Device, class MemoryManagement,class Specialisation, class XVector>
02557 RVector V_MulScalar( const RVector & r, const typename Kokkos::View<DataType,Layout,Device,MemoryManagement,Specialisation> & a, const XVector & x)
02558 {
02559   typedef       typename Kokkos::View<DataType,Layout,Device,MemoryManagement> aVector;
02560   if(r==x) {
02561     V_MulScalarFunctorSelf<aVector,XVector> op ;
02562           op.m_x = x ;
02563           op.m_a = a ;
02564           Kokkos::parallel_for( x.dimension(0) , op );
02565           return r;
02566   }
02567 
02568   V_MulScalarFunctor<RVector,aVector,XVector> op ;
02569   op.m_r = r ;
02570   op.m_x = x ;
02571   op.m_a = a ;
02572   Kokkos::parallel_for( x.dimension(0) , op );
02573   return r;
02574 }
02575 
02576 template<class RVector, class XVector>
02577 struct V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector>
02578 {
02579   typedef typename XVector::device_type        device_type;
02580   typedef typename XVector::size_type            size_type;
02581 
02582   RVector m_r;
02583   typename XVector::const_type m_x ;
02584   typename XVector::value_type m_a ;
02585   //--------------------------------------------------------------------------
02586 
02587   KOKKOS_INLINE_FUNCTION
02588   void operator()( const size_type i) const
02589   {
02590     m_r(i) = m_a*m_x(i);
02591   }
02592 };
02593 
02594 template<class XVector>
02595 struct V_MulScalarFunctorSelf<typename XVector::non_const_value_type,XVector>
02596 {
02597   typedef typename XVector::device_type        device_type;
02598   typedef typename XVector::size_type            size_type;
02599 
02600   XVector m_x;
02601   typename XVector::non_const_value_type m_a ;
02602   //--------------------------------------------------------------------------
02603 
02604   KOKKOS_INLINE_FUNCTION
02605   void operator()( const size_type i) const
02606   {
02607     m_x(i) *= m_a;
02608   }
02609 };
02610 
02611 
02612 template<class RVector, class XVector>
02613 RVector V_MulScalar( const RVector & r, const typename XVector::non_const_value_type &a, const XVector & x)
02614 {
02615   if(r==x) {
02616     V_MulScalarFunctorSelf<typename RVector::value_type,RVector> op ;
02617           op.m_x = r ;
02618           op.m_a = a ;
02619           Kokkos::parallel_for( x.dimension(0) , op );
02620           return r;
02621   }
02622 
02623   V_MulScalarFunctor<RVector,typename XVector::non_const_value_type,XVector> op ;
02624   op.m_r = r ;
02625   op.m_x = x ;
02626   op.m_a = a ;
02627   Kokkos::parallel_for( x.dimension(0) , op );
02628   return r;
02629 }
02630 
02631 template<class RVector, class XVector, class YVector, int scalar_x, int scalar_y>
02632 struct V_AddVectorFunctor
02633 {
02634   typedef typename RVector::device_type        device_type;
02635   typedef typename RVector::size_type            size_type;
02636   typedef typename XVector::non_const_value_type           value_type;
02637   RVector   m_r ;
02638   typename XVector::const_type  m_x ;
02639   typename YVector::const_type   m_y ;
02640   const value_type m_a;
02641   const value_type m_b;
02642 
02643   //--------------------------------------------------------------------------
02644   V_AddVectorFunctor(const RVector& r, const value_type& a,const XVector& x,const value_type& b,const YVector& y):
02645           m_r(r),m_x(x),m_y(y),m_a(a),m_b(b)
02646   { }
02647 
02648   KOKKOS_INLINE_FUNCTION
02649   void operator()( const size_type i ) const
02650   {
02651         if((scalar_x==1)&&(scalar_y==1))
02652             m_r(i) = m_x(i) + m_y(i);
02653         if((scalar_x==1)&&(scalar_y==-1))
02654             m_r(i) = m_x(i) - m_y(i);
02655         if((scalar_x==-1)&&(scalar_y==-1))
02656             m_r(i) = -m_x(i) - m_y(i);
02657         if((scalar_x==-1)&&(scalar_y==1))
02658             m_r(i) = -m_x(i) + m_y(i);
02659         if((scalar_x==2)&&(scalar_y==1))
02660             m_r(i) = m_a*m_x(i) + m_y(i);
02661         if((scalar_x==2)&&(scalar_y==-1))
02662             m_r(i) = m_a*m_x(i) - m_y(i);
02663         if((scalar_x==1)&&(scalar_y==2))
02664             m_r(i) = m_x(i) + m_b*m_y(i);
02665         if((scalar_x==-1)&&(scalar_y==2))
02666             m_r(i) = -m_x(i) + m_b*m_y(i);
02667         if((scalar_x==2)&&(scalar_y==2))
02668             m_r(i) = m_a*m_x(i) + m_b*m_y(i);
02669   }
02670 };
02671 
02672 template<class RVector, class XVector, int scalar_x>
02673 struct V_AddVectorSelfFunctor
02674 {
02675   typedef typename RVector::device_type        device_type;
02676   typedef typename RVector::size_type            size_type;
02677   typedef typename XVector::non_const_value_type      value_type;
02678   RVector   m_r ;
02679   typename XVector::const_type  m_x ;
02680   const value_type m_a;
02681 
02682   V_AddVectorSelfFunctor(const RVector& r, const value_type& a,const XVector& x):
02683     m_r(r),m_x(x),m_a(a)
02684   { }
02685 
02686   KOKKOS_INLINE_FUNCTION
02687   void operator()( const size_type i ) const
02688   {
02689   if((scalar_x==1))
02690       m_r(i) += m_x(i);
02691   if((scalar_x==-1))
02692       m_r(i) -= m_x(i);
02693   if((scalar_x==2))
02694       m_r(i) += m_a*m_x(i);
02695   }
02696 };
02697 template<class RVector, class XVector, class YVector, int doalpha, int dobeta>
02698 RVector V_AddVector( const RVector & r,const typename XVector::non_const_value_type &av,const XVector & x,
02699                 const typename XVector::non_const_value_type &bv, const YVector & y,int n=-1)
02700 {
02701   if(n == -1) n = x.dimension_0();
02702   if(r.ptr_on_device()==x.ptr_on_device() && doalpha == 1) {
02703     V_AddVectorSelfFunctor<RVector,YVector,dobeta> f(r,bv,y);
02704     parallel_for(n,f);
02705   } else if(r.ptr_on_device()==y.ptr_on_device() && dobeta == 1) {
02706     V_AddVectorSelfFunctor<RVector,XVector,doalpha> f(r,av,x);
02707     parallel_for(n,f);
02708   } else {
02709     V_AddVectorFunctor<RVector,XVector,YVector,doalpha,dobeta> f(r,av,x,bv,y);
02710     parallel_for(n,f);
02711   }
02712   return r;
02713 }
02714 
02715 template<class RVector, class XVector, class YVector>
02716 RVector V_AddVector( const RVector & r,const typename XVector::non_const_value_type &av,const XVector & x,
02717                 const typename YVector::non_const_value_type &bv, const YVector & y, int n = -1,
02718                 int a=2,int b=2)
02719 {
02720         if(a==-1) {
02721           if(b==-1)
02722                   V_AddVector<RVector,XVector,YVector,-1,-1>(r,av,x,bv,y,n);
02723           else if(b==0)
02724                   V_AddVector<RVector,XVector,YVector,-1,0>(r,av,x,bv,y,n);
02725           else if(b==1)
02726               V_AddVector<RVector,XVector,YVector,-1,1>(r,av,x,bv,y,n);
02727           else
02728               V_AddVector<RVector,XVector,YVector,-1,2>(r,av,x,bv,y,n);
02729         } else if (a==0) {
02730           if(b==-1)
02731                   V_AddVector<RVector,XVector,YVector,0,-1>(r,av,x,bv,y,n);
02732           else if(b==0)
02733                   V_AddVector<RVector,XVector,YVector,0,0>(r,av,x,bv,y,n);
02734           else if(b==1)
02735               V_AddVector<RVector,XVector,YVector,0,1>(r,av,x,bv,y,n);
02736           else
02737               V_AddVector<RVector,XVector,YVector,0,2>(r,av,x,bv,y,n);
02738         } else if (a==1) {
02739           if(b==-1)
02740                   V_AddVector<RVector,XVector,YVector,1,-1>(r,av,x,bv,y,n);
02741           else if(b==0)
02742                   V_AddVector<RVector,XVector,YVector,1,0>(r,av,x,bv,y,n);
02743           else if(b==1)
02744               V_AddVector<RVector,XVector,YVector,1,1>(r,av,x,bv,y,n);
02745           else
02746               V_AddVector<RVector,XVector,YVector,1,2>(r,av,x,bv,y,n);
02747         } else if (a==2) {
02748           if(b==-1)
02749                   V_AddVector<RVector,XVector,YVector,2,-1>(r,av,x,bv,y,n);
02750           else if(b==0)
02751                   V_AddVector<RVector,XVector,YVector,2,0>(r,av,x,bv,y,n);
02752           else if(b==1)
02753               V_AddVector<RVector,XVector,YVector,2,1>(r,av,x,bv,y,n);
02754           else
02755               V_AddVector<RVector,XVector,YVector,2,2>(r,av,x,bv,y,n);
02756         }
02757         return r;
02758 }
02759 
02760 template<class RVector,class XVector,class YVector>
02761 RVector V_Add( const RVector & r, const XVector & x, const YVector & y, int n=-1)
02762 {
02763         return V_AddVector( r,1,x,1,y,n,1,1);
02764 }
02765 
02766 template<class RVector,class XVector,class YVector>
02767 RVector V_Add( const RVector & r, const XVector & x, const typename XVector::non_const_value_type  & bv, const YVector & y,int n=-1 )
02768 {
02769   int b = 2;
02770   //if(bv == 0) b = 0;
02771   //if(bv == 1) b = 1;
02772   //if(bv == -1) b = -1;
02773   return V_AddVector(r,bv,x,bv,y,n,1,b);
02774 }
02775 
02776 template<class RVector,class XVector,class YVector>
02777 RVector V_Add( const RVector & r, const typename XVector::non_const_value_type  & av, const XVector & x, const typename XVector::non_const_value_type  & bv, const YVector & y,int n=-1 )
02778 {
02779   int a = 2;
02780   int b = 2;
02781   //if(av == 0) a = 0;
02782   //if(av == 1) a = 1;
02783   //if(av == -1) a = -1;
02784   //if(bv == 0) b = 0;
02785   //if(bv == 1) b = 1;
02786   //if(bv == -1) b = -1;
02787 
02788   return V_AddVector(r,av,x,bv,y,n,a,b);
02789 }
02790 
02791 template<class XVector, class YVector>
02792 struct V_DotFunctor
02793 {
02794   typedef typename XVector::device_type          device_type;
02795   typedef typename XVector::size_type              size_type;
02796   typedef typename XVector::non_const_value_type xvalue_type;
02797   typedef Details::InnerProductSpaceTraits<xvalue_type>  IPT;
02798   typedef typename IPT::dot_type                  value_type;
02799   XVector  m_x ;
02800   YVector  m_y ;
02801 
02802   //--------------------------------------------------------------------------
02803   V_DotFunctor(const XVector& x,const YVector& y):
02804     m_x(x),m_y(y)
02805   { }
02806 
02807   KOKKOS_INLINE_FUNCTION
02808   void operator()( const size_type &i, value_type &sum ) const
02809   {
02810     sum += IPT::dot( m_x(i), m_y(i) );  // m_x(i) * m_y(i)
02811   }
02812 
02813   KOKKOS_INLINE_FUNCTION
02814   void init( volatile value_type &update) const
02815   {
02816     update = 0;
02817   }
02818 
02819   KOKKOS_INLINE_FUNCTION
02820   void join( volatile value_type &update ,
02821              const volatile value_type &source ) const
02822   {
02823     update += source ;
02824   }
02825 };
02826 
02827 template<class XVector, class YVector>
02828 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
02829 V_Dot( const XVector & x, const YVector & y, int n = -1)
02830 {
02831   typedef V_DotFunctor<XVector,YVector> Functor;
02832   Functor f(x,y);
02833   if (n<0) n = x.dimension_0();
02834   typename Functor::value_type ret_val;
02835   parallel_reduce(n,f,ret_val);
02836   return ret_val;
02837 }
02838 
02839 template<class WeightVector, class XVector>
02840 struct V_DotWeighted_Functor
02841 {
02842   typedef typename XVector::device_type        device_type;
02843   typedef typename XVector::size_type            size_type;
02844   typedef typename XVector::non_const_value_type          xvalue_type;
02845   typedef typename WeightVector::non_const_value_type     wvalue_type;
02846   typedef Details::InnerProductSpaceTraits<xvalue_type> XIPT;
02847   typedef Details::InnerProductSpaceTraits<wvalue_type> WIPT;
02848   typedef typename XIPT::dot_type               value_type;
02849 
02850   typename WeightVector::const_type m_w ;
02851   typename XVector::const_type m_x ;
02852 
02853   V_DotWeighted_Functor(WeightVector w, XVector x):m_w(w),m_x(x) {}
02854   //--------------------------------------------------------------------------
02855   KOKKOS_INLINE_FUNCTION
02856   void operator()( const size_type i, value_type& sum ) const
02857   {
02858     sum += XIPT::dot( m_x(i), m_x(i) )/ WIPT::dot( m_w(i), m_w(i) );
02859   }
02860 
02861 
02862   KOKKOS_INLINE_FUNCTION void init( value_type & update) const
02863   {
02864       update = 0;
02865   }
02866   KOKKOS_INLINE_FUNCTION void join( volatile value_type & update ,
02867                                     const volatile value_type & source ) const
02868   {
02869       update += source;
02870   }
02871 };
02872 
02873 template<class WeightVector, class XVector>
02874 typename Details::InnerProductSpaceTraits<typename XVector::non_const_value_type>::dot_type
02875 V_DotWeighted(const WeightVector & w, const XVector & x, int n = -1)
02876 {
02877   if (n < 0) {
02878     n = x.dimension_0 ();
02879   }
02880 
02881   typedef Details::InnerProductSpaceTraits<typename XVector::non_const_value_type> IPT;
02882   typedef typename IPT::dot_type value_type;
02883   value_type ret_val;
02884 
02885   typedef V_DotWeighted_Functor<WeightVector,XVector> functor_type;
02886   Kokkos::parallel_reduce (n , functor_type (w, x), ret_val);
02887   return ret_val;
02888 }
02889 
02890 /*------------------------------------------------------------------------------------------
02891  *-------------------------- Compute Sum -------------------------------------------------
02892  *------------------------------------------------------------------------------------------*/
02893 template<class XVector>
02894 struct V_Sum_Functor
02895 {
02896   typedef typename XVector::device_type        device_type;
02897   typedef typename XVector::size_type            size_type;
02898   typedef typename XVector::non_const_value_type          xvalue_type;
02899   typedef xvalue_type                           value_type;
02900 
02901   typename XVector::const_type m_x ;
02902 
02903   V_Sum_Functor(XVector x):m_x(x) {}
02904   //--------------------------------------------------------------------------
02905 
02906   KOKKOS_INLINE_FUNCTION
02907   void operator()( const size_type i, value_type& sum ) const
02908   {
02909       sum += m_x(i);
02910   }
02911 
02912   KOKKOS_INLINE_FUNCTION
02913   void init( value_type& update) const
02914   {
02915       update = 0;
02916   }
02917 
02918   KOKKOS_INLINE_FUNCTION
02919   void join( volatile value_type&  update ,
02920                                     const volatile value_type&  source ) const
02921   {
02922       update += source;
02923   }
02924 };
02925 
02926 
02927 template<class VectorType>
02928 typename VectorType::non_const_value_type
02929 V_Sum (const VectorType& x, int n = -1)
02930 {
02931   if (n < 0) {
02932     n = x.dimension_0 ();
02933   }
02934 
02935   typedef typename VectorType::non_const_value_type value_type;
02936   value_type ret_val;
02937   Kokkos::parallel_reduce (n, V_Sum_Functor<VectorType> (x), ret_val);
02938   return ret_val;
02939 }
02940 
02941 /*------------------------------------------------------------------------------------------
02942  *-------------------------- Compute Norm1--------------------------------------------------
02943  *------------------------------------------------------------------------------------------*/
02944 template<class XVector>
02945 struct V_Norm1_Functor
02946 {
02947   typedef typename XVector::device_type        device_type;
02948   typedef typename XVector::size_type            size_type;
02949   typedef typename XVector::non_const_value_type          xvalue_type;
02950   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
02951   typedef typename IPT::dot_type               value_type;
02952 
02953   typename XVector::const_type m_x ;
02954 
02955   V_Norm1_Functor(XVector x):m_x(x) {}
02956   //--------------------------------------------------------------------------
02957   KOKKOS_INLINE_FUNCTION
02958   void operator()( const size_type i, value_type& sum ) const
02959   {
02960     sum += Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i));
02961   }
02962   KOKKOS_INLINE_FUNCTION void init( value_type& update) const
02963   {
02964     update = 0;
02965   }
02966   KOKKOS_INLINE_FUNCTION void join( volatile value_type&  update ,
02967                                     const volatile value_type&  source ) const
02968   {
02969     update += source;
02970   }
02971 };
02972 
02973 template<class VectorType>
02974 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
02975 V_Norm1( const VectorType & x, int n = -1)
02976 {
02977   if (n < 0) {
02978     n = x.dimension_0 ();
02979   }
02980 
02981   typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
02982   typedef typename IPT::dot_type value_type;
02983   value_type ret_val;
02984   Kokkos::parallel_reduce (n, V_Norm1_Functor<VectorType> (x), ret_val);
02985   return ret_val;
02986 }
02987 /*------------------------------------------------------------------------------------------
02988  *-------------------------- Compute NormInf--------------------------------------------------
02989  *------------------------------------------------------------------------------------------*/
02990 template<class XVector>
02991 struct V_NormInf_Functor
02992 {
02993   typedef typename XVector::device_type        device_type;
02994   typedef typename XVector::size_type            size_type;
02995   typedef typename XVector::non_const_value_type          xvalue_type;
02996   typedef Details::InnerProductSpaceTraits<xvalue_type> IPT;
02997   typedef typename IPT::dot_type               value_type;
02998 
02999   typename XVector::const_type m_x ;
03000 
03001   V_NormInf_Functor(XVector x):m_x(x) {}
03002   //--------------------------------------------------------------------------
03003   KOKKOS_INLINE_FUNCTION
03004   void operator()( const size_type i, value_type& sum ) const
03005   {
03006     sum = MAX(sum,Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i)));
03007   }
03008 
03009   KOKKOS_INLINE_FUNCTION void init( value_type& update) const
03010   {
03011     update = 0;
03012   }
03013   KOKKOS_INLINE_FUNCTION void join( volatile value_type&  update ,
03014                                     const volatile value_type&  source ) const
03015   {
03016     update = MAX(update,source);
03017   }
03018 };
03019 
03020 template<class VectorType>
03021 typename Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type>::dot_type
03022 V_NormInf (const VectorType& x, int n = -1)
03023 {
03024   if (n < 0) {
03025     n = x.dimension_0 ();
03026   }
03027 
03028   typedef Details::InnerProductSpaceTraits<typename VectorType::non_const_value_type> IPT;
03029   typedef typename IPT::dot_type value_type;
03030   value_type ret_val;
03031   Kokkos::parallel_reduce (n, V_NormInf_Functor<VectorType> (x), ret_val);
03032   return ret_val;
03033 }
03034 
03035 /*------------------------------------------------------------------------------------------
03036  *-------------------------- Reciprocal element wise: y[i] = 1/x[i] ------------------------
03037  *------------------------------------------------------------------------------------------*/
03038 template<class RVector, class XVector>
03039 struct V_ReciprocalFunctor
03040 {
03041   typedef typename XVector::device_type        device_type;
03042   typedef typename XVector::size_type            size_type;
03043 
03044   RVector m_r;
03045   typename XVector::const_type m_x ;
03046 
03047   V_ReciprocalFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
03048   //--------------------------------------------------------------------------
03049 
03050   KOKKOS_INLINE_FUNCTION
03051   void operator()( const size_type i) const
03052   {
03053     m_r(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i);
03054   }
03055 };
03056 
03057 template<class XVector>
03058 struct V_ReciprocalSelfFunctor
03059 {
03060   typedef typename XVector::device_type        device_type;
03061   typedef typename XVector::size_type            size_type;
03062 
03063   XVector m_x ;
03064 
03065   V_ReciprocalSelfFunctor(XVector x):m_x(x) {}
03066   //--------------------------------------------------------------------------
03067 
03068   KOKKOS_INLINE_FUNCTION
03069   void operator()( const size_type i) const
03070   {
03071      m_x(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::one() / m_x(i);
03072   }
03073 };
03074 
03075 template<class RVector, class XVector>
03076 RVector V_Reciprocal( const RVector & r, const XVector & x)
03077 {
03078   // TODO: Add error check (didn't link for some reason?)
03079   /*if(r.dimension_0() != x.dimension_0())
03080     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Reciprocal -- dimension(0) of r and x don't match");
03081   */
03082 
03083 
03084   if(r==x) {
03085     V_ReciprocalSelfFunctor<XVector> op(x) ;
03086     Kokkos::parallel_for( x.dimension_0() , op );
03087     return r;
03088   }
03089 
03090   V_ReciprocalFunctor<RVector,XVector> op(r,x) ;
03091   Kokkos::parallel_for( x.dimension_0() , op );
03092   return r;
03093 }
03094 
03095 /*------------------------------------------------------------------------------------------
03096  *------------------- Reciprocal element wise with threshold: x[i] = 1/x[i] ----------------
03097  *------------------------------------------------------------------------------------------*/
03098 template<class XVector>
03099 struct V_ReciprocalThresholdSelfFunctor
03100 {
03101   typedef typename XVector::device_type           device_type;
03102   typedef typename XVector::size_type               size_type;
03103   typedef typename XVector::non_const_value_type   value_type;
03104   typedef Kokkos::Details::ArithTraits<value_type>        KAT;
03105   typedef typename KAT::mag_type                     mag_type;
03106 
03107   const XVector    m_x;
03108   const value_type m_min_val;
03109   const value_type m_min_val_mag;
03110 
03111   V_ReciprocalThresholdSelfFunctor(const XVector& x, const value_type& min_val) :
03112     m_x(x), m_min_val(min_val), m_min_val_mag(KAT::abs(min_val)) {}
03113   //--------------------------------------------------------------------------
03114 
03115   KOKKOS_INLINE_FUNCTION
03116   void operator()( const size_type i) const
03117   {
03118     if (KAT::abs(m_x(i)) < m_min_val_mag)
03119       m_x(i) = m_min_val;
03120     else
03121       m_x(i) = KAT::one() / m_x(i);
03122   }
03123 };
03124 
03125 template<class XVector>
03126 XVector V_ReciprocalThreshold( const XVector & x, const typename XVector::non_const_value_type& min_val )
03127 {
03128   V_ReciprocalThresholdSelfFunctor<XVector> op(x,min_val) ;
03129   Kokkos::parallel_for( x.dimension_0() , op );
03130   return x;
03131 }
03132 
03133 /*------------------------------------------------------------------------------------------
03134  *-------------------------- Abs element wise: y[i] = abs(x[i]) ------------------------
03135  *------------------------------------------------------------------------------------------*/
03136 template<class RVector, class XVector>
03137 struct V_AbsFunctor
03138 {
03139   typedef typename XVector::device_type        device_type;
03140   typedef typename XVector::size_type            size_type;
03141 
03142   RVector m_r;
03143   typename XVector::const_type m_x ;
03144 
03145   V_AbsFunctor(RVector r, XVector x):m_r(r),m_x(x) {}
03146   //--------------------------------------------------------------------------
03147 
03148   KOKKOS_INLINE_FUNCTION
03149   void operator()( const size_type i) const
03150   {
03151     m_r(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i));
03152   }
03153 };
03154 
03155 template<class XVector>
03156 struct V_AbsSelfFunctor
03157 {
03158   typedef typename XVector::device_type        device_type;
03159   typedef typename XVector::size_type            size_type;
03160 
03161   XVector m_x ;
03162 
03163   V_AbsSelfFunctor(XVector x):m_x(x) {}
03164   //--------------------------------------------------------------------------
03165 
03166   KOKKOS_INLINE_FUNCTION
03167   void operator()( const size_type i) const
03168   {
03169      m_x(i) = Kokkos::Details::ArithTraits<typename XVector::non_const_value_type>::abs(m_x(i));
03170   }
03171 };
03172 
03173 template<class RVector, class XVector>
03174 RVector V_Abs( const RVector & r, const XVector & x)
03175 {
03176   // TODO: Add error check (didn't link for some reason?)
03177   /*if(r.dimension_0() != x.dimension_0())
03178     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_Abs -- dimension(0) of r and x don't match");
03179   */
03180 
03181 
03182   if(r==x) {
03183     V_AbsSelfFunctor<XVector> op(x) ;
03184     Kokkos::parallel_for( x.dimension_0() , op );
03185     return r;
03186   }
03187 
03188   V_AbsFunctor<RVector,XVector> op(r,x) ;
03189   Kokkos::parallel_for( x.dimension_0() , op );
03190   return r;
03191 }
03192 
03193 /*------------------------------------------------------------------------------------------
03194  *------ ElementWiseMultiply element wise: C(i) = c*C(i) + ab*A(i)*B(i) --------------
03195  *------------------------------------------------------------------------------------------*/
03196 template<class CVector, class AVector, class BVector>
03197 struct V_ElementWiseMultiplyFunctor
03198 {
03199   typedef typename CVector::device_type        device_type;
03200   typedef typename CVector::size_type            size_type;
03201 
03202   typename CVector::const_value_type m_c;
03203   CVector m_C;
03204   typename AVector::const_value_type m_ab;
03205   typename AVector::const_type m_A ;
03206   typename BVector::const_type m_B ;
03207 
03208   V_ElementWiseMultiplyFunctor(
03209       typename CVector::const_value_type c,
03210       CVector C,
03211       typename AVector::const_value_type ab,
03212       typename AVector::const_type A,
03213       typename BVector::const_type B):
03214       m_c(c),m_C(C),m_ab(ab),m_A(A),m_B(B)
03215       {}
03216   //--------------------------------------------------------------------------
03217 
03218   KOKKOS_INLINE_FUNCTION
03219   void operator()( const size_type i) const
03220   {
03221      m_C(i) = m_c*m_C(i) + m_ab*m_A(i)*m_B(i);
03222   }
03223 };
03224 
03225 
03226 template<class CVector, class AVector, class BVector>
03227 CVector V_ElementWiseMultiply(
03228       typename CVector::const_value_type c,
03229       CVector C,
03230       typename AVector::const_value_type ab,
03231       AVector A,
03232       BVector B
03233     )
03234 {
03235   // TODO: Add error check (didn't link for some reason?)
03236   /*if(r.dimension_0() != x.dimension_0())
03237     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(0) of r and x don't match");
03238   if(r.dimension_1() != x.dimension_1())
03239     Kokkos::Impl::throw_runtime_exception("Kokkos::MV_ElementWiseMultiply -- dimension(1) of r and x don't match");*/
03240 
03241   //TODO: Get 1D version done
03242   /*if(r.dimension_1()==1) {
03243     typedef View<typename RVector::value_type*,typename RVector::device_type> RVector1D;
03244     typedef View<typename XVector::const_value_type*,typename XVector::device_type> XVector1D;
03245 
03246     RVector1D r_1d = Kokkos::subview< RVector1D >( r , ALL(),0 );
03247     XVector1D x_1d = Kokkos::subview< XVector1D >( x , ALL(),0 );
03248     return V_ElementWiseMultiply(r_1d,x_1d);
03249   }*/
03250 
03251   V_ElementWiseMultiplyFunctor<CVector,AVector,BVector> op(c,C,ab,A,B) ;
03252   Kokkos::parallel_for( C.dimension_0() , op );
03253   return C;
03254 }
03255 }//end namespace Kokkos
03256 #endif /* KOKKOS_MULTIVECTOR_H_ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends