|
Sierra Toolkit
Version of the Day
|
00001 /*------------------------------------------------------------------------*/ 00002 /* Copyright 2010 Sandia Corporation. */ 00003 /* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ 00004 /* license for use of this work by or on behalf of the U.S. Government. */ 00005 /* Export of this program may require a license from the */ 00006 /* United States Government. */ 00007 /*------------------------------------------------------------------------*/ 00008 00009 #ifndef stk_util_parallel_ParallelReduce_hpp 00010 #define stk_util_parallel_ParallelReduce_hpp 00011 00012 #include <cstddef> 00013 #include <iosfwd> 00014 #include <string> 00015 #include <stk_util/parallel/Parallel.hpp> 00016 #include <stk_util/util/SimpleArrayOps.hpp> 00017 00018 //------------------------------------------------------------------------ 00019 00020 namespace stk_classic { 00021 00026 // REFACTOR: Replace ReduceSum with Sum?, etc... Should be possible 00027 00031 void all_write_string( ParallelMachine , 00032 std::ostream & , 00033 const std::string & ); 00034 00036 void all_reduce_sum( ParallelMachine , 00037 const double * local , double * global , unsigned count ); 00038 00040 void all_reduce_sum( ParallelMachine , 00041 const float * local , float * global , unsigned count ); 00042 00044 void all_reduce_sum( ParallelMachine , 00045 const int * local , int * global , unsigned count ); 00046 00048 void all_reduce_sum( ParallelMachine , 00049 const size_t * local , size_t * global , unsigned count ); 00050 00052 void all_reduce_bor( ParallelMachine , 00053 const unsigned * local , 00054 unsigned * global , unsigned count ); 00055 00074 template < class ReduceOp > 00075 void all_reduce( ParallelMachine , const ReduceOp & ); 00076 00079 } 00080 00081 //---------------------------------------------------------------------- 00082 //---------------------------------------------------------------------- 00083 00084 #ifndef DOXYGEN_COMPILE 00085 00086 namespace stk_classic { 00087 namespace { 00088 // Blank namespace so that this class produces local symbols, 00089 // avoiding complaints from a linker of multiple-define symbols. 00090 00091 struct ReduceEnd { 00092 struct WorkType {}; 00093 void copyin( WorkType & ) const {} 00094 void copyout( WorkType & ) const {} 00095 static void op( WorkType & , WorkType & ) {} 00096 }; 00097 00098 // Workhorse class for aggregating reduction operations. 00099 00100 template <class Op, typename T, class Next> 00101 struct Reduce { 00102 00103 typedef T Type ; 00104 enum { N = Op::N }; 00105 00106 struct WorkType { 00107 typename Next::WorkType m_next ; 00108 Type m_value[N]; 00109 }; 00110 00111 Next m_next ; 00112 Type * m_value ; 00113 00114 // Copy values into buffer: 00115 void copyin( WorkType & w ) const 00116 { Copy<N>( w.m_value , m_value ); m_next.copyin( w.m_next ); } 00117 00118 // Copy value out from buffer: 00119 void copyout( WorkType & w ) const 00120 { Copy<N>( m_value , w.m_value ); m_next.copyout( w.m_next ); } 00121 00122 // Reduction function 00123 static void op( WorkType & out , WorkType & in ) 00124 { Op( out.m_value , in.m_value ); Next::op( out.m_next , in.m_next ); } 00125 00126 // Aggregate reduction operations, use '&' for left-to-right evaluation 00127 template<class OpB, typename TB> 00128 Reduce<OpB, TB, Reduce<Op,T,Next> > 00129 operator & ( const Reduce<OpB,TB,ReduceEnd> & rhs ) 00130 { return Reduce<OpB, TB, Reduce<Op,T,Next> >( rhs , *this ); } 00131 00132 // Constructor for aggregation: 00133 Reduce( const Reduce<Op,T, ReduceEnd> & arg_val , const Next & arg_next ) 00134 : m_next( arg_next ), m_value( arg_val.m_value ) {} 00135 00136 // Constructor for aggregate member: 00137 explicit Reduce( Type * arg_value ) 00138 : m_next(), m_value( arg_value ) {} 00139 00140 static void void_op( void*inv, void*inoutv, int*, ParallelDatatype*); 00141 }; 00142 00143 template <class Op, typename T, class Next> 00144 void Reduce<Op,T,Next>::void_op( void*inv, void*inoutv,int*,ParallelDatatype*) 00145 { 00146 op( * reinterpret_cast<WorkType*>( inoutv ) , 00147 * reinterpret_cast<WorkType*>( inv ) ); 00148 } 00149 00150 } 00151 } 00152 00153 //---------------------------------------------------------------------- 00154 //---------------------------------------------------------------------- 00155 00156 namespace stk_classic { 00157 00158 template<unsigned N, typename T> 00159 inline 00160 Reduce< Sum<N> , T, ReduceEnd> ReduceSum( T * value ) 00161 { return Reduce< Sum<N>, T, ReduceEnd >( value ); } 00162 00163 template<unsigned N, typename T> 00164 inline 00165 Reduce< Prod<N>, T, ReduceEnd > ReduceProd( T * value ) 00166 { return Reduce< Prod<N>, T, ReduceEnd >( value ); } 00167 00168 template<unsigned N, typename T> 00169 inline 00170 Reduce< Max<N>, T, ReduceEnd> ReduceMax( T * value ) 00171 { return Reduce< Max<N>, T, ReduceEnd>( value ); } 00172 00173 template<unsigned N, typename T> 00174 inline 00175 Reduce< Min<N>, T, ReduceEnd> ReduceMin( T * value ) 00176 { return Reduce<Min<N>, T, ReduceEnd>( value ); } 00177 00178 template<unsigned N, typename T> 00179 inline 00180 Reduce< BitOr<N>, T, ReduceEnd> ReduceBitOr( T * value ) 00181 { return Reduce< BitOr<N>, T, ReduceEnd>( value ); } 00182 00183 template<unsigned N, typename T> 00184 inline 00185 Reduce< BitAnd<N>, T, ReduceEnd> ReduceBitAnd( T * value ) 00186 { return Reduce< BitAnd<N>, T, ReduceEnd>( value ); } 00187 00188 //---------------------------------------------------------------------- 00189 // all_reduce( comm , ReduceSum<5>( A ) & ReduceMax<3>( B ) ); 00190 00191 extern "C" { 00192 typedef void (*ParallelReduceOp) 00193 ( void * inv , void * outv , int * , ParallelDatatype * ); 00194 } 00195 00196 void all_reduce( ParallelMachine arg_comm , 00197 ParallelReduceOp arg_op , 00198 void * arg_in , 00199 void * arg_out , 00200 unsigned arg_len ); 00201 00202 namespace { 00203 00204 template < class ReduceOp > 00205 void all_reduce_driver( ParallelMachine comm , const ReduceOp & op ) 00206 { 00207 typedef typename ReduceOp::WorkType WorkType ; 00208 00209 WorkType inbuf , outbuf ; 00210 00211 ParallelReduceOp f = 00212 reinterpret_cast<ParallelReduceOp>( & ReduceOp::void_op ); 00213 op.copyin( inbuf ); 00214 all_reduce( comm , f , & inbuf, & outbuf, sizeof(WorkType) ); 00215 op.copyout( outbuf ); 00216 } 00217 00218 } 00219 00220 template < class ReduceOp > 00221 inline 00222 void all_reduce( ParallelMachine comm , const ReduceOp & op ) 00223 { all_reduce_driver<ReduceOp>( comm , op ); } 00224 00225 } 00226 00227 #endif /* DOXYGEN_COMPILE */ 00228 00229 //---------------------------------------------------------------------- 00230 00231 #endif 00232