|
Sierra Toolkit
Version of the Day
|
00001 #ifndef STK_UTIL_PARALLEL_MPI_hpp 00002 #define STK_UTIL_PARALLEL_MPI_hpp 00003 00004 #include <stk_util/stk_config.h> 00005 #if defined( STK_HAS_MPI ) 00006 00007 #include <mpi.h> 00008 #include <vector> 00009 #include <iterator> 00010 #include <stdexcept> 00011 #include <complex> 00012 00013 namespace sierra { 00014 namespace MPI { 00015 00016 template<class T> 00017 inline 00018 void 00019 mpi_real_complex_sum( 00020 void * invec, 00021 void * inoutvec, 00022 int * len, 00023 MPI_Datatype * datatype) 00024 { 00025 std::complex<T> *complex_in = static_cast<std::complex<T> *>(invec); 00026 std::complex<T> *complex_inout = static_cast<std::complex<T> *>(inoutvec); 00027 00028 for (int i = 0; i < *len; ++i) 00029 complex_inout[i] += complex_in[i]; 00030 } 00031 00036 00037 00044 MPI_Datatype float_complex_type(); 00045 00052 MPI_Datatype double_complex_type(); 00053 00060 MPI_Op double_complex_sum_op(); 00061 00068 template<class T> 00069 inline 00070 MPI_Op real_complex_sum_op() 00071 { 00072 static MPI_Op s_mpi_real_complex_sum; 00073 static bool initialized = false; 00074 00075 if (!initialized) { 00076 initialized = true; 00077 00078 MPI_Op_create(mpi_real_complex_sum<T>, true, &s_mpi_real_complex_sum); 00079 } 00080 return s_mpi_real_complex_sum; 00081 } 00082 00088 MPI_Datatype long_long_int_int_type(); 00089 00090 00096 MPI_Datatype double_double_int_type(); 00097 00098 00104 template <typename T> 00105 struct Loc 00106 { 00107 Loc() 00108 : m_value(), 00109 m_loc(0) 00110 {} 00111 00112 Loc(const T &value, int loc) 00113 : m_value(value), 00114 m_loc(loc) 00115 {} 00116 00117 T m_value; 00118 int m_loc; 00119 }; 00120 00121 struct TempLoc 00122 { 00123 TempLoc() 00124 : m_value(), 00125 m_other(), 00126 m_loc(0) 00127 {} 00128 00129 TempLoc(double value, double other, int loc) 00130 : m_value(value), 00131 m_other(other), 00132 m_loc(loc) 00133 {} 00134 00135 double m_value; 00136 double m_other; 00137 int m_loc; 00138 }; 00139 00140 00149 template <typename T> 00150 struct Datatype; 00151 00152 template <> 00153 struct Datatype<char> 00154 { 00155 static MPI_Datatype type() { 00156 return MPI_CHAR; 00157 } 00158 }; 00159 00160 template <> 00161 struct Datatype<signed char> 00162 { 00163 static MPI_Datatype type() { 00164 return MPI_CHAR; 00165 } 00166 }; 00167 00168 template <> 00169 struct Datatype<unsigned char> 00170 { 00171 static MPI_Datatype type() { 00172 return MPI_BYTE; 00173 } 00174 }; 00175 00176 template <> 00177 struct Datatype<int> 00178 { 00179 static MPI_Datatype type() { 00180 return MPI_INT; 00181 } 00182 }; 00183 00184 template <> 00185 struct Datatype<unsigned int> 00186 { 00187 static MPI_Datatype type() { 00188 return MPI_UNSIGNED; 00189 } 00190 }; 00191 00192 template <> 00193 struct Datatype<short> 00194 { 00195 static MPI_Datatype type() { 00196 return MPI_SHORT; 00197 } 00198 }; 00199 00200 template <> 00201 struct Datatype<unsigned short> 00202 { 00203 static MPI_Datatype type() { 00204 return MPI_UNSIGNED_SHORT; 00205 } 00206 }; 00207 00208 template <> 00209 struct Datatype<long> 00210 { 00211 static MPI_Datatype type() { 00212 return MPI_LONG; 00213 } 00214 }; 00215 00216 template <> 00217 struct Datatype<unsigned long> 00218 { 00219 static MPI_Datatype type() { 00220 return MPI_UNSIGNED_LONG; 00221 } 00222 }; 00223 00224 // #ifdef MPI_LONG_LONG_INT 00225 // template <> 00226 // struct Datatype<long long> 00227 // { 00228 // static MPI_Datatype type() { 00229 // return MPI_LONG_LONG_INT; 00230 // } 00231 // }; 00232 // #endif 00233 00234 // #ifdef MPI_UNSIGNED_LONG_LONG_INT 00235 // template <> 00236 // struct Datatype<unsigned long long> 00237 // { 00238 // static MPI_Datatype type() { 00239 // return MPI_UNSIGNED_LONG_LONG_INT; 00240 // } 00241 // }; 00242 // #endif 00243 00244 template <> 00245 struct Datatype<float> 00246 { 00247 static MPI_Datatype type() { 00248 return MPI_FLOAT; 00249 } 00250 }; 00251 00252 template <> 00253 struct Datatype<double> 00254 { 00255 static MPI_Datatype type() { 00256 return MPI_DOUBLE; 00257 } 00258 }; 00259 00260 00261 template <> 00262 struct Datatype<std::complex<float> > 00263 { 00264 static MPI_Datatype type() { 00265 return float_complex_type(); 00266 } 00267 }; 00268 00269 template <> 00270 struct Datatype<std::complex<double> > 00271 { 00272 static MPI_Datatype type() { 00273 return double_complex_type(); 00274 } 00275 }; 00276 00277 00278 template <> 00279 struct Datatype<Loc<int> > 00280 { 00281 static MPI_Datatype type() { 00282 return MPI_2INT; 00283 } 00284 }; 00285 00286 template <> 00287 struct Datatype<Loc<short> > 00288 { 00289 static MPI_Datatype type() { 00290 return MPI_SHORT_INT; 00291 } 00292 }; 00293 00294 template <> 00295 struct Datatype<Loc<long> > 00296 { 00297 static MPI_Datatype type() { 00298 return MPI_LONG_INT; 00299 } 00300 }; 00301 00302 template <> 00303 struct Datatype<Loc<unsigned long> > 00304 { 00305 static MPI_Datatype type() { 00306 return MPI_LONG_INT; 00307 } 00308 }; 00309 00310 // #ifdef MPI_LONG_LONG_INT 00311 // template <> 00312 // struct Datatype<Loc<long long> > 00313 // { 00314 // static MPI_Datatype type() { 00315 // return long_long_int_int_type(); 00316 // } 00317 // }; 00318 // #endif 00319 00320 template <> 00321 struct Datatype<Loc<float> > 00322 { 00323 static MPI_Datatype type() { 00324 return MPI_FLOAT_INT; 00325 } 00326 }; 00327 00328 template <> 00329 struct Datatype<Loc<double> > 00330 { 00331 static MPI_Datatype type() { 00332 return MPI_DOUBLE_INT; 00333 } 00334 }; 00335 00336 template <> 00337 struct Datatype<TempLoc> 00338 { 00339 static MPI_Datatype type() { 00340 return double_double_int_type(); 00341 } 00342 }; 00343 00344 00360 template<class T> 00361 inline void 00362 AllReduce(MPI_Comm mpi_comm, MPI_Op op, T *src_dest, size_t size) 00363 { 00364 std::vector<T> source(src_dest, src_dest + size); 00365 00366 if (MPI_Allreduce(&source[0], &src_dest[0], (int) size, Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS ) 00367 throw std::runtime_error("MPI_Allreduce failed"); 00368 } 00369 00385 template<class T> 00386 inline void 00387 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &dest) 00388 { 00389 std::vector<T> source(dest); 00390 00391 if (MPI_Allreduce(&source[0], &dest[0], (int) dest.size(), Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS ) 00392 throw std::runtime_error("MPI_Allreduce failed"); 00393 } 00394 00413 template<class T> 00414 inline void 00415 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) 00416 { 00417 if (source.size() != dest.size()) 00418 throw std::runtime_error("sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal"); 00419 00420 if (MPI_Allreduce(&source[0], &dest[0], (int) dest.size(), Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS ) 00421 throw std::runtime_error("MPI_Allreduce failed"); 00422 } 00423 00424 00425 template<class T> 00426 inline void 00427 AllGather(MPI_Comm mpi_comm, std::vector<T> &source, std::vector<T> &dest) 00428 { 00429 int nproc = 1; 00430 MPI_Comm_size(mpi_comm,&nproc); 00431 if (source.size()*nproc != dest.size()) 00432 throw std::runtime_error("sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal"); 00433 00434 if (MPI_Allgather(&source[0], (int)source.size(), Datatype<T>::type(), 00435 &dest[0], (int)source.size(), Datatype<T>::type(), 00436 mpi_comm) != MPI_SUCCESS ){ 00437 throw std::runtime_error("MPI_Allreduce failed"); 00438 } 00439 } 00440 00450 template <typename T> 00451 T *align_cast(void *p) 00452 { 00453 enum {alignment = (sizeof(T) > sizeof(double) ? sizeof(double) : sizeof(T))}; 00454 enum {mask = alignment - 1}; 00455 00456 char * c = reinterpret_cast<char *>(p); 00457 size_t front_misalign = (c - (char *)0) & mask; 00458 if (front_misalign > 0) { 00459 size_t correction = alignment - front_misalign; 00460 T *q = reinterpret_cast<T *>((c - (char *)0) + correction); 00461 return q; 00462 } 00463 00464 return reinterpret_cast<T *>(p); 00465 } 00466 00479 struct ReduceInterface { 00484 ReduceInterface() 00485 {} 00486 00491 virtual ~ReduceInterface() 00492 {} 00493 00503 virtual void size(void *&inbuf) const = 0; 00504 00516 virtual void copyin(void *&inbuf) const = 0; 00517 00529 virtual void copyout(void *&outbuf) const = 0; 00530 00547 virtual void op(void *&inbuf, void *&outbuf) const = 0; 00548 }; 00549 00550 00563 template<class Op, class LocalIt, class GlobalIt = LocalIt> 00564 struct Reduce : public ReduceInterface 00565 { 00566 typedef typename std::iterator_traits<LocalIt>::value_type value_type; 00567 typedef typename std::iterator_traits<LocalIt>::difference_type difference_type; 00568 00569 Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) 00570 : m_localBegin(local_begin), 00571 m_localEnd(local_end), 00572 m_globalBegin(global_begin), 00573 m_globalEnd(global_end), 00574 m_length(local_end - local_begin) 00575 { 00576 if (global_end - global_begin != m_length) 00577 throw std::runtime_error("sierra::MPI::Reduce::Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) local and global lengths not equal"); 00578 } 00579 00580 virtual ~Reduce() 00581 {} 00582 00583 virtual void size(void *&inbuf) const { 00584 value_type *t = align_cast<value_type>(inbuf); 00585 t += m_length; 00586 inbuf = t; 00587 } 00588 00589 virtual void copyin(void *&inbuf) const { 00590 value_type *t = align_cast<value_type>(inbuf); 00591 for (LocalIt it = m_localBegin; it != m_localEnd; ++it) 00592 *t++ = (*it); 00593 inbuf = t; 00594 } 00595 00596 virtual void copyout(void *&outbuf) const { 00597 value_type *t = align_cast<value_type>(outbuf); 00598 for (GlobalIt it = m_globalBegin; it != m_globalEnd; ++it) 00599 (*it) = *t++; 00600 outbuf = t; 00601 } 00602 00603 virtual void op(void *&inbuf, void *&outbuf) const { 00604 value_type *tin = align_cast<value_type>(inbuf); 00605 value_type *tout = align_cast<value_type>(outbuf); 00606 00607 for (size_t i = m_length; i; --i) 00608 Op(tout++, tin++); 00609 inbuf = tin; 00610 outbuf = tout; 00611 } 00612 00613 LocalIt m_localBegin; 00614 LocalIt m_localEnd; 00615 GlobalIt m_globalBegin; 00616 GlobalIt m_globalEnd; 00617 difference_type m_length; 00618 }; 00619 00620 00625 class ReduceSet 00626 { 00627 public: 00628 typedef std::vector<ReduceInterface *> ReduceVector; 00629 00630 ReduceSet(); 00631 00632 virtual ~ReduceSet(); 00633 00634 void add(ReduceInterface *reduce_interface); 00635 00636 size_t size() const; 00637 00638 void copyin(void * const buffer_in) const; 00639 00640 void copyout(void * const buffer_out) const; 00641 00642 void op(void * const buffer_in, void * const buffer_out) const; 00643 00644 static void void_op(void * inv, void * outv, int *n, MPI_Datatype *datatype); 00645 00646 private: 00647 ReduceVector m_reduceVector; 00648 }; 00649 00658 void AllReduce(MPI_Comm comm, const ReduceSet &reduce_set); 00659 00664 struct Sum 00665 { 00666 template <typename T> 00667 inline Sum(T * dest, const T *source) { 00668 *dest += *source; 00669 } 00670 }; 00671 00676 struct Prod 00677 { 00678 template <typename T> 00679 inline Prod(T * dest, const T *source) { 00680 *dest *= *source; 00681 } 00682 }; 00683 00688 struct Min 00689 { 00690 template <typename T> 00691 inline Min(T * dest, const T *source) { 00692 *dest = std::min(*dest, *source); 00693 } 00694 }; 00695 00700 struct Max 00701 { 00702 template <typename T> 00703 inline Max(T * dest, const T *source) { 00704 *dest = std::max(*dest, *source); 00705 } 00706 }; 00707 00712 struct MinLoc 00713 { 00714 template <typename T> 00715 inline MinLoc(Loc<T> * dest, const Loc<T> *source) { 00716 if (source->m_value < dest->m_value) { 00717 dest->m_value = source->m_value; 00718 dest->m_loc = source->m_loc; 00719 } 00720 else if (source->m_value == dest->m_value) 00721 dest->m_loc = std::min(dest->m_loc, source->m_loc); 00722 } 00723 }; 00724 00729 struct MaxLoc 00730 { 00731 template <typename T> 00732 inline MaxLoc(Loc<T> * dest, const Loc<T> *source) { 00733 if (source->m_value > dest->m_value) { 00734 dest->m_value = source->m_value; 00735 dest->m_loc = source->m_loc; 00736 } 00737 else if (source->m_value == dest->m_value) 00738 dest->m_loc = std::min(dest->m_loc, source->m_loc); 00739 } 00740 }; 00741 00742 00743 struct MaxTempLoc 00744 { 00745 inline MaxTempLoc(TempLoc * dest, const TempLoc *source) { 00746 if (source->m_value > dest->m_value) { 00747 dest->m_value = source->m_value; 00748 dest->m_other = source->m_other; 00749 dest->m_loc = source->m_loc; 00750 } 00751 else if (source->m_value == dest->m_value) { 00752 if (dest->m_loc > source->m_loc) { 00753 dest->m_other = source->m_other; 00754 dest->m_loc = source->m_loc; 00755 } 00756 } 00757 } 00758 }; 00759 00760 struct MinTempLoc 00761 { 00762 inline MinTempLoc(TempLoc * dest, const TempLoc *source) { 00763 if (source->m_value < dest->m_value) { 00764 dest->m_value = source->m_value; 00765 dest->m_other = source->m_other; 00766 dest->m_loc = source->m_loc; 00767 } 00768 else if (source->m_value == dest->m_value) { 00769 if (dest->m_loc > source->m_loc) { 00770 dest->m_other = source->m_other; 00771 dest->m_loc = source->m_loc; 00772 } 00773 } 00774 } 00775 }; 00776 00788 template<typename T> 00789 Reduce<Sum, T *> *ReduceSum(T *t, T *u, size_t length) { 00790 return new Reduce<Sum, T *>(t, t + length, u, u + length); 00791 } 00792 00804 template<typename T> 00805 Reduce<Prod, T *> *ReduceProd(T *t, T *u, size_t length) { 00806 return new Reduce<Prod, T *>(t, t + length, u, u + length); 00807 } 00808 00820 template<typename T> 00821 Reduce<Max, T *> *ReduceMax(T *t, T *u, size_t length) { 00822 return new Reduce<Max, T *>(t, t + length, u, u + length); 00823 } 00824 00836 template<typename T> 00837 Reduce<Min, T *> *ReduceMin(T *t, T *u, size_t length) { 00838 return new Reduce<Min, T *>(t, t + length, u, u + length); 00839 } 00840 00841 00851 template<typename T> 00852 Reduce<Sum, T *> *ReduceSum(T &t, T &u) { 00853 return new Reduce<Sum, T *>(&t, &t + 1, &u, &u + 1); 00854 } 00855 00865 template<typename T> 00866 Reduce<Prod, T *> *ReduceProd(T &t, T &u) { 00867 return new Reduce<Prod, T *>(&t, &t + 1, &u, &u + 1); 00868 } 00869 00879 template<typename T> 00880 Reduce<Max, T *> *ReduceMax(T &t, T &u) { 00881 return new Reduce<Max, T *>(&t, &t + 1, &u, &u + 1); 00882 } 00883 00893 template<typename T> 00894 Reduce<Min, T *> *ReduceMin(T &t, T &u) { 00895 return new Reduce<Min, T *>(&t, &t + 1, &u, &u + 1); 00896 } 00897 00898 00912 template<class LocalIt, class GlobalIt> 00913 Reduce<Sum, LocalIt, GlobalIt> *ReduceSum(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) { 00914 return new Reduce<Sum, LocalIt, GlobalIt>(local_begin, local_end, global_begin, global_end); 00915 } 00916 00930 template<class LocalIt, class GlobalIt> 00931 Reduce<Prod, LocalIt, GlobalIt> *ReduceProd(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) { 00932 return new Reduce<Prod, LocalIt, GlobalIt>(local_begin, local_end, global_begin, global_end); 00933 } 00934 00948 template<typename T, class LocalIt, class GlobalIt> 00949 Reduce<Min, LocalIt, GlobalIt> *ReduceMin(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) { 00950 return new Reduce<Min, LocalIt>(local_begin, local_end, global_begin, global_end); 00951 } 00952 00966 template<typename T, class LocalIt, class GlobalIt> 00967 Reduce<Max, LocalIt, GlobalIt> *ReduceMax(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) { 00968 return new Reduce<Max, LocalIt>(local_begin, local_end, global_begin, global_end); 00969 } 00970 00981 template<class T, class U> 00982 inline void 00983 AllReduceCollected(MPI_Comm mpi_comm, MPI_Op op, U collector) 00984 { 00985 std::vector<T> source; 00986 00987 std::back_insert_iterator<std::vector<T> > source_inserter(source); 00988 collector.gather(op, source_inserter); 00989 00990 int size = source.size(); 00991 00992 #ifdef SIERRA_DEBUG 00993 // 00994 // Check that the array lengths being reduces are all the same 00995 // 00996 int num_proc; 00997 int my_proc; 00998 MPI_Comm_size(mpi_comm, &num_proc); 00999 MPI_Comm_rank(mpi_comm, &my_proc); 01000 01001 01002 std::vector<int> local_array_len(num_proc, 0); 01003 local_array_len[my_proc] == size; 01004 std::vector<int> global_array_len(num_proc, 0); 01005 01006 MPI_Allreduce(&local_array_len[0], &global_array_len[0], num_proc, MPI_INT, MPI_SUM, mpi_comm); 01007 01008 for(unsigned i = 0; i < num_proc; ++i) { 01009 if(global_array_len[i] != size) { 01010 throw std::runtime_error("Slib_MPI.h::AllReduceCollected, not all processors have the same length array"); 01011 } 01012 } 01013 #endif 01014 01015 if (source.empty()) return; 01016 std::vector<T> dest(size); 01017 01018 if (MPI_Allreduce(&source[0], &dest[0], size, Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS ) 01019 throw std::runtime_error("MPI_Allreduce failed"); 01020 01021 typename std::vector<T>::iterator dest_getter = dest.begin(); 01022 collector.scatter(op, dest_getter); 01023 } 01024 01028 01029 } // namespace MPI 01030 } // namespace sierra 01031 01032 #endif // if defined( STK_HAS_MPI ) 01033 #endif // STK_UTIL_PARALLEL_MPI_hpp