|
RTOpPack: Extra C/C++ Code for Vector Reduction/Transformation Operators
Version of the Day
|
00001 /* 00002 // @HEADER 00003 // *********************************************************************** 00004 // 00005 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00006 // Copyright (2003) Sandia Corporation 00007 // 00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00009 // license for use of this work by or on behalf of the U.S. Government. 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are 00013 // met: 00014 // 00015 // 1. Redistributions of source code must retain the above copyright 00016 // notice, this list of conditions and the following disclaimer. 00017 // 00018 // 2. Redistributions in binary form must reproduce the above copyright 00019 // notice, this list of conditions and the following disclaimer in the 00020 // documentation and/or other materials provided with the distribution. 00021 // 00022 // 3. Neither the name of the Corporation nor the names of the 00023 // contributors may be used to endorse or promote products derived from 00024 // this software without specific prior written permission. 00025 // 00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00037 // 00038 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00039 // 00040 // *********************************************************************** 00041 // @HEADER 00042 */ 00043 00044 #include "RTOp_apply_op_mpi.h" 00045 #include "RTOp_parallel_helpers.h" 00046 #include "RTOpToMPI.h" 00047 00048 #include <stdlib.h> 00049 00050 int RTOp_apply_op_mpi( 00051 MPI_Comm comm 00052 ,RTOp_index_type global_dim_in, RTOp_index_type local_sub_dim_in, RTOp_index_type local_offset_in 00053 ,const int num_cols 00054 ,const int num_vecs, const RTOp_value_type* l_vec_ptrs[], const ptrdiff_t l_vec_strides[], const ptrdiff_t l_vec_leading_dim[] 00055 ,const int num_targ_vecs, RTOp_value_type* l_targ_vec_ptrs[], const ptrdiff_t l_targ_vec_strides[], const ptrdiff_t l_targ_vec_leading_dim[] 00056 ,const RTOp_index_type first_ele_in, const RTOp_index_type sub_dim_in, const RTOp_index_type global_offset_in 00057 ,const struct RTOp_RTOp* op 00058 ,RTOp_ReductTarget reduct_objs[] 00059 ) 00060 { 00061 int err = 0; 00062 struct RTOp_SubVector *local_vecs = NULL; 00063 struct RTOp_MutableSubVector *local_targ_vecs = NULL; 00064 RTOp_index_type overlap_first_local_ele = 0; 00065 RTOp_index_type overalap_local_sub_dim = 0; 00066 RTOp_index_type overlap_global_offset = 0; 00067 int k; 00068 int kc; 00069 /* Validate the input */ 00070 #ifdef RTOp_DEBUG 00071 assert( num_vecs || num_targ_vecs ); 00072 if(num_vecs) 00073 assert( l_vec_ptrs != NULL ); 00074 if(num_targ_vecs) 00075 assert( l_targ_vec_ptrs != NULL ); 00076 assert( 0 <= sub_dim_in && sub_dim_in <= global_dim_in ); 00077 #endif 00078 /* Pre-initialize the local sub-vectors */ 00079 if(num_vecs) { 00080 local_vecs = malloc( sizeof(struct RTOp_SubVector) * num_vecs * num_cols ); 00081 for( kc = 0; kc < num_cols; ++kc ) { 00082 for( k = 0; k < num_vecs; ++k ) 00083 RTOp_sub_vector_null(&local_vecs[kc*num_cols+k]); 00084 } 00085 } 00086 if(num_targ_vecs) { 00087 local_targ_vecs = malloc( sizeof(struct RTOp_MutableSubVector) * num_targ_vecs ); 00088 for( kc = 0; kc < num_cols; ++kc ) { 00089 for( k = 0; k < num_targ_vecs; ++k ) 00090 RTOp_mutable_sub_vector_null(&local_targ_vecs[kc*num_cols+k]); 00091 } 00092 } 00093 /* Get the overlap in the current process with the input logical sub-vector */ 00094 /* from (first_ele_in,sub_dim_in,global_offset_in) */ 00095 RTOp_parallel_calc_overlap( 00096 global_dim_in, local_sub_dim_in, local_offset_in, first_ele_in, sub_dim_in, global_offset_in 00097 ,&overlap_first_local_ele, &overalap_local_sub_dim, &overlap_global_offset 00098 ); 00099 if( overlap_first_local_ele != 0 ) { 00100 /* Sub-vector structs for the local elements that are to participate in the */ 00101 /* reduction/transforamtion operation. */ 00102 for( kc = 0; kc < num_cols; ++kc ) { 00103 for(k = 0; k < num_vecs; ++k) { 00104 RTOp_sub_vector( 00105 overlap_global_offset /* global_offset */ 00106 ,overalap_local_sub_dim /* sub_dim */ 00107 ,l_vec_ptrs[k]+(overlap_first_local_ele-1)*l_vec_strides[k] 00108 + ( num_cols > 1 ? kc*l_vec_leading_dim[k] : 0 ) /* values */ 00109 ,l_vec_strides[k] /* values_stride */ 00110 ,&local_vecs[kc*num_cols+k] 00111 ); 00112 } 00113 for(k = 0; k < num_targ_vecs; ++k) { 00114 RTOp_mutable_sub_vector( 00115 overlap_global_offset /* global_offset */ 00116 ,overalap_local_sub_dim /* sub_dim */ 00117 ,l_targ_vec_ptrs[k]+(overlap_first_local_ele-1)*l_targ_vec_strides[k] 00118 + ( num_cols > 1 ? kc*l_targ_vec_leading_dim[k] : 0 ) /* values */ 00119 ,l_targ_vec_strides[k] /* values_stride */ 00120 ,&local_targ_vecs[kc*num_cols+k] 00121 ); 00122 } 00123 } 00124 } 00125 /* */ 00126 /* Apply the reduction operation over the sub-vectors in */ 00127 /* this process then collect the reductions over */ 00128 /* all the processes and return the result */ 00129 /* to all the processes (including this one of course). */ 00130 /* If all of the sub-svectors are empty then this will */ 00131 /* just call the reduction operation with NULL sub-vectors */ 00132 /* */ 00133 err = RTOp_MPI_apply_op( 00134 comm, op, -1 /* MPI_Allreduce(...) */ 00135 ,num_cols 00136 ,num_vecs, num_vecs && overlap_first_local_ele ? &local_vecs[0] : NULL 00137 ,num_targ_vecs, num_targ_vecs && overlap_first_local_ele ? &local_targ_vecs[0] : NULL 00138 ,reduct_objs 00139 ); 00140 00141 if(local_vecs) free(local_vecs); 00142 if(local_targ_vecs) free(local_targ_vecs); 00143 00144 /* Deallocate memory */ 00145 00146 return err; 00147 }
1.7.6.1