|
AbstractLinAlgPack: C++ Interfaces For Vectors, Matrices And Related Linear Algebra Objects
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00005 // Copyright (2003) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #ifndef COO_MATRIX_TMPL_OP_DEF_H 00043 #define COO_MATRIX_TMPL_OP_DEF_H 00044 00045 #include "AbstractLinAlgPack_COOMatrixTmplOpDecl.hpp" 00046 00047 #include "DenseLinAlgPack_DMatrixClass.hpp" 00048 #include "DenseLinAlgPack_DVectorOp.hpp" 00049 #include "DenseLinAlgPack_AssertOp.hpp" 00050 00051 namespace AbstractLinAlgPack { 00052 00053 using BLAS_Cpp::trans_not; 00054 00055 using DenseLinAlgPack::Mp_M_assert_sizes; 00056 using DenseLinAlgPack::Vp_MtV_assert_sizes; 00057 using DenseLinAlgPack::Mp_MtM_assert_sizes; 00058 00059 // gms_lhs += alpha * coom_rhs (time = O(coom_rhs.nz()), space = O(1)) 00060 template<class T_COOM> 00061 void Mp_StCOOM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs 00062 , BLAS_Cpp::Transp trans_rhs) 00063 { 00064 Mp_M_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00065 , coom_rhs.rows(), coom_rhs.cols(), trans_rhs ); 00066 typename T_COOM::difference_type 00067 i_o = coom_rhs.row_offset(), 00068 j_o = coom_rhs.col_offset(); 00069 if(trans_rhs == BLAS_Cpp::no_trans) 00070 for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr) 00071 (*gms_lhs)(itr->row_i()+i_o,itr->col_j()+j_o) += alpha * itr->value(); 00072 else 00073 for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr) 00074 (*gms_lhs)(itr->col_j()+j_o,itr->row_i()+i_o) += alpha * itr->value(); 00075 } 00076 00077 // vs_lhs += alpha * coom_rhs1 * vs_rhs2 (BLAS xGEMV) (time = O(coom_rhs.nz()), space = O(1)) 00078 template<class T_COOM> 00079 void Vp_StCOOMtV(DVectorSlice* vs_lhs, value_type alpha, const T_COOM& coom_rhs1 00080 , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2) 00081 { 00082 Vp_MtV_assert_sizes( vs_lhs->dim(), coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1, vs_rhs2.dim() ); 00083 typename T_COOM::difference_type 00084 i_o = coom_rhs1.row_offset(), 00085 j_o = coom_rhs1.col_offset(); 00086 if(trans_rhs1 == BLAS_Cpp::no_trans) 00087 for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr) 00088 (*vs_lhs)(itr->row_i()+i_o) += alpha * itr->value() * vs_rhs2(itr->col_j()+j_o); 00089 else 00090 for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr) 00091 (*vs_lhs)(itr->col_j()+j_o) += alpha * itr->value() * vs_rhs2(itr->row_i()+i_o); 00092 } 00093 00094 namespace UtilityPack { 00095 00096 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM) 00097 template<class T_COOM> 00098 void imp_Mp_StMtCOOM(DMatrixSlice& gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha 00099 , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1 00100 , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2 ); 00101 00102 } // end namespace UtilityPack 00103 00104 00105 // gms_lhs += alpha * op(coom_rhs1) * op(gms_rhs2) (right) (BLAS xGEMM) 00106 template<class T_COOM> 00107 void Mp_StCOOMtM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1 00108 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2, BLAS_Cpp::Transp trans_rhs2) 00109 { 00110 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00111 , coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1 00112 , gms_rhs2.rows() ,gms_rhs2.cols(), trans_rhs2 ); 00113 UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::trans, alpha, gms_rhs2 00114 , trans_not(trans_rhs2), coom_rhs1, trans_not(trans_rhs1) ); 00115 } 00116 00117 // gms_lhs += alpha * op(gms_rhs1) * op(coom_rhs2) (left) (BLAS xGEMM) 00118 template<class T_COOM> 00119 void Mp_StMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00120 , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2) 00121 { 00122 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00123 , gms_rhs1.rows() ,gms_rhs1.cols(), trans_rhs1 00124 , coom_rhs2.rows(), coom_rhs2.cols(), trans_rhs2 ); 00125 UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::no_trans, alpha, gms_rhs1 00126 , trans_rhs1, coom_rhs2, trans_rhs2 ); 00127 } 00128 00129 // ToDo: implement the level-3 BLAS operations below 00130 00131 // gms_lhs = alpha * op(coom_rhs1) * op(sym_rhs2) (right) (BLAS xSYMM) 00132 //template<class T_COOM> 00133 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1 00134 // , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceSym& sym_rhs2, BLAS_Cpp::Transp trans_rhs2); 00135 00136 // gms_lhs = alpha * op(sym_rhs1) * op(coom_rhs2) (left) (BLAS xSYMM) 00137 //template<class T_COOM> 00138 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1 00139 // , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2); 00140 00141 // gms_lhs = alpha * op(coom_rhs1) * op(tri_rhs2) (right) (BLAS xTRMM) 00142 //template<class T_COOM> 00143 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1 00144 // , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2, BLAS_Cpp::Transp trans_rhs2); 00145 00146 // gms_lhs = alpha * op(tri_rhs1) * op(coom_rhs2) (left) (BLAS xTRMM) 00147 //template<class T_COOM> 00148 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00149 // , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2); 00150 00151 namespace UtilityPack { 00152 00153 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM) 00154 // 00155 // This function perform this operation by looping through the nonzero elements of 00156 // coom_rhs2 and for each nonzero element (val,i,j) it performs the following operation. 00157 // 00158 // op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i) 00159 // 00160 // 00161 // jth ith jth 00162 // [ # ] [ # ] [ ] 00163 // [ # ] [ # ] [ ] 00164 // m [ # ] += [ # ] * [ ] n 00165 // [ # ] [ # ] ith [ val ] 00166 // n p [ ] 00167 // p 00168 // op(gms_lhs) op(gms_rhs1) op(coom_rhs2) 00169 // 00170 // The number of arithmetic operations performed are: 00171 // floats = (2*m + 1) * nz 00172 // 00173 // Strictly speeking the number of memory references is: 00174 // mem_refs = (2*m + 1) * nz 00175 // but this does not take into account that elements are accessed by columns 00176 // and this has some ramifications on cache effects and paging. If op(gms_lhs) 00177 // == gms_lhs' or op(gms_rhs1) == gms_rhs1' then elements in a column are 00178 // not adjacent to each other and if m is large enough each element may 00179 // even reside on a seperate page of memory. On Win32 0x86 systems a page is 00180 // 4 K so 4,000 (bytes/page) / 8 (bytes/double) = 500 doubles / page. If 00181 // the working set of pages is small this could cause some serious page thrashing 00182 // for large m. 00183 // 00184 // Another concideration is to sorting order of the elements in the COO matrix. 00185 // If op(coom_rhs2) is sorted by row then columns of op(gms_lhs) will be accessed 00186 // consecutivly and will result in better performance. The same goes for op(gms_rhs1) 00187 // if op(coom_rhs2) is sorted by column. 00188 // 00189 // There is opertunity for some vectorization and it is handled by calling 00190 // DenseLinAlgPack::Vp_StV(...). 00191 // 00192 template<class T_COOM> 00193 void imp_Mp_StMtCOOM(DMatrixSlice* gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha 00194 , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1 00195 , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2) 00196 { 00197 using BLAS_Cpp::rows; 00198 using BLAS_Cpp::cols; 00199 using DenseLinAlgPack::col; 00200 00201 typename T_COOM::difference_type 00202 i_o = coom_rhs2.row_offset(), 00203 j_o = coom_rhs2.col_offset(); 00204 for(typename T_COOM::const_iterator itr = coom_rhs2.begin(); itr != coom_rhs2.end(); ++itr) { 00205 size_type i = rows( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 ), 00206 j = cols( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 ); 00207 // op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i) 00208 DenseLinAlgPack::Vp_StV( &col(*gms_lhs,trans_lhs,j), alpha * itr->value() 00209 , col(gms_rhs1,trans_rhs1,i) ); 00210 } 00211 } 00212 00213 } // end namespace UtilityPack 00214 00215 } // end namespace AbstractLinAlgPack 00216 00217 #endif // COO_MATRIX_TMPL_OP_DEF_H
1.7.6.1