|
EpetraExt
Development
|
00001 //@HEADER 00002 // *********************************************************************** 00003 // 00004 // EpetraExt: Epetra Extended - Linear Algebra Services Package 00005 // Copyright (2011) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 00042 #include <EpetraExt_ConfigDefs.h> 00043 #include <EpetraExt_MatrixMatrix.h> 00044 00045 #include <EpetraExt_MMHelpers.h> 00046 00047 #include <EpetraExt_Transpose_RowMatrix.h> 00048 00049 #include <Epetra_Export.h> 00050 #include <Epetra_Import.h> 00051 #include <Epetra_Util.h> 00052 #include <Epetra_Map.h> 00053 #include <Epetra_Comm.h> 00054 #include <Epetra_CrsMatrix.h> 00055 #include <Epetra_Directory.h> 00056 #include <Epetra_HashTable.h> 00057 #include <Epetra_Distributor.h> 00058 00059 #ifdef HAVE_VECTOR 00060 #include <vector> 00061 #endif 00062 00063 namespace EpetraExt { 00064 00065 // 00066 //Method for internal use... sparsedot forms a dot-product between two 00067 //sparsely-populated 'vectors'. 00068 //Important assumption: assumes the indices in u_ind and v_ind are sorted. 00069 // 00070 double sparsedot(double* u, int* u_ind, int u_len, 00071 double* v, int* v_ind, int v_len) 00072 { 00073 double result = 0.0; 00074 00075 int v_idx = 0; 00076 int u_idx = 0; 00077 00078 while(v_idx < v_len && u_idx < u_len) { 00079 int ui = u_ind[u_idx]; 00080 int vi = v_ind[v_idx]; 00081 00082 if (ui < vi) { 00083 ++u_idx; 00084 } 00085 else if (ui > vi) { 00086 ++v_idx; 00087 } 00088 else { 00089 result += u[u_idx++]*v[v_idx++]; 00090 } 00091 } 00092 00093 return(result); 00094 } 00095 00096 00097 00098 00099 //kernel method for computing the local portion of C = A*B 00100 int mult_A_B(CrsMatrixStruct& Aview, 00101 CrsMatrixStruct& Bview, 00102 CrsWrapper& C) 00103 { 00104 int C_firstCol = Bview.colMap->MinLID(); 00105 int C_lastCol = Bview.colMap->MaxLID(); 00106 00107 int C_firstCol_import = 0; 00108 int C_lastCol_import = -1; 00109 00110 int* bcols = Bview.colMap->MyGlobalElements(); 00111 int* bcols_import = NULL; 00112 if (Bview.importColMap != NULL) { 00113 C_firstCol_import = Bview.importColMap->MinLID(); 00114 C_lastCol_import = Bview.importColMap->MaxLID(); 00115 00116 bcols_import = Bview.importColMap->MyGlobalElements(); 00117 } 00118 00119 int C_numCols = C_lastCol - C_firstCol + 1; 00120 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00121 00122 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00123 00124 // Allocate workspace memory 00125 double* dwork = new double[C_numCols]; 00126 int* iwork = new int[C_numCols]; 00127 int *c_cols=iwork; 00128 double *c_vals=dwork; 00129 int *c_index=new int[C_numCols]; 00130 00131 int i, j, k; 00132 bool C_filled=C.Filled(); 00133 00134 00135 //To form C = A*B we're going to execute this expression: 00136 // 00137 // C(i,j) = sum_k( A(i,k)*B(k,j) ) 00138 // 00139 //Our goal, of course, is to navigate the data in A and B once, without 00140 //performing searches for column-indices, etc. 00141 00142 00143 // Run through all the hash table lookups once and for all 00144 int * Acol2Brow=new int[Aview.colMap->NumMyElements()]; 00145 if(Aview.colMap->SameAs(*Bview.rowMap)){ 00146 // Maps are the same: Use local IDs as the hash 00147 for(int i=0;i<Aview.colMap->NumMyElements();i++) 00148 Acol2Brow[i]=i; 00149 } 00150 else { 00151 // Maps are not the same: Use the map's hash 00152 for(int i=0;i<Aview.colMap->NumMyElements();i++) 00153 Acol2Brow[i]=Bview.rowMap->LID(Aview.colMap->GID(i)); 00154 } 00155 00156 // Mark indices as empty w/ -1 00157 for(k=0;k<C_numCols;k++) c_index[k]=-1; 00158 00159 //loop over the rows of A. 00160 for(i=0; i<Aview.numRows; ++i) { 00161 //only navigate the local portion of Aview... (It's probable that we 00162 //imported more of A than we need for A*B, because other cases like A^T*B 00163 //need the extra rows.) 00164 if (Aview.remote[i]) { 00165 continue; 00166 } 00167 00168 int* Aindices_i = Aview.indices[i]; 00169 double* Aval_i = Aview.values[i]; 00170 00171 int global_row = Aview.rowMap->GID(i); 00172 00173 //loop across the i-th row of A and for each corresponding row 00174 //in B, loop across colums and accumulate product 00175 //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words, 00176 //as we stride across B(k,:) we're calculating updates for row i of the 00177 //result matrix C. 00178 00179 /* Outline of the revised, ML-inspired algorithm 00180 00181 C_{i,j} = \sum_k A_{i,k} B_{k,j} 00182 00183 This algorithm uses a "middle product" formulation, with the loop ordering of 00184 i, k, j. This means we compute a row of C at a time, but compute partial sums of 00185 each entry in row i until we finish the k loop. 00186 00187 This algorithm also has a few twists worth documenting. 00188 00189 1) First off, we compute the Acol2Brow array above. This array answers the question: 00190 00191 "Given a LCID in A, what LRID in B corresponds to it?" 00192 00193 Since this involves LID calls, this is a hash table lookup. Thus we do this once for every 00194 LCID in A, rather than call LID inside the MMM loop. 00195 00196 2) The second major twist involves the c_index, c_cols and c_vals arrays. The arrays c_cols 00197 and c_vals store the *local* column index and values accumulator respectively. These 00198 arrays are allocated to a size equal to the max number of local columns in C, namely C_numcols. 00199 The value c_current tells us how many non-zeros we currently have in this row. 00200 00201 So how do we take a LCID and find the right accumulator? This is where the c_index array 00202 comes in. At the start (and stop) and the i loop, c_index is filled with -1's. Now 00203 whenever we find a LCID in the k loop, we first loop at c_index[lcid]. If this value is 00204 -1 we haven't seen this entry yet. In which case we add the appropriate stuff to c_cols 00205 and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before 00206 we increment it). If the value is NOT -1, this tells us the location in the c_vals/c_cols 00207 arrays (namely c_index[lcid]) where our accumulator lives. 00208 00209 This trick works because we're working with local ids. We can then loop from 0 to c_current 00210 and reset c_index to -1's when we're done, only touching the arrays that have changed. 00211 While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues. 00212 Thus, the effect of this trick is to avoid passes over the index array. 00213 00214 3) The third major twist involves handling the remote and local components of B separately. 00215 (ML doesn't need to do this, because its local ordering scheme is consistent between the local 00216 and imported components of B.) Since they have different column maps, they have inconsistent 00217 local column ids. This means the "second twist" won't work as stated on both matrices at the 00218 same time. While this could be handled any number of ways, I have chosen to do the two parts 00219 of B separately to make the code easier to read (and reduce the memory footprint of the MMM). 00220 */ 00221 00222 // Local matrix: Zero Current counts for matrix 00223 int c_current=0; 00224 00225 // Local matrix: Do the "middle product" 00226 for(k=0; k<Aview.numEntriesPerRow[i]; ++k) { 00227 int Ak=Acol2Brow[Aindices_i[k]]; 00228 double Aval = Aval_i[k]; 00229 // We're skipping remote entries on this pass. 00230 if(Bview.remote[Ak] || Aval==0) continue; 00231 00232 int* Bcol_inds = Bview.indices[Ak]; 00233 double* Bvals_k = Bview.values[Ak]; 00234 00235 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) { 00236 int col=Bcol_inds[j]; 00237 if(c_index[col]<0){ 00238 // We haven't seen this entry before; add it. (In ML, on 00239 // the first pass, you haven't seen any of the entries 00240 // before, so they are added without the check. Not sure 00241 // how much more efficient that would be; depends on branch 00242 // prediction. We've favored code readability here.) 00243 c_cols[c_current]=col; 00244 c_vals[c_current]=Aval*Bvals_k[j]; 00245 c_index[col]=c_current; 00246 c_current++; 00247 } 00248 else{ 00249 // We've already seen this entry; accumulate it. 00250 c_vals[c_index[col]]+=Aval*Bvals_k[j]; 00251 } 00252 } 00253 } 00254 // Local matrix: Reset c_index and switch c_cols to GIDs 00255 for(k=0; k<c_current; k++){ 00256 c_index[c_cols[k]]=-1; 00257 c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs. 00258 } 00259 // Local matrix: Insert. 00260 // 00261 // We should check global error results after the algorithm is 00262 // through. It's probably safer just to let the algorithm run all 00263 // the way through before doing this, since otherwise we have to 00264 // remember to free all allocations carefully. 00265 int err = C_filled ? 00266 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols) 00267 : 00268 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols); 00269 00270 // Remote matrix: Zero current counts again for matrix 00271 c_current=0; 00272 00273 // Remote matrix: Do the "middle product" 00274 for(k=0; k<Aview.numEntriesPerRow[i]; ++k) { 00275 int Ak=Acol2Brow[Aindices_i[k]]; 00276 double Aval = Aval_i[k]; 00277 // We're skipping local entries on this pass. 00278 if(!Bview.remote[Ak] || Aval==0) continue; 00279 00280 int* Bcol_inds = Bview.indices[Ak]; 00281 double* Bvals_k = Bview.values[Ak]; 00282 00283 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) { 00284 int col=Bcol_inds[j]; 00285 if(c_index[col]<0){ 00286 c_cols[c_current]=col; 00287 c_vals[c_current]=Aval*Bvals_k[j]; 00288 c_index[col]=c_current; 00289 c_current++; 00290 } 00291 else{ 00292 c_vals[c_index[col]]+=Aval*Bvals_k[j]; 00293 } 00294 } 00295 } 00296 // Remote matrix: Reset c_index and switch c_cols to GIDs 00297 for(k=0; k<c_current; k++){ 00298 c_index[c_cols[k]]=-1; 00299 c_cols[k]=bcols_import[c_cols[k]]; 00300 } 00301 // Remove matrix: Insert 00302 // 00303 // See above (on error handling). 00304 err = C_filled ? 00305 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols) 00306 : 00307 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols); 00308 } 00309 00310 00311 delete [] dwork; 00312 delete [] iwork; 00313 delete [] c_index; 00314 delete [] Acol2Brow; 00315 return(0); 00316 } 00317 00318 //kernel method for computing the local portion of C = A*B^T 00319 int mult_A_Btrans(CrsMatrixStruct& Aview, 00320 CrsMatrixStruct& Bview, 00321 CrsWrapper& C) 00322 { 00323 int i, j, k; 00324 int returnValue = 0; 00325 00326 int maxlen = 0; 00327 for(i=0; i<Aview.numRows; ++i) { 00328 if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i]; 00329 } 00330 for(i=0; i<Bview.numRows; ++i) { 00331 if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i]; 00332 } 00333 00334 //cout << "Aview: " << endl; 00335 //dumpCrsMatrixStruct(Aview); 00336 00337 //cout << "Bview: " << endl; 00338 //dumpCrsMatrixStruct(Bview); 00339 00340 int numBcols = Bview.colMap->NumMyElements(); 00341 int numBrows = Bview.numRows; 00342 00343 int iworklen = maxlen*2 + numBcols; 00344 int* iwork = new int[iworklen]; 00345 00346 int* bcols = iwork+maxlen*2; 00347 int* bgids = Bview.colMap->MyGlobalElements(); 00348 double* bvals = new double[maxlen*2]; 00349 double* avals = bvals+maxlen; 00350 00351 int max_all_b = Bview.colMap->MaxAllGID(); 00352 int min_all_b = Bview.colMap->MinAllGID(); 00353 00354 //bcols will hold the GIDs from B's column-map for fast access 00355 //during the computations below 00356 for(i=0; i<numBcols; ++i) { 00357 int blid = Bview.colMap->LID(bgids[i]); 00358 bcols[blid] = bgids[i]; 00359 } 00360 00361 //next create arrays indicating the first and last column-index in 00362 //each row of B, so that we can know when to skip certain rows below. 00363 //This will provide a large performance gain for banded matrices, and 00364 //a somewhat smaller gain for *most* other matrices. 00365 int* b_firstcol = new int[2*numBrows]; 00366 int* b_lastcol = b_firstcol+numBrows; 00367 int temp; 00368 for(i=0; i<numBrows; ++i) { 00369 b_firstcol[i] = max_all_b; 00370 b_lastcol[i] = min_all_b; 00371 00372 int Blen_i = Bview.numEntriesPerRow[i]; 00373 if (Blen_i < 1) continue; 00374 int* Bindices_i = Bview.indices[i]; 00375 00376 if (Bview.remote[i]) { 00377 for(k=0; k<Blen_i; ++k) { 00378 temp = Bview.importColMap->GID(Bindices_i[k]); 00379 if (temp < b_firstcol[i]) b_firstcol[i] = temp; 00380 if (temp > b_lastcol[i]) b_lastcol[i] = temp; 00381 } 00382 } 00383 else { 00384 for(k=0; k<Blen_i; ++k) { 00385 temp = bcols[Bindices_i[k]]; 00386 if (temp < b_firstcol[i]) b_firstcol[i] = temp; 00387 if (temp > b_lastcol[i]) b_lastcol[i] = temp; 00388 } 00389 } 00390 } 00391 00392 Epetra_Util util; 00393 00394 int* Aind = iwork; 00395 int* Bind = iwork+maxlen; 00396 00397 bool C_filled = C.Filled(); 00398 00399 //To form C = A*B^T, we're going to execute this expression: 00400 // 00401 // C(i,j) = sum_k( A(i,k)*B(j,k) ) 00402 // 00403 //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T). 00404 //But it requires the use of a 'sparsedot' function (we're simply forming 00405 //dot-products with row A_i and row B_j for all i and j). 00406 00407 //loop over the rows of A. 00408 for(i=0; i<Aview.numRows; ++i) { 00409 if (Aview.remote[i]) { 00410 continue; 00411 } 00412 00413 int* Aindices_i = Aview.indices[i]; 00414 double* Aval_i = Aview.values[i]; 00415 int A_len_i = Aview.numEntriesPerRow[i]; 00416 if (A_len_i < 1) { 00417 continue; 00418 } 00419 00420 for(k=0; k<A_len_i; ++k) { 00421 Aind[k] = Aview.colMap->GID(Aindices_i[k]); 00422 avals[k] = Aval_i[k]; 00423 } 00424 00425 util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL); 00426 00427 int mina = Aind[0]; 00428 int maxa = Aind[A_len_i-1]; 00429 00430 if (mina > max_all_b || maxa < min_all_b) { 00431 continue; 00432 } 00433 00434 int global_row = Aview.rowMap->GID(i); 00435 00436 //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:)) 00437 for(j=0; j<Bview.numRows; ++j) { 00438 if (b_firstcol[j] > maxa || b_lastcol[j] < mina) { 00439 continue; 00440 } 00441 00442 int* Bindices_j = Bview.indices[j]; 00443 int B_len_j = Bview.numEntriesPerRow[j]; 00444 if (B_len_j < 1) { 00445 continue; 00446 } 00447 00448 int tmp, Blen = 0; 00449 00450 if (Bview.remote[j]) { 00451 for(k=0; k<B_len_j; ++k) { 00452 tmp = Bview.importColMap->GID(Bindices_j[k]); 00453 if (tmp < mina || tmp > maxa) { 00454 continue; 00455 } 00456 00457 bvals[Blen] = Bview.values[j][k]; 00458 Bind[Blen++] = tmp; 00459 } 00460 } 00461 else { 00462 for(k=0; k<B_len_j; ++k) { 00463 tmp = bcols[Bindices_j[k]]; 00464 if (tmp < mina || tmp > maxa) { 00465 continue; 00466 } 00467 00468 bvals[Blen] = Bview.values[j][k]; 00469 Bind[Blen++] = tmp; 00470 } 00471 } 00472 00473 if (Blen < 1) { 00474 continue; 00475 } 00476 00477 util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL); 00478 00479 double C_ij = sparsedot(avals, Aind, A_len_i, 00480 bvals, Bind, Blen); 00481 00482 if (C_ij == 0.0) { 00483 continue; 00484 } 00485 int global_col = Bview.rowMap->GID(j); 00486 00487 int err = C_filled ? 00488 C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col) 00489 : 00490 C.InsertGlobalValues(global_row, 1, &C_ij, &global_col); 00491 00492 if (err < 0) { 00493 return(err); 00494 } 00495 if (err > 0) { 00496 if (C_filled) { 00497 //C.Filled()==true, and C doesn't have all the necessary nonzero 00498 //locations, or global_row or global_col is out of range (less 00499 //than 0 or non local). 00500 std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed " 00501 << "to insert value in result matrix at position "<<global_row 00502 <<"," <<global_col<<", possibly because result matrix has a " 00503 << "column-map that doesn't include column "<<global_col 00504 <<" on this proc." <<std::endl; 00505 return(err); 00506 } 00507 } 00508 } 00509 } 00510 00511 delete [] iwork; 00512 delete [] bvals; 00513 delete [] b_firstcol; 00514 00515 return(returnValue); 00516 } 00517 00518 //kernel method for computing the local portion of C = A^T*B 00519 int mult_Atrans_B(CrsMatrixStruct& Aview, 00520 CrsMatrixStruct& Bview, 00521 CrsWrapper& C) 00522 { 00523 int C_firstCol = Bview.colMap->MinLID(); 00524 int C_lastCol = Bview.colMap->MaxLID(); 00525 00526 int C_firstCol_import = 0; 00527 int C_lastCol_import = -1; 00528 00529 if (Bview.importColMap != NULL) { 00530 C_firstCol_import = Bview.importColMap->MinLID(); 00531 C_lastCol_import = Bview.importColMap->MaxLID(); 00532 } 00533 00534 int C_numCols = C_lastCol - C_firstCol + 1; 00535 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00536 00537 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00538 00539 double* C_row_i = new double[C_numCols]; 00540 int* C_colInds = new int[C_numCols]; 00541 00542 int i, j, k; 00543 00544 for(j=0; j<C_numCols; ++j) { 00545 C_row_i[j] = 0.0; 00546 C_colInds[j] = 0; 00547 } 00548 00549 //To form C = A^T*B, compute a series of outer-product updates. 00550 // 00551 // for (ith column of A^T) { 00552 // C_i = outer product of A^T(:,i) and B(i,:) 00553 // Where C_i is the ith matrix update, 00554 // A^T(:,i) is the ith column of A^T, and 00555 // B(i,:) is the ith row of B. 00556 // 00557 00558 //dumpCrsMatrixStruct(Aview); 00559 //dumpCrsMatrixStruct(Bview); 00560 int localProc = Bview.colMap->Comm().MyPID(); 00561 00562 int* Arows = Aview.rowMap->MyGlobalElements(); 00563 00564 bool C_filled = C.Filled(); 00565 00566 //loop over the rows of A (which are the columns of A^T). 00567 for(i=0; i<Aview.numRows; ++i) { 00568 00569 int* Aindices_i = Aview.indices[i]; 00570 double* Aval_i = Aview.values[i]; 00571 00572 //we'll need to get the row of B corresponding to Arows[i], 00573 //where Arows[i] is the GID of A's ith row. 00574 int Bi = Bview.rowMap->LID(Arows[i]); 00575 if (Bi<0) { 00576 cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row " 00577 <<Arows[i]<<" of matrix B, but doesn't have it."<<endl; 00578 return(-1); 00579 } 00580 00581 int* Bcol_inds = Bview.indices[Bi]; 00582 double* Bvals_i = Bview.values[Bi]; 00583 00584 //for each column-index Aj in the i-th row of A, we'll update 00585 //global-row GID(Aj) of the result matrix C. In that row of C, 00586 //we'll update column-indices given by the column-indices in the 00587 //ith row of B that we're now holding (Bcol_inds). 00588 00589 //First create a list of GIDs for the column-indices 00590 //that we'll be updating. 00591 00592 int Blen = Bview.numEntriesPerRow[Bi]; 00593 if (Bview.remote[Bi]) { 00594 for(j=0; j<Blen; ++j) { 00595 C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]); 00596 } 00597 } 00598 else { 00599 for(j=0; j<Blen; ++j) { 00600 C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]); 00601 } 00602 } 00603 00604 //loop across the i-th row of A (column of A^T) 00605 for(j=0; j<Aview.numEntriesPerRow[i]; ++j) { 00606 00607 int Aj = Aindices_i[j]; 00608 double Aval = Aval_i[j]; 00609 00610 int global_row; 00611 if (Aview.remote[i]) { 00612 global_row = Aview.importColMap->GID(Aj); 00613 } 00614 else { 00615 global_row = Aview.colMap->GID(Aj); 00616 } 00617 00618 if (!C.RowMap().MyGID(global_row)) { 00619 continue; 00620 } 00621 00622 for(k=0; k<Blen; ++k) { 00623 C_row_i[k] = Aval*Bvals_i[k]; 00624 } 00625 00626 // 00627 //Now add this row-update to C. 00628 // 00629 00630 int err = C_filled ? 00631 C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds) 00632 : 00633 C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds); 00634 00635 if (err < 0) { 00636 return(err); 00637 } 00638 if (err > 0) { 00639 if (C_filled) { 00640 //C is Filled, and doesn't have all the necessary nonzero locations. 00641 return(err); 00642 } 00643 } 00644 } 00645 } 00646 00647 delete [] C_row_i; 00648 delete [] C_colInds; 00649 00650 return(0); 00651 } 00652 00653 //kernel method for computing the local portion of C = A^T*B^T 00654 int mult_Atrans_Btrans(CrsMatrixStruct& Aview, 00655 CrsMatrixStruct& Bview, 00656 CrsWrapper& C) 00657 { 00658 int C_firstCol = Aview.rowMap->MinLID(); 00659 int C_lastCol = Aview.rowMap->MaxLID(); 00660 00661 int C_firstCol_import = 0; 00662 int C_lastCol_import = -1; 00663 00664 if (Aview.importColMap != NULL) { 00665 C_firstCol_import = Aview.importColMap->MinLID(); 00666 C_lastCol_import = Aview.importColMap->MaxLID(); 00667 } 00668 00669 int C_numCols = C_lastCol - C_firstCol + 1; 00670 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00671 00672 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00673 00674 double* dwork = new double[C_numCols]; 00675 int* iwork = new int[C_numCols]; 00676 00677 double* C_col_j = dwork; 00678 int* C_inds = iwork; 00679 00680 //cout << "Aview: " << endl; 00681 //dumpCrsMatrixStruct(Aview); 00682 00683 //cout << "Bview: " << endl; 00684 //dumpCrsMatrixStruct(Bview); 00685 00686 00687 int i, j, k; 00688 00689 for(j=0; j<C_numCols; ++j) { 00690 C_col_j[j] = 0.0; 00691 C_inds[j] = -1; 00692 } 00693 00694 int* A_col_inds = Aview.colMap->MyGlobalElements(); 00695 int* A_col_inds_import = Aview.importColMap ? 00696 Aview.importColMap->MyGlobalElements() : 0; 00697 00698 const Epetra_Map* Crowmap = &(C.RowMap()); 00699 00700 //To form C = A^T*B^T, we're going to execute this expression: 00701 // 00702 // C(i,j) = sum_k( A(k,i)*B(j,k) ) 00703 // 00704 //Our goal, of course, is to navigate the data in A and B once, without 00705 //performing searches for column-indices, etc. In other words, we avoid 00706 //column-wise operations like the plague... 00707 00708 int* Brows = Bview.rowMap->MyGlobalElements(); 00709 00710 //loop over the rows of B 00711 for(j=0; j<Bview.numRows; ++j) { 00712 int* Bindices_j = Bview.indices[j]; 00713 double* Bvals_j = Bview.values[j]; 00714 00715 int global_col = Brows[j]; 00716 00717 //loop across columns in the j-th row of B and for each corresponding 00718 //row in A, loop across columns and accumulate product 00719 //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other 00720 //words, as we stride across B(j,:), we use selected rows in A to 00721 //calculate updates for column j of the result matrix C. 00722 00723 for(k=0; k<Bview.numEntriesPerRow[j]; ++k) { 00724 00725 int bk = Bindices_j[k]; 00726 double Bval = Bvals_j[k]; 00727 00728 int global_k; 00729 if (Bview.remote[j]) { 00730 global_k = Bview.importColMap->GID(bk); 00731 } 00732 else { 00733 global_k = Bview.colMap->GID(bk); 00734 } 00735 00736 //get the corresponding row in A 00737 int ak = Aview.rowMap->LID(global_k); 00738 if (ak<0) { 00739 continue; 00740 } 00741 00742 int* Aindices_k = Aview.indices[ak]; 00743 double* Avals_k = Aview.values[ak]; 00744 00745 int C_len = 0; 00746 00747 if (Aview.remote[ak]) { 00748 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) { 00749 C_col_j[C_len] = Avals_k[i]*Bval; 00750 C_inds[C_len++] = A_col_inds_import[Aindices_k[i]]; 00751 } 00752 } 00753 else { 00754 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) { 00755 C_col_j[C_len] = Avals_k[i]*Bval; 00756 C_inds[C_len++] = A_col_inds[Aindices_k[i]]; 00757 } 00758 } 00759 00760 //Now loop across the C_col_j values and put non-zeros into C. 00761 00762 for(i=0; i < C_len ; ++i) { 00763 if (C_col_j[i] == 0.0) continue; 00764 00765 int global_row = C_inds[i]; 00766 if (!Crowmap->MyGID(global_row)) { 00767 continue; 00768 } 00769 00770 int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col); 00771 00772 if (err < 0) { 00773 return(err); 00774 } 00775 else { 00776 if (err > 0) { 00777 err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col); 00778 if (err < 0) { 00779 return(err); 00780 } 00781 } 00782 } 00783 } 00784 00785 } 00786 } 00787 00788 delete [] dwork; 00789 delete [] iwork; 00790 00791 return(0); 00792 } 00793 00794 int import_and_extract_views(const Epetra_CrsMatrix& M, 00795 const Epetra_Map& targetMap, 00796 CrsMatrixStruct& Mview) 00797 { 00798 //The goal of this method is to populate the 'Mview' struct with views of the 00799 //rows of M, including all rows that correspond to elements in 'targetMap'. 00800 // 00801 //If targetMap includes local elements that correspond to remotely-owned rows 00802 //of M, then those remotely-owned rows will be imported into 00803 //'Mview.importMatrix', and views of them will be included in 'Mview'. 00804 00805 Mview.deleteContents(); 00806 00807 const Epetra_Map& Mrowmap = M.RowMap(); 00808 00809 int numProcs = Mrowmap.Comm().NumProc(); 00810 00811 Mview.numRows = targetMap.NumMyElements(); 00812 00813 int* Mrows = targetMap.MyGlobalElements(); 00814 00815 if (Mview.numRows > 0) { 00816 Mview.numEntriesPerRow = new int[Mview.numRows]; 00817 Mview.indices = new int*[Mview.numRows]; 00818 Mview.values = new double*[Mview.numRows]; 00819 Mview.remote = new bool[Mview.numRows]; 00820 } 00821 00822 Mview.numRemote = 0; 00823 00824 int i; 00825 for(i=0; i<Mview.numRows; ++i) { 00826 int mlid = Mrowmap.LID(Mrows[i]); 00827 if (mlid < 0) { 00828 Mview.remote[i] = true; 00829 ++Mview.numRemote; 00830 } 00831 else { 00832 EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i], 00833 Mview.values[i], Mview.indices[i]) ); 00834 Mview.remote[i] = false; 00835 } 00836 } 00837 00838 Mview.origRowMap = &(M.RowMap()); 00839 Mview.rowMap = &targetMap; 00840 Mview.colMap = &(M.ColMap()); 00841 Mview.domainMap = &(M.DomainMap()); 00842 Mview.importColMap = NULL; 00843 00844 if (numProcs < 2) { 00845 if (Mview.numRemote > 0) { 00846 cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but " 00847 << "attempting to import remote matrix rows."<<endl; 00848 return(-1); 00849 } 00850 00851 //If only one processor we don't need to import any remote rows, so return. 00852 return(0); 00853 } 00854 00855 // 00856 //Now we will import the needed remote rows of M, if the global maximum 00857 //value of numRemote is greater than 0. 00858 // 00859 00860 int globalMaxNumRemote = 0; 00861 Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1); 00862 00863 if (globalMaxNumRemote > 0) { 00864 //Create a map that describes the remote rows of M that we need. 00865 00866 int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL; 00867 int offset = 0; 00868 for(i=0; i<Mview.numRows; ++i) { 00869 if (Mview.remote[i]) { 00870 MremoteRows[offset++] = Mrows[i]; 00871 } 00872 } 00873 00874 Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows, 00875 Mrowmap.IndexBase(), Mrowmap.Comm()); 00876 00877 //Create an importer with target-map MremoteRowMap and 00878 //source-map Mrowmap. 00879 Epetra_Import importer(MremoteRowMap, Mrowmap); 00880 00881 //Now create a new matrix into which we can import the remote rows of M 00882 //that we need. 00883 Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1); 00884 00885 EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) ); 00886 00887 EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) ); 00888 00889 //Finally, use the freshly imported data to fill in the gaps in our views 00890 //of rows of M. 00891 for(i=0; i<Mview.numRows; ++i) { 00892 if (Mview.remote[i]) { 00893 int importLID = MremoteRowMap.LID(Mrows[i]); 00894 EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID, 00895 Mview.numEntriesPerRow[i], 00896 Mview.values[i], 00897 Mview.indices[i]) ); 00898 } 00899 } 00900 00901 Mview.importColMap = &(Mview.importMatrix->ColMap()); 00902 00903 delete [] MremoteRows; 00904 } 00905 00906 return(0); 00907 } 00908 00909 int form_map_union(const Epetra_Map* map1, 00910 const Epetra_Map* map2, 00911 const Epetra_Map*& mapunion) 00912 { 00913 //form the union of two maps 00914 00915 if (map1 == NULL) { 00916 mapunion = new Epetra_Map(*map2); 00917 return(0); 00918 } 00919 00920 if (map2 == NULL) { 00921 mapunion = new Epetra_Map(*map1); 00922 return(0); 00923 } 00924 00925 int map1_len = map1->NumMyElements(); 00926 int* map1_elements = map1->MyGlobalElements(); 00927 int map2_len = map2->NumMyElements(); 00928 int* map2_elements = map2->MyGlobalElements(); 00929 00930 int* union_elements = new int[map1_len+map2_len]; 00931 00932 int map1_offset = 0, map2_offset = 0, union_offset = 0; 00933 00934 while(map1_offset < map1_len && map2_offset < map2_len) { 00935 int map1_elem = map1_elements[map1_offset]; 00936 int map2_elem = map2_elements[map2_offset]; 00937 00938 if (map1_elem < map2_elem) { 00939 union_elements[union_offset++] = map1_elem; 00940 ++map1_offset; 00941 } 00942 else if (map1_elem > map2_elem) { 00943 union_elements[union_offset++] = map2_elem; 00944 ++map2_offset; 00945 } 00946 else { 00947 union_elements[union_offset++] = map1_elem; 00948 ++map1_offset; 00949 ++map2_offset; 00950 } 00951 } 00952 00953 int i; 00954 for(i=map1_offset; i<map1_len; ++i) { 00955 union_elements[union_offset++] = map1_elements[i]; 00956 } 00957 00958 for(i=map2_offset; i<map2_len; ++i) { 00959 union_elements[union_offset++] = map2_elements[i]; 00960 } 00961 00962 mapunion = new Epetra_Map(-1, union_offset, union_elements, 00963 map1->IndexBase(), map1->Comm()); 00964 00965 delete [] union_elements; 00966 00967 return(0); 00968 } 00969 00970 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M, 00971 const Epetra_Map& column_map) 00972 { 00973 //The goal of this function is to find all rows in the matrix M that contain 00974 //column-indices which are in 'column_map'. A map containing those rows is 00975 //returned. (The returned map will then be used as the source row-map for 00976 //importing those rows into an overlapping distribution.) 00977 00978 std::pair<int,int> my_col_range = get_col_range(column_map); 00979 00980 const Epetra_Comm& Comm = M.RowMap().Comm(); 00981 int num_procs = Comm.NumProc(); 00982 int my_proc = Comm.MyPID(); 00983 std::vector<int> send_procs; 00984 send_procs.reserve(num_procs-1); 00985 std::vector<int> col_ranges; 00986 col_ranges.reserve(2*(num_procs-1)); 00987 for(int p=0; p<num_procs; ++p) { 00988 if (p == my_proc) continue; 00989 send_procs.push_back(p); 00990 col_ranges.push_back(my_col_range.first); 00991 col_ranges.push_back(my_col_range.second); 00992 } 00993 00994 Epetra_Distributor* distor = Comm.CreateDistributor(); 00995 00996 int num_recv_procs = 0; 00997 int num_send_procs = send_procs.size(); 00998 bool deterministic = true; 00999 int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL; 01000 distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs); 01001 01002 int len_import_chars = 0; 01003 char* import_chars = NULL; 01004 01005 char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL; 01006 int err = distor->Do(export_chars, 2*sizeof(int), len_import_chars, import_chars); 01007 if (err != 0) { 01008 std::cout << "ERROR returned from Distributor::Do."<<std::endl; 01009 } 01010 01011 int* IntImports = reinterpret_cast<int*>(import_chars); 01012 int num_import_pairs = len_import_chars/(2*sizeof(int)); 01013 int offset = 0; 01014 std::vector<int> send_procs2; 01015 send_procs2.reserve(num_procs); 01016 std::vector<int> proc_col_ranges; 01017 std::pair<int,int> M_col_range = get_col_range(M); 01018 for(int i=0; i<num_import_pairs; ++i) { 01019 int first_col = IntImports[offset++]; 01020 int last_col = IntImports[offset++]; 01021 if (first_col <= M_col_range.second && last_col >= M_col_range.first) { 01022 send_procs2.push_back(send_procs[i]); 01023 proc_col_ranges.push_back(first_col); 01024 proc_col_ranges.push_back(last_col); 01025 } 01026 } 01027 01028 std::vector<int> send_rows; 01029 std::vector<int> rows_per_send_proc; 01030 pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc); 01031 01032 Epetra_Distributor* distor2 = Comm.CreateDistributor(); 01033 01034 int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL; 01035 num_recv_procs = 0; 01036 err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs); 01037 01038 export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL; 01039 int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL; 01040 len_import_chars = 0; 01041 err = distor2->Do(export_chars, (int)sizeof(int), rows_per_send_proc_ptr, len_import_chars, import_chars); 01042 01043 int* recvd_row_ints = reinterpret_cast<int*>(import_chars); 01044 int num_recvd_rows = len_import_chars/sizeof(int); 01045 01046 Epetra_Map recvd_rows(-1, num_recvd_rows, recvd_row_ints, column_map.IndexBase(), Comm); 01047 01048 delete distor; 01049 delete distor2; 01050 delete [] import_chars; 01051 01052 Epetra_Map* result_map = NULL; 01053 01054 err = form_map_union(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map); 01055 if (err != 0) { 01056 return(NULL); 01057 } 01058 01059 return(result_map); 01060 } 01061 01062 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A, 01063 bool transposeA, 01064 const Epetra_CrsMatrix& B, 01065 bool transposeB, 01066 Epetra_CrsMatrix& C, 01067 bool call_FillComplete_on_result) 01068 { 01069 // 01070 //This method forms the matrix-matrix product C = op(A) * op(B), where 01071 //op(A) == A if transposeA is false, 01072 //op(A) == A^T if transposeA is true, 01073 //and similarly for op(B). 01074 // 01075 01076 //A and B should already be Filled. 01077 //(Should we go ahead and call FillComplete() on them if necessary? 01078 // or error out? For now, we choose to error out.) 01079 if (!A.Filled() || !B.Filled()) { 01080 EPETRA_CHK_ERR(-1); 01081 } 01082 01083 //We're going to refer to the different combinations of op(A) and op(B) 01084 //as scenario 1 through 4. 01085 01086 int scenario = 1;//A*B 01087 if (transposeB && !transposeA) scenario = 2;//A*B^T 01088 if (transposeA && !transposeB) scenario = 3;//A^T*B 01089 if (transposeA && transposeB) scenario = 4;//A^T*B^T 01090 01091 //now check size compatibility 01092 int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows(); 01093 int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols(); 01094 int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols(); 01095 int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows(); 01096 if (Ainner != Binner) { 01097 cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) " 01098 << "must match for matrix-matrix product. op(A) is " 01099 <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl; 01100 return(-1); 01101 } 01102 01103 //The result matrix C must at least have a row-map that reflects the 01104 //correct row-size. Don't check the number of columns because rectangular 01105 //matrices which were constructed with only one map can still end up 01106 //having the correct capacity and dimensions when filled. 01107 if (Aouter > C.NumGlobalRows()) { 01108 cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 01109 << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows() 01110 << " rows, should have at least "<<Aouter << endl; 01111 return(-1); 01112 } 01113 01114 //It doesn't matter whether C is already Filled or not. If it is already 01115 //Filled, it must have space allocated for the positions that will be 01116 //referenced in forming C = op(A)*op(B). If it doesn't have enough space, 01117 //we'll error out later when trying to store result values. 01118 01119 //We're going to need to import remotely-owned sections of A and/or B 01120 //if more than 1 processor is performing this run, depending on the scenario. 01121 int numProcs = A.Comm().NumProc(); 01122 01123 //If we are to use the transpose of A and/or B, we'll need to be able to 01124 //access, on the local processor, all rows that contain column-indices in 01125 //the domain-map. 01126 const Epetra_Map* domainMap_A = &(A.DomainMap()); 01127 const Epetra_Map* domainMap_B = &(B.DomainMap()); 01128 01129 const Epetra_Map* rowmap_A = &(A.RowMap()); 01130 const Epetra_Map* rowmap_B = &(B.RowMap()); 01131 01132 //Declare some 'work-space' maps which may be created depending on 01133 //the scenario, and which will be deleted before exiting this function. 01134 const Epetra_Map* workmap1 = NULL; 01135 const Epetra_Map* workmap2 = NULL; 01136 const Epetra_Map* mapunion1 = NULL; 01137 01138 //Declare a couple of structs that will be used to hold views of the data 01139 //of A and B, to be used for fast access during the matrix-multiplication. 01140 CrsMatrixStruct Aview; 01141 CrsMatrixStruct Bview; 01142 01143 const Epetra_Map* targetMap_A = rowmap_A; 01144 const Epetra_Map* targetMap_B = rowmap_B; 01145 01146 if (numProcs > 1) { 01147 //If op(A) = A^T, find all rows of A that contain column-indices in the 01148 //local portion of the domain-map. (We'll import any remote rows 01149 //that fit this criteria onto the local processor.) 01150 if (transposeA) { 01151 workmap1 = find_rows_containing_cols(A, *domainMap_A); 01152 targetMap_A = workmap1; 01153 } 01154 } 01155 01156 //Now import any needed remote rows and populate the Aview struct. 01157 EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) ); 01158 01159 01160 // Make sure B's views are consistent with A even in serial for A*B (scenario 1) 01161 if(scenario==1){ 01162 targetMap_B = &(A.ColMap()); 01163 } 01164 01165 if (numProcs > 1) { 01166 // Get the target map 01167 const Epetra_Map* colmap_op_A = NULL; 01168 if (transposeA) { 01169 colmap_op_A = targetMap_A; 01170 } 01171 else { 01172 colmap_op_A = &(A.ColMap()); 01173 } 01174 targetMap_B = colmap_op_A; 01175 01176 01177 //If op(B) = B^T, find all rows of B that contain column-indices in the 01178 //local-portion of the domain-map, or in the column-map of op(A). 01179 //We'll import any remote rows that fit this criteria onto the 01180 //local processor. 01181 if (transposeB) { 01182 EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) ); 01183 workmap2 = find_rows_containing_cols(B, *mapunion1); 01184 targetMap_B = workmap2; 01185 } 01186 } 01187 01188 //Now import any needed remote rows and populate the Bview struct. 01189 EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) ); 01190 01191 // Zero if filled 01192 if(C.Filled()) C.PutScalar(0.0); 01193 01194 //Now call the appropriate method to perform the actual multiplication. 01195 CrsWrapper_Epetra_CrsMatrix ecrsmat(C); 01196 01197 switch(scenario) { 01198 case 1: EPETRA_CHK_ERR( mult_A_B(Aview, Bview, ecrsmat) ); 01199 break; 01200 case 2: EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) ); 01201 break; 01202 case 3: EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) ); 01203 break; 01204 case 4: EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) ); 01205 break; 01206 } 01207 01208 if (call_FillComplete_on_result) { 01209 //We'll call FillComplete on the C matrix before we exit, and give 01210 //it a domain-map and a range-map. 01211 //The domain-map will be the domain-map of B, unless 01212 //op(B)==transpose(B), in which case the range-map of B will be used. 01213 //The range-map will be the range-map of A, unless 01214 //op(A)==transpose(A), in which case the domain-map of A will be used. 01215 01216 const Epetra_Map* domainmap = 01217 transposeB ? &(B.RangeMap()) : &(B.DomainMap()); 01218 01219 const Epetra_Map* rangemap = 01220 transposeA ? &(A.DomainMap()) : &(A.RangeMap()); 01221 01222 if (!C.Filled()) { 01223 EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) ); 01224 } 01225 } 01226 01227 //Finally, delete the objects that were potentially created 01228 //during the course of importing remote sections of A and B. 01229 01230 delete mapunion1; mapunion1 = NULL; 01231 delete workmap1; workmap1 = NULL; 01232 delete workmap2; workmap2 = NULL; 01233 01234 return(0); 01235 } 01236 01237 int MatrixMatrix::Add(const Epetra_CrsMatrix& A, 01238 bool transposeA, 01239 double scalarA, 01240 Epetra_CrsMatrix& B, 01241 double scalarB ) 01242 { 01243 // 01244 //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where 01245 01246 //A should already be Filled. It doesn't matter whether B is 01247 //already Filled, but if it is, then its graph must already contain 01248 //all nonzero locations that will be referenced in forming the 01249 //sum. 01250 01251 if (!A.Filled() ) { 01252 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl; 01253 EPETRA_CHK_ERR(-1); 01254 } 01255 01256 //explicit tranpose A formed as necessary 01257 Epetra_CrsMatrix * Aprime = 0; 01258 EpetraExt::RowMatrix_Transpose * Atrans = 0; 01259 if( transposeA ) 01260 { 01261 Atrans = new EpetraExt::RowMatrix_Transpose(); 01262 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A))))); 01263 } 01264 else 01265 Aprime = const_cast<Epetra_CrsMatrix*>(&A); 01266 01267 int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() ); 01268 int A_NumEntries, B_NumEntries; 01269 int * A_Indices = new int[MaxNumEntries]; 01270 double * A_Values = new double[MaxNumEntries]; 01271 int* B_Indices; 01272 double* B_Values; 01273 01274 int NumMyRows = B.NumMyRows(); 01275 int Row, err; 01276 int ierr = 0; 01277 01278 if( scalarA ) 01279 { 01280 //Loop over B's rows and sum into 01281 for( int i = 0; i < NumMyRows; ++i ) 01282 { 01283 Row = B.GRID(i); 01284 EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) ); 01285 01286 if (scalarB != 1.0) { 01287 if (!B.Filled()) { 01288 EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries, 01289 B_Values, B_Indices)); 01290 } 01291 else { 01292 EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries, 01293 B_Values, B_Indices)); 01294 } 01295 01296 for(int jj=0; jj<B_NumEntries; ++jj) { 01297 B_Values[jj] = scalarB*B_Values[jj]; 01298 } 01299 } 01300 01301 if( scalarA != 1.0 ) { 01302 for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA; 01303 } 01304 01305 if( B.Filled() ) {//Sum In Values 01306 err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices ); 01307 assert( err >= 0 ); 01308 if (err < 0) ierr = err; 01309 } 01310 else { 01311 err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices ); 01312 assert( err == 0 || err == 1 || err == 3 ); 01313 if (err < 0) ierr = err; 01314 } 01315 } 01316 } 01317 else { 01318 EPETRA_CHK_ERR( B.Scale(scalarB) ); 01319 } 01320 01321 delete [] A_Indices; 01322 delete [] A_Values; 01323 01324 if( Atrans ) delete Atrans; 01325 01326 return(ierr); 01327 } 01328 01329 int MatrixMatrix::Add(const Epetra_CrsMatrix& A, 01330 bool transposeA, 01331 double scalarA, 01332 const Epetra_CrsMatrix & B, 01333 bool transposeB, 01334 double scalarB, 01335 Epetra_CrsMatrix * & C) 01336 { 01337 // 01338 //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where 01339 01340 //A and B should already be Filled. C should be an empty pointer. 01341 01342 if (!A.Filled() || !B.Filled() ) { 01343 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false," 01344 << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl; 01345 EPETRA_CHK_ERR(-1); 01346 } 01347 01348 Epetra_CrsMatrix * Aprime = 0, * Bprime=0; 01349 EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0; 01350 01351 //explicit tranpose A formed as necessary 01352 if( transposeA ) { 01353 Atrans = new EpetraExt::RowMatrix_Transpose(); 01354 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A))))); 01355 } 01356 else 01357 Aprime = const_cast<Epetra_CrsMatrix*>(&A); 01358 01359 //explicit tranpose B formed as necessary 01360 if( transposeB ) { 01361 Btrans = new EpetraExt::RowMatrix_Transpose(); 01362 Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B))))); 01363 } 01364 else 01365 Bprime = const_cast<Epetra_CrsMatrix*>(&B); 01366 01367 // allocate or zero the new matrix 01368 if(C!=0) 01369 C->PutScalar(0.0); 01370 else 01371 C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0); 01372 01373 // build arrays for easy resuse 01374 int ierr = 0; 01375 Epetra_CrsMatrix * Mat[] = { Aprime,Bprime}; 01376 double scalar[] = { scalarA, scalarB}; 01377 01378 // do a loop over each matrix to add: A reordering might be more efficient 01379 for(int k=0;k<2;k++) { 01380 int MaxNumEntries = Mat[k]->MaxNumEntries(); 01381 int NumEntries; 01382 int * Indices = new int[MaxNumEntries]; 01383 double * Values = new double[MaxNumEntries]; 01384 01385 int NumMyRows = Mat[k]->NumMyRows(); 01386 int Row, err; 01387 int ierr = 0; 01388 01389 //Loop over rows and sum into C 01390 for( int i = 0; i < NumMyRows; ++i ) { 01391 Row = Mat[k]->GRID(i); 01392 EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices)); 01393 01394 if( scalar[k] != 1.0 ) 01395 for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k]; 01396 01397 if(C->Filled()) { // Sum in values 01398 err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices ); 01399 if (err < 0) ierr = err; 01400 } else { // just add it to the unfilled CRS Matrix 01401 err = C->InsertGlobalValues( Row, NumEntries, Values, Indices ); 01402 if (err < 0) ierr = err; 01403 } 01404 } 01405 01406 delete [] Indices; 01407 delete [] Values; 01408 } 01409 01410 if( Atrans ) delete Atrans; 01411 if( Btrans ) delete Btrans; 01412 01413 return(ierr); 01414 } 01415 01416 } // namespace EpetraExt 01417
1.7.6.1