|
EpetraExt
Development
|
00001 //@HEADER 00002 // *********************************************************************** 00003 // 00004 // EpetraExt: Epetra Extended - Linear Algebra Services Package 00005 // Copyright (2011) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 00042 #include <EpetraExt_ConfigDefs.h> 00043 #include <EpetraExt_MatrixMatrix.h> 00044 00045 #include <EpetraExt_MMHelpers.h> 00046 00047 #include <EpetraExt_Transpose_RowMatrix.h> 00048 00049 #include <Epetra_Export.h> 00050 #include <Epetra_Import.h> 00051 #include <Epetra_Util.h> 00052 #include <Epetra_Map.h> 00053 #include <Epetra_Comm.h> 00054 #include <Epetra_CrsMatrix.h> 00055 #include <Epetra_Vector.h> 00056 #include <Epetra_Directory.h> 00057 #include <Epetra_HashTable.h> 00058 #include <Epetra_Distributor.h> 00059 #include <Epetra_IntSerialDenseVector.h> 00060 00061 #ifdef HAVE_VECTOR 00062 #include <vector> 00063 #endif 00064 00065 #ifdef HAVE_MPI 00066 #include <Epetra_MpiDistributor.h> 00067 #endif 00068 00069 00070 #include <Teuchos_TimeMonitor.hpp> 00071 00072 namespace EpetraExt { 00073 00074 /*****************************************************************************/ 00075 /*****************************************************************************/ 00076 /*****************************************************************************/ 00077 static inline int C_estimate_nnz(const Epetra_CrsMatrix & A, const Epetra_CrsMatrix &B){ 00078 // Follows the NZ estimate in ML's ml_matmatmult.c 00079 int Aest=(A.NumMyRows()>0)? A.NumMyNonzeros()/A.NumMyRows():100; 00080 int Best=(B.NumMyRows()>0)? B.NumMyNonzeros()/B.NumMyRows():100; 00081 00082 int nnzperrow=(int)(sqrt((double)Aest) + sqrt((double)Best) - 1); 00083 nnzperrow*=nnzperrow; 00084 00085 return (int)(A.NumMyRows()*nnzperrow*0.75 + 100); 00086 } 00087 00088 /*****************************************************************************/ 00089 /*****************************************************************************/ 00090 /*****************************************************************************/ 00091 static inline int auto_resize(std::vector<int> &x,int num_new){ 00092 int newsize=x.size() + EPETRA_MAX((int)x.size(),num_new); 00093 x.resize(newsize); 00094 return newsize; 00095 } 00096 00097 00098 /*****************************************************************************/ 00099 /*****************************************************************************/ 00100 /*****************************************************************************/ 00101 template<typename int_type> 00102 int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map*& unionmap, std::vector<int>& Cremotepids, 00103 std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols) 00104 { 00105 #ifdef HAVE_MPI 00106 00107 #ifdef ENABLE_MMM_TIMINGS 00108 using Teuchos::TimeMonitor; 00109 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 1"))); 00110 #endif 00111 00112 // So we need to merge the ColMap of B and the TargetMap of Bimport in an AztecOO/Ifpack/ML compliant order. 00113 Epetra_Util util; 00114 int i,j,MyPID = B.Comm().MyPID(), NumProc = B.Comm().NumProc(); 00115 int Bstart=0, Istart=0, Cstart=0,Pstart=0; 00116 00117 const Epetra_Map & BColMap = B.ColMap(); 00118 const Epetra_Map & DomainMap = B.DomainMap(); 00119 const LightweightMap & IColMap = Bimport.ColMap_; 00120 00121 int Nb = BColMap.NumMyElements(); 00122 int_type * Bgids = 0; 00123 BColMap.MyGlobalElementsPtr(Bgids); 00124 int Ni = IColMap.NumMyElements(); 00125 int_type * Igids = 0; 00126 if(Ni>0) 00127 IColMap.MyGlobalElementsPtr(Igids); 00128 00129 if((int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb); 00130 if((int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni); 00131 00132 // Since we're getting called, we know we have to be using an MPI implementation of Epetra. 00133 // Which means we should have an MpiDistributor for both B and Bimport. 00134 // Unless all of B's columns are owned by the calling proc (e.g. MueLu for A*Ptent w/ uncoupled aggregation) 00135 Epetra_MpiDistributor *Distor=0; 00136 if(B.Importer()) { 00137 Distor=dynamic_cast<Epetra_MpiDistributor*>(&B.Importer()->Distributor()); 00138 if(!Distor) EPETRA_CHK_ERR(-2); 00139 } 00140 00141 // ********************** 00142 // Stage 1: Get the owning PIDs 00143 // ********************** 00144 // Note: if B doesn't have an importer, the calling proc owns all its colids... 00145 std::vector<int> Bpids(Nb), Ipids(Ni); 00146 if(B.Importer()) {EPETRA_CHK_ERR(util.GetPids(*B.Importer(),Bpids,true));} 00147 else Bpids.assign(Nb,-1); 00148 00149 if(Ni != (int) Bimport.ColMapOwningPIDs_.size()) { 00150 EPETRA_CHK_ERR(-21); 00151 } 00152 for(i=0;i<Ni;i++){ 00153 Ipids[i] = (Bimport.ColMapOwningPIDs_[i]==MyPID)?(-1):(Bimport.ColMapOwningPIDs_[i]); 00154 } 00155 00156 // ********************** 00157 // Stage 2: Allocate memory (make things too big) 00158 // ********************** 00159 int Csize=Nb+Ni; 00160 int Psize=Nb+Ni; 00161 std::vector<int_type> Cgids(Csize); 00162 Cremotepids.resize(Psize); 00163 00164 #ifdef ENABLE_MMM_TIMINGS 00165 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 2"))); 00166 #endif 00167 00168 // ********************** 00169 // Stage 3: Local Unknowns 00170 // ********************** 00171 if(!B.Importer() || (B.Importer()->NumSameIDs() == DomainMap.NumMyElements())) { 00172 // B's colmap has all of the domain elements. We can just copy those into the start of the array. 00173 DomainMap.MyGlobalElements(Cgids.size() ? &Cgids[0] : 0); 00174 Cstart=DomainMap.NumMyElements(); 00175 Bstart=DomainMap.NumMyElements(); 00176 00177 for(i=0; i<DomainMap.NumMyElements(); i++) Bcols2Ccols[i] = i; 00178 } 00179 else { 00180 // There are more entries in the DomainMap than B's ColMap. So we stream through both B and Bimport for the copy. 00181 int NumDomainElements = DomainMap.NumMyElements(); 00182 for(i = 0; i < NumDomainElements; i++) { 00183 int_type GID = (int_type) DomainMap.GID64(i); 00184 int LID = BColMap.LID(GID); 00185 // B has this guy 00186 if(LID!=-1) { 00187 Bcols2Ccols[LID]=Cstart; 00188 Cgids[Cstart] = GID; 00189 Cstart++; 00190 Bstart++; 00191 } 00192 else { 00193 // B import has this guy 00194 LID = IColMap.LID(GID); 00195 if(LID!=-1) { 00196 Icols2Ccols[LID]=Cstart; 00197 Cgids[Cstart] = GID; 00198 Cstart++; 00199 } 00200 } 00201 } 00202 } 00203 00204 // Now advance Ilast to the last owned unknown in Bimport 00205 for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) { 00206 while(Cgids[j]!=Igids[i]) j++; 00207 Icols2Ccols[i]=j; 00208 } 00209 Istart=i; 00210 00211 00212 #ifdef ENABLE_MMM_TIMINGS 00213 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 3"))); 00214 #endif 00215 00216 00217 // ********************** 00218 // Stage 4: Processor-by-processor set_union 00219 // ********************** 00220 // NOTE: Intial sizes for Btemp/Itemp from B's distributor. This should be exact for Btemp and a decent guess for Itemp 00221 int initial_temp_length = 0; 00222 const int * lengths_from=0; 00223 if(Distor) { 00224 lengths_from= Distor->LengthsFrom(); 00225 for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i]; 00226 } 00227 else initial_temp_length=100; 00228 00229 std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length); 00230 std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length); 00231 00232 00233 while (Bstart < Nb || Istart < Ni) { 00234 int Bproc=NumProc+1, Iproc=NumProc+1, Cproc; 00235 00236 // Find the next active processor ID 00237 if(Bstart < Nb) Bproc=Bpids[Bstart]; 00238 if(Istart < Ni) Iproc=Ipids[Istart]; 00239 00240 Cproc = (Bproc < Iproc)?Bproc:Iproc; 00241 00242 if(Bproc == Cproc && Iproc != Cproc) { 00243 // Only B has this processor. Copy the data. 00244 // B: Find the beginning of the next processor 00245 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {} 00246 int Bnext=i; 00247 00248 // Copy data to C 00249 int tCsize = Bnext-Bstart; 00250 if(Btemp.size() < (size_t)tCsize) {Btemp2.resize(tCsize);} 00251 00252 for(i=Bstart; i<Bnext; i++) { 00253 Cremotepids[i-Bstart+Pstart] = Cproc; 00254 Cgids[i-Bstart+Cstart] = Bgids[i]; 00255 Btemp2[i-Bstart] = i; 00256 } 00257 00258 // Sort & record reindexing 00259 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0; 00260 util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0); 00261 00262 for(i=0, j=Cstart; i<tCsize; i++){ 00263 while(Cgids[j] != Bgids[Btemp2[i]]) j++; 00264 Bcols2Ccols[Btemp2[i]] = j; 00265 } 00266 Cstart+=tCsize; 00267 Pstart+=tCsize; 00268 Bstart=Bnext; 00269 } 00270 else if(Bproc != Cproc && Iproc == Cproc) { 00271 // Only I has this processor. Copy the data. 00272 // I: Find the beginning of the next processor 00273 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {} 00274 int Inext=i; 00275 00276 // Copy data to C 00277 int tCsize = Inext-Istart; 00278 if(Itemp.size() < (size_t)tCsize) {Itemp2.resize(tCsize);} 00279 00280 for(i=Istart; i<Inext; i++) { 00281 Cremotepids[i-Istart+Pstart] = Cproc; 00282 Cgids[i-Istart+Cstart] = Igids[i]; 00283 Itemp2[i-Istart] = i; 00284 } 00285 00286 // Sort & record reindexing 00287 int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0; 00288 util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0); 00289 00290 for(i=0, j=Cstart; i<tCsize; i++){ 00291 while(Cgids[j] != Igids[Itemp2[i]]) j++; 00292 Icols2Ccols[Itemp2[i]] = j; 00293 } 00294 Cstart+=tCsize; 00295 Pstart+=tCsize; 00296 Istart=Inext; 00297 } 00298 else { 00299 // Both B and I have this processor, so we need to do a set_union. So we need to sort. 00300 int Bnext, Inext; 00301 // B: Find the beginning of the next processor 00302 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {} 00303 Bnext=i; 00304 00305 // I: Find the beginning of the next processor 00306 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {} 00307 Inext=i; 00308 00309 // Copy data to temp 00310 int tBsize = Bnext-Bstart; 00311 int tIsize = Inext-Istart; 00312 // int tCsize = tBsize+tIsize; 00313 if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);} 00314 if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);} 00315 00316 for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;} 00317 for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;} 00318 00319 // Sort & set_union 00320 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0; int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0; 00321 util.Sort(true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0); 00322 util.Sort(true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0); 00323 typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart; 00324 typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart); 00325 00326 for(i=0, j=Cstart; i<tBsize; i++){ 00327 while(Cgids[j] != Bgids[Btemp2[i]]) j++; 00328 Bcols2Ccols[Btemp2[i]] = j; 00329 } 00330 00331 for(i=0, j=Cstart; i<tIsize; i++){ 00332 while(Cgids[j] != Igids[Itemp2[i]]) j++; 00333 Icols2Ccols[Itemp2[i]] = j; 00334 } 00335 00336 for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc; 00337 Cstart = (last_el - mycstart) + Cstart; 00338 Pstart = (last_el - mycstart) + Pstart; 00339 Bstart=Bnext; 00340 Istart=Inext; 00341 } 00342 } // end while 00343 00344 #ifdef ENABLE_MMM_TIMINGS 00345 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 4"))); 00346 #endif 00347 00348 // Resize the RemotePIDs down 00349 Cremotepids.resize(Pstart); 00350 00351 // ********************** 00352 // Stage 5: Call constructor 00353 // ********************** 00354 // Make the map 00355 unionmap=new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.ColMap().IndexBase64(), 00356 B.Comm(),B.ColMap().DistributedGlobal(),(int_type) B.ColMap().MinAllGID64(),(int_type) B.ColMap().MaxAllGID64()); 00357 return 0; 00358 #else 00359 return -1; 00360 #endif 00361 } 00362 00363 /*****************************************************************************/ 00364 /*****************************************************************************/ 00365 /*****************************************************************************/ 00366 // Provide a "resize" operation for double*'s. 00367 inline void resize_doubles(int nold,int nnew,double*& d){ 00368 if(nnew > nold){ 00369 double *tmp = new double[nnew]; 00370 for(int i=0; i<nold; i++) 00371 tmp[i]=d[i]; 00372 delete [] d; 00373 d=tmp; 00374 } 00375 } 00376 00377 00378 /*****************************************************************************/ 00379 /*****************************************************************************/ 00380 /*****************************************************************************/ 00381 template<typename int_type> 00382 int mult_A_B_newmatrix(const Epetra_CrsMatrix & A, 00383 const Epetra_CrsMatrix & B, 00384 const CrsMatrixStruct& Bview, 00385 std::vector<int> & Bcol2Ccol, 00386 std::vector<int> & Bimportcol2Ccol, 00387 std::vector<int>& Cremotepids, 00388 Epetra_CrsMatrix& C 00389 ){ 00390 #ifdef ENABLE_MMM_TIMINGS 00391 using Teuchos::TimeMonitor; 00392 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix SerialCore"))); 00393 #endif 00394 00395 // ***************************** 00396 // Improved Parallel Gustavson in Local IDs 00397 // ***************************** 00398 const Epetra_Map * colmap_C = &(C.ColMap()); 00399 00400 00401 int NumMyDiagonals=0; // Counter to speed up ESFC 00402 00403 int m=A.NumMyRows(); 00404 int n=colmap_C->NumMyElements(); 00405 int i,j,k; 00406 00407 // DataPointers for A 00408 int *Arowptr, *Acolind; 00409 double *Avals; 00410 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals)); 00411 00412 // DataPointers for B, Bimport 00413 int *Browptr, *Bcolind; 00414 double *Bvals; 00415 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals)); 00416 00417 int *Irowptr=0, *Icolind=0; 00418 double *Ivals=0; 00419 if(Bview.importMatrix){ 00420 Irowptr = &Bview.importMatrix->rowptr_[0]; 00421 Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0; 00422 Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0; 00423 } 00424 00425 // MemorySetup: If somebody else is sharing this C's graphdata, make a new one. 00426 // This is needed because I'm about to walk all over the CrsGrapData... 00427 C.ExpertMakeUniqueCrsGraphData(); 00428 00429 // The status array will contain the index into colind where this entry was last deposited. 00430 // c_status[i] < CSR_ip - not in the row yet. 00431 // c_status[i] >= CSR_ip, this is the entry where you can find the data 00432 // We start with this filled with -1's indicating that there are no entries yet. 00433 std::vector<int> c_status(n, -1); 00434 00435 // Classic csr assembly (low memory edition) 00436 int CSR_alloc=C_estimate_nnz(A,B); 00437 if(CSR_alloc < n) CSR_alloc = n; 00438 int CSR_ip=0,OLD_ip=0; 00439 Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset(); 00440 Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices(); 00441 double *& CSR_vals = C.ExpertExtractValues(); 00442 00443 CSR_rowptr.Resize(m+1); 00444 CSR_colind.Resize(CSR_alloc); 00445 resize_doubles(0,CSR_alloc,CSR_vals); 00446 00447 // Static Profile stuff 00448 std::vector<int> NumEntriesPerRow(m); 00449 00450 // For each row of A/C 00451 for(i=0; i<m; i++){ 00452 bool found_diagonal=false; 00453 CSR_rowptr[i]=CSR_ip; 00454 00455 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){ 00456 int Ak = Acolind[k]; 00457 double Aval = Avals[k]; 00458 if(Aval==0) continue; 00459 00460 if(Bview.targetMapToOrigRow[Ak] != -1){ 00461 // Local matrix 00462 int Bk = Bview.targetMapToOrigRow[Ak]; 00463 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) { 00464 int Cj=Bcol2Ccol[Bcolind[j]]; 00465 00466 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;} 00467 00468 if(c_status[Cj]<OLD_ip){ 00469 // New entry 00470 c_status[Cj]=CSR_ip; 00471 CSR_colind[CSR_ip]=Cj; 00472 CSR_vals[CSR_ip]=Aval*Bvals[j]; 00473 CSR_ip++; 00474 } 00475 else 00476 CSR_vals[c_status[Cj]]+=Aval*Bvals[j]; 00477 } 00478 } 00479 else{ 00480 // Remote matrix 00481 int Ik = Bview.targetMapToImportRow[Ak]; 00482 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) { 00483 int Cj=Bimportcol2Ccol[Icolind[j]]; 00484 00485 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;} 00486 00487 if(c_status[Cj]<OLD_ip){ 00488 // New entry 00489 c_status[Cj]=CSR_ip; 00490 CSR_colind[CSR_ip]=Cj; 00491 CSR_vals[CSR_ip]=Aval*Ivals[j]; 00492 CSR_ip++; 00493 } 00494 else 00495 CSR_vals[c_status[Cj]]+=Aval*Ivals[j]; 00496 } 00497 } 00498 } 00499 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i]; 00500 00501 // Resize for next pass if needed 00502 if(CSR_ip + n > CSR_alloc){ 00503 resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals); 00504 CSR_alloc*=2; 00505 CSR_colind.Resize(CSR_alloc); 00506 } 00507 OLD_ip=CSR_ip; 00508 } 00509 00510 CSR_rowptr[m]=CSR_ip; 00511 00512 #ifdef ENABLE_MMM_TIMINGS 00513 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Final Sort"))); 00514 #endif 00515 00516 // Sort the entries 00517 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]); 00518 00519 #ifdef ENABLE_MMM_TIMINGS 00520 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Fast IE"))); 00521 #endif 00522 00523 // Do a fast build of C's importer 00524 Epetra_Import * Cimport=0; 00525 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0; 00526 int NumExports=0; 00527 int *ExportLIDs=0, *ExportPIDs=0; 00528 if(Bview.importMatrix) { 00529 NumExports = Bview.importMatrix->ExportLIDs_.size(); 00530 ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0; 00531 ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0; 00532 } 00533 else if(B.Importer()) { 00534 // Grab the exports from B proper 00535 NumExports = B.Importer()->NumExportIDs(); 00536 ExportLIDs = B.Importer()->ExportLIDs(); 00537 ExportPIDs = B.Importer()->ExportPIDs(); 00538 } 00539 00540 00541 if(B.Importer() && C.ColMap().SameAs(B.ColMap())) 00542 Cimport = new Epetra_Import(*B.Importer()); // Because the domain maps are the same 00543 else if(!C.ColMap().SameAs(B.DomainMap())) 00544 Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs); 00545 00546 00547 #ifdef ENABLE_MMM_TIMINGS 00548 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix ESFC"))); 00549 #endif 00550 00551 // Update the CrsGraphData 00552 C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals); 00553 00554 return 0; 00555 } 00556 00557 00558 00559 00560 /*****************************************************************************/ 00561 /*****************************************************************************/ 00562 /*****************************************************************************/ 00563 template<typename int_type> 00564 int mult_A_B_reuse(const Epetra_CrsMatrix & A, 00565 const Epetra_CrsMatrix & B, 00566 CrsMatrixStruct& Bview, 00567 std::vector<int> & Bcol2Ccol, 00568 std::vector<int> & Bimportcol2Ccol, 00569 Epetra_CrsMatrix& C){ 00570 00571 #ifdef ENABLE_MMM_TIMINGS 00572 using Teuchos::TimeMonitor; 00573 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse SerialCore"))); 00574 #endif 00575 00576 // ***************************** 00577 // Improved Parallel Gustavson in Local IDs 00578 // ***************************** 00579 const Epetra_Map * colmap_C = &(C.ColMap()); 00580 00581 int m=A.NumMyRows(); 00582 int n=colmap_C->NumMyElements(); 00583 int i,j,k; 00584 00585 // DataPointers for A 00586 int *Arowptr, *Acolind; 00587 double *Avals; 00588 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals)); 00589 00590 // DataPointers for B, Bimport 00591 int *Browptr, *Bcolind; 00592 double *Bvals; 00593 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals)); 00594 00595 int *Irowptr=0, *Icolind=0; 00596 double *Ivals=0; 00597 if(Bview.importMatrix){ 00598 Irowptr = &Bview.importMatrix->rowptr_[0]; 00599 Icolind = &Bview.importMatrix->colind_[0]; 00600 Ivals = &Bview.importMatrix->vals_[0]; 00601 } 00602 00603 // DataPointers for C 00604 int *CSR_rowptr, *CSR_colind; 00605 double *CSR_vals; 00606 EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals)); 00607 00608 00609 // The status array will contain the index into colind where this dude was last deposited. 00610 // c_status[i] < CSR_ip - not in the row yet. 00611 // c_status[i] >= CSR_ip, this is the entry where you can find the data 00612 // We start with this filled with -1's indicating that there are no entries yet. 00613 std::vector<int> c_status(n, -1); 00614 00615 // Classic csr assembly 00616 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0]; 00617 int CSR_ip=0,OLD_ip=0; 00618 00619 // For each row of A/C 00620 for(i=0; i<m; i++){ 00621 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){ 00622 int Ak=Acolind[k]; 00623 double Aval = Avals[k]; 00624 if(Aval==0) continue; 00625 00626 if(Bview.targetMapToOrigRow[Ak] != -1){ 00627 // Local matrix 00628 int Bk = Bview.targetMapToOrigRow[Ak]; 00629 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) { 00630 int Cj=Bcol2Ccol[Bcolind[j]]; 00631 00632 if(c_status[Cj]<OLD_ip){ 00633 // New entry 00634 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13); 00635 c_status[Cj]=CSR_ip; 00636 CSR_colind[CSR_ip]=Cj; 00637 CSR_vals[CSR_ip]=Aval*Bvals[j]; 00638 CSR_ip++; 00639 } 00640 else 00641 CSR_vals[c_status[Cj]]+=Aval*Bvals[j]; 00642 } 00643 } 00644 else{ 00645 // Remote matrix 00646 int Ik = Bview.targetMapToImportRow[Ak]; 00647 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) { 00648 int Cj=Bimportcol2Ccol[Icolind[j]]; 00649 00650 if(c_status[Cj]<OLD_ip){ 00651 // New entry 00652 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14); 00653 c_status[Cj]=CSR_ip; 00654 CSR_colind[CSR_ip]=Cj; 00655 CSR_vals[CSR_ip]=Aval*Ivals[j]; 00656 CSR_ip++; 00657 } 00658 else 00659 CSR_vals[c_status[Cj]]+=Aval*Ivals[j]; 00660 } 00661 } 00662 } 00663 OLD_ip=CSR_ip; 00664 } 00665 00666 #ifdef ENABLE_MMM_TIMINGS 00667 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Final Sort"))); 00668 #endif 00669 00670 // Sort the entries 00671 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]); 00672 00673 return 0; 00674 } 00675 00676 00677 00678 /*****************************************************************************/ 00679 /*****************************************************************************/ 00680 /*****************************************************************************/ 00681 //kernel method for computing the local portion of C = A*B 00682 template<typename int_type> 00683 int mult_A_B_general(const Epetra_CrsMatrix & A, 00684 CrsMatrixStruct & Aview, 00685 const Epetra_CrsMatrix & B, 00686 CrsMatrixStruct& Bview, 00687 Epetra_CrsMatrix& C, 00688 bool call_FillComplete_on_result) 00689 { 00690 int C_firstCol = Bview.colMap->MinLID(); 00691 int C_lastCol = Bview.colMap->MaxLID(); 00692 00693 int C_firstCol_import = 0; 00694 int C_lastCol_import = -1; 00695 00696 int_type* bcols = 0; 00697 Bview.colMap->MyGlobalElementsPtr(bcols); 00698 int_type* bcols_import = NULL; 00699 if (Bview.importMatrix != NULL) { 00700 C_firstCol_import = Bview.importMatrix->ColMap_.MinLID(); 00701 C_lastCol_import = Bview.importMatrix->ColMap_.MaxLID(); 00702 Bview.importMatrix->ColMap_.MyGlobalElementsPtr(bcols_import); 00703 } 00704 00705 int C_numCols = C_lastCol - C_firstCol + 1; 00706 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00707 00708 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00709 00710 // Allocate workspace memory 00711 double* dwork = new double[C_numCols]; 00712 int_type* iwork = new int_type[C_numCols]; 00713 int_type *c_cols=iwork; 00714 double *c_vals=dwork; 00715 int *c_index=new int[C_numCols]; 00716 00717 int i, j, k; 00718 bool C_filled=C.Filled(); 00719 00720 // DataPointers for A 00721 int *Arowptr, *Acolind; 00722 double *Avals; 00723 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals)); 00724 00725 //To form C = A*B we're going to execute this expression: 00726 // 00727 // C(i,j) = sum_k( A(i,k)*B(k,j) ) 00728 // 00729 //Our goal, of course, is to navigate the data in A and B once, without 00730 //performing searches for column-indices, etc. 00731 00732 // Mark indices as empty w/ -1 00733 for(k=0;k<C_numCols;k++) c_index[k]=-1; 00734 00735 //loop over the rows of A. 00736 for(i=0; i<A.NumMyRows(); ++i) { 00737 00738 int_type global_row = (int_type) Aview.rowMap->GID64(i); 00739 00740 //loop across the i-th row of A and for each corresponding row 00741 //in B, loop across colums and accumulate product 00742 //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words, 00743 //as we stride across B(k,:) we're calculating updates for row i of the 00744 //result matrix C. 00745 00746 /* Outline of the revised, ML-inspired algorithm 00747 00748 C_{i,j} = \sum_k A_{i,k} B_{k,j} 00749 00750 This algorithm uses a "middle product" formulation, with the loop ordering of 00751 i, k, j. This means we compute a row of C at a time, but compute partial sums of 00752 each entry in row i until we finish the k loop. 00753 00754 This algorithm also has a few twists worth documenting. 00755 00756 1) The first major twist involves the c_index, c_cols and c_vals arrays. The arrays c_cols 00757 and c_vals store the *local* column index and values accumulator respectively. These 00758 arrays are allocated to a size equal to the max number of local columns in C, namely C_numcols. 00759 The value c_current tells us how many non-zeros we currently have in this row. 00760 00761 So how do we take a LCID and find the right accumulator? This is where the c_index array 00762 comes in. At the start (and stop) and the i loop, c_index is filled with -1's. Now 00763 whenever we find a LCID in the k loop, we first loop at c_index[lcid]. If this value is 00764 -1 we haven't seen this entry yet. In which case we add the appropriate stuff to c_cols 00765 and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before 00766 we increment it). If the value is NOT -1, this tells us the location in the c_vals/c_cols 00767 arrays (namely c_index[lcid]) where our accumulator lives. 00768 00769 This trick works because we're working with local ids. We can then loop from 0 to c_current 00770 and reset c_index to -1's when we're done, only touching the arrays that have changed. 00771 While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues. 00772 Thus, the effect of this trick is to avoid passes over the index array. 00773 00774 2) The second major twist involves handling the remote and local components of B separately. 00775 (ML doesn't need to do this, because its local ordering scheme is consistent between the local 00776 and imported components of B.) Since they have different column maps, they have inconsistent 00777 local column ids. This means the "second twist" won't work as stated on both matrices at the 00778 same time. While this could be handled any number of ways, I have chosen to do the two parts 00779 of B separately to make the code easier to read (and reduce the memory footprint of the MMM). 00780 */ 00781 00782 // Local matrix: Zero Current counts for matrix 00783 int c_current=0; 00784 00785 // Local matrix: Do the "middle product" 00786 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) { 00787 int Ak = Acolind[k]; 00788 double Aval = Avals[k]; 00789 // We're skipping remote entries on this pass. 00790 if(Bview.remote[Ak] || Aval==0) continue; 00791 00792 int* Bcol_inds = Bview.indices[Ak]; 00793 double* Bvals_k = Bview.values[Ak]; 00794 00795 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) { 00796 int col=Bcol_inds[j]; 00797 if(c_index[col]<0){ 00798 // We haven't seen this entry before; add it. (In ML, on 00799 // the first pass, you haven't seen any of the entries 00800 // before, so they are added without the check. Not sure 00801 // how much more efficient that would be; depends on branch 00802 // prediction. We've favored code readability here.) 00803 c_cols[c_current]=col; 00804 c_vals[c_current]=Aval*Bvals_k[j]; 00805 c_index[col]=c_current; 00806 c_current++; 00807 } 00808 else{ 00809 // We've already seen this entry; accumulate it. 00810 c_vals[c_index[col]]+=Aval*Bvals_k[j]; 00811 } 00812 } 00813 } 00814 // Local matrix: Reset c_index and switch c_cols to GIDs 00815 for(k=0; k<c_current; k++){ 00816 c_index[c_cols[k]]=-1; 00817 c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs. 00818 } 00819 // Local matrix: Insert. 00820 // 00821 // We should check global error results after the algorithm is 00822 // through. It's probably safer just to let the algorithm run all 00823 // the way through before doing this, since otherwise we have to 00824 // remember to free all allocations carefully. 00825 int err = C_filled ? 00826 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols) 00827 : 00828 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols); 00829 00830 // Remote matrix: Zero current counts again for matrix 00831 c_current=0; 00832 00833 // Remote matrix: Do the "middle product" 00834 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) { 00835 int Ak = Acolind[k]; 00836 double Aval = Avals[k]; 00837 // We're skipping local entries on this pass. 00838 if(!Bview.remote[Ak] || Aval==0) continue; 00839 00840 int* Bcol_inds = Bview.indices[Ak]; 00841 double* Bvals_k = Bview.values[Ak]; 00842 00843 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) { 00844 int col=Bcol_inds[j]; 00845 if(c_index[col]<0){ 00846 c_cols[c_current]=col; 00847 c_vals[c_current]=Aval*Bvals_k[j]; 00848 c_index[col]=c_current; 00849 c_current++; 00850 } 00851 else{ 00852 c_vals[c_index[col]]+=Aval*Bvals_k[j]; 00853 } 00854 } 00855 } 00856 // Remote matrix: Reset c_index and switch c_cols to GIDs 00857 for(k=0; k<c_current; k++){ 00858 c_index[c_cols[k]]=-1; 00859 c_cols[k]=bcols_import[c_cols[k]]; 00860 } 00861 // Remove matrix: Insert 00862 // 00863 // See above (on error handling). 00864 err = C_filled ? 00865 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols) 00866 : 00867 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols); 00868 } 00869 00870 // Since Multiply won't do this 00871 if(call_FillComplete_on_result) 00872 C.FillComplete(B.DomainMap(),A.RangeMap()); 00873 00874 delete [] dwork; 00875 delete [] iwork; 00876 delete [] c_index; 00877 return(0); 00878 } 00879 00880 00881 00882 00883 00884 /*****************************************************************************/ 00885 /*****************************************************************************/ 00886 /*****************************************************************************/ 00887 template<typename int_type> 00888 int MatrixMatrix::Tmult_A_B(const Epetra_CrsMatrix & A, 00889 CrsMatrixStruct & Aview, 00890 const Epetra_CrsMatrix & B, 00891 CrsMatrixStruct& Bview, 00892 Epetra_CrsMatrix& C, 00893 bool call_FillComplete_on_result){ 00894 00895 int i,rv; 00896 Epetra_Map* mapunion = 0; 00897 const Epetra_Map * colmap_B = &(B.ColMap()); 00898 const Epetra_Map * colmap_C = &(C.ColMap()); 00899 00900 std::vector<int> Cremotepids; 00901 std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements()); 00902 std::vector<int> Bimportcol2Ccol; 00903 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements()); 00904 00905 #ifdef ENABLE_MMM_TIMINGS 00906 using Teuchos::TimeMonitor; 00907 Teuchos::RCP<Teuchos::TimeMonitor> MM; 00908 #endif 00909 00910 // If the user doesn't want us to call FillComplete, use the general routine 00911 if(!call_FillComplete_on_result) { 00912 #ifdef ENABLE_MMM_TIMINGS 00913 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply"))); 00914 #endif 00915 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false); 00916 return rv; 00917 } 00918 00919 // Is this a "clean" matrix 00920 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal(); 00921 00922 // Does ExtractCrsDataPointers work? 00923 int *C_rowptr, *C_colind; 00924 double * C_vals; 00925 C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals); 00926 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals; 00927 00928 // It's a new matrix that hasn't been fill-completed, use the general routine 00929 if(!NewFlag && ExtractFailFlag){ 00930 #ifdef ENABLE_MMM_TIMINGS 00931 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply"))); 00932 #endif 00933 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result); 00934 return rv; 00935 } 00936 00937 00938 #ifdef ENABLE_MMM_TIMINGS 00939 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix CMap"))); 00940 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse CMap"))); 00941 #endif 00942 00943 // If new, build & clobber a colmap for C 00944 if(NewFlag){ 00945 if(Bview.importMatrix) { 00946 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) ); 00947 EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) ); 00948 } 00949 else { 00950 EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) ); 00951 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i; 00952 00953 // Copy B's remote list (if any) 00954 if(B.Importer()) 00955 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids)); 00956 } 00957 } 00958 00959 #ifdef ENABLE_MMM_TIMINGS 00960 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Lookups"))); 00961 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Lookups"))); 00962 #endif 00963 00964 // ******************************************** 00965 // Setup Bcol2Ccol / Bimportcol2Ccol lookups 00966 // ******************************************** 00967 // Note: If we ran the map_union, we have this information already 00968 00969 if(!NewFlag) { 00970 if(colmap_B->SameAs(*colmap_C)){ 00971 // Maps are the same: Use local IDs as the hash 00972 for(i=0;i<colmap_B->NumMyElements();i++) 00973 Bcol2Ccol[i]=i; 00974 } 00975 else { 00976 // Maps are not the same: Use the map's hash 00977 for(i=0;i<colmap_B->NumMyElements();i++){ 00978 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i)); 00979 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11); 00980 } 00981 } 00982 00983 if(Bview.importMatrix){ 00984 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements()); 00985 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){ 00986 Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i)); 00987 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12); 00988 } 00989 00990 } 00991 } 00992 #ifdef ENABLE_MMM_TIMINGS 00993 MM=Teuchos::null; 00994 #endif 00995 00996 // Call the appropriate core routine 00997 if(NewFlag) { 00998 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C)); 00999 } 01000 else { 01001 // This always has a real map 01002 EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C)); 01003 } 01004 01005 // Cleanup 01006 delete mapunion; 01007 return 0; 01008 } 01009 01010 01011 /*****************************************************************************/ 01012 /*****************************************************************************/ 01013 /*****************************************************************************/ 01014 int MatrixMatrix::mult_A_B(const Epetra_CrsMatrix & A, 01015 CrsMatrixStruct & Aview, 01016 const Epetra_CrsMatrix & B, 01017 CrsMatrixStruct& Bview, 01018 Epetra_CrsMatrix& C, 01019 bool call_FillComplete_on_result){ 01020 01021 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01022 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) { 01023 return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result); 01024 } 01025 else 01026 #endif 01027 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01028 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) { 01029 return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result); 01030 } 01031 else 01032 #endif 01033 throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown"; 01034 } 01035 01036 01037 /*****************************************************************************/ 01038 /*****************************************************************************/ 01039 /*****************************************************************************/ 01040 template<typename int_type> 01041 int jacobi_A_B_reuse(double omega, 01042 const Epetra_Vector & Dinv, 01043 const Epetra_CrsMatrix & A, 01044 const Epetra_CrsMatrix & B, 01045 CrsMatrixStruct& Bview, 01046 std::vector<int> & Bcol2Ccol, 01047 std::vector<int> & Bimportcol2Ccol, 01048 Epetra_CrsMatrix& C){ 01049 01050 #ifdef ENABLE_MMM_TIMINGS 01051 using Teuchos::TimeMonitor; 01052 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse SerialCore"))); 01053 #endif 01054 01055 // ***************************** 01056 // Improved Parallel Gustavson in Local IDs 01057 // ***************************** 01058 const Epetra_Map * colmap_C = &(C.ColMap()); 01059 01060 int m=A.NumMyRows(); 01061 int n=colmap_C->NumMyElements(); 01062 int i,j,k; 01063 01064 // DataPointers for A 01065 int *Arowptr, *Acolind; 01066 double *Avals; 01067 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals)); 01068 01069 // DataPointers for B, Bimport 01070 int *Browptr, *Bcolind; 01071 double *Bvals; 01072 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals)); 01073 01074 int *Irowptr=0, *Icolind=0; 01075 double *Ivals=0; 01076 if(Bview.importMatrix){ 01077 Irowptr = &Bview.importMatrix->rowptr_[0]; 01078 Icolind = &Bview.importMatrix->colind_[0]; 01079 Ivals = &Bview.importMatrix->vals_[0]; 01080 } 01081 01082 // Data pointer for Dinv 01083 const double *Dvals = Dinv.Values(); 01084 01085 // DataPointers for C 01086 int *CSR_rowptr, *CSR_colind; 01087 double *CSR_vals; 01088 EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals)); 01089 01090 01091 // The status array will contain the index into colind where this dude was last deposited. 01092 // c_status[i] < CSR_ip - not in the row yet. 01093 // c_status[i] >= CSR_ip, this is the entry where you can find the data 01094 // We start with this filled with -1's indicating that there are no entries yet. 01095 std::vector<int> c_status(n, -1); 01096 01097 // Classic csr assembly 01098 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0]; 01099 int CSR_ip=0,OLD_ip=0; 01100 01101 // For each row of C 01102 for(i=0; i<m; i++){ 01103 double Dval = Dvals[i]; 01104 01105 // Entries of B 01106 for(k=Browptr[i]; k<Browptr[i+1]; k++){ 01107 // int Bk = Bcolind[k]; 01108 double Bval = Bvals[k]; 01109 if(Bval==0) continue; 01110 int Ck=Bcol2Ccol[Bcolind[k]]; 01111 01112 // Assume no repeated entries in B 01113 c_status[Ck]=CSR_ip; 01114 CSR_colind[CSR_ip]=Ck; 01115 CSR_vals[CSR_ip]= Bvals[k]; 01116 CSR_ip++; 01117 } 01118 01119 // Entries of -omega * Dinv * A * B 01120 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){ 01121 int Ak=Acolind[k]; 01122 double Aval = Avals[k]; 01123 if(Aval==0) continue; 01124 01125 if(Bview.targetMapToOrigRow[Ak] != -1){ 01126 // Local matrix 01127 int Bk = Bview.targetMapToOrigRow[Ak]; 01128 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) { 01129 int Cj=Bcol2Ccol[Bcolind[j]]; 01130 01131 if(c_status[Cj]<OLD_ip){ 01132 // New entry 01133 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13); 01134 c_status[Cj]=CSR_ip; 01135 CSR_colind[CSR_ip]=Cj; 01136 CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j]; 01137 CSR_ip++; 01138 } 01139 else 01140 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j]; 01141 } 01142 } 01143 else{ 01144 // Remote matrix 01145 int Ik = Bview.targetMapToImportRow[Ak]; 01146 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) { 01147 int Cj=Bimportcol2Ccol[Icolind[j]]; 01148 01149 if(c_status[Cj]<OLD_ip){ 01150 // New entry 01151 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14); 01152 c_status[Cj]=CSR_ip; 01153 CSR_colind[CSR_ip]=Cj; 01154 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j]; 01155 CSR_ip++; 01156 } 01157 else 01158 CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j]; 01159 } 01160 } 01161 } 01162 OLD_ip=CSR_ip; 01163 } 01164 01165 #ifdef ENABLE_MMM_TIMINGS 01166 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Final Sort"))); 01167 #endif 01168 // Sort the entries 01169 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]); 01170 01171 return 0; 01172 } 01173 01174 01175 /*****************************************************************************/ 01176 /*****************************************************************************/ 01177 /*****************************************************************************/ 01178 template<typename int_type> 01179 int jacobi_A_B_newmatrix(double omega, 01180 const Epetra_Vector & Dinv, 01181 const Epetra_CrsMatrix & A, 01182 const Epetra_CrsMatrix & B, 01183 CrsMatrixStruct& Bview, 01184 std::vector<int> & Bcol2Ccol, 01185 std::vector<int> & Bimportcol2Ccol, 01186 std::vector<int>& Cremotepids, 01187 Epetra_CrsMatrix& C) 01188 { 01189 #ifdef ENABLE_MMM_TIMINGS 01190 using Teuchos::TimeMonitor; 01191 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix SerialCore"))); 01192 #endif 01193 01194 // ***************************** 01195 // Improved Parallel Gustavson in Local IDs 01196 // ***************************** 01197 const Epetra_Map * colmap_C = &(C.ColMap()); 01198 int NumMyDiagonals=0; // Counter to speed up ESFC 01199 01200 int m=A.NumMyRows(); 01201 int n=colmap_C->NumMyElements(); 01202 int i,j,k; 01203 01204 // DataPointers for A 01205 int *Arowptr, *Acolind; 01206 double *Avals; 01207 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals)); 01208 01209 // DataPointers for B, Bimport 01210 int *Browptr, *Bcolind; 01211 double *Bvals; 01212 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals)); 01213 01214 // Data pointer for Dinv 01215 const double *Dvals = Dinv.Values(); 01216 01217 int *Irowptr=0, *Icolind=0; 01218 double *Ivals=0; 01219 if(Bview.importMatrix){ 01220 Irowptr = &Bview.importMatrix->rowptr_[0]; 01221 Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0; 01222 Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0; 01223 } 01224 01225 // MemorySetup: If somebody else is sharing this C's graphdata, make a new one. 01226 // This is needed because I'm about to walk all over the CrsGrapData... 01227 C.ExpertMakeUniqueCrsGraphData(); 01228 01229 // The status array will contain the index into colind where this entry was last deposited. 01230 // c_status[i] < CSR_ip - not in the row yet. 01231 // c_status[i] >= CSR_ip, this is the entry where you can find the data 01232 // We start with this filled with -1's indicating that there are no entries yet. 01233 std::vector<int> c_status(n, -1); 01234 01235 // Classic csr assembly (low memory edition) 01236 int CSR_alloc=C_estimate_nnz(A,B); 01237 if(CSR_alloc < B.NumMyNonzeros()) CSR_alloc = B.NumMyNonzeros(); // update for Jacobi 01238 int CSR_ip=0,OLD_ip=0; 01239 Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset(); 01240 Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices(); 01241 double *& CSR_vals = C.ExpertExtractValues(); 01242 01243 CSR_rowptr.Resize(m+1); 01244 CSR_colind.Resize(CSR_alloc); 01245 resize_doubles(0,CSR_alloc,CSR_vals); 01246 01247 // Static Profile stuff 01248 std::vector<int> NumEntriesPerRow(m); 01249 01250 // For each row of C 01251 for(i=0; i<m; i++){ 01252 bool found_diagonal=false; 01253 CSR_rowptr[i]=CSR_ip; 01254 double Dval = Dvals[i]; 01255 01256 // Entries of B 01257 for(k=Browptr[i]; k<Browptr[i+1]; k++){ 01258 //int Bk = Bcolind[k]; 01259 double Bval = Bvals[k]; 01260 if(Bval==0) continue; 01261 int Ck=Bcol2Ccol[Bcolind[k]]; 01262 01263 // Assume no repeated entries in B 01264 c_status[Ck]=CSR_ip; 01265 CSR_colind[CSR_ip]=Ck; 01266 CSR_vals[CSR_ip]= Bvals[k]; 01267 CSR_ip++; 01268 } 01269 01270 // Entries of -omega * Dinv * A * B 01271 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){ 01272 int Ak = Acolind[k]; 01273 double Aval = Avals[k]; 01274 if(Aval==0) continue; 01275 01276 if(Bview.targetMapToOrigRow[Ak] != -1){ 01277 // Local matrix 01278 int Bk = Bview.targetMapToOrigRow[Ak]; 01279 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) { 01280 int Cj=Bcol2Ccol[Bcolind[j]]; 01281 01282 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;} 01283 01284 if(c_status[Cj]<OLD_ip){ 01285 // New entry 01286 c_status[Cj]=CSR_ip; 01287 CSR_colind[CSR_ip]=Cj; 01288 CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j]; 01289 CSR_ip++; 01290 } 01291 else 01292 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j]; 01293 } 01294 } 01295 else{ 01296 // Remote matrix 01297 int Ik = Bview.targetMapToImportRow[Ak]; 01298 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) { 01299 int Cj=Bimportcol2Ccol[Icolind[j]]; 01300 01301 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;} 01302 01303 if(c_status[Cj]<OLD_ip){ 01304 // New entry 01305 c_status[Cj]=CSR_ip; 01306 CSR_colind[CSR_ip]=Cj; 01307 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j]; 01308 CSR_ip++; 01309 } 01310 else 01311 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j]; 01312 } 01313 } 01314 } 01315 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i]; 01316 01317 // Resize for next pass if needed 01318 if(CSR_ip + n > CSR_alloc){ 01319 resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals); 01320 CSR_alloc*=2; 01321 CSR_colind.Resize(CSR_alloc); 01322 } 01323 OLD_ip=CSR_ip; 01324 } 01325 01326 CSR_rowptr[m]=CSR_ip; 01327 01328 #ifdef ENABLE_MMM_TIMINGS 01329 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Final Sort"))); 01330 #endif 01331 01332 // Sort the entries 01333 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]); 01334 01335 #ifdef ENABLE_MMM_TIMINGS 01336 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Fast IE"))); 01337 #endif 01338 01339 // Do a fast build of C's importer 01340 Epetra_Import * Cimport=0; 01341 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0; 01342 int NumExports=0; 01343 int *ExportLIDs=0, *ExportPIDs=0; 01344 if(Bview.importMatrix) { 01345 NumExports = Bview.importMatrix->ExportLIDs_.size(); 01346 ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0; 01347 ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0; 01348 } 01349 else if(B.Importer()) { 01350 // Grab the exports from B proper 01351 NumExports = B.Importer()->NumExportIDs(); 01352 ExportLIDs = B.Importer()->ExportLIDs(); 01353 ExportPIDs = B.Importer()->ExportPIDs(); 01354 } 01355 01356 if(!C.ColMap().SameAs(B.DomainMap())) 01357 Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs); 01358 01359 #ifdef ENABLE_MMM_TIMINGS 01360 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix ESFC"))); 01361 #endif 01362 01363 // Update the CrsGraphData 01364 C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals); 01365 01366 return 0; 01367 } 01368 01369 01370 01371 /*****************************************************************************/ 01372 /*****************************************************************************/ 01373 /*****************************************************************************/ 01374 template<typename int_type> 01375 int MatrixMatrix::Tjacobi_A_B(double omega, 01376 const Epetra_Vector & Dinv, 01377 const Epetra_CrsMatrix & A, 01378 CrsMatrixStruct & Aview, 01379 const Epetra_CrsMatrix & B, 01380 CrsMatrixStruct& Bview, 01381 Epetra_CrsMatrix& C, 01382 bool call_FillComplete_on_result){ 01383 int i,rv; 01384 Epetra_Map* mapunion = 0; 01385 const Epetra_Map * colmap_B = &(B.ColMap()); 01386 const Epetra_Map * colmap_C = &(C.ColMap()); 01387 01388 std::vector<int> Cremotepids; 01389 std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements()); 01390 std::vector<int> Bimportcol2Ccol; 01391 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements()); 01392 01393 #ifdef ENABLE_MMM_TIMINGS 01394 using Teuchos::TimeMonitor; 01395 Teuchos::RCP<Teuchos::TimeMonitor> MM; 01396 #endif 01397 01398 // If the user doesn't want us to call FillComplete, use the general routine 01399 if(!call_FillComplete_on_result) { 01400 #ifdef ENABLE_MMM_TIMINGS 01401 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply"))); 01402 #endif 01403 throw std::runtime_error("jacobi_A_B_general not implemented"); 01404 // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false); 01405 return rv; 01406 } 01407 01408 // Is this a "clean" matrix 01409 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal(); 01410 01411 // Does ExtractCrsDataPointers work? 01412 int *C_rowptr, *C_colind; 01413 double * C_vals; 01414 C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals); 01415 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals; 01416 01417 // It's a new matrix that hasn't been fill-completed, use the general routine 01418 if(!NewFlag && ExtractFailFlag){ 01419 #ifdef ENABLE_MMM_TIMINGS 01420 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply"))); 01421 #endif 01422 throw std::runtime_error("jacobi_A_B_general not implemented"); 01423 // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result); 01424 return rv; 01425 } 01426 01427 #ifdef ENABLE_MMM_TIMINGS 01428 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix CMap"))); 01429 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse CMap"))); 01430 #endif 01431 01432 // If new, build & clobber a colmap for C 01433 if(NewFlag){ 01434 if(Bview.importMatrix) { 01435 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) ); 01436 EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) ); 01437 } 01438 else { 01439 EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) ); 01440 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i; 01441 01442 // Copy B's remote list (if any) 01443 if(B.Importer()) 01444 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids)); 01445 } 01446 } 01447 01448 #ifdef ENABLE_MMM_TIMINGS 01449 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix Lookups"))); 01450 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Lookups"))); 01451 #endif 01452 01453 // ******************************************** 01454 // Setup Bcol2Ccol / Bimportcol2Ccol lookups 01455 // ******************************************** 01456 // Note: If we ran the map_union, we have this information already 01457 01458 if(!NewFlag) { 01459 if(colmap_B->SameAs(*colmap_C)){ 01460 // Maps are the same: Use local IDs as the hash 01461 for(i=0;i<colmap_B->NumMyElements();i++) 01462 Bcol2Ccol[i]=i; 01463 } 01464 else { 01465 // Maps are not the same: Use the map's hash 01466 for(i=0;i<colmap_B->NumMyElements();i++){ 01467 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i)); 01468 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11); 01469 } 01470 } 01471 01472 if(Bview.importMatrix){ 01473 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements()); 01474 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){ 01475 Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i)); 01476 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12); 01477 } 01478 01479 } 01480 } 01481 01482 01483 // Call the appropriate core routine 01484 if(NewFlag) { 01485 EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C)); 01486 } 01487 else { 01488 // This always has a real map 01489 EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C)); 01490 } 01491 01492 // Cleanup 01493 delete mapunion; 01494 return 0; 01495 } 01496 01497 01498 /*****************************************************************************/ 01499 /*****************************************************************************/ 01500 /*****************************************************************************/ 01501 int MatrixMatrix::jacobi_A_B(double omega, 01502 const Epetra_Vector & Dinv, 01503 const Epetra_CrsMatrix & A, 01504 CrsMatrixStruct & Aview, 01505 const Epetra_CrsMatrix & B, 01506 CrsMatrixStruct& Bview, 01507 Epetra_CrsMatrix& C, 01508 bool call_FillComplete_on_result) 01509 { 01510 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01511 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) { 01512 return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result); 01513 } 01514 else 01515 #endif 01516 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01517 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) { 01518 return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result); 01519 } 01520 else 01521 #endif 01522 throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown"; 01523 } 01524 01525 01526 01527 /*****************************************************************************/ 01528 /*****************************************************************************/ 01529 /*****************************************************************************/ 01530 template<typename int_type> 01531 int MatrixMatrix::Tmult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C) { 01532 using Teuchos::RCP; 01533 using Teuchos::rcp; 01534 01535 #ifdef ENABLE_MMM_TIMINGS 01536 using Teuchos::TimeMonitor; 01537 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T Transpose"))); 01538 #endif 01539 01540 01541 /*************************************************************/ 01542 /* 2/3) Call mult_A_B_newmatrix w/ fillComplete */ 01543 /*************************************************************/ 01544 #ifdef ENABLE_MMM_TIMINGS 01545 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T AB-core"))); 01546 #endif 01547 RCP<Epetra_CrsMatrix> Ctemp; 01548 01549 // If Atrans has no Exporter, we can use C instead of having to create a temp matrix 01550 bool needs_final_export = Atransview.origMatrix->Exporter() != 0; 01551 if(needs_final_export) 01552 Ctemp = rcp(new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0)); 01553 else { 01554 EPETRA_CHK_ERR( C.ReplaceColMap(Bview.origMatrix->ColMap()) ); 01555 Ctemp = rcp(&C,false);// don't allow deallocation 01556 } 01557 01558 // Multiply 01559 std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols()); 01560 for(int i=0; i<Bview.origMatrix->NumMyCols(); i++) 01561 Bcol2Ccol[i]=i; 01562 std::vector<int> Bimportcol2Ccol,Cremotepids; 01563 if(Bview.origMatrix->Importer()) 01564 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*Bview.origMatrix->Importer(),Cremotepids)); 01565 01566 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview, 01567 Bcol2Ccol,Bimportcol2Ccol,Cremotepids, 01568 *Ctemp)); 01569 01570 /*************************************************************/ 01571 /* 4) ExportAndFillComplete matrix (if needed) */ 01572 /*************************************************************/ 01573 #ifdef ENABLE_MMM_TIMINGS 01574 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T ESFC"))); 01575 #endif 01576 01577 if(needs_final_export) 01578 C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),false); 01579 01580 return 0; 01581 } 01582 01583 01584 01585 /*****************************************************************************/ 01586 /*****************************************************************************/ 01587 /*****************************************************************************/ 01588 int MatrixMatrix::mult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C) { 01589 01590 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 01591 if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) { 01592 return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C); 01593 } 01594 else 01595 #endif 01596 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 01597 if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) { 01598 return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C); 01599 } 01600 else 01601 #endif 01602 throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown"; 01603 } 01604 01605 01606 01607 }//namespace EpetraExt
1.7.6.1