// Tpetra Matrix/Vector Services -- Version of the Day
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_IMPORT_UTIL2_HPP 00043 #define TPETRA_IMPORT_UTIL2_HPP 00044 00050 #include "Tpetra_ConfigDefs.hpp" // for map, vector, string, and iostream 00051 #include "Tpetra_Import.hpp" 00052 #include "Tpetra_HashTable.hpp" 00053 #include "Tpetra_Map.hpp" 00054 #include "Tpetra_Util.hpp" 00055 #include "Tpetra_Distributor.hpp" 00056 #include "Tpetra_CrsMatrix.hpp" 00057 #include <Teuchos_Array.hpp> 00058 #include <utility> 00059 00060 namespace Tpetra { 00061 namespace Import_Util { 00062 00064 00070 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00071 void 00072 packAndPrepareWithOwningPIDs (const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix, 00073 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 00074 Teuchos::Array<char>& exports, 00075 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00076 size_t& constantNumPackets, 00077 Distributor &distor, 00078 const Teuchos::ArrayView<int>& SourcePids); 00079 00081 00088 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00089 size_t 00090 unpackAndCombineWithOwningPIDsCount (const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& 
SourceMatrix, 00091 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs, 00092 const Teuchos::ArrayView<const char> &imports, 00093 const Teuchos::ArrayView<size_t> &numPacketsPerLID, 00094 size_t constantNumPackets, 00095 Distributor &distor, 00096 CombineMode combineMode, 00097 size_t numSameIDs, 00098 const ArrayView<const LocalOrdinal> &permuteToLIDs, 00099 const ArrayView<const LocalOrdinal> &permuteFromLIDs); 00100 00102 00113 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00114 void unpackAndCombineIntoCrsArrays(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix, 00115 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs, 00116 const Teuchos::ArrayView<const char> &imports, 00117 const Teuchos::ArrayView<size_t> &numPacketsPerLID, 00118 size_t constantNumPackets, 00119 Distributor &distor, 00120 CombineMode combineMode, 00121 size_t numSameIDs, 00122 const ArrayView<const LocalOrdinal> &permuteToLIDs, 00123 const ArrayView<const LocalOrdinal> &permuteFromLIDs, 00124 size_t TargetNumRows, 00125 size_t TargetNumNonzeros, 00126 int MyTargetPID, 00127 const ArrayView<size_t> &rowPointers, 00128 const ArrayView<GlobalOrdinal> &columnIndices, 00129 const ArrayView<Scalar> &values, 00130 const Teuchos::ArrayView<const int> &SourcePids, 00131 Teuchos::Array<int> &TargetPids); 00132 00134 00136 template<typename Scalar, typename Ordinal> 00137 void 00138 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr, 00139 const Teuchos::ArrayView<Ordinal>& CRS_colind, 00140 const Teuchos::ArrayView<Scalar>&CRS_vals); 00141 00143 00145 template<typename Scalar, typename Ordinal> 00146 void 00147 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr, 00148 const Teuchos::ArrayView<Ordinal>& CRS_colind, 00149 const Teuchos::ArrayView<Scalar>& CRS_vals); 00150 00152 00163 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00164 void 00165 
lowCommunicationMakeColMapAndReindex (const Teuchos::ArrayView<const size_t> &rowPointers, 00166 const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID, 00167 const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID, 00168 const Tpetra::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & domainMap, 00169 const Teuchos::ArrayView<const int> &owningPids, 00170 Teuchos::Array<int> &remotePids, 00171 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap); 00172 00173 template <typename Matrix> 00174 struct MatrixSerializationTraits { 00175 typedef typename Matrix::scalar_type Scalar; 00176 00177 static inline 00178 size_t scalarSize( const Matrix& mat ) { return sizeof(Scalar); } 00179 00180 static inline 00181 void packBuffer( const Matrix& mat, 00182 const size_t numEntries, 00183 const Teuchos::ArrayView<const Scalar>& vals, 00184 const Teuchos::ArrayView<char> packed_vals ) { 00185 Teuchos::ArrayView<Scalar> packed_vals_scalar = 00186 Teuchos::av_reinterpret_cast<Scalar>(packed_vals); 00187 std::copy( vals.begin(), vals.begin()+numEntries, 00188 packed_vals_scalar.begin()); 00189 } 00190 00191 static inline 00192 void unpackScalar( const Matrix& mat, 00193 const char * val_char, 00194 Scalar& val ) { 00195 val = *( reinterpret_cast<const Scalar*>(val_char) ); 00196 } 00197 }; 00198 00199 } // namespace Import_Util 00200 } // namespace Tpetra 00201 00202 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00203 void 00204 Tpetra::Import_Util::packAndPrepareWithOwningPIDs (const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &SourceMatrix, 00205 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs, 00206 Teuchos::Array<char>& exports, 00207 const Teuchos::ArrayView<size_t>& numPacketsPerLID, 00208 size_t& constantNumPackets, 00209 Distributor &distor, 00210 const Teuchos::ArrayView<int>& SourcePids) 00211 { 00212 using Teuchos::Array; 00213 using Teuchos::ArrayView; 00214 using 
Teuchos::as; 00215 using Teuchos::RCP; 00216 typedef LocalOrdinal LO; 00217 typedef GlobalOrdinal GO; 00218 typedef Map<LocalOrdinal,GlobalOrdinal,Node> map_type; 00219 typedef typename ArrayView<const LO>::size_type size_type; 00220 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> matrix_type; 00221 typedef MatrixSerializationTraits<matrix_type> serialization_type; 00222 00223 TEUCHOS_TEST_FOR_EXCEPTION(!SourceMatrix.isLocallyIndexed(),std::invalid_argument, "packAndPrepareWithOwningPIDs: SourceMatrix must be locally indexed."); 00224 TEUCHOS_TEST_FOR_EXCEPTION(exportLIDs.size() != numPacketsPerLID.size(), 00225 std::invalid_argument, "packAndPrepareWithOwningPIDs: exportLIDs.size() = " << exportLIDs.size() 00226 << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << "."); 00227 TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(SourcePids.size()) != SourceMatrix.getColMap()->getNodeNumElements(), 00228 std::invalid_argument, "packAndPrepareWithOwningPIDs: SourcePids.size() = " << SourcePids.size() 00229 << "!= SourceMatrix.getColMap()->getNodeNumElements() = " << SourceMatrix.getColMap()->getNodeNumElements() << "."); 00230 00231 // Get a reference to the matrix's row Map. 00232 const map_type& rowMap = * (SourceMatrix.getRowMap()); 00233 constantNumPackets = 0; 00234 00235 // Get the GIDs of the rows we want to pack. 
00236 Array<GO> exportGIDs (exportLIDs.size()); 00237 const size_type numExportGIDs = exportGIDs.size (); 00238 for (size_type i = 0; i < numExportGIDs; ++i) { 00239 exportGIDs[i] = rowMap.getGlobalElement(exportLIDs[i]); 00240 } 00241 00242 00243 // Record initial sizing 00244 const size_t scalar_size = serialization_type::scalarSize( SourceMatrix ); 00245 const size_t sizeOfPacket = sizeof(GO) + scalar_size + sizeof(int); 00246 size_t totalNumEntries = 0; 00247 size_t maxRowLength = 0; 00248 for (size_type i = 0; i < exportGIDs.size(); ++i) { 00249 const size_t curNumEntries = SourceMatrix.getNumEntriesInGlobalRow(exportGIDs[i]); 00250 numPacketsPerLID[i] = curNumEntries * sizeOfPacket; 00251 totalNumEntries += curNumEntries; 00252 maxRowLength = std::max(curNumEntries, maxRowLength); 00253 } 00254 00255 // Pack export data by interleaving rows' indices, pids and values in 00256 // the following way: 00257 // 00258 // [inds_row0 pids_row0 vals_row0 inds_row1 pids_row1 vals_row1 ... ] 00259 if (totalNumEntries > 0) { 00260 const size_t totalNumBytes = totalNumEntries * sizeOfPacket; 00261 exports.resize(totalNumBytes); 00262 00263 // Current position in the 'exports' output array. 00264 size_t curOffsetInBytes = 0; 00265 00266 // For each row of the matrix owned by the calling process, pack 00267 // that row's column indices and values into the exports array. 00268 00269 // Locally indexed matrices always have a column Map. 00270 const map_type& colMap = * (SourceMatrix.getColMap()); 00271 ArrayView<const LocalOrdinal> lidsView; 00272 ArrayView<const Scalar> valsView; 00273 00274 // Temporary buffers for a copy of the column gids/pids 00275 Array<GO> gids(as<size_type>(maxRowLength)); 00276 Array<int> pids(as<size_type>(maxRowLength)); 00277 00278 const size_type numExportLIDs = exportLIDs.size(); 00279 for (size_type i = 0; i < numExportLIDs; i++) { 00280 // Get a (locally indexed) view of the current row's data. 
00281 SourceMatrix.getLocalRowView(exportLIDs[i], lidsView, valsView); 00282 00283 // Convert column indices as LIDs to column indices as GIDs. 00284 const size_type curNumEntries = lidsView.size(); 00285 size_t curNumEntriesST = as<size_t>(curNumEntries); 00286 ArrayView<GO> gidsView = gids(0, curNumEntries); 00287 ArrayView<int> pidsView = pids(0, curNumEntries); 00288 for (size_type k = 0; k < curNumEntries; ++k) { 00289 gidsView[k] = colMap.getGlobalElement(lidsView[k]); 00290 pidsView[k] = SourcePids[lidsView[k]]; 00291 } 00292 00293 // Views of the right places in each array so everthing looks like the right data type 00294 ArrayView<char> gidsViewOutChar = exports(curOffsetInBytes, curNumEntriesST*sizeof(GO)); 00295 ArrayView<char> pidsViewOutChar = exports(curOffsetInBytes+curNumEntriesST*sizeof(GO), curNumEntriesST*sizeof(int)); 00296 ArrayView<char> valsViewOutChar = exports(curOffsetInBytes+curNumEntriesST*(sizeof(GO)+sizeof(int)), curNumEntriesST*scalar_size); 00297 00298 ArrayView<GO> gidsViewOut = av_reinterpret_cast<GO>(gidsViewOutChar); 00299 ArrayView<int> pidsViewOut = av_reinterpret_cast<int>(pidsViewOutChar); 00300 00301 // Copy the row's data into the views of the exports array. 00302 std::copy(gidsView.begin(), gidsView.begin() + curNumEntriesST, gidsViewOut.begin()); 00303 std::copy(pidsView.begin(), pidsView.begin() + curNumEntriesST, pidsViewOut.begin()); 00304 serialization_type::packBuffer( SourceMatrix, curNumEntriesST, valsView, valsViewOutChar ); 00305 00306 // Keep track of how many bytes we packed. 00307 curOffsetInBytes += sizeOfPacket * curNumEntries; 00308 } 00309 00310 #ifdef HAVE_TPETRA_DEBUG 00311 TEUCHOS_TEST_FOR_EXCEPTION(curOffsetInBytes != totalNumBytes, 00312 std::logic_error, "packAndPrepareWithOwningPids: At end of method, the final offset bytes count " 00313 "curOffsetInBytes=" << curOffsetInBytes << " does not equal the total " 00314 "number of bytes packed totalNumBytes=" << totalNumBytes << ". 
Please " 00315 "report this bug to the Tpetra developers."); 00316 #endif // HAVE_TPETRA_DEBUG 00317 } 00318 00319 } 00320 00321 00322 00323 //---------------------------------------------------------------------------- 00324 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00325 size_t Tpetra::Import_Util::unpackAndCombineWithOwningPIDsCount(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix, 00326 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs, 00327 const Teuchos::ArrayView<const char> &imports, 00328 const Teuchos::ArrayView<size_t> &numPacketsPerLID, 00329 size_t constantNumPackets, 00330 Distributor &distor, 00331 CombineMode combineMode, 00332 size_t numSameIDs, 00333 const ArrayView<const LocalOrdinal> &permuteToLIDs, 00334 const ArrayView<const LocalOrdinal> &permuteFromLIDs) { 00335 typedef LocalOrdinal LO; 00336 typedef typename ArrayView<const LO>::size_type size_type; 00337 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> matrix_type; 00338 typedef MatrixSerializationTraits<matrix_type> serialization_type; 00339 size_t nnz = 0; 00340 00341 // CopyAndPermuteSection 00342 TEUCHOS_TEST_FOR_EXCEPTION(permuteToLIDs.size() != permuteFromLIDs.size(), 00343 std::invalid_argument, "unpackAndCombineWithOwningPIDsCount: permuteToLIDs.size() = " << permuteToLIDs.size() 00344 << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << "."); 00345 const bool locallyIndexed = SourceMatrix.isLocallyIndexed(); 00346 TEUCHOS_TEST_FOR_EXCEPTION(!locallyIndexed,std::invalid_argument, "unpackAndCombineWithOwningPIDsCount: SourceMatrix must be locally indexed."); 00347 00348 // Copy 00349 const LO numSameIDs_as_LID = Teuchos::as<LO>(numSameIDs); 00350 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; sourceLID++) 00351 nnz+=SourceMatrix.getNumEntriesInLocalRow(sourceLID); 00352 00353 // Permute 00354 const size_t numPermuteToLIDs = Teuchos::as<size_t>(permuteToLIDs.size()); 00355 for 
(size_t p = 0; p < numPermuteToLIDs; p++) 00356 nnz+=SourceMatrix.getNumEntriesInLocalRow(permuteFromLIDs[p]); 00357 00358 // UnpackAndCombine Section 00359 TEUCHOS_TEST_FOR_EXCEPTION(importLIDs.size() != numPacketsPerLID.size(), 00360 std::invalid_argument, "unpackAndCombineWithOwningPIDsCount: importLIDs.size() = " << importLIDs.size() 00361 << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << "."); 00362 00363 const size_t scalar_size = serialization_type::scalarSize( SourceMatrix ); 00364 const size_t sizeOfPacket = sizeof(GlobalOrdinal) + sizeof(int) + scalar_size; 00365 00366 size_t curOffsetInBytes = 0; 00367 for (size_type i = 0; i < importLIDs.size(); ++i) { 00368 const size_t rowSize = numPacketsPerLID[i] / sizeOfPacket; 00369 curOffsetInBytes += rowSize * sizeOfPacket; 00370 nnz +=rowSize; 00371 } 00372 return nnz; 00373 } 00374 00375 00376 00377 //---------------------------------------------------------------------------- 00378 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00379 void Tpetra::Import_Util::unpackAndCombineIntoCrsArrays(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix, 00380 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs, 00381 const Teuchos::ArrayView<const char> &imports, 00382 const Teuchos::ArrayView<size_t> &numPacketsPerLID, 00383 size_t constantNumPackets, 00384 Distributor &distor, 00385 CombineMode combineMode, 00386 size_t numSameIDs, 00387 const ArrayView<const LocalOrdinal> &permuteToLIDs, 00388 const ArrayView<const LocalOrdinal> &permuteFromLIDs, 00389 size_t TargetNumRows, 00390 size_t TargetNumNonzeros, 00391 int MyTargetPID, 00392 const ArrayView<size_t> &CSR_rowptr, 00393 const ArrayView<GlobalOrdinal> &CSR_colind, 00394 const ArrayView<Scalar> &CSR_vals, 00395 const Teuchos::ArrayView<const int> &SourcePids, 00396 Teuchos::Array<int> &TargetPids) { 00397 using Teuchos::ArrayView; 00398 using Teuchos::as; 00399 using 
Teuchos::av_reinterpret_cast; 00400 typedef LocalOrdinal LO; 00401 typedef GlobalOrdinal GO; 00402 typedef Map<LocalOrdinal,GlobalOrdinal,Node> map_type; 00403 typedef typename ArrayView<const LO>::size_type size_type; 00404 typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> matrix_type; 00405 typedef MatrixSerializationTraits<matrix_type> serialization_type; 00406 00407 size_t i,j; 00408 size_t N=TargetNumRows; 00409 size_t mynnz = TargetNumNonzeros; 00410 // In the case of reduced communicators, the SourceMatrix won't have the right "MyPID", so thus we have to supply it. 00411 int MyPID = MyTargetPID; 00412 00413 00414 // Zero the rowptr 00415 TEUCHOS_TEST_FOR_EXCEPTION(N+1 != as<size_t>(CSR_rowptr.size()), 00416 std::invalid_argument, "unpackAndCombineIntoCrsArrays: CsR_rowptr.size() = " << CSR_rowptr.size() 00417 << "!= TargetNumRows+1 = " << TargetNumRows+1 << "."); 00418 for(i=0; i<N+1; i++) CSR_rowptr[i]=0; 00419 00420 // SameIDs: Always first, always in the same place 00421 for(i=0; i<numSameIDs; i++) 00422 CSR_rowptr[i]=SourceMatrix.getNumEntriesInLocalRow(as<LO>(i)); 00423 00424 // PermuteIDs: Still local, but reordered 00425 TEUCHOS_TEST_FOR_EXCEPTION(permuteToLIDs.size() != permuteFromLIDs.size(), 00426 std::invalid_argument, "unpackAndCombineIntoCrsArrays: permuteToLIDs.size() = " << permuteToLIDs.size() 00427 << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << "."); 00428 size_t numPermuteIDs = permuteToLIDs.size(); 00429 for(i=0; i<numPermuteIDs; i++) 00430 CSR_rowptr[permuteToLIDs[i]] = SourceMatrix.getNumEntriesInLocalRow(permuteFromLIDs[i]); 00431 00432 00433 00434 // Setup CSR_rowptr for remotes 00435 TEUCHOS_TEST_FOR_EXCEPTION(importLIDs.size() != numPacketsPerLID.size(), 00436 std::invalid_argument, "unpackAndCombineIntoCrsArrays: importLIDs.size() = " << importLIDs.size() 00437 << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << "."); 00438 00439 const size_t scalar_size = serialization_type::scalarSize( 
SourceMatrix ); 00440 const size_t sizeOfPacket = sizeof(GlobalOrdinal) + sizeof(int) + scalar_size; 00441 const size_t totalNumBytes = imports.size(); 00442 const size_t RemoteNumEntries = totalNumBytes / sizeOfPacket; 00443 for (size_type k = 0; k < importLIDs.size(); ++k) { 00444 const size_t rowSize = numPacketsPerLID[k] / sizeOfPacket; 00445 CSR_rowptr[importLIDs[k]] += rowSize; 00446 } 00447 00448 // If multiple procs contribute to a row; 00449 Teuchos::Array<size_t> NewStartRow(N+1); 00450 00451 // Turn row length into a real CSR_rowptr 00452 size_t last_len = CSR_rowptr[0]; 00453 CSR_rowptr[0] = 0; 00454 for(i=1; i<N+1; i++){ 00455 size_t new_len = CSR_rowptr[i]; 00456 CSR_rowptr[i] = last_len + CSR_rowptr[i-1]; 00457 NewStartRow[i] = CSR_rowptr[i]; 00458 last_len = new_len; 00459 } 00460 00461 TEUCHOS_TEST_FOR_EXCEPTION(CSR_rowptr[N] != mynnz, 00462 std::invalid_argument, "unpackAndCombineIntoCrsArrays: CSR_rowptr[last] = " << CSR_rowptr[N] 00463 << "!= mynnz = " << mynnz << "."); 00464 00465 // Preseed TargetPids with -1 for local 00466 if(as<size_t>(TargetPids.size())!=mynnz) TargetPids.resize(mynnz); 00467 TargetPids.assign(mynnz,-1); 00468 00469 // Grab pointers for SourceMatrix 00470 ArrayRCP<const size_t> Source_rowptr_RCP; 00471 ArrayRCP<const LO> Source_colind_RCP; 00472 ArrayRCP<const Scalar> Source_vals_RCP; 00473 SourceMatrix.getAllValues(Source_rowptr_RCP,Source_colind_RCP,Source_vals_RCP); 00474 ArrayView<const size_t> Source_rowptr = Source_rowptr_RCP(); 00475 ArrayView<const LO> Source_colind = Source_colind_RCP(); 00476 ArrayView<const Scalar> Source_vals = Source_vals_RCP(); 00477 00478 const map_type& sourceColMap = * (SourceMatrix.getColMap()); 00479 ArrayView<const GO> globalColElts = sourceColMap.getNodeElementList(); 00480 00481 // SameIDs: Copy the data over 00482 for(i=0; i<numSameIDs; i++) { 00483 size_t FromRow = Source_rowptr[i]; 00484 size_t ToRow = CSR_rowptr[i]; 00485 NewStartRow[i] += Source_rowptr[i+1]-Source_rowptr[i]; 
00486 00487 for(j=Source_rowptr[i]; j<Source_rowptr[i+1]; j++) { 00488 CSR_vals[ToRow + j - FromRow] = Source_vals[j]; 00489 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]]; 00490 TargetPids[ToRow + j - FromRow] = (SourcePids[Source_colind[j]] != MyPID) ? SourcePids[Source_colind[j]] : -1; 00491 } 00492 } 00493 00494 // PermuteIDs: Copy the data over 00495 for(i=0; i<numPermuteIDs; i++) { 00496 LO FromLID = permuteFromLIDs[i]; 00497 size_t FromRow = Source_rowptr[FromLID]; 00498 size_t ToRow = CSR_rowptr[permuteToLIDs[i]]; 00499 00500 NewStartRow[permuteToLIDs[i]] += Source_rowptr[FromLID+1]-Source_rowptr[FromLID]; 00501 00502 for(j=Source_rowptr[FromLID]; j<Source_rowptr[FromLID+1]; j++) { 00503 CSR_vals[ToRow + j - FromRow] = Source_vals[j]; 00504 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]]; 00505 TargetPids[ToRow + j - FromRow] = (SourcePids[Source_colind[j]] != MyPID) ? SourcePids[Source_colind[j]] : -1; 00506 } 00507 } 00508 00509 // RemoteIDs: Loop structure following UnpackAndCombine 00510 if(RemoteNumEntries > 0) { 00511 // data packed as follows: 00512 // [inds_row0 pids_row0 vals_row0 inds_row1 pids_row1 vals_row1 ...] 00513 ArrayView<const char> avIndsC, avPidsC, avValsC; 00514 ArrayView<const GO> avInds; 00515 ArrayView<const int> avPids; 00516 00517 size_t curOffsetInBytes = 0; 00518 for (i = 0; i < Teuchos::as<size_t>(importLIDs.size()); ++i) { 00519 const size_t rowSize = numPacketsPerLID[i] / sizeOfPacket; 00520 LO ToLID = importLIDs[i]; 00521 int StartRow = NewStartRow[ToLID]; 00522 NewStartRow[ToLID]+=rowSize; 00523 if (rowSize == 0) continue; 00524 00525 // Get views of the import (incoming data) buffers. 
00526 avIndsC = imports(curOffsetInBytes, rowSize*sizeof(GO)); 00527 avPidsC = imports(curOffsetInBytes+rowSize*sizeof(GO), rowSize*sizeof(int)); 00528 avValsC = imports(curOffsetInBytes+rowSize*(sizeof(GO)+sizeof(int)), rowSize*scalar_size); 00529 00530 avInds = av_reinterpret_cast<const GO>(avIndsC); 00531 avPids = av_reinterpret_cast<const int>(avPidsC); 00532 00533 const char * avValsC_ptr = avValsC.getRawPtr(); 00534 00535 for(j=0; j<rowSize; j++){ 00536 serialization_type::unpackScalar( SourceMatrix, avValsC_ptr, CSR_vals[StartRow+j] ); 00537 CSR_colind[StartRow + j] = avInds[j]; 00538 TargetPids[StartRow + j] = (avPids[j] != MyPID) ? avPids[j] : -1; 00539 avValsC_ptr += scalar_size; 00540 } 00541 curOffsetInBytes += rowSize * sizeOfPacket; 00542 } 00543 #ifdef HAVE_TPETRA_DEBUG 00544 TEUCHOS_TEST_FOR_EXCEPTION(curOffsetInBytes != totalNumBytes, 00545 std::logic_error, "unpackAndCombineIntoCrsArrays: After unpacking and counting all the imports, the " 00546 "final offset in bytes curOffsetInBytes=" << curOffsetInBytes << " != " 00547 "total number of bytes totalNumBytes=" << totalNumBytes << ". Please " 00548 "report this bug to the Tpetra developers."); 00549 #endif // HAVE_TPETRA_DEBUG 00550 } 00551 } 00552 00553 00554 //---------------------------------------------------------------------------- 00555 // Note: This should get merged with the other Tpetra sort routines eventually. 00556 template<typename Scalar, typename Ordinal> 00557 void Tpetra::Import_Util::sortCrsEntries(const Teuchos::ArrayView<size_t> &CRS_rowptr, const Teuchos::ArrayView<Ordinal> & CRS_colind, const Teuchos::ArrayView<Scalar> &CRS_vals){ 00558 // For each row, sort column entries from smallest to largest. 00559 // Use shell sort. Stable sort so it is fast if indices are already sorted. 
00560 // Code copied from Epetra_CrsMatrix::SortEntries() 00561 size_t NumRows = CRS_rowptr.size()-1; 00562 size_t nnz = CRS_colind.size(); 00563 00564 for(size_t i = 0; i < NumRows; i++){ 00565 size_t start=CRS_rowptr[i]; 00566 if(start >= nnz) continue; 00567 00568 Scalar* locValues = &CRS_vals[start]; 00569 size_t NumEntries = CRS_rowptr[i+1] - start; 00570 Ordinal* locIndices = &CRS_colind[start]; 00571 00572 Ordinal n = NumEntries; 00573 Ordinal m = n/2; 00574 00575 while(m > 0) { 00576 Ordinal max = n - m; 00577 for(Ordinal j = 0; j < max; j++) { 00578 for(Ordinal k = j; k >= 0; k-=m) { 00579 if(locIndices[k+m] >= locIndices[k]) 00580 break; 00581 Scalar dtemp = locValues[k+m]; 00582 locValues[k+m] = locValues[k]; 00583 locValues[k] = dtemp; 00584 Ordinal itemp = locIndices[k+m]; 00585 locIndices[k+m] = locIndices[k]; 00586 locIndices[k] = itemp; 00587 } 00588 } 00589 m = m/2; 00590 } 00591 } 00592 } 00593 00594 //---------------------------------------------------------------------------- 00595 // Note: This should get merged with the other Tpetra sort routines eventually. 00596 template<typename Scalar, typename Ordinal> 00597 void Tpetra::Import_Util::sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t> &CRS_rowptr, const Teuchos::ArrayView<Ordinal> & CRS_colind, const Teuchos::ArrayView<Scalar> &CRS_vals){ 00598 // For each row, sort column entries from smallest to largest, merging column ids that are identify by adding values 00599 // Use shell sort. Stable sort so it is fast if indices are already sorted. 
00600 // Code copied from Epetra_CrsMatrix::SortEntries() 00601 00602 size_t NumRows = CRS_rowptr.size()-1; 00603 size_t nnz = CRS_colind.size(); 00604 size_t new_curr=CRS_rowptr[0], old_curr=CRS_rowptr[0]; 00605 00606 for(size_t i = 0; i < NumRows; i++){ 00607 size_t start=CRS_rowptr[i]; 00608 if(start >= nnz) continue; 00609 00610 Scalar* locValues = &CRS_vals[start]; 00611 size_t NumEntries = CRS_rowptr[i+1] - start; 00612 Ordinal* locIndices = &CRS_colind[start]; 00613 00614 // Sort phase 00615 Ordinal n = NumEntries; 00616 Ordinal m = n/2; 00617 00618 while(m > 0) { 00619 Ordinal max = n - m; 00620 for(Ordinal j = 0; j < max; j++) { 00621 for(Ordinal k = j; k >= 0; k-=m) { 00622 if(locIndices[k+m] >= locIndices[k]) 00623 break; 00624 Scalar dtemp = locValues[k+m]; 00625 locValues[k+m] = locValues[k]; 00626 locValues[k] = dtemp; 00627 Ordinal itemp = locIndices[k+m]; 00628 locIndices[k+m] = locIndices[k]; 00629 locIndices[k] = itemp; 00630 } 00631 } 00632 m = m/2; 00633 } 00634 00635 // Merge & shrink 00636 for(size_t j=CRS_rowptr[i]; j < CRS_rowptr[i+1]; j++) { 00637 if(j > CRS_rowptr[i] && CRS_colind[j]==CRS_colind[new_curr-1]) { 00638 CRS_vals[new_curr-1] += CRS_vals[j]; 00639 } 00640 else if(new_curr==j) { 00641 new_curr++; 00642 } 00643 else { 00644 CRS_colind[new_curr] = CRS_colind[j]; 00645 CRS_vals[new_curr] = CRS_vals[j]; 00646 new_curr++; 00647 } 00648 } 00649 00650 CRS_rowptr[i] = old_curr; 00651 old_curr=new_curr; 00652 } 00653 00654 CRS_rowptr[NumRows] = new_curr; 00655 } 00656 00657 //---------------------------------------------------------------------------- 00658 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node> 00659 void Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex(const ArrayView<const size_t> &rowptr, 00660 const ArrayView<LocalOrdinal> &colind_LID, 00661 const ArrayView<GlobalOrdinal> &colind_GID, 00662 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP, 00663 const 
Teuchos::ArrayView<const int> &owningPIDs, 00664 Teuchos::Array<int> &remotePIDs, 00665 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap) { 00666 00667 // The domainMap is an RCP because there is a shortcut for a (common) special case to return the 00668 // columnMap = domainMap. 00669 const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> &domainMap = *domainMapRCP; 00670 00671 // Scan all column indices and sort into two groups: 00672 // Local: those whose GID matches a GID of the domain map on this processor and 00673 // Remote: All others. 00674 size_t numDomainElements = domainMap.getNodeNumElements(); 00675 Teuchos::Array<bool> LocalGIDs; 00676 if (numDomainElements>0) LocalGIDs.resize(numDomainElements,false); // Assume domain GIDs are not local 00677 00678 // In principle it is good to have RemoteGIDs and RemotGIDList be as long as the number of remote GIDs 00679 // on this processor, but this would require two passes through the column IDs, so we make it the max of 100 00680 // and the number of block rows. 00681 const size_t numMyRows = rowptr.size()-1; 00682 int hashsize = Teuchos::as<int>(numMyRows); 00683 if (hashsize < 100) hashsize = 100; 00684 00685 Tpetra::Details::HashTable<GlobalOrdinal,LocalOrdinal> RemoteGIDs(hashsize); 00686 Teuchos::Array<GlobalOrdinal> RemoteGIDList; RemoteGIDList.reserve(hashsize); 00687 Teuchos::Array<int> PIDList; PIDList.reserve(hashsize); 00688 00689 // Here we start using the *LocalOrdinal* colind_LID array. This is safe even if both 00690 // columnIndices arrays are actually the same (because LocalOrdinal==GlobalOrdinal). 00691 // For *local* GID's set colind_LID with with their LID in the domainMap. For *remote* GIDs, 00692 // we set colind_LID with (numDomainElements+NumRemoteColGIDs) before the increment of 00693 // the remote count. These numberings will be separate because no local LID is greater 00694 // than numDomainElements. 
00695 00696 size_t NumLocalColGIDs = 0; 00697 LocalOrdinal NumRemoteColGIDs = 0; 00698 for(size_t i = 0; i < numMyRows; i++) { 00699 for(size_t j = rowptr[i]; j < rowptr[i+1]; j++) { 00700 GlobalOrdinal GID = colind_GID[j]; 00701 // Check if GID matches a row GID 00702 LocalOrdinal LID = domainMap.getLocalElement(GID); 00703 if(LID != -1) { 00704 bool alreadyFound = LocalGIDs[LID]; 00705 if (!alreadyFound) { 00706 LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID 00707 NumLocalColGIDs++; 00708 } 00709 colind_LID[j] = LID; 00710 } 00711 else { 00712 LocalOrdinal hash_value=RemoteGIDs.get(GID); 00713 if(hash_value == -1) { // This means its a new remote GID 00714 int PID = owningPIDs[j]; 00715 TEUCHOS_TEST_FOR_EXCEPTION(PID==-1,std::invalid_argument, "lowCommunicationMakeColMapAndReindex: Cannot figure out if PID is owned."); 00716 colind_LID[j] = Teuchos::as<LocalOrdinal>(numDomainElements + NumRemoteColGIDs); 00717 RemoteGIDs.add(GID, NumRemoteColGIDs); 00718 RemoteGIDList.push_back(GID); 00719 PIDList.push_back(PID); 00720 NumRemoteColGIDs++; 00721 } 00722 else 00723 colind_LID[j] = Teuchos::as<LocalOrdinal>(numDomainElements + hash_value); 00724 } 00725 } 00726 } 00727 00728 // Possible short-circuit: If all domain map GIDs are present as column indices, then set ColMap=domainMap and quit 00729 if (domainMap.getComm()->getSize()==1) { 00730 // Sanity check: When there is one processor,there can be no remoteGIDs 00731 TEUCHOS_TEST_FOR_EXCEPTION(NumRemoteColGIDs!=0,std::runtime_error,"lowCommunicationMakeColMapAndReindex: Some column IDs are not in domainMap."); 00732 if (Teuchos::as<size_t>(NumLocalColGIDs)==numDomainElements) { 00733 // In this case, we just use the domainMap's indices, which is, not coincidently, what we clobbered colind with up above anyway. 00734 // No further reindexing is needed. 
00735 colMap = domainMapRCP; 00736 return; 00737 } 00738 } 00739 00740 // Now build the array containing column GIDs 00741 // Build back end, containing remote GIDs, first 00742 LocalOrdinal numMyCols = NumLocalColGIDs + NumRemoteColGIDs; 00743 Teuchos::Array<GlobalOrdinal> ColIndices; 00744 GlobalOrdinal * RemoteColIndices=0; 00745 if(numMyCols > 0) { 00746 ColIndices.resize(numMyCols); 00747 if(NumLocalColGIDs!=Teuchos::as<size_t>(numMyCols)) RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices 00748 } 00749 00750 for(LocalOrdinal i = 0; i < NumRemoteColGIDs; i++) 00751 RemoteColIndices[i] = RemoteGIDList[i]; 00752 00753 // Build permute array for *remote* reindexing. 00754 Teuchos::Array<LocalOrdinal> RemotePermuteIDs(NumRemoteColGIDs); 00755 for(LocalOrdinal i=0; i<NumRemoteColGIDs; i++) RemotePermuteIDs[i]=i; 00756 00757 00758 // Sort External column indices so that all columns coming from a given remote processor are contiguous 00759 // This is a sort with two auxillary arrays: RemoteColIndices and RemotePermuteIDs. 00760 Tpetra::sort3(PIDList.begin(),PIDList.end(),ColIndices.begin()+NumLocalColGIDs,RemotePermuteIDs.begin()); 00761 00762 // Stash the RemotePIDs 00763 // Note: If Teuchos::Array had a shrink_to_fit like std::vector, we'd call it here. 00764 remotePIDs = PIDList; 00765 00766 // Sort external column indices so that columns from a given remote processor are not only contiguous 00767 // but also in ascending order. NOTE: I don't know if the number of externals associated 00768 // with a given remote processor is known at this point ... so I count them here. 00769 00770 // NTS: Only sort the RemoteColIndices this time... 
00771 LocalOrdinal StartCurrent = 0, StartNext = 1; 00772 while ( StartNext < NumRemoteColGIDs ) { 00773 if (PIDList[StartNext]==PIDList[StartNext-1]) StartNext++; 00774 else { 00775 Tpetra::sort2(ColIndices.begin()+NumLocalColGIDs+StartCurrent,ColIndices.begin()+NumLocalColGIDs+StartNext,RemotePermuteIDs.begin()+StartCurrent); 00776 StartCurrent = StartNext; StartNext++; 00777 } 00778 } 00779 Tpetra::sort2(ColIndices.begin()+NumLocalColGIDs+StartCurrent,ColIndices.begin()+NumLocalColGIDs+StartNext,RemotePermuteIDs.begin()+StartCurrent); 00780 00781 // Reverse the permutation to get the information we actually care about 00782 Teuchos::Array<LocalOrdinal> ReverseRemotePermuteIDs(NumRemoteColGIDs); 00783 for(LocalOrdinal i=0; i<NumRemoteColGIDs; i++) ReverseRemotePermuteIDs[RemotePermuteIDs[i]]=i; 00784 00785 // Build permute array for *local* reindexing. 00786 bool use_local_permute=false; 00787 Teuchos::Array<LocalOrdinal> LocalPermuteIDs(numDomainElements); 00788 00789 // Now fill front end. Two cases: 00790 // (1) If the number of Local column GIDs is the same as the number of Local domain GIDs, we 00791 // can simply read the domain GIDs into the front part of ColIndices, otherwise 00792 // (2) We step through the GIDs of the domainMap, checking to see if each domain GID is a column GID. 00793 // we want to do this to maintain a consistent ordering of GIDs between the columns and the domain. 
00794 Teuchos::ArrayView<const GlobalOrdinal> domainGlobalElements = domainMap.getNodeElementList(); 00795 if(Teuchos::as<size_t>(NumLocalColGIDs) == numDomainElements) { 00796 if(NumLocalColGIDs > 0) { 00797 // Load Global Indices into first numMyCols elements column GID list 00798 std::copy(domainGlobalElements.begin(),domainGlobalElements.end(),ColIndices.begin()); 00799 } 00800 } 00801 else { 00802 LocalOrdinal NumLocalAgain = 0; 00803 use_local_permute = true; 00804 for(size_t i = 0; i < numDomainElements; i++) { 00805 if(LocalGIDs[i]) { 00806 LocalPermuteIDs[i] = NumLocalAgain; 00807 ColIndices[NumLocalAgain++] = domainGlobalElements[i]; 00808 } 00809 } 00810 TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(NumLocalAgain)!=NumLocalColGIDs,std::runtime_error,"lowCommunicationMakeColMapAndReindex: Local ID count test failed."); 00811 } 00812 00813 // Make Column map 00814 Tpetra::global_size_t minus_one = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid(); 00815 colMap = Teuchos::rcp(new Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node>(minus_one,ColIndices,domainMap.getIndexBase(),domainMap.getComm(),domainMap.getNode())); 00816 00817 // Low-cost reindex of the matrix 00818 for(size_t i=0; i<numMyRows; i++){ 00819 for(size_t j=rowptr[i]; j<rowptr[i+1]; j++){ 00820 LocalOrdinal ID=colind_LID[j]; 00821 if(Teuchos::as<size_t>(ID) < numDomainElements){ 00822 if(use_local_permute) colind_LID[j] = LocalPermuteIDs[colind_LID[j]]; 00823 // In the case where use_local_permute==false, we just copy the DomainMap's ordering, 00824 // which it so happens is what we put in colind_LID to begin with. 00825 } 00826 else 00827 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements]; 00828 } 00829 } 00830 00831 } 00832 00833 00834 00835 00836 00837 00838 #endif // TPETRA_IMPORT_UTIL_HPP