Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
Tpetra_Import_Util2.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_IMPORT_UTIL2_HPP
00043 #define TPETRA_IMPORT_UTIL2_HPP
00044 
00050 #include "Tpetra_ConfigDefs.hpp" // for map, vector, string, and iostream
00051 #include "Tpetra_Import.hpp"
00052 #include "Tpetra_HashTable.hpp"
00053 #include "Tpetra_Map.hpp"
00054 #include "Tpetra_Util.hpp"
00055 #include "Tpetra_Distributor.hpp"
00056 #include "Tpetra_CrsMatrix.hpp"
00057 #include <Teuchos_Array.hpp>
00058 #include <utility>
00059 
00060 namespace Tpetra {
00061   namespace Import_Util {
00062 
00064 
00070     template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00071     void
00072     packAndPrepareWithOwningPIDs (const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
00073                                   const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00074                                   Teuchos::Array<char>& exports,
00075                                   const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00076                                   size_t& constantNumPackets,
00077                                   Distributor &distor,
00078                                   const Teuchos::ArrayView<int>& SourcePids);
00079 
00081 
00088     template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00089     size_t
00090     unpackAndCombineWithOwningPIDsCount (const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& SourceMatrix,
00091                                          const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
00092                                          const Teuchos::ArrayView<const char> &imports,
00093                                          const Teuchos::ArrayView<size_t> &numPacketsPerLID,
00094                                          size_t constantNumPackets,
00095                                          Distributor &distor,
00096                                          CombineMode combineMode,
00097                                          size_t numSameIDs,
00098                                          const ArrayView<const LocalOrdinal> &permuteToLIDs,
00099                                          const ArrayView<const LocalOrdinal> &permuteFromLIDs);
00100 
00102 
00113     template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00114     void unpackAndCombineIntoCrsArrays(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
00115                                        const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
00116                                        const Teuchos::ArrayView<const char> &imports,
00117                                        const Teuchos::ArrayView<size_t> &numPacketsPerLID,
00118                                        size_t constantNumPackets,
00119                                        Distributor &distor,
00120                                        CombineMode combineMode,
00121                                        size_t numSameIDs,
00122                                        const ArrayView<const LocalOrdinal> &permuteToLIDs,
00123                                        const ArrayView<const LocalOrdinal> &permuteFromLIDs,
00124                                        size_t TargetNumRows,
00125                                        size_t TargetNumNonzeros,
00126                                        int MyTargetPID,
00127                                        const ArrayView<size_t> &rowPointers,
00128                                        const ArrayView<GlobalOrdinal> &columnIndices,
00129                                        const ArrayView<Scalar> &values,
00130                                        const Teuchos::ArrayView<const int> &SourcePids,
00131                                        Teuchos::Array<int> &TargetPids);
00132 
00134 
00136     template<typename Scalar, typename Ordinal>
00137     void
00138     sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
00139                     const Teuchos::ArrayView<Ordinal>& CRS_colind,
00140                     const Teuchos::ArrayView<Scalar>&CRS_vals);
00141 
00143 
00145     template<typename Scalar, typename Ordinal>
00146     void
00147     sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
00148                             const Teuchos::ArrayView<Ordinal>& CRS_colind,
00149                             const Teuchos::ArrayView<Scalar>& CRS_vals);
00150 
00152 
00163     template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00164     void
00165     lowCommunicationMakeColMapAndReindex (const Teuchos::ArrayView<const size_t> &rowPointers,
00166                                           const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID,
00167                                           const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID,
00168                                           const Tpetra::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & domainMap,
00169                                           const Teuchos::ArrayView<const int> &owningPids,
00170                                           Teuchos::Array<int> &remotePids,
00171                                           Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
00172 
00173     template <typename Matrix>
00174     struct MatrixSerializationTraits {
00175       typedef typename Matrix::scalar_type Scalar;
00176 
00177       static inline
00178       size_t scalarSize( const Matrix& mat ) { return sizeof(Scalar); }
00179 
00180       static inline
00181       void packBuffer( const Matrix& mat,
00182                        const size_t numEntries,
00183                        const Teuchos::ArrayView<const Scalar>& vals,
00184                        const Teuchos::ArrayView<char> packed_vals ) {
00185         Teuchos::ArrayView<Scalar> packed_vals_scalar =
00186           Teuchos::av_reinterpret_cast<Scalar>(packed_vals);
00187         std::copy( vals.begin(), vals.begin()+numEntries,
00188                    packed_vals_scalar.begin());
00189       }
00190 
00191       static inline
00192       void unpackScalar( const Matrix& mat,
00193                          const char * val_char,
00194                          Scalar& val ) {
00195         val = *( reinterpret_cast<const Scalar*>(val_char) );
00196       }
00197     };
00198 
00199   } // namespace Import_Util
00200 } // namespace Tpetra
00201 
00202 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00203 void
00204 Tpetra::Import_Util::packAndPrepareWithOwningPIDs (const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &SourceMatrix,
00205                                                    const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00206                                                    Teuchos::Array<char>& exports,
00207                                                    const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00208                                                    size_t& constantNumPackets,
00209                                                    Distributor &distor,
00210                                                    const Teuchos::ArrayView<int>& SourcePids)
00211 {
00212   using Teuchos::Array;
00213   using Teuchos::ArrayView;
00214   using Teuchos::as;
00215   using Teuchos::RCP;
00216   typedef LocalOrdinal LO;
00217   typedef GlobalOrdinal GO;
00218   typedef Map<LocalOrdinal,GlobalOrdinal,Node>  map_type;
00219   typedef typename ArrayView<const LO>::size_type size_type;
00220   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> matrix_type;
00221   typedef MatrixSerializationTraits<matrix_type> serialization_type;
00222 
00223   TEUCHOS_TEST_FOR_EXCEPTION(!SourceMatrix.isLocallyIndexed(),std::invalid_argument, "packAndPrepareWithOwningPIDs: SourceMatrix must be locally indexed.");
00224   TEUCHOS_TEST_FOR_EXCEPTION(exportLIDs.size() != numPacketsPerLID.size(),
00225                              std::invalid_argument, "packAndPrepareWithOwningPIDs: exportLIDs.size() = " << exportLIDs.size()
00226                              << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << ".");
00227   TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(SourcePids.size()) != SourceMatrix.getColMap()->getNodeNumElements(),
00228                              std::invalid_argument, "packAndPrepareWithOwningPIDs: SourcePids.size() = " << SourcePids.size()
00229                              << "!= SourceMatrix.getColMap()->getNodeNumElements() = " << SourceMatrix.getColMap()->getNodeNumElements() << ".");
00230 
00231   // Get a reference to the matrix's row Map.
00232   const map_type& rowMap = * (SourceMatrix.getRowMap());
00233   constantNumPackets = 0;
00234 
00235   // Get the GIDs of the rows we want to pack.
00236   Array<GO> exportGIDs (exportLIDs.size());
00237   const size_type numExportGIDs = exportGIDs.size ();
00238   for (size_type i = 0; i < numExportGIDs; ++i) {
00239     exportGIDs[i] = rowMap.getGlobalElement(exportLIDs[i]);
00240   }
00241 
00242 
00243   // Record initial sizing
00244   const size_t scalar_size = serialization_type::scalarSize( SourceMatrix );
00245   const size_t sizeOfPacket = sizeof(GO) + scalar_size + sizeof(int);
00246   size_t totalNumEntries = 0;
00247   size_t maxRowLength = 0;
00248   for (size_type i = 0; i < exportGIDs.size(); ++i) {
00249     const size_t curNumEntries = SourceMatrix.getNumEntriesInGlobalRow(exportGIDs[i]);
00250     numPacketsPerLID[i] = curNumEntries * sizeOfPacket;
00251     totalNumEntries += curNumEntries;
00252     maxRowLength = std::max(curNumEntries, maxRowLength);
00253   }
00254 
00255   // Pack export data by interleaving rows' indices, pids and values in
00256   // the following way:
00257   //
00258   // [inds_row0 pids_row0 vals_row0 inds_row1 pids_row1 vals_row1 ... ]
00259   if (totalNumEntries > 0) {
00260     const size_t totalNumBytes = totalNumEntries * sizeOfPacket;
00261     exports.resize(totalNumBytes);
00262 
00263     // Current position in the 'exports' output array.
00264     size_t curOffsetInBytes = 0;
00265 
00266     // For each row of the matrix owned by the calling process, pack
00267     // that row's column indices and values into the exports array.
00268 
00269     // Locally indexed matrices always have a column Map.
00270     const map_type& colMap = * (SourceMatrix.getColMap());
00271     ArrayView<const LocalOrdinal> lidsView;
00272     ArrayView<const Scalar> valsView;
00273 
00274     // Temporary buffers for a copy of the column gids/pids
00275     Array<GO>  gids(as<size_type>(maxRowLength));
00276     Array<int> pids(as<size_type>(maxRowLength));
00277 
00278     const size_type numExportLIDs = exportLIDs.size();
00279     for (size_type i = 0; i < numExportLIDs; i++) {
00280       // Get a (locally indexed) view of the current row's data.
00281       SourceMatrix.getLocalRowView(exportLIDs[i], lidsView, valsView);
00282 
00283       // Convert column indices as LIDs to column indices as GIDs.
00284       const size_type curNumEntries = lidsView.size();
00285       size_t curNumEntriesST = as<size_t>(curNumEntries);
00286       ArrayView<GO>  gidsView = gids(0, curNumEntries);
00287       ArrayView<int> pidsView = pids(0, curNumEntries);
00288       for (size_type k = 0; k < curNumEntries; ++k) {
00289         gidsView[k] = colMap.getGlobalElement(lidsView[k]);
00290         pidsView[k] = SourcePids[lidsView[k]];
00291       }
00292 
00293       // Views of the right places in each array so everthing looks like the right data type
00294       ArrayView<char> gidsViewOutChar = exports(curOffsetInBytes, curNumEntriesST*sizeof(GO));
00295       ArrayView<char> pidsViewOutChar = exports(curOffsetInBytes+curNumEntriesST*sizeof(GO), curNumEntriesST*sizeof(int));
00296       ArrayView<char> valsViewOutChar = exports(curOffsetInBytes+curNumEntriesST*(sizeof(GO)+sizeof(int)), curNumEntriesST*scalar_size);
00297 
00298       ArrayView<GO> gidsViewOut     = av_reinterpret_cast<GO>(gidsViewOutChar);
00299       ArrayView<int> pidsViewOut    = av_reinterpret_cast<int>(pidsViewOutChar);
00300 
00301       // Copy the row's data into the views of the exports array.
00302       std::copy(gidsView.begin(), gidsView.begin() + curNumEntriesST, gidsViewOut.begin());
00303       std::copy(pidsView.begin(), pidsView.begin() + curNumEntriesST, pidsViewOut.begin());
00304       serialization_type::packBuffer( SourceMatrix, curNumEntriesST, valsView, valsViewOutChar );
00305 
00306       // Keep track of how many bytes we packed.
00307       curOffsetInBytes += sizeOfPacket * curNumEntries;
00308     }
00309 
00310 #ifdef HAVE_TPETRA_DEBUG
00311       TEUCHOS_TEST_FOR_EXCEPTION(curOffsetInBytes != totalNumBytes,
00312         std::logic_error, "packAndPrepareWithOwningPids: At end of method, the final offset bytes count "
00313         "curOffsetInBytes=" << curOffsetInBytes << " does not equal the total "
00314         "number of bytes packed totalNumBytes=" << totalNumBytes << ".  Please "
00315         "report this bug to the Tpetra developers.");
00316 #endif //  HAVE_TPETRA_DEBUG
00317   }
00318 
00319 }
00320 
00321 
00322 
00323 //----------------------------------------------------------------------------
00324 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00325 size_t Tpetra::Import_Util::unpackAndCombineWithOwningPIDsCount(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
00326                                                                 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
00327                                                                 const Teuchos::ArrayView<const char> &imports,
00328                                                                 const Teuchos::ArrayView<size_t> &numPacketsPerLID,
00329                                                                 size_t constantNumPackets,
00330                                                                 Distributor &distor,
00331                                                                 CombineMode combineMode,
00332                                                                 size_t numSameIDs,
00333                                                                 const ArrayView<const LocalOrdinal> &permuteToLIDs,
00334                                                                 const ArrayView<const LocalOrdinal> &permuteFromLIDs) {
00335   typedef LocalOrdinal LO;
00336   typedef typename ArrayView<const LO>::size_type size_type;
00337   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> matrix_type;
00338   typedef MatrixSerializationTraits<matrix_type> serialization_type;
00339   size_t nnz = 0;
00340 
00341   // CopyAndPermuteSection
00342   TEUCHOS_TEST_FOR_EXCEPTION(permuteToLIDs.size() != permuteFromLIDs.size(),
00343                              std::invalid_argument, "unpackAndCombineWithOwningPIDsCount: permuteToLIDs.size() = " << permuteToLIDs.size()
00344                              << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
00345   const bool locallyIndexed = SourceMatrix.isLocallyIndexed();
00346   TEUCHOS_TEST_FOR_EXCEPTION(!locallyIndexed,std::invalid_argument, "unpackAndCombineWithOwningPIDsCount: SourceMatrix must be locally indexed.");
00347 
00348   // Copy
00349   const LO numSameIDs_as_LID = Teuchos::as<LO>(numSameIDs);
00350   for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; sourceLID++)
00351     nnz+=SourceMatrix.getNumEntriesInLocalRow(sourceLID);
00352 
00353   // Permute
00354   const size_t numPermuteToLIDs = Teuchos::as<size_t>(permuteToLIDs.size());
00355   for (size_t p = 0; p < numPermuteToLIDs; p++)
00356     nnz+=SourceMatrix.getNumEntriesInLocalRow(permuteFromLIDs[p]);
00357 
00358   // UnpackAndCombine Section
00359   TEUCHOS_TEST_FOR_EXCEPTION(importLIDs.size() != numPacketsPerLID.size(),
00360                              std::invalid_argument, "unpackAndCombineWithOwningPIDsCount: importLIDs.size() = " << importLIDs.size()
00361                              << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << ".");
00362 
00363   const size_t scalar_size = serialization_type::scalarSize( SourceMatrix );
00364   const size_t sizeOfPacket    = sizeof(GlobalOrdinal)  + sizeof(int) + scalar_size;
00365 
00366   size_t curOffsetInBytes = 0;
00367   for (size_type i = 0; i < importLIDs.size(); ++i) {
00368     const size_t rowSize = numPacketsPerLID[i] / sizeOfPacket;
00369     curOffsetInBytes += rowSize * sizeOfPacket;
00370     nnz +=rowSize;
00371   }
00372   return nnz;
00373 }
00374 
00375 
00376 
00377 //----------------------------------------------------------------------------
00378 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00379 void Tpetra::Import_Util::unpackAndCombineIntoCrsArrays(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
00380                                                         const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
00381                                                         const Teuchos::ArrayView<const char> &imports,
00382                                                         const Teuchos::ArrayView<size_t> &numPacketsPerLID,
00383                                                         size_t constantNumPackets,
00384                                                         Distributor &distor,
00385                                                         CombineMode combineMode,
00386                                                         size_t numSameIDs,
00387                                                         const ArrayView<const LocalOrdinal> &permuteToLIDs,
00388                                                         const ArrayView<const LocalOrdinal> &permuteFromLIDs,
00389                                                         size_t TargetNumRows,
00390                                                         size_t TargetNumNonzeros,
00391                                                         int MyTargetPID,
00392                                                         const ArrayView<size_t> &CSR_rowptr,
00393                                                         const ArrayView<GlobalOrdinal> &CSR_colind,
00394                                                         const ArrayView<Scalar> &CSR_vals,
00395                                                         const Teuchos::ArrayView<const int> &SourcePids,
00396                                                         Teuchos::Array<int> &TargetPids) {
00397   using Teuchos::ArrayView;
00398   using Teuchos::as;
00399   using Teuchos::av_reinterpret_cast;
00400   typedef LocalOrdinal LO;
00401   typedef GlobalOrdinal GO;
00402   typedef Map<LocalOrdinal,GlobalOrdinal,Node>  map_type;
00403   typedef typename ArrayView<const LO>::size_type size_type;
00404   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> matrix_type;
00405   typedef MatrixSerializationTraits<matrix_type> serialization_type;
00406 
00407   size_t i,j;
00408   size_t N=TargetNumRows;
00409   size_t mynnz = TargetNumNonzeros;
00410   // In the case of reduced communicators, the SourceMatrix won't have the right "MyPID", so thus we have to supply it.
00411   int MyPID = MyTargetPID;
00412 
00413 
00414   // Zero the rowptr
00415   TEUCHOS_TEST_FOR_EXCEPTION(N+1 != as<size_t>(CSR_rowptr.size()),
00416                              std::invalid_argument, "unpackAndCombineIntoCrsArrays: CsR_rowptr.size() = " << CSR_rowptr.size()
00417                              << "!= TargetNumRows+1 = " << TargetNumRows+1 << ".");
00418   for(i=0; i<N+1; i++) CSR_rowptr[i]=0;
00419 
00420   // SameIDs: Always first, always in the same place
00421   for(i=0; i<numSameIDs; i++)
00422     CSR_rowptr[i]=SourceMatrix.getNumEntriesInLocalRow(as<LO>(i));
00423 
00424   // PermuteIDs: Still local, but reordered
00425   TEUCHOS_TEST_FOR_EXCEPTION(permuteToLIDs.size() != permuteFromLIDs.size(),
00426                              std::invalid_argument, "unpackAndCombineIntoCrsArrays: permuteToLIDs.size() = " << permuteToLIDs.size()
00427                              << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
00428   size_t numPermuteIDs = permuteToLIDs.size();
00429   for(i=0; i<numPermuteIDs; i++)
00430     CSR_rowptr[permuteToLIDs[i]] = SourceMatrix.getNumEntriesInLocalRow(permuteFromLIDs[i]);
00431 
00432 
00433 
00434   // Setup CSR_rowptr for remotes
00435   TEUCHOS_TEST_FOR_EXCEPTION(importLIDs.size() != numPacketsPerLID.size(),
00436                              std::invalid_argument, "unpackAndCombineIntoCrsArrays: importLIDs.size() = " << importLIDs.size()
00437                              << "!= numPacketsPerLID.size() = " << numPacketsPerLID.size() << ".");
00438 
00439   const size_t scalar_size = serialization_type::scalarSize( SourceMatrix );
00440   const size_t sizeOfPacket     = sizeof(GlobalOrdinal)  + sizeof(int) + scalar_size;
00441   const size_t totalNumBytes    = imports.size();
00442   const size_t RemoteNumEntries = totalNumBytes / sizeOfPacket;
00443   for (size_type k = 0; k < importLIDs.size(); ++k) {
00444     const size_t rowSize = numPacketsPerLID[k] / sizeOfPacket;
00445     CSR_rowptr[importLIDs[k]] += rowSize;
00446   }
00447 
00448   // If multiple procs contribute to a row;
00449   Teuchos::Array<size_t> NewStartRow(N+1);
00450 
00451   // Turn row length into a real CSR_rowptr
00452   size_t last_len = CSR_rowptr[0];
00453   CSR_rowptr[0] = 0;
00454   for(i=1; i<N+1; i++){
00455     size_t new_len    = CSR_rowptr[i];
00456     CSR_rowptr[i]  = last_len + CSR_rowptr[i-1];
00457     NewStartRow[i] = CSR_rowptr[i];
00458     last_len       = new_len;
00459   }
00460 
00461   TEUCHOS_TEST_FOR_EXCEPTION(CSR_rowptr[N] != mynnz,
00462                              std::invalid_argument, "unpackAndCombineIntoCrsArrays: CSR_rowptr[last] = " << CSR_rowptr[N]
00463                              << "!= mynnz = " << mynnz << ".");
00464 
00465   // Preseed TargetPids with -1 for local
00466   if(as<size_t>(TargetPids.size())!=mynnz) TargetPids.resize(mynnz);
00467   TargetPids.assign(mynnz,-1);
00468 
00469   // Grab pointers for SourceMatrix
00470   ArrayRCP<const size_t> Source_rowptr_RCP;
00471   ArrayRCP<const LO>     Source_colind_RCP;
00472   ArrayRCP<const Scalar> Source_vals_RCP;
00473   SourceMatrix.getAllValues(Source_rowptr_RCP,Source_colind_RCP,Source_vals_RCP);
00474   ArrayView<const size_t> Source_rowptr = Source_rowptr_RCP();
00475   ArrayView<const LO>     Source_colind = Source_colind_RCP();
00476   ArrayView<const Scalar> Source_vals   = Source_vals_RCP();
00477 
00478   const map_type& sourceColMap = * (SourceMatrix.getColMap());
00479   ArrayView<const GO> globalColElts = sourceColMap.getNodeElementList();
00480 
00481   // SameIDs: Copy the data over
00482   for(i=0; i<numSameIDs; i++) {
00483     size_t FromRow = Source_rowptr[i];
00484     size_t ToRow   = CSR_rowptr[i];
00485     NewStartRow[i] += Source_rowptr[i+1]-Source_rowptr[i];
00486 
00487     for(j=Source_rowptr[i]; j<Source_rowptr[i+1]; j++) {
00488       CSR_vals[ToRow + j - FromRow]   = Source_vals[j];
00489       CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
00490       TargetPids[ToRow + j - FromRow] = (SourcePids[Source_colind[j]] != MyPID) ? SourcePids[Source_colind[j]] : -1;
00491     }
00492   }
00493 
00494   // PermuteIDs: Copy the data over
00495   for(i=0; i<numPermuteIDs; i++) {
00496     LO FromLID     = permuteFromLIDs[i];
00497     size_t FromRow = Source_rowptr[FromLID];
00498     size_t ToRow   = CSR_rowptr[permuteToLIDs[i]];
00499 
00500     NewStartRow[permuteToLIDs[i]] += Source_rowptr[FromLID+1]-Source_rowptr[FromLID];
00501 
00502     for(j=Source_rowptr[FromLID]; j<Source_rowptr[FromLID+1]; j++) {
00503       CSR_vals[ToRow + j - FromRow]   = Source_vals[j];
00504       CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
00505       TargetPids[ToRow + j - FromRow] = (SourcePids[Source_colind[j]] != MyPID) ? SourcePids[Source_colind[j]] : -1;
00506     }
00507   }
00508 
00509   // RemoteIDs: Loop structure following UnpackAndCombine
00510   if(RemoteNumEntries > 0) {
00511     // data packed as follows:
00512     // [inds_row0 pids_row0 vals_row0 inds_row1 pids_row1 vals_row1 ...]
00513     ArrayView<const char>   avIndsC, avPidsC, avValsC;
00514     ArrayView<const GO>     avInds;
00515     ArrayView<const int>    avPids;
00516 
00517     size_t curOffsetInBytes = 0;
00518     for (i = 0; i < Teuchos::as<size_t>(importLIDs.size()); ++i) {
00519       const size_t rowSize = numPacketsPerLID[i] / sizeOfPacket;
00520       LO ToLID     = importLIDs[i];
00521       int StartRow = NewStartRow[ToLID];
00522       NewStartRow[ToLID]+=rowSize;
00523       if (rowSize == 0) continue;
00524 
00525       // Get views of the import (incoming data) buffers.
00526       avIndsC = imports(curOffsetInBytes, rowSize*sizeof(GO));
00527       avPidsC = imports(curOffsetInBytes+rowSize*sizeof(GO), rowSize*sizeof(int));
00528       avValsC = imports(curOffsetInBytes+rowSize*(sizeof(GO)+sizeof(int)), rowSize*scalar_size);
00529 
00530       avInds = av_reinterpret_cast<const GO>(avIndsC);
00531       avPids = av_reinterpret_cast<const int>(avPidsC);
00532 
00533       const char * avValsC_ptr = avValsC.getRawPtr();
00534 
00535       for(j=0; j<rowSize; j++){
00536         serialization_type::unpackScalar( SourceMatrix, avValsC_ptr, CSR_vals[StartRow+j] );
00537         CSR_colind[StartRow + j] = avInds[j];
00538         TargetPids[StartRow + j] = (avPids[j] != MyPID) ? avPids[j] : -1;
00539         avValsC_ptr += scalar_size;
00540       }
00541       curOffsetInBytes += rowSize * sizeOfPacket;
00542     }
00543 #ifdef HAVE_TPETRA_DEBUG
00544     TEUCHOS_TEST_FOR_EXCEPTION(curOffsetInBytes != totalNumBytes,
00545                                std::logic_error, "unpackAndCombineIntoCrsArrays: After unpacking and counting all the imports, the "
00546                                "final offset in bytes curOffsetInBytes=" << curOffsetInBytes << " != "
00547                                "total number of bytes totalNumBytes=" << totalNumBytes << ".  Please "
00548                                "report this bug to the Tpetra developers.");
00549 #endif // HAVE_TPETRA_DEBUG
00550   }
00551 }
00552 
00553 
00554 //----------------------------------------------------------------------------
00555 // Note: This should get merged with the other Tpetra sort routines eventually.
00556 template<typename Scalar, typename Ordinal>
00557 void Tpetra::Import_Util::sortCrsEntries(const Teuchos::ArrayView<size_t> &CRS_rowptr, const Teuchos::ArrayView<Ordinal> & CRS_colind, const Teuchos::ArrayView<Scalar> &CRS_vals){
00558   // For each row, sort column entries from smallest to largest.
00559   // Use shell sort. Stable sort so it is fast if indices are already sorted.
00560   // Code copied from  Epetra_CrsMatrix::SortEntries()
00561   size_t NumRows = CRS_rowptr.size()-1;
00562   size_t nnz = CRS_colind.size();
00563 
00564   for(size_t i = 0; i < NumRows; i++){
00565     size_t start=CRS_rowptr[i];
00566     if(start >= nnz) continue;
00567 
00568     Scalar* locValues   = &CRS_vals[start];
00569     size_t NumEntries   = CRS_rowptr[i+1] - start;
00570     Ordinal* locIndices = &CRS_colind[start];
00571 
00572     Ordinal n = NumEntries;
00573     Ordinal m = n/2;
00574 
00575     while(m > 0) {
00576       Ordinal max = n - m;
00577       for(Ordinal j = 0; j < max; j++) {
00578         for(Ordinal k = j; k >= 0; k-=m) {
00579           if(locIndices[k+m] >= locIndices[k])
00580             break;
00581           Scalar dtemp = locValues[k+m];
00582           locValues[k+m] = locValues[k];
00583           locValues[k] = dtemp;
00584           Ordinal itemp = locIndices[k+m];
00585           locIndices[k+m] = locIndices[k];
00586           locIndices[k] = itemp;
00587         }
00588       }
00589       m = m/2;
00590     }
00591   }
00592 }
00593 
00594 //----------------------------------------------------------------------------
00595 // Note: This should get merged with the other Tpetra sort routines eventually.
00596 template<typename Scalar, typename Ordinal>
00597 void Tpetra::Import_Util::sortAndMergeCrsEntries(const Teuchos::ArrayView<size_t> &CRS_rowptr, const Teuchos::ArrayView<Ordinal> & CRS_colind, const Teuchos::ArrayView<Scalar> &CRS_vals){
00598   // For each row, sort column entries from smallest to largest, merging column ids that are identify by adding values
00599   // Use shell sort. Stable sort so it is fast if indices are already sorted.
00600   // Code copied from  Epetra_CrsMatrix::SortEntries()
00601 
00602   size_t NumRows = CRS_rowptr.size()-1;
00603   size_t nnz = CRS_colind.size();
00604   size_t new_curr=CRS_rowptr[0], old_curr=CRS_rowptr[0];
00605 
00606   for(size_t i = 0; i < NumRows; i++){
00607     size_t start=CRS_rowptr[i];
00608     if(start >= nnz) continue;
00609 
00610     Scalar* locValues   = &CRS_vals[start];
00611     size_t NumEntries   = CRS_rowptr[i+1] - start;
00612     Ordinal* locIndices = &CRS_colind[start];
00613 
00614     // Sort phase
00615     Ordinal n = NumEntries;
00616     Ordinal m = n/2;
00617 
00618     while(m > 0) {
00619       Ordinal max = n - m;
00620       for(Ordinal j = 0; j < max; j++) {
00621         for(Ordinal k = j; k >= 0; k-=m) {
00622           if(locIndices[k+m] >= locIndices[k])
00623             break;
00624           Scalar dtemp = locValues[k+m];
00625           locValues[k+m] = locValues[k];
00626           locValues[k] = dtemp;
00627           Ordinal itemp = locIndices[k+m];
00628           locIndices[k+m] = locIndices[k];
00629           locIndices[k] = itemp;
00630         }
00631       }
00632       m = m/2;
00633     }
00634 
00635     // Merge & shrink
00636     for(size_t j=CRS_rowptr[i]; j < CRS_rowptr[i+1]; j++) {
00637       if(j > CRS_rowptr[i] && CRS_colind[j]==CRS_colind[new_curr-1]) {
00638         CRS_vals[new_curr-1] += CRS_vals[j];
00639       }
00640       else if(new_curr==j) {
00641         new_curr++;
00642       }
00643       else {
00644         CRS_colind[new_curr] = CRS_colind[j];
00645         CRS_vals[new_curr]   = CRS_vals[j];
00646         new_curr++;
00647       }
00648     }
00649 
00650     CRS_rowptr[i] = old_curr;
00651     old_curr=new_curr;
00652   }
00653 
00654   CRS_rowptr[NumRows] = new_curr;
00655 }
00656 
00657 //----------------------------------------------------------------------------
00658 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
00659 void Tpetra::Import_Util::lowCommunicationMakeColMapAndReindex(const ArrayView<const size_t> &rowptr,
00660                                                                const ArrayView<LocalOrdinal> &colind_LID,
00661                                                                const ArrayView<GlobalOrdinal> &colind_GID,
00662                                                                const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
00663                                                                const Teuchos::ArrayView<const int> &owningPIDs,
00664                                                                Teuchos::Array<int> &remotePIDs,
00665                                                                Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap) {
00666 
00667   // The domainMap is an RCP because there is a shortcut for a (common) special case to return the
00668   // columnMap = domainMap.
00669   const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> &domainMap = *domainMapRCP;
00670 
00671   // Scan all column indices and sort into two groups:
00672   // Local:  those whose GID matches a GID of the domain map on this processor and
00673   // Remote: All others.
00674   size_t numDomainElements = domainMap.getNodeNumElements();
00675   Teuchos::Array<bool> LocalGIDs;
00676   if (numDomainElements>0) LocalGIDs.resize(numDomainElements,false); // Assume domain GIDs are not local
00677 
00678   // In principle it is good to have RemoteGIDs and RemotGIDList be as long as the number of remote GIDs
00679   // on this processor, but this would require two passes through the column IDs, so we make it the max of 100
00680   // and the number of block rows.
00681   const size_t numMyRows =  rowptr.size()-1;
00682   int    hashsize        = Teuchos::as<int>(numMyRows);
00683   if (hashsize < 100) hashsize = 100;
00684 
00685   Tpetra::Details::HashTable<GlobalOrdinal,LocalOrdinal> RemoteGIDs(hashsize);
00686   Teuchos::Array<GlobalOrdinal> RemoteGIDList; RemoteGIDList.reserve(hashsize);
00687   Teuchos::Array<int> PIDList;                 PIDList.reserve(hashsize);
00688 
00689   // Here we start using the *LocalOrdinal* colind_LID array.  This is safe even if both
00690   // columnIndices arrays are actually the same (because LocalOrdinal==GlobalOrdinal).
00691   // For *local* GID's set colind_LID with with their LID in the domainMap.  For *remote* GIDs,
00692   // we set colind_LID with (numDomainElements+NumRemoteColGIDs) before the increment of
00693   // the remote count.  These numberings will be separate because no local LID is greater
00694   // than numDomainElements.
00695 
00696   size_t NumLocalColGIDs = 0;
00697   LocalOrdinal NumRemoteColGIDs = 0;
00698   for(size_t i = 0; i < numMyRows; i++) {
00699     for(size_t j = rowptr[i]; j < rowptr[i+1]; j++) {
00700       GlobalOrdinal GID = colind_GID[j];
00701       // Check if GID matches a row GID
00702       LocalOrdinal LID = domainMap.getLocalElement(GID);
00703       if(LID != -1) {
00704         bool alreadyFound = LocalGIDs[LID];
00705         if (!alreadyFound) {
00706           LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
00707           NumLocalColGIDs++;
00708         }
00709         colind_LID[j] = LID;
00710       }
00711       else {
00712         LocalOrdinal hash_value=RemoteGIDs.get(GID);
00713         if(hash_value  == -1) { // This means its a new remote GID
00714           int PID = owningPIDs[j];
00715           TEUCHOS_TEST_FOR_EXCEPTION(PID==-1,std::invalid_argument, "lowCommunicationMakeColMapAndReindex: Cannot figure out if PID is owned.");
00716           colind_LID[j] = Teuchos::as<LocalOrdinal>(numDomainElements + NumRemoteColGIDs);
00717           RemoteGIDs.add(GID, NumRemoteColGIDs);
00718           RemoteGIDList.push_back(GID);
00719           PIDList.push_back(PID);
00720           NumRemoteColGIDs++;
00721         }
00722         else
00723           colind_LID[j] = Teuchos::as<LocalOrdinal>(numDomainElements + hash_value);
00724       }
00725     }
00726   }
00727 
00728   // Possible short-circuit:  If all domain map GIDs are present as column indices, then set ColMap=domainMap and quit
00729   if (domainMap.getComm()->getSize()==1) {
00730     // Sanity check: When there is one processor,there can be no remoteGIDs
00731     TEUCHOS_TEST_FOR_EXCEPTION(NumRemoteColGIDs!=0,std::runtime_error,"lowCommunicationMakeColMapAndReindex: Some column IDs are not in domainMap.");
00732     if (Teuchos::as<size_t>(NumLocalColGIDs)==numDomainElements) {
00733       // In this case, we just use the domainMap's indices, which is, not coincidently, what we clobbered colind with up above anyway.
00734       // No further reindexing is needed.
00735       colMap = domainMapRCP;
00736       return;
00737     }
00738   }
00739 
00740   // Now build the array containing column GIDs
00741   // Build back end, containing remote GIDs, first
00742   LocalOrdinal numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
00743   Teuchos::Array<GlobalOrdinal> ColIndices;
00744   GlobalOrdinal * RemoteColIndices=0;
00745   if(numMyCols > 0) {
00746     ColIndices.resize(numMyCols);
00747     if(NumLocalColGIDs!=Teuchos::as<size_t>(numMyCols)) RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
00748   }
00749 
00750   for(LocalOrdinal i = 0; i < NumRemoteColGIDs; i++)
00751     RemoteColIndices[i] = RemoteGIDList[i];
00752 
00753   // Build permute array for *remote* reindexing.
00754   Teuchos::Array<LocalOrdinal> RemotePermuteIDs(NumRemoteColGIDs);
00755   for(LocalOrdinal i=0; i<NumRemoteColGIDs; i++) RemotePermuteIDs[i]=i;
00756 
00757 
00758   // Sort External column indices so that all columns coming from a given remote processor are contiguous
00759   // This is a sort with two auxillary arrays: RemoteColIndices and RemotePermuteIDs.
00760   Tpetra::sort3(PIDList.begin(),PIDList.end(),ColIndices.begin()+NumLocalColGIDs,RemotePermuteIDs.begin());
00761 
00762   // Stash the RemotePIDs
00763   // Note: If Teuchos::Array had a shrink_to_fit like std::vector, we'd call it here.
00764   remotePIDs = PIDList;
00765 
00766   // Sort external column indices so that columns from a given remote processor are not only contiguous
00767   // but also in ascending order. NOTE: I don't know if the number of externals associated
00768   // with a given remote processor is known at this point ... so I count them here.
00769 
00770   // NTS: Only sort the RemoteColIndices this time...
00771   LocalOrdinal StartCurrent = 0, StartNext = 1;
00772   while ( StartNext < NumRemoteColGIDs ) {
00773     if (PIDList[StartNext]==PIDList[StartNext-1]) StartNext++;
00774     else {
00775       Tpetra::sort2(ColIndices.begin()+NumLocalColGIDs+StartCurrent,ColIndices.begin()+NumLocalColGIDs+StartNext,RemotePermuteIDs.begin()+StartCurrent);
00776       StartCurrent = StartNext; StartNext++;
00777     }
00778   }
00779     Tpetra::sort2(ColIndices.begin()+NumLocalColGIDs+StartCurrent,ColIndices.begin()+NumLocalColGIDs+StartNext,RemotePermuteIDs.begin()+StartCurrent);
00780 
00781   // Reverse the permutation to get the information we actually care about
00782   Teuchos::Array<LocalOrdinal> ReverseRemotePermuteIDs(NumRemoteColGIDs);
00783   for(LocalOrdinal i=0; i<NumRemoteColGIDs; i++) ReverseRemotePermuteIDs[RemotePermuteIDs[i]]=i;
00784 
00785   // Build permute array for *local* reindexing.
00786   bool use_local_permute=false;
00787   Teuchos::Array<LocalOrdinal> LocalPermuteIDs(numDomainElements);
00788 
00789   // Now fill front end. Two cases:
00790   // (1) If the number of Local column GIDs is the same as the number of Local domain GIDs, we
00791   //     can simply read the domain GIDs into the front part of ColIndices, otherwise
00792   // (2) We step through the GIDs of the domainMap, checking to see if each domain GID is a column GID.
00793   //     we want to do this to maintain a consistent ordering of GIDs between the columns and the domain.
00794   Teuchos::ArrayView<const GlobalOrdinal> domainGlobalElements = domainMap.getNodeElementList();
00795   if(Teuchos::as<size_t>(NumLocalColGIDs) == numDomainElements) {
00796     if(NumLocalColGIDs > 0) {
00797       // Load Global Indices into first numMyCols elements column GID list
00798       std::copy(domainGlobalElements.begin(),domainGlobalElements.end(),ColIndices.begin());
00799     }
00800   }
00801   else {
00802     LocalOrdinal NumLocalAgain = 0;
00803     use_local_permute = true;
00804     for(size_t i = 0; i < numDomainElements; i++) {
00805       if(LocalGIDs[i]) {
00806         LocalPermuteIDs[i] = NumLocalAgain;
00807         ColIndices[NumLocalAgain++] = domainGlobalElements[i];
00808       }
00809     }
00810     TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(NumLocalAgain)!=NumLocalColGIDs,std::runtime_error,"lowCommunicationMakeColMapAndReindex: Local ID count test failed.");
00811   }
00812 
00813   // Make Column map
00814   Tpetra::global_size_t minus_one = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
00815   colMap = Teuchos::rcp(new Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node>(minus_one,ColIndices,domainMap.getIndexBase(),domainMap.getComm(),domainMap.getNode()));
00816 
00817   // Low-cost reindex of the matrix
00818   for(size_t i=0; i<numMyRows; i++){
00819     for(size_t j=rowptr[i]; j<rowptr[i+1]; j++){
00820       LocalOrdinal ID=colind_LID[j];
00821       if(Teuchos::as<size_t>(ID) < numDomainElements){
00822         if(use_local_permute) colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
00823         // In the case where use_local_permute==false, we just copy the DomainMap's ordering,
00824         // which it so happens is what we put in colind_LID to begin with.
00825       }
00826       else
00827         colind_LID[j] =  NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
00828     }
00829   }
00830 
00831 }
00832 
00833 
00834 
00835 
00836 
00837 
00838 #endif // TPETRA_IMPORT_UTIL_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines