Zoltan2
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 //
00003 // ***********************************************************************
00004 //
00005 //   Zoltan2: A package of combinatorial algorithms for scientific computing
00006 //                  Copyright 2012 Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Karen Devine      (kddevin@sandia.gov)
00039 //                    Erik Boman        (egboman@sandia.gov)
00040 //                    Siva Rajamanickam (srajama@sandia.gov)
00041 //
00042 // ***********************************************************************
00043 //
00044 // @HEADER
00045 
00050 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00051 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
00052 
00053 #include <Zoltan2_IdentifierMap.hpp>
00054 #include <Zoltan2_Solution.hpp>
00055 #include <Zoltan2_GreedyMWM.hpp>
00056 #include <Zoltan2_CoordinatePartitioningGraph.hpp>
00057 #include <cmath>
00058 #include <algorithm>
00059 #include <vector>
00060 #include <limits>
00061 
00062 #ifdef _MSC_VER
00063 #define NOMINMAX
00064 #include <windows.h>
00065 #endif
00066 
00067 namespace Teuchos{
00068 
00073 template <typename Ordinal, typename T>
00074 class Zoltan2_BoxBoundaries  : public ValueTypeReductionOp<Ordinal,T>
00075 {
00076 private:
00077     Ordinal size;
00078     T _EPSILON;
00079 
00080 public:
00083     Zoltan2_BoxBoundaries ():size(0), _EPSILON (std::numeric_limits<T>::epsilon()){}
00084 
00091     Zoltan2_BoxBoundaries (Ordinal s_):
00092         size(s_), _EPSILON (std::numeric_limits<T>::epsilon()){}
00093 
00096     void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
00097     {
00098         for (Ordinal i=0; i < count; i++){
00099             if (Z2_ABS(inBuffer[i]) >  _EPSILON){
00100                 inoutBuffer[i] = inBuffer[i];
00101             }
00102         }
00103     }
00104 };
00105 } // namespace Teuchos
00106 
00107 
00108 namespace Zoltan2 {
00109 
00110 
00111 
00112 
00127 template <typename Adapter>
00128   class PartitioningSolution : public Solution
00129 {
00130 public:
00131 
00132 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00133   typedef typename Adapter::gno_t gno_t;
00134   typedef typename Adapter::scalar_t scalar_t;
00135   typedef typename Adapter::lno_t lno_t;
00136   typedef typename Adapter::zgid_t zgid_t;
00137   typedef typename Adapter::part_t part_t;
00138   typedef typename Adapter::user_t user_t;
00139 #endif
00140 
00158   PartitioningSolution( RCP<const Environment> &env,
00159     RCP<const Comm<int> > &comm,
00160     RCP<const IdentifierMap<user_t> > &idMap,
00161     int nUserWeights);
00162 
00192   PartitioningSolution( RCP<const Environment> &env,
00193     RCP<const Comm<int> > &comm,
00194     RCP<const IdentifierMap<user_t> > &idMap,
00195     int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
00196     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00197 
00199   // Information that the algorithm may wish to query.
00200 
00203   size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
00204 
00207   size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
00208 
00211   size_t getLocalNumberOfParts() const { return nLocalParts_; }
00212 
00220   scalar_t getLocalFractionOfPart() const { return localFraction_; }
00221 
00231   bool oneToOnePartDistribution() const { return onePartPerProc_; }
00232 
00252   const int *getPartDistribution() const {
00253     if (partDist_.size() > 0) return &partDist_[0];
00254     else return NULL;
00255   }
00256 
00273   const part_t *getProcDistribution() const {
00274     if (procDist_.size() > 0) return &procDist_[0];
00275     else return NULL;
00276   }
00277 
00281   int getNumberOfCriteria() const { return nWeightsPerObj_; }
00282 
00283 
00290   bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
00291 
00304   scalar_t getCriteriaPartSize(int idx, part_t part) const {
00305     if (pSizeUniform_[idx])
00306       return 1.0 / nGlobalParts_;
00307     else if (pCompactIndex_[idx].size())
00308       return pSize_[idx][pCompactIndex_[idx][part]];
00309     else
00310       return pSize_[idx][part];
00311   }
00312 
00326   bool criteriaHaveSamePartSizes(int c1, int c2) const;
00327 
00329   // Method used by the algorithm to set results.
00330 
00357   void setParts(ArrayRCP<const gno_t> &gnoList,
00358     ArrayRCP<part_t> &partList, bool dataDidNotMove);
00359 
00361 
00373   void RemapParts();
00374 
00376   /* Return the weight of objects staying with a given remap.
00377    * If remap is NULL, compute weight of objects staying with given partition
00378    */
00379   long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt,
00380                      part_t nrhs, part_t nlhs)
00381   {
00382     long staying = 0;
00383     for (part_t i = 0; i < nrhs; i++) { 
00384       part_t k = (remap ? remap[i] : i);
00385       for (part_t j = idx[k]; j < idx[k+1]; j++) { 
00386         if (i == (adj[j]-nlhs)) {
00387           staying += wgt[j];
00388           break;
00389         }
00390       }
00391     }
00392     return staying;
00393   }
00394 
00396   // Results that may be queried by the user, by migration methods,
00397   // or by metric calculation methods.
00398   // We return raw pointers so users don't have to learn about our
00399   // pointer wrappers.
00400 
00403   const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
00404 
00407   size_t getLocalNumberOfIds() const { return gids_.size(); }
00408 
00411   const zgid_t *getIdList() const {
00412     if (gids_.size() > 0) return gids_.getRawPtr();
00413     else                  return NULL;
00414   }
00415 
00418   const part_t *getPartList() const {
00419     if (parts_.size() > 0) return parts_.getRawPtr();
00420     else                   return NULL;
00421   }
00422 
00427   const int *getProcList() const {
00428     if (procs_.size() > 0) return procs_.getRawPtr();
00429     else                   return NULL;
00430   }
00431 
00432 
00435   void setPartBoxes(
00436     RCP<::std::vector<
00437              Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > outPartBoxes)
00438   {
00439     this->partBoxes = outPartBoxes;
00440   }
00441 
00444   RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00445   getPartBoxes()
00446   {
00447     return this->partBoxes;
00448   }
00449 
00452   void getCommunicationGraph(
00453           const Teuchos::Comm<int> *comm,
00454           ArrayRCP <part_t> &comXAdj,
00455           ArrayRCP <part_t> &comAdj) 
00456   {
00457     if(comXAdj_.getRawPtr() == NULL && comAdj_.getRawPtr() == NULL){
00458 
00459       part_t ntasks =  this->getActualGlobalNumberOfParts();
00460       if (part_t (this->getTargetGlobalNumberOfParts()) > ntasks){
00461         ntasks = this->getTargetGlobalNumberOfParts();
00462       }
00463       RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00464       pBoxes = this->getGlobalBoxBoundaries(comm);
00465       int dim = (*pBoxes)[0].getDim();
00466       GridHash<scalar_t, part_t> grid(pBoxes, ntasks, dim);
00467       grid.getAdjArrays(comXAdj_, comAdj_);
00468     }
00469     comAdj = comAdj_;
00470     comXAdj = comXAdj_;
00471   }
00472 
00473   RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00474   getGlobalBoxBoundaries(const Teuchos::Comm<int> *comm) {
00475     part_t ntasks =  this->getActualGlobalNumberOfParts();
00476     if (part_t (this->getTargetGlobalNumberOfParts()) > ntasks){
00477       ntasks = this->getTargetGlobalNumberOfParts();
00478     }
00479 
00480     RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 
00481     pBoxes = this->getPartBoxes();
00482 
00483     int dim = (*pBoxes)[0].getDim();
00484     scalar_t *localPartBoundaries = new scalar_t[ntasks * 2 *dim];
00485 
00486     memset(localPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim);
00487 
00488     scalar_t *globalPartBoundaries = new scalar_t[ntasks * 2 *dim];
00489     memset(globalPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim);
00490 
00491     scalar_t *localPartMins = localPartBoundaries;
00492     scalar_t *localPartMaxs = localPartBoundaries + ntasks * dim;
00493 
00494     scalar_t *globalPartMins = globalPartBoundaries;
00495     scalar_t *globalPartMaxs = globalPartBoundaries + ntasks * dim;
00496 
00497     part_t boxCount = pBoxes->size();
00498     for (part_t i = 0; i < boxCount; ++i){
00499       part_t pId = (*pBoxes)[i].getpId();
00500       //cout << "me:" << comm->getRank() << " has:" << pId << endl;
00501 
00502       scalar_t *lmins = (*pBoxes)[i].getlmins();
00503       scalar_t *lmaxs = (*pBoxes)[i].getlmaxs();
00504 
00505       for (int j = 0; j < dim; ++j){
00506         localPartMins[dim * pId + j] = lmins[j];
00507         localPartMaxs[dim * pId + j] = lmaxs[j];
00508         /*
00509         cout << "me:" << comm->getRank()  <<
00510                 " dim * pId + j:"<< dim * pId + j <<
00511                 " localMin:" << localPartMins[dim * pId + j] <<
00512                 " localMax:" << localPartMaxs[dim * pId + j] << endl;
00513         */
00514       }
00515     }
00516 
00517     Teuchos::Zoltan2_BoxBoundaries<int, scalar_t> reductionOp(ntasks * 2 *dim);
00518 
00519     reduceAll<int, scalar_t>(*comm, reductionOp,
00520               ntasks * 2 *dim, localPartBoundaries, globalPartBoundaries);
00521     RCP<::std::vector<coordinateModelPartBox<scalar_t, part_t> > > 
00522     pB(new ::std::vector <coordinateModelPartBox <scalar_t, part_t> > (), true);
00523     for (part_t i = 0; i < ntasks; ++i){
00524       Zoltan2::coordinateModelPartBox <scalar_t, part_t> tpb(i, dim,
00525                                                  globalPartMins + dim * i,
00526                                                  globalPartMaxs + dim * i);
00527 
00528       /*
00529       for (int j = 0; j < dim; ++j){
00530           cout << "me:" << comm->getRank()  <<
00531                   " dim * pId + j:"<< dim * i + j <<
00532                   " globalMin:" << globalPartMins[dim * i + j] <<
00533                   " globalMax:" << globalPartMaxs[dim * i + j] << endl;
00534       }
00535       */
00536       pB->push_back(tpb);
00537     }
00538     delete []localPartBoundaries;
00539     delete []globalPartBoundaries;
00540     //RCP < ::std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > tmpRCPBox(pB, true);
00541     this->partBoxes = pB;
00542     return this->partBoxes;
00543   }
00544 
00574   template <typename Extra>
00575     size_t convertSolutionToImportList(
00576       int numExtra,
00577       ArrayRCP<Extra> &xtraInfo,
00578       ArrayRCP<typename Adapter::zgid_t> &imports,
00579       ArrayRCP<Extra> &newXtraInfo) const;
00580 
00598   void getPartsForProc(int procId, double &numParts, part_t &partMin,
00599     part_t &partMax) const
00600   {
00601     env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
00602       procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
00603 
00604     procToPartsMap(procId, numParts, partMin, partMax);
00605   }
00606 
00618   void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
00619   {
00620     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
00621       partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
00622 
00623     partToProcsMap(partId, procMin, procMax);
00624   }
00625 
00626 private:
00627   void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
00628     int numLocalParts, int numGlobalParts);
00629 
00630   void procToPartsMap(int procId, double &numParts, part_t &partMin,
00631     part_t &partMax) const;
00632 
00633   void partToProcsMap(part_t partId, int &procMin, int &procMax) const;
00634 
00635   void setPartDistribution();
00636 
00637   void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
00638     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
00639 
00640   void computePartSizes(int widx, ArrayView<part_t> ids,
00641     ArrayView<scalar_t> sizes);
00642 
00643   void broadcastPartSizes(int widx);
00644 
00645 
00646   RCP<const Environment> env_;             // has application communicator
00647   RCP<const Comm<int> > comm_;             // the problem communicator
00648   RCP<const IdentifierMap<user_t> > idMap_;
00649 
00650   //part box boundaries as a result of geometric partitioning algorithm.
00651   RCP < ::std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > partBoxes;
00652   ArrayRCP <part_t> comXAdj_; //communication graph xadj
00653   ArrayRCP <part_t> comAdj_; //communication graph adj.
00654 
00655   part_t nGlobalParts_;// target global number of parts
00656   part_t nLocalParts_; // number of parts to be on this process
00657 
00658   scalar_t localFraction_; // approx fraction of a part on this process
00659   int nWeightsPerObj_;      // if user has no weights, this is 1  TODO:  WHY???
00660 
00661   // If process p is to be assigned part p for all p, then onePartPerProc_
00662   // is true. Otherwise it is false, and either procDist_ or partDist_
00663   // describes the allocation of parts to processes.
00664   //
00665   // If parts are never split across processes, then procDist_ is defined
00666   // as follows:
00667   //
00668   //   partId              = procDist_[procId]
00669   //   partIdNext          = procDist_[procId+1]
00670   //   globalNumberOfParts = procDist_[numProcs]
00671   //
00672   // meaning that the parts assigned to process procId range from
00673   // [partId, partIdNext).  If partIdNext is the same as partId, then
00674   // process procId has no parts.
00675   //
00676   // If the number parts is less than the number of processes, and the
00677   // user did not specify "num_local_parts" for each of the processes, then
00678   // parts are split across processes, and partDist_ is defined rather than
00679   // procDist_.
00680   //
00681   //   procId              = partDist_[partId]
00682   //   procIdNext          = partDist_[partId+1]
00683   //   globalNumberOfProcs = partDist_[numParts]
00684   //
00685   // which implies that the part partId is shared by processes in the
00686   // the range [procId, procIdNext).
00687   //
00688   // We use std::vector so we can use upper_bound algorithm
00689 
00690   bool             onePartPerProc_;   // either this is true...
00691   ::std::vector<int>      partDist_;      // or this is defined ...
00692   ::std::vector<part_t> procDist_;      // or this is defined.
00693   bool procDistEquallySpread_;        // if procDist_ is used and
00694                                       // #parts > #procs and
00695                                       // num_local_parts is not specified,
00696                                       // parts are evenly distributed to procs
00697 
00698   // In order to minimize the storage required for part sizes, we
00699   // have three different representations.
00700   //
00701   // If the part sizes for weight index w are all the same, then:
00702   //    pSizeUniform_[w] = true
00703   //    pCompactIndex_[w].size() = 0
00704   //    pSize_[w].size() = 0
00705   //
00706   // and the size for part p is 1.0 / nparts.
00707   //
00708   // If part sizes differ for each part in weight index w, but there
00709   // are no more than 64 distinct sizes:
00710   //    pSizeUniform_[w] = false
00711   //    pCompactIndex_[w].size() = number of parts
00712   //    pSize_[w].size() = number of different sizes
00713   //
00714   // and the size for part p is pSize_[pCompactIndex_[p]].
00715   //
00716   // If part sizes differ for each part in weight index w, and there
00717   // are more than 64 distinct sizes:
00718   //    pSizeUniform_[w] = false
00719   //    pCompactIndex_[w].size() = 0
00720   //    pSize_[w].size() = nparts
00721   //
00722   // and the size for part p is pSize_[p].
00723   //
00724   // NOTE: If we expect to have similar cases, i.e. a long list of scalars
00725   //   where it is highly possible that just a few unique values appear,
00726   //   then we may want to make this a class.  The advantage is that we
00727   //   save a long list of 1-byte indices instead of a long list of scalars.
00728 
00729   ArrayRCP<bool> pSizeUniform_;
00730   ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
00731   ArrayRCP<ArrayRCP<scalar_t> > pSize_;
00732 
00734   // The algorithm sets these values upon completion.
00735 
00736   ArrayRCP<const zgid_t>  gids_;   // User's global IDs
00737   ArrayRCP<part_t> parts_;      // part number assigned to gids_[i]
00738 
00739   bool haveSolution_;
00740 
00741   part_t nGlobalPartsSolution_; // global number of parts in solution
00742 
00744   // The solution calculates this from the part assignments,
00745   // unless onePartPerProc_.
00746 
00747   ArrayRCP<int> procs_;       // process rank assigned to gids_[i]
00748 };
00749 
00751 // Definitions
00753 
00754 template <typename Adapter>
00755   PartitioningSolution<Adapter>::PartitioningSolution(
00756     RCP<const Environment> &env,
00757     RCP<const Comm<int> > &comm,
00758     RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights)
00759     : env_(env), comm_(comm), idMap_(idMap),
00760       partBoxes(),comXAdj_(), comAdj_(),
00761       nGlobalParts_(0), nLocalParts_(0),
00762       localFraction_(0),  nWeightsPerObj_(),
00763       onePartPerProc_(false), partDist_(), procDist_(),
00764       procDistEquallySpread_(false),
00765       pSizeUniform_(), pCompactIndex_(), pSize_(),
00766       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00767       procs_()
00768 {
00769   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  // TODO:  WHY??  WHY NOT ZERO?
00770 
00771   setPartDistribution();
00772 
00773   // We must call setPartSizes() because part sizes may have
00774   // been provided by the user on other processes.
00775 
00776   ArrayRCP<part_t> *noIds = new ArrayRCP<part_t> [nWeightsPerObj_];
00777   ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_];
00778   ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, true);
00779   ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true);
00780 
00781   setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
00782 
00783   env_->memory("After construction of solution");
00784 }
00785 
00786 template <typename Adapter>
00787   PartitioningSolution<Adapter>::PartitioningSolution(
00788     RCP<const Environment> &env,
00789     RCP<const Comm<int> > &comm,
00790     RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights,
00791     ArrayView<ArrayRCP<part_t> > reqPartIds,
00792     ArrayView<ArrayRCP<scalar_t> > reqPartSizes)
00793     : env_(env), comm_(comm), idMap_(idMap),
00794       partBoxes(),comXAdj_(), comAdj_(),
00795       nGlobalParts_(0), nLocalParts_(0),
00796       localFraction_(0),  nWeightsPerObj_(),
00797       onePartPerProc_(false), partDist_(), procDist_(),
00798       procDistEquallySpread_(false),
00799       pSizeUniform_(), pCompactIndex_(), pSize_(),
00800       gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
00801       procs_()
00802 {
00803   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  // TODO:  WHY?? WHY NOT ZERO?
00804 
00805   setPartDistribution();
00806 
00807   setPartSizes(reqPartIds, reqPartSizes);
00808 
00809   env_->memory("After construction of solution");
00810 }
00811 
00812 template <typename Adapter>
00813   void PartitioningSolution<Adapter>::setPartDistribution()
00814 {
00815   // Did the caller define num_global_parts and/or num_local_parts?
00816 
00817   const ParameterList &pl = env_->getParameters();
00818   size_t haveGlobalNumParts=0, haveLocalNumParts=0;
00819   int numLocal=0, numGlobal=0;
00820   double val;
00821 
00822   const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
00823 
00824   if (pe){
00825     val = pe->getValue<double>(&val);  // TODO: KDD Skip this double get
00826     haveGlobalNumParts = 1;            // TODO: KDD Should be unnecessary once
00827     numGlobal = static_cast<int>(val); // TODO: KDD paramlist handles long long.
00828     nGlobalParts_ = part_t(numGlobal); // TODO: KDD  also do below.
00829   }
00830 
00831   pe = pl.getEntryPtr("num_local_parts");
00832 
00833   if (pe){
00834     val = pe->getValue<double>(&val);
00835     haveLocalNumParts = 1;
00836     numLocal = static_cast<int>(val);
00837     nLocalParts_ = part_t(numLocal);
00838   }
00839 
00840   try{
00841     // Sets onePartPerProc_, partDist_, and procDist_
00842 
00843     partToProc(true, haveLocalNumParts, haveGlobalNumParts,
00844       numLocal, numGlobal);
00845   }
00846   Z2_FORWARD_EXCEPTIONS
00847 
00848   int nprocs = comm_->getSize();
00849   int rank = comm_->getRank();
00850 
00851   if (onePartPerProc_){
00852     nGlobalParts_ = nprocs;
00853     nLocalParts_ = 1;
00854   }
00855   else if (partDist_.size() > 0){   // more procs than parts
00856     nGlobalParts_ = partDist_.size() - 1;
00857     int pstart = partDist_[0];
00858     for (part_t i=1; i <= nGlobalParts_; i++){
00859       int pend = partDist_[i];
00860       if (rank >= pstart && rank < pend){
00861         int numOwners = pend - pstart;
00862         nLocalParts_ = 1;
00863         localFraction_ = 1.0 / numOwners;
00864         break;
00865       }
00866       pstart = pend;
00867     }
00868   }
00869   else if (procDist_.size() > 0){  // more parts than procs
00870     nGlobalParts_ = procDist_[nprocs];
00871     nLocalParts_ = procDist_[rank+1] - procDist_[rank];
00872   }
00873   else {
00874     throw std::logic_error("partToProc error");
00875   }
00876 
00877 }
00878 
00879 template <typename Adapter>
00880   void PartitioningSolution<Adapter>::setPartSizes(
00881     ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
00882 {
00883   int widx = nWeightsPerObj_;
00884   bool fail=false;
00885 
00886   size_t *countBuf = new size_t [widx*2];
00887   ArrayRCP<size_t> counts(countBuf, 0, widx*2, true);
00888 
00889   fail = ((ids.size() != widx) || (sizes.size() != widx));
00890 
00891   for (int w=0; !fail && w < widx; w++){
00892     counts[w] = ids[w].size();
00893     if (ids[w].size() != sizes[w].size()) fail=true;
00894   }
00895 
00896   env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0,
00897     COMPLEX_ASSERTION, comm_);
00898 
00899   // Are all part sizes the same?  This is the common case.
00900 
00901   ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx];
00902   pSize_ = arcp(emptySizes, 0, widx);
00903 
00904   ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx];
00905   pCompactIndex_ = arcp(emptyIndices, 0, widx);
00906 
00907   bool *info = new bool [widx];
00908   pSizeUniform_ = arcp(info, 0, widx);
00909   for (int w=0; w < widx; w++)
00910     pSizeUniform_[w] = true;
00911 
00912   if (nGlobalParts_ == 1){
00913     return;   // there's only one part in the whole problem
00914   }
00915 
00916   size_t *ptr1 = counts.getRawPtr();
00917   size_t *ptr2 = counts.getRawPtr() + widx;
00918 
00919   try{
00920     reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
00921   }
00922   Z2_THROW_OUTSIDE_ERROR(*env_);
00923 
00924   bool zero = true;
00925 
00926   for (int w=0; w < widx; w++)
00927     if (counts[widx+w] > 0){
00928       zero = false;
00929       pSizeUniform_[w] = false;
00930     }
00931 
00932   if (zero) // Part sizes for all criteria are uniform.
00933     return;
00934 
00935   // Compute the part sizes for criteria for which part sizes were
00936   // supplied.  Normalize for each criteria so part sizes sum to one.
00937 
00938   int nprocs = comm_->getSize();
00939   int rank = comm_->getRank();
00940 
00941   for (int w=0; w < widx; w++){
00942     if (pSizeUniform_[w]) continue;
00943 
00944     // Send all ids and sizes to one process.
00945     // (There is no simple gather method in Teuchos.)
00946 
00947     part_t length = ids[w].size();
00948     part_t *allLength = new part_t [nprocs];
00949     Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
00950       nprocs, allLength);
00951 
00952     if (rank == 0){
00953       int total = 0;
00954       for (int i=0; i < nprocs; i++)
00955         total += allLength[i];
00956 
00957       part_t *partNums = new part_t [total];
00958       scalar_t *partSizes = new scalar_t [total];
00959 
00960       ArrayView<part_t> idArray(partNums, total);
00961       ArrayView<scalar_t> sizeArray(partSizes, total);
00962 
00963       if (length > 0){
00964         for (int i=0; i < length; i++){
00965           *partNums++ = ids[w][i];
00966           *partSizes++ = sizes[w][i];
00967         }
00968       }
00969 
00970       for (int p=1; p < nprocs; p++){
00971         if (allLength[p] > 0){
00972           Teuchos::receive<int, part_t>(*comm_, p,
00973             allLength[p], partNums);
00974           Teuchos::receive<int, scalar_t>(*comm_, p,
00975             allLength[p], partSizes);
00976           partNums += allLength[p];
00977           partSizes += allLength[p];
00978         }
00979       }
00980 
00981       delete [] allLength;
00982 
00983       try{
00984         computePartSizes(w, idArray, sizeArray);
00985       }
00986       Z2_FORWARD_EXCEPTIONS
00987 
00988       delete [] idArray.getRawPtr();
00989       delete [] sizeArray.getRawPtr();
00990     }
00991     else{
00992       delete [] allLength;
00993       if (length > 0){
00994         Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
00995         Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
00996       }
00997     }
00998 
00999     broadcastPartSizes(w);
01000   }
01001 }
01002 
01003 template <typename Adapter>
01004   void PartitioningSolution<Adapter>::broadcastPartSizes(int widx)
01005 {
01006   env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
01007     pSize_.size()>widx &&
01008     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
01009     COMPLEX_ASSERTION);
01010 
01011   int rank = comm_->getRank();
01012   int nprocs = comm_->getSize();
01013   part_t nparts = nGlobalParts_;
01014 
01015   if (nprocs < 2)
01016     return;
01017 
01018   char flag=0;
01019 
01020   if (rank == 0){
01021     if (pSizeUniform_[widx] == true)
01022       flag = 1;
01023     else if (pCompactIndex_[widx].size() > 0)
01024       flag = 2;
01025     else
01026       flag = 3;
01027   }
01028 
01029   try{
01030     Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
01031   }
01032   Z2_THROW_OUTSIDE_ERROR(*env_);
01033 
01034   if (flag == 1){
01035     if (rank > 0)
01036       pSizeUniform_[widx] = true;
01037 
01038     return;
01039   }
01040 
01041   if (flag == 2){
01042 
01043     // broadcast the indices into the size list
01044 
01045     unsigned char *idxbuf = NULL;
01046 
01047     if (rank > 0){
01048       idxbuf = new unsigned char [nparts];
01049       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
01050     }
01051     else{
01052       env_->localBugAssertion(__FILE__, __LINE__, "index list size",
01053         pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION);
01054       idxbuf = pCompactIndex_[widx].getRawPtr();
01055     }
01056 
01057     try{
01058       // broadcast of unsigned char is not supported
01059       Teuchos::broadcast<int, char>(*comm_, 0, nparts,
01060         reinterpret_cast<char *>(idxbuf));
01061     }
01062     Z2_THROW_OUTSIDE_ERROR(*env_);
01063 
01064     if (rank > 0)
01065       pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true);
01066 
01067     // broadcast the list of different part sizes
01068 
01069     unsigned char maxIdx=0;
01070     for (part_t p=0; p < nparts; p++)
01071       if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
01072 
01073     int numSizes = maxIdx + 1;
01074 
01075     scalar_t *sizeList = NULL;
01076 
01077     if (rank > 0){
01078       sizeList = new scalar_t [numSizes];
01079       env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
01080     }
01081     else{
01082       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
01083         numSizes == pSize_[widx].size(), COMPLEX_ASSERTION);
01084 
01085       sizeList = pSize_[widx].getRawPtr();
01086     }
01087 
01088     try{
01089       Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
01090     }
01091     Z2_THROW_OUTSIDE_ERROR(*env_);
01092 
01093     if (rank > 0)
01094       pSize_[widx] = arcp(sizeList, 0, numSizes, true);
01095 
01096     return;
01097   }
01098 
01099   if (flag == 3){
01100 
01101     // broadcast the size of each part
01102 
01103     scalar_t *sizeList = NULL;
01104 
01105     if (rank > 0){
01106       sizeList = new scalar_t [nparts];
01107       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
01108     }
01109     else{
01110       env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
01111         nparts == pSize_[widx].size(), COMPLEX_ASSERTION);
01112 
01113       sizeList = pSize_[widx].getRawPtr();
01114     }
01115 
01116     try{
01117       Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
01118     }
01119     Z2_THROW_OUTSIDE_ERROR(*env_);
01120 
01121     if (rank > 0)
01122       pSize_[widx] = arcp(sizeList, 0, nparts);
01123 
01124     return;
01125   }
01126 }
01127 
01128 template <typename Adapter>
01129   void PartitioningSolution<Adapter>::computePartSizes(int widx,
01130     ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
01131 {
01132   int len = ids.size();
01133 
01134   if (len == 0){
01135     pSizeUniform_[widx] = true;
01136     return;
01137   }
01138 
01139   env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes",
01140     len>0 && sizes.size()==len, COMPLEX_ASSERTION);
01141 
01142   env_->localBugAssertion(__FILE__, __LINE__, "bad index",
01143     widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION);
01144 
01145   env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
01146     pSize_.size()>widx &&
01147     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
01148     COMPLEX_ASSERTION);
01149 
01150   // Check ids and sizes and find min, max and average sizes.
01151   // If sizes are very close to uniform, call them uniform parts.
01152 
01153   part_t nparts = nGlobalParts_;
01154   unsigned char *buf = new unsigned char [nparts];
01155   env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
01156   memset(buf, 0, nparts);
01157   ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
01158 
01159   scalar_t epsilon = 10e-5 / nparts;
01160   scalar_t min=sizes[0], max=sizes[0], sum=0;
01161 
01162   for (int i=0; i < len; i++){
01163     part_t id = ids[i];
01164     scalar_t size = sizes[i];
01165 
01166     env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
01167       id>=0 && id<nparts, BASIC_ASSERTION);
01168 
01169     env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
01170       BASIC_ASSERTION);
01171 
01172     // TODO: we could allow users to specify multiple sizes for the same
01173     // part if we add a parameter that says what we are to do with them:
01174     // add them or take the max.
01175 
01176     env_->localInputAssertion(__FILE__, __LINE__,
01177       "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
01178 
01179     partIdx[id] = 1;    // mark that we have a size for this part
01180 
01181     if (size < min) min = size;
01182     if (size > max) max = size;
01183     sum += size;
01184   }
01185 
01186   if (sum == 0){
01187 
01188     // User has given us a list of parts of size 0, we'll set
01189     // the rest to them to equally sized parts.
01190 
01191     scalar_t *allSizes = new scalar_t [2];
01192     env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
01193 
01194     ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
01195 
01196     int numNonZero = nparts - len;
01197 
01198     allSizes[0] = 0.0;
01199     allSizes[1] = 1.0 / numNonZero;
01200 
01201     for (part_t p=0; p < nparts; p++)
01202       buf[p] = 1;                 // index to default part size
01203 
01204     for (int i=0; i < len; i++)
01205       buf[ids[i]] = 0;            // index to part size zero
01206 
01207     pSize_[widx] = sizeArray;
01208     pCompactIndex_[widx] = partIdx;
01209 
01210     return;
01211   }
01212 
01213   if (max - min <= epsilon){
01214     pSizeUniform_[widx] = true;
01215     return;
01216   }
01217 
01218   // A size for parts that were not specified:
01219   scalar_t avg = sum / nparts;
01220 
01221   // We are going to merge part sizes that are very close.  This takes
01222   // computation time now, but can save considerably in the storage of
01223   // all part sizes on each process.  For example, a common case may
01224   // be some parts are size 1 and all the rest are size 2.
01225 
01226   scalar_t *tmp = new scalar_t [len];
01227   env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01228   memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
01229   ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
01230 
01231   std::sort(partSizes.begin(), partSizes.end());
01232 
01233   // create a list of sizes that are unique within epsilon
01234 
01235   Array<scalar_t> nextUniqueSize;
01236   nextUniqueSize.push_back(partSizes[len-1]);   // largest
01237   scalar_t curr = partSizes[len-1];
01238   int avgIndex = len;
01239   bool haveAvg = false;
01240   if (curr - avg <= epsilon)
01241      avgIndex = 0;
01242 
01243   for (int i=len-2; i >= 0; i--){
01244     scalar_t val = partSizes[i];
01245     if (curr - val > epsilon){
01246       nextUniqueSize.push_back(val);  // the highest in the group
01247       curr = val;
01248       if (avgIndex==len && val > avg && val - avg <= epsilon){
01249         // the average would be in this group
01250         avgIndex = nextUniqueSize.size() - 1;
01251         haveAvg = true;
01252       }
01253     }
01254   }
01255 
01256   partSizes.clear();
01257 
01258   size_t numSizes = nextUniqueSize.size();
01259   int sizeArrayLen = numSizes;
01260 
01261   if (numSizes < 64){
01262     // We can store the size for every part in a compact way.
01263 
01264     // Create a list of all sizes in increasing order
01265 
01266     if (!haveAvg) sizeArrayLen++;   // need to include average
01267 
01268     scalar_t *allSizes = new scalar_t [sizeArrayLen];
01269     env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
01270     ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
01271 
01272     int newAvgIndex = sizeArrayLen;
01273 
01274     for (int i=numSizes-1, idx=0; i >= 0; i--){
01275 
01276       if (newAvgIndex == sizeArrayLen){
01277 
01278         if (haveAvg && i==avgIndex)
01279           newAvgIndex = idx;
01280 
01281         else if (!haveAvg && avg < nextUniqueSize[i]){
01282           newAvgIndex = idx;
01283           allSizes[idx++] = avg;
01284         }
01285       }
01286 
01287       allSizes[idx++] = nextUniqueSize[i];
01288     }
01289 
01290     env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
01291       newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
01292 
01293     for (int i=0; i < nparts; i++){
01294       buf[i] = newAvgIndex;   // index to default part size
01295     }
01296 
01297     sum = (nparts - len) * allSizes[newAvgIndex];
01298 
01299     for (int i=0; i < len; i++){
01300       int id = ids[i];
01301       scalar_t size = sizes[i];
01302       int index;
01303 
01304       // Find the first size greater than or equal to this size.
01305 
01306       if (size < avg && avg - size <= epsilon)
01307         index = newAvgIndex;
01308       else{
01309         typename ArrayRCP<scalar_t>::iterator found =
01310           std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
01311 
01312         env_->localBugAssertion(__FILE__, __LINE__, "size array",
01313           found != sizeArray.end(), COMPLEX_ASSERTION);
01314 
01315         index = found - sizeArray.begin();
01316       }
01317 
01318       buf[id] = index;
01319       sum += allSizes[index];
01320     }
01321 
01322     for (int i=0; i < sizeArrayLen; i++){
01323       sizeArray[i] /= sum;
01324     }
01325 
01326     pCompactIndex_[widx] = partIdx;
01327     pSize_[widx] = sizeArray;
01328   }
01329   else{
01330     // To have access to part sizes, we must store nparts scalar_ts on
01331     // every process.  We expect this is a rare case.
01332 
01333     tmp = new scalar_t [nparts];
01334     env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
01335 
01336     sum += ((nparts - len) * avg);
01337 
01338     for (int i=0; i < nparts; i++){
01339       tmp[i] = avg/sum;
01340     }
01341 
01342     for (int i=0; i < len; i++){
01343       tmp[ids[i]] = sizes[i]/sum;
01344     }
01345 
01346     pSize_[widx] = arcp(tmp, 0, nparts);
01347   }
01348 }
01349 
01350 
01351 template <typename Adapter>
01352   void PartitioningSolution<Adapter>::setParts(
01353     ArrayRCP<const gno_t> &gnoList, ArrayRCP<part_t> &partList,
01354     bool dataDidNotMove)
01355 {
01356   env_->debug(DETAILED_STATUS, "Entering setParts");
01357 
01358   size_t len = partList.size();
01359 
01360   // Find the actual number of parts in the solution, which can
01361   // be more or less than the nGlobalParts_ target.
01362   // (We may want to compute the imbalance of a given solution with
01363   // respect to a desired solution.  This solution may have more or
01364   // fewer parts that the desired solution.)
01365 
01366   part_t lMax=0, lMin=0, gMax, gMin;
01367 
01368   if (len > 0)
01369     IdentifierTraits<part_t>::minMax(partList.getRawPtr(), len, lMin, lMax);
01370 
01371   IdentifierTraits<part_t>::globalMinMax(*comm_, len == 0,
01372     lMin, lMax, gMin, gMax);
01373 
01374   nGlobalPartsSolution_ = gMax - gMin + 1;
01375 
01376   if (dataDidNotMove) {
01377     // Case where the algorithm provides the part list in the same order
01378     // that it received the gnos.
01379 
01380     if (idMap_->gnosAreGids()){
01381       gids_ = Teuchos::arcp_reinterpret_cast<const zgid_t>(gnoList);
01382     }
01383     else{
01384       zgid_t *gidList = new zgid_t [len];
01385       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01386       ArrayView<zgid_t> gidView(gidList, len);
01387 
01388       const gno_t *gnos = gnoList.getRawPtr();
01389       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01390 
01391       try{
01392         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01393       }
01394       Z2_FORWARD_EXCEPTIONS
01395 
01396       gids_ = arcp<const zgid_t>(gidList, 0, len);
01397     }
01398     parts_ = partList;
01399   }
01400   else {
01401     // Case where the algorithm moved the data.
01402     // KDDKDD Should we require the algorithm to return the parts in
01403     // KDDKDD the same order as the gnos provided by the model?
01404 
01405     // We send part information to the process that "owns" the global number.
01406 
01407     ArrayView<zgid_t> emptyView;
01408     ArrayView<int> procList;
01409 
01410     if (len){
01411       int *tmp = new int [len];
01412       env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
01413       procList = ArrayView<int>(tmp, len);
01414     }
01415 
01416     idMap_->gnoGlobalTranslate (gnoList.view(0,len), emptyView, procList);
01417 
01418     int remotelyOwned = 0;
01419     int rank = comm_->getRank();
01420     int nprocs = comm_->getSize();
01421 
01422     for (size_t i=0; !remotelyOwned && i < len; i++){
01423       if (procList[i] != rank)
01424         remotelyOwned = 1;
01425     }
01426 
01427     int anyRemotelyOwned=0;
01428 
01429     try{
01430       reduceAll<int, int>(*comm_, Teuchos::REDUCE_MAX, 1,
01431         &remotelyOwned, &anyRemotelyOwned);
01432     }
01433     Z2_THROW_OUTSIDE_ERROR(*env_);
01434 
01435     if (anyRemotelyOwned){
01436 
01437       // Send the owners of these gnos their part assignments.
01438 
01439       Array<int> countOutBuf(nprocs, 0);
01440       Array<int> countInBuf(nprocs, 0);
01441 
01442       Array<gno_t> outBuf(len*2, 0);
01443 
01444       if (len > 0){
01445         Array<lno_t> offsetBuf(nprocs+1, 0);
01446 
01447         for (size_t i=0; i < len; i++){
01448           countOutBuf[procList[i]]+=2;
01449         }
01450 
01451         offsetBuf[0] = 0;
01452         for (int i=0; i < nprocs; i++)
01453           offsetBuf[i+1] = offsetBuf[i] + countOutBuf[i];
01454 
01455         for (size_t i=0; i < len; i++){
01456           int p = procList[i];
01457           int off = offsetBuf[p];
01458           outBuf[off] = gnoList[i];
01459           outBuf[off+1] = static_cast<gno_t>(partList[i]);
01460           offsetBuf[p]+=2;
01461         }
01462       }
01463 
01464       ArrayRCP<gno_t> inBuf;
01465 
01466       try{
01467         AlltoAllv<gno_t>(*comm_, *env_,
01468           outBuf(), countOutBuf(), inBuf, countInBuf());
01469       }
01470       Z2_FORWARD_EXCEPTIONS;
01471 
01472       outBuf.clear();
01473       countOutBuf.clear();
01474 
01475       gno_t newLen = 0;
01476       for (int i=0; i < nprocs; i++)
01477         newLen += countInBuf[i];
01478 
01479       countInBuf.clear();
01480 
01481       newLen /= 2;
01482 
01483       ArrayRCP<part_t> parts;
01484       ArrayRCP<const gno_t> myGnos;
01485 
01486       if (newLen > 0){
01487 
01488         gno_t *tmpGno = new gno_t [newLen];
01489         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpGno);
01490 
01491         part_t *tmpPart = new part_t [newLen];
01492         env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpPart);
01493 
01494         size_t next = 0;
01495         for (gno_t i=0; i < newLen; i++){
01496           tmpGno[i] = inBuf[next++];
01497           tmpPart[i] = inBuf[next++];
01498         }
01499 
01500         parts = arcp(tmpPart, 0, newLen);
01501         myGnos = arcp(tmpGno, 0, newLen);
01502       }
01503 
01504       gnoList = myGnos;
01505       partList = parts;
01506       len = newLen;
01507     }
01508 
01509     delete [] procList.getRawPtr();
01510 
01511     if (idMap_->gnosAreGids()){
01512       gids_ = Teuchos::arcp_reinterpret_cast<const zgid_t>(gnoList);
01513     }
01514     else{
01515       zgid_t *gidList = new zgid_t [len];
01516       env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
01517       ArrayView<zgid_t> gidView(gidList, len);
01518 
01519       const gno_t *gnos = gnoList.getRawPtr();
01520       ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);
01521 
01522       try{
01523         idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
01524       }
01525       Z2_FORWARD_EXCEPTIONS
01526 
01527       gids_ = arcp<const zgid_t>(gidList, 0, len);
01528     }
01529 
01530     parts_ = partList;
01531   }
01532 
01533   // Now determine which process gets each object, if not one-to-one.
01534 
01535   if (!onePartPerProc_){
01536 
01537     int *procs = new int [len];
01538     env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
01539     procs_ = arcp<int>(procs, 0, len);
01540 
01541     part_t *parts = partList.getRawPtr();
01542 
01543     if (procDist_.size() > 0){    // parts are not split across procs
01544 
01545       int procId;
01546       for (size_t i=0; i < len; i++){
01547         partToProcsMap(parts[i], procs[i], procId);
01548       }
01549     }
01550     else{  // harder - we need to split the parts across multiple procs
01551 
01552       lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
01553       env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
01554         partCounter);
01555 
01556       int numProcs = comm_->getSize();
01557 
01558       //MD NOTE: there was no initialization for partCounter.
01559       //I added the line below, correct me if I am wrong.
01560       memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);
01561 
01562       for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
01563         partCounter[parts[i]]++;
01564 
01565       lno_t *procCounter = new lno_t [numProcs];
01566       env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
01567 
01568       int proc1;
01569       int proc2 = partDist_[0];
01570 
01571       for (part_t part=1; part < nGlobalParts_; part++){
01572         proc1 = proc2;
01573         proc2 = partDist_[part+1];
01574         int numprocs = proc2 - proc1;
01575 
01576         double dNum = partCounter[part];
01577         double dProcs = numprocs;
01578 
01579         //cout << "dNum:" << dNum << " dProcs:" << dProcs << endl;
01580         double each = floor(dNum/dProcs);
01581         double extra = fmod(dNum,dProcs);
01582 
01583         for (int proc=proc1, i=0; proc<proc2; proc++, i++){
01584           if (i < extra)
01585             procCounter[proc] = lno_t(each) + 1;
01586           else
01587             procCounter[proc] = lno_t(each);
01588         }
01589       }
01590 
01591       delete [] partCounter;
01592 
01593       for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
01594         if (partList[i] >= nGlobalParts_){
01595           // Solution has more parts that targeted.  These
01596           // objects just remain on this process.
01597           procs[i] = comm_->getRank();
01598           continue;
01599         }
01600         part_t partNum = parts[i];
01601         proc1 = partDist_[partNum];
01602         proc2 = partDist_[partNum + 1];
01603 
01604         int proc;
01605         for (proc=proc1; proc < proc2; proc++){
01606           if (procCounter[proc] > 0){
01607             procs[i] = proc;
01608             procCounter[proc]--;
01609             break;
01610           }
01611         }
01612         env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
01613           proc < proc2, COMPLEX_ASSERTION);
01614       }
01615 
01616       delete [] procCounter;
01617     }
01618   }
01619 
01620   // Now that parts_ info is back on home process, remap the parts.
01621   // TODO:  The parts will be inconsistent with the proc assignments after
01622   // TODO:  remapping.  This problem will go away after we separate process
01623   // TODO:  mapping from setParts.  But for MueLu's use case, the part
01624   // TODO:  remapping is all that matters; they do not use the process mapping.
01625   int doRemap = 0;
01626   const Teuchos::ParameterEntry *pe =
01627                  env_->getParameters().getEntryPtr("remap_parts");
01628   if (pe) doRemap = pe->getValue(&doRemap);
01629   if (doRemap) RemapParts();
01630 
01631   haveSolution_ = true;
01632 
01633   env_->memory("After Solution has processed algorithm's answer");
01634   env_->debug(DETAILED_STATUS, "Exiting setParts");
01635 }
01636 
01637 template <typename Adapter>
01638   template <typename Extra>
01639     size_t PartitioningSolution<Adapter>::convertSolutionToImportList(
01640       int numExtra, ArrayRCP<Extra> &xtraInfo,
01641       ArrayRCP<typename Adapter::zgid_t> &imports,       // output
01642       ArrayRCP<Extra> &newXtraInfo) const               // output
01643 {
01644   env_->localInputAssertion(__FILE__, __LINE__, "no solution yet",
01645     haveSolution_, BASIC_ASSERTION);
01646   env_->debug(DETAILED_STATUS, "Entering convertSolutionToImportList");
01647 
01648   int numProcs                = comm_->getSize();
01649   size_t localNumIds          = gids_.size();
01650 
01651   // How many to each process?
01652   Array<int> counts(numProcs, 0);
01653   if (onePartPerProc_)
01654     for (size_t i=0; i < localNumIds; i++)
01655       counts[parts_[i]]++;
01656   else
01657     for (size_t i=0; i < localNumIds; i++)
01658       counts[procs_[i]]++;
01659 
01660   Array<gno_t> offsets(numProcs+1, 0);
01661   for (int i=1; i <= numProcs; i++){
01662     offsets[i] = offsets[i-1] + counts[i-1];
01663   }
01664 
01665   Array<zgid_t> gidList(localNumIds);
01666   Array<Extra> numericInfo;
01667 
01668   if (numExtra > 0)
01669     numericInfo.resize(localNumIds);
01670 
01671   if (onePartPerProc_){
01672     for (size_t i=0; i < localNumIds; i++){
01673       lno_t idx = offsets[parts_[i]];
01674       gidList[idx] = gids_[i];
01675       if (numExtra > 0)
01676         numericInfo[idx] = xtraInfo[i];
01677       offsets[parts_[i]] = idx + 1;
01678     }
01679   }
01680   else {
01681     for (size_t i=0; i < localNumIds; i++){
01682       lno_t idx = offsets[procs_[i]];
01683       gidList[idx] = gids_[i];
01684       if (numExtra > 0)
01685         numericInfo[idx] = xtraInfo[i];
01686       offsets[procs_[i]] = idx + 1;
01687     }
01688   }
01689 
01690   Array<int> recvCounts(numProcs, 0);
01691 
01692   try{
01693     AlltoAllv<zgid_t>(*comm_, *env_, gidList(),
01694       counts(), imports, recvCounts());
01695   }
01696   catch (std::exception &e){
01697     throw std::runtime_error("alltoallv 1");
01698   }
01699 
01700   if (numExtra > 0){
01701     try{
01702       AlltoAllv<Extra>(*comm_, *env_, xtraInfo(),
01703         counts(), newXtraInfo, recvCounts());
01704     }
01705     catch (std::exception &e){
01706       throw std::runtime_error("alltoallv 2");
01707     }
01708   }
01709 
01710   env_->debug(DETAILED_STATUS, "Exiting convertSolutionToImportList");
01711   return imports.size();
01712 }
01713 
01714 template <typename Adapter>
01715   void PartitioningSolution<Adapter>::procToPartsMap(int procId,
01716     double &numParts, part_t &partMin, part_t &partMax) const
01717 {
01718   if (onePartPerProc_){
01719     numParts = 1.0;
01720     partMin = partMax = procId;
01721   }
01722   else if (procDist_.size() > 0){
01723     partMin = procDist_[procId];
01724     partMax = procDist_[procId+1] - 1;
01725     numParts = procDist_[procId+1] - partMin;
01726   }
01727   else{
01728     // find the first p such that partDist_[p] > procId
01729 
01730     ::std::vector<int>::const_iterator entry;
01731     entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
01732 
01733     size_t partIdx = entry - partDist_.begin();
01734     int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
01735     partMin = partMax = int(partIdx) - 1;
01736     numParts = 1.0 / numProcs;
01737   }
01738 }
01739 
01740 template <typename Adapter>
01741   void PartitioningSolution<Adapter>::partToProcsMap(part_t partId,
01742     int &procMin, int &procMax) const
01743 {
01744   if (partId >= nGlobalParts_){
01745     // setParts() may be given an initial solution which uses a
01746     // different number of parts than the desired solution.  It is
01747     // still a solution.  We keep it on this process.
01748     procMin = procMax = comm_->getRank();
01749   }
01750   else if (onePartPerProc_){
01751     procMin = procMax = int(partId);
01752   }
01753   else if (procDist_.size() > 0){
01754     if (procDistEquallySpread_) {
01755       // Avoid binary search.
01756       double fProcs = comm_->getSize();
01757       double fParts = nGlobalParts_;
01758       double each = fParts / fProcs;
01759       procMin = int(partId / each);
01760       while (procDist_[procMin] > partId) procMin--;
01761       while (procDist_[procMin+1] <= partId) procMin++;
01762       procMax = procMin;
01763     }
01764     else {
01765       // find the first p such that procDist_[p] > partId.
01766       // For now, do a binary search.
01767 
01768       typename ::std::vector<part_t>::const_iterator entry;
01769       entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
01770 
01771       size_t procIdx = entry - procDist_.begin();
01772       procMin = procMax = int(procIdx) - 1;
01773     }
01774   }
01775   else{
01776     procMin = partDist_[partId];
01777     procMax = partDist_[partId+1] - 1;
01778   }
01779 }
01780 
01781 template <typename Adapter>
01782   bool PartitioningSolution<Adapter>::criteriaHaveSamePartSizes(
01783     int c1, int c2) const
01784 {
01785   if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
01786     throw std::logic_error("criteriaHaveSamePartSizes error");
01787 
01788   bool theSame = false;
01789 
01790   if (c1 == c2)
01791     theSame = true;
01792 
01793   else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
01794     theSame = true;
01795 
01796   else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
01797     theSame = true;
01798     bool useIndex = pCompactIndex_[c1].size() > 0;
01799     if (useIndex){
01800       for (part_t p=0; theSame && p < nGlobalParts_; p++)
01801         if (pSize_[c1][pCompactIndex_[c1][p]] !=
01802             pSize_[c2][pCompactIndex_[c2][p]])
01803           theSame = false;
01804     }
01805     else{
01806       for (part_t p=0; theSame && p < nGlobalParts_; p++)
01807         if (pSize_[c1][p] != pSize_[c2][p])
01808           theSame = false;
01809     }
01810   }
01811 
01812   return theSame;
01813 }
01814 
01830 template <typename Adapter>
01831   void PartitioningSolution<Adapter>::partToProc(
01832     bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
01833     int numLocalParts, int numGlobalParts)
01834 {
01835 #ifdef _MSC_VER
01836   typedef SSIZE_T ssize_t;
01837 #endif
01838   int nprocs = comm_->getSize();
01839   ssize_t reducevals[4];
01840   ssize_t sumHaveGlobal=0, sumHaveLocal=0;
01841   ssize_t sumGlobal=0, sumLocal=0;
01842   ssize_t maxGlobal=0, maxLocal=0;
01843   ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
01844       numGlobalParts, numLocalParts};
01845 
01846   partDist_.clear();
01847   procDist_.clear();
01848 
01849   if (doCheck){
01850 
01851     try{
01852       reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
01853     }
01854     Z2_THROW_OUTSIDE_ERROR(*env_);
01855 
01856     sumHaveGlobal = reducevals[0];
01857     sumHaveLocal = reducevals[1];
01858     sumGlobal = reducevals[2];
01859     sumLocal = reducevals[3];
01860 
01861     env_->localInputAssertion(__FILE__, __LINE__,
01862       "Either all procs specify num_global/local_parts or none do",
01863       (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
01864       (sumHaveLocal == 0 || sumHaveLocal == nprocs),
01865       BASIC_ASSERTION);
01866   }
01867   else{
01868     if (haveNumLocalParts)
01869       sumLocal = numLocalParts * nprocs;
01870     if (haveNumGlobalParts)
01871       sumGlobal = numGlobalParts * nprocs;
01872 
01873     sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
01874     sumHaveLocal = haveNumLocalParts ? nprocs : 0;
01875 
01876     maxLocal = numLocalParts;
01877     maxGlobal = numGlobalParts;
01878   }
01879 
01880   if (!haveNumLocalParts && !haveNumGlobalParts){
01881     onePartPerProc_ = true;   // default if user did not specify
01882     return;
01883   }
01884 
01885   if (haveNumGlobalParts){
01886     if (doCheck){
01887       vals[0] = numGlobalParts;
01888       vals[1] = numLocalParts;
01889       try{
01890         reduceAll<int, ssize_t>(
01891           *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
01892       }
01893       Z2_THROW_OUTSIDE_ERROR(*env_);
01894 
01895       maxGlobal = reducevals[0];
01896       maxLocal = reducevals[1];
01897 
01898       env_->localInputAssertion(__FILE__, __LINE__,
01899         "Value for num_global_parts is different on different processes.",
01900         maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
01901     }
01902 
01903     if (sumLocal){
01904       env_->localInputAssertion(__FILE__, __LINE__,
01905         "Sum of num_local_parts does not equal requested num_global_parts",
01906         sumLocal == numGlobalParts, BASIC_ASSERTION);
01907 
01908       if (sumLocal == nprocs && maxLocal == 1){
01909         onePartPerProc_ = true;   // user specified one part per proc
01910         return;
01911       }
01912     }
01913     else{
01914       if (maxGlobal == nprocs){
01915         onePartPerProc_ = true;   // user specified num parts is num procs
01916         return;
01917       }
01918     }
01919   }
01920 
01921   // If we are here, we do not have #parts == #procs.
01922 
01923   if (sumHaveLocal == nprocs){
01924     //
01925     // We will go by the number of local parts specified.
01926     //
01927 
01928     try{
01929       procDist_.resize(nprocs+1);
01930     }
01931     catch (std::exception &e){
01932       throw(std::bad_alloc());
01933     }
01934 
01935     part_t *procArray = &procDist_[0];
01936 
01937     try{
01938       part_t tmp = part_t(numLocalParts);
01939       gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
01940     }
01941     Z2_THROW_OUTSIDE_ERROR(*env_);
01942 
01943     procArray[0] = 0;
01944 
01945     for (int proc=0; proc < nprocs; proc++)
01946       procArray[proc+1] += procArray[proc];
01947   }
01948   else{
01949     //
01950     // We will allocate global number of parts to the processes.
01951     //
01952     double fParts = numGlobalParts;
01953     double fProcs = nprocs;
01954 
01955     if (fParts < fProcs){
01956 
01957       try{
01958         partDist_.resize(size_t(fParts+1));
01959       }
01960       catch (std::exception &e){
01961         throw(std::bad_alloc());
01962       }
01963 
01964       int *partArray = &partDist_[0];
01965 
01966       double each = floor(fProcs / fParts);
01967       double extra = fmod(fProcs, fParts);
01968       partDist_[0] = 0;
01969 
01970       for (part_t part=0; part < numGlobalParts; part++){
01971         int numOwners = int(each + ((part<extra) ? 1 : 0));
01972         partArray[part+1] = partArray[part] + numOwners;
01973       }
01974 
01975       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
01976         partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
01977     }
01978     else if (fParts > fProcs){
01979 
01980       // User did not specify local number of parts per proc;
01981       // Distribute the parts evenly among the procs.
01982 
01983       procDistEquallySpread_ = true;
01984 
01985       try{
01986         procDist_.resize(size_t(fProcs+1));
01987       }
01988       catch (std::exception &e){
01989         throw(std::bad_alloc());
01990       }
01991 
01992       part_t *procArray = &procDist_[0];
01993 
01994       double each = floor(fParts / fProcs);
01995       double extra = fmod(fParts, fProcs);
01996       procArray[0] = 0;
01997 
01998       for (int proc=0; proc < nprocs; proc++){
01999         part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
02000         procArray[proc+1] = procArray[proc] + numParts;
02001       }
02002 
02003       env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
02004         procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
02005     }
02006     else{
02007       env_->globalBugAssertion(__FILE__, __LINE__,
02008         "should never get here", 1, COMPLEX_ASSERTION, comm_);
02009     }
02010   }
02011 }
02012 
02014 // Remap a new part assignment vector for maximum overlap with an input
02015 // part assignment.
02016 //
02017 // Assumptions for this version:
02018 //   input part assignment == processor rank for every local object.
02019 //   assuming nGlobalParts_ <= num ranks
02020 // TODO:  Write a version that takes the input part number as input;
02021 //        this change requires input parts in adapters to be provided in
02022 //        the Adapter.
02023 // TODO:  For repartitioning, compare to old remapping results; see Zoltan1.
02024 
02025 template <typename Adapter>
02026 void PartitioningSolution<Adapter>::RemapParts()
02027 {
02028   size_t len = parts_.size();
02029 
02030   part_t me = comm_->getRank();
02031   int np = comm_->getSize();
02032 
02033   if (np < nGlobalParts_) {
02034     if (me == 0)
02035       std::cout << "Remapping not yet supported for "
02036            << "num_global_parts " << nGlobalParts_
02037            << " > num procs " << np << std::endl;
02038     return;
02039   }
02040   // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
02041   // and edges between a process vtx and any parts to which that process'
02042   // objects are assigned.
02043   // Weight edge[parts_[i]] by the number of objects that are going from
02044   // this rank to parts_[i].
02045   // We use a std::map, assuming the number of unique parts in the parts_ array
02046   // is small to keep the binary search efficient.
02047   // TODO We use the count of objects to move; should change to SIZE of objects
02048   // to move; need SIZE function in Adapter.
02049 
02050   std::map<part_t, long> edges;
02051   long lstaying = 0;  // Total num of local objects staying if we keep the
02052                       // current mapping. TODO:  change to SIZE of local objs
02053   long gstaying = 0;  // Total num of objects staying in the current partition
02054 
02055   for (size_t i = 0; i < len; i++) {
02056     edges[parts_[i]]++;                // TODO Use obj size instead of count
02057     if (parts_[i] == me) lstaying++;    // TODO Use obj size instead of count
02058   }
02059 
02060   Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
02061                                 &lstaying, &gstaying);
02062 //TODO  if (gstaying == Adapter::getGlobalNumObjs()) return;  // Nothing to do
02063 
02064   part_t *remap = NULL;
02065 
02066   int nedges = edges.size();
02067 
02068   // Gather the graph to rank 0.
02069   part_t tnVtx = np + nGlobalParts_;  // total # vertices
02070   int *idx = NULL;    // Pointer index into graph adjacencies
02071   int *sizes = NULL;  // nedges per rank
02072   if (me == 0) {
02073     idx = new int[tnVtx+1];
02074     sizes = new int[np];
02075     sizes[0] = nedges;
02076   }
02077   if (np > 1)
02078     Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
02079 
02080   // prefix sum to build the idx array
02081   if (me == 0) {
02082     idx[0] = 0;
02083     for (int i = 0; i < np; i++)
02084       idx[i+1] = idx[i] + sizes[i];
02085   }
02086 
02087   // prepare to send edges
02088   int cnt = 0;
02089   part_t *bufv = NULL;
02090   long *bufw = NULL;
02091   if (nedges) {
02092     bufv = new part_t[nedges];
02093     bufw = new long[nedges];
02094     // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
02095     for (typename std::map<part_t, long>::iterator it = edges.begin();
02096          it != edges.end(); it++) {
02097       bufv[cnt] = it->first;  // target part
02098       bufw[cnt] = it->second; // weight
02099       cnt++;
02100     }
02101   }
02102 
02103   // Prepare to receive edges on rank 0
02104   part_t *adj = NULL;
02105   long *wgt = NULL;
02106   if (me == 0) {
02107 //SYM    adj = new part_t[2*idx[np]];  // need 2x space to symmetrize later
02108 //SYM    wgt = new long[2*idx[np]];  // need 2x space to symmetrize later
02109     adj = new part_t[idx[np]];
02110     wgt = new long[idx[np]];
02111   }
02112 
02113   Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
02114   Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
02115   delete [] bufv;
02116   delete [] bufw;
02117 
02118   // Now have constructed graph on rank 0.
02119   // Call the matching algorithm
02120 
02121   int doRemap;
02122   if (me == 0) {
02123     // We have the "LHS" vertices of the bipartite graph; need to create
02124     // "RHS" vertices.
02125     for (int i = 0; i < idx[np]; i++) {
02126       adj[i] += np;  // New RHS vertex number; offset by num LHS vertices
02127     }
02128 
02129     // Build idx for RHS vertices
02130     for (part_t i = np; i < tnVtx; i++) {
02131       idx[i+1] = idx[i];  // No edges for RHS vertices
02132     }
02133 
02134 #ifdef KDDKDD_DEBUG
02135     cout << "IDX ";
02136     for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
02137     std::cout << std::endl;
02138 
02139     std::cout << "ADJ ";
02140     for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
02141     std::cout << std::endl;
02142 
02143     std::cout << "WGT ";
02144     for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
02145     std::cout << std::endl;
02146 #endif
02147 
02148     // Perform matching on the graph
02149     part_t *match = new part_t[tnVtx];
02150     for (part_t i = 0; i < tnVtx; i++) match[i] = i;
02151     part_t nmatches =
02152              Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
02153 
02154 #ifdef KDDKDD_DEBUG
02155     std::cout << "After matching:  " << nmatches << " found" << std::endl;
02156     for (part_t i = 0; i < tnVtx; i++)
02157       std::cout << "match[" << i << "] = " << match[i]
02158            << ((match[i] != i &&
02159                (i < np && match[i] != i+np))
02160                   ? " *" : " ")
02161            << std::endl;
02162 #endif
02163 
02164     // See whether there were nontrivial changes in the matching.
02165     bool nontrivial = false;
02166     if (nmatches) {
02167       for (part_t i = 0; i < np; i++) {
02168         if ((match[i] != i) && (match[i] != (i+np))) {
02169           nontrivial = true;
02170           break;
02171         }
02172       }
02173     }
02174 
02175     // Process the matches
02176     if (nontrivial) {
02177       remap = new part_t[nGlobalParts_];
02178       for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
02179 
02180       bool *used = new bool[np];
02181       for (part_t i = 0; i < np; i++) used[i] = false;
02182 
02183       // First, process all matched parts
02184       for (part_t i = 0; i < nGlobalParts_; i++) {
02185         part_t tmp = i + np;
02186         if (match[tmp] != tmp) {
02187           remap[i] = match[tmp];
02188           used[match[tmp]] = true;
02189         }
02190       }
02191 
02192       // Second, process unmatched parts; keep same part number if possible
02193       for (part_t i = 0; i < nGlobalParts_; i++) {
02194         if (remap[i] > -1) continue;
02195         if (!used[i]) {
02196           remap[i] = i;
02197           used[i] = true;
02198         }
02199       }
02200 
02201       // Third, process unmatched parts; give them the next unused part
02202       for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
02203         if (remap[i] > -1) continue;
02204         while (used[uidx]) uidx++;
02205         remap[i] = uidx;
02206         used[uidx] = true;
02207       }
02208       delete [] used;
02209     }
02210     delete [] match;
02211 
02212 #ifdef KDDKDD_DEBUG
02213     cout << "Remap vector: ";
02214     for (part_t i = 0; i < nGlobalParts_; i++) cout << remap[i] << " ";
02215     std::cout << std::endl;
02216 #endif
02217 
02218     long newgstaying = measure_stays(remap, idx, adj, wgt,
02219                                                       nGlobalParts_, np);
02220     doRemap = (newgstaying > gstaying);
02221     std::cout << "gstaying " << gstaying << " measure(input) "
02222          << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
02223          << " newgstaying " << newgstaying
02224          << " nontrivial " << nontrivial
02225          << " doRemap " << doRemap << std::endl;
02226   }
02227   delete [] idx;
02228   delete [] sizes;
02229   delete [] adj;
02230   delete [] wgt;
02231 
02232   Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
02233 
02234   if (doRemap) {
02235     if (me != 0) remap = new part_t[nGlobalParts_];
02236     Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
02237     for (size_t i = 0; i < len; i++) {
02238       parts_[i] = remap[parts_[i]];
02239     }
02240   }
02241 
02242   delete [] remap;  // TODO May want to keep for repartitioning as in Zoltan
02243 }
02244 
02245 
02246 }  // namespace Zoltan2
02247 
02248 #endif