// Zoltan2
00001 // @HEADER 00002 // 00003 // *********************************************************************** 00004 // 00005 // Zoltan2: A package of combinatorial algorithms for scientific computing 00006 // Copyright 2012 Sandia Corporation 00007 // 00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00009 // the U.S. Government retains certain rights in this software. 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are 00013 // met: 00014 // 00015 // 1. Redistributions of source code must retain the above copyright 00016 // notice, this list of conditions and the following disclaimer. 00017 // 00018 // 2. Redistributions in binary form must reproduce the above copyright 00019 // notice, this list of conditions and the following disclaimer in the 00020 // documentation and/or other materials provided with the distribution. 00021 // 00022 // 3. Neither the name of the Corporation nor the names of the 00023 // contributors may be used to endorse or promote products derived from 00024 // this software without specific prior written permission. 00025 // 00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00029 // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE 00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00037 // 00038 // Questions? Contact Karen Devine (kddevin@sandia.gov) 00039 // Erik Boman (egboman@sandia.gov) 00040 // Siva Rajamanickam (srajama@sandia.gov) 00041 // 00042 // *********************************************************************** 00043 // 00044 // @HEADER 00045 00050 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_ 00051 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_ 00052 00053 #include <Zoltan2_IdentifierMap.hpp> 00054 #include <Zoltan2_Solution.hpp> 00055 #include <Zoltan2_GreedyMWM.hpp> 00056 #include <Zoltan2_CoordinatePartitioningGraph.hpp> 00057 #include <cmath> 00058 #include <algorithm> 00059 #include <vector> 00060 #include <limits> 00061 00062 #ifdef _MSC_VER 00063 #define NOMINMAX 00064 #include <windows.h> 00065 #endif 00066 00067 namespace Teuchos{ 00068 00073 template <typename Ordinal, typename T> 00074 class Zoltan2_BoxBoundaries : public ValueTypeReductionOp<Ordinal,T> 00075 { 00076 private: 00077 Ordinal size; 00078 T _EPSILON; 00079 00080 public: 00083 Zoltan2_BoxBoundaries ():size(0), _EPSILON (std::numeric_limits<T>::epsilon()){} 00084 00091 Zoltan2_BoxBoundaries (Ordinal s_): 00092 size(s_), _EPSILON (std::numeric_limits<T>::epsilon()){} 00093 00096 void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const 00097 { 00098 for (Ordinal i=0; i < count; i++){ 00099 if (Z2_ABS(inBuffer[i]) > _EPSILON){ 00100 inoutBuffer[i] = inBuffer[i]; 00101 } 00102 
} 00103 } 00104 }; 00105 } // namespace Teuchos 00106 00107 00108 namespace Zoltan2 { 00109 00110 00111 00112 00127 template <typename Adapter> 00128 class PartitioningSolution : public Solution 00129 { 00130 public: 00131 00132 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00133 typedef typename Adapter::gno_t gno_t; 00134 typedef typename Adapter::scalar_t scalar_t; 00135 typedef typename Adapter::lno_t lno_t; 00136 typedef typename Adapter::zgid_t zgid_t; 00137 typedef typename Adapter::part_t part_t; 00138 typedef typename Adapter::user_t user_t; 00139 #endif 00140 00158 PartitioningSolution( RCP<const Environment> &env, 00159 RCP<const Comm<int> > &comm, 00160 RCP<const IdentifierMap<user_t> > &idMap, 00161 int nUserWeights); 00162 00192 PartitioningSolution( RCP<const Environment> &env, 00193 RCP<const Comm<int> > &comm, 00194 RCP<const IdentifierMap<user_t> > &idMap, 00195 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds, 00196 ArrayView<ArrayRCP<scalar_t> > reqPartSizes); 00197 00199 // Information that the algorithm may wish to query. 
00200 00203 size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; } 00204 00207 size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; } 00208 00211 size_t getLocalNumberOfParts() const { return nLocalParts_; } 00212 00220 scalar_t getLocalFractionOfPart() const { return localFraction_; } 00221 00231 bool oneToOnePartDistribution() const { return onePartPerProc_; } 00232 00252 const int *getPartDistribution() const { 00253 if (partDist_.size() > 0) return &partDist_[0]; 00254 else return NULL; 00255 } 00256 00273 const part_t *getProcDistribution() const { 00274 if (procDist_.size() > 0) return &procDist_[0]; 00275 else return NULL; 00276 } 00277 00281 int getNumberOfCriteria() const { return nWeightsPerObj_; } 00282 00283 00290 bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];} 00291 00304 scalar_t getCriteriaPartSize(int idx, part_t part) const { 00305 if (pSizeUniform_[idx]) 00306 return 1.0 / nGlobalParts_; 00307 else if (pCompactIndex_[idx].size()) 00308 return pSize_[idx][pCompactIndex_[idx][part]]; 00309 else 00310 return pSize_[idx][part]; 00311 } 00312 00326 bool criteriaHaveSamePartSizes(int c1, int c2) const; 00327 00329 // Method used by the algorithm to set results. 00330 00357 void setParts(ArrayRCP<const gno_t> &gnoList, 00358 ArrayRCP<part_t> &partList, bool dataDidNotMove); 00359 00361 00373 void RemapParts(); 00374 00376 /* Return the weight of objects staying with a given remap. 00377 * If remap is NULL, compute weight of objects staying with given partition 00378 */ 00379 long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, 00380 part_t nrhs, part_t nlhs) 00381 { 00382 long staying = 0; 00383 for (part_t i = 0; i < nrhs; i++) { 00384 part_t k = (remap ? 
remap[i] : i); 00385 for (part_t j = idx[k]; j < idx[k+1]; j++) { 00386 if (i == (adj[j]-nlhs)) { 00387 staying += wgt[j]; 00388 break; 00389 } 00390 } 00391 } 00392 return staying; 00393 } 00394 00396 // Results that may be queried by the user, by migration methods, 00397 // or by metric calculation methods. 00398 // We return raw pointers so users don't have to learn about our 00399 // pointer wrappers. 00400 00403 const RCP<const Comm<int> > &getCommunicator() const { return comm_;} 00404 00407 size_t getLocalNumberOfIds() const { return gids_.size(); } 00408 00411 const zgid_t *getIdList() const { 00412 if (gids_.size() > 0) return gids_.getRawPtr(); 00413 else return NULL; 00414 } 00415 00418 const part_t *getPartList() const { 00419 if (parts_.size() > 0) return parts_.getRawPtr(); 00420 else return NULL; 00421 } 00422 00427 const int *getProcList() const { 00428 if (procs_.size() > 0) return procs_.getRawPtr(); 00429 else return NULL; 00430 } 00431 00432 00435 void setPartBoxes( 00436 RCP<::std::vector< 00437 Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > outPartBoxes) 00438 { 00439 this->partBoxes = outPartBoxes; 00440 } 00441 00444 RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 00445 getPartBoxes() 00446 { 00447 return this->partBoxes; 00448 } 00449 00452 void getCommunicationGraph( 00453 const Teuchos::Comm<int> *comm, 00454 ArrayRCP <part_t> &comXAdj, 00455 ArrayRCP <part_t> &comAdj) 00456 { 00457 if(comXAdj_.getRawPtr() == NULL && comAdj_.getRawPtr() == NULL){ 00458 00459 part_t ntasks = this->getActualGlobalNumberOfParts(); 00460 if (part_t (this->getTargetGlobalNumberOfParts()) > ntasks){ 00461 ntasks = this->getTargetGlobalNumberOfParts(); 00462 } 00463 RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 00464 pBoxes = this->getGlobalBoxBoundaries(comm); 00465 int dim = (*pBoxes)[0].getDim(); 00466 GridHash<scalar_t, part_t> grid(pBoxes, ntasks, dim); 00467 grid.getAdjArrays(comXAdj_, comAdj_); 
00468 } 00469 comAdj = comAdj_; 00470 comXAdj = comXAdj_; 00471 } 00472 00473 RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 00474 getGlobalBoxBoundaries(const Teuchos::Comm<int> *comm) { 00475 part_t ntasks = this->getActualGlobalNumberOfParts(); 00476 if (part_t (this->getTargetGlobalNumberOfParts()) > ntasks){ 00477 ntasks = this->getTargetGlobalNumberOfParts(); 00478 } 00479 00480 RCP<::std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > > 00481 pBoxes = this->getPartBoxes(); 00482 00483 int dim = (*pBoxes)[0].getDim(); 00484 scalar_t *localPartBoundaries = new scalar_t[ntasks * 2 *dim]; 00485 00486 memset(localPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim); 00487 00488 scalar_t *globalPartBoundaries = new scalar_t[ntasks * 2 *dim]; 00489 memset(globalPartBoundaries, 0, sizeof(scalar_t) * ntasks * 2 *dim); 00490 00491 scalar_t *localPartMins = localPartBoundaries; 00492 scalar_t *localPartMaxs = localPartBoundaries + ntasks * dim; 00493 00494 scalar_t *globalPartMins = globalPartBoundaries; 00495 scalar_t *globalPartMaxs = globalPartBoundaries + ntasks * dim; 00496 00497 part_t boxCount = pBoxes->size(); 00498 for (part_t i = 0; i < boxCount; ++i){ 00499 part_t pId = (*pBoxes)[i].getpId(); 00500 //cout << "me:" << comm->getRank() << " has:" << pId << endl; 00501 00502 scalar_t *lmins = (*pBoxes)[i].getlmins(); 00503 scalar_t *lmaxs = (*pBoxes)[i].getlmaxs(); 00504 00505 for (int j = 0; j < dim; ++j){ 00506 localPartMins[dim * pId + j] = lmins[j]; 00507 localPartMaxs[dim * pId + j] = lmaxs[j]; 00508 /* 00509 cout << "me:" << comm->getRank() << 00510 " dim * pId + j:"<< dim * pId + j << 00511 " localMin:" << localPartMins[dim * pId + j] << 00512 " localMax:" << localPartMaxs[dim * pId + j] << endl; 00513 */ 00514 } 00515 } 00516 00517 Teuchos::Zoltan2_BoxBoundaries<int, scalar_t> reductionOp(ntasks * 2 *dim); 00518 00519 reduceAll<int, scalar_t>(*comm, reductionOp, 00520 ntasks * 2 *dim, localPartBoundaries, 
globalPartBoundaries); 00521 RCP<::std::vector<coordinateModelPartBox<scalar_t, part_t> > > 00522 pB(new ::std::vector <coordinateModelPartBox <scalar_t, part_t> > (), true); 00523 for (part_t i = 0; i < ntasks; ++i){ 00524 Zoltan2::coordinateModelPartBox <scalar_t, part_t> tpb(i, dim, 00525 globalPartMins + dim * i, 00526 globalPartMaxs + dim * i); 00527 00528 /* 00529 for (int j = 0; j < dim; ++j){ 00530 cout << "me:" << comm->getRank() << 00531 " dim * pId + j:"<< dim * i + j << 00532 " globalMin:" << globalPartMins[dim * i + j] << 00533 " globalMax:" << globalPartMaxs[dim * i + j] << endl; 00534 } 00535 */ 00536 pB->push_back(tpb); 00537 } 00538 delete []localPartBoundaries; 00539 delete []globalPartBoundaries; 00540 //RCP < ::std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > tmpRCPBox(pB, true); 00541 this->partBoxes = pB; 00542 return this->partBoxes; 00543 } 00544 00574 template <typename Extra> 00575 size_t convertSolutionToImportList( 00576 int numExtra, 00577 ArrayRCP<Extra> &xtraInfo, 00578 ArrayRCP<typename Adapter::zgid_t> &imports, 00579 ArrayRCP<Extra> &newXtraInfo) const; 00580 00598 void getPartsForProc(int procId, double &numParts, part_t &partMin, 00599 part_t &partMax) const 00600 { 00601 env_->localInputAssertion(__FILE__, __LINE__, "invalid process id", 00602 procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION); 00603 00604 procToPartsMap(procId, numParts, partMin, partMax); 00605 } 00606 00618 void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const 00619 { 00620 env_->localInputAssertion(__FILE__, __LINE__, "invalid part id", 00621 partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION); 00622 00623 partToProcsMap(partId, procMin, procMax); 00624 } 00625 00626 private: 00627 void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts, 00628 int numLocalParts, int numGlobalParts); 00629 00630 void procToPartsMap(int procId, double &numParts, part_t &partMin, 00631 part_t &partMax) 
const; 00632 00633 void partToProcsMap(part_t partId, int &procMin, int &procMax) const; 00634 00635 void setPartDistribution(); 00636 00637 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds, 00638 ArrayView<ArrayRCP<scalar_t> > reqPartSizes); 00639 00640 void computePartSizes(int widx, ArrayView<part_t> ids, 00641 ArrayView<scalar_t> sizes); 00642 00643 void broadcastPartSizes(int widx); 00644 00645 00646 RCP<const Environment> env_; // has application communicator 00647 RCP<const Comm<int> > comm_; // the problem communicator 00648 RCP<const IdentifierMap<user_t> > idMap_; 00649 00650 //part box boundaries as a result of geometric partitioning algorithm. 00651 RCP < ::std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > partBoxes; 00652 ArrayRCP <part_t> comXAdj_; //communication graph xadj 00653 ArrayRCP <part_t> comAdj_; //communication graph adj. 00654 00655 part_t nGlobalParts_;// target global number of parts 00656 part_t nLocalParts_; // number of parts to be on this process 00657 00658 scalar_t localFraction_; // approx fraction of a part on this process 00659 int nWeightsPerObj_; // if user has no weights, this is 1 TODO: WHY??? 00660 00661 // If process p is to be assigned part p for all p, then onePartPerProc_ 00662 // is true. Otherwise it is false, and either procDist_ or partDist_ 00663 // describes the allocation of parts to processes. 00664 // 00665 // If parts are never split across processes, then procDist_ is defined 00666 // as follows: 00667 // 00668 // partId = procDist_[procId] 00669 // partIdNext = procDist_[procId+1] 00670 // globalNumberOfParts = procDist_[numProcs] 00671 // 00672 // meaning that the parts assigned to process procId range from 00673 // [partId, partIdNext). If partIdNext is the same as partId, then 00674 // process procId has no parts. 
00675 // 00676 // If the number parts is less than the number of processes, and the 00677 // user did not specify "num_local_parts" for each of the processes, then 00678 // parts are split across processes, and partDist_ is defined rather than 00679 // procDist_. 00680 // 00681 // procId = partDist_[partId] 00682 // procIdNext = partDist_[partId+1] 00683 // globalNumberOfProcs = partDist_[numParts] 00684 // 00685 // which implies that the part partId is shared by processes in the 00686 // the range [procId, procIdNext). 00687 // 00688 // We use std::vector so we can use upper_bound algorithm 00689 00690 bool onePartPerProc_; // either this is true... 00691 ::std::vector<int> partDist_; // or this is defined ... 00692 ::std::vector<part_t> procDist_; // or this is defined. 00693 bool procDistEquallySpread_; // if procDist_ is used and 00694 // #parts > #procs and 00695 // num_local_parts is not specified, 00696 // parts are evenly distributed to procs 00697 00698 // In order to minimize the storage required for part sizes, we 00699 // have three different representations. 00700 // 00701 // If the part sizes for weight index w are all the same, then: 00702 // pSizeUniform_[w] = true 00703 // pCompactIndex_[w].size() = 0 00704 // pSize_[w].size() = 0 00705 // 00706 // and the size for part p is 1.0 / nparts. 00707 // 00708 // If part sizes differ for each part in weight index w, but there 00709 // are no more than 64 distinct sizes: 00710 // pSizeUniform_[w] = false 00711 // pCompactIndex_[w].size() = number of parts 00712 // pSize_[w].size() = number of different sizes 00713 // 00714 // and the size for part p is pSize_[pCompactIndex_[p]]. 00715 // 00716 // If part sizes differ for each part in weight index w, and there 00717 // are more than 64 distinct sizes: 00718 // pSizeUniform_[w] = false 00719 // pCompactIndex_[w].size() = 0 00720 // pSize_[w].size() = nparts 00721 // 00722 // and the size for part p is pSize_[p]. 
00723 // 00724 // NOTE: If we expect to have similar cases, i.e. a long list of scalars 00725 // where it is highly possible that just a few unique values appear, 00726 // then we may want to make this a class. The advantage is that we 00727 // save a long list of 1-byte indices instead of a long list of scalars. 00728 00729 ArrayRCP<bool> pSizeUniform_; 00730 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_; 00731 ArrayRCP<ArrayRCP<scalar_t> > pSize_; 00732 00734 // The algorithm sets these values upon completion. 00735 00736 ArrayRCP<const zgid_t> gids_; // User's global IDs 00737 ArrayRCP<part_t> parts_; // part number assigned to gids_[i] 00738 00739 bool haveSolution_; 00740 00741 part_t nGlobalPartsSolution_; // global number of parts in solution 00742 00744 // The solution calculates this from the part assignments, 00745 // unless onePartPerProc_. 00746 00747 ArrayRCP<int> procs_; // process rank assigned to gids_[i] 00748 }; 00749 00751 // Definitions 00753 00754 template <typename Adapter> 00755 PartitioningSolution<Adapter>::PartitioningSolution( 00756 RCP<const Environment> &env, 00757 RCP<const Comm<int> > &comm, 00758 RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights) 00759 : env_(env), comm_(comm), idMap_(idMap), 00760 partBoxes(),comXAdj_(), comAdj_(), 00761 nGlobalParts_(0), nLocalParts_(0), 00762 localFraction_(0), nWeightsPerObj_(), 00763 onePartPerProc_(false), partDist_(), procDist_(), 00764 procDistEquallySpread_(false), 00765 pSizeUniform_(), pCompactIndex_(), pSize_(), 00766 gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0), 00767 procs_() 00768 { 00769 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO? 00770 00771 setPartDistribution(); 00772 00773 // We must call setPartSizes() because part sizes may have 00774 // been provided by the user on other processes. 
00775 00776 ArrayRCP<part_t> *noIds = new ArrayRCP<part_t> [nWeightsPerObj_]; 00777 ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_]; 00778 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, true); 00779 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true); 00780 00781 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_)); 00782 00783 env_->memory("After construction of solution"); 00784 } 00785 00786 template <typename Adapter> 00787 PartitioningSolution<Adapter>::PartitioningSolution( 00788 RCP<const Environment> &env, 00789 RCP<const Comm<int> > &comm, 00790 RCP<const IdentifierMap<user_t> > &idMap, int nUserWeights, 00791 ArrayView<ArrayRCP<part_t> > reqPartIds, 00792 ArrayView<ArrayRCP<scalar_t> > reqPartSizes) 00793 : env_(env), comm_(comm), idMap_(idMap), 00794 partBoxes(),comXAdj_(), comAdj_(), 00795 nGlobalParts_(0), nLocalParts_(0), 00796 localFraction_(0), nWeightsPerObj_(), 00797 onePartPerProc_(false), partDist_(), procDist_(), 00798 procDistEquallySpread_(false), 00799 pSizeUniform_(), pCompactIndex_(), pSize_(), 00800 gids_(), parts_(), haveSolution_(false), nGlobalPartsSolution_(0), 00801 procs_() 00802 { 00803 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO? 00804 00805 setPartDistribution(); 00806 00807 setPartSizes(reqPartIds, reqPartSizes); 00808 00809 env_->memory("After construction of solution"); 00810 } 00811 00812 template <typename Adapter> 00813 void PartitioningSolution<Adapter>::setPartDistribution() 00814 { 00815 // Did the caller define num_global_parts and/or num_local_parts? 
00816 00817 const ParameterList &pl = env_->getParameters(); 00818 size_t haveGlobalNumParts=0, haveLocalNumParts=0; 00819 int numLocal=0, numGlobal=0; 00820 double val; 00821 00822 const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts"); 00823 00824 if (pe){ 00825 val = pe->getValue<double>(&val); // TODO: KDD Skip this double get 00826 haveGlobalNumParts = 1; // TODO: KDD Should be unnecessary once 00827 numGlobal = static_cast<int>(val); // TODO: KDD paramlist handles long long. 00828 nGlobalParts_ = part_t(numGlobal); // TODO: KDD also do below. 00829 } 00830 00831 pe = pl.getEntryPtr("num_local_parts"); 00832 00833 if (pe){ 00834 val = pe->getValue<double>(&val); 00835 haveLocalNumParts = 1; 00836 numLocal = static_cast<int>(val); 00837 nLocalParts_ = part_t(numLocal); 00838 } 00839 00840 try{ 00841 // Sets onePartPerProc_, partDist_, and procDist_ 00842 00843 partToProc(true, haveLocalNumParts, haveGlobalNumParts, 00844 numLocal, numGlobal); 00845 } 00846 Z2_FORWARD_EXCEPTIONS 00847 00848 int nprocs = comm_->getSize(); 00849 int rank = comm_->getRank(); 00850 00851 if (onePartPerProc_){ 00852 nGlobalParts_ = nprocs; 00853 nLocalParts_ = 1; 00854 } 00855 else if (partDist_.size() > 0){ // more procs than parts 00856 nGlobalParts_ = partDist_.size() - 1; 00857 int pstart = partDist_[0]; 00858 for (part_t i=1; i <= nGlobalParts_; i++){ 00859 int pend = partDist_[i]; 00860 if (rank >= pstart && rank < pend){ 00861 int numOwners = pend - pstart; 00862 nLocalParts_ = 1; 00863 localFraction_ = 1.0 / numOwners; 00864 break; 00865 } 00866 pstart = pend; 00867 } 00868 } 00869 else if (procDist_.size() > 0){ // more parts than procs 00870 nGlobalParts_ = procDist_[nprocs]; 00871 nLocalParts_ = procDist_[rank+1] - procDist_[rank]; 00872 } 00873 else { 00874 throw std::logic_error("partToProc error"); 00875 } 00876 00877 } 00878 00879 template <typename Adapter> 00880 void PartitioningSolution<Adapter>::setPartSizes( 00881 ArrayView<ArrayRCP<part_t> > ids, 
ArrayView<ArrayRCP<scalar_t> > sizes) 00882 { 00883 int widx = nWeightsPerObj_; 00884 bool fail=false; 00885 00886 size_t *countBuf = new size_t [widx*2]; 00887 ArrayRCP<size_t> counts(countBuf, 0, widx*2, true); 00888 00889 fail = ((ids.size() != widx) || (sizes.size() != widx)); 00890 00891 for (int w=0; !fail && w < widx; w++){ 00892 counts[w] = ids[w].size(); 00893 if (ids[w].size() != sizes[w].size()) fail=true; 00894 } 00895 00896 env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0, 00897 COMPLEX_ASSERTION, comm_); 00898 00899 // Are all part sizes the same? This is the common case. 00900 00901 ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx]; 00902 pSize_ = arcp(emptySizes, 0, widx); 00903 00904 ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx]; 00905 pCompactIndex_ = arcp(emptyIndices, 0, widx); 00906 00907 bool *info = new bool [widx]; 00908 pSizeUniform_ = arcp(info, 0, widx); 00909 for (int w=0; w < widx; w++) 00910 pSizeUniform_[w] = true; 00911 00912 if (nGlobalParts_ == 1){ 00913 return; // there's only one part in the whole problem 00914 } 00915 00916 size_t *ptr1 = counts.getRawPtr(); 00917 size_t *ptr2 = counts.getRawPtr() + widx; 00918 00919 try{ 00920 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2); 00921 } 00922 Z2_THROW_OUTSIDE_ERROR(*env_); 00923 00924 bool zero = true; 00925 00926 for (int w=0; w < widx; w++) 00927 if (counts[widx+w] > 0){ 00928 zero = false; 00929 pSizeUniform_[w] = false; 00930 } 00931 00932 if (zero) // Part sizes for all criteria are uniform. 00933 return; 00934 00935 // Compute the part sizes for criteria for which part sizes were 00936 // supplied. Normalize for each criteria so part sizes sum to one. 00937 00938 int nprocs = comm_->getSize(); 00939 int rank = comm_->getRank(); 00940 00941 for (int w=0; w < widx; w++){ 00942 if (pSizeUniform_[w]) continue; 00943 00944 // Send all ids and sizes to one process. 
00945 // (There is no simple gather method in Teuchos.) 00946 00947 part_t length = ids[w].size(); 00948 part_t *allLength = new part_t [nprocs]; 00949 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length, 00950 nprocs, allLength); 00951 00952 if (rank == 0){ 00953 int total = 0; 00954 for (int i=0; i < nprocs; i++) 00955 total += allLength[i]; 00956 00957 part_t *partNums = new part_t [total]; 00958 scalar_t *partSizes = new scalar_t [total]; 00959 00960 ArrayView<part_t> idArray(partNums, total); 00961 ArrayView<scalar_t> sizeArray(partSizes, total); 00962 00963 if (length > 0){ 00964 for (int i=0; i < length; i++){ 00965 *partNums++ = ids[w][i]; 00966 *partSizes++ = sizes[w][i]; 00967 } 00968 } 00969 00970 for (int p=1; p < nprocs; p++){ 00971 if (allLength[p] > 0){ 00972 Teuchos::receive<int, part_t>(*comm_, p, 00973 allLength[p], partNums); 00974 Teuchos::receive<int, scalar_t>(*comm_, p, 00975 allLength[p], partSizes); 00976 partNums += allLength[p]; 00977 partSizes += allLength[p]; 00978 } 00979 } 00980 00981 delete [] allLength; 00982 00983 try{ 00984 computePartSizes(w, idArray, sizeArray); 00985 } 00986 Z2_FORWARD_EXCEPTIONS 00987 00988 delete [] idArray.getRawPtr(); 00989 delete [] sizeArray.getRawPtr(); 00990 } 00991 else{ 00992 delete [] allLength; 00993 if (length > 0){ 00994 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0); 00995 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0); 00996 } 00997 } 00998 00999 broadcastPartSizes(w); 01000 } 01001 } 01002 01003 template <typename Adapter> 01004 void PartitioningSolution<Adapter>::broadcastPartSizes(int widx) 01005 { 01006 env_->localBugAssertion(__FILE__, __LINE__, "preallocations", 01007 pSize_.size()>widx && 01008 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx, 01009 COMPLEX_ASSERTION); 01010 01011 int rank = comm_->getRank(); 01012 int nprocs = comm_->getSize(); 01013 part_t nparts = nGlobalParts_; 01014 01015 if (nprocs < 2) 01016 return; 01017 01018 char 
flag=0; 01019 01020 if (rank == 0){ 01021 if (pSizeUniform_[widx] == true) 01022 flag = 1; 01023 else if (pCompactIndex_[widx].size() > 0) 01024 flag = 2; 01025 else 01026 flag = 3; 01027 } 01028 01029 try{ 01030 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag); 01031 } 01032 Z2_THROW_OUTSIDE_ERROR(*env_); 01033 01034 if (flag == 1){ 01035 if (rank > 0) 01036 pSizeUniform_[widx] = true; 01037 01038 return; 01039 } 01040 01041 if (flag == 2){ 01042 01043 // broadcast the indices into the size list 01044 01045 unsigned char *idxbuf = NULL; 01046 01047 if (rank > 0){ 01048 idxbuf = new unsigned char [nparts]; 01049 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf); 01050 } 01051 else{ 01052 env_->localBugAssertion(__FILE__, __LINE__, "index list size", 01053 pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION); 01054 idxbuf = pCompactIndex_[widx].getRawPtr(); 01055 } 01056 01057 try{ 01058 // broadcast of unsigned char is not supported 01059 Teuchos::broadcast<int, char>(*comm_, 0, nparts, 01060 reinterpret_cast<char *>(idxbuf)); 01061 } 01062 Z2_THROW_OUTSIDE_ERROR(*env_); 01063 01064 if (rank > 0) 01065 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true); 01066 01067 // broadcast the list of different part sizes 01068 01069 unsigned char maxIdx=0; 01070 for (part_t p=0; p < nparts; p++) 01071 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p]; 01072 01073 int numSizes = maxIdx + 1; 01074 01075 scalar_t *sizeList = NULL; 01076 01077 if (rank > 0){ 01078 sizeList = new scalar_t [numSizes]; 01079 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList); 01080 } 01081 else{ 01082 env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes", 01083 numSizes == pSize_[widx].size(), COMPLEX_ASSERTION); 01084 01085 sizeList = pSize_[widx].getRawPtr(); 01086 } 01087 01088 try{ 01089 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList); 01090 } 01091 Z2_THROW_OUTSIDE_ERROR(*env_); 01092 01093 if (rank > 0) 01094 pSize_[widx] = 
arcp(sizeList, 0, numSizes, true); 01095 01096 return; 01097 } 01098 01099 if (flag == 3){ 01100 01101 // broadcast the size of each part 01102 01103 scalar_t *sizeList = NULL; 01104 01105 if (rank > 0){ 01106 sizeList = new scalar_t [nparts]; 01107 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList); 01108 } 01109 else{ 01110 env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes", 01111 nparts == pSize_[widx].size(), COMPLEX_ASSERTION); 01112 01113 sizeList = pSize_[widx].getRawPtr(); 01114 } 01115 01116 try{ 01117 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList); 01118 } 01119 Z2_THROW_OUTSIDE_ERROR(*env_); 01120 01121 if (rank > 0) 01122 pSize_[widx] = arcp(sizeList, 0, nparts); 01123 01124 return; 01125 } 01126 } 01127 01128 template <typename Adapter> 01129 void PartitioningSolution<Adapter>::computePartSizes(int widx, 01130 ArrayView<part_t> ids, ArrayView<scalar_t> sizes) 01131 { 01132 int len = ids.size(); 01133 01134 if (len == 0){ 01135 pSizeUniform_[widx] = true; 01136 return; 01137 } 01138 01139 env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes", 01140 len>0 && sizes.size()==len, COMPLEX_ASSERTION); 01141 01142 env_->localBugAssertion(__FILE__, __LINE__, "bad index", 01143 widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION); 01144 01145 env_->localBugAssertion(__FILE__, __LINE__, "preallocations", 01146 pSize_.size()>widx && 01147 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx, 01148 COMPLEX_ASSERTION); 01149 01150 // Check ids and sizes and find min, max and average sizes. 01151 // If sizes are very close to uniform, call them uniform parts. 
01152 01153 part_t nparts = nGlobalParts_; 01154 unsigned char *buf = new unsigned char [nparts]; 01155 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf); 01156 memset(buf, 0, nparts); 01157 ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true); 01158 01159 scalar_t epsilon = 10e-5 / nparts; 01160 scalar_t min=sizes[0], max=sizes[0], sum=0; 01161 01162 for (int i=0; i < len; i++){ 01163 part_t id = ids[i]; 01164 scalar_t size = sizes[i]; 01165 01166 env_->localInputAssertion(__FILE__, __LINE__, "invalid part id", 01167 id>=0 && id<nparts, BASIC_ASSERTION); 01168 01169 env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0, 01170 BASIC_ASSERTION); 01171 01172 // TODO: we could allow users to specify multiple sizes for the same 01173 // part if we add a parameter that says what we are to do with them: 01174 // add them or take the max. 01175 01176 env_->localInputAssertion(__FILE__, __LINE__, 01177 "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION); 01178 01179 partIdx[id] = 1; // mark that we have a size for this part 01180 01181 if (size < min) min = size; 01182 if (size > max) max = size; 01183 sum += size; 01184 } 01185 01186 if (sum == 0){ 01187 01188 // User has given us a list of parts of size 0, we'll set 01189 // the rest to them to equally sized parts. 
    // (Continuation of the part-size computation routine; its signature and
    // the enclosing condition begin before this chunk.  From the code below:
    // every explicitly listed part gets size zero, so only two distinct
    // sizes exist — 0 for the listed parts and a uniform share for the rest.
    // Store them compactly: a 2-entry size array plus a per-part index.)
    scalar_t *allSizes = new scalar_t [2];
    env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);

    ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);

    int numNonZero = nparts - len;

    allSizes[0] = 0.0;
    allSizes[1] = 1.0 / numNonZero;

    for (part_t p=0; p < nparts; p++)
      buf[p] = 1;                 // index to default part size

    for (int i=0; i < len; i++)
      buf[ids[i]] = 0;            // index to part size zero

    pSize_[widx] = sizeArray;
    pCompactIndex_[widx] = partIdx;

    return;
  }

  if (max - min <= epsilon){
    // All explicitly supplied sizes agree to within epsilon, so the
    // sizes are effectively uniform and nothing needs to be stored.
    pSizeUniform_[widx] = true;
    return;
  }

  // A size for parts that were not specified:
  scalar_t avg = sum / nparts;

  // We are going to merge part sizes that are very close.  This takes
  // computation time now, but can save considerably in the storage of
  // all part sizes on each process.  For example, a common case may
  // be some parts are size 1 and all the rest are size 2.

  // Work on a sorted copy of the supplied sizes.
  scalar_t *tmp = new scalar_t [len];
  env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
  memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
  ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);

  std::sort(partSizes.begin(), partSizes.end());

  // create a list of sizes that are unique within epsilon

  Array<scalar_t> nextUniqueSize;
  nextUniqueSize.push_back(partSizes[len-1]);   // largest
  scalar_t curr = partSizes[len-1];
  int avgIndex = len;        // sentinel: "average's group not found yet"
  bool haveAvg = false;
  if (curr - avg <= epsilon)
    avgIndex = 0;

  // Walk sizes in decreasing order, keeping one representative per
  // epsilon-group and noting which group (if any) contains the average.
  for (int i=len-2; i >= 0; i--){
    scalar_t val = partSizes[i];
    if (curr - val > epsilon){
      nextUniqueSize.push_back(val);  // the highest in the group
      curr = val;
      if (avgIndex==len && val > avg && val - avg <= epsilon){
        // the average would be in this group
        avgIndex = nextUniqueSize.size() - 1;
        haveAvg = true;
      }
    }
  }

  partSizes.clear();

  size_t numSizes = nextUniqueSize.size();
  int sizeArrayLen = numSizes;

  if (numSizes < 64){
    // We can store the size for every part in a compact way:
    // a short array of distinct sizes plus one small index per part.

    // Create a list of all sizes in increasing order

    if (!haveAvg) sizeArrayLen++;   // need to include average

    scalar_t *allSizes = new scalar_t [sizeArrayLen];
    env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
    ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);

    int newAvgIndex = sizeArrayLen;   // sentinel until average is placed

    // nextUniqueSize is in decreasing order; emit it reversed (increasing),
    // splicing in the average value if it was not one of the groups.
    for (int i=numSizes-1, idx=0; i >= 0; i--){

      if (newAvgIndex == sizeArrayLen){

        if (haveAvg && i==avgIndex)
          newAvgIndex = idx;

        else if (!haveAvg && avg < nextUniqueSize[i]){
          newAvgIndex = idx;
          allSizes[idx++] = avg;
        }
      }

      allSizes[idx++] = nextUniqueSize[i];
    }

    env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
      newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);

    // Unspecified parts default to the average size.
    for (int i=0; i < nparts; i++){
      buf[i] = newAvgIndex;   // index to default part size
    }

    sum = (nparts - len) * allSizes[newAvgIndex];

    for (int i=0; i < len; i++){
      int id = ids[i];
      scalar_t size = sizes[i];
      int index;

      // Find the first size greater than or equal to this size.

      if (size < avg && avg - size <= epsilon)
        index = newAvgIndex;
      else{
        typename ArrayRCP<scalar_t>::iterator found =
          std::lower_bound(sizeArray.begin(), sizeArray.end(), size);

        env_->localBugAssertion(__FILE__, __LINE__, "size array",
          found != sizeArray.end(), COMPLEX_ASSERTION);

        index = found - sizeArray.begin();
      }

      buf[id] = index;
      sum += allSizes[index];
    }

    // Normalize the stored sizes so that part sizes sum to one.
    for (int i=0; i < sizeArrayLen; i++){
      sizeArray[i] /= sum;
    }

    pCompactIndex_[widx] = partIdx;
    pSize_[widx] = sizeArray;
  }
  else{
    // To have access to part sizes, we must store nparts scalar_ts on
    // every process.  We expect this is a rare case.

    tmp = new scalar_t [nparts];
    env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);

    sum += ((nparts - len) * avg);

    // Unspecified parts get the (normalized) average size ...
    for (int i=0; i < nparts; i++){
      tmp[i] = avg/sum;
    }

    // ... and listed parts get their (normalized) given size.
    for (int i=0; i < len; i++){
      tmp[ids[i]] = sizes[i]/sum;
    }

    pSize_[widx] = arcp(tmp, 0, nparts);
  }
}


/*! \brief Record the algorithm's part assignment in this solution.
 *
 *  \param gnoList  global numbers of the objects that were partitioned;
 *                  may be overwritten if data must be returned to owners.
 *  \param partList part assigned to each gno in \c gnoList (parallel array);
 *                  may likewise be overwritten.
 *  \param dataDidNotMove  true if the algorithm returns parts in the same
 *                  order that it received the gnos, so no communication
 *                  is required to return assignments to the gno owners.
 *
 *  Also computes nGlobalPartsSolution_, fills gids_/parts_/procs_, and
 *  optionally remaps part numbers (parameter "remap_parts").
 */
template <typename Adapter>
void PartitioningSolution<Adapter>::setParts(
  ArrayRCP<const gno_t> &gnoList, ArrayRCP<part_t> &partList,
  bool dataDidNotMove)
{
  env_->debug(DETAILED_STATUS, "Entering setParts");

  size_t len = partList.size();

  // Find the actual number of parts in the solution, which can
  // be more or less than the nGlobalParts_ target.
  // (We may want to compute the imbalance of a given solution with
  // respect to a desired solution.  This solution may have more or
  // fewer parts that the desired solution.)

  part_t lMax=0, lMin=0, gMax, gMin;

  if (len > 0)
    IdentifierTraits<part_t>::minMax(partList.getRawPtr(), len, lMin, lMax);

  // Global reduction; len == 0 flags processes with no local objects.
  IdentifierTraits<part_t>::globalMinMax(*comm_, len == 0,
    lMin, lMax, gMin, gMax);

  nGlobalPartsSolution_ = gMax - gMin + 1;

  if (dataDidNotMove) {
    // Case where the algorithm provides the part list in the same order
    // that it received the gnos.
    if (idMap_->gnosAreGids()){
      // gno_t and zgid_t are interchangeable here; just reinterpret.
      gids_ = Teuchos::arcp_reinterpret_cast<const zgid_t>(gnoList);
    }
    else{
      // Translate internal global numbers back to application gids.
      zgid_t *gidList = new zgid_t [len];
      env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
      ArrayView<zgid_t> gidView(gidList, len);

      const gno_t *gnos = gnoList.getRawPtr();
      ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);

      try{
        idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
      }
      Z2_FORWARD_EXCEPTIONS

      gids_ = arcp<const zgid_t>(gidList, 0, len);
    }
    parts_ = partList;
  }
  else {
    // Case where the algorithm moved the data.
    // KDDKDD Should we require the algorithm to return the parts in
    // KDDKDD the same order as the gnos provided by the model?

    // We send part information to the process that "owns" the global number.

    ArrayView<zgid_t> emptyView;
    ArrayView<int> procList;

    if (len){
      int *tmp = new int [len];
      env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
      procList = ArrayView<int>(tmp, len);
    }

    // Look up the owning process of each gno.
    idMap_->gnoGlobalTranslate (gnoList.view(0,len), emptyView, procList);

    int remotelyOwned = 0;
    int rank = comm_->getRank();
    int nprocs = comm_->getSize();

    // Scan for any gno owned by another rank; stop at the first hit.
    for (size_t i=0; !remotelyOwned && i < len; i++){
      if (procList[i] != rank)
        remotelyOwned = 1;
    }

    int anyRemotelyOwned=0;

    // Communication below is collective, so all ranks must agree on
    // whether any rank holds remotely owned data.
    try{
      reduceAll<int, int>(*comm_, Teuchos::REDUCE_MAX, 1,
        &remotelyOwned, &anyRemotelyOwned);
    }
    Z2_THROW_OUTSIDE_ERROR(*env_);

    if (anyRemotelyOwned){

      // Send the owners of these gnos their part assignments.
      // Messages are flattened (gno, part) pairs, hence the factor of 2.

      Array<int> countOutBuf(nprocs, 0);
      Array<int> countInBuf(nprocs, 0);

      Array<gno_t> outBuf(len*2, 0);

      if (len > 0){
        Array<lno_t> offsetBuf(nprocs+1, 0);

        for (size_t i=0; i < len; i++){
          countOutBuf[procList[i]]+=2;
        }

        // Prefix sum: starting offset of each destination's message.
        offsetBuf[0] = 0;
        for (int i=0; i < nprocs; i++)
          offsetBuf[i+1] = offsetBuf[i] + countOutBuf[i];

        for (size_t i=0; i < len; i++){
          int p = procList[i];
          int off = offsetBuf[p];
          outBuf[off] = gnoList[i];
          outBuf[off+1] = static_cast<gno_t>(partList[i]);
          offsetBuf[p]+=2;
        }
      }

      ArrayRCP<gno_t> inBuf;

      try{
        AlltoAllv<gno_t>(*comm_, *env_,
          outBuf(), countOutBuf(), inBuf, countInBuf());
      }
      Z2_FORWARD_EXCEPTIONS;

      // Release send-side storage as early as possible.
      outBuf.clear();
      countOutBuf.clear();

      gno_t newLen = 0;
      for (int i=0; i < nprocs; i++)
        newLen += countInBuf[i];

      countInBuf.clear();

      newLen /= 2;   // received values are (gno, part) pairs

      ArrayRCP<part_t> parts;
      ArrayRCP<const gno_t> myGnos;

      if (newLen > 0){

        // Unpack the interleaved pairs into parallel gno/part arrays.
        gno_t *tmpGno = new gno_t [newLen];
        env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpGno);

        part_t *tmpPart = new part_t [newLen];
        env_->localMemoryAssertion(__FILE__, __LINE__, newLen, tmpPart);

        size_t next = 0;
        for (gno_t i=0; i < newLen; i++){
          tmpGno[i] = inBuf[next++];
          tmpPart[i] = inBuf[next++];
        }

        parts = arcp(tmpPart, 0, newLen);
        myGnos = arcp(tmpGno, 0, newLen);
      }

      // Replace caller's views with the locally owned data.
      gnoList = myGnos;
      partList = parts;
      len = newLen;
    }

    delete [] procList.getRawPtr();   // delete[] of NULL is harmless when len==0

    if (idMap_->gnosAreGids()){
      gids_ = Teuchos::arcp_reinterpret_cast<const zgid_t>(gnoList);
    }
    else{
      zgid_t *gidList = new zgid_t [len];
      env_->localMemoryAssertion(__FILE__, __LINE__, len, gidList);
      ArrayView<zgid_t> gidView(gidList, len);

      const gno_t *gnos = gnoList.getRawPtr();
      ArrayView<gno_t> gnoView(const_cast<gno_t *>(gnos), len);

      try{
        idMap_->gidTranslate(gidView, gnoView, TRANSLATE_LIB_TO_APP);
      }
      Z2_FORWARD_EXCEPTIONS

      gids_ = arcp<const zgid_t>(gidList, 0, len);
    }

    parts_ = partList;
  }

  // Now determine which process gets each object, if not one-to-one.

  if (!onePartPerProc_){

    int *procs = new int [len];
    env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
    procs_ = arcp<int>(procs, 0, len);

    part_t *parts = partList.getRawPtr();

    if (procDist_.size() > 0){    // parts are not split across procs

      int procId;
      for (size_t i=0; i < len; i++){
        partToProcsMap(parts[i], procs[i], procId);
      }
    }
    else{  // harder - we need to split the parts across multiple procs

      lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
      env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
        partCounter);

      int numProcs = comm_->getSize();

      //MD NOTE: there was no initialization for partCounter.
      //I added the line below, correct me if I am wrong.
      memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);

      // Count local objects per part.
      for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
        partCounter[parts[i]]++;

      lno_t *procCounter = new lno_t [numProcs];
      env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);

      int proc1;
      int proc2 = partDist_[0];

      // For each part, spread its object count evenly over the procs that
      // share it; the first 'extra' procs take one additional object.
      // NOTE(review): loop starts at part=1, so procCounter for procs that
      // only own part 0 is not set here — confirm intent against callers.
      for (part_t part=1; part < nGlobalParts_; part++){
        proc1 = proc2;
        proc2 = partDist_[part+1];
        int numprocs = proc2 - proc1;

        double dNum = partCounter[part];
        double dProcs = numprocs;

        //cout << "dNum:" << dNum << " dProcs:" << dProcs << endl;
        double each =  floor(dNum/dProcs);
        double extra = fmod(dNum,dProcs);

        for (int proc=proc1, i=0; proc<proc2; proc++, i++){
          if (i < extra)
            procCounter[proc] = lno_t(each) + 1;
          else
            procCounter[proc] = lno_t(each);
        }
      }

      delete [] partCounter;

      for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
        if (partList[i] >= nGlobalParts_){
          // Solution has more parts that targeted.  These
          // objects just remain on this process.
          procs[i] = comm_->getRank();
          continue;
        }
        part_t partNum = parts[i];
        proc1 = partDist_[partNum];
        proc2 = partDist_[partNum + 1];

        // Assign to the first proc sharing this part that still has quota.
        int proc;
        for (proc=proc1; proc < proc2; proc++){
          if (procCounter[proc] > 0){
            procs[i] = proc;
            procCounter[proc]--;
            break;
          }
        }
        env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
          proc < proc2, COMPLEX_ASSERTION);
      }

      delete [] procCounter;
    }
  }

  // Now that parts_ info is back on home process, remap the parts.
  // TODO:  The parts will be inconsistent with the proc assignments after
  // TODO:  remapping.  This problem will go away after we separate process
  // TODO:  mapping from setParts.  But for MueLu's use case, the part
  // TODO:  remapping is all that matters; they do not use the process mapping.
  int doRemap = 0;
  const Teuchos::ParameterEntry *pe =
    env_->getParameters().getEntryPtr("remap_parts");
  if (pe) doRemap = pe->getValue(&doRemap);
  if (doRemap) RemapParts();

  haveSolution_ = true;

  env_->memory("After Solution has processed algorithm's answer");
  env_->debug(DETAILED_STATUS, "Exiting setParts");
}

/*! \brief Convert this solution to an import list: each process receives
 *  the gids (and optional extra data) of the objects assigned to it.
 *
 *  \param numExtra   number of extra values accompanying each gid (0 or 1
 *                    per gid as used below — presumably weights or tags).
 *  \param xtraInfo   extra value per local gid (input).
 *  \param imports    on return, gids imported to this process (output).
 *  \param newXtraInfo on return, extra values for the imported gids (output).
 *  \return the number of imported gids.
 */
template <typename Adapter>
template <typename Extra>
size_t PartitioningSolution<Adapter>::convertSolutionToImportList(
  int numExtra, ArrayRCP<Extra> &xtraInfo,
  ArrayRCP<typename Adapter::zgid_t> &imports,       // output
  ArrayRCP<Extra> &newXtraInfo) const                // output
{
  env_->localInputAssertion(__FILE__, __LINE__, "no solution yet",
    haveSolution_, BASIC_ASSERTION);
  env_->debug(DETAILED_STATUS, "Entering convertSolutionToImportList");

  int numProcs                = comm_->getSize();
  size_t localNumIds          = gids_.size();

  // How many to each process?
  // Destination of each gid: the part itself when parts == procs,
  // otherwise the process previously computed in procs_.
  Array<int> counts(numProcs, 0);
  if (onePartPerProc_)
    for (size_t i=0; i < localNumIds; i++)
      counts[parts_[i]]++;
  else
    for (size_t i=0; i < localNumIds; i++)
      counts[procs_[i]]++;

  // Prefix sum of counts gives each destination's starting offset.
  Array<gno_t> offsets(numProcs+1, 0);
  for (int i=1; i <= numProcs; i++){
    offsets[i] = offsets[i-1] + counts[i-1];
  }

  Array<zgid_t> gidList(localNumIds);
  Array<Extra> numericInfo;

  if (numExtra > 0)
    numericInfo.resize(localNumIds);

  // Bucket gids (and extra info) by destination, advancing offsets in place.
  // NOTE(review): idx is lno_t while offsets holds gno_t — assumes local
  // counts fit in lno_t; confirm.
  if (onePartPerProc_){
    for (size_t i=0; i < localNumIds; i++){
      lno_t idx = offsets[parts_[i]];
      gidList[idx] = gids_[i];
      if (numExtra > 0)
        numericInfo[idx] = xtraInfo[i];
      offsets[parts_[i]] = idx + 1;
    }
  }
  else {
    for (size_t i=0; i < localNumIds; i++){
      lno_t idx = offsets[procs_[i]];
      gidList[idx] = gids_[i];
      if (numExtra > 0)
        numericInfo[idx] = xtraInfo[i];
      offsets[procs_[i]] = idx + 1;
    }
  }

  Array<int> recvCounts(numProcs, 0);

  // Exchange gids; every rank receives the gids assigned to it.
  try{
    AlltoAllv<zgid_t>(*comm_, *env_, gidList(),
      counts(), imports, recvCounts());
  }
  catch (std::exception &e){
    throw std::runtime_error("alltoallv 1");
  }

  // Exchange the accompanying extra values with the same counts.
  if (numExtra > 0){
    try{
      AlltoAllv<Extra>(*comm_, *env_, xtraInfo(),
        counts(), newXtraInfo, recvCounts());
    }
    catch (std::exception &e){
      throw std::runtime_error("alltoallv 2");
    }
  }

  env_->debug(DETAILED_STATUS, "Exiting convertSolutionToImportList");
  return imports.size();
}

/*! \brief Map a process to the parts it owns.
 *
 *  \param procId   the process of interest.
 *  \param numParts on return, the number of parts held by the process;
 *                  may be fractional when several procs share one part.
 *  \param partMin  on return, the smallest part number on the process.
 *  \param partMax  on return, the largest part number on the process.
 */
template <typename Adapter>
void PartitioningSolution<Adapter>::procToPartsMap(int procId,
  double &numParts, part_t &partMin, part_t &partMax) const
{
  if (onePartPerProc_){
    numParts = 1.0;
    partMin = partMax = procId;
  }
  else if (procDist_.size() > 0){
    // procDist_[p] is the first part on proc p; parts are contiguous.
    partMin = procDist_[procId];
    partMax = procDist_[procId+1] - 1;
    numParts = procDist_[procId+1] - partMin;
  }
  else{
    // find the first p such that partDist_[p] > procId

    ::std::vector<int>::const_iterator entry;
    entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);

    size_t partIdx = entry - partDist_.begin();
    int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
    partMin = partMax = int(partIdx) - 1;
    numParts = 1.0 / numProcs;   // this proc holds a fraction of one part
  }
}

/*! \brief Map a part to the range of processes that own it.
 *
 *  \param partId  the part of interest.
 *  \param procMin on return, the lowest-rank process owning the part.
 *  \param procMax on return, the highest-rank process owning the part.
 */
template <typename Adapter>
void PartitioningSolution<Adapter>::partToProcsMap(part_t partId,
  int &procMin, int &procMax) const
{
  if (partId >= nGlobalParts_){
    // setParts() may be given an initial solution which uses a
    // different number of parts than the desired solution.  It is
    // still a solution.  We keep it on this process.
    procMin = procMax = comm_->getRank();
  }
  else if (onePartPerProc_){
    procMin = procMax = int(partId);
  }
  else if (procDist_.size() > 0){
    if (procDistEquallySpread_) {
      // Avoid binary search.
      // Estimate the proc from the even spread, then correct locally.
      double fProcs = comm_->getSize();
      double fParts = nGlobalParts_;
      double each = fParts / fProcs;
      procMin = int(partId / each);
      while (procDist_[procMin] > partId) procMin--;
      while (procDist_[procMin+1] <= partId) procMin++;
      procMax = procMin;
    }
    else {
      // find the first p such that procDist_[p] > partId.
      // For now, do a binary search.
      typename ::std::vector<part_t>::const_iterator entry;
      entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);

      size_t procIdx = entry - procDist_.begin();
      procMin = procMax = int(procIdx) - 1;
    }
  }
  else{
    // partDist_ maps each part to its owning range of procs.
    procMin = partDist_[partId];
    procMax = partDist_[partId+1] - 1;
  }
}

/*! \brief Return true if criteria c1 and c2 have identical part size
 *  information (either both uniform, or equal sizes for every part).
 *
 *  \throws std::logic_error if either criterion index is out of range.
 */
template <typename Adapter>
bool PartitioningSolution<Adapter>::criteriaHaveSamePartSizes(
  int c1, int c2) const
{
  if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
    throw std::logic_error("criteriaHaveSamePartSizes error");

  bool theSame = false;

  if (c1 == c2)
    theSame = true;

  else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
    theSame = true;

  else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
    theSame = true;
    bool useIndex = pCompactIndex_[c1].size() > 0;
    if (useIndex){
      // Compact representation: compare sizes through the index arrays.
      for (part_t p=0; theSame && p < nGlobalParts_; p++)
        if (pSize_[c1][pCompactIndex_[c1][p]] !=
            pSize_[c2][pCompactIndex_[c2][p]])
          theSame = false;
    }
    else{
      // Full representation: compare per-part sizes directly.
      for (part_t p=0; theSame && p < nGlobalParts_; p++)
        if (pSize_[c1][p] != pSize_[c2][p])
          theSame = false;
    }
  }

  return theSame;
}

/*! \brief Compute the assignment of parts to processes.
 *
 *  Sets onePartPerProc_, or fills exactly one of partDist_ (each part
 *  owned by a contiguous range of procs, when #parts < #procs) or
 *  procDist_ (each proc owns a contiguous range of parts, when
 *  #parts > #procs).
 *
 *  \param doCheck  if true, validate cross-process consistency of the
 *                  user-supplied values (requires global communication).
 *  \param haveNumLocalParts  true if the user set num_local_parts.
 *  \param haveNumGlobalParts true if the user set num_global_parts.
 *  \param numLocalParts  the local value (0 if not set).
 *  \param numGlobalParts the global value (0 if not set).
 */
template <typename Adapter>
void PartitioningSolution<Adapter>::partToProc(
  bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
  int numLocalParts, int numGlobalParts)
{
#ifdef _MSC_VER
  typedef SSIZE_T ssize_t;   // Windows has no ssize_t; NOMINMAX set above
#endif
  int nprocs = comm_->getSize();
  ssize_t reducevals[4];
  ssize_t sumHaveGlobal=0, sumHaveLocal=0;
  ssize_t sumGlobal=0, sumLocal=0;
  ssize_t maxGlobal=0, maxLocal=0;
  ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
    numGlobalParts, numLocalParts};

  partDist_.clear();
  procDist_.clear();

  if (doCheck){

    try{
      reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
    }
    Z2_THROW_OUTSIDE_ERROR(*env_);

    sumHaveGlobal = reducevals[0];
    sumHaveLocal = reducevals[1];
    sumGlobal = reducevals[2];
    sumLocal = reducevals[3];

    env_->localInputAssertion(__FILE__, __LINE__,
      "Either all procs specify num_global/local_parts or none do",
      (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
      (sumHaveLocal == 0 || sumHaveLocal == nprocs),
      BASIC_ASSERTION);
  }
  else{
    // Trust the local values; synthesize the global sums without
    // communication.
    if (haveNumLocalParts)
      sumLocal = numLocalParts * nprocs;
    if (haveNumGlobalParts)
      sumGlobal = numGlobalParts * nprocs;

    sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
    sumHaveLocal = haveNumLocalParts ? nprocs : 0;

    maxLocal = numLocalParts;
    maxGlobal = numGlobalParts;
  }

  if (!haveNumLocalParts && !haveNumGlobalParts){
    onePartPerProc_ = true;   // default if user did not specify
    return;
  }

  if (haveNumGlobalParts){
    if (doCheck){
      vals[0] = numGlobalParts;
      vals[1] = numLocalParts;
      try{
        reduceAll<int, ssize_t>(
          *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
      }
      Z2_THROW_OUTSIDE_ERROR(*env_);

      maxGlobal = reducevals[0];
      maxLocal = reducevals[1];

      env_->localInputAssertion(__FILE__, __LINE__,
        "Value for num_global_parts is different on different processes.",
        maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
    }

    if (sumLocal){
      env_->localInputAssertion(__FILE__, __LINE__,
        "Sum of num_local_parts does not equal requested num_global_parts",
        sumLocal == numGlobalParts, BASIC_ASSERTION);

      if (sumLocal == nprocs && maxLocal == 1){
        onePartPerProc_ = true;   // user specified one part per proc
        return;
      }
    }
    else{
      if (maxGlobal == nprocs){
        onePartPerProc_ = true;   // user specified num parts is num procs
        return;
      }
    }
  }

  // If we are here, we do not have #parts == #procs.

  if (sumHaveLocal == nprocs){
    //
    // We will go by the number of local parts specified.
    //

    try{
      procDist_.resize(nprocs+1);
    }
    catch (std::exception &e){
      throw(std::bad_alloc());
    }

    part_t *procArray = &procDist_[0];

    // Gather every rank's local part count into procArray[1..nprocs].
    try{
      part_t tmp = part_t(numLocalParts);
      gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
    }
    Z2_THROW_OUTSIDE_ERROR(*env_);

    procArray[0] = 0;

    // Prefix sum: procArray[p] is the first part owned by proc p.
    for (int proc=0; proc < nprocs; proc++)
      procArray[proc+1] += procArray[proc];
  }
  else{
    //
    // We will allocate global number of parts to the processes.
    //
    double fParts = numGlobalParts;
    double fProcs = nprocs;

    if (fParts < fProcs){

      // Fewer parts than procs: each part is shared by a contiguous
      // range of procs (partDist_).

      try{
        partDist_.resize(size_t(fParts+1));
      }
      catch (std::exception &e){
        throw(std::bad_alloc());
      }

      int *partArray = &partDist_[0];

      double each = floor(fProcs / fParts);
      double extra = fmod(fProcs, fParts);
      partDist_[0] = 0;

      // The first 'extra' parts get one additional proc.
      for (part_t part=0; part < numGlobalParts; part++){
        int numOwners = int(each + ((part<extra) ? 1 : 0));
        partArray[part+1] = partArray[part] + numOwners;
      }

      env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
        partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
    }
    else if (fParts > fProcs){

      // User did not specify local number of parts per proc;
      // Distribute the parts evenly among the procs.

      procDistEquallySpread_ = true;

      try{
        procDist_.resize(size_t(fProcs+1));
      }
      catch (std::exception &e){
        throw(std::bad_alloc());
      }

      part_t *procArray = &procDist_[0];

      double each = floor(fParts / fProcs);
      double extra = fmod(fParts, fProcs);
      procArray[0] = 0;

      // The first 'extra' procs get one additional part.
      for (int proc=0; proc < nprocs; proc++){
        part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
        procArray[proc+1] = procArray[proc] + numParts;
      }

      env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
        procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
    }
    else{
      // fParts == fProcs was handled above by the onePartPerProc_ returns.
      env_->globalBugAssertion(__FILE__, __LINE__,
        "should never get here", 1, COMPLEX_ASSERTION, comm_);
    }
  }
}

// Remap a new part assignment vector for maximum overlap with an input
// part assignment.
//
// Assumptions for this version:
// input part assignment == processor rank for every local object.
// assuming nGlobalParts_ <= num ranks
// TODO:  Write a version that takes the input part number as input;
//        this change requires input parts in adapters to be provided in
//        the Adapter.
// TODO:  For repartitioning, compare to old remapping results; see Zoltan1.

template <typename Adapter>
void PartitioningSolution<Adapter>::RemapParts()
{
  size_t len = parts_.size();

  part_t me = comm_->getRank();
  int np = comm_->getSize();

  if (np < nGlobalParts_) {
    if (me == 0)
      std::cout << "Remapping not yet supported for "
                << "num_global_parts " << nGlobalParts_
                << " > num procs " << np << std::endl;
    return;
  }
  // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
  // and edges between a process vtx and any parts to which that process'
  // objects are assigned.
  // Weight edge[parts_[i]] by the number of objects that are going from
  // this rank to parts_[i].
  // We use a std::map, assuming the number of unique parts in the parts_ array
  // is small to keep the binary search efficient.
  // TODO We use the count of objects to move; should change to SIZE of objects
  // to move; need SIZE function in Adapter.

  std::map<part_t, long> edges;
  long lstaying = 0;  // Total num of local objects staying if we keep the
                      // current mapping. TODO:  change to SIZE of local objs
  long gstaying = 0;  // Total num of objects staying in the current partition

  for (size_t i = 0; i < len; i++) {
    edges[parts_[i]]++;                 // TODO Use obj size instead of count
    if (parts_[i] == me) lstaying++;    // TODO Use obj size instead of count
  }

  Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
    &lstaying, &gstaying);
//TODO  if (gstaying == Adapter::getGlobalNumObjs()) return;  // Nothing to do

  part_t *remap = NULL;   // remap[oldPart] == newPart; built on rank 0

  int nedges = edges.size();

  // Gather the graph to rank 0.
  part_t tnVtx = np + nGlobalParts_;  // total # vertices
  int *idx = NULL;    // Pointer index into graph adjacencies
  int *sizes = NULL;  // nedges per rank
  if (me == 0) {
    idx = new int[tnVtx+1];
    sizes = new int[np];
    sizes[0] = nedges;   // gather below is skipped when np == 1
  }
  if (np > 1)
    Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);

  // prefix sum to build the idx array
  if (me == 0) {
    idx[0] = 0;
    for (int i = 0; i < np; i++)
      idx[i+1] = idx[i] + sizes[i];
  }

  // prepare to send edges
  int cnt = 0;
  part_t *bufv = NULL;
  long *bufw = NULL;
  if (nedges) {
    bufv = new part_t[nedges];
    bufw = new long[nedges];
    // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
    for (typename std::map<part_t, long>::iterator it = edges.begin();
         it != edges.end(); it++) {
      bufv[cnt] = it->first;   // target part
      bufw[cnt] = it->second;  // weight
      cnt++;
    }
  }

  // Prepare to receive edges on rank 0
  part_t *adj = NULL;
  long *wgt = NULL;
  if (me == 0) {
    //SYM adj = new part_t[2*idx[np]];  // need 2x space to symmetrize later
    //SYM wgt = new long[2*idx[np]];  // need 2x space to symmetrize later
    adj = new part_t[idx[np]];
    wgt = new long[idx[np]];
  }

  Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
  Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
  delete [] bufv;
  delete [] bufw;

  // Now have constructed graph on rank 0.
  // Call the matching algorithm

  int doRemap;   // decided on rank 0, then broadcast below
  if (me == 0) {
    // We have the "LHS" vertices of the bipartite graph; need to create
    // "RHS" vertices.
    for (int i = 0; i < idx[np]; i++) {
      adj[i] += np;  // New RHS vertex number; offset by num LHS vertices
    }

    // Build idx for RHS vertices
    for (part_t i = np; i < tnVtx; i++) {
      idx[i+1] = idx[i];  // No edges for RHS vertices
    }

#ifdef KDDKDD_DEBUG
    cout << "IDX ";
    for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
    std::cout << std::endl;

    std::cout << "ADJ ";
    for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
    std::cout << std::endl;

    std::cout << "WGT ";
    for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
    std::cout << std::endl;
#endif

    // Perform matching on the graph
    part_t *match = new part_t[tnVtx];
    for (part_t i = 0; i < tnVtx; i++) match[i] = i;  // init: self-matched
    part_t nmatches =
      Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);

#ifdef KDDKDD_DEBUG
    std::cout << "After matching:  " << nmatches << " found" << std::endl;
    for (part_t i = 0; i < tnVtx; i++)
      std::cout << "match[" << i << "] = " << match[i]
           << ((match[i] != i &&
               (i < np && match[i] != i+np))
                  ? " *" : " ")
           << std::endl;
#endif

    // See whether there were nontrivial changes in the matching.
    // (Proc i matched to part i, i.e. vertex i+np, is the identity map.)
    bool nontrivial = false;
    if (nmatches) {
      for (part_t i = 0; i < np; i++) {
        if ((match[i] != i) && (match[i] != (i+np))) {
          nontrivial = true;
          break;
        }
      }
    }

    // Process the matches
    if (nontrivial) {
      remap = new part_t[nGlobalParts_];
      for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;

      bool *used = new bool[np];
      for (part_t i = 0; i < np; i++) used[i] = false;

      // First, process all matched parts
      for (part_t i = 0; i < nGlobalParts_; i++) {
        part_t tmp = i + np;   // RHS vertex number of part i
        if (match[tmp] != tmp) {
          remap[i] = match[tmp];
          used[match[tmp]] = true;
        }
      }

      // Second, process unmatched parts; keep same part number if possible
      for (part_t i = 0; i < nGlobalParts_; i++) {
        if (remap[i] > -1) continue;
        if (!used[i]) {
          remap[i] = i;
          used[i] = true;
        }
      }

      // Third, process unmatched parts; give them the next unused part
      for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
        if (remap[i] > -1) continue;
        while (used[uidx]) uidx++;
        remap[i] = uidx;
        used[uidx] = true;
      }
      delete [] used;
    }
    delete [] match;

#ifdef KDDKDD_DEBUG
    cout << "Remap vector: ";
    for (part_t i = 0; i < nGlobalParts_; i++) cout << remap[i] << " ";
    std::cout << std::endl;
#endif

    // Remap only if it strictly increases the number of objects that stay.
    long newgstaying = measure_stays(remap, idx, adj, wgt,
      nGlobalParts_, np);
    doRemap = (newgstaying > gstaying);
    std::cout << "gstaying " << gstaying << " measure(input) "
              << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
              << " newgstaying " << newgstaying
              << " nontrivial " << nontrivial
              << " doRemap " << doRemap << std::endl;
  }
  delete [] idx;     // delete[] of NULL is harmless on non-root ranks
  delete [] sizes;
  delete [] adj;
  delete [] wgt;

  Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);

  if (doRemap) {
    // Distribute rank 0's remap vector and apply it to the local parts.
    if (me != 0) remap = new part_t[nGlobalParts_];
    Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
    for (size_t i = 0; i < len; i++) {
      parts_[i] = remap[parts_[i]];
    }
  }

  delete [] remap;  // TODO May want to keep for repartitioning as in Zoltan
}


}  // namespace Zoltan2

#endif
1.7.6.1