Zoltan2
Zoltan2_TaskMapping.hpp
Go to the documentation of this file.
00001 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
00002 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
00003 
00004 #include <fstream>
00005 #include <ctime>
00006 #include <vector>
00007 #include "Zoltan2_AlgMultiJagged.hpp"
00008 #include "Teuchos_ArrayViewDecl.hpp"
00009 #include "Zoltan2_PartitionMapping.hpp"
00010 #include "Zoltan2_MachineRepresentation.hpp"
00011 #include "Teuchos_ReductionOp.hpp"
00012 #include "Zoltan2_XpetraMultiVectorAdapter.hpp"
00013 
00014 #include "Teuchos_ConfigDefs.hpp" // define HAVE_MPI
00015 #include "Teuchos_Comm.hpp"
00016 #ifdef HAVE_MPI
00017 #  include "Teuchos_DefaultMpiComm.hpp"
00018 #else
00019 #  include "Teuchos_DefaultSerialComm.hpp"
00020 #endif // HAVE_MPI
00021 
00022 //#define gnuPlot
00023 
00024 namespace Teuchos{
00025 
/*! \brief Teuchos reduction operation that combines (cost, value) pairs:
 *  a strictly smaller cost (element 0) wins; on a cost tie within
 *  _EPSILON, the buffer whose second element is smaller (within
 *  _EPSILON) wins.
 *
 *  NOTE(review): the loop runs `count` times but only ever reads and
 *  writes elements [0] and [1] — it never advances to [2*i]. For the
 *  single-pair buffers this file reduces the extra iterations are
 *  idempotent, but confirm before reusing this op with longer buffers.
 */
template <typename Ordinal, typename T>
class Zoltan2_ReduceBestMapping  : public ValueTypeReductionOp<Ordinal,T>
{
private:
  T _EPSILON; // tolerance used when comparing the T values

public:
  /*! \brief Default constructor: tolerance is machine epsilon for T. */
  Zoltan2_ReduceBestMapping ():_EPSILON (std::numeric_limits<T>::epsilon()){}

  /*! \brief Teuchos::ValueTypeReductionOp interface: combine inBuffer
   *  into inoutBuffer. */
  void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
  {

    for (Ordinal i=0; i < count; i++){
      // incoming cost is strictly smaller: take the incoming pair.
      if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      } else if(inBuffer[0] - inoutBuffer[0] < _EPSILON &&
          inBuffer[1] - inoutBuffer[1] < _EPSILON){
        // costs are (approximately) equal: prefer the pair whose
        // second component is not larger.
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      }
    }
  }
};
00056 } // namespace Teuchos
00057 
00058 
00059 namespace Zoltan2{
00060 
/*! \brief Computes x! recursively.
 *
 *  Guarded with x <= 1 so that x == 0 (and negative inputs) return 1
 *  instead of recursing forever, which the previous `x == 1` base case
 *  did. For x >= 1 the result is unchanged.
 */
template <typename it>
inline it z2Fact(it x) {
  return (x <= 1 ? it(1) : x * z2Fact<it>(x - 1));
}
00065 
/*! \brief A simple (global id, part id) record, used when hashing
 *  solution gnos to the part they were assigned to. */
template <typename gno_t, typename part_t>
class GNO_LNO_PAIR{
public:
  gno_t gno;   // global identifier of the coordinate
  part_t part; // part the coordinate belongs to
};
00072 
//returns the ith permutation indices.
/*! \brief Writes the i-th permutation of {0, ..., n-1} into perm,
 *  using the factorial number system (Lehmer code).
 *
 *  \param n     number of elements; perm must hold n entries.
 *  \param i     permutation rank, expected in [0, n!).
 *  \param perm  output array of length n.
 */
template <typename IT>
void ithPermutation(const IT n, IT i, IT *perm)
{
  if (n <= 0) return; // nothing to write

  // factorials: fact[k] = k!
  // (std::vector instead of raw new/delete: RAII, no leak on exit paths)
  std::vector<IT> fact(n);
  fact[0] = 1;
  for (IT k = 1; k < n; ++k)
    fact[k] = fact[k - 1] * k;

  // compute the factorial code of i
  for (IT k = 0; k < n; ++k)
  {
    perm[k] = i / fact[n - 1 - k];
    i = i % fact[n - 1 - k];
  }

  // readjust values to obtain the permutation:
  // start from the end and bump entries with smaller-or-equal
  // predecessors. The inner loop counts down with `j-- > 0` so the
  // code also behaves correctly when IT is an unsigned type
  // (the previous `j >= 0` test never terminated for unsigned IT).
  for (IT k = n - 1; k > 0; --k)
    for (IT j = k; j-- > 0; )
      if (perm[j] <= perm[k])
        perm[k]++;
}
00102 
00103 template <typename part_t>
00104 void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, vector <int> grid_dims){
00105   int dim = grid_dims.size();
00106   int neighborCount = 2 * dim;
00107   task_comm_xadj = allocMemory<part_t>(taskCount);
00108   task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);
00109 
00110   part_t neighBorIndex = 0;
00111   for (part_t i = 0; i < taskCount; ++i){
00112     part_t prevDimMul = 1;
00113     for (int j = 0; j < dim; ++j){
00114       part_t lNeighbor = i - prevDimMul;
00115       part_t rNeighbor = i + prevDimMul;
00116       prevDimMul *= grid_dims[j];
00117       if (lNeighbor >= 0 &&  lNeighbor/ prevDimMul == i / prevDimMul && lNeighbor < taskCount){
00118         task_comm_adj[neighBorIndex++] = lNeighbor;
00119       }
00120       if (rNeighbor >= 0 && rNeighbor/ prevDimMul == i / prevDimMul && rNeighbor < taskCount){
00121         task_comm_adj[neighBorIndex++] = rNeighbor;
00122       }
00123     }
00124     task_comm_xadj[i] = neighBorIndex;
00125   }
00126 
00127 }
00128 //returns the center of the parts.
00129 template <typename Adapter, typename scalar_t, typename part_t>
00130 void getSolutionCenterCoordinates(
00131     const Environment *envConst,
00132     const Teuchos::Comm<int> *comm,
00133     const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords,
00134     const Zoltan2::PartitioningSolution<Adapter> *soln_,
00135     int coordDim,
00136     part_t ntasks,
00137     scalar_t **partCenters){
00138 
00139   typedef typename Adapter::lno_t lno_t;
00140   typedef typename Adapter::gno_t gno_t;
00141   typedef typename Adapter::zgid_t zgid_t;
00142 
00143   typedef StridedData<lno_t, scalar_t> input_t;
00144   ArrayView<const gno_t> gnos;
00145   ArrayView<input_t>     xyz;
00146   ArrayView<input_t>     wgts;
00147   coords->getCoordinates(gnos, xyz, wgts);
00148 
00149   //local and global num coordinates.
00150   lno_t numLocalCoords = coords->getLocalNumCoordinates();
00151   //gno_t numGlobalCoords = coords->getGlobalNumCoordinates();
00152 
00153 
00154 
00155   //local number of points in each part.
00156   gno_t *point_counts = allocMemory<gno_t>(ntasks);
00157   memset(point_counts, 0, sizeof(gno_t) * ntasks);
00158 
00159   //global number of points in each part.
00160   gno_t *global_point_counts = allocMemory<gno_t>(ntasks);
00161 
00162 
00163   scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);
00164 
00165   for (int dim=0; dim < coordDim; dim++){
00166     ArrayRCP<const scalar_t> ar;
00167     xyz[dim].getInputArray(ar);
00168     //multiJagged coordinate values assignment
00169     multiJagged_coordinates[dim] =  (scalar_t *)ar.getRawPtr();
00170     memset(partCenters[dim], 0, sizeof(scalar_t) * ntasks);
00171   }
00172 
00173   //get parts with parallel gnos.
00174   const part_t *parts = soln_->getPartList();
00175   const zgid_t *soln_gnos = soln_->getIdList();
00176   /*
00177     for (lno_t i=0; i < numLocalCoords; i++){
00178         cout << "me:" << comm->getRank() << " gno:" << soln_gnos[i] << " tmp.part :" << parts[i]<< endl;
00179     }
00180    */
00181 
00182 
00183   envConst->timerStart(MACRO_TIMERS, "Mapping - Hashing Creation");
00184   //hash vector
00185   vector< vector <GNO_LNO_PAIR<gno_t, part_t> > > hash(numLocalCoords);
00186 
00187   //insert each point in solution to hash.
00188   for (lno_t i=0; i < numLocalCoords; i++){
00189     GNO_LNO_PAIR<gno_t, part_t> tmp;
00190     tmp.gno = soln_gnos[i];
00191     tmp.part = parts[i];
00192     //cout << "gno:" << tmp.gno << " tmp.part :" << tmp.part << endl;
00193     //count the local number of points in each part.
00194     ++point_counts[tmp.part];
00195     lno_t hash_index = tmp.gno % numLocalCoords;
00196     hash[hash_index].push_back(tmp);
00197   }
00198 
00199   envConst->timerStop(MACRO_TIMERS, "Mapping - Hashing Creation");
00200   //get global number of points in each part.
00201   reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
00202       ntasks, point_counts, global_point_counts
00203   );
00204 
00205 
00206 
00207   envConst->timerStart(MACRO_TIMERS, "Mapping - Hashing Search");
00208   //add up all coordinates in each part.
00209   for (lno_t i=0; i < numLocalCoords; i++){
00210     gno_t g = gnos[i];
00211     lno_t hash_index = g % numLocalCoords;
00212     lno_t hash_size = hash[hash_index].size();
00213     part_t p = -1;
00214     for (lno_t j =0; j < hash_size; ++j){
00215       if (hash[hash_index][j].gno == g){
00216         p = hash[hash_index][j].part;
00217         break;
00218       }
00219     }
00220     if(p == -1) {
00221       std::cerr << "ERROR AT HASHING FOR GNO:"<< g << " LNO:" << i << std::endl;
00222     }
00223     //add uo all coordinates in each part.
00224     for(int j = 0; j < coordDim; ++j){
00225       scalar_t c = multiJagged_coordinates[j][i];
00226       partCenters[j][p] += c;
00227     }
00228   }
00229   envConst->timerStop(MACRO_TIMERS, "Mapping - Hashing Search");
00230 
00231   for(int j = 0; j < coordDim; ++j){
00232     for (part_t i=0; i < ntasks; ++i){
00233       partCenters[j][i] /= global_point_counts[i];
00234     }
00235   }
00236 
00237   scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
00238   for(int j = 0; j < coordDim; ++j){
00239     reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
00240         ntasks, partCenters[j], tmpCoords
00241     );
00242 
00243     scalar_t *tmp = partCenters[j];
00244     partCenters[j] = tmpCoords;
00245     tmpCoords = tmp;
00246   }
00247 
00248   freeArray<gno_t> (point_counts);
00249   freeArray<gno_t> (global_point_counts);
00250 
00251   freeArray<scalar_t> (tmpCoords);
00252   freeArray<scalar_t *>(multiJagged_coordinates);
00253 }
00254 
00255 
00258 template <class IT, class WT>
00259 class KmeansHeap{
00260   IT heapSize;
00261   IT *indices;
00262   WT *values;
00263   WT _EPSILON;
00264 
00265 
00266 public:
00267   void setHeapsize(IT heapsize_){
00268     this->heapSize = heapsize_;
00269     this->indices = allocMemory<IT>(heapsize_ );
00270     this->values = allocMemory<WT>(heapsize_ );
00271     this->_EPSILON = std::numeric_limits<WT>::epsilon();
00272   }
00273 
00274   ~KmeansHeap(){
00275     freeArray<IT>(this->indices);
00276     freeArray<WT>(this->values);
00277   }
00278 
00279 
00280   void addPoint(IT index, WT distance){
00281     WT maxVal = this->values[0];
00282     //add only the distance is smaller than the maximum distance.
00283     //cout << "indeX:" << index << "distance:" <<distance << " maxVal:" << maxVal << endl;
00284     if (distance >= maxVal) return;
00285     else {
00286       this->values[0] = distance;
00287       this->indices[0] = index;
00288       this->push_down(0);
00289     }
00290   }
00291 
00292   //heap push down operation
00293   void push_down(IT index_on_heap){
00294     IT child_index1 = 2 * index_on_heap + 1;
00295     IT child_index2 = 2 * index_on_heap + 2;
00296 
00297     IT biggerIndex = -1;
00298     if(child_index1 < this->heapSize && child_index2 < this->heapSize){
00299 
00300       if (this->values[child_index1] < this->values[child_index2]){
00301         biggerIndex = child_index2;
00302       }
00303       else {
00304         biggerIndex = child_index1;
00305       }
00306     }
00307     else if(child_index1 < this->heapSize){
00308       biggerIndex = child_index1;
00309 
00310     }
00311     else if(child_index2 < this->heapSize){
00312       biggerIndex = child_index2;
00313     }
00314     if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
00315       WT tmpVal = this->values[biggerIndex];
00316       this->values[biggerIndex] = this->values[index_on_heap];
00317       this->values[index_on_heap] = tmpVal;
00318 
00319       IT tmpIndex = this->indices[biggerIndex];
00320       this->indices[biggerIndex] = this->indices[index_on_heap];
00321       this->indices[index_on_heap] = tmpIndex;
00322       this->push_down(biggerIndex);
00323     }
00324   }
00325 
00326   void initValues(){
00327     WT MAXVAL = std::numeric_limits<WT>::max();
00328     for(IT i = 0; i < this->heapSize; ++i){
00329       this->values[i] = MAXVAL;
00330       this->indices[i] = -1;
00331     }
00332   }
00333 
00334   //returns the total distance to center in the cluster.
00335   WT getTotalDistance(){
00336 
00337     WT nc = 0;
00338     for(IT j = 0; j < this->heapSize; ++j){
00339       nc += this->values[j];
00340 
00341       //cout << "index:" << this->indices[j] << " distance:" << this->values[j] << endl;
00342     }
00343     return nc;
00344   }
00345 
00346   //returns the new center of the cluster.
00347   bool getNewCenters(WT *center, WT **coords, int dimension){
00348     bool moved = false;
00349     for(int i = 0; i < dimension; ++i){
00350       WT nc = 0;
00351       for(IT j = 0; j < this->heapSize; ++j){
00352         IT k = this->indices[j];
00353         //cout << "i:" << i << " dim:" << dimension << " k:" << k << " heapSize:" << heapSize << endl;
00354         nc += coords[i][k];
00355       }
00356       nc /= this->heapSize;
00357       moved = (ABS(center[i] - nc) > this->_EPSILON || moved );
00358       center[i] = nc;
00359 
00360     }
00361     return moved;
00362   }
00363 
00364   void copyCoordinates(IT *permutation){
00365     for(IT i = 0; i < this->heapSize; ++i){
00366       permutation[i] = this->indices[i];
00367     }
00368   }
00369 };
00370 
00373 template <class IT, class WT>
00374 class KMeansCluster{
00375 
00376   int dimension;
00377   KmeansHeap<IT,WT> closestPoints;
00378 
00379 public:
00380   WT *center;
00381   ~KMeansCluster(){
00382     freeArray<WT>(center);
00383   }
00384 
00385   void setParams(int dimension_, int heapsize){
00386     this->dimension = dimension_;
00387     this->center = allocMemory<WT>(dimension_);
00388     this->closestPoints.setHeapsize(heapsize);
00389   }
00390 
00391   void clearHeap(){
00392     this->closestPoints.initValues();
00393   }
00394 
00395   bool getNewCenters( WT **coords){
00396     return this->closestPoints.getNewCenters(center, coords, dimension);
00397   }
00398 
00399   //returns the distance of the coordinate to the center.
00400   //also adds it to the heap.
00401   WT getDistance(IT index, WT **elementCoords){
00402     WT distance = 0;
00403     for (int i = 0; i < this->dimension; ++i){
00404       WT d = (center[i] - elementCoords[i][index]);
00405       distance += d * d;
00406     }
00407     distance = pow(distance, WT(1.0 / this->dimension));
00408     closestPoints.addPoint(index, distance);
00409     return distance;
00410   }
00411 
00412   WT getDistanceToCenter(){
00413     return closestPoints.getTotalDistance();
00414   }
00415 
00416   void copyCoordinates(IT *permutation){
00417     closestPoints.copyCoordinates(permutation);
00418   }
00419 };
00420 
/*! \brief KMeansAlgorithm: clusters `numElements` points (given as dim
 *  parallel coordinate arrays) into (1 << dim) + 1 clusters, each
 *  retaining its `required_elements` closest points. Used to find the
 *  most tightly packed subset of processors.
 */
template <class IT, class WT>
class KMeansAlgorithm{

  int dim;              // coordinate dimension
  IT numElements;       // number of points to cluster
  WT **elementCoords;   // dim arrays of numElements coords (not owned)
  IT numClusters;       // (1 << dim) + 1: box corners plus the middle
  IT required_elements; // closest points each cluster keeps
  KMeansCluster <IT,WT> *clusters;
  WT *maxCoordinates;   // per-dimension maximum over all points
  WT *minCoordinates;   // per-dimension minimum over all points
public:
  ~KMeansAlgorithm(){
    freeArray<KMeansCluster <IT,WT> >(clusters);
    freeArray<WT>(maxCoordinates);
    freeArray<WT>(minCoordinates);
  }

  /*! \brief Constructor: allocates the clusters and seeds their
   *  centers at the corners of the coordinate bounding box plus its
   *  midpoint.
   */
  KMeansAlgorithm(
      int dim_ ,
      IT numElements_,
      WT **elementCoords_,
      IT required_elements_):
        dim(dim_),
        numElements(numElements_),
        elementCoords(elementCoords_),
        numClusters ((1 << dim_) + 1),
        required_elements(required_elements_)
  {
    this->clusters  = allocMemory<KMeansCluster <IT,WT> >(this->numClusters);
    //set dimension and the number of required elements for all clusters.
    for (int i = 0; i < numClusters; ++i){
      this->clusters[i].setParams(this->dim, this->required_elements);
    }

    this->maxCoordinates = allocMemory <WT> (this->dim);
    this->minCoordinates = allocMemory <WT> (this->dim);

    //obtain the min and max coordinates for each dimension.
    //assumes numElements >= 1: element 0 seeds both extrema.
    for (int j = 0; j < dim; ++j){
      this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
      for(IT i = 1; i < numElements; ++i){
        WT t = this->elementCoords[j][i];
        if(t > this->maxCoordinates[j]){
          this->maxCoordinates[j] = t;
        }
        if (t < minCoordinates[j]){
          this->minCoordinates[j] = t;
        }
      }
    }


    //assign initial cluster centers.
    //in dimension j, cluster i takes the max coordinate when the j-th
    //bit block of i is in its lower half and the min otherwise, so the
    //first 2^dim clusters enumerate the bounding-box corners.
    for (int j = 0; j < dim; ++j){
      int mod = (1 << (j+1));
      for (int i = 0; i < numClusters - 1; ++i){
        WT c = 0;
        if ( (i % mod) < mod / 2){
          c = this->maxCoordinates[j];
        }
        else {
          c = this->minCoordinates[j];
        }
        this->clusters[i].center[j] = c;
      }
    }

    //last cluster center is placed to middle.
    for (int j = 0; j < dim; ++j){
      this->clusters[numClusters - 1].center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
    }
  }

  //performs kmeans clustering of coordinates.
  //Runs at most 10 Lloyd iterations: offer every point to every
  //cluster's heap, recompute the centers, and stop early once no
  //center moves.
  void kmeans(){
    for(int it = 0; it < 10; ++it){
      for (IT j = 0; j < this->numClusters; ++j){
        this->clusters[j].clearHeap();
      }
      for (IT i = 0; i < this->numElements; ++i){
        for (IT j = 0; j < this->numClusters; ++j){
          this->clusters[j].getDistance(i,this->elementCoords);
        }
      }
      bool moved = false;
      for (IT j = 0; j < this->numClusters; ++j){
        moved =(this->clusters[j].getNewCenters(this->elementCoords) || moved );
      }
      if (!moved){
        break;
      }
    }


  }

  //finds the cluster in which the coordinates are the closest to each other.
  //Writes that cluster's retained point indices (required_elements of
  //them) into procPermutation.
  void getMinDistanceCluster(IT *procPermutation){

    WT minDistance = this->clusters[0].getDistanceToCenter();
    IT minCluster = 0;
    for (IT j = 1; j < this->numClusters; ++j){
      WT minTmpDistance = this->clusters[j].getDistanceToCenter();
      if(minTmpDistance < minDistance){
        minDistance = minTmpDistance;
        minCluster = j;
      }
    }

    this->clusters[minCluster].copyCoordinates(procPermutation);
  }
};
00556 
00557 
00558 
00559 #define MINOF(a,b) (((a)<(b))?(a):(b))
00560 
/*! \brief Fills the first arrSize entries of arr.
 *
 *  When val is non-NULL every entry is set to *val; otherwise the
 *  array receives the identity sequence 0, 1, ..., arrSize - 1.
 */
template <typename T>
void fillContinousArray(T *arr, size_t arrSize, T *val){
  if (val != NULL){
    T fillValue = *val;
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i){
      arr[i] = fillValue;
    }
  }
  else {
    // identity fill: arr[i] <- i
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i){
      arr[i] = i;
    }
  }
}
00590 
00593 template <typename part_t, typename pcoord_t>
00594 class CommunicationModel{
00595 protected:
00596   double commCost;
00597 public:
00598 
00599   part_t no_procs; //the number of processors
00600   part_t no_tasks;  //the number of taks.
00601   CommunicationModel(): commCost(),no_procs(0), no_tasks(0){}
00602   CommunicationModel(part_t no_procs_, part_t no_tasks_):
00603     commCost(),
00604     no_procs(no_procs_),
00605     no_tasks(no_tasks_){}
00606   virtual ~CommunicationModel(){}
00607   part_t getNProcs() const{
00608     return this->no_procs;
00609   }
00610   part_t getNTasks()const{
00611     return this->no_tasks;
00612   }
00613 
00614 
00615   void calculateCommunicationCost(
00616       part_t *task_to_proc,
00617       part_t *task_communication_xadj,
00618       part_t *task_communication_adj,
00619       pcoord_t *task_communication_edge_weight){
00620 
00621     double totalCost = 0;
00622 
00623     part_t commCount = 0;
00624     for (part_t task = 0; task < this->no_tasks; ++task){
00625       int assigned_proc = task_to_proc[task];
00626       part_t task_adj_begin = 0;
00627       //cout << "task:" << task << endl;
00628       part_t task_adj_end = task_communication_xadj[task];
00629       if (task > 0) task_adj_begin = task_communication_xadj[task - 1];
00630 
00631       commCount += task_adj_end - task_adj_begin;
00632       //cout << "task:" << task << " proc:" << assigned_proc << endl;
00633       for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){
00634 
00635         //cout << "task2:" << task2 << endl;
00636         part_t neighborTask = task_communication_adj[task2];
00637         //cout << "neighborTask :" << neighborTask  << endl;
00638         int neighborProc = task_to_proc[neighborTask];
00639         double distance = getProcDistance(assigned_proc, neighborProc);
00640         //cout << "assigned_proc:" << assigned_proc << " neighborProc:" << neighborProc << " d:" << distance << endl;
00641 
00642 
00643         if (task_communication_edge_weight == NULL){
00644           totalCost += distance ;
00645         }
00646         else {
00647           totalCost += distance * task_communication_edge_weight[task2];
00648         }
00649       }
00650     }
00651 
00652     this->commCost = totalCost;// commCount;
00653   }
00654 
00655   double getCommunicationCostMetric(){
00656     return this->commCost;
00657   }
00658 
00659   virtual double getProcDistance(int procId1, int procId2) const = 0;
00660 
00667   virtual void getMapping(
00668       int myRank,
00669       RCP<const Environment> env,
00670       ArrayRCP <part_t> &proc_to_task_xadj, //  = allocMemory<part_t> (this->no_procs); //holds the pointer to the task array
00671       ArrayRCP <part_t> &proc_to_task_adj, // = allocMemory<part_t>(this->no_tasks); //holds the indices of tasks wrt to proc_to_task_xadj array.
00672       ArrayRCP <part_t> &task_to_proc //allocMemory<part_t>(this->no_tasks); //holds the processors mapped to tasks.
00673   ) const = 0;
00674 };
00678 template <typename pcoord_t,  typename tcoord_t, typename part_t>
00679 class CoordinateCommunicationModel:public CommunicationModel<part_t, pcoord_t> {
00680 public:
00681   //private:
00682   int proc_coord_dim; //dimension of the processors
00683   pcoord_t **proc_coords; //the processor coordinates. allocated outside of the class.
00684   int task_coord_dim; //dimension of the tasks coordinates.
00685   tcoord_t **task_coords; //the task coordinates allocated outside of the class.
00686   int partArraySize;
00687   part_t *partNoArray;
00688 
00689   //public:
  /*! \brief Default constructor: empty model. partArraySize of -1 is a
   *  sentinel meaning "not set"; it is overridden before use. */
  CoordinateCommunicationModel():
    CommunicationModel<part_t, pcoord_t>(),
    proc_coord_dim(0),
    proc_coords(0),
    task_coord_dim(0),
    task_coords(0),
    partArraySize(-1),
    partNoArray(NULL){}

  virtual ~CoordinateCommunicationModel(){}

  /*! \brief Constructor.
   *  \param pcoord_dim_ dimension of the processor coordinates.
   *  \param pcoords_    processor coordinates (allocated by the caller;
   *                     not copied).
   *  \param tcoord_dim_ dimension of the task coordinates.
   *  \param tcoords_    task coordinates (allocated by the caller;
   *                     not copied).
   *  \param no_procs_   number of processors.
   *  \param no_tasks_   number of tasks.
   */
  CoordinateCommunicationModel(
      int pcoord_dim_,
      pcoord_t **pcoords_,
      int tcoord_dim_,
      tcoord_t **tcoords_,
      part_t no_procs_,
      part_t no_tasks_
  ):
    CommunicationModel<part_t, pcoord_t>(no_procs_, no_tasks_),
    proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
    task_coord_dim(tcoord_dim_), task_coords(tcoords_),
    partArraySize(std::min(tcoord_dim_, pcoord_dim_)),
    partNoArray(NULL){
  }


  //! Sets the length of the partNoArray.
  void setPartArraySize(int psize){
    this->partArraySize = psize;
  }
  //! Sets the part-count array used by the recursive partitioner.
  //! Stored pointer only — lifetime managed by the caller; may be NULL.
  void setPartArray(part_t *pNo){
    this->partNoArray = pNo;
  }
00731 
00738   void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const{
00739     //currently returns a random subset.
00740 
00741     part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);
00742     KMeansAlgorithm<part_t, pcoord_t > kma(
00743         minCoordDim, nprocs,
00744         this->proc_coords, ntasks);
00745 
00746     kma.kmeans();
00747     kma.getMinDistanceCluster(proc_permutation);
00748 
00749     for(int i = ntasks; i < nprocs; ++i){
00750       proc_permutation[i] = -1;
00751     }
00752     /*
00753         //fill array.
00754         fillContinousArray<part_t>(proc_permutation, nprocs, NULL);
00755         int _u_umpa_seed = 847449649;
00756         srand (time(NULL));
00757         int a = rand() % 1000 + 1;
00758         _u_umpa_seed -= a;
00759         //permute array randomly.
00760         update_visit_order(proc_permutation, nprocs,_u_umpa_seed, 1);
00761      */
00762   }
00763 
00764   //temporary, necessary for random permutation.
00765   static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
00766   {
00767     int   a = 16807;
00768     int   m = 2147483647;
00769     int   q = 127773;
00770     int   r = 2836;
00771     int   lo, hi, test;
00772     double d;
00773 
00774     lo = _u_umpa_seed % q;
00775     hi = _u_umpa_seed / q;
00776     test = (a*lo)-(r*hi);
00777     if (test>0)
00778       _u_umpa_seed = test;
00779     else
00780       _u_umpa_seed = test + m;
00781     d = (double) ((double) _u_umpa_seed / (double) m);
00782     return (part_t) (d*(double)l);
00783   }
00784 
00785   virtual double getProcDistance(int procId1, int procId2) const{
00786     double distance = 0;
00787     for (int i = 0 ; i < this->proc_coord_dim; ++i){
00788       distance += ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
00789     }
00790     return distance;
00791   }
00792 
00793 
  //temporary, does random permutation.
  /*! \brief Shuffles visitOrder in place using the umpa generator.
   *  \param visitOrder   array of n entries to permute.
   *  \param n            length of the array.
   *  \param _u_umpa_seed RNG state, advanced by every draw.
   *  \param rndm         non-zero selects block swapping; zero selects
   *                      the tail-swap strategy.
   */
  void update_visit_order(part_t* visitOrder, part_t n, int &_u_umpa_seed, part_t rndm) {
    part_t *a = visitOrder;


    if (rndm){
      part_t i, u, v, tmp;

      //too short to swap 4-element blocks safely.
      if (n <= 4)
        return;

      //swap 4-element blocks at two random offsets; u and v are drawn
      //in [0, n-4) so the +1..+3 accesses stay in bounds.
      for (i=0; i<n; i+=16)
      {
        u = umpa_uRandom(n-4, _u_umpa_seed);
        v = umpa_uRandom(n-4, _u_umpa_seed);
        SWAP(a[v], a[u], tmp);
        SWAP(a[v+1], a[u+1], tmp);
        SWAP(a[v+2], a[u+2], tmp);
        SWAP(a[v+3], a[u+3], tmp);
      }
    }
    else {
      //Fisher-Yates-style pass over the last quarter of the array:
      //swap a random earlier element into position n-i.
      part_t i, end = n / 4;

      for (i=1; i<end; i++)
      {
        part_t j=umpa_uRandom(n-i, _u_umpa_seed);
        part_t t=a[j];
        a[j] = a[n-i];
        a[n-i] = t;
      }
    }
    //PermuteInPlace(visitOrder, n);
  }
00831 
00832 
00833 
00840   virtual void getMapping(
00841       int myRank,
00842       RCP<const Environment> env,
00843       ArrayRCP <part_t> &rcp_proc_to_task_xadj, //  = allocMemory<part_t> (this->no_procs); //holds the pointer to the task array
00844       ArrayRCP <part_t> &rcp_proc_to_task_adj, // = allocMemory<part_t>(this->no_tasks); //holds the indices of tasks wrt to proc_to_task_xadj array.
00845       ArrayRCP <part_t> &rcp_task_to_proc //allocMemory<part_t>(this->no_tasks); //holds the processors mapped to tasks.
00846   ) const{
00847 
00848     rcp_proc_to_task_xadj = ArrayRCP <part_t> (this->no_procs);
00849     rcp_proc_to_task_adj = ArrayRCP <part_t> (this->no_tasks);
00850     rcp_task_to_proc = ArrayRCP <part_t> (this->no_tasks);
00851 
00852     part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr(); //holds the pointer to the task array
00853     part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr(); //holds the indices of tasks wrt to proc_to_task_xadj array.
00854     part_t *task_to_proc = rcp_task_to_proc.getRawPtr(); //holds the processors mapped to tasks.);
00855 
00856 
00857     part_t invalid = 0;
00858     fillContinousArray<part_t> (proc_to_task_xadj, this->no_procs, &invalid);
00859 
00860     //obtain the number of parts that should be divided.
00861     part_t num_parts = MINOF(this->no_procs, this->no_tasks);
00862     //obtain the min coordinate dim.
00863     part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);
00864 
00865     int recursion_depth = partArraySize;
00866     if(partArraySize < minCoordDim) recursion_depth = minCoordDim;
00867 
00868     int taskPerm = z2Fact<int>(this->task_coord_dim); //get the number of different permutations for task dimension ordering
00869     int procPerm = z2Fact<int>(this->proc_coord_dim); //get the number of different permutations for proc dimension ordering
00870     int permutations =  taskPerm * procPerm; //total number of permutations
00871 
00872     //holds the pointers to proc_adjList
00873     part_t *proc_xadj = allocMemory<part_t> (num_parts);
00874     //holds the processors in parts according to the result of partitioning algorithm.
00875     //the processors assigned to part x is at proc_adjList[ proc_xadj[x - 1] : proc_xadj[x] ]
00876     part_t *proc_adjList = allocMemory<part_t>(this->no_procs);
00877 
00878 
00879     part_t used_num_procs = this->no_procs;
00880     if(this->no_procs > this->no_tasks){
00881       //obtain the subset of the processors that are closest to each other.
00882       this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
00883       used_num_procs = this->no_tasks;
00884     }
00885     else {
00886       fillContinousArray<part_t>(proc_adjList,this->no_procs, NULL);
00887     }
00888 
00889     int myPermutation = myRank % permutations; //the index of the permutation
00890 
00891     int myProcPerm=  myPermutation % procPerm; // the index of the proc permutation
00892     int myTaskPerm  = myPermutation / procPerm; // the index of the task permutation
00893 
00894     int *permutation = allocMemory<int> ((this->proc_coord_dim > this->task_coord_dim)
00895         ? this->proc_coord_dim : this->task_coord_dim);
00896 
00897     //get the permutation order from the proc permutation index.
00898     ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
00899     //reorder the coordinate dimensions.
00900     pcoord_t **pcoords = allocMemory<pcoord_t *> (this->proc_coord_dim);
00901     for(int i = 0; i < this->proc_coord_dim; ++i){
00902       pcoords[i] = this->proc_coords[permutation[i]];
00903       //cout << permutation[i] << " ";
00904     }
00905 
00906 
00907     //do the partitioning and renumber the parts.
00908     env->timerStart(MACRO_TIMERS, "Mapping - Proc Partitioning");
00909 
00910     AlgMJ<pcoord_t, part_t, part_t, part_t> mj_partitioner;
00911     mj_partitioner.sequential_task_partitioning(
00912         env,
00913         this->no_procs,
00914         used_num_procs,
00915         num_parts,
00916         minCoordDim,
00917         pcoords,//this->proc_coords,
00918         proc_adjList,
00919         proc_xadj,
00920         recursion_depth,
00921         partNoArray
00922         //,"proc_partitioning"
00923     );
00924     env->timerStop(MACRO_TIMERS, "Mapping - Proc Partitioning");
00925     freeArray<pcoord_t *> (pcoords);
00926 
00927 
00928     part_t *task_xadj = allocMemory<part_t> (num_parts);
00929     part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
00930     //fill task_adjList st: task_adjList[i] <- i.
00931     fillContinousArray<part_t>(task_adjList,this->no_tasks, NULL);
00932 
00933     //get the permutation order from the task permutation index.
00934     ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);
00935 
00936     //reorder task coordinate dimensions.
00937     tcoord_t **tcoords = allocMemory<tcoord_t *> (this->task_coord_dim);
00938     for(int i = 0; i < this->task_coord_dim; ++i){
00939       tcoords[i] = this->task_coords[permutation[i]];
00940     }
00941 
00942     env->timerStart(MACRO_TIMERS, "Mapping - Task Partitioning");
00943     //partitioning of tasks
00944     mj_partitioner.sequential_task_partitioning(
00945         env,
00946         this->no_tasks,
00947         this->no_tasks,
00948         num_parts,
00949         minCoordDim,
00950         tcoords, //this->task_coords,
00951         task_adjList,
00952         task_xadj,
00953         recursion_depth,
00954         partNoArray
00955         //,"task_partitioning"
00956     );
00957     env->timerStop(MACRO_TIMERS, "Mapping - Task Partitioning");
00958     freeArray<pcoord_t *> (tcoords);
00959     freeArray<int> (permutation);
00960 
00961 
00962     //filling proc_to_task_xadj, proc_to_task_adj, task_to_proc arrays.
00963     for(part_t i = 0; i < num_parts; ++i){
00964 
00965       part_t proc_index_begin = 0;
00966       part_t task_begin_index = 0;
00967 
00968       if (i > 0) {
00969         proc_index_begin = proc_xadj[i - 1];
00970         task_begin_index = task_xadj[i - 1];
00971       }
00972       part_t proc_index_end = proc_xadj[i];
00973       part_t task_end_index = task_xadj[i];
00974 
00975 
00976       if(proc_index_end - proc_index_begin != 1){
00977         std::cerr << "Error at partitioning of processors" << std::endl;
00978         std::cerr << "PART:" << i << " is assigned to " << proc_index_end - proc_index_begin << " processors." << std::endl;
00979         exit(1);
00980       }
00981       part_t assigned_proc = proc_adjList[proc_index_begin];
00982       proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
00983     }
00984 
00985 
00986     //holds the pointer to the task array
00987     part_t *proc_to_task_xadj_work = allocMemory<part_t> (this->no_procs);
00988     proc_to_task_xadj_work[0] = proc_to_task_xadj[0];
00989     for(part_t i = 1; i < this->no_procs; ++i){
00990       proc_to_task_xadj[i] += proc_to_task_xadj[i - 1];
00991       proc_to_task_xadj_work[i] = proc_to_task_xadj[i];
00992     }
00993 
00994     for(part_t i = 0; i < num_parts; ++i){
00995 
00996       part_t proc_index_begin = 0;
00997       part_t task_begin_index = 0;
00998 
00999       if (i > 0) {
01000         proc_index_begin = proc_xadj[i - 1];
01001         task_begin_index = task_xadj[i - 1];
01002       }
01003       part_t task_end_index = task_xadj[i];
01004 
01005       part_t assigned_proc = proc_adjList[proc_index_begin];
01006 
01007       for (part_t j = task_begin_index; j < task_end_index; ++j){
01008         part_t taskId = task_adjList[j];
01009 
01010         task_to_proc[taskId] = assigned_proc;
01011 
01012         proc_to_task_adj [ --proc_to_task_xadj_work[assigned_proc] ] = taskId;
01013       }
01014     }
01015 
01016     freeArray<part_t>(proc_to_task_xadj_work);
01017     freeArray<part_t>(task_xadj);
01018     freeArray<part_t>(task_adjList);
01019     freeArray<part_t>(proc_xadj);
01020     freeArray<part_t>(proc_adjList);
01021   }
01022 
01023 };
01024 
01025 template <typename Adapter, typename part_t>
01026 class CoordinateTaskMapper:public PartitionMapping<Adapter>{
01027 protected:
01028 
01029 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01030 
01031   typedef typename Adapter::scalar_t pcoord_t;
01032   typedef typename Adapter::scalar_t tcoord_t;
01033 
01034 #endif
01035 
01036   //RCP<const Environment> env;
01037   ArrayRCP<part_t> proc_to_task_xadj; //  = allocMemory<part_t> (this->no_procs); //holds the pointer to the task array
01038   ArrayRCP<part_t> proc_to_task_adj; // = allocMemory<part_t>(this->no_tasks); //holds the indices of tasks wrt to proc_to_task_xadj array.
01039   ArrayRCP<part_t> task_to_proc; //allocMemory<part_t>(this->no_procs); //holds the processors mapped to tasks.
01040   bool isOwnerofModel;
01041   CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t> *proc_task_comm;
01042   part_t nprocs;
01043   part_t ntasks;
01044   ArrayRCP<part_t>task_communication_xadj;
01045   ArrayRCP<part_t>task_communication_adj;
01046 
01047 
01050   void doMapping(int myRank){
01051 
01052     if(this->proc_task_comm){
01053       this->proc_task_comm->getMapping(
01054           myRank,
01055           Teuchos::RCP<const Environment>(this->env, false),
01056           this->proc_to_task_xadj, //  = allocMemory<part_t> (this->no_procs); //holds the pointer to the task array
01057           this->proc_to_task_adj, // = allocMemory<part_t>(this->no_tasks); //holds the indices of tasks wrt to proc_to_task_xadj array.
01058           this->task_to_proc //allocMemory<part_t>(this->no_procs); //holds the processors mapped to tasks.);
01059       );
01060     }
01061     else {
01062       std::cerr << "communicationModel is not specified in the Mapper" << std::endl;
01063       exit(1);
01064     }
01065   }
01066 
01067 
01070   RCP<Comm<int> > create_subCommunicator(){
01071     int procDim = this->proc_task_comm->proc_coord_dim;
01072     int taskDim = this->proc_task_comm->task_coord_dim;
01073 
01074     int taskPerm = z2Fact<int>(procDim); //get the number of different permutations for task dimension ordering
01075     int procPerm = z2Fact<int>(taskDim); //get the number of different permutations for proc dimension ordering
01076     int idealGroupSize =  taskPerm * procPerm; //total number of permutations
01077 
01078     int myRank = this->comm->getRank();
01079     int commSize = this->comm->getSize();
01080 
01081     int myGroupIndex = myRank / idealGroupSize;
01082 
01083     int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
01084     if (prevGroupBegin < 0) prevGroupBegin = 0;
01085     int myGroupBegin = myGroupIndex * idealGroupSize;
01086     int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
01087     int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;
01088 
01089     if (myGroupEnd > commSize){
01090       myGroupBegin = prevGroupBegin;
01091       myGroupEnd = commSize;
01092     }
01093     if (nextGroupEnd > commSize){
01094       myGroupEnd = commSize;
01095     }
01096     int myGroupSize = myGroupEnd - myGroupBegin;
01097 
01098     part_t *myGroup = allocMemory<part_t>(myGroupSize);
01099     for (int i = 0; i < myGroupSize; ++i){
01100       myGroup[i] = myGroupBegin + i;
01101     }
01102     //cout << "me:" << myRank << " myGroupBegin:" << myGroupBegin << " myGroupEnd:" << myGroupEnd << endl;
01103 
01104     ArrayView<const part_t> myGroupView(myGroup, myGroupSize);
01105 
01106     RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
01107     freeArray<part_t>(myGroup);
01108     return subComm;
01109   }
01110 
01111 
01114   void getBestMapping(){
01115     //create the sub group.
01116     RCP<Comm<int> > subComm = this->create_subCommunicator();
01117     //calculate cost.
01118     double myCost = this->proc_task_comm->getCommunicationCostMetric();
01119     //cout << "me:" << this->comm->getRank() << " myCost:" << myCost << endl;
01120     double localCost[2], globalCost[2];
01121 
01122     localCost[0] = myCost;
01123     localCost[1] = double(subComm->getRank());
01124 
01125     globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
01126     Teuchos::Zoltan2_ReduceBestMapping<int,double> reduceBest;
01127     reduceAll<int, double>(*subComm, reduceBest,
01128         2, localCost, globalCost);
01129 
01130     int sender = int(globalCost[1]);
01131 
01132     //cout << "me:" << localCost[1] << " localcost:" << localCost[0]<< " bestcost:" << globalCost[0] << endl;
01133     //cout << "me:" << localCost[1] << " proc:" << globalCost[1] << endl;
01134     broadcast (*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
01135     broadcast (*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
01136     broadcast (*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
01137   }
01138 
01139 
01140 
01141   //write mapping to gnuPlot code to visualize.
01142   void writeMapping(){
01143     std::ofstream gnuPlotCode ("gnuPlot.plot", std::ofstream::out);
01144 
01145     int mindim = MINOF(proc_task_comm->proc_coord_dim, proc_task_comm->task_coord_dim);
01146     std::string ss = "";
01147     for(part_t i = 0; i < this->nprocs; ++i){
01148 
01149       std::string procFile = toString<int>(i) + "_mapping.txt";
01150       if (i == 0){
01151         gnuPlotCode << "plot \"" << procFile << "\"\n";
01152       }
01153       else {
01154         gnuPlotCode << "replot \"" << procFile << "\"\n";
01155       }
01156 
01157       std::ofstream inpFile (procFile.c_str(), std::ofstream::out);
01158 
01159       std::string gnuPlotArrow = "set arrow from ";
01160       for(int j = 0; j <  mindim; ++j){
01161         if (j == mindim - 1){
01162           inpFile << proc_task_comm->proc_coords[j][i];
01163           gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]);
01164 
01165         }
01166         else {
01167           inpFile << proc_task_comm->proc_coords[j][i] << " ";
01168           gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]) +",";
01169         }
01170       }
01171       gnuPlotArrow += " to ";
01172 
01173 
01174       inpFile << std::endl;
01175       ArrayView<part_t> a = this->getAssignedTaksForProc(i);
01176       for(int k = 0; k <  a.size(); ++k){
01177         int j = a[k];
01178         //cout << "i:" << i << " j:"
01179         std::string gnuPlotArrow2 = gnuPlotArrow;
01180         for(int z = 0; z <  mindim; ++z){
01181           if(z == mindim - 1){
01182 
01183             //cout << "z:" << z << " j:" <<  j << " " << proc_task_comm->task_coords[z][j] << endl;
01184             inpFile << proc_task_comm->task_coords[z][j];
01185             gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]);
01186           }
01187           else{
01188             inpFile << proc_task_comm->task_coords[z][j] << " ";
01189             gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]) +",";
01190           }
01191         }
01192         ss += gnuPlotArrow2 + "\n";
01193         inpFile << std::endl;
01194       }
01195       inpFile.close();
01196 
01197     }
01198     gnuPlotCode << ss;
01199     gnuPlotCode << "\nreplot\n pause -1 \n";
01200     gnuPlotCode.close();
01201 
01202   }
01203 
01204 
01205   //write mapping to gnuPlot code to visualize.
01206   void writeMapping2(int myRank){
01207 
01208     std::string rankStr = toString<int>(myRank);
01209     std::string gnuPlots = "gnuPlot", extentionS = ".plot";
01210     std::string outF = gnuPlots + rankStr+ extentionS;
01211     std::ofstream gnuPlotCode ( outF.c_str(), std::ofstream::out);
01212 
01213     CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *tmpproc_task_comm =
01214         static_cast <CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> * > (proc_task_comm);
01215     int mindim = MINOF(tmpproc_task_comm->proc_coord_dim, tmpproc_task_comm->task_coord_dim);
01216     std::string ss = "";
01217     std::string procs = "", parts = "";
01218     for(part_t i = 0; i < this->nprocs; ++i){
01219 
01220       //inpFile << std::endl;
01221       ArrayView<part_t> a = this->getAssignedTaksForProc(i);
01222       if (a.size() == 0){
01223         continue;
01224       }
01225 
01226       //std::ofstream inpFile (procFile.c_str(), std::ofstream::out);
01227 
01228       std::string gnuPlotArrow = "set arrow from ";
01229       for(int j = 0; j <  mindim; ++j){
01230         if (j == mindim - 1){
01231           //inpFile << proc_task_comm->proc_coords[j][i];
01232           gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
01233           procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
01234 
01235         }
01236         else {
01237           //inpFile << proc_task_comm->proc_coords[j][i] << " ";
01238           gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]) +",";
01239           procs += toString<float>(tmpproc_task_comm->proc_coords[j][i])+ " ";
01240         }
01241       }
01242       procs += "\n";
01243 
01244       gnuPlotArrow += " to ";
01245 
01246 
01247       for(int k = 0; k <  a.size(); ++k){
01248         int j = a[k];
01249         //cout << "i:" << i << " j:"
01250         std::string gnuPlotArrow2 = gnuPlotArrow;
01251         for(int z = 0; z <  mindim; ++z){
01252           if(z == mindim - 1){
01253 
01254             //cout << "z:" << z << " j:" <<  j << " " << proc_task_comm->task_coords[z][j] << endl;
01255             //inpFile << proc_task_comm->task_coords[z][j];
01256             gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]);
01257             parts += toString<float>(tmpproc_task_comm->task_coords[z][j]);
01258           }
01259           else{
01260             //inpFile << proc_task_comm->task_coords[z][j] << " ";
01261             gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]) +",";
01262             parts += toString<float>(tmpproc_task_comm->task_coords[z][j]) + " ";
01263           }
01264         }
01265         parts += "\n";
01266         ss += gnuPlotArrow2 + " nohead\n";
01267         //inpFile << std::endl;
01268       }
01269       //inpFile.close();
01270 
01271     }
01272 
01273 
01274     std::ofstream procFile ("procPlot.plot", std::ofstream::out);
01275     procFile << procs << "\n";
01276     procFile.close();
01277 
01278     std::ofstream partFile ("partPlot.plot", std::ofstream::out);
01279     partFile << parts<< "\n";
01280     partFile.close();
01281 
01282     std::ofstream extraProcFile ("allProc.plot", std::ofstream::out);
01283 
01284     for(part_t j = 0; j < this->nprocs; ++j){
01285       for(int i = 0; i <  mindim; ++i){
01286         extraProcFile << tmpproc_task_comm->proc_coords[i][j] <<  " ";
01287       }
01288       extraProcFile << std::endl;
01289     }
01290 
01291     extraProcFile.close();
01292 
01293     gnuPlotCode << ss;
01294     if(mindim == 2){
01295       gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
01296     } else {
01297       gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
01298     }
01299     gnuPlotCode << "replot \"partPlot.plot\" with points pointsize 3\n";
01300     gnuPlotCode << "replot \"allProc.plot\" with points pointsize 0.65\n";
01301     gnuPlotCode << "\nreplot\n pause -1 \n";
01302     gnuPlotCode.close();
01303 
01304   }
01305 
01306 
01307   void writeGnuPlot(
01308       const Teuchos::Comm<int> *comm_,
01309       const Zoltan2::PartitioningSolution<Adapter> *soln_,
01310       int coordDim,
01311       tcoord_t **partCenters
01312   ){
01313     std::string file = "gggnuPlot";
01314     std::string exten = ".plot";
01315     std::ofstream mm("2d.txt");
01316     file += toString<int>(comm_->getRank()) + exten;
01317     std::ofstream ff(file.c_str());
01318     //ff.seekg (0, ff.end);
01319     RCP < vector <Zoltan2::coordinateModelPartBox <tcoord_t, part_t> > > outPartBoxes = ((Zoltan2::PartitioningSolution<Adapter> *)soln_)->getPartBoxes();
01320 
01321     for (part_t i = 0; i < this->ntasks;++i){
01322       (*outPartBoxes)[i].writeGnuPlot(ff, mm);
01323     }
01324     if (coordDim == 2){
01325       ff << "plot \"2d.txt\"" << std::endl;
01326       //ff << "\n pause -1" << endl;
01327     }
01328     else {
01329       ff << "splot \"2d.txt\"" << std::endl;
01330       //ff << "\n pause -1" << endl;
01331     }
01332     mm.close();
01333 
01334     ff << "set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
01335     for (part_t i = 0; i < this->ntasks;++i){
01336       part_t pb = 0;
01337       if (i > 0) pb = task_communication_xadj[i -1];
01338       part_t pe = task_communication_xadj[i];
01339       for (part_t p = pb; p < pe; ++p){
01340         part_t n = task_communication_adj[p];
01341 
01342         //cout << "i:" << i << " n:" << n << endl;
01343         std::string arrowline = "set arrow from ";
01344         for (int j = 0; j < coordDim - 1; ++j){
01345           arrowline += toString<tcoord_t>(partCenters[j][n]) + ",";
01346         }
01347         arrowline += toString<tcoord_t>(partCenters[coordDim -1][n]) + " to ";
01348 
01349 
01350         for (int j = 0; j < coordDim - 1; ++j){
01351           arrowline += toString<tcoord_t>(partCenters[j][i]) + ",";
01352         }
01353         arrowline += toString<tcoord_t>(partCenters[coordDim -1][i]) + " as 5\n";
01354 
01355         //cout << "arrow:" << arrowline << endl;
01356         ff << arrowline;
01357       }
01358     }
01359 
01360     ff << "replot\n pause -1" << std::endl;
01361     ff.close();
01362   }
01363 
01364 public:
01365 
01366   void getProcTask(part_t* &proc_to_task_xadj_, part_t* &proc_to_task_adj_){
01367     proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
01368     proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
01369   }
01370 
01371 
01372   virtual ~CoordinateTaskMapper(){
01373     //freeArray<part_t> (proc_to_task_xadj);
01374     //freeArray<part_t> (proc_to_task_adj);
01375     //freeArray<part_t> (task_to_proc);
01376     if(this->isOwnerofModel){
01377       delete this->proc_task_comm;
01378     }
01379   }
01380 
01381 
01392   CoordinateTaskMapper(
01393       const Teuchos::Comm<int> *comm_,
01394       const MachineRepresentation<pcoord_t> *machine_,
01395       const Zoltan2::Model<typename Adapter::base_adapter_t> *model_,
01396       const Zoltan2::PartitioningSolution<Adapter> *soln_,
01397       const Environment *envConst):
01398         PartitionMapping<Adapter> (comm_, machine_, model_, soln_, envConst),
01399         proc_to_task_xadj(0),
01400         proc_to_task_adj(0),
01401         task_to_proc(0),
01402         isOwnerofModel(true),
01403         proc_task_comm(0),
01404         task_communication_xadj(0),
01405         task_communication_adj(0){
01406 
01407     pcoord_t *task_communication_edge_weight_ = NULL;
01408     //if mapping type is 0 then it is coordinate mapping
01409     int procDim = machine_->getProcDim();
01410     this->nprocs = machine_->getNumProcs();
01411     //get processor coordinates.
01412     pcoord_t **procCoordinates = machine_->getProcCoords();
01413 
01414     int coordDim = ((Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *)model_)->getCoordinateDim();
01415     this->ntasks = soln_->getActualGlobalNumberOfParts();
01416     if (part_t (soln_->getTargetGlobalNumberOfParts()) > this->ntasks){
01417       this->ntasks = soln_->getTargetGlobalNumberOfParts();
01418     }
01419     //cout << "actual: " << this->ntasks << endl;
01420 
01421     //alloc memory for part centers.
01422     tcoord_t **partCenters = NULL;
01423     partCenters = allocMemory<tcoord_t *>(coordDim);
01424     for (int i = 0; i < coordDim; ++i){
01425       partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
01426     }
01427 
01428 
01429     envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");
01430     //get centers for the parts.
01431     getSolutionCenterCoordinates<Adapter, typename Adapter::scalar_t,part_t>(
01432         envConst,
01433         comm_,
01434         ((Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *)model_),
01435         this->soln,
01436         coordDim,
01437         ntasks,
01438         partCenters);
01439 
01440     envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");
01441 
01442 
01443     //create coordinate communication model.
01444     this->proc_task_comm =
01445         new Zoltan2::CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t>(
01446             procDim,
01447             procCoordinates,
01448             coordDim,
01449             partCenters,
01450             this->nprocs,
01451             this->ntasks
01452         );
01453 
01454     int myRank = comm_->getRank();
01455 
01456 
01457     envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
01458     this->doMapping(myRank);
01459     envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");
01460 
01461 
01462     envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
01463     ((Zoltan2::PartitioningSolution<Adapter> *)soln_)->getCommunicationGraph(
01464         comm_,
01465         task_communication_xadj,
01466         task_communication_adj
01467     );
01468 
01469     envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");
01470   #ifdef gnuPlot
01471     if (comm_->getRank() == 0){
01472 
01473       part_t taskCommCount = task_communication_xadj.size();
01474       std::cout << " TotalComm:" << task_communication_xadj[taskCommCount - 1] << std::endl;
01475       part_t maxN = task_communication_xadj[0];
01476       for (part_t i = 1; i < taskCommCount; ++i){
01477         part_t nc = task_communication_xadj[i] - task_communication_xadj[i - 1];
01478         if (maxN < nc) maxN = nc;
01479       }
01480       std::cout << " maxNeighbor:" << maxN << std::endl;
01481     }
01482 
01483     this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
01484   #endif
01485 
01486     envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
01487     this->proc_task_comm->calculateCommunicationCost(
01488         task_to_proc.getRawPtr(),
01489         task_communication_xadj.getRawPtr(),
01490         task_communication_adj.getRawPtr(),
01491         task_communication_edge_weight_
01492     );
01493 
01494 
01495     envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");
01496 
01497     //processors are divided into groups of size numProc! * numTasks!
01498     //each processor in the group obtains a mapping with a different rotation
01499     //and best one is broadcasted all processors.
01500     this->getBestMapping();
01501   #ifdef gnuPlot
01502     this->writeMapping2(comm_->getRank());
01503   #endif
01504 
01505     for (int i = 0; i < coordDim; ++i){
01506       freeArray<tcoord_t>(partCenters[i]);
01507     }
01508     freeArray<tcoord_t *>(partCenters);
01509 
01510   }
01511 
01512 
01559   CoordinateTaskMapper(
01560       const Environment *env_const_,
01561       const Teuchos::Comm<int> *problemComm,
01562       int proc_dim,
01563       int num_processors,
01564       pcoord_t **machine_coords,
01565 
01566       int task_dim,
01567       part_t num_tasks,
01568       tcoord_t **task_coords,
01569       ArrayRCP<part_t>task_comm_xadj,
01570       ArrayRCP<part_t>task_comm_adj,
01571       pcoord_t *task_communication_edge_weight_,
01572       int recursion_depth,
01573       part_t *part_no_array,
01574       part_t *machine_dimensions
01575   ):  PartitionMapping<Adapter>(problemComm, NULL, NULL, NULL, env_const_),
01576       proc_to_task_xadj(0),
01577       proc_to_task_adj(0),
01578       task_to_proc(0),
01579       isOwnerofModel(true),
01580       proc_task_comm(0),
01581       task_communication_xadj(task_comm_xadj),
01582       task_communication_adj(task_comm_adj){
01583 
01584     //if mapping type is 0 then it is coordinate mapping
01585     pcoord_t ** virtual_machine_coordinates  = machine_coords;
01586     if (machine_dimensions){
01587       virtual_machine_coordinates =
01588           this->shiftMachineCoordinates (
01589               proc_dim,
01590               machine_dimensions,
01591               num_processors,
01592               machine_coords);
01593     }
01594 
01595     this->nprocs = num_processors;
01596 
01597     int coordDim = task_dim;
01598     this->ntasks = num_tasks;
01599 
01600     //alloc memory for part centers.
01601     tcoord_t **partCenters = task_coords;
01602 
01603     //create coordinate communication model.
01604     this->proc_task_comm =
01605         new Zoltan2::CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t>(
01606             proc_dim,
01607             virtual_machine_coordinates,
01608             coordDim,
01609             partCenters,
01610             this->nprocs,
01611             this->ntasks
01612         );
01613     this->proc_task_comm->setPartArraySize(recursion_depth);
01614     this->proc_task_comm->setPartArray(part_no_array);
01615 
01616     int myRank = problemComm->getRank();
01617 
01618     this->doMapping(myRank);
01619 #ifdef gnuPlot
01620     this->writeMapping2(myRank);
01621 #endif
01622 
01623     if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
01624       this->proc_task_comm->calculateCommunicationCost(
01625           task_to_proc.getRawPtr(),
01626           task_communication_xadj.getRawPtr(),
01627           task_communication_adj.getRawPtr(),
01628           task_communication_edge_weight_
01629       );
01630 
01631 
01632       this->getBestMapping();
01633 
01634 /*
01635       if (myRank == 0){
01636         this->proc_task_comm->calculateCommunicationCost(
01637             task_to_proc.getRawPtr(),
01638             task_communication_xadj.getRawPtr(),
01639             task_communication_adj.getRawPtr(),
01640             task_communication_edge_weight_
01641         );
01642         cout << "me: " << problemComm->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << endl;
01643       }
01644 */
01645 
01646     }
01647 
01648     if (machine_dimensions){
01649       for (int i = 0; i < proc_dim; ++i){
01650         delete [] virtual_machine_coordinates[i];
01651       }
01652       delete [] virtual_machine_coordinates;
01653     }
01654 #ifdef gnuPlot
01655     if(comm_->getRank() == 0)
01656       this->writeMapping2(-1);
01657 #endif
01658   }
01659 
01660 
01661   double getCommunicationCostMetric(){
01662     return this->proc_task_comm->getCommCost();
01663   }
01664 
01667   virtual size_t getLocalNumberOfParts() const{
01668     return 0;
01669   }
01670 
01679   pcoord_t **shiftMachineCoordinates(
01680       int machine_dim,
01681       part_t *machine_dimensions,
01682       part_t numProcs,
01683       pcoord_t **mCoords){
01684     pcoord_t **result_machine_coords = NULL;
01685     result_machine_coords = new pcoord_t*[machine_dim];
01686     for (int i = 0; i < machine_dim; ++i){
01687       result_machine_coords[i] = new pcoord_t [numProcs];
01688     }
01689 
01690     for (int i = 0; i < machine_dim; ++i){
01691       part_t numMachinesAlongDim = machine_dimensions[i];
01692       part_t *machineCounts= new part_t[numMachinesAlongDim];
01693       memset(machineCounts, 0, sizeof(part_t) *numMachinesAlongDim);
01694 
01695       int *filledCoordinates= new int[numMachinesAlongDim];
01696 
01697       pcoord_t *coords = mCoords[i];
01698       for(part_t j = 0; j < numProcs; ++j){
01699         part_t mc = (part_t) coords[j];
01700         ++machineCounts[mc];
01701       }
01702 
01703       part_t filledCoordinateCount = 0;
01704       for(part_t j = 0; j < numMachinesAlongDim; ++j){
01705         if (machineCounts[j] > 0){
01706           filledCoordinates[filledCoordinateCount++] = j;
01707         }
01708       }
01709 
01710       part_t firstProcCoord = filledCoordinates[0];
01711       part_t firstProcCount = machineCounts[firstProcCoord];
01712 
01713       part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
01714       part_t lastProcCount = machineCounts[lastProcCoord];
01715 
01716       part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
01717       part_t firstLastGapProc = lastProcCount + firstProcCount;
01718 
01719       part_t leftSideProcCoord = firstProcCoord;
01720       part_t leftSideProcCount = firstProcCount;
01721       part_t biggestGap = 0;
01722       part_t biggestGapProc = numProcs;
01723 
01724       part_t shiftBorderCoordinate = -1;
01725       for(part_t j = 1; j < filledCoordinateCount; ++j){
01726         part_t rightSideProcCoord= filledCoordinates[j];
01727         part_t rightSideProcCount = machineCounts[rightSideProcCoord];
01728 
01729         part_t gap = rightSideProcCoord - leftSideProcCoord;
01730         part_t gapProc = rightSideProcCount + leftSideProcCount;
01731 
01732         /* Pick the largest gap in this dimension. Use fewer process on either side
01733                  of the largest gap to break the tie. An easy addition to this would
01734                  be to weight the gap by the number of processes. */
01735         if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
01736           shiftBorderCoordinate = rightSideProcCoord;
01737           biggestGapProc = gapProc;
01738           biggestGap = gap;
01739         }
01740         leftSideProcCoord = rightSideProcCoord;
01741         leftSideProcCount = rightSideProcCount;
01742       }
01743 
01744 
01745       if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
01746         shiftBorderCoordinate = -1;
01747       }
01748 
01749       for(part_t j = 0; j < numProcs; ++j){
01750 
01751         if (coords[j] < shiftBorderCoordinate){
01752           result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
01753 
01754         }
01755         else {
01756           result_machine_coords[i][j] = coords[j];
01757         }
01758         //cout << "I:" << i << "j:" << j << " coord:" << coords[j] << " now:" << result_machine_coords[i][j] << endl;
01759       }
01760       delete [] machineCounts;
01761       delete [] filledCoordinates;
01762     }
01763 
01764     return result_machine_coords;
01765 
01766   }
01767 
01774   virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *procs) const{
01775     numProcs = 1;
01776     procs = this->task_to_proc.getRawPtr() + taskId;
01777   }
01781   inline part_t getAssignedProcForTask(part_t taskId){
01782     return this->task_to_proc[taskId];
01783   }
01790   virtual void getPartsForProc(int procId, part_t &numParts, part_t *parts) const{
01791 
01792     part_t task_begin = 0;
01793     if (procId > 0) task_begin = this->proc_to_task_xadj[procId - 1];
01794     part_t taskend = this->proc_to_task_xadj[procId];
01795     parts = this->proc_to_task_adj.getRawPtr() + task_begin;
01796     numParts = taskend - task_begin;
01797   }
01798 
01799   ArrayView<part_t> getAssignedTaksForProc(part_t procId){
01800     part_t task_begin = 0;
01801     if (procId > 0) task_begin = this->proc_to_task_xadj[procId - 1];
01802     part_t taskend = this->proc_to_task_xadj[procId];
01803 
01804     /*
01805         cout << "part_t:" << procId << " taskCount:" << taskend - task_begin << endl;
01806         for(part_t i = task_begin; i < taskend; ++i){
01807             cout << "part_t:" << procId << " task:" << proc_to_task_adj[i] << endl;
01808         }
01809      */
01810     if (taskend - task_begin > 0){
01811       ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
01812       return assignedParts;
01813     }
01814     else {
01815       ArrayView <part_t> assignedParts;
01816       return assignedParts;
01817     }
01818   }
01819 
01820 };
01821 
01822 
01823 
01876 template <typename part_t, typename pcoord_t, typename tcoord_t>
01877 void coordinateTaskMapperInterface(
01878     RCP<const Teuchos::Comm<int> > problemComm,
01879     int proc_dim,
01880     int num_processors,
01881     pcoord_t **machine_coords,
01882     int task_dim,
01883     part_t num_tasks,
01884     tcoord_t **task_coords,
01885     part_t *task_comm_xadj,
01886     part_t *task_comm_adj,
01887     pcoord_t *task_communication_edge_weight_, /*float-like, same size with task_communication_adj_ weight of the corresponding edge.*/
01888     part_t *proc_to_task_xadj, /*output*/
01889     part_t *proc_to_task_adj, /*output*/
01890     int recursion_depth,
01891     part_t *part_no_array,
01892     part_t *machine_dimensions
01893 )
01894 {
01895 
01896   const Environment *envConst_ = new Environment();
01897 
01898   typedef Tpetra::MultiVector<tcoord_t, part_t,part_t, KokkosClassic::DefaultNode::DefaultNodeType> tMVector_t;
01899 
01900   Teuchos::ArrayRCP<part_t> task_communication_xadj (task_comm_xadj, 0, num_tasks, false);
01901 
01902   Teuchos::ArrayRCP<part_t> task_communication_adj;
01903   if (task_comm_xadj){
01904     Teuchos::ArrayRCP<part_t> tmp_task_communication_adj (task_comm_adj, 0, task_comm_xadj[num_tasks -1 /* KDDKDD OK for MEHMET's ODD LAYOUT; WRONG FOR TRADITIONAL */], false);
01905     task_communication_adj = tmp_task_communication_adj;
01906   }
01907 
01908 
01909   CoordinateTaskMapper<XpetraMultiVectorAdapter <tMVector_t>, part_t> *ctm = new CoordinateTaskMapper<XpetraMultiVectorAdapter <tMVector_t>, part_t>(
01910       envConst_,
01911       problemComm.getRawPtr(),
01912       proc_dim,
01913       num_processors,
01914       machine_coords,//machine_coords_,
01915 
01916       task_dim,
01917       num_tasks,
01918       task_coords,
01919 
01920       task_communication_xadj,
01921       task_communication_adj,
01922       task_communication_edge_weight_,
01923       recursion_depth,
01924       part_no_array,
01925       machine_dimensions
01926   );
01927 
01928 
01929   part_t* proc_to_task_xadj_;
01930   part_t* proc_to_task_adj_;
01931 
01932   ctm->getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
01933 
01934   for (part_t i = 0; i < num_processors; ++i){
01935     proc_to_task_xadj[i] = proc_to_task_xadj_[i];
01936   }
01937   for (part_t i = 0; i < num_tasks; ++i){
01938     proc_to_task_adj[i] = proc_to_task_adj_[i];
01939   }
01940   delete ctm;
01941   delete envConst_;
01942 
01943 }
01944 
01945 
01946 }// namespace Zoltan2
01947 
01948 #endif