// Zoltan2: coordinate-based mapping of partitions (tasks) onto processors.
00001 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 00002 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 00003 00004 #include <fstream> 00005 #include <ctime> 00006 #include <vector> 00007 #include "Zoltan2_AlgMultiJagged.hpp" 00008 #include "Teuchos_ArrayViewDecl.hpp" 00009 #include "Zoltan2_PartitionMapping.hpp" 00010 #include "Zoltan2_MachineRepresentation.hpp" 00011 #include "Teuchos_ReductionOp.hpp" 00012 #include "Zoltan2_XpetraMultiVectorAdapter.hpp" 00013 00014 #include "Teuchos_ConfigDefs.hpp" // define HAVE_MPI 00015 #include "Teuchos_Comm.hpp" 00016 #ifdef HAVE_MPI 00017 # include "Teuchos_DefaultMpiComm.hpp" 00018 #else 00019 # include "Teuchos_DefaultSerialComm.hpp" 00020 #endif // HAVE_MPI 00021 00022 //#define gnuPlot 00023 00024 namespace Teuchos{ 00025 00028 template <typename Ordinal, typename T> 00029 class Zoltan2_ReduceBestMapping : public ValueTypeReductionOp<Ordinal,T> 00030 { 00031 private: 00032 T _EPSILON; 00033 00034 public: 00037 Zoltan2_ReduceBestMapping ():_EPSILON (std::numeric_limits<T>::epsilon()){} 00038 00041 void reduce( const Ordinal count, const T inBuffer[], T inoutBuffer[]) const 00042 { 00043 00044 for (Ordinal i=0; i < count; i++){ 00045 if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){ 00046 inoutBuffer[0] = inBuffer[0]; 00047 inoutBuffer[1] = inBuffer[1]; 00048 } else if(inBuffer[0] - inoutBuffer[0] < _EPSILON && 00049 inBuffer[1] - inoutBuffer[1] < _EPSILON){ 00050 inoutBuffer[0] = inBuffer[0]; 00051 inoutBuffer[1] = inBuffer[1]; 00052 } 00053 } 00054 } 00055 }; 00056 } // namespace Teuchos 00057 00058 00059 namespace Zoltan2{ 00060 00061 template <typename it> 00062 inline it z2Fact(it x) { 00063 return (x == 1 ? x : x * z2Fact<it>(x - 1)); 00064 } 00065 00066 template <typename gno_t, typename part_t> 00067 class GNO_LNO_PAIR{ 00068 public: 00069 gno_t gno; 00070 part_t part; 00071 }; 00072 00073 //returns the ith permutation indices. 
00074 template <typename IT> 00075 void ithPermutation(const IT n, IT i, IT *perm) 00076 { 00077 IT j, k = 0; 00078 IT *fact = new IT[n]; 00079 00080 00081 // compute factorial numbers 00082 fact[k] = 1; 00083 while (++k < n) 00084 fact[k] = fact[k - 1] * k; 00085 00086 // compute factorial code 00087 for (k = 0; k < n; ++k) 00088 { 00089 perm[k] = i / fact[n - 1 - k]; 00090 i = i % fact[n - 1 - k]; 00091 } 00092 00093 // readjust values to obtain the permutation 00094 // start from the end and check if preceding values are lower 00095 for (k = n - 1; k > 0; --k) 00096 for (j = k - 1; j >= 0; --j) 00097 if (perm[j] <= perm[k]) 00098 perm[k]++; 00099 00100 delete [] fact; 00101 } 00102 00103 template <typename part_t> 00104 void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, vector <int> grid_dims){ 00105 int dim = grid_dims.size(); 00106 int neighborCount = 2 * dim; 00107 task_comm_xadj = allocMemory<part_t>(taskCount); 00108 task_comm_adj = allocMemory<part_t>(taskCount * neighborCount); 00109 00110 part_t neighBorIndex = 0; 00111 for (part_t i = 0; i < taskCount; ++i){ 00112 part_t prevDimMul = 1; 00113 for (int j = 0; j < dim; ++j){ 00114 part_t lNeighbor = i - prevDimMul; 00115 part_t rNeighbor = i + prevDimMul; 00116 prevDimMul *= grid_dims[j]; 00117 if (lNeighbor >= 0 && lNeighbor/ prevDimMul == i / prevDimMul && lNeighbor < taskCount){ 00118 task_comm_adj[neighBorIndex++] = lNeighbor; 00119 } 00120 if (rNeighbor >= 0 && rNeighbor/ prevDimMul == i / prevDimMul && rNeighbor < taskCount){ 00121 task_comm_adj[neighBorIndex++] = rNeighbor; 00122 } 00123 } 00124 task_comm_xadj[i] = neighBorIndex; 00125 } 00126 00127 } 00128 //returns the center of the parts. 
00129 template <typename Adapter, typename scalar_t, typename part_t> 00130 void getSolutionCenterCoordinates( 00131 const Environment *envConst, 00132 const Teuchos::Comm<int> *comm, 00133 const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords, 00134 const Zoltan2::PartitioningSolution<Adapter> *soln_, 00135 int coordDim, 00136 part_t ntasks, 00137 scalar_t **partCenters){ 00138 00139 typedef typename Adapter::lno_t lno_t; 00140 typedef typename Adapter::gno_t gno_t; 00141 typedef typename Adapter::zgid_t zgid_t; 00142 00143 typedef StridedData<lno_t, scalar_t> input_t; 00144 ArrayView<const gno_t> gnos; 00145 ArrayView<input_t> xyz; 00146 ArrayView<input_t> wgts; 00147 coords->getCoordinates(gnos, xyz, wgts); 00148 00149 //local and global num coordinates. 00150 lno_t numLocalCoords = coords->getLocalNumCoordinates(); 00151 //gno_t numGlobalCoords = coords->getGlobalNumCoordinates(); 00152 00153 00154 00155 //local number of points in each part. 00156 gno_t *point_counts = allocMemory<gno_t>(ntasks); 00157 memset(point_counts, 0, sizeof(gno_t) * ntasks); 00158 00159 //global number of points in each part. 00160 gno_t *global_point_counts = allocMemory<gno_t>(ntasks); 00161 00162 00163 scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim); 00164 00165 for (int dim=0; dim < coordDim; dim++){ 00166 ArrayRCP<const scalar_t> ar; 00167 xyz[dim].getInputArray(ar); 00168 //multiJagged coordinate values assignment 00169 multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr(); 00170 memset(partCenters[dim], 0, sizeof(scalar_t) * ntasks); 00171 } 00172 00173 //get parts with parallel gnos. 
00174 const part_t *parts = soln_->getPartList(); 00175 const zgid_t *soln_gnos = soln_->getIdList(); 00176 /* 00177 for (lno_t i=0; i < numLocalCoords; i++){ 00178 cout << "me:" << comm->getRank() << " gno:" << soln_gnos[i] << " tmp.part :" << parts[i]<< endl; 00179 } 00180 */ 00181 00182 00183 envConst->timerStart(MACRO_TIMERS, "Mapping - Hashing Creation"); 00184 //hash vector 00185 vector< vector <GNO_LNO_PAIR<gno_t, part_t> > > hash(numLocalCoords); 00186 00187 //insert each point in solution to hash. 00188 for (lno_t i=0; i < numLocalCoords; i++){ 00189 GNO_LNO_PAIR<gno_t, part_t> tmp; 00190 tmp.gno = soln_gnos[i]; 00191 tmp.part = parts[i]; 00192 //cout << "gno:" << tmp.gno << " tmp.part :" << tmp.part << endl; 00193 //count the local number of points in each part. 00194 ++point_counts[tmp.part]; 00195 lno_t hash_index = tmp.gno % numLocalCoords; 00196 hash[hash_index].push_back(tmp); 00197 } 00198 00199 envConst->timerStop(MACRO_TIMERS, "Mapping - Hashing Creation"); 00200 //get global number of points in each part. 00201 reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM, 00202 ntasks, point_counts, global_point_counts 00203 ); 00204 00205 00206 00207 envConst->timerStart(MACRO_TIMERS, "Mapping - Hashing Search"); 00208 //add up all coordinates in each part. 00209 for (lno_t i=0; i < numLocalCoords; i++){ 00210 gno_t g = gnos[i]; 00211 lno_t hash_index = g % numLocalCoords; 00212 lno_t hash_size = hash[hash_index].size(); 00213 part_t p = -1; 00214 for (lno_t j =0; j < hash_size; ++j){ 00215 if (hash[hash_index][j].gno == g){ 00216 p = hash[hash_index][j].part; 00217 break; 00218 } 00219 } 00220 if(p == -1) { 00221 std::cerr << "ERROR AT HASHING FOR GNO:"<< g << " LNO:" << i << std::endl; 00222 } 00223 //add uo all coordinates in each part. 
00224 for(int j = 0; j < coordDim; ++j){ 00225 scalar_t c = multiJagged_coordinates[j][i]; 00226 partCenters[j][p] += c; 00227 } 00228 } 00229 envConst->timerStop(MACRO_TIMERS, "Mapping - Hashing Search"); 00230 00231 for(int j = 0; j < coordDim; ++j){ 00232 for (part_t i=0; i < ntasks; ++i){ 00233 partCenters[j][i] /= global_point_counts[i]; 00234 } 00235 } 00236 00237 scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks); 00238 for(int j = 0; j < coordDim; ++j){ 00239 reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM, 00240 ntasks, partCenters[j], tmpCoords 00241 ); 00242 00243 scalar_t *tmp = partCenters[j]; 00244 partCenters[j] = tmpCoords; 00245 tmpCoords = tmp; 00246 } 00247 00248 freeArray<gno_t> (point_counts); 00249 freeArray<gno_t> (global_point_counts); 00250 00251 freeArray<scalar_t> (tmpCoords); 00252 freeArray<scalar_t *>(multiJagged_coordinates); 00253 } 00254 00255 00258 template <class IT, class WT> 00259 class KmeansHeap{ 00260 IT heapSize; 00261 IT *indices; 00262 WT *values; 00263 WT _EPSILON; 00264 00265 00266 public: 00267 void setHeapsize(IT heapsize_){ 00268 this->heapSize = heapsize_; 00269 this->indices = allocMemory<IT>(heapsize_ ); 00270 this->values = allocMemory<WT>(heapsize_ ); 00271 this->_EPSILON = std::numeric_limits<WT>::epsilon(); 00272 } 00273 00274 ~KmeansHeap(){ 00275 freeArray<IT>(this->indices); 00276 freeArray<WT>(this->values); 00277 } 00278 00279 00280 void addPoint(IT index, WT distance){ 00281 WT maxVal = this->values[0]; 00282 //add only the distance is smaller than the maximum distance. 
00283 //cout << "indeX:" << index << "distance:" <<distance << " maxVal:" << maxVal << endl; 00284 if (distance >= maxVal) return; 00285 else { 00286 this->values[0] = distance; 00287 this->indices[0] = index; 00288 this->push_down(0); 00289 } 00290 } 00291 00292 //heap push down operation 00293 void push_down(IT index_on_heap){ 00294 IT child_index1 = 2 * index_on_heap + 1; 00295 IT child_index2 = 2 * index_on_heap + 2; 00296 00297 IT biggerIndex = -1; 00298 if(child_index1 < this->heapSize && child_index2 < this->heapSize){ 00299 00300 if (this->values[child_index1] < this->values[child_index2]){ 00301 biggerIndex = child_index2; 00302 } 00303 else { 00304 biggerIndex = child_index1; 00305 } 00306 } 00307 else if(child_index1 < this->heapSize){ 00308 biggerIndex = child_index1; 00309 00310 } 00311 else if(child_index2 < this->heapSize){ 00312 biggerIndex = child_index2; 00313 } 00314 if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){ 00315 WT tmpVal = this->values[biggerIndex]; 00316 this->values[biggerIndex] = this->values[index_on_heap]; 00317 this->values[index_on_heap] = tmpVal; 00318 00319 IT tmpIndex = this->indices[biggerIndex]; 00320 this->indices[biggerIndex] = this->indices[index_on_heap]; 00321 this->indices[index_on_heap] = tmpIndex; 00322 this->push_down(biggerIndex); 00323 } 00324 } 00325 00326 void initValues(){ 00327 WT MAXVAL = std::numeric_limits<WT>::max(); 00328 for(IT i = 0; i < this->heapSize; ++i){ 00329 this->values[i] = MAXVAL; 00330 this->indices[i] = -1; 00331 } 00332 } 00333 00334 //returns the total distance to center in the cluster. 00335 WT getTotalDistance(){ 00336 00337 WT nc = 0; 00338 for(IT j = 0; j < this->heapSize; ++j){ 00339 nc += this->values[j]; 00340 00341 //cout << "index:" << this->indices[j] << " distance:" << this->values[j] << endl; 00342 } 00343 return nc; 00344 } 00345 00346 //returns the new center of the cluster. 
00347 bool getNewCenters(WT *center, WT **coords, int dimension){ 00348 bool moved = false; 00349 for(int i = 0; i < dimension; ++i){ 00350 WT nc = 0; 00351 for(IT j = 0; j < this->heapSize; ++j){ 00352 IT k = this->indices[j]; 00353 //cout << "i:" << i << " dim:" << dimension << " k:" << k << " heapSize:" << heapSize << endl; 00354 nc += coords[i][k]; 00355 } 00356 nc /= this->heapSize; 00357 moved = (ABS(center[i] - nc) > this->_EPSILON || moved ); 00358 center[i] = nc; 00359 00360 } 00361 return moved; 00362 } 00363 00364 void copyCoordinates(IT *permutation){ 00365 for(IT i = 0; i < this->heapSize; ++i){ 00366 permutation[i] = this->indices[i]; 00367 } 00368 } 00369 }; 00370 00373 template <class IT, class WT> 00374 class KMeansCluster{ 00375 00376 int dimension; 00377 KmeansHeap<IT,WT> closestPoints; 00378 00379 public: 00380 WT *center; 00381 ~KMeansCluster(){ 00382 freeArray<WT>(center); 00383 } 00384 00385 void setParams(int dimension_, int heapsize){ 00386 this->dimension = dimension_; 00387 this->center = allocMemory<WT>(dimension_); 00388 this->closestPoints.setHeapsize(heapsize); 00389 } 00390 00391 void clearHeap(){ 00392 this->closestPoints.initValues(); 00393 } 00394 00395 bool getNewCenters( WT **coords){ 00396 return this->closestPoints.getNewCenters(center, coords, dimension); 00397 } 00398 00399 //returns the distance of the coordinate to the center. 00400 //also adds it to the heap. 
00401 WT getDistance(IT index, WT **elementCoords){ 00402 WT distance = 0; 00403 for (int i = 0; i < this->dimension; ++i){ 00404 WT d = (center[i] - elementCoords[i][index]); 00405 distance += d * d; 00406 } 00407 distance = pow(distance, WT(1.0 / this->dimension)); 00408 closestPoints.addPoint(index, distance); 00409 return distance; 00410 } 00411 00412 WT getDistanceToCenter(){ 00413 return closestPoints.getTotalDistance(); 00414 } 00415 00416 void copyCoordinates(IT *permutation){ 00417 closestPoints.copyCoordinates(permutation); 00418 } 00419 }; 00420 00424 template <class IT, class WT> 00425 class KMeansAlgorithm{ 00426 00427 int dim; 00428 IT numElements; 00429 WT **elementCoords; 00430 IT numClusters; 00431 IT required_elements; 00432 KMeansCluster <IT,WT> *clusters; 00433 WT *maxCoordinates; 00434 WT *minCoordinates; 00435 public: 00436 ~KMeansAlgorithm(){ 00437 freeArray<KMeansCluster <IT,WT> >(clusters); 00438 freeArray<WT>(maxCoordinates); 00439 freeArray<WT>(minCoordinates); 00440 } 00441 00444 KMeansAlgorithm( 00445 int dim_ , 00446 IT numElements_, 00447 WT **elementCoords_, 00448 IT required_elements_): 00449 dim(dim_), 00450 numElements(numElements_), 00451 elementCoords(elementCoords_), 00452 numClusters ((1 << dim_) + 1), 00453 required_elements(required_elements_) 00454 { 00455 this->clusters = allocMemory<KMeansCluster <IT,WT> >(this->numClusters); 00456 //set dimension and the number of required elements for all clusters. 00457 for (int i = 0; i < numClusters; ++i){ 00458 this->clusters[i].setParams(this->dim, this->required_elements); 00459 } 00460 00461 this->maxCoordinates = allocMemory <WT> (this->dim); 00462 this->minCoordinates = allocMemory <WT> (this->dim); 00463 00464 //obtain the min and max coordiantes for each dimension. 
00465 for (int j = 0; j < dim; ++j){ 00466 this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0]; 00467 for(IT i = 1; i < numElements; ++i){ 00468 WT t = this->elementCoords[j][i]; 00469 if(t > this->maxCoordinates[j]){ 00470 this->maxCoordinates[j] = t; 00471 } 00472 if (t < minCoordinates[j]){ 00473 this->minCoordinates[j] = t; 00474 } 00475 } 00476 } 00477 00478 00479 //assign initial cluster centers. 00480 for (int j = 0; j < dim; ++j){ 00481 int mod = (1 << (j+1)); 00482 for (int i = 0; i < numClusters - 1; ++i){ 00483 WT c = 0; 00484 if ( (i % mod) < mod / 2){ 00485 c = this->maxCoordinates[j]; 00486 //cout << "i:" << i << " j:" << j << " setting max:" << c << endl; 00487 } 00488 else { 00489 c = this->minCoordinates[j]; 00490 } 00491 this->clusters[i].center[j] = c; 00492 } 00493 } 00494 00495 //last cluster center is placed to middle. 00496 for (int j = 0; j < dim; ++j){ 00497 this->clusters[numClusters - 1].center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2; 00498 } 00499 00500 00501 /* 00502 for (int i = 0; i < numClusters; ++i){ 00503 //cout << endl << "cluster:" << i << endl << "\t"; 00504 for (int j = 0; j < dim; ++j){ 00505 cout << this->clusters[i].center[j] << " "; 00506 } 00507 } 00508 */ 00509 } 00510 00511 //performs kmeans clustering of coordinates. 
00512 void kmeans(){ 00513 for(int it = 0; it < 10; ++it){ 00514 //cout << "it:" << it << endl; 00515 for (IT j = 0; j < this->numClusters; ++j){ 00516 this->clusters[j].clearHeap(); 00517 } 00518 for (IT i = 0; i < this->numElements; ++i){ 00519 //cout << "i:" << i << " numEl:" << this->numElements << endl; 00520 for (IT j = 0; j < this->numClusters; ++j){ 00521 //cout << "j:" << j << " numClusters:" << this->numClusters << endl; 00522 this->clusters[j].getDistance(i,this->elementCoords); 00523 } 00524 } 00525 bool moved = false; 00526 for (IT j = 0; j < this->numClusters; ++j){ 00527 moved =(this->clusters[j].getNewCenters(this->elementCoords) || moved ); 00528 } 00529 if (!moved){ 00530 break; 00531 } 00532 } 00533 00534 00535 } 00536 00537 //finds the cluster in which the coordinates are the closest to each other. 00538 void getMinDistanceCluster(IT *procPermutation){ 00539 00540 WT minDistance = this->clusters[0].getDistanceToCenter(); 00541 IT minCluster = 0; 00542 //cout << "j:" << 0 << " minDistance:" << minDistance << " minTmpDistance:" << minDistance<< " minCluster:" << minCluster << endl; 00543 for (IT j = 1; j < this->numClusters; ++j){ 00544 WT minTmpDistance = this->clusters[j].getDistanceToCenter(); 00545 //cout << "j:" << j << " minDistance:" << minDistance << " minTmpDistance:" << minTmpDistance<< " minCluster:" << minCluster << endl; 00546 if(minTmpDistance < minDistance){ 00547 minDistance = minTmpDistance; 00548 minCluster = j; 00549 } 00550 } 00551 00552 //cout << "minCluster:" << minCluster << endl; 00553 this->clusters[minCluster].copyCoordinates(procPermutation); 00554 } 00555 }; 00556 00557 00558 00559 #define MINOF(a,b) (((a)<(b))?(a):(b)) 00560 00567 template <typename T> 00568 void fillContinousArray(T *arr, size_t arrSize, T *val){ 00569 if(val == NULL){ 00570 00571 #ifdef HAVE_ZOLTAN2_OMP 00572 #pragma omp parallel for 00573 #endif 00574 for(size_t i = 0; i < arrSize; ++i){ 00575 arr[i] = i; 00576 } 00577 00578 } 00579 else { 00580 T v 
= *val; 00581 #ifdef HAVE_ZOLTAN2_OMP 00582 #pragma omp parallel for 00583 #endif 00584 for(size_t i = 0; i < arrSize; ++i){ 00585 //cout << "writing to i:" << i << " arr:" << arrSize << endl; 00586 arr[i] = v; 00587 } 00588 } 00589 } 00590 00593 template <typename part_t, typename pcoord_t> 00594 class CommunicationModel{ 00595 protected: 00596 double commCost; 00597 public: 00598 00599 part_t no_procs; //the number of processors 00600 part_t no_tasks; //the number of taks. 00601 CommunicationModel(): commCost(),no_procs(0), no_tasks(0){} 00602 CommunicationModel(part_t no_procs_, part_t no_tasks_): 00603 commCost(), 00604 no_procs(no_procs_), 00605 no_tasks(no_tasks_){} 00606 virtual ~CommunicationModel(){} 00607 part_t getNProcs() const{ 00608 return this->no_procs; 00609 } 00610 part_t getNTasks()const{ 00611 return this->no_tasks; 00612 } 00613 00614 00615 void calculateCommunicationCost( 00616 part_t *task_to_proc, 00617 part_t *task_communication_xadj, 00618 part_t *task_communication_adj, 00619 pcoord_t *task_communication_edge_weight){ 00620 00621 double totalCost = 0; 00622 00623 part_t commCount = 0; 00624 for (part_t task = 0; task < this->no_tasks; ++task){ 00625 int assigned_proc = task_to_proc[task]; 00626 part_t task_adj_begin = 0; 00627 //cout << "task:" << task << endl; 00628 part_t task_adj_end = task_communication_xadj[task]; 00629 if (task > 0) task_adj_begin = task_communication_xadj[task - 1]; 00630 00631 commCount += task_adj_end - task_adj_begin; 00632 //cout << "task:" << task << " proc:" << assigned_proc << endl; 00633 for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){ 00634 00635 //cout << "task2:" << task2 << endl; 00636 part_t neighborTask = task_communication_adj[task2]; 00637 //cout << "neighborTask :" << neighborTask << endl; 00638 int neighborProc = task_to_proc[neighborTask]; 00639 double distance = getProcDistance(assigned_proc, neighborProc); 00640 //cout << "assigned_proc:" << assigned_proc << " neighborProc:" 
<< neighborProc << " d:" << distance << endl; 00641 00642 00643 if (task_communication_edge_weight == NULL){ 00644 totalCost += distance ; 00645 } 00646 else { 00647 totalCost += distance * task_communication_edge_weight[task2]; 00648 } 00649 } 00650 } 00651 00652 this->commCost = totalCost;// commCount; 00653 } 00654 00655 double getCommunicationCostMetric(){ 00656 return this->commCost; 00657 } 00658 00659 virtual double getProcDistance(int procId1, int procId2) const = 0; 00660 00667 virtual void getMapping( 00668 int myRank, 00669 RCP<const Environment> env, 00670 ArrayRCP <part_t> &proc_to_task_xadj, // = allocMemory<part_t> (this->no_procs); //holds the pointer to the task array 00671 ArrayRCP <part_t> &proc_to_task_adj, // = allocMemory<part_t>(this->no_tasks); //holds the indices of tasks wrt to proc_to_task_xadj array. 00672 ArrayRCP <part_t> &task_to_proc //allocMemory<part_t>(this->no_tasks); //holds the processors mapped to tasks. 00673 ) const = 0; 00674 }; 00678 template <typename pcoord_t, typename tcoord_t, typename part_t> 00679 class CoordinateCommunicationModel:public CommunicationModel<part_t, pcoord_t> { 00680 public: 00681 //private: 00682 int proc_coord_dim; //dimension of the processors 00683 pcoord_t **proc_coords; //the processor coordinates. allocated outside of the class. 00684 int task_coord_dim; //dimension of the tasks coordinates. 00685 tcoord_t **task_coords; //the task coordinates allocated outside of the class. 
00686 int partArraySize; 00687 part_t *partNoArray; 00688 00689 //public: 00690 CoordinateCommunicationModel(): 00691 CommunicationModel<part_t, pcoord_t>(), 00692 proc_coord_dim(0), 00693 proc_coords(0), 00694 task_coord_dim(0), 00695 task_coords(0), 00696 partArraySize(-1), 00697 partNoArray(NULL){} 00698 00699 virtual ~CoordinateCommunicationModel(){} 00700 00709 CoordinateCommunicationModel( 00710 int pcoord_dim_, 00711 pcoord_t **pcoords_, 00712 int tcoord_dim_, 00713 tcoord_t **tcoords_, 00714 part_t no_procs_, 00715 part_t no_tasks_ 00716 ): 00717 CommunicationModel<part_t, pcoord_t>(no_procs_, no_tasks_), 00718 proc_coord_dim(pcoord_dim_), proc_coords(pcoords_), 00719 task_coord_dim(tcoord_dim_), task_coords(tcoords_), 00720 partArraySize(std::min(tcoord_dim_, pcoord_dim_)), 00721 partNoArray(NULL){ 00722 } 00723 00724 00725 void setPartArraySize(int psize){ 00726 this->partArraySize = psize; 00727 } 00728 void setPartArray(part_t *pNo){ 00729 this->partNoArray = pNo; 00730 } 00731 00738 void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const{ 00739 //currently returns a random subset. 00740 00741 part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim); 00742 KMeansAlgorithm<part_t, pcoord_t > kma( 00743 minCoordDim, nprocs, 00744 this->proc_coords, ntasks); 00745 00746 kma.kmeans(); 00747 kma.getMinDistanceCluster(proc_permutation); 00748 00749 for(int i = ntasks; i < nprocs; ++i){ 00750 proc_permutation[i] = -1; 00751 } 00752 /* 00753 //fill array. 00754 fillContinousArray<part_t>(proc_permutation, nprocs, NULL); 00755 int _u_umpa_seed = 847449649; 00756 srand (time(NULL)); 00757 int a = rand() % 1000 + 1; 00758 _u_umpa_seed -= a; 00759 //permute array randomly. 00760 update_visit_order(proc_permutation, nprocs,_u_umpa_seed, 1); 00761 */ 00762 } 00763 00764 //temporary, necessary for random permutation. 
00765 static part_t umpa_uRandom(part_t l, int &_u_umpa_seed) 00766 { 00767 int a = 16807; 00768 int m = 2147483647; 00769 int q = 127773; 00770 int r = 2836; 00771 int lo, hi, test; 00772 double d; 00773 00774 lo = _u_umpa_seed % q; 00775 hi = _u_umpa_seed / q; 00776 test = (a*lo)-(r*hi); 00777 if (test>0) 00778 _u_umpa_seed = test; 00779 else 00780 _u_umpa_seed = test + m; 00781 d = (double) ((double) _u_umpa_seed / (double) m); 00782 return (part_t) (d*(double)l); 00783 } 00784 00785 virtual double getProcDistance(int procId1, int procId2) const{ 00786 double distance = 0; 00787 for (int i = 0 ; i < this->proc_coord_dim; ++i){ 00788 distance += ABS(proc_coords[i][procId1] - proc_coords[i][procId2]); 00789 } 00790 return distance; 00791 } 00792 00793 00794 //temporary, does random permutation. 00795 void update_visit_order(part_t* visitOrder, part_t n, int &_u_umpa_seed, part_t rndm) { 00796 part_t *a = visitOrder; 00797 00798 00799 if (rndm){ 00800 part_t i, u, v, tmp; 00801 00802 if (n <= 4) 00803 return; 00804 00805 //srand ( time(NULL) ); 00806 00807 //_u_umpa_seed = _u_umpa_seed1 - (rand()%100); 00808 for (i=0; i<n; i+=16) 00809 { 00810 u = umpa_uRandom(n-4, _u_umpa_seed); 00811 v = umpa_uRandom(n-4, _u_umpa_seed); 00812 SWAP(a[v], a[u], tmp); 00813 SWAP(a[v+1], a[u+1], tmp); 00814 SWAP(a[v+2], a[u+2], tmp); 00815 SWAP(a[v+3], a[u+3], tmp); 00816 } 00817 } 00818 else { 00819 part_t i, end = n / 4; 00820 00821 for (i=1; i<end; i++) 00822 { 00823 part_t j=umpa_uRandom(n-i, _u_umpa_seed); 00824 part_t t=a[j]; 00825 a[j] = a[n-i]; 00826 a[n-i] = t; 00827 } 00828 } 00829 //PermuteInPlace(visitOrder, n); 00830 } 00831 00832 00833 00840 virtual void getMapping( 00841 int myRank, 00842 RCP<const Environment> env, 00843 ArrayRCP <part_t> &rcp_proc_to_task_xadj, // = allocMemory<part_t> (this->no_procs); //holds the pointer to the task array 00844 ArrayRCP <part_t> &rcp_proc_to_task_adj, // = allocMemory<part_t>(this->no_tasks); //holds the indices of tasks wrt to 
proc_to_task_xadj array. 00845 ArrayRCP <part_t> &rcp_task_to_proc //allocMemory<part_t>(this->no_tasks); //holds the processors mapped to tasks. 00846 ) const{ 00847 00848 rcp_proc_to_task_xadj = ArrayRCP <part_t> (this->no_procs); 00849 rcp_proc_to_task_adj = ArrayRCP <part_t> (this->no_tasks); 00850 rcp_task_to_proc = ArrayRCP <part_t> (this->no_tasks); 00851 00852 part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr(); //holds the pointer to the task array 00853 part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr(); //holds the indices of tasks wrt to proc_to_task_xadj array. 00854 part_t *task_to_proc = rcp_task_to_proc.getRawPtr(); //holds the processors mapped to tasks.); 00855 00856 00857 part_t invalid = 0; 00858 fillContinousArray<part_t> (proc_to_task_xadj, this->no_procs, &invalid); 00859 00860 //obtain the number of parts that should be divided. 00861 part_t num_parts = MINOF(this->no_procs, this->no_tasks); 00862 //obtain the min coordinate dim. 00863 part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim); 00864 00865 int recursion_depth = partArraySize; 00866 if(partArraySize < minCoordDim) recursion_depth = minCoordDim; 00867 00868 int taskPerm = z2Fact<int>(this->task_coord_dim); //get the number of different permutations for task dimension ordering 00869 int procPerm = z2Fact<int>(this->proc_coord_dim); //get the number of different permutations for proc dimension ordering 00870 int permutations = taskPerm * procPerm; //total number of permutations 00871 00872 //holds the pointers to proc_adjList 00873 part_t *proc_xadj = allocMemory<part_t> (num_parts); 00874 //holds the processors in parts according to the result of partitioning algorithm. 
00875 //the processors assigned to part x is at proc_adjList[ proc_xadj[x - 1] : proc_xadj[x] ] 00876 part_t *proc_adjList = allocMemory<part_t>(this->no_procs); 00877 00878 00879 part_t used_num_procs = this->no_procs; 00880 if(this->no_procs > this->no_tasks){ 00881 //obtain the subset of the processors that are closest to each other. 00882 this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks); 00883 used_num_procs = this->no_tasks; 00884 } 00885 else { 00886 fillContinousArray<part_t>(proc_adjList,this->no_procs, NULL); 00887 } 00888 00889 int myPermutation = myRank % permutations; //the index of the permutation 00890 00891 int myProcPerm= myPermutation % procPerm; // the index of the proc permutation 00892 int myTaskPerm = myPermutation / procPerm; // the index of the task permutation 00893 00894 int *permutation = allocMemory<int> ((this->proc_coord_dim > this->task_coord_dim) 00895 ? this->proc_coord_dim : this->task_coord_dim); 00896 00897 //get the permutation order from the proc permutation index. 00898 ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation); 00899 //reorder the coordinate dimensions. 00900 pcoord_t **pcoords = allocMemory<pcoord_t *> (this->proc_coord_dim); 00901 for(int i = 0; i < this->proc_coord_dim; ++i){ 00902 pcoords[i] = this->proc_coords[permutation[i]]; 00903 //cout << permutation[i] << " "; 00904 } 00905 00906 00907 //do the partitioning and renumber the parts. 
00908 env->timerStart(MACRO_TIMERS, "Mapping - Proc Partitioning"); 00909 00910 AlgMJ<pcoord_t, part_t, part_t, part_t> mj_partitioner; 00911 mj_partitioner.sequential_task_partitioning( 00912 env, 00913 this->no_procs, 00914 used_num_procs, 00915 num_parts, 00916 minCoordDim, 00917 pcoords,//this->proc_coords, 00918 proc_adjList, 00919 proc_xadj, 00920 recursion_depth, 00921 partNoArray 00922 //,"proc_partitioning" 00923 ); 00924 env->timerStop(MACRO_TIMERS, "Mapping - Proc Partitioning"); 00925 freeArray<pcoord_t *> (pcoords); 00926 00927 00928 part_t *task_xadj = allocMemory<part_t> (num_parts); 00929 part_t *task_adjList = allocMemory<part_t>(this->no_tasks); 00930 //fill task_adjList st: task_adjList[i] <- i. 00931 fillContinousArray<part_t>(task_adjList,this->no_tasks, NULL); 00932 00933 //get the permutation order from the task permutation index. 00934 ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation); 00935 00936 //reorder task coordinate dimensions. 00937 tcoord_t **tcoords = allocMemory<tcoord_t *> (this->task_coord_dim); 00938 for(int i = 0; i < this->task_coord_dim; ++i){ 00939 tcoords[i] = this->task_coords[permutation[i]]; 00940 } 00941 00942 env->timerStart(MACRO_TIMERS, "Mapping - Task Partitioning"); 00943 //partitioning of tasks 00944 mj_partitioner.sequential_task_partitioning( 00945 env, 00946 this->no_tasks, 00947 this->no_tasks, 00948 num_parts, 00949 minCoordDim, 00950 tcoords, //this->task_coords, 00951 task_adjList, 00952 task_xadj, 00953 recursion_depth, 00954 partNoArray 00955 //,"task_partitioning" 00956 ); 00957 env->timerStop(MACRO_TIMERS, "Mapping - Task Partitioning"); 00958 freeArray<pcoord_t *> (tcoords); 00959 freeArray<int> (permutation); 00960 00961 00962 //filling proc_to_task_xadj, proc_to_task_adj, task_to_proc arrays. 
00963 for(part_t i = 0; i < num_parts; ++i){ 00964 00965 part_t proc_index_begin = 0; 00966 part_t task_begin_index = 0; 00967 00968 if (i > 0) { 00969 proc_index_begin = proc_xadj[i - 1]; 00970 task_begin_index = task_xadj[i - 1]; 00971 } 00972 part_t proc_index_end = proc_xadj[i]; 00973 part_t task_end_index = task_xadj[i]; 00974 00975 00976 if(proc_index_end - proc_index_begin != 1){ 00977 std::cerr << "Error at partitioning of processors" << std::endl; 00978 std::cerr << "PART:" << i << " is assigned to " << proc_index_end - proc_index_begin << " processors." << std::endl; 00979 exit(1); 00980 } 00981 part_t assigned_proc = proc_adjList[proc_index_begin]; 00982 proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index; 00983 } 00984 00985 00986 //holds the pointer to the task array 00987 part_t *proc_to_task_xadj_work = allocMemory<part_t> (this->no_procs); 00988 proc_to_task_xadj_work[0] = proc_to_task_xadj[0]; 00989 for(part_t i = 1; i < this->no_procs; ++i){ 00990 proc_to_task_xadj[i] += proc_to_task_xadj[i - 1]; 00991 proc_to_task_xadj_work[i] = proc_to_task_xadj[i]; 00992 } 00993 00994 for(part_t i = 0; i < num_parts; ++i){ 00995 00996 part_t proc_index_begin = 0; 00997 part_t task_begin_index = 0; 00998 00999 if (i > 0) { 01000 proc_index_begin = proc_xadj[i - 1]; 01001 task_begin_index = task_xadj[i - 1]; 01002 } 01003 part_t task_end_index = task_xadj[i]; 01004 01005 part_t assigned_proc = proc_adjList[proc_index_begin]; 01006 01007 for (part_t j = task_begin_index; j < task_end_index; ++j){ 01008 part_t taskId = task_adjList[j]; 01009 01010 task_to_proc[taskId] = assigned_proc; 01011 01012 proc_to_task_adj [ --proc_to_task_xadj_work[assigned_proc] ] = taskId; 01013 } 01014 } 01015 01016 freeArray<part_t>(proc_to_task_xadj_work); 01017 freeArray<part_t>(task_xadj); 01018 freeArray<part_t>(task_adjList); 01019 freeArray<part_t>(proc_xadj); 01020 freeArray<part_t>(proc_adjList); 01021 } 01022 01023 }; 01024 01025 template <typename Adapter, 
typename part_t>
class CoordinateTaskMapper:public PartitionMapping<Adapter>{
protected:

#ifndef DOXYGEN_SHOULD_SKIP_THIS

  // Processor and task coordinates both use the adapter's scalar type.
  typedef typename Adapter::scalar_t pcoord_t;
  typedef typename Adapter::scalar_t tcoord_t;

#endif

  //RCP<const Environment> env;
  // CSR-like processor-to-task mapping:
  // proc_to_task_xadj[p] is the (inclusive prefix-sum) end offset of the
  // tasks assigned to processor p inside proc_to_task_adj.
  ArrayRCP<part_t> proc_to_task_xadj;
  // Task ids grouped per processor, indexed through proc_to_task_xadj.
  ArrayRCP<part_t> proc_to_task_adj;
  // Inverse map: task_to_proc[t] is the processor that task t is assigned to.
  ArrayRCP<part_t> task_to_proc;
  // When true, the destructor deletes proc_task_comm.
  bool isOwnerofModel;
  // Communication model that computes the geometric proc/task assignment.
  CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t> *proc_task_comm;
  part_t nprocs; // number of processors
  part_t ntasks; // number of tasks (parts)
  // Task communication graph in CSR form, used for cost evaluation.
  ArrayRCP<part_t>task_communication_xadj;
  ArrayRCP<part_t>task_communication_adj;


  /// Runs the communication model to compute the mapping for this rank.
  /// Fills proc_to_task_xadj, proc_to_task_adj and task_to_proc.
  /// \param myRank rank of this process; different ranks may obtain
  ///        different candidate mappings (see getBestMapping()).
  /// Aborts the program if no communication model was supplied.
  void doMapping(int myRank){

    if(this->proc_task_comm){
      this->proc_task_comm->getMapping(
          myRank,
          Teuchos::RCP<const Environment>(this->env, false), // non-owning RCP wrapper
          this->proc_to_task_xadj, // out: CSR offsets of tasks per processor
          this->proc_to_task_adj,  // out: task ids grouped per processor
          this->task_to_proc       // out: processor assigned to each task
      );
    }
    else {
      std::cerr << "communicationModel is not specified in the Mapper" << std::endl;
      exit(1);
    }
  }


  /// Creates a subcommunicator grouping ranks that try different coordinate
  /// permutations of the same mapping problem. The ideal group size is
  /// (#proc-dim permutations) * (#task-dim permutations); a trailing
  /// partial group is merged into the previous one so every rank belongs
  /// to exactly one group.
  /// \return subcommunicator containing this rank's group.
  RCP<Comm<int> > create_subCommunicator(){
    int procDim = this->proc_task_comm->proc_coord_dim;
    int taskDim = this->proc_task_comm->task_coord_dim;

    // NOTE(review): the names look swapped (taskPerm is computed from
    // procDim and vice versa), but only the product idealGroupSize is
    // used, so the result is unaffected.
    int taskPerm = z2Fact<int>(procDim); //get the number of different permutations for task dimension ordering
    int procPerm = z2Fact<int>(taskDim); //get the number of different permutations for proc dimension ordering
    int idealGroupSize = taskPerm * procPerm; //total number of permutations

    int myRank = this->comm->getRank();
    int commSize = this->comm->getSize();

    int myGroupIndex = myRank / idealGroupSize;

    int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
    if (prevGroupBegin < 0) prevGroupBegin = 0;
    int myGroupBegin = myGroupIndex * idealGroupSize;
    int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
    int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;

    // If this is the last (partial) group, merge it with the previous one.
    if (myGroupEnd > commSize){
      myGroupBegin = prevGroupBegin;
      myGroupEnd = commSize;
    }
    // If the *next* group would be partial, absorb its ranks into this group.
    if (nextGroupEnd > commSize){
      myGroupEnd = commSize;
    }
    int myGroupSize = myGroupEnd - myGroupBegin;

    part_t *myGroup = allocMemory<part_t>(myGroupSize);
    for (int i = 0; i < myGroupSize; ++i){
      myGroup[i] = myGroupBegin + i;
    }

    ArrayView<const part_t> myGroupView(myGroup, myGroupSize);

    // createSubcommunicator copies the rank list, so myGroup can be freed.
    RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
    freeArray<part_t>(myGroup);
    return subComm;
  }


  /// Reduces the per-rank mapping costs within the permutation group and
  /// broadcasts the arrays of the cheapest mapping to all group members.
  /// Pre: doMapping() has filled task_to_proc / proc_to_task_* on each rank.
  void getBestMapping(){
    //create the sub group.
    RCP<Comm<int> > subComm = this->create_subCommunicator();
    //calculate cost.
    double myCost = this->proc_task_comm->getCommunicationCostMetric();
    // localCost[0] = cost, localCost[1] = owning subComm rank; the custom
    // reduction keeps the (cost, rank) pair with the smallest cost.
    double localCost[2], globalCost[2];

    localCost[0] = myCost;
    localCost[1] = double(subComm->getRank());

    globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
    Teuchos::Zoltan2_ReduceBestMapping<int,double> reduceBest;
    reduceAll<int, double>(*subComm, reduceBest,
        2, localCost, globalCost);

    // Rank (within subComm) that produced the best mapping.
    int sender = int(globalCost[1]);

    broadcast (*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
    broadcast (*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
    broadcast (*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
  }


  //write mapping to gnuPlot code to visualize.
01142 void writeMapping(){ 01143 std::ofstream gnuPlotCode ("gnuPlot.plot", std::ofstream::out); 01144 01145 int mindim = MINOF(proc_task_comm->proc_coord_dim, proc_task_comm->task_coord_dim); 01146 std::string ss = ""; 01147 for(part_t i = 0; i < this->nprocs; ++i){ 01148 01149 std::string procFile = toString<int>(i) + "_mapping.txt"; 01150 if (i == 0){ 01151 gnuPlotCode << "plot \"" << procFile << "\"\n"; 01152 } 01153 else { 01154 gnuPlotCode << "replot \"" << procFile << "\"\n"; 01155 } 01156 01157 std::ofstream inpFile (procFile.c_str(), std::ofstream::out); 01158 01159 std::string gnuPlotArrow = "set arrow from "; 01160 for(int j = 0; j < mindim; ++j){ 01161 if (j == mindim - 1){ 01162 inpFile << proc_task_comm->proc_coords[j][i]; 01163 gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]); 01164 01165 } 01166 else { 01167 inpFile << proc_task_comm->proc_coords[j][i] << " "; 01168 gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]) +","; 01169 } 01170 } 01171 gnuPlotArrow += " to "; 01172 01173 01174 inpFile << std::endl; 01175 ArrayView<part_t> a = this->getAssignedTaksForProc(i); 01176 for(int k = 0; k < a.size(); ++k){ 01177 int j = a[k]; 01178 //cout << "i:" << i << " j:" 01179 std::string gnuPlotArrow2 = gnuPlotArrow; 01180 for(int z = 0; z < mindim; ++z){ 01181 if(z == mindim - 1){ 01182 01183 //cout << "z:" << z << " j:" << j << " " << proc_task_comm->task_coords[z][j] << endl; 01184 inpFile << proc_task_comm->task_coords[z][j]; 01185 gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]); 01186 } 01187 else{ 01188 inpFile << proc_task_comm->task_coords[z][j] << " "; 01189 gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]) +","; 01190 } 01191 } 01192 ss += gnuPlotArrow2 + "\n"; 01193 inpFile << std::endl; 01194 } 01195 inpFile.close(); 01196 01197 } 01198 gnuPlotCode << ss; 01199 gnuPlotCode << "\nreplot\n pause -1 \n"; 01200 gnuPlotCode.close(); 01201 01202 } 01203 01204 01205 //write mapping 
/// Writes the mapping to gnuplot files for visualization (per-rank script
/// "gnuPlot<rank>.plot" plus shared procPlot/partPlot/allProc data files).
/// Unlike writeMapping(), processors with no assigned tasks are skipped
/// and arrows are drawn with "nohead".
void writeMapping2(int myRank){

  std::string rankStr = toString<int>(myRank);
  std::string gnuPlots = "gnuPlot", extentionS = ".plot";
  std::string outF = gnuPlots + rankStr+ extentionS;
  std::ofstream gnuPlotCode ( outF.c_str(), std::ofstream::out);

  CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *tmpproc_task_comm =
      static_cast <CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> * > (proc_task_comm);
  // Only the dimensions shared by both coordinate spaces can be drawn.
  int mindim = MINOF(tmpproc_task_comm->proc_coord_dim, tmpproc_task_comm->task_coord_dim);
  std::string ss = "";                      // accumulated "set arrow" commands
  std::string procs = "", parts = "";       // coordinate dumps for the data files
  for(part_t i = 0; i < this->nprocs; ++i){

    ArrayView<part_t> a = this->getAssignedTaksForProc(i);
    if (a.size() == 0){
      continue; // idle processors are not plotted
    }

    // Arrow prefix holding this processor's coordinates.
    std::string gnuPlotArrow = "set arrow from ";
    for(int j = 0; j < mindim; ++j){
      if (j == mindim - 1){
        gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
        procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
      }
      else {
        gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]) +",";
        procs += toString<float>(tmpproc_task_comm->proc_coords[j][i])+ " ";
      }
    }
    procs += "\n";

    gnuPlotArrow += " to ";

    // One arrow (proc -> task) per assigned task.
    for(int k = 0; k < a.size(); ++k){
      int j = a[k];
      std::string gnuPlotArrow2 = gnuPlotArrow;
      for(int z = 0; z < mindim; ++z){
        if(z == mindim - 1){
          gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]);
          parts += toString<float>(tmpproc_task_comm->task_coords[z][j]);
        }
        else{
          gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]) +",";
          parts += toString<float>(tmpproc_task_comm->task_coords[z][j]) + " ";
        }
      }
      parts += "\n";
      ss += gnuPlotArrow2 + " nohead\n";
    }
  }

  std::ofstream procFile ("procPlot.plot", std::ofstream::out);
  procFile << procs << "\n";
  procFile.close();

  std::ofstream partFile ("partPlot.plot", std::ofstream::out);
  partFile << parts<< "\n";
  partFile.close();

  // All processor coordinates (including idle ones) for reference.
  std::ofstream extraProcFile ("allProc.plot", std::ofstream::out);

  for(part_t j = 0; j < this->nprocs; ++j){
    for(int i = 0; i < mindim; ++i){
      extraProcFile << tmpproc_task_comm->proc_coords[i][j] << " ";
    }
    extraProcFile << std::endl;
  }

  extraProcFile.close();

  gnuPlotCode << ss;
  if(mindim == 2){
    gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
  } else {
    gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
  }
  gnuPlotCode << "replot \"partPlot.plot\" with points pointsize 3\n";
  gnuPlotCode << "replot \"allProc.plot\" with points pointsize 0.65\n";
  gnuPlotCode << "\nreplot\n pause -1 \n";
  gnuPlotCode.close();

}


/// Writes the part boxes ("2d.txt" + "gggnuPlot<rank>.plot") and draws the
/// task communication graph as arrows between part centers.
/// \param comm_       communicator (rank used only for the output file name)
/// \param soln_       partitioning solution providing the part boxes
/// \param coordDim    dimension of the task coordinates
/// \param partCenters coordDim arrays of part center coordinates
void writeGnuPlot(
    const Teuchos::Comm<int> *comm_,
    const Zoltan2::PartitioningSolution<Adapter> *soln_,
    int coordDim,
    tcoord_t **partCenters
){
  std::string file = "gggnuPlot";
  std::string exten = ".plot";
  std::ofstream mm("2d.txt");
  file += toString<int>(comm_->getRank()) + exten;
  std::ofstream ff(file.c_str());
  // Cast away const to reach the (non-const) getPartBoxes accessor.
  RCP < vector <Zoltan2::coordinateModelPartBox <tcoord_t, part_t> > > outPartBoxes = ((Zoltan2::PartitioningSolution<Adapter> *)soln_)->getPartBoxes();

  for (part_t i = 0; i < this->ntasks;++i){
    (*outPartBoxes)[i].writeGnuPlot(ff, mm);
  }
  if (coordDim == 2){
    ff << "plot \"2d.txt\"" << std::endl;
  }
  else {
    ff << "splot \"2d.txt\"" << std::endl;
  }
  mm.close();

  ff << "set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
  // One arrow per directed edge (neighbor center -> part center).
  for (part_t i = 0; i < this->ntasks;++i){
    part_t pb = 0;
    if (i > 0) pb = task_communication_xadj[i -1];
    part_t pe = task_communication_xadj[i];
    for (part_t p = pb; p < pe; ++p){
      part_t n = task_communication_adj[p];

      std::string arrowline = "set arrow from ";
      for (int j = 0; j < coordDim - 1; ++j){
        arrowline += toString<tcoord_t>(partCenters[j][n]) + ",";
      }
      arrowline += toString<tcoord_t>(partCenters[coordDim -1][n]) + " to ";

      for (int j = 0; j < coordDim - 1; ++j){
        arrowline += toString<tcoord_t>(partCenters[j][i]) + ",";
      }
      arrowline += toString<tcoord_t>(partCenters[coordDim -1][i]) + " as 5\n";

      ff << arrowline;
    }
  }

  ff << "replot\n pause -1" << std::endl;
  ff.close();
}

public:

/// Returns raw pointers to the internal proc-to-task CSR arrays.
/// No ownership is transferred; the pointers are valid for the lifetime
/// of this mapper.
void getProcTask(part_t* &proc_to_task_xadj_, part_t* &proc_to_task_adj_){
  proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
  proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
}


/// The ArrayRCP members release themselves; only the communication model
/// is owned conditionally.
virtual ~CoordinateTaskMapper(){
  //freeArray<part_t> (proc_to_task_xadj);
  //freeArray<part_t> (proc_to_task_adj);
  //freeArray<part_t> (task_to_proc);
  if(this->isOwnerofModel){
    delete this->proc_task_comm;
  }
}


/// Constructor for use within Zoltan2: derives processor coordinates from
/// the machine representation and task coordinates from the part centers
/// of a partitioning solution, then computes and cost-optimizes the mapping.
/// \param comm_     communicator for the whole problem
/// \param machine_  machine representation (processor coordinates)
/// \param model_    coordinate model of the problem (must be a CoordinateModel)
/// \param soln_     partitioning solution whose parts become tasks
/// \param envConst  environment (timers, parameters)
CoordinateTaskMapper(
    const Teuchos::Comm<int> *comm_,
    const MachineRepresentation<pcoord_t> *machine_,
    const Zoltan2::Model<typename Adapter::base_adapter_t> *model_,
    const Zoltan2::PartitioningSolution<Adapter> *soln_,
    const Environment *envConst):
  PartitionMapping<Adapter> (comm_, machine_, model_, soln_, envConst),
  proc_to_task_xadj(0),
  proc_to_task_adj(0),
  task_to_proc(0),
  isOwnerofModel(true),
  proc_task_comm(0),
  task_communication_xadj(0),
  task_communication_adj(0){

  // No edge weights in this path: cost uses unweighted hops.
  pcoord_t *task_communication_edge_weight_ = NULL;
  //if mapping type is 0 then it is coordinate mapping
  int procDim = machine_->getProcDim();
  this->nprocs = machine_->getNumProcs();
  //get processor coordinates.
  pcoord_t **procCoordinates = machine_->getProcCoords();

  int coordDim = ((Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *)model_)->getCoordinateDim();
  // Use the larger of actual and target part counts as the task count.
  this->ntasks = soln_->getActualGlobalNumberOfParts();
  if (part_t (soln_->getTargetGlobalNumberOfParts()) > this->ntasks){
    this->ntasks = soln_->getTargetGlobalNumberOfParts();
  }

  //alloc memory for part centers.
  tcoord_t **partCenters = NULL;
  partCenters = allocMemory<tcoord_t *>(coordDim);
  for (int i = 0; i < coordDim; ++i){
    partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
  }


  envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");
  //get centers for the parts.
  getSolutionCenterCoordinates<Adapter, typename Adapter::scalar_t,part_t>(
      envConst,
      comm_,
      ((Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *)model_),
      this->soln,
      coordDim,
      ntasks,
      partCenters);

  envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");


  //create coordinate communication model.
  this->proc_task_comm =
      new Zoltan2::CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t>(
          procDim,
          procCoordinates,
          coordDim,
          partCenters,
          this->nprocs,
          this->ntasks
      );

  int myRank = comm_->getRank();


  envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
  this->doMapping(myRank);
  envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");


  envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
  ((Zoltan2::PartitioningSolution<Adapter> *)soln_)->getCommunicationGraph(
      comm_,
      task_communication_xadj,
      task_communication_adj
  );

  envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");
#ifdef gnuPlot
  if (comm_->getRank() == 0){

    part_t taskCommCount = task_communication_xadj.size();
    std::cout << " TotalComm:" << task_communication_xadj[taskCommCount - 1] << std::endl;
    part_t maxN = task_communication_xadj[0];
    for (part_t i = 1; i < taskCommCount; ++i){
      part_t nc = task_communication_xadj[i] - task_communication_xadj[i - 1];
      if (maxN < nc) maxN = nc;
    }
    std::cout << " maxNeighbor:" << maxN << std::endl;
  }

  this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
#endif

  envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
  this->proc_task_comm->calculateCommunicationCost(
      task_to_proc.getRawPtr(),
      task_communication_xadj.getRawPtr(),
      task_communication_adj.getRawPtr(),
      task_communication_edge_weight_
  );


  envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");

  //processors are divided into groups of size numProc! * numTasks!
  //each processor in the group obtains a mapping with a different rotation
  //and best one is broadcasted all processors.
  this->getBestMapping();
#ifdef gnuPlot
  this->writeMapping2(comm_->getRank());
#endif

  // NOTE(review): proc_task_comm was constructed with partCenters, which is
  // freed here; calling getCommunicationCostMetric() on the model after the
  // constructor returns would read freed memory — verify.
  for (int i = 0; i < coordDim; ++i){
    freeArray<tcoord_t>(partCenters[i]);
  }
  freeArray<tcoord_t *>(partCenters);

}


/// Constructor for direct use with raw coordinate arrays (no Zoltan2 model
/// or solution required). Optionally "shifts" machine coordinates along
/// each dimension to account for torus wrap-around before mapping.
/// \param env_const_      environment
/// \param problemComm     communicator
/// \param proc_dim        dimension of processor coordinates
/// \param num_processors  number of processors
/// \param machine_coords  proc_dim arrays of processor coordinates
/// \param task_dim        dimension of task coordinates
/// \param num_tasks       number of tasks
/// \param task_coords     task_dim arrays of task coordinates
/// \param task_comm_xadj / task_comm_adj  optional CSR task communication
///        graph; when both are non-null the mapping is cost-optimized
/// \param task_communication_edge_weight_ optional per-edge weights
/// \param recursion_depth / part_no_array  multijagged partitioning controls
/// \param machine_dimensions optional per-dimension machine extents; when
///        given, coordinates are shifted across the largest per-dimension gap
CoordinateTaskMapper(
    const Environment *env_const_,
    const Teuchos::Comm<int> *problemComm,
    int proc_dim,
    int num_processors,
    pcoord_t **machine_coords,

    int task_dim,
    part_t num_tasks,
    tcoord_t **task_coords,
    ArrayRCP<part_t>task_comm_xadj,
    ArrayRCP<part_t>task_comm_adj,
    pcoord_t *task_communication_edge_weight_,
    int recursion_depth,
    part_t *part_no_array,
    part_t *machine_dimensions
): PartitionMapping<Adapter>(problemComm, NULL, NULL, NULL, env_const_),
  proc_to_task_xadj(0),
  proc_to_task_adj(0),
  task_to_proc(0),
  isOwnerofModel(true),
  proc_task_comm(0),
  task_communication_xadj(task_comm_xadj),
  task_communication_adj(task_comm_adj){

  //if mapping type is 0 then it is coordinate mapping
  pcoord_t ** virtual_machine_coordinates = machine_coords;
  if (machine_dimensions){
    // Produce wrap-shifted copies of the coordinates (owned by this ctor).
    virtual_machine_coordinates =
        this->shiftMachineCoordinates (
            proc_dim,
            machine_dimensions,
            num_processors,
            machine_coords);
  }

  this->nprocs = num_processors;

  int coordDim = task_dim;
  this->ntasks = num_tasks;

  // Task coordinates are used directly as "part centers" (not copied).
  tcoord_t **partCenters = task_coords;

  //create coordinate communication model.
  this->proc_task_comm =
      new Zoltan2::CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t>(
          proc_dim,
          virtual_machine_coordinates,
          coordDim,
          partCenters,
          this->nprocs,
          this->ntasks
      );
  this->proc_task_comm->setPartArraySize(recursion_depth);
  this->proc_task_comm->setPartArray(part_no_array);

  int myRank = problemComm->getRank();

  this->doMapping(myRank);
#ifdef gnuPlot
  this->writeMapping2(myRank);
#endif

  // Only optimize across permutations when a communication graph exists.
  if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
    this->proc_task_comm->calculateCommunicationCost(
        task_to_proc.getRawPtr(),
        task_communication_xadj.getRawPtr(),
        task_communication_adj.getRawPtr(),
        task_communication_edge_weight_
    );


    this->getBestMapping();

  }

  if (machine_dimensions){
    for (int i = 0; i < proc_dim; ++i){
      delete [] virtual_machine_coordinates[i];
    }
    delete [] virtual_machine_coordinates;
  }
#ifdef gnuPlot
  // NOTE(review): `comm_` is not declared in this constructor (the
  // parameter is `problemComm`); this branch will not compile if the
  // gnuPlot macro is defined — verify.
  if(comm_->getRank() == 0)
    this->writeMapping2(-1);
#endif
}


/// Returns the communication cost of the current mapping as computed by
/// the communication model.
double getCommunicationCostMetric(){
  return this->proc_task_comm->getCommCost();
}

/// Mapping is global here; there are no locally-stored parts.
virtual size_t getLocalNumberOfParts() const{
  return 0;
}

/// For each machine dimension, finds the largest unoccupied gap between
/// used coordinate values and shifts the coordinates below that gap up by
/// the dimension's extent, so torus wrap-around distances become
/// contiguous. Returns newly allocated coordinate arrays
/// (machine_dim x numProcs) that the caller must delete[].
/// \param machine_dim        number of machine dimensions
/// \param machine_dimensions extent of the machine along each dimension
/// \param numProcs           number of processors
/// \param mCoords            original coordinates (machine_dim x numProcs)
pcoord_t **shiftMachineCoordinates(
    int machine_dim,
    part_t *machine_dimensions,
    part_t numProcs,
    pcoord_t **mCoords){
  pcoord_t **result_machine_coords = NULL;
  result_machine_coords = new pcoord_t*[machine_dim];
  for (int i = 0; i < machine_dim; ++i){
    result_machine_coords[i] = new pcoord_t [numProcs];
  }

  for (int i = 0; i < machine_dim; ++i){
    part_t numMachinesAlongDim = machine_dimensions[i];
    // machineCounts[c] = how many processors sit at coordinate c.
    part_t *machineCounts= new part_t[numMachinesAlongDim];
    memset(machineCounts, 0, sizeof(part_t) *numMachinesAlongDim);

    int *filledCoordinates= new int[numMachinesAlongDim];

    pcoord_t *coords = mCoords[i];
    for(part_t j = 0; j < numProcs; ++j){
      part_t mc = (part_t) coords[j];
      ++machineCounts[mc];
    }

    // Collect the coordinate values actually in use, in ascending order.
    part_t filledCoordinateCount = 0;
    for(part_t j = 0; j < numMachinesAlongDim; ++j){
      if (machineCounts[j] > 0){
        filledCoordinates[filledCoordinateCount++] = j;
      }
    }

    part_t firstProcCoord = filledCoordinates[0];
    part_t firstProcCount = machineCounts[firstProcCoord];

    part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
    part_t lastProcCount = machineCounts[lastProcCoord];

    // Gap that wraps around the torus from the last used coordinate to
    // the first, and the process count on its two sides.
    part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
    part_t firstLastGapProc = lastProcCount + firstProcCount;

    part_t leftSideProcCoord = firstProcCoord;
    part_t leftSideProcCount = firstProcCount;
    part_t biggestGap = 0;
    part_t biggestGapProc = numProcs;

    part_t shiftBorderCoordinate = -1;
    for(part_t j = 1; j < filledCoordinateCount; ++j){
      part_t rightSideProcCoord= filledCoordinates[j];
      part_t rightSideProcCount = machineCounts[rightSideProcCoord];

      part_t gap = rightSideProcCoord - leftSideProcCoord;
      part_t gapProc = rightSideProcCount + leftSideProcCount;

      /* Pick the largest gap in this dimension. Use fewer process on either side
         of the largest gap to break the tie. An easy addition to this would
         be to weight the gap by the number of processes. */
      if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
        shiftBorderCoordinate = rightSideProcCoord;
        biggestGapProc = gapProc;
        biggestGap = gap;
      }
      leftSideProcCoord = rightSideProcCoord;
      leftSideProcCount = rightSideProcCount;
    }


    // If the wrap-around gap is at least as good, no shift is needed.
    if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
      shiftBorderCoordinate = -1;
    }

    for(part_t j = 0; j < numProcs; ++j){

      if (coords[j] < shiftBorderCoordinate){
        result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;

      }
      else {
        result_machine_coords[i][j] = coords[j];
      }
    }
    delete [] machineCounts;
    delete [] filledCoordinates;
  }

  return result_machine_coords;

}

/// Returns the (single) processor assigned to a task.
/// \param taskId   task of interest
/// \param numProcs out: always 1 — each task maps to exactly one processor
/// \param procs    out: pointer into the internal task_to_proc array
virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *procs) const{
  numProcs = 1;
  procs = this->task_to_proc.getRawPtr() + taskId;
}
/// Returns the processor id that task taskId is assigned to.
inline part_t getAssignedProcForTask(part_t taskId){
  return this->task_to_proc[taskId];
}
/// Returns the tasks assigned to a processor.
/// \param procId   processor of interest
/// \param numParts out: number of tasks on that processor
/// \param parts    out: pointer into the internal proc_to_task_adj array
virtual void getPartsForProc(int procId, part_t &numParts, part_t *parts) const{

  part_t task_begin = 0;
  if (procId > 0) task_begin = this->proc_to_task_xadj[procId - 1];
  part_t taskend = this->proc_to_task_xadj[procId];
  parts = this->proc_to_task_adj.getRawPtr() + task_begin;
  numParts = taskend - task_begin;
}

/// Returns a (possibly empty) view of the tasks assigned to procId.
/// The view aliases internal storage and is valid while this mapper lives.
ArrayView<part_t> getAssignedTaksForProc(part_t procId){
  part_t task_begin = 0;
  if (procId > 0) task_begin = this->proc_to_task_xadj[procId - 1];
  part_t taskend = this->proc_to_task_xadj[procId];

  if (taskend - task_begin > 0){
    ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
    return assignedParts;
  }
  else {
    // Empty view for processors with no assigned tasks.
    ArrayView <part_t> assignedParts;
    return assignedParts;
  }
}

};



/// C-style convenience wrapper around CoordinateTaskMapper: builds the
/// mapper from raw arrays, runs the mapping, and copies the resulting
/// proc-to-task CSR arrays into caller-provided output buffers.
/// \param proc_to_task_xadj out: buffer of size num_processors
/// \param proc_to_task_adj  out: buffer of size num_tasks
template <typename part_t, typename pcoord_t, typename tcoord_t>
void coordinateTaskMapperInterface(
    RCP<const Teuchos::Comm<int> > problemComm,
    int proc_dim,
    int num_processors,
    pcoord_t **machine_coords,
    int task_dim,
    part_t num_tasks,
    tcoord_t **task_coords,
    part_t *task_comm_xadj,
    part_t *task_comm_adj,
    pcoord_t *task_communication_edge_weight_, /*float-like, same size with task_communication_adj_ weight of the corresponding edge.*/
    part_t *proc_to_task_xadj, /*output*/
    part_t *proc_to_task_adj, /*output*/
    int recursion_depth,
    part_t *part_no_array,
    part_t *machine_dimensions
)
{

  const Environment *envConst_ = new Environment();

  typedef Tpetra::MultiVector<tcoord_t, part_t,part_t, KokkosClassic::DefaultNode::DefaultNodeType> tMVector_t;

  // Non-owning wrappers around the caller's arrays (false = no dealloc).
  Teuchos::ArrayRCP<part_t> task_communication_xadj (task_comm_xadj, 0, num_tasks, false);

  Teuchos::ArrayRCP<part_t> task_communication_adj;
  if (task_comm_xadj){
    Teuchos::ArrayRCP<part_t> tmp_task_communication_adj (task_comm_adj, 0, task_comm_xadj[num_tasks -1 /* KDDKDD OK for MEHMET's ODD LAYOUT; WRONG FOR TRADITIONAL */], false);
    task_communication_adj = tmp_task_communication_adj;
  }


  CoordinateTaskMapper<XpetraMultiVectorAdapter <tMVector_t>, part_t> *ctm = new CoordinateTaskMapper<XpetraMultiVectorAdapter <tMVector_t>, part_t>(
      envConst_,
      problemComm.getRawPtr(),
      proc_dim,
      num_processors,
      machine_coords,//machine_coords_,

      task_dim,
      num_tasks,
      task_coords,

      task_communication_xadj,
      task_communication_adj,
      task_communication_edge_weight_,
      recursion_depth,
      part_no_array,
      machine_dimensions
  );


  part_t* proc_to_task_xadj_;
  part_t* proc_to_task_adj_;

  ctm->getProcTask(proc_to_task_xadj_, proc_to_task_adj_);

  // Copy the internal arrays out before the mapper (their owner) is deleted.
  for (part_t i = 0; i < num_processors; ++i){
    proc_to_task_xadj[i] = proc_to_task_xadj_[i];
  }
  for (part_t i = 0; i < num_tasks; ++i){
    proc_to_task_adj[i] = proc_to_task_adj_[i];
  }
  delete ctm;
  delete envConst_;

}


}// namespace Zoltan2

#endif
1.7.6.1