Intrepid
/usr/src/RPM/BUILD/trilinos-11.12.1/packages/intrepid/example/Drivers/example_17.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ************************************************************************
00003 //
00004 //                           Intrepid Package
00005 //                 Copyright (2007) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Pavel Bochev  (pbboche@sandia.gov)
00038 //                    Denis Ridzal  (dridzal@sandia.gov), or
00039 //                    Kara Peterson (kjpeter@sandia.gov)
00040 //
00041 // ************************************************************************
00042 // @HEADER
00043 
00081 // Intrepid includes
00082 #include "Intrepid_FunctionSpaceTools.hpp"
00083 #include "Intrepid_FunctionSpaceToolsInPlace.hpp"
00084 #include "Intrepid_FieldContainer.hpp"
00085 #include "Intrepid_CellTools.hpp"
00086 #include "Intrepid_CubaturePolylib.hpp"
00087 #include "Intrepid_CubatureTensor.hpp"
00088 #include "Intrepid_HGRAD_HEX_Cn_FEM.hpp"
00089 #include "Intrepid_TensorProductSpaceTools.hpp"
00090 #include "Intrepid_Utils.hpp"
00091 
00092 // Epetra includes
00093 #include "Epetra_Time.h"
00094 #include "Epetra_Map.h"
00095 #include "Epetra_FEVector.h"
00096 #include "Epetra_SerialComm.h"
00097 
00098 // Teuchos includes
00099 #include "Teuchos_oblackholestream.hpp"
00100 #include "Teuchos_RCP.hpp"
00101 #include "Teuchos_Time.hpp"
00102 #include "Teuchos_SerialDenseMatrix.hpp"
00103 
00104 // Shards includes
00105 #include "Shards_CellTopology.hpp"
00106 
00107 // EpetraExt includes
00108 #include "EpetraExt_MultiVectorOut.h"
00109 
00110 using namespace std;
00111 using namespace Intrepid;
00112 using Teuchos::rcp;
00113 
00114 int main(int argc, char *argv[]) {
00115 
00116   //Check number of arguments
00117   if (argc < 4) {
00118     std::cout <<"\n>>> ERROR: Invalid number of arguments.\n\n";
00119     std::cout <<"Usage:\n\n";
00120     std::cout <<"  ./Intrepid_example_Drivers_Example_14.exe deg NX NY NZ verbose\n\n";
00121     std::cout <<" where \n";
00122     std::cout <<"   int deg             - polynomial degree to be used (assumed >= 1) \n";
00123     std::cout <<"   int NX              - num intervals in x direction (assumed box domain, 0,1) \n";
00124     std::cout <<"   int NY              - num intervals in y direction (assumed box domain, 0,1) \n";
00125     std::cout <<"   int NZ              - num intervals in y direction (assumed box domain, 0,1) \n";
00126     std::cout <<"   verbose (optional)  - any character, indicates verbose output \n\n";
00127     exit(1);
00128   }
00129   
00130   // This little trick lets us print to std::cout only if
00131   // a (dummy) command-line argument is provided.
00132   int iprint     = argc - 1;
00133   Teuchos::RCP<std::ostream> outStream;
00134   Teuchos::oblackholestream bhs; // outputs nothing
00135   if (iprint > 2)
00136     outStream = Teuchos::rcp(&std::cout, false);
00137   else
00138     outStream = Teuchos::rcp(&bhs, false);
00139   
00140   // Save the format state of the original std::cout.
00141   Teuchos::oblackholestream oldFormatState;
00142   oldFormatState.copyfmt(std::cout);
00143   
00144   *outStream                                                            \
00145     << "===============================================================================\n" \
00146     << "|                                                                             |\n" \
00147     << "|  Example: Apply Stiffness Matrix for                                        |\n" \
00148     << "|                   Poisson Operator on Hexahedral Mesh with BLAS             |\n" \
00149     << "|                                                                             |\n" \
00150     << "|  Questions? Contact  Pavel Bochev  (pbboche@sandia.gov),                    |\n" \
00151     << "|                      Denis Ridzal  (dridzal@sandia.gov),                    |\n" \
00152     << "|                      Kara Peterson (kjpeter@sandia.gov).                    |\n" \
00153     << "|                                                                             |\n" \
00154     << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
00155     << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
00156     << "|                                                                             |\n" \
00157     << "===============================================================================\n";
00158 
00159   
00160   // ************************************ GET INPUTS **************************************
00161   
00162   int deg          = atoi(argv[1]);  // polynomial degree to use
00163   int NX           = atoi(argv[2]);  // num intervals in x direction (assumed box domain, 0,1)
00164   int NY           = atoi(argv[3]);  // num intervals in y direction (assumed box domain, 0,1)
00165   int NZ           = atoi(argv[4]);  // num intervals in y direction (assumed box domain, 0,1)
00166   
00167 
00168   // *********************************** CELL TOPOLOGY **********************************
00169   
00170   // Get cell topology for base hexahedron
00171   typedef shards::CellTopology    CellTopology;
00172   CellTopology hex_8(shards::getCellTopologyData<shards::Hexahedron<8> >() );
00173   
00174   // Get dimensions 
00175   int numNodesPerElem = hex_8.getNodeCount();
00176   int spaceDim = hex_8.getDimension();
00177   
00178   // *********************************** GENERATE MESH ************************************
00179   
00180   *outStream << "Generating mesh ... \n\n";
00181   
00182   *outStream << "   NX" << "   NY" << "   NZ\n";
00183   *outStream << std::setw(5) << NX <<
00184     std::setw(5) << NY << std::setw(5) << NZ << "\n\n";
00185   
00186   // Print mesh information
00187   int numElems = NX*NY*NZ;
00188   int numNodes = (NX+1)*(NY+1)*(NZ+1);
00189   *outStream << " Number of Elements: " << numElems << " \n";
00190   *outStream << "    Number of Nodes: " << numNodes << " \n\n";
00191   
00192   // Cube
00193   double leftX = 0.0, rightX = 1.0;
00194   double leftY = 0.0, rightY = 1.0;
00195   double leftZ = 0.0, rightZ = 1.0;
00196 
00197   // Mesh spacing
00198   double hx = (rightX-leftX)/((double)NX);
00199   double hy = (rightY-leftY)/((double)NY);
00200   double hz = (rightZ-leftZ)/((double)NZ);
00201 
00202   // Get nodal coordinates
00203   FieldContainer<double> nodeCoord(numNodes, spaceDim);
00204   FieldContainer<int> nodeOnBoundary(numNodes);
00205   int inode = 0;
00206   for (int k=0; k<NZ+1; k++) 
00207     {
00208       for (int j=0; j<NY+1; j++) 
00209         {
00210           for (int i=0; i<NX+1; i++) 
00211             {
00212               nodeCoord(inode,0) = leftX + (double)i*hx;
00213               nodeCoord(inode,1) = leftY + (double)j*hy;
00214               nodeCoord(inode,2) = leftZ + (double)k*hz;
00215               if (k==0 || k==NZ || j==0 || i==0 || j==NY || i==NX)
00216                 {
00217                   nodeOnBoundary(inode)=1;
00218                 }
00219               else 
00220                 {
00221                   nodeOnBoundary(inode)=0;
00222                 }
00223               inode++;
00224             }
00225         }
00226     }
00227 //#define DUMP_DATA
00228 #ifdef DUMP_DATA
00229   // Print nodal coords
00230   ofstream fcoordout("coords.dat");
00231   for (int i=0; i<numNodes; i++) {
00232     fcoordout << nodeCoord(i,0) <<" ";
00233     fcoordout << nodeCoord(i,1) <<" ";
00234     fcoordout << nodeCoord(i,2) <<"\n";
00235   }
00236   fcoordout.close();
00237 #endif
00238   
00239   
00240   
00241   // ********************************* CUBATURE AND BASIS *********************************** 
00242   *outStream << "Getting cubature and basis ... \n\n";
00243   
00244   // Get numerical integration points and weights
00245   // I only need this on the line since I'm doing tensor products 
00246   Teuchos::RCP<Cubature<double,FieldContainer<double>,FieldContainer<double> > > glcub
00247     = Teuchos::rcp(new CubaturePolylib<double,FieldContainer<double>,FieldContainer<double> >(2*deg-1,PL_GAUSS_LOBATTO) );
00248       
00249   const int numCubPoints1D = glcub->getNumPoints();
00250 
00251   FieldContainer<double> cubPoints1D(numCubPoints1D, 1);
00252   FieldContainer<double> cubWeights1D(numCubPoints1D);
00253   
00254   glcub->getCubature(cubPoints1D,cubWeights1D);
00255 
00256   std::vector<Teuchos::RCP<Cubature<double,FieldContainer<double>,FieldContainer<double> > > >
00257     cub_to_tensor;  
00258   cub_to_tensor.push_back( glcub );
00259   cub_to_tensor.push_back( glcub );
00260   cub_to_tensor.push_back( glcub );
00261 
00262   CubatureTensor<double,FieldContainer<double>,FieldContainer<double> > cubhex( cub_to_tensor );
00263 
00264   Basis_HGRAD_HEX_Cn_FEM<double, FieldContainer<double> > hexBasis( deg , POINTTYPE_SPECTRAL );
00265   const int numFields = hexBasis.getCardinality();
00266 
00267   // get the bases tabulated at the quadrature points, dimension-by-dimension
00268   const int numCubPoints = cubhex.getNumPoints();
00269   FieldContainer<double> cubPoints3D( numCubPoints , spaceDim );
00270   FieldContainer<double> cubWeights3D( numCubPoints );
00271   FieldContainer<double> basisGrads( numFields , numCubPoints , spaceDim );
00272   cubhex.getCubature( cubPoints3D , cubWeights3D );
00273   hexBasis.getValues( basisGrads , cubPoints3D , OPERATOR_GRAD );
00274 
00275 
00276   FieldContainer<int> elemToNode(numElems, numNodesPerElem);
00277   int ielem = 0;
00278   for (int k=0; k<NZ; k++) 
00279     {
00280       for (int j=0; j<NY; j++) 
00281         {
00282           for (int i=0; i<NX; i++) 
00283             {
00284               elemToNode(ielem,0) = k * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i;
00285               elemToNode(ielem,1) = k * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i + 1;
00286               elemToNode(ielem,2) = k * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i + 1;
00287               elemToNode(ielem,3) = k * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i;
00288               elemToNode(ielem,4) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i;
00289               elemToNode(ielem,5) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i + 1;
00290               elemToNode(ielem,6) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i + 1;
00291               elemToNode(ielem,7) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i;
00292               ielem++;
00293             }
00294         }
00295     }
00296 #ifdef DUMP_DATA
00297   // Output connectivity
00298   ofstream fe2nout("elem2node.dat");
00299   for (int k=0;k<NZ;k++)
00300     {
00301       for (int j=0; j<NY; j++) 
00302         {
00303           for (int i=0; i<NX; i++) 
00304             {
00305               int ielem = i + j * NX + k * NY * NY;
00306               for (int m=0; m<numNodesPerElem; m++)
00307                 {
00308                   fe2nout << elemToNode(ielem,m) <<"  ";
00309                 }
00310               fe2nout <<"\n";
00311             }
00312         }
00313     }
00314   fe2nout.close();
00315 #endif
00316 
00317 
00318   // ********************************* 3-D LOCAL-TO-GLOBAL MAPPING *******************************
00319   FieldContainer<int> ltgMapping(numElems,numFields);
00320   const int numDOF = (NX*deg+1)*(NY*deg+1)*(NZ*deg+1);
00321   ielem=0;
00322   for (int k=0;k<NZ;k++) 
00323     {
00324       for (int j=0;j<NY;j++) 
00325         {
00326           for (int i=0;i<NX;i++) 
00327             {
00328               const int start = k * ( NY * deg + 1 ) * ( NX * deg + 1 ) + j * ( NX * deg + 1 ) + i * deg;
00329               // loop over local dof on this cell
00330               int local_dof_cur=0;
00331               for (int kloc=0;kloc<=deg;kloc++) 
00332                 {
00333                   for (int jloc=0;jloc<=deg;jloc++) 
00334                     {
00335                       for (int iloc=0;iloc<=deg;iloc++)
00336                         {
00337                           ltgMapping(ielem,local_dof_cur) = start 
00338                             + kloc * ( NX * deg + 1 ) * ( NY * deg + 1 )
00339                             + jloc * ( NX * deg + 1 )
00340                             + iloc;
00341                           local_dof_cur++;
00342                         }
00343                     }
00344                 }
00345               ielem++;
00346             }
00347         }
00348     }
00349 #ifdef DUMP_DATA
00350   // Output ltg mapping 
00351   ielem = 0;
00352   ofstream ltgout("ltg.dat");
00353   for (int k=0;k<NZ;k++)  
00354     {
00355       for (int j=0; j<NY; j++) 
00356         {
00357           for (int i=0; i<NX; i++) 
00358             {
00359               int ielem = i + j * NX + k * NX * NY;
00360               for (int m=0; m<numFields; m++)
00361                 {
00362                   ltgout << ltgMapping(ielem,m) <<"  ";
00363                 }
00364               ltgout <<"\n";
00365             }
00366         }
00367     }
00368   ltgout.close();
00369 #endif
00370 
00371   // ********** DECLARE GLOBAL OBJECTS *************
00372   Epetra_SerialComm Comm;
00373   Epetra_Map globalMapG(numDOF, 0, Comm);
00374   Epetra_FEVector u(globalMapG);  u.Random();
00375   Epetra_FEVector Ku(globalMapG);
00376 
00377   // ************* For Jacobians **********************
00378   FieldContainer<double> cellVertices(numElems,numNodesPerElem,spaceDim);
00379   FieldContainer<double> cellJacobian(numElems,numCubPoints,spaceDim,spaceDim);
00380   FieldContainer<double> cellJacobInv(numElems,numCubPoints,spaceDim,spaceDim);
00381   FieldContainer<double> cellJacobDet(numElems,numCubPoints);
00382 
00383 
00384   // get vertices of cells (for computing Jacobians)
00385   for (int i=0;i<numElems;i++)
00386     {
00387       for (int j=0;j<numNodesPerElem;j++)
00388         {
00389           const int nodeCur = elemToNode(i,j);
00390           for (int k=0;k<spaceDim;k++) 
00391             {
00392               cellVertices(i,j,k) = nodeCoord(nodeCur,k);
00393             }
00394         }
00395     }
00396 
00397   // jacobian evaluation 
00398   CellTools<double>::setJacobian(cellJacobian,cubPoints3D,cellVertices,hex_8);
00399   CellTools<double>::setJacobianInv(cellJacobInv, cellJacobian );
00400   CellTools<double>::setJacobianDet(cellJacobDet, cellJacobian );
00401 
00402 
00403   // ************* MATRIX-FREE APPLICATION 
00404   FieldContainer<double> uScattered(numElems,1,numFields);
00405   FieldContainer<double> KuScattered(numElems,1,numFields);
00406   FieldContainer<double> gradU(numElems,1,numFields,3);
00407 
00408   // get views in SDM format for BLAS purposes
00409   Teuchos::SerialDenseMatrix<int,double> refGradsAsMat( Teuchos::View ,
00410                                                      &basisGrads[0] ,
00411                                                      numCubPoints * spaceDim ,
00412                                                      numCubPoints * spaceDim ,
00413                                                      numFields );
00414   Teuchos::SerialDenseMatrix<int,double> uScatAsMat( Teuchos::View ,
00415                                                      &uScattered[0] ,
00416                                                      numFields ,
00417                                                      numFields ,
00418                                                      numElems );
00419   Teuchos::SerialDenseMatrix<int,double> GraduScatAsMat( Teuchos::View ,
00420                                                          &gradU[0] ,
00421                                                          numCubPoints * spaceDim ,
00422                                                          numCubPoints * spaceDim ,
00423                                                          numElems );
00424   Teuchos::SerialDenseMatrix<int,double> KuScatAsMat( Teuchos::View ,
00425                                                       &KuScattered[0] ,
00426                                                       numFields ,
00427                                                       numFields ,
00428                                                       numElems );
00429 
00430 
00431   u.GlobalAssemble();
00432 
00433 
00434 
00435   Ku.PutScalar(0.0);
00436   Ku.GlobalAssemble();
00437 
00438   double *uVals = u[0];
00439   double *KuVals = Ku[0];
00440 
00441   Teuchos::Time full_timer( "Time to apply operator matrix-free:" );
00442   Teuchos::Time scatter_timer( "Time to scatter dof:" );
00443   Teuchos::Time elementwise_timer( "Time to do elementwise computation:" ); 
00444   Teuchos::Time grad_timer( "Time to compute gradients:" );
00445   Teuchos::Time pointwise_timer( "Time to do pointwise transformations:" );
00446   Teuchos::Time moment_timer( "Time to compute moments:" );
00447   Teuchos::Time gather_timer( "Time to gather dof:" );
00448 
00449   full_timer.start();
00450 
00451   scatter_timer.start();
00452   for (int k=0; k<numElems; k++) 
00453     {
00454       for (int i=0;i<numFields;i++) 
00455         {
00456           uScattered(k,0,i) = uVals[ltgMapping(k,i)];
00457         }
00458     }
00459   scatter_timer.stop();
00460 
00461   elementwise_timer.start();
00462 
00463   grad_timer.start();
00464   // reference grad per cell with BLAS
00465   GraduScatAsMat.multiply( Teuchos::NO_TRANS , Teuchos::NO_TRANS ,
00466                            1.0 , refGradsAsMat , uScatAsMat , 0.0 );
00467                            
00468 
00469   grad_timer.stop();
00470   pointwise_timer.start();
00471   // pointwise transformations of gradients
00472   Intrepid::FunctionSpaceToolsInPlace::HGRADtransformGRAD<double>( gradU , cellJacobian );
00473   Intrepid::FunctionSpaceToolsInPlace::HGRADtransformGRADDual<double>( gradU , cellJacobian );
00474   Intrepid::FunctionSpaceToolsInPlace::multiplyMeasure<double>( gradU , cellJacobDet );
00475   pointwise_timer.stop();
00476   moment_timer.start();
00477 
00478   KuScatAsMat.multiply( Teuchos::TRANS , Teuchos::NO_TRANS , 1.0 , refGradsAsMat , GraduScatAsMat , 0.0 );
00479 
00480   moment_timer.stop();
00481   elementwise_timer.stop();
00482   gather_timer.start();
00483   for (int k=0;k<numElems;k++)
00484     {
00485       for (int i=0;i<numFields;i++)
00486         {
00487           KuVals[ltgMapping(k,i)] += KuScattered(k,0,i);
00488         }
00489     }
00490   gather_timer.stop();
00491   full_timer.stop();
00492 
00493   *outStream << full_timer.name() << " " << full_timer.totalElapsedTime() << " sec\n";
00494   *outStream << "\t" << scatter_timer.name() << " " << scatter_timer.totalElapsedTime() << " sec\n";
00495   *outStream << "\t" << elementwise_timer.name() << " " << elementwise_timer.totalElapsedTime() << " sec\n";
00496   *outStream << "\t\t" << grad_timer.name() << " " << grad_timer.totalElapsedTime() << " sec\n";
00497   *outStream << "\t\t" << pointwise_timer.name() << " " << pointwise_timer.totalElapsedTime() << " sec\n";
00498   *outStream << "\t\t" << moment_timer.name() << " " << moment_timer.totalElapsedTime() << " sec\n";
00499   *outStream << "\t" << gather_timer.name() << " " << gather_timer.totalElapsedTime() << " sec\n";
00500 
00501 
00502   *outStream << "End Result: TEST PASSED\n";
00503   
00504   // reset format state of std::cout
00505   std::cout.copyfmt(oldFormatState);
00506   
00507   return 0;
00508 }
00509