Tpetra Matrix/Vector Services  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
cg-solve_file.cpp
00001 /*
00002 // @HEADER
00003 // ***********************************************************************
00004 //
00005 //          Tpetra: Templated Linear Algebra Services Package
00006 //                 Copyright (2008) Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ************************************************************************
00041 // @HEADER
00042 */
00043 #pragma message "Start Compiling"
00044 #include <Teuchos_GlobalMPISession.hpp>
00045 #include <Teuchos_oblackholestream.hpp>
00046 #include <Teuchos_CommandLineProcessor.hpp>
00047 #include <Teuchos_Array.hpp>
00048 
00049 // I/O for Harwell-Boeing files
00050 #include <Tpetra_MatrixIO.hpp>
00051 
00052 #include "Tpetra_Power_Method.hpp"
00053 
00054 #include "Tpetra_DefaultPlatform.hpp"
00055 #include "Tpetra_Version.hpp"
00056 #include "Tpetra_Map.hpp"
00057 #include "Tpetra_KokkosRefactor_MultiVector.hpp"
00058 #include "Tpetra_KokkosRefactor_Vector.hpp"
00059 #include "Tpetra_KokkosRefactor_CrsMatrix.hpp"
00060 
00061 #include <MatrixMarket_Tpetra.hpp>
00062 #include <algorithm>
00063 #include <functional>
00064 #ifndef USE_KOKKOS_CLASSIC
00065 #include <KokkosCompat_ClassicNodeAPI_Wrapper.hpp>
00066 #else
00067 #include <Kokkos_TPINode.hpp>
00068 #endif
00069 #include <impl/Kokkos_Timer.hpp>
00070 
00071 template<class CrsMatrix, class Vector>
00072 void cg_solve(Teuchos::RCP<CrsMatrix> A, Teuchos::RCP<Vector> b, Teuchos::RCP<Vector> x, int myproc) {
00073   typedef double ScalarType;
00074   typedef double magnitude_type;
00075   typedef typename CrsMatrix::local_ordinal_type LocalOrdinalType;
00076   Teuchos::RCP<Vector> r,p,Ap;
00077   int max_iter=200;
00078   double tolerance = 1e-8;
00079   r = Tpetra::createVector<ScalarType>(A->getRangeMap());
00080   p = Tpetra::createVector<ScalarType>(A->getRangeMap());
00081   Ap = Tpetra::createVector<ScalarType>(A->getRangeMap());
00082 
00083   int length = r->getLocalLength();
00084   for(int i = 0;i<length;i++) {
00085     x->replaceLocalValue(i,0);
00086     r->replaceLocalValue(i,1);
00087     Ap->replaceLocalValue(i,1);
00088   }
00089 
00090   magnitude_type normr = 0;
00091   magnitude_type rtrans = 0;
00092   magnitude_type oldrtrans = 0;
00093 
00094   LocalOrdinalType print_freq = max_iter/10;
00095   if (print_freq>50) print_freq = 50;
00096   if (print_freq<1)  print_freq = 1;
00097 
00098   ScalarType one = 1.0;
00099   ScalarType zero = 0.0;
00100 
00101   double dottime = 0;
00102   double addtime = 0;
00103   double matvectime = 0;
00104 
00105   Kokkos::Impl::Timer timer;
00106   p->update(1.0,*x,0.0,*x,0.0);
00107   addtime += timer.seconds(); timer.reset();
00108 
00109 
00110   A->apply(*p, *Ap);
00111   matvectime += timer.seconds(); timer.reset();
00112 
00113   r->update(1.0,*b,-1.0,*Ap,0.0);
00114   addtime += timer.seconds(); timer.reset();
00115 
00116   rtrans = r->dot(*r);
00117   dottime += timer.seconds(); timer.reset();
00118 
00119   normr = std::sqrt(rtrans);
00120 
00121   if (myproc == 0) {
00122     std::cout << "Initial Residual = "<< normr << std::endl;
00123   }
00124 
00125   magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();
00126 
00127   int num_iters=0;
00128   for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) {
00129     if (k == 1) {
00130       p->update(1.0,*r,0.0,*r,0.0);
00131       addtime += timer.seconds(); timer.reset();
00132     }
00133     else {
00134       oldrtrans = rtrans;
00135       rtrans = r->dot(*r);
00136       dottime += timer.seconds(); timer.reset();
00137       magnitude_type beta = rtrans/oldrtrans;
00138       p->update(beta,*p,1.0,*r,0.0);
00139       addtime += timer.seconds(); timer.reset();
00140     }
00141     normr = std::sqrt(rtrans);
00142     if (myproc == 0 && (k%print_freq==0 || k==max_iter)) {
00143       std::cout << "Iteration = "<<k<<"   Residual = "<<normr<<std::endl;
00144     }
00145 
00146     magnitude_type alpha = 0;
00147     magnitude_type p_ap_dot = 0;
00148     A->apply(*p, *Ap);
00149     matvectime += timer.seconds(); timer.reset();
00150     p_ap_dot = Ap->dot(*p);
00151     dottime += timer.seconds(); timer.reset();
00152 
00153    if (p_ap_dot < brkdown_tol) {
00154       if (p_ap_dot < 0 ) {
00155         std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl;
00156         return;
00157       }
00158       else brkdown_tol = 0.1 * p_ap_dot;
00159     }
00160     alpha = rtrans/p_ap_dot;
00161 
00162 
00163     x->update(1.0,*x,alpha,*p,0.0);
00164     r->update(1.0,*r,-alpha,*Ap,0.0);
00165     addtime += timer.seconds(); timer.reset();
00166 
00167     num_iters = k;
00168   }
00169   rtrans = r->dot(*r);
00170 
00171   normr = std::sqrt(rtrans);
00172 
00173   //if (myproc == 0) {
00174     std::cout << "Final Residual = "<< normr << " after " << num_iters << " iterations" << std::endl;
00175     std::cout << "WAXPBY time: " << addtime << std::endl;
00176     std::cout << "DOT    time: " << dottime << std::endl;
00177     std::cout << "MATVEC time: " << matvectime << std::endl;
00178     std::cout << "CG     time: " << matvectime+addtime+dottime << std::endl;
00179  // }
00180 
00181 }
00182 
00183 
00184 int main(int argc, char *argv[]) {
00185   Teuchos::oblackholestream blackhole;
00186   Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
00187 
00188   //
00189   // Specify types used in this example
00190   //
00191   typedef double                                                  Scalar;
00192   typedef Teuchos::ScalarTraits<Scalar>::magnitudeType            Magnitude;
00193   typedef int                                                     Ordinal;
00194 
00195 #pragma message "HUHU?"
00196 #ifndef USE_KOKKOS_CLASSIC
00197 #ifdef COMPILE_CUDA
00198   typedef Kokkos::Compat::KokkosCudaWrapperNode                Node;
00199 #pragma message "Compile CUDA"
00200 #else
00201   #ifdef KOKKOS_HAVE_OPENMP
00202   #pragma message "Compile OpenMP"
00203     typedef Kokkos::Compat::KokkosOpenMPWrapperNode                Node;
00204   #else
00205   #ifdef KOKKOS_HAVE_PTHREAD
00206 #pragma message "Compile Threads"
00207     typedef Kokkos::Compat::KokkosThreadsWrapperNode                Node;
00208     #endif
00209   #endif
00210 #endif
00211 #else
00212   typedef KokkosClassic::TPINode                Node;
00213 #endif
00214   typedef Tpetra::MpiPlatform<Node>                            Platform;
00215   typedef Tpetra::CrsMatrix<Scalar,Ordinal,Ordinal,Node>          CrsMatrix;
00216   using Teuchos::RCP;
00217   using Teuchos::tuple;
00218 
00219 
00220   //
00221   // Get example parameters from command-line processor
00222   //
00223   bool printMatrix = false;
00224   bool verbose = false;
00225   int niters = 100;
00226   int numthreads = 1;
00227   int numteams = 1;
00228   Magnitude tolerance = 1.0e-2;
00229   std::string filename("dc1.mtx");
00230   std::string filename_vector("dc1_b.mtx");
00231   Teuchos::CommandLineProcessor cmdp(false,true);
00232   cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
00233   cmdp.setOption("numthreads",&numthreads,"Number of threads per thread team.");
00234   cmdp.setOption("numteams",&numteams,"Number of thread teams.");
00235   cmdp.setOption("filename",&filename,"Filename for Harwell-Boeing test matrix.");
00236   cmdp.setOption("filename_vector",&filename_vector,"Filename for Harwell-Boeing test matrix.");
00237   cmdp.setOption("tolerance",&tolerance,"Relative residual tolerance used for solver.");
00238   cmdp.setOption("iterations",&niters,"Maximum number of iterations.");
00239   cmdp.setOption("printMatrix","noPrintMatrix",&printMatrix,"Print the full matrix after reading it.");
00240   if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
00241     return -1;
00242   }
00243 
00244   int verboseint = verbose?1:0;
00245   Teuchos::ParameterList params;
00246   params.set("Num Threads",numthreads,"Number of Threads per Threadteam");
00247   params.set("Num Teams",numteams,"Number of Threadteams");
00248   params.set("Verbose",verboseint,"Verbose output");
00249 
00250   //
00251   // Get the communicator and node
00252   //
00253   Node anode(params);
00254   RCP<Node>  node(&anode,false);
00255 
00256   Platform platform(node);
00257   RCP<const Teuchos::Comm<int> > comm = platform.getComm();
00258 
00259   /*Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
00260   RCP<const Teuchos::Comm<int> > comm = platform.getComm();*/
00261   const int myRank = comm->getRank();
00262   verbose = verbose || (myRank==0);
00263   //
00264   // Say hello, print some communicator info
00265   //
00266   if (verbose) {
00267     std::cout << "\n" << Tpetra::version() << std::endl << std::endl;
00268   }
00269   std::cout << "Comm info: " << *comm;
00270 
00271 
00272   // Read Tpetra::CrsMatrix from file
00273   //
00274   RCP<CrsMatrix> A = Tpetra::MatrixMarket::Reader<CrsMatrix>::readSparseFile(filename,comm,node);
00275 
00276   if (printMatrix) {
00277     RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
00278     A->describe(*fos, Teuchos::VERB_EXTREME);
00279   }
00280   else if (verbose) {
00281     std::cout << std::endl << A->description() << std::endl << std::endl;
00282   }
00283 
00284   typedef Tpetra::Vector<Scalar,Ordinal,Ordinal,Node> Vector;
00285 
00286   if ( A->getRangeMap() != A->getDomainMap() ) {
00287     throw std::runtime_error("TpetraExamples::powerMethod(): operator must have domain and range maps that are equivalent.");
00288   }
00289   // create three vectors, fill z with random numbers
00290   Teuchos::RCP<Vector> b, x;
00291   std::cout << "Create Vector\n";
00292   RCP<const CrsMatrix::map_type> map = A->getRangeMap();
00293   b = Tpetra::MatrixMarket::Reader<CrsMatrix>::readVectorFile(filename_vector,comm,node,map);
00294   x = Tpetra::createVector<Scalar>(A->getRangeMap());
00295 
00296   cg_solve(A,b,x,myRank);
00297 
00298   return 0;
00299 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines