Teuchos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Teuchos_BLAS.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_BLAS.hpp"
00043 #include "Teuchos_BLAS_wrappers.hpp"
00044 
00045 /* for INTEL_CXML, the second arg may need to be changed to 'one'.  If so
00046 the appropriate declaration of one will need to be added back into
00047 functions that include the macro:
00048 */
00049 
00050 namespace {
00051 #if defined (INTEL_CXML)
00052         unsigned int one=1;
00053 #endif
00054 } // namespace
00055 
00056 #ifdef CHAR_MACRO
00057 #undef CHAR_MACRO
00058 #endif
00059 #if defined (INTEL_CXML)
00060 #define CHAR_MACRO(char_var) &char_var, one
00061 #else
00062 #define CHAR_MACRO(char_var) &char_var
00063 #endif
00064 
00065 
00066 const char Teuchos::ESideChar[] = {'L' , 'R' };
00067 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
00068 const char Teuchos::EUploChar[] = {'U' , 'L' };
00069 const char Teuchos::EDiagChar[] = {'U' , 'N' };
00070 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
00071 //const char Teuchos::EFactChar[] = {'F', 'N' };
00072 //const char Teuchos::ENormChar[] = {'O', 'I' };
00073 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
00074 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
00075 //const char Teuchos::EJobSChar[] = {'E', 'S' };
00076 //const char Teuchos::EJobVSChar[] = {'V', 'N' };
00077 //const char Teuchos::EHowmnyChar[] = {'A', 'S' };
00078 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
00079 //const char Teuchos::ESortChar[] = {'N', 'S'};
00080 
00081 
00082 namespace {
00083 
00084 
00085 template<typename Scalar>
00086 Scalar generic_dot(const int n, const Scalar* x, const int incx,
00087   const Scalar* y, const int incy)
00088 {
00089   typedef Teuchos::ScalarTraits<Scalar> ST;
00090   Scalar dot = 0.0;
00091   if (incx==1 && incy==1) {
00092     for (int i = 0; i < n; ++i)
00093       dot += (*x++)*ST::conjugate(*y++);
00094   }
00095   else {
00096     if (incx < 0)
00097       x = x - incx*(n-1);
00098     if (incy < 0)
00099       y = y - incy*(n-1);
00100     for (int i = 0; i < n; ++i, x+=incx, y+=incy)
00101       dot += (*x)*ST::conjugate(*y);
00102   }
00103   return dot;
00104 }
00105 
00106 
00107 } // namespace
00108 
00109 
00110 namespace Teuchos {
00111 
00112 //Explicitly instantiating these templates for windows due to an issue with
00113 //resolving them when linking dlls.
00114 #ifdef _WIN32
00115 #  ifdef HAVE_TEUCHOS_COMPLEX
00116      template BLAS<long int, std::complex<float> >;
00117      template BLAS<long int, std::complex<double> >;
00118 #  endif
00119      template BLAS<long int, float>;
00120      template BLAS<long int, double>;
00121 #endif
00122 
00123   // *************************** BLAS<int,float> DEFINITIONS ******************************
00124 
00125   void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
00126   { SROTG_F77(da, db, c, s ); }
00127 
00128   void BLAS<int, float>::ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const
00129   { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00130 
00131 
00132   float BLAS<int, float>::ASUM(const int n, const float* x, const int incx) const
00133   {
00134 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00135     return cblas_sasum(n, x, incx);
00136 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00137     float tmp = SASUM_F77(&n, x, &incx);
00138     return tmp;
00139 #else
00140     typedef ScalarTraits<float> ST;
00141     float sum = 0.0;
00142     if (incx == 1) {
00143       for (int i = 0; i < n; ++i)
00144         sum += ST::magnitude(*x++);
00145     }
00146     else {
00147       for (int i = 0; i < n; ++i, x+=incx)
00148         sum += ST::magnitude(*x);
00149     }
00150     return sum;
00151 #endif
00152   }
00153 
00154   void BLAS<int, float>::AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const
00155   { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00156 
00157   void BLAS<int, float>::COPY(const int n, const float* x, const int incx, float* y, const int incy) const
00158   { SCOPY_F77(&n, x, &incx, y, &incy); }
00159 
00160   float BLAS<int, float>::DOT(const int n, const float* x, const int incx, const float* y, const int incy) const
00161   {
00162 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00163     return cblas_sdot(n, x, incx, y, incy);
00164 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00165     return SDOT_F77(&n, x, &incx, y, &incy);
00166 #else
00167     return generic_dot(n, x, incx, y, incy);
00168 #endif
00169   }
00170 
00171   int BLAS<int, float>::IAMAX(const int n, const float* x, const int incx) const
00172   { return ISAMAX_F77(&n, x, &incx); }
00173 
00174   float BLAS<int, float>::NRM2(const int n, const float* x, const int incx) const
00175   {
00176 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00177     return cblas_snrm2(n, x, incx);
00178 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00179     return SNRM2_F77(&n, x, &incx);
00180 #else
00181     return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
00182 #endif
00183   }
00184 
00185   void BLAS<int, float>::SCAL(const int n, const float alpha, float* x, const int incx) const
00186   { SSCAL_F77(&n, &alpha, x, &incx); }
00187 
00188   void BLAS<int, float>::GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const
00189   { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00190 
00191   void BLAS<int, float>::GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const
00192   { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00193 
00194   void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const
00195   { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00196 
00197   void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const
00198   { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00199 
00200   void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const
00201   { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00202 
00203   void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const
00204   { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00205 
00206   void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const
00207   { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00208 
00209   void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const
00210   { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00211 
00212   // *************************** BLAS<int,double> DEFINITIONS ******************************
00213 
00214   void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
00215   { DROTG_F77(da, db, c, s); }
00216 
00217   void BLAS<int, double>::ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const
00218   { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00219 
00220   double BLAS<int, double>::ASUM(const int n, const double* x, const int incx) const
00221   { return DASUM_F77(&n, x, &incx); }
00222 
00223   void BLAS<int, double>::AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const
00224   { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00225 
00226   void BLAS<int, double>::COPY(const int n, const double* x, const int incx, double* y, const int incy) const
00227   { DCOPY_F77(&n, x, &incx, y, &incy); }
00228 
00229   double BLAS<int, double>::DOT(const int n, const double* x, const int incx, const double* y, const int incy) const
00230   {
00231     return DDOT_F77(&n, x, &incx, y, &incy);
00232   }
00233 
00234   int BLAS<int, double>::IAMAX(const int n, const double* x, const int incx) const
00235   { return IDAMAX_F77(&n, x, &incx); }
00236 
00237   double BLAS<int, double>::NRM2(const int n, const double* x, const int incx) const
00238   { return DNRM2_F77(&n, x, &incx); }
00239 
00240   void BLAS<int, double>::SCAL(const int n, const double alpha, double* x, const int incx) const
00241   { DSCAL_F77(&n, &alpha, x, &incx); }
00242 
00243   void BLAS<int, double>::GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const
00244   { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00245 
00246   void BLAS<int, double>::GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const
00247   { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00248 
00249   void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const
00250   { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00251 
00252   void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const
00253   { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00254 
00255   void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const
00256   { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00257 
00258   void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const
00259   { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00260 
00261   void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const
00262   { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00263 
00264   void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const
00265   { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00266 
00267 #ifdef HAVE_TEUCHOS_COMPLEX
00268 
00269   // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
00270 
00271   void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
00272   { CROTG_F77(da, db, c, s ); }
00273 
00274   void BLAS<int, std::complex<float> >::ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const
00275   { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00276 
00277   float BLAS<int, std::complex<float> >::ASUM(const int n, const std::complex<float>* x, const int incx) const
00278   {
00279 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00280     return cblas_scasum(n, x, incx);
00281 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
00282     return (float) SCASUM_F77(&n, x, &incx);
00283 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00284     return SCASUM_F77(&n, x, &incx);
00285 #else // Wow, you just plain don't have this routine.
00286     // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
00287     // I've enhanced this by accumulating in double precision.
00288     double result = 0;
00289     if (incx == 1) {
00290       for (int i = 0; i < n; ++i) {
00291   result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
00292       }
00293     } else {
00294       const int nincx = n * incx;
00295       for (int i = 0; i < nincx; i += incx) {
00296   result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
00297       }
00298     }
00299     return static_cast<float> (result);
00300 #endif
00301   }
00302 
00303   void BLAS<int, std::complex<float> >::AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const
00304   { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00305 
00306   void BLAS<int, std::complex<float> >::COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const
00307   { CCOPY_F77(&n, x, &incx, y, &incy); }
00308 
00309   std::complex<float> BLAS<int, std::complex<float> >::DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const
00310   {
00311 #undef HAVE_TEUCHOS_BLASFLOAT
00312 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00313     std::complex<float> z;
00314     cblas_cdotc_sub(n,x,incx,y,incy,&z);
00315     return z;
00316 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
00317     std::complex<float> z;
00318     CDOT_F77(&z, &n, x, &incx, y, &incy);
00319     return z;
00320 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00321     return CDOT_F77(&n, x, &incx, y, &incy);
00322 #else // Wow, you just plain don't have this routine.
00323     // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
00324     // I've enhanced this by accumulating in double precision.
00325     std::complex<double> result (0, 0);
00326     if (n >= 0) {
00327       if (incx == 1 && incy == 1) {
00328   for (int i = 0; i < n; ++i) {
00329     result += std::conj (x[i]) * y[i];
00330   }
00331       } else {
00332   int ix = 0;
00333   int iy = 0;
00334   if (incx < 0) {
00335     ix = (1-n) * incx;
00336   }
00337   if (incy < 0) {
00338     iy = (1-n) * incy;
00339   }
00340   for (int i = 0; i < n; ++i) {
00341     result += std::conj (x[ix]) * y[iy];
00342     ix += incx;
00343     iy += incy;
00344   }
00345       }
00346     }
00347     return static_cast<std::complex<float> > (result);
00348 #endif
00349   }
00350 
00351   int BLAS<int, std::complex<float> >::IAMAX(const int n, const std::complex<float>* x, const int incx) const
00352   { return ICAMAX_F77(&n, x, &incx); }
00353 
00354   float BLAS<int, std::complex<float> >::NRM2(const int n, const std::complex<float>* x, const int incx) const
00355   {
00356 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00357     return cblas_scnrm2(n, x, incx);
00358 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
00359     return (float) SCNRM2_F77(&n, x, &incx);
00360 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00361     return SCNRM2_F77(&n, x, &incx);
00362 #else // Wow, you just plain don't have this routine.
00363     // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
00364     // I've enhanced this by accumulating in double precision.
00365     if (n < 1 || incx < 1) {
00366       return 0;
00367     } else {
00368       double scale = 0;
00369       double ssq = 1;
00370 
00371       const int upper = 1 + (n-1)*incx;
00372       for (int ix = 0; ix < upper; ix += incx) {
00373   // The reference BLAS implementation cleverly scales the
00374   // intermediate result. so that even if the square of the norm
00375   // would overflow, computing the norm itself does not.  Hence,
00376   // "ssq" for "scaled square root."
00377   if (std::real (x[ix]) != 0) {
00378     const double temp = std::abs (std::real (x[ix]));
00379     if (scale < temp) {
00380       const double scale_over_temp = scale / temp;
00381       ssq = 1 + ssq * scale_over_temp*scale_over_temp;
00382       // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
00383       scale = temp;
00384     } else {
00385       const double temp_over_scale = temp / scale;
00386       ssq = ssq + temp_over_scale*temp_over_scale;
00387     }
00388   }
00389   if (std::imag (x[ix]) != 0) {
00390     const double temp = std::abs (std::imag (x[ix]));
00391     if (scale < temp) {
00392       const double scale_over_temp = scale / temp;
00393       ssq = 1 + ssq * scale_over_temp*scale_over_temp;
00394       // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
00395       scale = temp;
00396     } else {
00397       const double temp_over_scale = temp / scale;
00398       ssq = ssq + temp_over_scale*temp_over_scale;
00399     }
00400   }
00401       }
00402       return static_cast<float> (scale * std::sqrt (ssq));
00403     }
00404 #endif
00405   }
00406 
00407   void BLAS<int, std::complex<float> >::SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const
00408   { CSCAL_F77(&n, &alpha, x, &incx); }
00409 
00410   void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const
00411   { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00412 
00413   void BLAS<int, std::complex<float> >::GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const
00414   { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00415 
00416   void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const
00417   { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00418 
00419   void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
00420   { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00421 
00422   void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
00423   { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00424 
00425   void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
00426   { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00427 
00428   void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const
00429   { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00430 
00431   void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const
00432   { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00433 
00434   // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
00435 
00436   void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
00437   { ZROTG_F77(da, db, c, s); }
00438 
00439   void BLAS<int, std::complex<double> >::ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const
00440   { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00441 
00442   double BLAS<int, std::complex<double> >::ASUM(const int n, const std::complex<double>* x, const int incx) const
00443   { return ZASUM_F77(&n, x, &incx); }
00444 
00445   void BLAS<int, std::complex<double> >::AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const
00446   { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00447 
00448   void BLAS<int, std::complex<double> >::COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const
00449   { ZCOPY_F77(&n, x, &incx, y, &incy); }
00450 
00451   std::complex<double> BLAS<int, std::complex<double> >::DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const
00452   {
00453 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00454     std::complex<double> z;
00455     cblas_zdotc_sub(n,x,incx,y,incy,&z);
00456     return z;
00457 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
00458 #  if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
00459     std::complex<double> z;
00460     ZDOT_F77(&z, &n, x, &incx, y, &incy);
00461     return z;
00462 #  else
00463     // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
00464     // doesn't have the easy workaround.  I'll just reimplement the
00465     // missing routine here.  See www.netlib.org/blas/zdotc.f.
00466     std::complex<double> ztemp (0, 0);
00467     if (n > 0) {
00468       if (incx == 1 && incy == 1) {
00469   for (int i = 0; i < n; ++i) {
00470     ztemp += std::conj (x[i]) * y[i];
00471   }
00472       } else {
00473   int ix = 0;
00474   int iy = 0;
00475   if (incx < 0) {
00476     ix = (1-n)*incx;
00477   }
00478   if (incy < 0) {
00479     iy = (1-n)*incy;
00480   }
00481   for (int i = 0; i < n; ++i) {
00482     ztemp += std::conj (x[ix]) * y[iy];
00483     ix += incx;
00484     iy += incy;
00485   }
00486       }
00487     }
00488     return ztemp;
00489 
00490 #  endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
00491 #else
00492     return ZDOT_F77(&n, x, &incx, y, &incy);
00493 #endif
00494   }
00495 
00496   int BLAS<int, std::complex<double> >::IAMAX(const int n, const std::complex<double>* x, const int incx) const
00497   { return IZAMAX_F77(&n, x, &incx); }
00498 
00499   double BLAS<int, std::complex<double> >::NRM2(const int n, const std::complex<double>* x, const int incx) const
00500   { return ZNRM2_F77(&n, x, &incx); }
00501 
00502   void BLAS<int, std::complex<double> >::SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const
00503   { ZSCAL_F77(&n, &alpha, x, &incx); }
00504 
00505   void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const
00506   { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00507 
00508   void BLAS<int, std::complex<double> >::GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const
00509   { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00510 
00511   void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const
00512   { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00513 
00514   void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
00515   { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00516 
00517   void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const
00518   { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00519 
00520   void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
00521   { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00522 
00523   void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const
00524   { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00525 
00526   void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const
00527   { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00528 
00529 #endif // HAVE_TEUCHOS_COMPLEX
00530 
00531 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines