|
Teuchos - Trilinos Tools Package
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 #include "Teuchos_BLAS.hpp" 00043 #include "Teuchos_BLAS_wrappers.hpp" 00044 00045 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so 00046 the appropriate declaration of one will need to be added back into 00047 functions that include the macro: 00048 */ 00049 00050 namespace { 00051 #if defined (INTEL_CXML) 00052 unsigned int one=1; 00053 #endif 00054 } // namespace 00055 00056 #ifdef CHAR_MACRO 00057 #undef CHAR_MACRO 00058 #endif 00059 #if defined (INTEL_CXML) 00060 #define CHAR_MACRO(char_var) &char_var, one 00061 #else 00062 #define CHAR_MACRO(char_var) &char_var 00063 #endif 00064 00065 00066 const char Teuchos::ESideChar[] = {'L' , 'R' }; 00067 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' }; 00068 const char Teuchos::EUploChar[] = {'U' , 'L' }; 00069 const char Teuchos::EDiagChar[] = {'U' , 'N' }; 00070 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' }; 00071 //const char Teuchos::EFactChar[] = {'F', 'N' }; 00072 //const char Teuchos::ENormChar[] = {'O', 'I' }; 00073 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' }; 00074 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' }; 00075 //const char Teuchos::EJobSChar[] = {'E', 'S' }; 00076 //const char Teuchos::EJobVSChar[] = {'V', 'N' }; 00077 //const char Teuchos::EHowmnyChar[] = {'A', 'S' }; 00078 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' }; 00079 //const char Teuchos::ESortChar[] = {'N', 'S'}; 00080 00081 00082 namespace { 00083 00084 00085 template<typename Scalar> 00086 Scalar generic_dot(const int n, const Scalar* x, const int incx, 00087 const Scalar* y, const int incy) 00088 { 00089 typedef Teuchos::ScalarTraits<Scalar> ST; 00090 Scalar dot = 0.0; 00091 if (incx==1 && incy==1) { 00092 for (int i = 0; i < n; ++i) 00093 dot += (*x++)*ST::conjugate(*y++); 00094 } 00095 else { 00096 if (incx < 0) 00097 x = x - incx*(n-1); 00098 if (incy < 0) 00099 y = y - incy*(n-1); 00100 for (int i = 0; i < n; ++i, x+=incx, y+=incy) 00101 dot += (*x)*ST::conjugate(*y); 00102 } 00103 return dot; 00104 } 00105 00106 00107 } // namespace 00108 00109 00110 namespace Teuchos { 00111 00112 //Explicitly instantiating these templates for windows due to an issue with 00113 //resolving them when linking dlls. 00114 #ifdef _WIN32 00115 # ifdef HAVE_TEUCHOS_COMPLEX 00116 template BLAS<long int, std::complex<float> >; 00117 template BLAS<long int, std::complex<double> >; 00118 # endif 00119 template BLAS<long int, float>; 00120 template BLAS<long int, double>; 00121 #endif 00122 00123 // *************************** BLAS<int,float> DEFINITIONS ****************************** 00124 00125 void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const 00126 { SROTG_F77(da, db, c, s ); } 00127 00128 void BLAS<int, float>::ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const 00129 { SROT_F77(&n, dx, &incx, dy, &incy, c, s); } 00130 00131 00132 float BLAS<int, float>::ASUM(const int n, const float* x, const int incx) const 00133 { 00134 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00135 return cblas_sasum(n, x, incx); 00136 #elif defined(HAVE_TEUCHOS_BLASFLOAT) 00137 float tmp = SASUM_F77(&n, x, &incx); 00138 return tmp; 00139 #else 00140 typedef ScalarTraits<float> ST; 00141 float sum = 0.0; 00142 if (incx == 1) { 00143 for (int i = 0; i < n; ++i) 00144 sum += ST::magnitude(*x++); 00145 } 00146 else { 00147 for (int i = 0; i < n; ++i, x+=incx) 00148 sum += ST::magnitude(*x); 00149 } 00150 return sum; 00151 #endif 00152 } 00153 00154 void BLAS<int, float>::AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const 00155 { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); } 00156 00157 void BLAS<int, float>::COPY(const int n, const float* x, const int incx, float* y, const int incy) const 00158 { SCOPY_F77(&n, x, &incx, y, &incy); } 00159 00160 float BLAS<int, float>::DOT(const int n, const float* x, const int incx, const float* y, const int incy) const 00161 { 00162 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00163 return cblas_sdot(n, x, incx, y, incy); 00164 #elif defined(HAVE_TEUCHOS_BLASFLOAT) 00165 return SDOT_F77(&n, x, &incx, y, &incy); 00166 #else 00167 return generic_dot(n, x, incx, y, incy); 00168 #endif 00169 } 00170 00171 int BLAS<int, float>::IAMAX(const int n, const float* x, const int incx) const 00172 { return ISAMAX_F77(&n, x, &incx); } 00173 00174 float BLAS<int, float>::NRM2(const int n, const float* x, const int incx) const 00175 { 00176 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00177 return cblas_snrm2(n, x, incx); 00178 #elif defined(HAVE_TEUCHOS_BLASFLOAT) 00179 return SNRM2_F77(&n, x, &incx); 00180 #else 00181 return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx)); 00182 #endif 00183 } 00184 00185 void BLAS<int, float>::SCAL(const int n, const float alpha, float* x, const int incx) const 00186 { SSCAL_F77(&n, &alpha, x, &incx); } 00187 00188 void BLAS<int, float>::GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const 00189 { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } 00190 00191 void BLAS<int, float>::GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const 00192 { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } 00193 00194 void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const 00195 { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); } 00196 00197 void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const 00198 { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00199 00200 void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const 00201 { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00202 00203 void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const 00204 { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); } 00205 00206 void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const 00207 { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00208 00209 void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const 00210 { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00211 00212 // *************************** BLAS<int,double> DEFINITIONS ****************************** 00213 00214 void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const 00215 { DROTG_F77(da, db, c, s); } 00216 00217 void BLAS<int, double>::ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const 00218 { DROT_F77(&n, dx, &incx, dy, &incy, c, s); } 00219 00220 double BLAS<int, double>::ASUM(const int n, const double* x, const int incx) const 00221 { return DASUM_F77(&n, x, &incx); } 00222 00223 void BLAS<int, double>::AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const 00224 { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); } 00225 00226 void BLAS<int, double>::COPY(const int n, const double* x, const int incx, double* y, const int incy) const 00227 { DCOPY_F77(&n, x, &incx, y, &incy); } 00228 00229 double BLAS<int, double>::DOT(const int n, const double* x, const int incx, const double* y, const int incy) const 00230 { 00231 return DDOT_F77(&n, x, &incx, y, &incy); 00232 } 00233 00234 int BLAS<int, double>::IAMAX(const int n, const double* x, const int incx) const 00235 { return IDAMAX_F77(&n, x, &incx); } 00236 00237 double BLAS<int, double>::NRM2(const int n, const double* x, const int incx) const 00238 { return DNRM2_F77(&n, x, &incx); } 00239 00240 void BLAS<int, double>::SCAL(const int n, const double alpha, double* x, const int incx) const 00241 { DSCAL_F77(&n, &alpha, x, &incx); } 00242 00243 void BLAS<int, double>::GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const 00244 { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } 00245 00246 void BLAS<int, double>::GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const 00247 { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } 00248 00249 void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const 00250 { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); } 00251 00252 void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const 00253 { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00254 00255 void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const 00256 { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00257 00258 void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const 00259 { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); } 00260 00261 void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const 00262 { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00263 00264 void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const 00265 { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00266 00267 #ifdef HAVE_TEUCHOS_COMPLEX 00268 00269 // *************************** BLAS<int,std::complex<float> > DEFINITIONS ****************************** 00270 00271 void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const 00272 { CROTG_F77(da, db, c, s ); } 00273 00274 void BLAS<int, std::complex<float> >::ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const 00275 { CROT_F77(&n, dx, &incx, dy, &incy, c, s); } 00276 00277 float BLAS<int, std::complex<float> >::ASUM(const int n, const std::complex<float>* x, const int incx) const 00278 { 00279 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00280 return cblas_scasum(n, x, incx); 00281 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN) 00282 return (float) SCASUM_F77(&n, x, &incx); 00283 #elif defined(HAVE_TEUCHOS_BLASFLOAT) 00284 return SCASUM_F77(&n, x, &incx); 00285 #else // Wow, you just plain don't have this routine. 00286 // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f. 00287 // I've enhanced this by accumulating in double precision. 00288 double result = 0; 00289 if (incx == 1) { 00290 for (int i = 0; i < n; ++i) { 00291 result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i])); 00292 } 00293 } else { 00294 const int nincx = n * incx; 00295 for (int i = 0; i < nincx; i += incx) { 00296 result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i])); 00297 } 00298 } 00299 return static_cast<float> (result); 00300 #endif 00301 } 00302 00303 void BLAS<int, std::complex<float> >::AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const 00304 { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); } 00305 00306 void BLAS<int, std::complex<float> >::COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const 00307 { CCOPY_F77(&n, x, &incx, y, &incy); } 00308 00309 std::complex<float> BLAS<int, std::complex<float> >::DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const 00310 { 00311 #undef HAVE_TEUCHOS_BLASFLOAT 00312 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00313 std::complex<float> z; 00314 cblas_cdotc_sub(n,x,incx,y,incy,&z); 00315 return z; 00316 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM) 00317 std::complex<float> z; 00318 CDOT_F77(&z, &n, x, &incx, y, &incy); 00319 return z; 00320 #elif defined(HAVE_TEUCHOS_BLASFLOAT) 00321 return CDOT_F77(&n, x, &incx, y, &incy); 00322 #else // Wow, you just plain don't have this routine. 00323 // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f. 00324 // I've enhanced this by accumulating in double precision. 00325 std::complex<double> result (0, 0); 00326 if (n >= 0) { 00327 if (incx == 1 && incy == 1) { 00328 for (int i = 0; i < n; ++i) { 00329 result += std::conj (x[i]) * y[i]; 00330 } 00331 } else { 00332 int ix = 0; 00333 int iy = 0; 00334 if (incx < 0) { 00335 ix = (1-n) * incx; 00336 } 00337 if (incy < 0) { 00338 iy = (1-n) * incy; 00339 } 00340 for (int i = 0; i < n; ++i) { 00341 result += std::conj (x[ix]) * y[iy]; 00342 ix += incx; 00343 iy += incy; 00344 } 00345 } 00346 } 00347 return static_cast<std::complex<float> > (result); 00348 #endif 00349 } 00350 00351 int BLAS<int, std::complex<float> >::IAMAX(const int n, const std::complex<float>* x, const int incx) const 00352 { return ICAMAX_F77(&n, x, &incx); } 00353 00354 float BLAS<int, std::complex<float> >::NRM2(const int n, const std::complex<float>* x, const int incx) const 00355 { 00356 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00357 return cblas_scnrm2(n, x, incx); 00358 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN) 00359 return (float) SCNRM2_F77(&n, x, &incx); 00360 #elif defined(HAVE_TEUCHOS_BLASFLOAT) 00361 return SCNRM2_F77(&n, x, &incx); 00362 #else // Wow, you just plain don't have this routine. 00363 // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f. 00364 // I've enhanced this by accumulating in double precision. 00365 if (n < 1 || incx < 1) { 00366 return 0; 00367 } else { 00368 double scale = 0; 00369 double ssq = 1; 00370 00371 const int upper = 1 + (n-1)*incx; 00372 for (int ix = 0; ix < upper; ix += incx) { 00373 // The reference BLAS implementation cleverly scales the 00374 // intermediate result. so that even if the square of the norm 00375 // would overflow, computing the norm itself does not. Hence, 00376 // "ssq" for "scaled square root." 00377 if (std::real (x[ix]) != 0) { 00378 const double temp = std::abs (std::real (x[ix])); 00379 if (scale < temp) { 00380 const double scale_over_temp = scale / temp; 00381 ssq = 1 + ssq * scale_over_temp*scale_over_temp; 00382 // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far. 00383 scale = temp; 00384 } else { 00385 const double temp_over_scale = temp / scale; 00386 ssq = ssq + temp_over_scale*temp_over_scale; 00387 } 00388 } 00389 if (std::imag (x[ix]) != 0) { 00390 const double temp = std::abs (std::imag (x[ix])); 00391 if (scale < temp) { 00392 const double scale_over_temp = scale / temp; 00393 ssq = 1 + ssq * scale_over_temp*scale_over_temp; 00394 // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far. 00395 scale = temp; 00396 } else { 00397 const double temp_over_scale = temp / scale; 00398 ssq = ssq + temp_over_scale*temp_over_scale; 00399 } 00400 } 00401 } 00402 return static_cast<float> (scale * std::sqrt (ssq)); 00403 } 00404 #endif 00405 } 00406 00407 void BLAS<int, std::complex<float> >::SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const 00408 { CSCAL_F77(&n, &alpha, x, &incx); } 00409 00410 void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const 00411 { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } 00412 00413 void BLAS<int, std::complex<float> >::GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const 00414 { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } 00415 00416 void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const 00417 { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); } 00418 00419 void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const 00420 { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00421 00422 void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const 00423 { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00424 00425 void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const 00426 { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); } 00427 00428 void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const 00429 { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00430 00431 void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const 00432 { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00433 00434 // *************************** BLAS<int,std::complex<double> > DEFINITIONS ****************************** 00435 00436 void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const 00437 { ZROTG_F77(da, db, c, s); } 00438 00439 void BLAS<int, std::complex<double> >::ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const 00440 { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); } 00441 00442 double BLAS<int, std::complex<double> >::ASUM(const int n, const std::complex<double>* x, const int incx) const 00443 { return ZASUM_F77(&n, x, &incx); } 00444 00445 void BLAS<int, std::complex<double> >::AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const 00446 { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); } 00447 00448 void BLAS<int, std::complex<double> >::COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const 00449 { ZCOPY_F77(&n, x, &incx, y, &incy); } 00450 00451 std::complex<double> BLAS<int, std::complex<double> >::DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const 00452 { 00453 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX) 00454 std::complex<double> z; 00455 cblas_zdotc_sub(n,x,incx,y,incy,&z); 00456 return z; 00457 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) 00458 # if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM) 00459 std::complex<double> z; 00460 ZDOT_F77(&z, &n, x, &incx, y, &incy); 00461 return z; 00462 # else 00463 // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem 00464 // doesn't have the easy workaround. I'll just reimplement the 00465 // missing routine here. See www.netlib.org/blas/zdotc.f. 00466 std::complex<double> ztemp (0, 0); 00467 if (n > 0) { 00468 if (incx == 1 && incy == 1) { 00469 for (int i = 0; i < n; ++i) { 00470 ztemp += std::conj (x[i]) * y[i]; 00471 } 00472 } else { 00473 int ix = 0; 00474 int iy = 0; 00475 if (incx < 0) { 00476 ix = (1-n)*incx; 00477 } 00478 if (incy < 0) { 00479 iy = (1-n)*incy; 00480 } 00481 for (int i = 0; i < n; ++i) { 00482 ztemp += std::conj (x[ix]) * y[iy]; 00483 ix += incx; 00484 iy += incy; 00485 } 00486 } 00487 } 00488 return ztemp; 00489 00490 # endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM) 00491 #else 00492 return ZDOT_F77(&n, x, &incx, y, &incy); 00493 #endif 00494 } 00495 00496 int BLAS<int, std::complex<double> >::IAMAX(const int n, const std::complex<double>* x, const int incx) const 00497 { return IZAMAX_F77(&n, x, &incx); } 00498 00499 double BLAS<int, std::complex<double> >::NRM2(const int n, const std::complex<double>* x, const int incx) const 00500 { return ZNRM2_F77(&n, x, &incx); } 00501 00502 void BLAS<int, std::complex<double> >::SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const 00503 { ZSCAL_F77(&n, &alpha, x, &incx); } 00504 00505 void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const 00506 { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); } 00507 00508 void BLAS<int, std::complex<double> >::GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const 00509 { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } 00510 00511 void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const 00512 { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); } 00513 00514 void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const 00515 { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00516 00517 void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const 00518 { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 00519 00520 void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const 00521 { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); } 00522 00523 void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const 00524 { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00525 00526 void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const 00527 { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); } 00528 00529 #endif // HAVE_TEUCHOS_COMPLEX 00530 00531 }
1.7.6.1