|
Teuchos Package Browser (Single Doxygen Collection)
Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Teuchos: Common Tools Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 // @HEADER 00041 00042 // Kris 00043 // 06.16.03 -- Start over from scratch 00044 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed) 00045 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77() 00046 // -- Added warning messages for generic calls 00047 // 07.08.03 -- Move into Teuchos package/namespace 00048 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats: 00049 // * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.) 00050 // * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible. 00051 // * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust. 00052 // * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial. 00053 // * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well. 00054 // -- Removed warning messages for generic calls 00055 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented. 00056 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information. 00057 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ). 00058 00059 #ifndef _TEUCHOS_BLAS_HPP_ 00060 #define _TEUCHOS_BLAS_HPP_ 00061 00069 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so 00070 the appropriate declaration of one will need to be added back into 00071 functions that include the macro: 00072 */ 00073 #if defined (INTEL_CXML) 00074 unsigned int one=1; 00075 #endif 00076 00077 #ifdef CHAR_MACRO 00078 #undef CHAR_MACRO 00079 #endif 00080 #if defined (INTEL_CXML) 00081 #define CHAR_MACRO(char_var) &char_var, one 00082 #else 00083 #define CHAR_MACRO(char_var) &char_var 00084 #endif 00085 00086 #include "Teuchos_ConfigDefs.hpp" 00087 #include "Teuchos_ScalarTraits.hpp" 00088 #include "Teuchos_OrdinalTraits.hpp" 00089 #include "Teuchos_BLAS_types.hpp" 00090 #include "Teuchos_Assert.hpp" 00091 00124 namespace Teuchos 00125 { 00126 extern TEUCHOS_LIB_DLL_EXPORT const char ESideChar[]; 00127 extern TEUCHOS_LIB_DLL_EXPORT const char ETranspChar[]; 00128 extern TEUCHOS_LIB_DLL_EXPORT const char EUploChar[]; 00129 extern TEUCHOS_LIB_DLL_EXPORT const char EDiagChar[]; 00130 00132 00137 template<typename OrdinalType, typename ScalarType> 00138 class DefaultBLASImpl 00139 { 00140 00141 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType; 00142 00143 public: 00145 00146 00148 inline DefaultBLASImpl(void) {} 00149 00151 inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& /*BLAS_source*/) {} 00152 00154 inline virtual ~DefaultBLASImpl(void) {} 00156 00158 00159 00161 void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const; 00162 00164 void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const; 00165 00167 void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const; 00168 00170 void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const; 00171 00173 template <typename alpha_type, typename x_type> 00174 void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const; 00175 00177 typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const; 00178 00180 template <typename x_type, typename y_type> 00181 ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const; 00182 00184 typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const; 00185 00187 OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const; 00189 00191 00192 00194 template <typename alpha_type, typename A_type, typename x_type, typename beta_type> 00195 void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, 00196 const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const; 00197 00199 template <typename A_type> 00200 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, 00201 const OrdinalType lda, ScalarType* x, const OrdinalType incx) const; 00202 00205 template <typename alpha_type, typename x_type, typename y_type> 00206 void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, 00207 const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const; 00209 00211 00212 00219 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 00220 void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const; 00221 00223 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 00224 void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const; 00225 00227 template <typename alpha_type, typename A_type, typename beta_type> 00228 void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const; 00229 00231 template <typename alpha_type, typename A_type> 00232 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, 00233 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const; 00234 00236 template <typename alpha_type, typename A_type> 00237 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, 00238 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const; 00240 }; 00241 00242 template<typename OrdinalType, typename ScalarType> 00243 class TEUCHOS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType> 00244 { 00245 00246 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType; 00247 00248 public: 00250 00251 00253 inline BLAS(void) {} 00254 00256 inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {} 00257 00259 inline virtual ~BLAS(void) {} 00261 }; 00262 00263 //------------------------------------------------------------------------------------------ 00264 // LEVEL 1 BLAS ROUTINES 00265 //------------------------------------------------------------------------------------------ 00266 00274 namespace details { 00275 00276 // Compute magnitude. 00277 template<typename ScalarType, bool isComplex> 00278 class MagValue { 00279 public: 00280 void 00281 blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const; 00282 }; 00283 00284 // Complex-arithmetic specialization. 00285 template<typename ScalarType> 00286 class MagValue<ScalarType, true> { 00287 public: 00288 void 00289 blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const; 00290 }; 00291 00292 // Real-arithmetic specialization. 00293 template<typename ScalarType> 00294 class MagValue<ScalarType, false> { 00295 public: 00296 void 00297 blas_dabs1(const ScalarType* a, ScalarType* ret) const; 00298 }; 00299 00300 template<typename ScalarType, bool isComplex> 00301 class GivensRotator { 00302 public: 00303 void 00304 ROTG (ScalarType* a, 00305 ScalarType* b, 00306 typename ScalarTraits<ScalarType>::magnitudeType* c, 00307 ScalarType* s) const; 00308 }; 00309 00310 // Complex-arithmetic specialization. 00311 template<typename ScalarType> 00312 class GivensRotator<ScalarType, true> { 00313 public: 00314 void 00315 ROTG (ScalarType* ca, 00316 ScalarType* cb, 00317 typename ScalarTraits<ScalarType>::magnitudeType* c, 00318 ScalarType* s) const; 00319 }; 00320 00321 // Real-arithmetic specialization. 00322 template<typename ScalarType> 00323 class GivensRotator<ScalarType, false> { 00324 public: 00325 void 00326 ROTG (ScalarType* da, 00327 ScalarType* db, 00328 ScalarType* c, 00329 ScalarType* s) const; 00330 private: 00343 ScalarType SIGN (ScalarType x, ScalarType y) const { 00344 typedef ScalarTraits<ScalarType> STS; 00345 00346 if (y > STS::zero()) { 00347 return STS::magnitude (x); 00348 } else if (y < STS::zero()) { 00349 return -STS::magnitude (x); 00350 } else { // y == STS::zero() 00351 // Suppose that ScalarType implements signed zero, as IEEE 00352 // 754 - compliant floating-point numbers should. You can't 00353 // use == to test for signed zero, since +0 == -0. However, 00354 // 1/0 = Inf > 0 and 1/-0 = -Inf < 0. Let's hope ScalarType 00355 // supports Inf... we don't need to test for Inf, just see 00356 // if it's greater than or less than zero. 00357 // 00358 // NOTE: This ONLY works if ScalarType is real. Complex 00359 // infinity doesn't have a sign, so we can't compare it with 00360 // zero. That's OK, because finite complex numbers don't 00361 // have a sign either; they have an angle. 00362 ScalarType signedInfinity = STS::one() / y; 00363 if (signedInfinity > STS::zero()) { 00364 return STS::magnitude (x); 00365 } else { 00366 // Even if ScalarType doesn't implement signed zero, 00367 // Fortran's SIGN intrinsic returns -ABS(X) if the second 00368 // argument Y is zero. We imitate this behavior here. 00369 return -STS::magnitude (x); 00370 } 00371 } 00372 } 00373 }; 00374 00375 // Implementation of complex-arithmetic specialization. 00376 template<typename ScalarType> 00377 void 00378 GivensRotator<ScalarType, true>:: 00379 ROTG (ScalarType* ca, 00380 ScalarType* cb, 00381 typename ScalarTraits<ScalarType>::magnitudeType* c, 00382 ScalarType* s) const 00383 { 00384 typedef ScalarTraits<ScalarType> STS; 00385 typedef typename STS::magnitudeType MagnitudeType; 00386 typedef ScalarTraits<MagnitudeType> STM; 00387 00388 // This is a straightforward translation into C++ of the 00389 // reference BLAS' implementation of ZROTG. You can get 00390 // the Fortran 77 source code of ZROTG here: 00391 // 00392 // http://www.netlib.org/blas/zrotg.f 00393 // 00394 // I used the following rules to translate Fortran types and 00395 // intrinsic functions into C++: 00396 // 00397 // DOUBLE PRECISION -> MagnitudeType 00398 // DOUBLE COMPLEX -> ScalarType 00399 // CDABS -> STS::magnitude 00400 // DCMPLX -> ScalarType constructor (assuming that ScalarType 00401 // is std::complex<MagnitudeType>) 00402 // DCONJG -> STS::conjugate 00403 // DSQRT -> STM::squareroot 00404 ScalarType alpha; 00405 MagnitudeType norm, scale; 00406 00407 if (STS::magnitude (*ca) == STM::zero()) { 00408 *c = STM::zero(); 00409 *s = STS::one(); 00410 *ca = *cb; 00411 } else { 00412 scale = STS::magnitude (*ca) + STS::magnitude (*cb); 00413 { // I introduced temporaries into the translated BLAS code in 00414 // order to make the expression easier to read and also save a 00415 // few floating-point operations. 00416 const MagnitudeType ca_scaled = 00417 STS::magnitude (*ca / ScalarType(scale, STM::zero())); 00418 const MagnitudeType cb_scaled = 00419 STS::magnitude (*cb / ScalarType(scale, STM::zero())); 00420 norm = scale * 00421 STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled); 00422 } 00423 alpha = *ca / STS::magnitude (*ca); 00424 *c = STS::magnitude (*ca) / norm; 00425 *s = alpha * STS::conjugate (*cb) / norm; 00426 *ca = alpha * norm; 00427 } 00428 } 00429 00430 // Implementation of real-arithmetic specialization. 00431 template<typename ScalarType> 00432 void 00433 GivensRotator<ScalarType, false>:: 00434 ROTG (ScalarType* da, 00435 ScalarType* db, 00436 ScalarType* c, 00437 ScalarType* s) const 00438 { 00439 typedef ScalarTraits<ScalarType> STS; 00440 00441 // This is a straightforward translation into C++ of the 00442 // reference BLAS' implementation of DROTG. You can get 00443 // the Fortran 77 source code of DROTG here: 00444 // 00445 // http://www.netlib.org/blas/drotg.f 00446 // 00447 // I used the following rules to translate Fortran types and 00448 // intrinsic functions into C++: 00449 // 00450 // DOUBLE PRECISION -> ScalarType 00451 // DABS -> STS::magnitude 00452 // DSQRT -> STM::squareroot 00453 // DSIGN -> SIGN (see below) 00454 // 00455 // DSIGN(x,y) (the old DOUBLE PRECISION type-specific form of 00456 // the Fortran type-generic SIGN intrinsic) required special 00457 // translation, which we did in a separate utility function in 00458 // the specializaton of GivensRotator for real arithmetic. 00459 // (ROTG for complex arithmetic doesn't require this function.) 00460 // C99 provides a copysign() math library function, but we are 00461 // not able to rely on the existence of C99 functions here. 00462 ScalarType r, roe, scale, z; 00463 00464 roe = *db; 00465 if (STS::magnitude (*da) > STS::magnitude (*db)) { 00466 roe = *da; 00467 } 00468 scale = STS::magnitude (*da) + STS::magnitude (*db); 00469 if (scale == STS::zero()) { 00470 *c = STS::one(); 00471 *s = STS::zero(); 00472 r = STS::zero(); 00473 z = STS::zero(); 00474 } else { 00475 // I introduced temporaries into the translated BLAS code in 00476 // order to make the expression easier to read and also save 00477 // a few floating-point operations. 00478 const ScalarType da_scaled = *da / scale; 00479 const ScalarType db_scaled = *db / scale; 00480 r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled); 00481 r = SIGN (STS::one(), roe) * r; 00482 *c = *da / r; 00483 *s = *db / r; 00484 z = STS::one(); 00485 if (STS::magnitude (*da) > STS::magnitude (*db)) { 00486 z = *s; 00487 } 00488 if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) { 00489 z = STS::one() / *c; 00490 } 00491 } 00492 00493 *da = r; 00494 *db = z; 00495 } 00496 00497 // Real-valued implementation of MagValue 00498 template<typename ScalarType> 00499 void 00500 MagValue<ScalarType, false>:: 00501 blas_dabs1(const ScalarType* a, ScalarType* ret) const 00502 { 00503 *ret = Teuchos::ScalarTraits<ScalarType>::magnitude( *a ); 00504 } 00505 00506 // Complex-valued implementation of MagValue 00507 template<typename ScalarType> 00508 void 00509 MagValue<ScalarType, true>:: 00510 blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const 00511 { 00512 *ret = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->real()); 00513 *ret += ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->imag()); 00514 } 00515 00516 } // namespace details 00517 00518 template<typename OrdinalType, typename ScalarType> 00519 void 00520 DefaultBLASImpl<OrdinalType, ScalarType>:: 00521 ROTG (ScalarType* da, 00522 ScalarType* db, 00523 MagnitudeType* c, 00524 ScalarType* s) const 00525 { 00526 typedef ScalarTraits<ScalarType> STS; 00527 details::GivensRotator<ScalarType, STS::isComplex> rotator; 00528 rotator.ROTG (da, db, c, s); 00529 } 00530 00531 template<typename OrdinalType, typename ScalarType> 00532 void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const 00533 { 00534 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00535 ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s); 00536 if (n <= 0) return; 00537 if (incx==1 && incy==1) { 00538 for(OrdinalType i=0; i<n; ++i) { 00539 ScalarType temp = *c*dx[i] + sconj*dy[i]; 00540 dy[i] = *c*dy[i] - sconj*dx[i]; 00541 dx[i] = temp; 00542 } 00543 } 00544 else { 00545 OrdinalType ix = 0, iy = 0; 00546 if (incx < izero) ix = (-n+1)*incx; 00547 if (incy < izero) iy = (-n+1)*incy; 00548 for(OrdinalType i=0; i<n; ++i) { 00549 ScalarType temp = *c*dx[ix] + sconj*dy[iy]; 00550 dy[iy] = *c*dy[iy] - sconj*dx[ix]; 00551 dx[ix] = temp; 00552 ix += incx; 00553 iy += incy; 00554 } 00555 } 00556 } 00557 00558 template<typename OrdinalType, typename ScalarType> 00559 void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const 00560 { 00561 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00562 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00563 OrdinalType i, ix = izero; 00564 00565 if ( n < ione || incx < ione ) 00566 return; 00567 00568 // Scale the vector. 00569 for(i = izero; i < n; i++) 00570 { 00571 x[ix] *= alpha; 00572 ix += incx; 00573 } 00574 } /* end SCAL */ 00575 00576 template<typename OrdinalType, typename ScalarType> 00577 void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const 00578 { 00579 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00580 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00581 OrdinalType i, ix = izero, iy = izero; 00582 if ( n > izero ) { 00583 // Set the initial indices (ix, iy). 00584 if (incx < izero) { ix = (-n+ione)*incx; } 00585 if (incy < izero) { iy = (-n+ione)*incy; } 00586 00587 for(i = izero; i < n; i++) 00588 { 00589 y[iy] = x[ix]; 00590 ix += incx; 00591 iy += incy; 00592 } 00593 } 00594 } /* end COPY */ 00595 00596 template<typename OrdinalType, typename ScalarType> 00597 template <typename alpha_type, typename x_type> 00598 void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const 00599 { 00600 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00601 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00602 OrdinalType i, ix = izero, iy = izero; 00603 if( n > izero && alpha != ScalarTraits<alpha_type>::zero()) 00604 { 00605 // Set the initial indices (ix, iy). 00606 if (incx < izero) { ix = (-n+ione)*incx; } 00607 if (incy < izero) { iy = (-n+ione)*incy; } 00608 00609 for(i = izero; i < n; i++) 00610 { 00611 y[iy] += alpha * x[ix]; 00612 ix += incx; 00613 iy += incy; 00614 } 00615 } 00616 } /* end AXPY */ 00617 00618 template<typename OrdinalType, typename ScalarType> 00619 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const 00620 { 00621 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00622 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00623 typename ScalarTraits<ScalarType>::magnitudeType temp, result = 00624 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00625 OrdinalType i, ix = izero; 00626 00627 if ( n < ione || incx < ione ) 00628 return result; 00629 00630 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval; 00631 for (i = izero; i < n; i++) 00632 { 00633 mval.blas_dabs1( &x[ix], &temp ); 00634 result += temp; 00635 ix += incx; 00636 } 00637 00638 return result; 00639 } /* end ASUM */ 00640 00641 template<typename OrdinalType, typename ScalarType> 00642 template <typename x_type, typename y_type> 00643 ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const 00644 { 00645 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00646 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00647 ScalarType result = ScalarTraits<ScalarType>::zero(); 00648 OrdinalType i, ix = izero, iy = izero; 00649 if( n > izero ) 00650 { 00651 // Set the initial indices (ix,iy). 00652 if (incx < izero) { ix = (-n+ione)*incx; } 00653 if (incy < izero) { iy = (-n+ione)*incy; } 00654 00655 for(i = izero; i < n; i++) 00656 { 00657 result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy]; 00658 ix += incx; 00659 iy += incy; 00660 } 00661 } 00662 return result; 00663 } /* end DOT */ 00664 00665 template<typename OrdinalType, typename ScalarType> 00666 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const 00667 { 00668 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00669 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00670 typename ScalarTraits<ScalarType>::magnitudeType result = 00671 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00672 OrdinalType i, ix = izero; 00673 00674 if ( n < ione || incx < ione ) 00675 return result; 00676 00677 for(i = izero; i < n; i++) 00678 { 00679 result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]); 00680 ix += incx; 00681 } 00682 result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result); 00683 return result; 00684 } /* end NRM2 */ 00685 00686 template<typename OrdinalType, typename ScalarType> 00687 OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const 00688 { 00689 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00690 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00691 OrdinalType result = izero, ix = izero, i; 00692 typename ScalarTraits<ScalarType>::magnitudeType curval = 00693 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00694 typename ScalarTraits<ScalarType>::magnitudeType maxval = 00695 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero(); 00696 00697 if ( n < ione || incx < ione ) 00698 return result; 00699 00700 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval; 00701 00702 mval.blas_dabs1( &x[ix], &maxval ); 00703 ix += incx; 00704 for(i = ione; i < n; i++) 00705 { 00706 mval.blas_dabs1( &x[ix], &curval ); 00707 if(curval > maxval) 00708 { 00709 result = i; 00710 maxval = curval; 00711 } 00712 ix += incx; 00713 } 00714 00715 return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values 00716 } /* end IAMAX */ 00717 00718 //------------------------------------------------------------------------------------------ 00719 // LEVEL 2 BLAS ROUTINES 00720 //------------------------------------------------------------------------------------------ 00721 template<typename OrdinalType, typename ScalarType> 00722 template <typename alpha_type, typename A_type, typename x_type, typename beta_type> 00723 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const 00724 { 00725 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00726 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00727 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 00728 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 00729 x_type x_zero = ScalarTraits<x_type>::zero(); 00730 ScalarType y_zero = ScalarTraits<ScalarType>::zero(); 00731 beta_type beta_one = ScalarTraits<beta_type>::one(); 00732 bool noConj = true; 00733 bool BadArgument = false; 00734 00735 // Quick return if there is nothing to do! 00736 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; } 00737 00738 // Otherwise, we need to check the argument list. 00739 if( m < izero ) { 00740 std::cout << "BLAS::GEMV Error: M == " << m << std::endl; 00741 BadArgument = true; 00742 } 00743 if( n < izero ) { 00744 std::cout << "BLAS::GEMV Error: N == " << n << std::endl; 00745 BadArgument = true; 00746 } 00747 if( lda < m ) { 00748 std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl; 00749 BadArgument = true; 00750 } 00751 if( incx == izero ) { 00752 std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl; 00753 BadArgument = true; 00754 } 00755 if( incy == izero ) { 00756 std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl; 00757 BadArgument = true; 00758 } 00759 00760 if(!BadArgument) { 00761 OrdinalType i, j, lenx, leny, ix, iy, jx, jy; 00762 OrdinalType kx = izero, ky = izero; 00763 ScalarType temp; 00764 00765 // Determine the lengths of the vectors x and y. 00766 if(ETranspChar[trans] == 'N') { 00767 lenx = n; 00768 leny = m; 00769 } else { 00770 lenx = m; 00771 leny = n; 00772 } 00773 00774 // Determine if this is a conjugate tranpose 00775 noConj = (ETranspChar[trans] == 'T'); 00776 00777 // Set the starting pointers for the vectors x and y if incx/y < 0. 00778 if (incx < izero ) { kx = (ione - lenx)*incx; } 00779 if (incy < izero ) { ky = (ione - leny)*incy; } 00780 00781 // Form y = beta*y 00782 ix = kx; iy = ky; 00783 if(beta != beta_one) { 00784 if (incy == ione) { 00785 if (beta == beta_zero) { 00786 for(i = izero; i < leny; i++) { y[i] = y_zero; } 00787 } else { 00788 for(i = izero; i < leny; i++) { y[i] *= beta; } 00789 } 00790 } else { 00791 if (beta == beta_zero) { 00792 for(i = izero; i < leny; i++) { 00793 y[iy] = y_zero; 00794 iy += incy; 00795 } 00796 } else { 00797 for(i = izero; i < leny; i++) { 00798 y[iy] *= beta; 00799 iy += incy; 00800 } 00801 } 00802 } 00803 } 00804 00805 // Return if we don't have to do anything more. 00806 if(alpha == alpha_zero) { return; } 00807 00808 if( ETranspChar[trans] == 'N' ) { 00809 // Form y = alpha*A*y 00810 jx = kx; 00811 if (incy == ione) { 00812 for(j = izero; j < n; j++) { 00813 if (x[jx] != x_zero) { 00814 temp = alpha*x[jx]; 00815 for(i = izero; i < m; i++) { 00816 y[i] += temp*A[j*lda + i]; 00817 } 00818 } 00819 jx += incx; 00820 } 00821 } else { 00822 for(j = izero; j < n; j++) { 00823 if (x[jx] != x_zero) { 00824 temp = alpha*x[jx]; 00825 iy = ky; 00826 for(i = izero; i < m; i++) { 00827 y[iy] += temp*A[j*lda + i]; 00828 iy += incy; 00829 } 00830 } 00831 jx += incx; 00832 } 00833 } 00834 } else { 00835 jy = ky; 00836 if (incx == ione) { 00837 for(j = izero; j < n; j++) { 00838 temp = y_zero; 00839 if ( noConj ) { 00840 for(i = izero; i < m; i++) { 00841 temp += A[j*lda + i]*x[i]; 00842 } 00843 } else { 00844 for(i = izero; i < m; i++) { 00845 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i]; 00846 } 00847 } 00848 y[jy] += alpha*temp; 00849 jy += incy; 00850 } 00851 } else { 00852 for(j = izero; j < n; j++) { 00853 temp = y_zero; 00854 ix = kx; 00855 if ( noConj ) { 00856 for (i = izero; i < m; i++) { 00857 temp += A[j*lda + i]*x[ix]; 00858 ix += incx; 00859 } 00860 } else { 00861 for (i = izero; i < m; i++) { 00862 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix]; 00863 ix += incx; 00864 } 00865 } 00866 y[jy] += alpha*temp; 00867 jy += incy; 00868 } 00869 } 00870 } 00871 } /* if (!BadArgument) */ 00872 } /* end GEMV */ 00873 00874 template<typename OrdinalType, typename ScalarType> 00875 template <typename A_type> 00876 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const 00877 { 00878 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 00879 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 00880 ScalarType zero = ScalarTraits<ScalarType>::zero(); 00881 bool BadArgument = false; 00882 bool noConj = true; 00883 00884 // Quick return if there is nothing to do! 00885 if( n == izero ){ return; } 00886 00887 // Otherwise, we need to check the argument list. 00888 if( n < izero ) { 00889 std::cout << "BLAS::TRMV Error: N == " << n << std::endl; 00890 BadArgument = true; 00891 } 00892 if( lda < n ) { 00893 std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl; 00894 BadArgument = true; 00895 } 00896 if( incx == izero ) { 00897 std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl; 00898 BadArgument = true; 00899 } 00900 00901 if(!BadArgument) { 00902 OrdinalType i, j, ix, jx, kx = izero; 00903 ScalarType temp; 00904 bool noUnit = (EDiagChar[diag] == 'N'); 00905 00906 // Determine if this is a conjugate tranpose 00907 noConj = (ETranspChar[trans] == 'T'); 00908 00909 // Set the starting pointer for the vector x if incx < 0. 00910 if (incx < izero) { kx = (-n+ione)*incx; } 00911 00912 // Start the operations for a nontransposed triangular matrix 00913 if (ETranspChar[trans] == 'N') { 00914 /* Compute x = A*x */ 00915 if (EUploChar[uplo] == 'U') { 00916 /* A is an upper triangular matrix */ 00917 if (incx == ione) { 00918 for (j=izero; j<n; j++) { 00919 if (x[j] != zero) { 00920 temp = x[j]; 00921 for (i=izero; i<j; i++) { 00922 x[i] += temp*A[j*lda + i]; 00923 } 00924 if ( noUnit ) 00925 x[j] *= A[j*lda + j]; 00926 } 00927 } 00928 } else { 00929 jx = kx; 00930 for (j=izero; j<n; j++) { 00931 if (x[jx] != zero) { 00932 temp = x[jx]; 00933 ix = kx; 00934 for (i=izero; i<j; i++) { 00935 x[ix] += temp*A[j*lda + i]; 00936 ix += incx; 00937 } 00938 if ( noUnit ) 00939 x[jx] *= A[j*lda + j]; 00940 } 00941 jx += incx; 00942 } 00943 } /* if (incx == ione) */ 00944 } else { /* A is a lower triangular matrix */ 00945 if (incx == ione) { 00946 for (j=n-ione; j>-ione; j--) { 00947 if (x[j] != zero) { 00948 temp = x[j]; 00949 for (i=n-ione; i>j; i--) { 00950 x[i] += temp*A[j*lda + i]; 00951 } 00952 if ( noUnit ) 00953 x[j] *= A[j*lda + j]; 00954 } 00955 } 00956 } else { 00957 kx += (n-ione)*incx; 00958 jx = kx; 00959 for (j=n-ione; j>-ione; j--) { 00960 if (x[jx] != zero) { 00961 temp = x[jx]; 00962 ix = kx; 00963 for (i=n-ione; i>j; i--) { 00964 x[ix] += temp*A[j*lda + i]; 00965 ix -= incx; 00966 } 00967 if ( noUnit ) 00968 x[jx] *= A[j*lda + j]; 00969 } 00970 jx -= incx; 00971 } 00972 } 00973 } /* if (EUploChar[uplo]=='U') */ 00974 } else { /* A is transposed/conjugated */ 00975 /* Compute x = A'*x */ 00976 if (EUploChar[uplo]=='U') { 00977 /* A is an upper triangular matrix */ 00978 if (incx == ione) { 00979 for (j=n-ione; j>-ione; j--) { 00980 temp = x[j]; 00981 if ( noConj ) { 00982 if ( noUnit ) 00983 temp *= A[j*lda + j]; 00984 for (i=j-ione; i>-ione; i--) { 00985 temp += A[j*lda + i]*x[i]; 00986 } 00987 } else { 00988 if ( noUnit ) 00989 temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]); 00990 for (i=j-ione; i>-ione; i--) { 00991 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i]; 00992 } 00993 } 00994 x[j] = temp; 00995 } 00996 } else { 00997 jx = kx + (n-ione)*incx; 00998 for (j=n-ione; j>-ione; j--) { 00999 temp = x[jx]; 01000 ix = jx; 01001 if ( noConj ) { 01002 if ( noUnit ) 01003 temp *= A[j*lda + j]; 01004 for (i=j-ione; i>-ione; i--) { 01005 ix -= incx; 01006 temp += A[j*lda + i]*x[ix]; 01007 } 01008 } else { 01009 if ( noUnit ) 01010 temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]); 01011 for (i=j-ione; i>-ione; i--) { 01012 ix -= incx; 01013 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix]; 01014 } 01015 } 01016 x[jx] = temp; 01017 jx -= incx; 01018 } 01019 } 01020 } else { 01021 /* A is a lower triangular matrix */ 01022 if (incx == ione) { 01023 for (j=izero; j<n; j++) { 01024 temp = x[j]; 01025 if ( noConj ) { 01026 if ( noUnit ) 01027 temp *= A[j*lda + j]; 01028 for (i=j+ione; i<n; i++) { 01029 temp += A[j*lda + i]*x[i]; 01030 } 01031 } else { 01032 if ( noUnit ) 01033 temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]); 01034 for (i=j+ione; i<n; i++) { 01035 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i]; 01036 } 01037 } 01038 x[j] = temp; 01039 } 01040 } else { 01041 jx = kx; 01042 for (j=izero; j<n; j++) { 01043 temp = x[jx]; 01044 ix = jx; 01045 if ( noConj ) { 01046 if ( noUnit ) 01047 temp *= A[j*lda + j]; 01048 for (i=j+ione; i<n; i++) { 01049 ix += incx; 01050 temp += A[j*lda + i]*x[ix]; 01051 } 01052 } else { 01053 if ( noUnit ) 01054 temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]); 01055 for (i=j+ione; i<n; i++) { 01056 ix += incx; 01057 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix]; 01058 } 01059 } 01060 x[jx] = temp; 01061 jx += incx; 01062 } 01063 } 01064 } /* if (EUploChar[uplo]=='U') */ 01065 } /* if (ETranspChar[trans]=='N') */ 01066 } /* if (!BadArgument) */ 01067 } /* end TRMV */ 01068 01069 template<typename OrdinalType, typename ScalarType> 01070 template <typename alpha_type, typename x_type, typename y_type> 01071 void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const 01072 { 01073 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01074 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01075 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01076 y_type y_zero = ScalarTraits<y_type>::zero(); 01077 bool BadArgument = false; 01078 01079 // Quick return if there is nothing to do! 01080 if( m == izero || n == izero || alpha == alpha_zero ){ return; } 01081 01082 // Otherwise, we need to check the argument list. 01083 if( m < izero ) { 01084 std::cout << "BLAS::GER Error: M == " << m << std::endl; 01085 BadArgument = true; 01086 } 01087 if( n < izero ) { 01088 std::cout << "BLAS::GER Error: N == " << n << std::endl; 01089 BadArgument = true; 01090 } 01091 if( lda < m ) { 01092 std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl; 01093 BadArgument = true; 01094 } 01095 if( incx == 0 ) { 01096 std::cout << "BLAS::GER Error: INCX == 0"<< std::endl; 01097 BadArgument = true; 01098 } 01099 if( incy == 0 ) { 01100 std::cout << "BLAS::GER Error: INCY == 0"<< std::endl; 01101 BadArgument = true; 01102 } 01103 01104 if(!BadArgument) { 01105 OrdinalType i, j, ix, jy = izero, kx = izero; 01106 ScalarType temp; 01107 01108 // Set the starting pointers for the vectors x and y if incx/y < 0. 01109 if (incx < izero) { kx = (-m+ione)*incx; } 01110 if (incy < izero) { jy = (-n+ione)*incy; } 01111 01112 // Start the operations for incx == 1 01113 if( incx == ione ) { 01114 for( j=izero; j<n; j++ ) { 01115 if ( y[jy] != y_zero ) { 01116 temp = alpha*y[jy]; 01117 for ( i=izero; i<m; i++ ) { 01118 A[j*lda + i] += x[i]*temp; 01119 } 01120 } 01121 jy += incy; 01122 } 01123 } 01124 else { // Start the operations for incx != 1 01125 for( j=izero; j<n; j++ ) { 01126 if ( y[jy] != y_zero ) { 01127 temp = alpha*y[jy]; 01128 ix = kx; 01129 for( i=izero; i<m; i++ ) { 01130 A[j*lda + i] += x[ix]*temp; 01131 ix += incx; 01132 } 01133 } 01134 jy += incy; 01135 } 01136 } 01137 } /* if(!BadArgument) */ 01138 } /* end GER */ 01139 01140 //------------------------------------------------------------------------------------------ 01141 // LEVEL 3 BLAS ROUTINES 01142 //------------------------------------------------------------------------------------------ 01143 01144 template<typename OrdinalType, typename ScalarType> 01145 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 01146 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const 01147 { 01148 01149 typedef TypeNameTraits<OrdinalType> OTNT; 01150 typedef TypeNameTraits<ScalarType> STNT; 01151 01152 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01153 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01154 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 01155 B_type B_zero = ScalarTraits<B_type>::zero(); 01156 ScalarType C_zero = ScalarTraits<ScalarType>::zero(); 01157 beta_type beta_one = ScalarTraits<beta_type>::one(); 01158 OrdinalType i, j, p; 01159 OrdinalType NRowA = m, NRowB = k; 01160 ScalarType temp; 01161 bool BadArgument = false; 01162 bool conjA = false, conjB = false; 01163 01164 // Change dimensions of matrix if either matrix is transposed 01165 if( !(ETranspChar[transa]=='N') ) { 01166 NRowA = k; 01167 } 01168 if( !(ETranspChar[transb]=='N') ) { 01169 NRowB = n; 01170 } 01171 01172 // Quick return if there is nothing to do! 01173 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; } 01174 if( m < izero ) { 01175 std::cout << "BLAS::GEMM Error: M == " << m << std::endl; 01176 BadArgument = true; 01177 } 01178 if( n < izero ) { 01179 std::cout << "BLAS::GEMM Error: N == " << n << std::endl; 01180 BadArgument = true; 01181 } 01182 if( k < izero ) { 01183 std::cout << "BLAS::GEMM Error: K == " << k << std::endl; 01184 BadArgument = true; 01185 } 01186 if( lda < NRowA ) { 01187 std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl; 01188 BadArgument = true; 01189 } 01190 if( ldb < NRowB ) { 01191 std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl; 01192 BadArgument = true; 01193 } 01194 if( ldc < m ) { 01195 std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl; 01196 BadArgument = true; 01197 } 01198 01199 if(!BadArgument) { 01200 01201 // Determine if this is a conjugate tranpose 01202 conjA = (ETranspChar[transa] == 'C'); 01203 conjB = (ETranspChar[transb] == 'C'); 01204 01205 // Only need to scale the resulting matrix C. 01206 if( alpha == alpha_zero ) { 01207 if( beta == beta_zero ) { 01208 for (j=izero; j<n; j++) { 01209 for (i=izero; i<m; i++) { 01210 C[j*ldc + i] = C_zero; 01211 } 01212 } 01213 } else { 01214 for (j=izero; j<n; j++) { 01215 for (i=izero; i<m; i++) { 01216 C[j*ldc + i] *= beta; 01217 } 01218 } 01219 } 01220 return; 01221 } 01222 // 01223 // Now start the operations. 01224 // 01225 if ( ETranspChar[transb]=='N' ) { 01226 if ( ETranspChar[transa]=='N' ) { 01227 // Compute C = alpha*A*B + beta*C 01228 for (j=izero; j<n; j++) { 01229 if( beta == beta_zero ) { 01230 for (i=izero; i<m; i++){ 01231 C[j*ldc + i] = C_zero; 01232 } 01233 } else if( beta != beta_one ) { 01234 for (i=izero; i<m; i++){ 01235 C[j*ldc + i] *= beta; 01236 } 01237 } 01238 for (p=izero; p<k; p++){ 01239 if (B[j*ldb + p] != B_zero ){ 01240 temp = alpha*B[j*ldb + p]; 01241 for (i=izero; i<m; i++) { 01242 C[j*ldc + i] += temp*A[p*lda + i]; 01243 } 01244 } 01245 } 01246 } 01247 } else if ( conjA ) { 01248 // Compute C = alpha*conj(A')*B + beta*C 01249 for (j=izero; j<n; j++) { 01250 for (i=izero; i<m; i++) { 01251 temp = C_zero; 01252 for (p=izero; p<k; p++) { 01253 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])*B[j*ldb + p]; 01254 } 01255 if (beta == beta_zero) { 01256 C[j*ldc + i] = alpha*temp; 01257 } else { 01258 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01259 } 01260 } 01261 } 01262 } else { 01263 // Compute C = alpha*A'*B + beta*C 01264 for (j=izero; j<n; j++) { 01265 for (i=izero; i<m; i++) { 01266 temp = C_zero; 01267 for (p=izero; p<k; p++) { 01268 temp += A[i*lda + p]*B[j*ldb + p]; 01269 } 01270 if (beta == beta_zero) { 01271 C[j*ldc + i] = alpha*temp; 01272 } else { 01273 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01274 } 01275 } 01276 } 01277 } 01278 } else if ( ETranspChar[transa]=='N' ) { 01279 if ( conjB ) { 01280 // Compute C = alpha*A*conj(B') + beta*C 01281 for (j=izero; j<n; j++) { 01282 if (beta == beta_zero) { 01283 for (i=izero; i<m; i++) { 01284 C[j*ldc + i] = C_zero; 01285 } 01286 } else if ( beta != beta_one ) { 01287 for (i=izero; i<m; i++) { 01288 C[j*ldc + i] *= beta; 01289 } 01290 } 01291 for (p=izero; p<k; p++) { 01292 if (B[p*ldb + j] != B_zero) { 01293 temp = alpha*ScalarTraits<B_type>::conjugate(B[p*ldb + j]); 01294 for (i=izero; i<m; i++) { 01295 C[j*ldc + i] += temp*A[p*lda + i]; 01296 } 01297 } 01298 } 01299 } 01300 } else { 01301 // Compute C = alpha*A*B' + beta*C 01302 for (j=izero; j<n; j++) { 01303 if (beta == beta_zero) { 01304 for (i=izero; i<m; i++) { 01305 C[j*ldc + i] = C_zero; 01306 } 01307 } else if ( beta != beta_one ) { 01308 for (i=izero; i<m; i++) { 01309 C[j*ldc + i] *= beta; 01310 } 01311 } 01312 for (p=izero; p<k; p++) { 01313 if (B[p*ldb + j] != B_zero) { 01314 temp = alpha*B[p*ldb + j]; 01315 for (i=izero; i<m; i++) { 01316 C[j*ldc + i] += temp*A[p*lda + i]; 01317 } 01318 } 01319 } 01320 } 01321 } 01322 } else if ( conjA ) { 01323 if ( conjB ) { 01324 // Compute C = alpha*conj(A')*conj(B') + beta*C 01325 for (j=izero; j<n; j++) { 01326 for (i=izero; i<m; i++) { 01327 temp = C_zero; 01328 for (p=izero; p<k; p++) { 01329 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p]) 01330 * ScalarTraits<B_type>::conjugate(B[p*ldb + j]); 01331 } 01332 if (beta == beta_zero) { 01333 C[j*ldc + i] = alpha*temp; 01334 } else { 01335 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01336 } 01337 } 01338 } 01339 } else { 01340 // Compute C = alpha*conj(A')*B' + beta*C 01341 for (j=izero; j<n; j++) { 01342 for (i=izero; i<m; i++) { 01343 temp = C_zero; 01344 for (p=izero; p<k; p++) { 01345 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p]) 01346 * B[p*ldb + j]; 01347 } 01348 if (beta == beta_zero) { 01349 C[j*ldc + i] = alpha*temp; 01350 } else { 01351 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01352 } 01353 } 01354 } 01355 } 01356 } else { 01357 if ( conjB ) { 01358 // Compute C = alpha*A'*conj(B') + beta*C 01359 for (j=izero; j<n; j++) { 01360 for (i=izero; i<m; i++) { 01361 temp = C_zero; 01362 for (p=izero; p<k; p++) { 01363 temp += A[i*lda + p] 01364 * ScalarTraits<B_type>::conjugate(B[p*ldb + j]); 01365 } 01366 if (beta == beta_zero) { 01367 C[j*ldc + i] = alpha*temp; 01368 } else { 01369 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01370 } 01371 } 01372 } 01373 } else { 01374 // Compute C = alpha*A'*B' + beta*C 01375 for (j=izero; j<n; j++) { 01376 for (i=izero; i<m; i++) { 01377 temp = C_zero; 01378 for (p=izero; p<k; p++) { 01379 temp += A[i*lda + p]*B[p*ldb + j]; 01380 } 01381 if (beta == beta_zero) { 01382 C[j*ldc + i] = alpha*temp; 01383 } else { 01384 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01385 } 01386 } 01387 } 01388 } // end if (ETranspChar[transa]=='N') ... 01389 } // end if (ETranspChar[transb]=='N') ... 01390 } // end if (!BadArgument) ... 01391 } // end of GEMM 01392 01393 01394 template<typename OrdinalType, typename ScalarType> 01395 template <typename alpha_type, typename A_type, typename B_type, typename beta_type> 01396 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const 01397 { 01398 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01399 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01400 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01401 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 01402 ScalarType C_zero = ScalarTraits<ScalarType>::zero(); 01403 beta_type beta_one = ScalarTraits<beta_type>::one(); 01404 OrdinalType i, j, k, NRowA = m; 01405 ScalarType temp1, temp2; 01406 bool BadArgument = false; 01407 bool Upper = (EUploChar[uplo] == 'U'); 01408 if (ESideChar[side] == 'R') { NRowA = n; } 01409 01410 // Quick return. 01411 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; } 01412 if( m < izero ) { 01413 std::cout << "BLAS::SYMM Error: M == "<< m << std::endl; 01414 BadArgument = true; } 01415 if( n < izero ) { 01416 std::cout << "BLAS::SYMM Error: N == "<< n << std::endl; 01417 BadArgument = true; } 01418 if( lda < NRowA ) { 01419 std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl; 01420 BadArgument = true; } 01421 if( ldb < m ) { 01422 std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl; 01423 BadArgument = true; } 01424 if( ldc < m ) { 01425 std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl; 01426 BadArgument = true; } 01427 01428 if(!BadArgument) { 01429 01430 // Only need to scale C and return. 01431 if (alpha == alpha_zero) { 01432 if (beta == beta_zero ) { 01433 for (j=izero; j<n; j++) { 01434 for (i=izero; i<m; i++) { 01435 C[j*ldc + i] = C_zero; 01436 } 01437 } 01438 } else { 01439 for (j=izero; j<n; j++) { 01440 for (i=izero; i<m; i++) { 01441 C[j*ldc + i] *= beta; 01442 } 01443 } 01444 } 01445 return; 01446 } 01447 01448 if ( ESideChar[side] == 'L') { 01449 // Compute C = alpha*A*B + beta*C 01450 01451 if (Upper) { 01452 // The symmetric part of A is stored in the upper triangular part of the matrix. 01453 for (j=izero; j<n; j++) { 01454 for (i=izero; i<m; i++) { 01455 temp1 = alpha*B[j*ldb + i]; 01456 temp2 = C_zero; 01457 for (k=izero; k<i; k++) { 01458 C[j*ldc + k] += temp1*A[i*lda + k]; 01459 temp2 += B[j*ldb + k]*A[i*lda + k]; 01460 } 01461 if (beta == beta_zero) { 01462 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2; 01463 } else { 01464 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2; 01465 } 01466 } 01467 } 01468 } else { 01469 // The symmetric part of A is stored in the lower triangular part of the matrix. 01470 for (j=izero; j<n; j++) { 01471 for (i=m-ione; i>-ione; i--) { 01472 temp1 = alpha*B[j*ldb + i]; 01473 temp2 = C_zero; 01474 for (k=i+ione; k<m; k++) { 01475 C[j*ldc + k] += temp1*A[i*lda + k]; 01476 temp2 += B[j*ldb + k]*A[i*lda + k]; 01477 } 01478 if (beta == beta_zero) { 01479 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2; 01480 } else { 01481 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2; 01482 } 01483 } 01484 } 01485 } 01486 } else { 01487 // Compute C = alpha*B*A + beta*C. 01488 for (j=izero; j<n; j++) { 01489 temp1 = alpha*A[j*lda + j]; 01490 if (beta == beta_zero) { 01491 for (i=izero; i<m; i++) { 01492 C[j*ldc + i] = temp1*B[j*ldb + i]; 01493 } 01494 } else { 01495 for (i=izero; i<m; i++) { 01496 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i]; 01497 } 01498 } 01499 for (k=izero; k<j; k++) { 01500 if (Upper) { 01501 temp1 = alpha*A[j*lda + k]; 01502 } else { 01503 temp1 = alpha*A[k*lda + j]; 01504 } 01505 for (i=izero; i<m; i++) { 01506 C[j*ldc + i] += temp1*B[k*ldb + i]; 01507 } 01508 } 01509 for (k=j+ione; k<n; k++) { 01510 if (Upper) { 01511 temp1 = alpha*A[k*lda + j]; 01512 } else { 01513 temp1 = alpha*A[j*lda + k]; 01514 } 01515 for (i=izero; i<m; i++) { 01516 C[j*ldc + i] += temp1*B[k*ldb + i]; 01517 } 01518 } 01519 } 01520 } // end if (ESideChar[side]=='L') ... 01521 } // end if(!BadArgument) ... 01522 } // end SYMM 01523 01524 template<typename OrdinalType, typename ScalarType> 01525 template <typename alpha_type, typename A_type, typename beta_type> 01526 void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const 01527 { 01528 typedef TypeNameTraits<OrdinalType> OTNT; 01529 typedef TypeNameTraits<ScalarType> STNT; 01530 01531 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01532 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01533 beta_type beta_zero = ScalarTraits<beta_type>::zero(); 01534 beta_type beta_one = ScalarTraits<beta_type>::one(); 01535 A_type temp, A_zero = ScalarTraits<A_type>::zero(); 01536 ScalarType C_zero = ScalarTraits<ScalarType>::zero(); 01537 OrdinalType i, j, l, NRowA = n; 01538 bool BadArgument = false; 01539 bool Upper = (EUploChar[uplo] == 'U'); 01540 01541 TEUCHOS_TEST_FOR_EXCEPTION( 01542 Teuchos::ScalarTraits<ScalarType>::isComplex 01543 && (trans == CONJ_TRANS), 01544 std::logic_error, 01545 "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()" 01546 " does not support CONJ_TRANS for complex data types." 01547 ); 01548 01549 // Change dimensions of A matrix is transposed 01550 if( !(ETranspChar[trans]=='N') ) { 01551 NRowA = k; 01552 } 01553 01554 // Quick return. 01555 if ( n==izero ) { return; } 01556 if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) { return; } 01557 if( n < izero ) { 01558 std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl; 01559 BadArgument = true; } 01560 if( k < izero ) { 01561 std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl; 01562 BadArgument = true; } 01563 if( lda < NRowA ) { 01564 std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl; 01565 BadArgument = true; } 01566 if( ldc < n ) { 01567 std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl; 01568 BadArgument = true; } 01569 01570 if(!BadArgument) { 01571 01572 // Scale C when alpha is zero 01573 if (alpha == alpha_zero) { 01574 if (Upper) { 01575 if (beta==beta_zero) { 01576 for (j=izero; j<n; j++) { 01577 for (i=izero; i<=j; i++) { 01578 C[j*ldc + i] = C_zero; 01579 } 01580 } 01581 } 01582 else { 01583 for (j=izero; j<n; j++) { 01584 for (i=izero; i<=j; i++) { 01585 C[j*ldc + i] *= beta; 01586 } 01587 } 01588 } 01589 } 01590 else { 01591 if (beta==beta_zero) { 01592 for (j=izero; j<n; j++) { 01593 for (i=j; i<n; i++) { 01594 C[j*ldc + i] = C_zero; 01595 } 01596 } 01597 } 01598 else { 01599 for (j=izero; j<n; j++) { 01600 for (i=j; i<n; i++) { 01601 C[j*ldc + i] *= beta; 01602 } 01603 } 01604 } 01605 } 01606 return; 01607 } 01608 01609 // Now we can start the computation 01610 01611 if ( ETranspChar[trans]=='N' ) { 01612 01613 // Form C <- alpha*A*A' + beta*C 01614 if (Upper) { 01615 for (j=izero; j<n; j++) { 01616 if (beta==beta_zero) { 01617 for (i=izero; i<=j; i++) { 01618 C[j*ldc + i] = C_zero; 01619 } 01620 } 01621 else if (beta!=beta_one) { 01622 for (i=izero; i<=j; i++) { 01623 C[j*ldc + i] *= beta; 01624 } 01625 } 01626 for (l=izero; l<k; l++) { 01627 if (A[l*lda + j] != A_zero) { 01628 temp = alpha*A[l*lda + j]; 01629 for (i = izero; i <=j; i++) { 01630 C[j*ldc + i] += temp*A[l*lda + i]; 01631 } 01632 } 01633 } 01634 } 01635 } 01636 else { 01637 for (j=izero; j<n; j++) { 01638 if (beta==beta_zero) { 01639 for (i=j; i<n; i++) { 01640 C[j*ldc + i] = C_zero; 01641 } 01642 } 01643 else if (beta!=beta_one) { 01644 for (i=j; i<n; i++) { 01645 C[j*ldc + i] *= beta; 01646 } 01647 } 01648 for (l=izero; l<k; l++) { 01649 if (A[l*lda + j] != A_zero) { 01650 temp = alpha*A[l*lda + j]; 01651 for (i=j; i<n; i++) { 01652 C[j*ldc + i] += temp*A[l*lda + i]; 01653 } 01654 } 01655 } 01656 } 01657 } 01658 } 01659 else { 01660 01661 // Form C <- alpha*A'*A + beta*C 01662 if (Upper) { 01663 for (j=izero; j<n; j++) { 01664 for (i=izero; i<=j; i++) { 01665 temp = A_zero; 01666 for (l=izero; l<k; l++) { 01667 temp += A[i*lda + l]*A[j*lda + l]; 01668 } 01669 if (beta==beta_zero) { 01670 C[j*ldc + i] = alpha*temp; 01671 } 01672 else { 01673 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01674 } 01675 } 01676 } 01677 } 01678 else { 01679 for (j=izero; j<n; j++) { 01680 for (i=j; i<n; i++) { 01681 temp = A_zero; 01682 for (l=izero; l<k; ++l) { 01683 temp += A[i*lda + l]*A[j*lda + l]; 01684 } 01685 if (beta==beta_zero) { 01686 C[j*ldc + i] = alpha*temp; 01687 } 01688 else { 01689 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i]; 01690 } 01691 } 01692 } 01693 } 01694 } 01695 } /* if (!BadArgument) */ 01696 } /* END SYRK */ 01697 01698 template<typename OrdinalType, typename ScalarType> 01699 template <typename alpha_type, typename A_type> 01700 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const 01701 { 01702 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01703 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01704 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01705 A_type A_zero = ScalarTraits<A_type>::zero(); 01706 ScalarType B_zero = ScalarTraits<ScalarType>::zero(); 01707 ScalarType one = ScalarTraits<ScalarType>::one(); 01708 OrdinalType i, j, k, NRowA = m; 01709 ScalarType temp; 01710 bool BadArgument = false; 01711 bool LSide = (ESideChar[side] == 'L'); 01712 bool noUnit = (EDiagChar[diag] == 'N'); 01713 bool Upper = (EUploChar[uplo] == 'U'); 01714 bool noConj = (ETranspChar[transa] == 'T'); 01715 01716 if(!LSide) { NRowA = n; } 01717 01718 // Quick return. 01719 if (n==izero || m==izero) { return; } 01720 if( m < izero ) { 01721 std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl; 01722 BadArgument = true; } 01723 if( n < izero ) { 01724 std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl; 01725 BadArgument = true; } 01726 if( lda < NRowA ) { 01727 std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl; 01728 BadArgument = true; } 01729 if( ldb < m ) { 01730 std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl; 01731 BadArgument = true; } 01732 01733 if(!BadArgument) { 01734 01735 // B only needs to be zeroed out. 01736 if( alpha == alpha_zero ) { 01737 for( j=izero; j<n; j++ ) { 01738 for( i=izero; i<m; i++ ) { 01739 B[j*ldb + i] = B_zero; 01740 } 01741 } 01742 return; 01743 } 01744 01745 // Start the computations. 01746 if ( LSide ) { 01747 // A is on the left side of B. 01748 01749 if ( ETranspChar[transa]=='N' ) { 01750 // Compute B = alpha*A*B 01751 01752 if ( Upper ) { 01753 // A is upper triangular 01754 for( j=izero; j<n; j++ ) { 01755 for( k=izero; k<m; k++) { 01756 if ( B[j*ldb + k] != B_zero ) { 01757 temp = alpha*B[j*ldb + k]; 01758 for( i=izero; i<k; i++ ) { 01759 B[j*ldb + i] += temp*A[k*lda + i]; 01760 } 01761 if ( noUnit ) 01762 temp *=A[k*lda + k]; 01763 B[j*ldb + k] = temp; 01764 } 01765 } 01766 } 01767 } else { 01768 // A is lower triangular 01769 for( j=izero; j<n; j++ ) { 01770 for( k=m-ione; k>-ione; k-- ) { 01771 if( B[j*ldb + k] != B_zero ) { 01772 temp = alpha*B[j*ldb + k]; 01773 B[j*ldb + k] = temp; 01774 if ( noUnit ) 01775 B[j*ldb + k] *= A[k*lda + k]; 01776 for( i=k+ione; i<m; i++ ) { 01777 B[j*ldb + i] += temp*A[k*lda + i]; 01778 } 01779 } 01780 } 01781 } 01782 } 01783 } else { 01784 // Compute B = alpha*A'*B or B = alpha*conj(A')*B 01785 if( Upper ) { 01786 for( j=izero; j<n; j++ ) { 01787 for( i=m-ione; i>-ione; i-- ) { 01788 temp = B[j*ldb + i]; 01789 if ( noConj ) { 01790 if( noUnit ) 01791 temp *= A[i*lda + i]; 01792 for( k=izero; k<i; k++ ) { 01793 temp += A[i*lda + k]*B[j*ldb + k]; 01794 } 01795 } else { 01796 if( noUnit ) 01797 temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]); 01798 for( k=izero; k<i; k++ ) { 01799 temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k]; 01800 } 01801 } 01802 B[j*ldb + i] = alpha*temp; 01803 } 01804 } 01805 } else { 01806 for( j=izero; j<n; j++ ) { 01807 for( i=izero; i<m; i++ ) { 01808 temp = B[j*ldb + i]; 01809 if ( noConj ) { 01810 if( noUnit ) 01811 temp *= A[i*lda + i]; 01812 for( k=i+ione; k<m; k++ ) { 01813 temp += A[i*lda + k]*B[j*ldb + k]; 01814 } 01815 } else { 01816 if( noUnit ) 01817 temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]); 01818 for( k=i+ione; k<m; k++ ) { 01819 temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k]; 01820 } 01821 } 01822 B[j*ldb + i] = alpha*temp; 01823 } 01824 } 01825 } 01826 } 01827 } else { 01828 // A is on the right hand side of B. 01829 01830 if( ETranspChar[transa] == 'N' ) { 01831 // Compute B = alpha*B*A 01832 01833 if( Upper ) { 01834 // A is upper triangular. 01835 for( j=n-ione; j>-ione; j-- ) { 01836 temp = alpha; 01837 if( noUnit ) 01838 temp *= A[j*lda + j]; 01839 for( i=izero; i<m; i++ ) { 01840 B[j*ldb + i] *= temp; 01841 } 01842 for( k=izero; k<j; k++ ) { 01843 if( A[j*lda + k] != A_zero ) { 01844 temp = alpha*A[j*lda + k]; 01845 for( i=izero; i<m; i++ ) { 01846 B[j*ldb + i] += temp*B[k*ldb + i]; 01847 } 01848 } 01849 } 01850 } 01851 } else { 01852 // A is lower triangular. 01853 for( j=izero; j<n; j++ ) { 01854 temp = alpha; 01855 if( noUnit ) 01856 temp *= A[j*lda + j]; 01857 for( i=izero; i<m; i++ ) { 01858 B[j*ldb + i] *= temp; 01859 } 01860 for( k=j+ione; k<n; k++ ) { 01861 if( A[j*lda + k] != A_zero ) { 01862 temp = alpha*A[j*lda + k]; 01863 for( i=izero; i<m; i++ ) { 01864 B[j*ldb + i] += temp*B[k*ldb + i]; 01865 } 01866 } 01867 } 01868 } 01869 } 01870 } else { 01871 // Compute B = alpha*B*A' or B = alpha*B*conj(A') 01872 01873 if( Upper ) { 01874 for( k=izero; k<n; k++ ) { 01875 for( j=izero; j<k; j++ ) { 01876 if( A[k*lda + j] != A_zero ) { 01877 if ( noConj ) 01878 temp = alpha*A[k*lda + j]; 01879 else 01880 temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]); 01881 for( i=izero; i<m; i++ ) { 01882 B[j*ldb + i] += temp*B[k*ldb + i]; 01883 } 01884 } 01885 } 01886 temp = alpha; 01887 if( noUnit ) { 01888 if ( noConj ) 01889 temp *= A[k*lda + k]; 01890 else 01891 temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]); 01892 } 01893 if( temp != one ) { 01894 for( i=izero; i<m; i++ ) { 01895 B[k*ldb + i] *= temp; 01896 } 01897 } 01898 } 01899 } else { 01900 for( k=n-ione; k>-ione; k-- ) { 01901 for( j=k+ione; j<n; j++ ) { 01902 if( A[k*lda + j] != A_zero ) { 01903 if ( noConj ) 01904 temp = alpha*A[k*lda + j]; 01905 else 01906 temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]); 01907 for( i=izero; i<m; i++ ) { 01908 B[j*ldb + i] += temp*B[k*ldb + i]; 01909 } 01910 } 01911 } 01912 temp = alpha; 01913 if( noUnit ) { 01914 if ( noConj ) 01915 temp *= A[k*lda + k]; 01916 else 01917 temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]); 01918 } 01919 if( temp != one ) { 01920 for( i=izero; i<m; i++ ) { 01921 B[k*ldb + i] *= temp; 01922 } 01923 } 01924 } 01925 } 01926 } // end if( ETranspChar[transa] == 'N' ) ... 01927 } // end if ( LSide ) ... 01928 } // end if (!BadArgument) 01929 } // end TRMM 01930 01931 template<typename OrdinalType, typename ScalarType> 01932 template <typename alpha_type, typename A_type> 01933 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const 01934 { 01935 OrdinalType izero = OrdinalTraits<OrdinalType>::zero(); 01936 OrdinalType ione = OrdinalTraits<OrdinalType>::one(); 01937 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero(); 01938 A_type A_zero = ScalarTraits<A_type>::zero(); 01939 ScalarType B_zero = ScalarTraits<ScalarType>::zero(); 01940 alpha_type alpha_one = ScalarTraits<alpha_type>::one(); 01941 ScalarType B_one = ScalarTraits<ScalarType>::one(); 01942 ScalarType temp; 01943 OrdinalType NRowA = m; 01944 bool BadArgument = false; 01945 bool noUnit = (EDiagChar[diag]=='N'); 01946 bool noConj = (ETranspChar[transa] == 'T'); 01947 01948 if (!(ESideChar[side] == 'L')) { NRowA = n; } 01949 01950 // Quick return. 01951 if (n == izero || m == izero) { return; } 01952 if( m < izero ) { 01953 std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl; 01954 BadArgument = true; } 01955 if( n < izero ) { 01956 std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl; 01957 BadArgument = true; } 01958 if( lda < NRowA ) { 01959 std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl; 01960 BadArgument = true; } 01961 if( ldb < m ) { 01962 std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl; 01963 BadArgument = true; } 01964 01965 if(!BadArgument) 01966 { 01967 int i, j, k; 01968 // Set the solution to the zero vector. 01969 if(alpha == alpha_zero) { 01970 for(j = izero; j < n; j++) { 01971 for( i = izero; i < m; i++) { 01972 B[j*ldb+i] = B_zero; 01973 } 01974 } 01975 } 01976 else 01977 { // Start the operations. 01978 if(ESideChar[side] == 'L') { 01979 // 01980 // Perform computations for OP(A)*X = alpha*B 01981 // 01982 if(ETranspChar[transa] == 'N') { 01983 // 01984 // Compute B = alpha*inv( A )*B 01985 // 01986 if(EUploChar[uplo] == 'U') { 01987 // A is upper triangular. 01988 for(j = izero; j < n; j++) { 01989 // Perform alpha*B if alpha is not 1. 01990 if(alpha != alpha_one) { 01991 for( i = izero; i < m; i++) { 01992 B[j*ldb+i] *= alpha; 01993 } 01994 } 01995 // Perform a backsolve for column j of B. 01996 for(k = (m - ione); k > -ione; k--) { 01997 // If this entry is zero, we don't have to do anything. 01998 if (B[j*ldb + k] != B_zero) { 01999 if ( noUnit ) { 02000 B[j*ldb + k] /= A[k*lda + k]; 02001 } 02002 for(i = izero; i < k; i++) { 02003 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i]; 02004 } 02005 } 02006 } 02007 } 02008 } 02009 else 02010 { // A is lower triangular. 02011 for(j = izero; j < n; j++) { 02012 // Perform alpha*B if alpha is not 1. 02013 if(alpha != alpha_one) { 02014 for( i = izero; i < m; i++) { 02015 B[j*ldb+i] *= alpha; 02016 } 02017 } 02018 // Perform a forward solve for column j of B. 02019 for(k = izero; k < m; k++) { 02020 // If this entry is zero, we don't have to do anything. 02021 if (B[j*ldb + k] != B_zero) { 02022 if ( noUnit ) { 02023 B[j*ldb + k] /= A[k*lda + k]; 02024 } 02025 for(i = k+ione; i < m; i++) { 02026 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i]; 02027 } 02028 } 02029 } 02030 } 02031 } // end if (uplo == 'U') 02032 } // if (transa =='N') 02033 else { 02034 // 02035 // Compute B = alpha*inv( A' )*B 02036 // or B = alpha*inv( conj(A') )*B 02037 // 02038 if(EUploChar[uplo] == 'U') { 02039 // A is upper triangular. 02040 for(j = izero; j < n; j++) { 02041 for( i = izero; i < m; i++) { 02042 temp = alpha*B[j*ldb+i]; 02043 if ( noConj ) { 02044 for(k = izero; k < i; k++) { 02045 temp -= A[i*lda + k] * B[j*ldb + k]; 02046 } 02047 if ( noUnit ) { 02048 temp /= A[i*lda + i]; 02049 } 02050 } else { 02051 for(k = izero; k < i; k++) { 02052 temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k]) 02053 * B[j*ldb + k]; 02054 } 02055 if ( noUnit ) { 02056 temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]); 02057 } 02058 } 02059 B[j*ldb + i] = temp; 02060 } 02061 } 02062 } 02063 else 02064 { // A is lower triangular. 02065 for(j = izero; j < n; j++) { 02066 for(i = (m - ione) ; i > -ione; i--) { 02067 temp = alpha*B[j*ldb+i]; 02068 if ( noConj ) { 02069 for(k = i+ione; k < m; k++) { 02070 temp -= A[i*lda + k] * B[j*ldb + k]; 02071 } 02072 if ( noUnit ) { 02073 temp /= A[i*lda + i]; 02074 } 02075 } else { 02076 for(k = i+ione; k < m; k++) { 02077 temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k]) 02078 * B[j*ldb + k]; 02079 } 02080 if ( noUnit ) { 02081 temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]); 02082 } 02083 } 02084 B[j*ldb + i] = temp; 02085 } 02086 } 02087 } 02088 } 02089 } // if (side == 'L') 02090 else { 02091 // side == 'R' 02092 // 02093 // Perform computations for X*OP(A) = alpha*B 02094 // 02095 if (ETranspChar[transa] == 'N') { 02096 // 02097 // Compute B = alpha*B*inv( A ) 02098 // 02099 if(EUploChar[uplo] == 'U') { 02100 // A is upper triangular. 02101 // Perform a backsolve for column j of B. 02102 for(j = izero; j < n; j++) { 02103 // Perform alpha*B if alpha is not 1. 02104 if(alpha != alpha_one) { 02105 for( i = izero; i < m; i++) { 02106 B[j*ldb+i] *= alpha; 02107 } 02108 } 02109 for(k = izero; k < j; k++) { 02110 // If this entry is zero, we don't have to do anything. 02111 if (A[j*lda + k] != A_zero) { 02112 for(i = izero; i < m; i++) { 02113 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 02114 } 02115 } 02116 } 02117 if ( noUnit ) { 02118 temp = B_one/A[j*lda + j]; 02119 for(i = izero; i < m; i++) { 02120 B[j*ldb + i] *= temp; 02121 } 02122 } 02123 } 02124 } 02125 else 02126 { // A is lower triangular. 02127 for(j = (n - ione); j > -ione; j--) { 02128 // Perform alpha*B if alpha is not 1. 02129 if(alpha != alpha_one) { 02130 for( i = izero; i < m; i++) { 02131 B[j*ldb+i] *= alpha; 02132 } 02133 } 02134 // Perform a forward solve for column j of B. 02135 for(k = j+ione; k < n; k++) { 02136 // If this entry is zero, we don't have to do anything. 02137 if (A[j*lda + k] != A_zero) { 02138 for(i = izero; i < m; i++) { 02139 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 02140 } 02141 } 02142 } 02143 if ( noUnit ) { 02144 temp = B_one/A[j*lda + j]; 02145 for(i = izero; i < m; i++) { 02146 B[j*ldb + i] *= temp; 02147 } 02148 } 02149 } 02150 } // end if (uplo == 'U') 02151 } // if (transa =='N') 02152 else { 02153 // 02154 // Compute B = alpha*B*inv( A' ) 02155 // or B = alpha*B*inv( conj(A') ) 02156 // 02157 if(EUploChar[uplo] == 'U') { 02158 // A is upper triangular. 02159 for(k = (n - ione); k > -ione; k--) { 02160 if ( noUnit ) { 02161 if ( noConj ) 02162 temp = B_one/A[k*lda + k]; 02163 else 02164 temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]); 02165 for(i = izero; i < m; i++) { 02166 B[k*ldb + i] *= temp; 02167 } 02168 } 02169 for(j = izero; j < k; j++) { 02170 if (A[k*lda + j] != A_zero) { 02171 if ( noConj ) 02172 temp = A[k*lda + j]; 02173 else 02174 temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]); 02175 for(i = izero; i < m; i++) { 02176 B[j*ldb + i] -= temp*B[k*ldb + i]; 02177 } 02178 } 02179 } 02180 if (alpha != alpha_one) { 02181 for (i = izero; i < m; i++) { 02182 B[k*ldb + i] *= alpha; 02183 } 02184 } 02185 } 02186 } 02187 else 02188 { // A is lower triangular. 02189 for(k = izero; k < n; k++) { 02190 if ( noUnit ) { 02191 if ( noConj ) 02192 temp = B_one/A[k*lda + k]; 02193 else 02194 temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]); 02195 for (i = izero; i < m; i++) { 02196 B[k*ldb + i] *= temp; 02197 } 02198 } 02199 for(j = k+ione; j < n; j++) { 02200 if(A[k*lda + j] != A_zero) { 02201 if ( noConj ) 02202 temp = A[k*lda + j]; 02203 else 02204 temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]); 02205 for(i = izero; i < m; i++) { 02206 B[j*ldb + i] -= temp*B[k*ldb + i]; 02207 } 02208 } 02209 } 02210 if (alpha != alpha_one) { 02211 for (i = izero; i < m; i++) { 02212 B[k*ldb + i] *= alpha; 02213 } 02214 } 02215 } 02216 } 02217 } 02218 } 02219 } 02220 } 02221 } 02222 02223 // Explicit instantiation for template<int,float> 02224 02225 template <> 02226 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, float> 02227 { 02228 public: 02229 inline BLAS(void) {} 02230 inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {} 02231 inline virtual ~BLAS(void) {} 02232 void ROTG(float* da, float* db, float* c, float* s) const; 02233 void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const; 02234 float ASUM(const int n, const float* x, const int incx) const; 02235 void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const; 02236 void COPY(const int n, const float* x, const int incx, float* y, const int incy) const; 02237 float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const; 02238 float NRM2(const int n, const float* x, const int incx) const; 02239 void SCAL(const int n, const float alpha, float* x, const int incx) const; 02240 int IAMAX(const int n, const float* x, const int incx) const; 02241 void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const; 02242 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const; 02243 void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const; 02244 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const; 02245 void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const; 02246 void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const; 02247 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const; 02248 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const; 02249 }; 02250 02251 // Explicit instantiation for template<int,double> 02252 02253 template<> 02254 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, double> 02255 { 02256 public: 02257 inline BLAS(void) {} 02258 inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {} 02259 inline virtual ~BLAS(void) {} 02260 void ROTG(double* da, double* db, double* c, double* s) const; 02261 void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const; 02262 double ASUM(const int n, const double* x, const int incx) const; 02263 void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const; 02264 void COPY(const int n, const double* x, const int incx, double* y, const int incy) const; 02265 double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const; 02266 double NRM2(const int n, const double* x, const int incx) const; 02267 void SCAL(const int n, const double alpha, double* x, const int incx) const; 02268 int IAMAX(const int n, const double* x, const int incx) const; 02269 void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const; 02270 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const; 02271 void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const; 02272 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const; 02273 void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const; 02274 void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const; 02275 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const; 02276 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const; 02277 }; 02278 02279 // Explicit instantiation for template<int,complex<float> > 02280 02281 template<> 02282 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<float> > 02283 { 02284 public: 02285 inline BLAS(void) {} 02286 inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {} 02287 inline virtual ~BLAS(void) {} 02288 void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const; 02289 void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const; 02290 float ASUM(const int n, const std::complex<float>* x, const int incx) const; 02291 void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const; 02292 void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const; 02293 std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const; 02294 float NRM2(const int n, const std::complex<float>* x, const int incx) const; 02295 void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const; 02296 int IAMAX(const int n, const std::complex<float>* x, const int incx) const; 02297 void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const; 02298 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const; 02299 void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const; 02300 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const; 02301 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const; 02302 void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const; 02303 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const; 02304 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const; 02305 }; 02306 02307 // Explicit instantiation for template<int,complex<double> > 02308 02309 template<> 02310 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<double> > 02311 { 02312 public: 02313 inline BLAS(void) {} 02314 inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {} 02315 inline virtual ~BLAS(void) {} 02316 void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const; 02317 void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const; 02318 double ASUM(const int n, const std::complex<double>* x, const int incx) const; 02319 void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const; 02320 void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const; 02321 std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const; 02322 double NRM2(const int n, const std::complex<double>* x, const int incx) const; 02323 void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const; 02324 int IAMAX(const int n, const std::complex<double>* x, const int incx) const; 02325 void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const; 02326 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const; 02327 void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const; 02328 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const; 02329 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const; 02330 void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const; 02331 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const; 02332 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const; 02333 }; 02334 02335 } // namespace Teuchos 02336 02337 #endif // _TEUCHOS_BLAS_HPP_
1.7.6.1