Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends
Kokkos_Sequential_SparseKernels.hpp
Go to the documentation of this file.
00001 /*
00002 //@HEADER
00003 // ************************************************************************
00004 //
00005 //          Kokkos: Node API and Parallel Node Kernels
00006 //              Copyright (2008) Sandia Corporation
00007 //
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00039 //
00040 // ************************************************************************
00041 //@HEADER
00042 */
00043 
00044 #ifndef KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
00045 #define KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
00046 
00064 
00065 #include <KokkosLinAlg_config.h>
00066 #include <Kokkos_ArithTraits.hpp>
00067 
00068 namespace Kokkos {
00069 namespace Sequential {
00070 
00097 template<class LocalOrdinal,
00098          class OffsetType,
00099          class MatrixScalar,
00100          class DomainScalar,
00101          class RangeScalar>
00102 void
00103 gaussSeidel (const LocalOrdinal numRows,
00104              const LocalOrdinal numCols,
00105              const OffsetType* const ptr,
00106              const LocalOrdinal* const ind,
00107              const MatrixScalar* const val,
00108              const DomainScalar* const B,
00109              const OffsetType b_stride,
00110              RangeScalar* const X,
00111              const OffsetType x_stride,
00112              const MatrixScalar* const D,
00113              const MatrixScalar omega,
00114              const Kokkos::ESweepDirection direction)
00115 {
00116   using Kokkos::Details::ArithTraits;
00117   typedef LocalOrdinal LO;
00118   const OffsetType theNumRows = static_cast<OffsetType> (numRows);
00119   const OffsetType theNumCols = static_cast<OffsetType> (numCols);
00120 
00121   if (numRows == 0 || numCols == 0) {
00122     return; // Nothing to do.
00123   }
00124   else if (numRows > 0 && ptr[numRows] == 0) {
00125     // All the off-diagonal entries of A are zero, and all the
00126     // diagonal entries are (implicitly) 1.  Therefore compute: X :=
00127     // (1 - omega) X + omega B.  There's no need to care about the
00128     // direction, since there are no cross-row data dependencies in
00129     // this case.
00130     const MatrixScalar oneMinusOmega =
00131       ArithTraits<MatrixScalar>::one () - omega;
00132     for (OffsetType j = 0; j < theNumCols; ++j) {
00133       RangeScalar* const x_j = X + j*x_stride;
00134       const DomainScalar* const b_j = B + j*b_stride;
00135       for (OffsetType i = 0; i < theNumRows; ++i) {
00136         x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i];
00137       }
00138     }
00139     return;
00140   }
00141 
00142   if (numCols == 1) {
00143     if (direction == Kokkos::Forward) {
00144       for (LO i = 0; i < numRows; ++i) {
00145         RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
00146         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00147           const LO j = ind[k];
00148           const MatrixScalar A_ij = val[k];
00149           x_temp += A_ij * X[j];
00150         }
00151         X[i] += omega * D[i] * (B[i] - x_temp);
00152       }
00153     } else if (direction == Kokkos::Backward) {
00154       // Split the loop so that it is correct even if LO is unsigned.
00155       // It's a bad idea for LO to be unsigned, but we want this to
00156       // work nevertheless.
00157       for (LO i = numRows - 1; i != 0; --i) {
00158         RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
00159         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00160           const LO j = ind[k];
00161           const MatrixScalar A_ij = val[k];
00162           x_temp += A_ij * X[j];
00163         }
00164         X[i] += omega * D[i] * (B[i] - x_temp);
00165       }
00166       { // last loop iteration
00167         const LO i = 0;
00168         RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
00169         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00170           const LO j = ind[k];
00171           const MatrixScalar A_ij = val[k];
00172           x_temp += A_ij * X[j];
00173         }
00174         X[i] += omega * D[i] * (B[i] - x_temp);
00175       }
00176     }
00177   }
00178   else { // numCols > 1
00179     // mfh 20 Dec 2012: If Gauss-Seidel for multivectors with
00180     // multiple columns becomes important, we can add unrolled
00181     // implementations.  The implementation below is not unrolled.
00182     // It may also be reasonable to parallelize over right-hand
00183     // sides, if there are enough of them, especially if the matrix
00184     // fits in cache.
00185     Teuchos::Array<RangeScalar> temp (numCols);
00186     RangeScalar* const x_temp = temp.getRawPtr ();
00187 
00188     if (direction == Kokkos::Forward) {
00189       for (LO i = 0; i < numRows; ++i) {
00190         for (OffsetType c = 0; c < theNumCols; ++c) {
00191           x_temp[c] = ArithTraits<RangeScalar>::zero ();
00192         }
00193         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00194           const LO j = ind[k];
00195           const MatrixScalar A_ij = val[k];
00196           for (OffsetType c = 0; c < theNumCols; ++c) {
00197             x_temp[c] += A_ij * X[j + x_stride*c];
00198           }
00199         }
00200         for (OffsetType c = 0; c < theNumCols; ++c) {
00201           X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
00202         }
00203       }
00204     } else if (direction == Kokkos::Backward) { // backward mode
00205       // Split the loop so that it is correct even if LO is unsigned.
00206       // It's a bad idea for LO to be unsigned, but we want this to
00207       // work nevertheless.
00208       for (LO i = numRows - 1; i != 0; --i) {
00209         for (OffsetType c = 0; c < theNumCols; ++c) {
00210           x_temp[c] = ArithTraits<RangeScalar>::zero ();
00211         }
00212         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00213           const LO j = ind[k];
00214           const MatrixScalar A_ij = val[k];
00215           for (OffsetType c = 0; c < theNumCols; ++c) {
00216             x_temp[c] += A_ij * X[j + x_stride*c];
00217           }
00218         }
00219         for (OffsetType c = 0; c < theNumCols; ++c) {
00220           X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
00221         }
00222       }
00223       { // last loop iteration
00224         const LO i = 0;
00225         for (OffsetType c = 0; c < theNumCols; ++c) {
00226           x_temp[c] = ArithTraits<RangeScalar>::zero ();
00227         }
00228         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00229           const LO j = ind[k];
00230           const MatrixScalar A_ij = val[k];
00231           for (OffsetType c = 0; c < theNumCols; ++c) {
00232             x_temp[c] += A_ij * X[j + x_stride*c];
00233           }
00234         }
00235         for (OffsetType c = 0; c < theNumCols; ++c) {
00236           X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
00237         }
00238       }
00239     }
00240   }
00241 }
00242 
00243 
00277 template<class LocalOrdinal,
00278          class OffsetType,
00279          class MatrixScalar,
00280          class DomainScalar,
00281          class RangeScalar>
00282 void
00283 reorderedGaussSeidel (const LocalOrdinal numRows,
00284                       const LocalOrdinal numCols,
00285                       const OffsetType* const ptr,
00286                       const LocalOrdinal* const ind,
00287                       const MatrixScalar* const val,
00288                       const DomainScalar* const B,
00289                       const OffsetType b_stride,
00290                       RangeScalar* const X,
00291                       const OffsetType x_stride,
00292                       const MatrixScalar* const D,
00293                       const LocalOrdinal* const rowInd,
00294                       const LocalOrdinal numRowInds, // length of rowInd
00295                       const MatrixScalar omega,
00296                       const Kokkos::ESweepDirection direction)
00297 {
00298   using Kokkos::Details::ArithTraits;
00299   typedef LocalOrdinal LO;
00300   const OffsetType theNumRows = static_cast<OffsetType> (numRows);
00301   const OffsetType theNumCols = static_cast<OffsetType> (numCols);
00302 
00303   if (numRows == 0 || numCols == 0) {
00304     return; // Nothing to do.
00305   }
00306   else if (numRows > 0 && ptr[numRows] == 0) {
00307     // All the off-diagonal entries of A are zero, and all the
00308     // diagonal entries are (implicitly) 1.  Therefore compute: X :=
00309     // (1 - omega) X + omega B.  There's no need to care about the
00310     // direction or row ordering, since there are no cross-row data
00311     // dependencies in this case.
00312     const MatrixScalar oneMinusOmega =
00313       ArithTraits<MatrixScalar>::one () - omega;
00314     for (OffsetType j = 0; j < theNumCols; ++j) {
00315       RangeScalar* const x_j = X + j*x_stride;
00316       const DomainScalar* const b_j = B + j*b_stride;
00317       for (OffsetType i = 0; i < theNumRows; ++i) {
00318         x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i];
00319       }
00320     }
00321     return;
00322   }
00323 
00324   if (numCols == 1) {
00325     if (direction == Kokkos::Forward) {
00326       for (LO ii = 0; ii < numRowInds; ++ii) {
00327         LO i = rowInd[ii];
00328         RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
00329         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00330           const LO j = ind[k];
00331           const MatrixScalar A_ij = val[k];
00332           x_temp += A_ij * X[j];
00333         }
00334         X[i] += omega * D[i] * (B[i] - x_temp);
00335       }
00336     } else if (direction == Kokkos::Backward) {
00337       // Split the loop so that it is correct even if LO is unsigned.
00338       // It's a bad idea for LO to be unsigned, but we want this to
00339       // work nevertheless.
00340       for (LO ii = numRowInds - 1; ii != 0; --ii) {
00341         LO i = rowInd[ii];
00342         RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
00343         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00344           const LO j = ind[k];
00345           const MatrixScalar A_ij = val[k];
00346           x_temp += A_ij * X[j];
00347         }
00348         X[i] += omega * D[i] * (B[i] - x_temp);
00349       }
00350       { // last loop iteration
00351         const LO ii = 0;
00352         LO i = rowInd[ii];
00353         RangeScalar x_temp = ArithTraits<RangeScalar>::zero ();
00354         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00355           const LO j = ind[k];
00356           const MatrixScalar A_ij = val[k];
00357           x_temp += A_ij * X[j];
00358         }
00359         X[i] += omega * D[i] * (B[i] - x_temp);
00360       }
00361     }
00362   }
00363   else { // numCols > 1
00364     // mfh 20 Dec 2012: If Gauss-Seidel for multivectors with
00365     // multiple columns becomes important, we can add unrolled
00366     // implementations.  The implementation below is not unrolled.
00367     // It may also be reasonable to parallelize over right-hand
00368     // sides, if there are enough of them, especially if the matrix
00369     // fits in cache.
00370     Teuchos::Array<RangeScalar> temp (numCols);
00371     RangeScalar* const x_temp = temp.getRawPtr ();
00372 
00373     if (direction == Kokkos::Forward) {
00374       for (LO ii = 0; ii < numRowInds; ++ii) {
00375         LO i = rowInd[ii];
00376         for (OffsetType c = 0; c < theNumCols; ++c) {
00377           x_temp[c] = Kokkos::Details::ArithTraits<RangeScalar>::zero ();
00378         }
00379         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00380           const LO j = ind[k];
00381           const MatrixScalar A_ij = val[k];
00382           for (OffsetType c = 0; c < theNumCols; ++c) {
00383             x_temp[c] += A_ij * X[j + x_stride*c];
00384           }
00385         }
00386         for (OffsetType c = 0; c < theNumCols; ++c) {
00387           X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
00388         }
00389       }
00390     } else if (direction == Kokkos::Backward) { // backward mode
00391       // Split the loop so that it is correct even if LO is unsigned.
00392       // It's a bad idea for LO to be unsigned, but we want this to
00393       // work nevertheless.
00394       for (LO ii = numRowInds - 1; ii != 0; --ii) {
00395         LO i = rowInd[ii];
00396         for (OffsetType c = 0; c < theNumCols; ++c) {
00397           x_temp[c] = Kokkos::Details::ArithTraits<RangeScalar>::zero ();
00398         }
00399         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00400           const LO j = ind[k];
00401           const MatrixScalar A_ij = val[k];
00402           for (OffsetType c = 0; c < theNumCols; ++c) {
00403             x_temp[c] += A_ij * X[j + x_stride*c];
00404           }
00405         }
00406         for (OffsetType c = 0; c < theNumCols; ++c) {
00407           X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
00408         }
00409       }
00410       { // last loop iteration
00411         const LO ii = 0;
00412         LO i = rowInd[ii];
00413         for (OffsetType c = 0; c < theNumCols; ++c) {
00414           x_temp[c] = Kokkos::Details::ArithTraits<RangeScalar>::zero ();
00415         }
00416         for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) {
00417           const LO j = ind[k];
00418           const MatrixScalar A_ij = val[k];
00419           for (OffsetType c = 0; c < theNumCols; ++c) {
00420             x_temp[c] += A_ij * X[j + x_stride*c];
00421           }
00422         }
00423         for (OffsetType c = 0; c < theNumCols; ++c) {
00424           X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]);
00425         }
00426       }
00427     }
00428   }
00429 }
00430 
00431 
00432 template<class CrsMatrixType,
00433          class DomainMultiVectorType,
00434          class RangeMultiVectorType>
00435 void
00436 lowerTriSolveCsrUnitDiag (RangeMultiVectorType X,
00437                           const CrsMatrixType& A,
00438                           DomainMultiVectorType Y)
00439 {
00440   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00441   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00442   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00443 
00444   const local_ordinal_type numRows = A.numRows ();
00445   //const local_ordinal_type numCols = A.numCols ();
00446   const local_ordinal_type numVecs = X.dimension_1 ();
00447   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00448   typename CrsMatrixType::index_type ind = A.graph.entries;
00449   typename CrsMatrixType::values_type val = A.values;
00450 
00451   for (local_ordinal_type r = 0; r < numRows; ++r) {
00452     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00453       X(r, j) = Y(r, j);
00454     }
00455     const offset_type beg = ptr(r);
00456     const offset_type end = ptr(r+1);
00457     for (offset_type k = beg; k < end; ++k) {
00458       const matrix_scalar_type A_rc = val(k);
00459       const local_ordinal_type c = ind(k);
00460       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00461         X(r, j) -= A_rc * X(c, j);
00462       }
00463     } // for each entry A_rc in the current row r
00464   } // for each row r
00465 }
00466 
00467 
00468 template<class CrsMatrixType,
00469          class DomainMultiVectorType,
00470          class RangeMultiVectorType>
00471 void
00472 lowerTriSolveCsr (RangeMultiVectorType X,
00473                   const CrsMatrixType& A,
00474                   DomainMultiVectorType Y)
00475 {
00476   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00477   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00478   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00479   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
00480 
00481   const local_ordinal_type numRows = A.numRows ();
00482   //const local_ordinal_type numCols = A.numCols ();
00483   const local_ordinal_type numVecs = X.dimension_1 ();
00484   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00485   typename CrsMatrixType::index_type ind = A.graph.entries;
00486   typename CrsMatrixType::values_type val = A.values;
00487 
00488   for (local_ordinal_type r = 0; r < numRows; ++r) {
00489     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00490       X(r, j) = Y(r, j);
00491     }
00492 
00493     matrix_scalar_type A_rr = STS::zero ();
00494     const offset_type beg = ptr(r);
00495     const offset_type end = ptr(r+1);
00496 
00497     for (offset_type k = beg; k < end; ++k) {
00498       const matrix_scalar_type A_rc = val(k);
00499       const local_ordinal_type c = ind(k);
00500       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
00501       // has equal local row and column indices.  That may not
00502       // necessarily hold, depending on the row and column Maps.  The
00503       // way to fix this would be for Tpetra::CrsMatrix to remember
00504       // the local column index of the diagonal entry (if there is
00505       // one) in each row, and pass that along to this function.
00506       if (r == c) {
00507         A_rr += A_rc;
00508       } else {
00509         for (local_ordinal_type j = 0; j < numVecs; ++j) {
00510           X(r, j) -= A_rc * X(c, j);
00511         }
00512       }
00513     } // for each entry A_rc in the current row r
00514     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00515       X(r, j) /= A_rr;
00516     }
00517   } // for each row r
00518 }
00519 
00520 
00521 template<class CrsMatrixType,
00522          class DomainMultiVectorType,
00523          class RangeMultiVectorType>
00524 void
00525 upperTriSolveCsrUnitDiag (RangeMultiVectorType X,
00526                           const CrsMatrixType& A,
00527                           DomainMultiVectorType Y)
00528 {
00529   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00530   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00531   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00532 
00533   const local_ordinal_type numRows = A.numRows ();
00534   //const local_ordinal_type numCols = A.numCols ();
00535   const local_ordinal_type numVecs = X.dimension_1 ();
00536   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00537   typename CrsMatrixType::index_type ind = A.graph.entries;
00538   typename CrsMatrixType::values_type val = A.values;
00539 
00540   // If local_ordinal_type is unsigned and numRows is 0, the loop
00541   // below will have entirely the wrong number of iterations.
00542   if (numRows == 0) {
00543     return;
00544   }
00545 
00546   // Don't use r >= 0 as the test, because that fails if
00547   // local_ordinal_type is unsigned.  We do r == 0 (last
00548   // iteration) below.
00549   for (local_ordinal_type r = numRows - 1; r != 0; --r) {
00550     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00551       X(r, j) = Y(r, j);
00552     }
00553     const offset_type beg = ptr(r);
00554     const offset_type end = ptr(r+1);
00555     for (offset_type k = beg; k < end; ++k) {
00556       const matrix_scalar_type A_rc = val(k);
00557       const local_ordinal_type c = ind(k);
00558       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00559         X(r, j) -= A_rc * X(c, j);
00560       }
00561     } // for each entry A_rc in the current row r
00562   } // for each row r
00563 
00564   // Last iteration: r = 0.
00565   {
00566     const local_ordinal_type r = 0;
00567     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00568       X(r, j) = Y(r, j);
00569     }
00570     const offset_type beg = ptr(r);
00571     const offset_type end = ptr(r+1);
00572     for (offset_type k = beg; k < end; ++k) {
00573       const matrix_scalar_type A_rc = val(k);
00574       const local_ordinal_type c = ind(k);
00575       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00576         X(r, j) -= A_rc * X(c, j);
00577       }
00578     } // for each entry A_rc in the current row r
00579   } // last iteration: r = 0
00580 }
00581 
00582 
00583 template<class CrsMatrixType,
00584          class DomainMultiVectorType,
00585          class RangeMultiVectorType>
00586 void
00587 upperTriSolveCsr (RangeMultiVectorType X,
00588                   const CrsMatrixType& A,
00589                   DomainMultiVectorType Y)
00590 {
00591   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00592   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00593   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00594 
00595   const local_ordinal_type numRows = A.numRows ();
00596   //const local_ordinal_type numCols = A.numCols ();
00597   const local_ordinal_type numVecs = X.dimension_1 ();
00598   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00599   typename CrsMatrixType::index_type ind = A.graph.entries;
00600   typename CrsMatrixType::values_type val = A.values;
00601 
00602   // If local_ordinal_type is unsigned and numRows is 0, the loop
00603   // below will have entirely the wrong number of iterations.
00604   if (numRows == 0) {
00605     return;
00606   }
00607 
00608   // Don't use r >= 0 as the test, because that fails if
00609   // local_ordinal_type is unsigned.  We do r == 0 (last
00610   // iteration) below.
00611   for (local_ordinal_type r = numRows - 1; r != 0; --r) {
00612     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00613       X(r, j) = Y(r, j);
00614     }
00615     const offset_type beg = ptr(r);
00616     const offset_type end = ptr(r+1);
00617     // We assume the diagonal entry is first in the row.
00618     const matrix_scalar_type A_rr = val(beg);
00619     for (offset_type k = beg + static_cast<offset_type> (1); k < end; ++k) {
00620       const matrix_scalar_type A_rc = val(k);
00621       const local_ordinal_type c = ind(k);
00622       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00623         X(r, j) -= A_rc * X(c, j);
00624       }
00625     } // for each entry A_rc in the current row r
00626     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00627       X(r, j) /= A_rr;
00628     }
00629   } // for each row r
00630 
00631   // Last iteration: r = 0.
00632   {
00633     const local_ordinal_type r = 0;
00634     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00635       X(r, j) = Y(r, j);
00636     }
00637     const offset_type beg = ptr(r);
00638     const offset_type end = ptr(r+1);
00639     // We assume the diagonal entry is first in the row.
00640     const matrix_scalar_type A_rr = val(beg);
00641     for (size_t k = beg + 1; k < end; ++k) {
00642       const matrix_scalar_type A_rc = val(k);
00643       const local_ordinal_type c = ind(k);
00644       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00645         X(r, j) -= A_rc * X(c, j);
00646       }
00647     } // for each entry A_rc in the current row r
00648     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00649       X(r, j) /= A_rr;
00650     }
00651   } // last iteration: r = 0
00652 }
00653 
00654 
00655 template<class CrsMatrixType,
00656          class DomainMultiVectorType,
00657          class RangeMultiVectorType>
00658 void
00659 upperTriSolveCscUnitDiag (RangeMultiVectorType X,
00660                           const CrsMatrixType& A,
00661                           DomainMultiVectorType Y)
00662 {
00663   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00664   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00665   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00666 
00667   const local_ordinal_type numRows = A.numRows ();
00668   const local_ordinal_type numCols = A.numCols ();
00669   const local_ordinal_type numVecs = X.dimension_1 ();
00670   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00671   typename CrsMatrixType::index_type ind = A.graph.entries;
00672   typename CrsMatrixType::values_type val = A.values;
00673 
00674   for (local_ordinal_type j = 0; j < numVecs; ++j) {
00675     for (local_ordinal_type i = 0; i < numRows; ++i) {
00676       X(i, j) = Y(i, j);
00677     }
00678   }
00679 
00680   // If local_ordinal_type is unsigned and numCols is 0, the loop
00681   // below will have entirely the wrong number of iterations.
00682   if (numCols == 0) {
00683     return;
00684   }
00685 
00686   // Don't use c >= 0 as the test, because that fails if
00687   // local_ordinal_type is unsigned.  We do c == 0 (last
00688   // iteration) below.
00689   for (local_ordinal_type c = numCols - 1; c != 0; --c) {
00690     const offset_type beg = ptr(c);
00691     const offset_type end = ptr(c+1);
00692     for (offset_type k = beg; k < end; ++k) {
00693       const matrix_scalar_type A_rc = val(k);
00694       const local_ordinal_type r = ind(k);
00695       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00696         X(r, j) -= A_rc * X(c, j);
00697       }
00698     } // for each entry A_rc in the current column c
00699   } // for each column c
00700 
00701   // Last iteration: c = 0.
00702   {
00703     const local_ordinal_type c = 0;
00704     const offset_type beg = ptr(c);
00705     const offset_type end = ptr(c+1);
00706     for (offset_type k = beg; k < end; ++k) {
00707       const matrix_scalar_type A_rc = val(k);
00708       const local_ordinal_type r = ind(k);
00709       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00710         X(r, j) -= A_rc * X(c, j);
00711       }
00712     } // for each entry A_rc in the current column c
00713   }
00714 }
00715 
00716 
00717 template<class CrsMatrixType,
00718          class DomainMultiVectorType,
00719          class RangeMultiVectorType>
00720 void
00721 upperTriSolveCsc (RangeMultiVectorType X,
00722                   const CrsMatrixType& A,
00723                   DomainMultiVectorType Y)
00724 {
00725   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00726   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00727   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00728   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
00729 
00730   const local_ordinal_type numRows = A.numRows ();
00731   const local_ordinal_type numCols = A.numCols ();
00732   const local_ordinal_type numVecs = X.dimension_1 ();
00733   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00734   typename CrsMatrixType::index_type ind = A.graph.entries;
00735   typename CrsMatrixType::values_type val = A.values;
00736 
00737   for (local_ordinal_type j = 0; j < numVecs; ++j) {
00738     for (local_ordinal_type i = 0; i < numRows; ++i) {
00739       X(i, j) = Y(i, j);
00740     }
00741   }
00742 
00743   // If local_ordinal_type is unsigned and numCols is 0, the loop
00744   // below will have entirely the wrong number of iterations.
00745   if (numCols == 0) {
00746     return;
00747   }
00748 
00749   // Don't use c >= 0 as the test, because that fails if
00750   // local_ordinal_type is unsigned.  We do c == 0 (last
00751   // iteration) below.
00752   for (local_ordinal_type c = numCols - 1; c != 0; --c) {
00753     matrix_scalar_type A_cc = STS::zero ();
00754     const offset_type beg = ptr(c);
00755     const offset_type end = ptr(c+1);
00756     for (offset_type k = beg; k < end; ++k) {
00757       const local_ordinal_type r = ind(k);
00758       const matrix_scalar_type A_rc = val(k);
00759       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
00760       // has equal local row and column indices.  That may not
00761       // necessarily hold, depending on the row and column Maps.  See
00762       // note above.
00763       if (r == c) {
00764         A_cc += A_rc;
00765       } else {
00766         for (local_ordinal_type j = 0; j < numVecs; ++j) {
00767           X(r, j) -= A_rc * X(c, j);
00768         }
00769       }
00770     } // for each entry A_rc in the current column c
00771     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00772       X(c, j) /= A_cc;
00773     }
00774   } // for each column c
00775 
00776   // Last iteration: c = 0.
00777   {
00778     const local_ordinal_type c = 0;
00779     matrix_scalar_type A_cc = STS::zero ();
00780     const offset_type beg = ptr(c);
00781     const offset_type end = ptr(c+1);
00782     for (offset_type k = beg; k < end; ++k) {
00783       const local_ordinal_type r = ind(k);
00784       const matrix_scalar_type A_rc = val(k);
00785       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
00786       // has equal local row and column indices.  That may not
00787       // necessarily hold, depending on the row and column Maps.  See
00788       // note above.
00789       if (r == c) {
00790         A_cc += A_rc;
00791       } else {
00792         for (local_ordinal_type j = 0; j < numVecs; ++j) {
00793           X(r, j) -= A_rc * X(c, j);
00794         }
00795       }
00796     } // for each entry A_rc in the current column c
00797     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00798       X(c, j) /= A_cc;
00799     }
00800   }
00801 }
00802 
00803 
00804 template<class CrsMatrixType,
00805          class DomainMultiVectorType,
00806          class RangeMultiVectorType>
00807 void
00808 lowerTriSolveCscUnitDiag (RangeMultiVectorType X,
00809                           const CrsMatrixType& A,
00810                           DomainMultiVectorType Y)
00811 {
00812   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00813   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00814   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00815 
00816   const local_ordinal_type numRows = A.numRows ();
00817   const local_ordinal_type numCols = A.numCols ();
00818   const local_ordinal_type numVecs = X.dimension_1 ();
00819   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00820   typename CrsMatrixType::index_type ind = A.graph.entries;
00821   typename CrsMatrixType::values_type val = A.values;
00822 
00823   for (local_ordinal_type j = 0; j < numVecs; ++j) {
00824     for (local_ordinal_type i = 0; i < numRows; ++i) {
00825       X(i, j) = Y(i, j);
00826     }
00827   }
00828 
00829   for (local_ordinal_type c = 0; c < numCols; ++c) {
00830     const offset_type beg = ptr(c);
00831     const offset_type end = ptr(c+1);
00832     for (offset_type k = beg; k < end; ++k) {
00833       const local_ordinal_type r = ind(k);
00834       const matrix_scalar_type A_rc = val(k);
00835       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00836         X(r, j) -= A_rc * X(c, j);
00837       }
00838     } // for each entry A_rc in the current column c
00839   } // for each column c
00840 }
00841 
00842 
00843 template<class CrsMatrixType,
00844          class DomainMultiVectorType,
00845          class RangeMultiVectorType>
00846 void
00847 upperTriSolveCscUnitDiagConj (RangeMultiVectorType X,
00848                               const CrsMatrixType& A,
00849                               DomainMultiVectorType Y)
00850 {
00851   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00852   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00853   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00854   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
00855 
00856   const local_ordinal_type numRows = A.numRows ();
00857   const local_ordinal_type numCols = A.numCols ();
00858   const local_ordinal_type numVecs = X.dimension_1 ();
00859   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00860   typename CrsMatrixType::index_type ind = A.graph.entries;
00861   typename CrsMatrixType::values_type val = A.values;
00862 
00863   for (local_ordinal_type j = 0; j < numVecs; ++j) {
00864     for (local_ordinal_type i = 0; i < numRows; ++i) {
00865       X(i, j) = Y(i, j);
00866     }
00867   }
00868 
00869   // If local_ordinal_type is unsigned and numCols is 0, the loop
00870   // below will have entirely the wrong number of iterations.
00871   if (numCols == 0) {
00872     return;
00873   }
00874 
00875   // Don't use c >= 0 as the test, because that fails if
00876   // local_ordinal_type is unsigned.  We do c == 0 (last
00877   // iteration) below.
00878   for (local_ordinal_type c = numCols - 1; c != 0; --c) {
00879     const offset_type beg = ptr(c);
00880     const offset_type end = ptr(c+1);
00881     for (offset_type k = beg; k < end; ++k) {
00882       const local_ordinal_type r = ind(k);
00883       const matrix_scalar_type A_rc = STS::conj (val(k));
00884       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00885         X(r, j) -= A_rc * X(c, j);
00886       }
00887     } // for each entry A_rc in the current column c
00888   } // for each column c
00889 
00890   // Last iteration: c = 0.
00891   {
00892     const local_ordinal_type c = 0;
00893     const offset_type beg = ptr(c);
00894     const offset_type end = ptr(c+1);
00895     for (offset_type k = beg; k < end; ++k) {
00896       const local_ordinal_type r = ind(k);
00897       const matrix_scalar_type A_rc = STS::conj (val(k));
00898       for (local_ordinal_type j = 0; j < numVecs; ++j) {
00899         X(r, j) -= A_rc * X(c, j);
00900       }
00901     } // for each entry A_rc in the current column c
00902   }
00903 }
00904 
00905 
00906 template<class CrsMatrixType,
00907          class DomainMultiVectorType,
00908          class RangeMultiVectorType>
00909 void
00910 upperTriSolveCscConj (RangeMultiVectorType X,
00911                       const CrsMatrixType& A,
00912                       DomainMultiVectorType Y)
00913 {
00914   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
00915   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
00916   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
00917   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
00918 
00919   const local_ordinal_type numRows = A.numRows ();
00920   const local_ordinal_type numCols = A.numCols ();
00921   const local_ordinal_type numVecs = X.dimension_1 ();
00922   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
00923   typename CrsMatrixType::index_type ind = A.graph.entries;
00924   typename CrsMatrixType::values_type val = A.values;
00925 
00926   for (local_ordinal_type j = 0; j < numVecs; ++j) {
00927     for (local_ordinal_type i = 0; i < numRows; ++i) {
00928       X(i, j) = Y(i, j);
00929     }
00930   }
00931 
00932   // If local_ordinal_type is unsigned and numCols is 0, the loop
00933   // below will have entirely the wrong number of iterations.
00934   if (numCols == 0) {
00935     return;
00936   }
00937 
00938   // Don't use c >= 0 as the test, because that fails if
00939   // local_ordinal_type is unsigned.  We do c == 0 (last
00940   // iteration) below.
00941   for (local_ordinal_type c = numCols - 1; c != 0; --c) {
00942     matrix_scalar_type A_cc = STS::zero ();
00943     const offset_type beg = ptr(c);
00944     const offset_type end = ptr(c+1);
00945     for (offset_type k = beg; k < end; ++k) {
00946       const local_ordinal_type r = ind(k);
00947       const matrix_scalar_type A_rc = STS::conj (val(k));
00948       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
00949       // has equal local row and column indices.  That may not
00950       // necessarily hold, depending on the row and column Maps.  See
00951       // note above.
00952       if (r == c) {
00953         A_cc += A_rc;
00954       } else {
00955         for (local_ordinal_type j = 0; j < numVecs; ++j) {
00956           X(r, j) -= A_rc * X(c, j);
00957         }
00958       }
00959     } // for each entry A_rc in the current column c
00960     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00961       X(c, j) /= A_cc;
00962     }
00963   } // for each column c
00964 
00965   // Last iteration: c = 0.
00966   {
00967     const local_ordinal_type c = 0;
00968     matrix_scalar_type A_cc = STS::zero ();
00969     const offset_type beg = ptr(c);
00970     const offset_type end = ptr(c+1);
00971     for (offset_type k = beg; k < end; ++k) {
00972       const local_ordinal_type r = ind(k);
00973       const matrix_scalar_type A_rc = STS::conj (val(k));
00974       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
00975       // has equal local row and column indices.  That may not
00976       // necessarily hold, depending on the row and column Maps.  See
00977       // note above.
00978       if (r == c) {
00979         A_cc += A_rc;
00980       } else {
00981         for (local_ordinal_type j = 0; j < numVecs; ++j) {
00982           X(r, j) -= A_rc * X(c, j);
00983         }
00984       }
00985     } // for each entry A_rc in the current column c
00986     for (local_ordinal_type j = 0; j < numVecs; ++j) {
00987       X(c, j) /= A_cc;
00988     }
00989   }
00990 }
00991 
00992 
00993 template<class CrsMatrixType,
00994          class DomainMultiVectorType,
00995          class RangeMultiVectorType>
00996 void
00997 lowerTriSolveCsc (RangeMultiVectorType X,
00998                   const CrsMatrixType& A,
00999                   DomainMultiVectorType Y)
01000 {
01001   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
01002   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
01003   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
01004   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
01005 
01006   const local_ordinal_type numRows = A.numRows ();
01007   const local_ordinal_type numCols = A.numCols ();
01008   const local_ordinal_type numVecs = X.dimension_1 ();
01009   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
01010   typename CrsMatrixType::index_type ind = A.graph.entries;
01011   typename CrsMatrixType::values_type val = A.values;
01012 
01013   for (local_ordinal_type j = 0; j < numVecs; ++j) {
01014     for (local_ordinal_type i = 0; i < numRows; ++i) {
01015       X(i, j) = Y(i, j);
01016     }
01017   }
01018 
01019   for (local_ordinal_type c = 0; c < numCols; ++c) {
01020     matrix_scalar_type A_cc = STS::zero ();
01021     const offset_type beg = ptr(c);
01022     const offset_type end = ptr(c+1);
01023     for (offset_type k = beg; k < end; ++k) {
01024       const local_ordinal_type r = ind(k);
01025       const matrix_scalar_type A_rc = val(k);
01026       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
01027       // has equal local row and column indices.  That may not
01028       // necessarily hold, depending on the row and column Maps.  See
01029       // note above.
01030       if (r == c) {
01031         A_cc += A_rc;
01032       } else {
01033         for (local_ordinal_type j = 0; j < numVecs; ++j) {
01034           X(r, j) -= A_rc * X(c, j);
01035         }
01036       }
01037     } // for each entry A_rc in the current column c
01038     for (local_ordinal_type j = 0; j < numVecs; ++j) {
01039       X(c, j) /= A_cc;
01040     }
01041   } // for each column c
01042 }
01043 
01044 
01045 template<class CrsMatrixType,
01046          class DomainMultiVectorType,
01047          class RangeMultiVectorType>
01048 void
01049 lowerTriSolveCscUnitDiagConj (RangeMultiVectorType X,
01050                               const CrsMatrixType& A,
01051                               DomainMultiVectorType Y)
01052 {
01053   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
01054   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
01055   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
01056   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
01057 
01058   const local_ordinal_type numRows = A.numRows ();
01059   const local_ordinal_type numCols = A.numCols ();
01060   const local_ordinal_type numVecs = X.dimension_1 ();
01061   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
01062   typename CrsMatrixType::index_type ind = A.graph.entries;
01063   typename CrsMatrixType::values_type val = A.values;
01064 
01065   for (local_ordinal_type j = 0; j < numVecs; ++j) {
01066     for (local_ordinal_type i = 0; i < numRows; ++i) {
01067       X(i, j) = Y(i, j);
01068     }
01069   }
01070 
01071   for (local_ordinal_type c = 0; c < numCols; ++c) {
01072     const offset_type beg = ptr(c);
01073     const offset_type end = ptr(c+1);
01074     for (offset_type k = beg; k < end; ++k) {
01075       const local_ordinal_type r = ind(k);
01076       const matrix_scalar_type A_rc = STS::conj (val(k));
01077       for (local_ordinal_type j = 0; j < numVecs; ++j) {
01078         X(r, j) -= A_rc * X(c, j);
01079       }
01080     } // for each entry A_rc in the current column c
01081   } // for each column c
01082 }
01083 
01084 
01085 template<class CrsMatrixType,
01086          class DomainMultiVectorType,
01087          class RangeMultiVectorType>
01088 void
01089 lowerTriSolveCscConj (RangeMultiVectorType X,
01090                       const CrsMatrixType& A,
01091                       DomainMultiVectorType Y)
01092 {
01093   typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type;
01094   typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type;
01095   typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type;
01096   typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS;
01097 
01098   const local_ordinal_type numRows = A.numRows ();
01099   const local_ordinal_type numCols = A.numCols ();
01100   const local_ordinal_type numVecs = X.dimension_1 ();
01101   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
01102   typename CrsMatrixType::index_type ind = A.graph.entries;
01103   typename CrsMatrixType::values_type val = A.values;
01104 
01105   for (local_ordinal_type j = 0; j < numVecs; ++j) {
01106     for (local_ordinal_type i = 0; i < numRows; ++i) {
01107       X(i, j) = Y(i, j);
01108     }
01109   }
01110 
01111   for (local_ordinal_type c = 0; c < numCols; ++c) {
01112     matrix_scalar_type A_cc = STS::zero ();
01113     const offset_type beg = ptr(c);
01114     const offset_type end = ptr(c+1);
01115     for (offset_type k = beg; k < end; ++k) {
01116       const local_ordinal_type r = ind(k);
01117       const matrix_scalar_type A_rc = STS::conj (val(k));
01118       // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry
01119       // has equal local row and column indices.  That may not
01120       // necessarily hold, depending on the row and column Maps.  See
01121       // note above.
01122       if (r == c) {
01123         A_cc += A_rc;
01124       } else {
01125         for (local_ordinal_type j = 0; j < numVecs; ++j) {
01126           X(r, j) -= A_rc * X(c, j);
01127         }
01128       }
01129     } // for each entry A_rc in the current column c
01130     for (local_ordinal_type j = 0; j < numVecs; ++j) {
01131       X(c, j) /= A_cc;
01132     }
01133   } // for each column c
01134 }
01135 
01136 
01137 template<class CrsMatrixType,
01138          class DomainMultiVectorType,
01139          class RangeMultiVectorType>
01140 void
01141 triSolveKokkos (RangeMultiVectorType X,
01142                 const CrsMatrixType& A,
01143                 DomainMultiVectorType Y,
01144                 const Teuchos::EUplo triUplo,
01145                 const Teuchos::EDiag unitDiag,
01146                 const Teuchos::ETransp trans)
01147 {
01148   typedef typename CrsMatrixType::index_type::non_const_value_type LO;
01149   const char prefix[] = "Kokkos::Sequential::triSolveKokkos: ";
01150   const LO numRows = A.numRows ();
01151   const LO numCols = A.numCols ();
01152   const LO numVecs = X.dimension_1 ();
01153   typename CrsMatrixType::row_map_type ptr = A.graph.row_map;
01154 
01155   TEUCHOS_TEST_FOR_EXCEPTION(
01156     triUplo != Teuchos::LOWER_TRI && triUplo != Teuchos::UPPER_TRI &&
01157     triUplo != Teuchos::UNDEF_TRI,
01158     std::invalid_argument, prefix << "triUplo has an invalid value " << triUplo
01159     << ".  Valid values are Teuchos::LOWER_TRI=" << Teuchos::LOWER_TRI <<
01160     ", Teuchos::UPPER_TRI=" << Teuchos::UPPER_TRI << ", and Teuchos::UNDEF_TRI="
01161     << Teuchos::UNDEF_TRI << ".");
01162   TEUCHOS_TEST_FOR_EXCEPTION(
01163     triUplo == Teuchos::UNDEF_TRI, std::invalid_argument, prefix <<
01164     "The matrix is neither lower nor upper triangular (triUplo="
01165     "Teuchos::UNDEF_TRI), so you may not call this method.");
01166   TEUCHOS_TEST_FOR_EXCEPTION(
01167     unitDiag != Teuchos::UNIT_DIAG && unitDiag != Teuchos::NON_UNIT_DIAG,
01168     std::invalid_argument, prefix << "unitDiag has an invalid value "
01169     << unitDiag << ".  Valid values are Teuchos::UNIT_DIAG="
01170     << Teuchos::UNIT_DIAG << " and Teuchos::NON_UNIT_DIAG="
01171     << Teuchos::NON_UNIT_DIAG << ".");
01172   TEUCHOS_TEST_FOR_EXCEPTION(
01173     unitDiag != Teuchos::UNIT_DIAG && numRows > 0 && ptr(numRows) == 0,
01174     std::invalid_argument, prefix << "Triangular solve with an empty matrix "
01175     "is only valid if the matrix has an implicit unit diagonal.  This matrix "
01176     "does not.");
01177   TEUCHOS_TEST_FOR_EXCEPTION(
01178     trans != Teuchos::NO_TRANS && trans != Teuchos::TRANS &&
01179     trans != Teuchos::CONJ_TRANS,
01180     std::invalid_argument, prefix << "trans has an invalid value " << trans
01181     << ".  Valid values are Teuchos::NO_TRANS=" << Teuchos::NO_TRANS << ", "
01182     << "Teuchos::TRANS=" << Teuchos::TRANS << ", and Teuchos::CONJ_TRANS="
01183     << Teuchos::CONJ_TRANS << ".");
01184 
01185   TEUCHOS_TEST_FOR_EXCEPTION(
01186     numRows != static_cast<LO> (X.dimension_0 ()), std::invalid_argument,
01187     prefix << "numRows = " << numRows << " != X.dimension_0() = " <<
01188     X.dimension_0 () << ".");
01189   TEUCHOS_TEST_FOR_EXCEPTION(
01190     numCols != static_cast<LO> (Y.dimension_0 ()), std::invalid_argument,
01191     prefix << "numCols = " << numCols << " != Y.dimension_0() = " <<
01192     Y.dimension_0 () << ".");
01193   TEUCHOS_TEST_FOR_EXCEPTION(
01194     numVecs != static_cast<LO> (Y.dimension_1 ()), std::invalid_argument,
01195     prefix << "X.dimension_1 () = " << numVecs << " != Y.dimension_1 () = "
01196     << Y.dimension_1 () << ".");
01197 
01198   if (trans == Teuchos::NO_TRANS) {          // no transpose
01199     if (triUplo == Teuchos::LOWER_TRI) { // lower triangular
01200       if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
01201         lowerTriSolveCsrUnitDiag (X, A, Y);
01202       } else {                          // non unit diagonal
01203         lowerTriSolveCsr (X, A, Y);
01204       }
01205     } else {                             // upper triangular
01206       if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
01207         upperTriSolveCsrUnitDiag (X, A, Y);
01208       } else {                          // non unit diagonal
01209         upperTriSolveCsr (X, A, Y);
01210       }
01211     }
01212   }
01213   else if (trans == Teuchos::TRANS) {           // transpose
01214     if (triUplo == Teuchos::LOWER_TRI) { // lower triangular
01215       // Transposed lower tri CSR => upper tri CSC.
01216       if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
01217         upperTriSolveCscUnitDiag (X, A, Y);
01218       } else {                          // non unit diagonal
01219         upperTriSolveCsc (X, A, Y);
01220       }
01221     }
01222     else {                               // upper triangular
01223       // Transposed upper tri CSR => lower tri CSC.
01224       if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
01225         lowerTriSolveCscUnitDiag (X, A, Y);
01226       } else {                          // non unit diagonal
01227         lowerTriSolveCsc (X, A, Y);
01228       }
01229     }
01230   }
01231   else if (trans == Teuchos::CONJ_TRANS) { // conj transpose
01232     if (triUplo == Teuchos::LOWER_TRI) { // lower triangular
01233       // Transposed lower tri CSR => upper tri CSC.
01234       if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
01235         upperTriSolveCscUnitDiagConj (X, A, Y);
01236       } else {                          // non unit diagonal
01237         upperTriSolveCscConj (X, A, Y);
01238       }
01239     }
01240     else {                               // upper triangular
01241       // Transposed upper tri CSR => lower tri CSC.
01242       if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal
01243         lowerTriSolveCscUnitDiagConj (X, A, Y);
01244       } else {                          // non unit diagonal
01245         lowerTriSolveCscConj (X, A, Y);
01246       }
01247     }
01248   }
01249 }
01250 
01251 
01252 } // namespace Sequential
01253 } // namespace Kokkos
01254 
01255 #endif // KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends