|
Kokkos Core Kernels Package
Version of the Day
|
00001 /* 00002 //@HEADER 00003 // ************************************************************************ 00004 // 00005 // Kokkos: Node API and Parallel Node Kernels 00006 // Copyright (2008) Sandia Corporation 00007 // 00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00009 // the U.S. Government retains certain rights in this software. 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are 00013 // met: 00014 // 00015 // 1. Redistributions of source code must retain the above copyright 00016 // notice, this list of conditions and the following disclaimer. 00017 // 00018 // 2. Redistributions in binary form must reproduce the above copyright 00019 // notice, this list of conditions and the following disclaimer in the 00020 // documentation and/or other materials provided with the distribution. 00021 // 00022 // 3. Neither the name of the Corporation nor the names of the 00023 // contributors may be used to endorse or promote products derived from 00024 // this software without specific prior written permission. 00025 // 00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00037 // 00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00039 // 00040 // ************************************************************************ 00041 //@HEADER 00042 */ 00043 00044 #ifndef KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP 00045 #define KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP 00046 00064 00065 #include <KokkosLinAlg_config.h> 00066 #include <Kokkos_ArithTraits.hpp> 00067 00068 namespace Kokkos { 00069 namespace Sequential { 00070 00097 template<class LocalOrdinal, 00098 class OffsetType, 00099 class MatrixScalar, 00100 class DomainScalar, 00101 class RangeScalar> 00102 void 00103 gaussSeidel (const LocalOrdinal numRows, 00104 const LocalOrdinal numCols, 00105 const OffsetType* const ptr, 00106 const LocalOrdinal* const ind, 00107 const MatrixScalar* const val, 00108 const DomainScalar* const B, 00109 const OffsetType b_stride, 00110 RangeScalar* const X, 00111 const OffsetType x_stride, 00112 const MatrixScalar* const D, 00113 const MatrixScalar omega, 00114 const Kokkos::ESweepDirection direction) 00115 { 00116 using Kokkos::Details::ArithTraits; 00117 typedef LocalOrdinal LO; 00118 const OffsetType theNumRows = static_cast<OffsetType> (numRows); 00119 const OffsetType theNumCols = static_cast<OffsetType> (numCols); 00120 00121 if (numRows == 0 || numCols == 0) { 00122 return; // Nothing to do. 00123 } 00124 else if (numRows > 0 && ptr[numRows] == 0) { 00125 // All the off-diagonal entries of A are zero, and all the 00126 // diagonal entries are (implicitly) 1. Therefore compute: X := 00127 // (1 - omega) X + omega B. There's no need to care about the 00128 // direction, since there are no cross-row data dependencies in 00129 // this case. 00130 const MatrixScalar oneMinusOmega = 00131 ArithTraits<MatrixScalar>::one () - omega; 00132 for (OffsetType j = 0; j < theNumCols; ++j) { 00133 RangeScalar* const x_j = X + j*x_stride; 00134 const DomainScalar* const b_j = B + j*b_stride; 00135 for (OffsetType i = 0; i < theNumRows; ++i) { 00136 x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i]; 00137 } 00138 } 00139 return; 00140 } 00141 00142 if (numCols == 1) { 00143 if (direction == Kokkos::Forward) { 00144 for (LO i = 0; i < numRows; ++i) { 00145 RangeScalar x_temp = ArithTraits<RangeScalar>::zero (); 00146 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00147 const LO j = ind[k]; 00148 const MatrixScalar A_ij = val[k]; 00149 x_temp += A_ij * X[j]; 00150 } 00151 X[i] += omega * D[i] * (B[i] - x_temp); 00152 } 00153 } else if (direction == Kokkos::Backward) { 00154 // Split the loop so that it is correct even if LO is unsigned. 00155 // It's a bad idea for LO to be unsigned, but we want this to 00156 // work nevertheless. 00157 for (LO i = numRows - 1; i != 0; --i) { 00158 RangeScalar x_temp = ArithTraits<RangeScalar>::zero (); 00159 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00160 const LO j = ind[k]; 00161 const MatrixScalar A_ij = val[k]; 00162 x_temp += A_ij * X[j]; 00163 } 00164 X[i] += omega * D[i] * (B[i] - x_temp); 00165 } 00166 { // last loop iteration 00167 const LO i = 0; 00168 RangeScalar x_temp = ArithTraits<RangeScalar>::zero (); 00169 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00170 const LO j = ind[k]; 00171 const MatrixScalar A_ij = val[k]; 00172 x_temp += A_ij * X[j]; 00173 } 00174 X[i] += omega * D[i] * (B[i] - x_temp); 00175 } 00176 } 00177 } 00178 else { // numCols > 1 00179 // mfh 20 Dec 2012: If Gauss-Seidel for multivectors with 00180 // multiple columns becomes important, we can add unrolled 00181 // implementations. The implementation below is not unrolled. 00182 // It may also be reasonable to parallelize over right-hand 00183 // sides, if there are enough of them, especially if the matrix 00184 // fits in cache. 00185 Teuchos::Array<RangeScalar> temp (numCols); 00186 RangeScalar* const x_temp = temp.getRawPtr (); 00187 00188 if (direction == Kokkos::Forward) { 00189 for (LO i = 0; i < numRows; ++i) { 00190 for (OffsetType c = 0; c < theNumCols; ++c) { 00191 x_temp[c] = ArithTraits<RangeScalar>::zero (); 00192 } 00193 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00194 const LO j = ind[k]; 00195 const MatrixScalar A_ij = val[k]; 00196 for (OffsetType c = 0; c < theNumCols; ++c) { 00197 x_temp[c] += A_ij * X[j + x_stride*c]; 00198 } 00199 } 00200 for (OffsetType c = 0; c < theNumCols; ++c) { 00201 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]); 00202 } 00203 } 00204 } else if (direction == Kokkos::Backward) { // backward mode 00205 // Split the loop so that it is correct even if LO is unsigned. 00206 // It's a bad idea for LO to be unsigned, but we want this to 00207 // work nevertheless. 00208 for (LO i = numRows - 1; i != 0; --i) { 00209 for (OffsetType c = 0; c < theNumCols; ++c) { 00210 x_temp[c] = ArithTraits<RangeScalar>::zero (); 00211 } 00212 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00213 const LO j = ind[k]; 00214 const MatrixScalar A_ij = val[k]; 00215 for (OffsetType c = 0; c < theNumCols; ++c) { 00216 x_temp[c] += A_ij * X[j + x_stride*c]; 00217 } 00218 } 00219 for (OffsetType c = 0; c < theNumCols; ++c) { 00220 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]); 00221 } 00222 } 00223 { // last loop iteration 00224 const LO i = 0; 00225 for (OffsetType c = 0; c < theNumCols; ++c) { 00226 x_temp[c] = ArithTraits<RangeScalar>::zero (); 00227 } 00228 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00229 const LO j = ind[k]; 00230 const MatrixScalar A_ij = val[k]; 00231 for (OffsetType c = 0; c < theNumCols; ++c) { 00232 x_temp[c] += A_ij * X[j + x_stride*c]; 00233 } 00234 } 00235 for (OffsetType c = 0; c < theNumCols; ++c) { 00236 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]); 00237 } 00238 } 00239 } 00240 } 00241 } 00242 00243 00277 template<class LocalOrdinal, 00278 class OffsetType, 00279 class MatrixScalar, 00280 class DomainScalar, 00281 class RangeScalar> 00282 void 00283 reorderedGaussSeidel (const LocalOrdinal numRows, 00284 const LocalOrdinal numCols, 00285 const OffsetType* const ptr, 00286 const LocalOrdinal* const ind, 00287 const MatrixScalar* const val, 00288 const DomainScalar* const B, 00289 const OffsetType b_stride, 00290 RangeScalar* const X, 00291 const OffsetType x_stride, 00292 const MatrixScalar* const D, 00293 const LocalOrdinal* const rowInd, 00294 const LocalOrdinal numRowInds, // length of rowInd 00295 const MatrixScalar omega, 00296 const Kokkos::ESweepDirection direction) 00297 { 00298 using Kokkos::Details::ArithTraits; 00299 typedef LocalOrdinal LO; 00300 const OffsetType theNumRows = static_cast<OffsetType> (numRows); 00301 const OffsetType theNumCols = static_cast<OffsetType> (numCols); 00302 00303 if (numRows == 0 || numCols == 0) { 00304 return; // Nothing to do. 00305 } 00306 else if (numRows > 0 && ptr[numRows] == 0) { 00307 // All the off-diagonal entries of A are zero, and all the 00308 // diagonal entries are (implicitly) 1. Therefore compute: X := 00309 // (1 - omega) X + omega B. There's no need to care about the 00310 // direction or row ordering, since there are no cross-row data 00311 // dependencies in this case. 00312 const MatrixScalar oneMinusOmega = 00313 ArithTraits<MatrixScalar>::one () - omega; 00314 for (OffsetType j = 0; j < theNumCols; ++j) { 00315 RangeScalar* const x_j = X + j*x_stride; 00316 const DomainScalar* const b_j = B + j*b_stride; 00317 for (OffsetType i = 0; i < theNumRows; ++i) { 00318 x_j[i] = oneMinusOmega * x_j[i] + omega * b_j[i]; 00319 } 00320 } 00321 return; 00322 } 00323 00324 if (numCols == 1) { 00325 if (direction == Kokkos::Forward) { 00326 for (LO ii = 0; ii < numRowInds; ++ii) { 00327 LO i = rowInd[ii]; 00328 RangeScalar x_temp = ArithTraits<RangeScalar>::zero (); 00329 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00330 const LO j = ind[k]; 00331 const MatrixScalar A_ij = val[k]; 00332 x_temp += A_ij * X[j]; 00333 } 00334 X[i] += omega * D[i] * (B[i] - x_temp); 00335 } 00336 } else if (direction == Kokkos::Backward) { 00337 // Split the loop so that it is correct even if LO is unsigned. 00338 // It's a bad idea for LO to be unsigned, but we want this to 00339 // work nevertheless. 00340 for (LO ii = numRowInds - 1; ii != 0; --ii) { 00341 LO i = rowInd[ii]; 00342 RangeScalar x_temp = ArithTraits<RangeScalar>::zero (); 00343 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00344 const LO j = ind[k]; 00345 const MatrixScalar A_ij = val[k]; 00346 x_temp += A_ij * X[j]; 00347 } 00348 X[i] += omega * D[i] * (B[i] - x_temp); 00349 } 00350 { // last loop iteration 00351 const LO ii = 0; 00352 LO i = rowInd[ii]; 00353 RangeScalar x_temp = ArithTraits<RangeScalar>::zero (); 00354 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00355 const LO j = ind[k]; 00356 const MatrixScalar A_ij = val[k]; 00357 x_temp += A_ij * X[j]; 00358 } 00359 X[i] += omega * D[i] * (B[i] - x_temp); 00360 } 00361 } 00362 } 00363 else { // numCols > 1 00364 // mfh 20 Dec 2012: If Gauss-Seidel for multivectors with 00365 // multiple columns becomes important, we can add unrolled 00366 // implementations. The implementation below is not unrolled. 00367 // It may also be reasonable to parallelize over right-hand 00368 // sides, if there are enough of them, especially if the matrix 00369 // fits in cache. 00370 Teuchos::Array<RangeScalar> temp (numCols); 00371 RangeScalar* const x_temp = temp.getRawPtr (); 00372 00373 if (direction == Kokkos::Forward) { 00374 for (LO ii = 0; ii < numRowInds; ++ii) { 00375 LO i = rowInd[ii]; 00376 for (OffsetType c = 0; c < theNumCols; ++c) { 00377 x_temp[c] = Kokkos::Details::ArithTraits<RangeScalar>::zero (); 00378 } 00379 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00380 const LO j = ind[k]; 00381 const MatrixScalar A_ij = val[k]; 00382 for (OffsetType c = 0; c < theNumCols; ++c) { 00383 x_temp[c] += A_ij * X[j + x_stride*c]; 00384 } 00385 } 00386 for (OffsetType c = 0; c < theNumCols; ++c) { 00387 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]); 00388 } 00389 } 00390 } else if (direction == Kokkos::Backward) { // backward mode 00391 // Split the loop so that it is correct even if LO is unsigned. 00392 // It's a bad idea for LO to be unsigned, but we want this to 00393 // work nevertheless. 00394 for (LO ii = numRowInds - 1; ii != 0; --ii) { 00395 LO i = rowInd[ii]; 00396 for (OffsetType c = 0; c < theNumCols; ++c) { 00397 x_temp[c] = Kokkos::Details::ArithTraits<RangeScalar>::zero (); 00398 } 00399 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00400 const LO j = ind[k]; 00401 const MatrixScalar A_ij = val[k]; 00402 for (OffsetType c = 0; c < theNumCols; ++c) { 00403 x_temp[c] += A_ij * X[j + x_stride*c]; 00404 } 00405 } 00406 for (OffsetType c = 0; c < theNumCols; ++c) { 00407 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]); 00408 } 00409 } 00410 { // last loop iteration 00411 const LO ii = 0; 00412 LO i = rowInd[ii]; 00413 for (OffsetType c = 0; c < theNumCols; ++c) { 00414 x_temp[c] = Kokkos::Details::ArithTraits<RangeScalar>::zero (); 00415 } 00416 for (OffsetType k = ptr[i]; k < ptr[i+1]; ++k) { 00417 const LO j = ind[k]; 00418 const MatrixScalar A_ij = val[k]; 00419 for (OffsetType c = 0; c < theNumCols; ++c) { 00420 x_temp[c] += A_ij * X[j + x_stride*c]; 00421 } 00422 } 00423 for (OffsetType c = 0; c < theNumCols; ++c) { 00424 X[i + x_stride*c] += omega * D[i] * (B[i + b_stride*c] - x_temp[c]); 00425 } 00426 } 00427 } 00428 } 00429 } 00430 00431 00432 template<class CrsMatrixType, 00433 class DomainMultiVectorType, 00434 class RangeMultiVectorType> 00435 void 00436 lowerTriSolveCsrUnitDiag (RangeMultiVectorType X, 00437 const CrsMatrixType& A, 00438 DomainMultiVectorType Y) 00439 { 00440 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00441 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00442 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00443 00444 const local_ordinal_type numRows = A.numRows (); 00445 //const local_ordinal_type numCols = A.numCols (); 00446 const local_ordinal_type numVecs = X.dimension_1 (); 00447 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00448 typename CrsMatrixType::index_type ind = A.graph.entries; 00449 typename CrsMatrixType::values_type val = A.values; 00450 00451 for (local_ordinal_type r = 0; r < numRows; ++r) { 00452 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00453 X(r, j) = Y(r, j); 00454 } 00455 const offset_type beg = ptr(r); 00456 const offset_type end = ptr(r+1); 00457 for (offset_type k = beg; k < end; ++k) { 00458 const matrix_scalar_type A_rc = val(k); 00459 const local_ordinal_type c = ind(k); 00460 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00461 X(r, j) -= A_rc * X(c, j); 00462 } 00463 } // for each entry A_rc in the current row r 00464 } // for each row r 00465 } 00466 00467 00468 template<class CrsMatrixType, 00469 class DomainMultiVectorType, 00470 class RangeMultiVectorType> 00471 void 00472 lowerTriSolveCsr (RangeMultiVectorType X, 00473 const CrsMatrixType& A, 00474 DomainMultiVectorType Y) 00475 { 00476 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00477 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00478 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00479 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 00480 00481 const local_ordinal_type numRows = A.numRows (); 00482 //const local_ordinal_type numCols = A.numCols (); 00483 const local_ordinal_type numVecs = X.dimension_1 (); 00484 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00485 typename CrsMatrixType::index_type ind = A.graph.entries; 00486 typename CrsMatrixType::values_type val = A.values; 00487 00488 for (local_ordinal_type r = 0; r < numRows; ++r) { 00489 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00490 X(r, j) = Y(r, j); 00491 } 00492 00493 matrix_scalar_type A_rr = STS::zero (); 00494 const offset_type beg = ptr(r); 00495 const offset_type end = ptr(r+1); 00496 00497 for (offset_type k = beg; k < end; ++k) { 00498 const matrix_scalar_type A_rc = val(k); 00499 const local_ordinal_type c = ind(k); 00500 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 00501 // has equal local row and column indices. That may not 00502 // necessarily hold, depending on the row and column Maps. The 00503 // way to fix this would be for Tpetra::CrsMatrix to remember 00504 // the local column index of the diagonal entry (if there is 00505 // one) in each row, and pass that along to this function. 00506 if (r == c) { 00507 A_rr += A_rc; 00508 } else { 00509 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00510 X(r, j) -= A_rc * X(c, j); 00511 } 00512 } 00513 } // for each entry A_rc in the current row r 00514 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00515 X(r, j) /= A_rr; 00516 } 00517 } // for each row r 00518 } 00519 00520 00521 template<class CrsMatrixType, 00522 class DomainMultiVectorType, 00523 class RangeMultiVectorType> 00524 void 00525 upperTriSolveCsrUnitDiag (RangeMultiVectorType X, 00526 const CrsMatrixType& A, 00527 DomainMultiVectorType Y) 00528 { 00529 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00530 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00531 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00532 00533 const local_ordinal_type numRows = A.numRows (); 00534 //const local_ordinal_type numCols = A.numCols (); 00535 const local_ordinal_type numVecs = X.dimension_1 (); 00536 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00537 typename CrsMatrixType::index_type ind = A.graph.entries; 00538 typename CrsMatrixType::values_type val = A.values; 00539 00540 // If local_ordinal_type is unsigned and numRows is 0, the loop 00541 // below will have entirely the wrong number of iterations. 00542 if (numRows == 0) { 00543 return; 00544 } 00545 00546 // Don't use r >= 0 as the test, because that fails if 00547 // local_ordinal_type is unsigned. We do r == 0 (last 00548 // iteration) below. 00549 for (local_ordinal_type r = numRows - 1; r != 0; --r) { 00550 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00551 X(r, j) = Y(r, j); 00552 } 00553 const offset_type beg = ptr(r); 00554 const offset_type end = ptr(r+1); 00555 for (offset_type k = beg; k < end; ++k) { 00556 const matrix_scalar_type A_rc = val(k); 00557 const local_ordinal_type c = ind(k); 00558 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00559 X(r, j) -= A_rc * X(c, j); 00560 } 00561 } // for each entry A_rc in the current row r 00562 } // for each row r 00563 00564 // Last iteration: r = 0. 00565 { 00566 const local_ordinal_type r = 0; 00567 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00568 X(r, j) = Y(r, j); 00569 } 00570 const offset_type beg = ptr(r); 00571 const offset_type end = ptr(r+1); 00572 for (offset_type k = beg; k < end; ++k) { 00573 const matrix_scalar_type A_rc = val(k); 00574 const local_ordinal_type c = ind(k); 00575 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00576 X(r, j) -= A_rc * X(c, j); 00577 } 00578 } // for each entry A_rc in the current row r 00579 } // last iteration: r = 0 00580 } 00581 00582 00583 template<class CrsMatrixType, 00584 class DomainMultiVectorType, 00585 class RangeMultiVectorType> 00586 void 00587 upperTriSolveCsr (RangeMultiVectorType X, 00588 const CrsMatrixType& A, 00589 DomainMultiVectorType Y) 00590 { 00591 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00592 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00593 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00594 00595 const local_ordinal_type numRows = A.numRows (); 00596 //const local_ordinal_type numCols = A.numCols (); 00597 const local_ordinal_type numVecs = X.dimension_1 (); 00598 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00599 typename CrsMatrixType::index_type ind = A.graph.entries; 00600 typename CrsMatrixType::values_type val = A.values; 00601 00602 // If local_ordinal_type is unsigned and numRows is 0, the loop 00603 // below will have entirely the wrong number of iterations. 00604 if (numRows == 0) { 00605 return; 00606 } 00607 00608 // Don't use r >= 0 as the test, because that fails if 00609 // local_ordinal_type is unsigned. We do r == 0 (last 00610 // iteration) below. 00611 for (local_ordinal_type r = numRows - 1; r != 0; --r) { 00612 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00613 X(r, j) = Y(r, j); 00614 } 00615 const offset_type beg = ptr(r); 00616 const offset_type end = ptr(r+1); 00617 // We assume the diagonal entry is first in the row. 00618 const matrix_scalar_type A_rr = val(beg); 00619 for (offset_type k = beg + static_cast<offset_type> (1); k < end; ++k) { 00620 const matrix_scalar_type A_rc = val(k); 00621 const local_ordinal_type c = ind(k); 00622 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00623 X(r, j) -= A_rc * X(c, j); 00624 } 00625 } // for each entry A_rc in the current row r 00626 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00627 X(r, j) /= A_rr; 00628 } 00629 } // for each row r 00630 00631 // Last iteration: r = 0. 00632 { 00633 const local_ordinal_type r = 0; 00634 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00635 X(r, j) = Y(r, j); 00636 } 00637 const offset_type beg = ptr(r); 00638 const offset_type end = ptr(r+1); 00639 // We assume the diagonal entry is first in the row. 00640 const matrix_scalar_type A_rr = val(beg); 00641 for (size_t k = beg + 1; k < end; ++k) { 00642 const matrix_scalar_type A_rc = val(k); 00643 const local_ordinal_type c = ind(k); 00644 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00645 X(r, j) -= A_rc * X(c, j); 00646 } 00647 } // for each entry A_rc in the current row r 00648 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00649 X(r, j) /= A_rr; 00650 } 00651 } // last iteration: r = 0 00652 } 00653 00654 00655 template<class CrsMatrixType, 00656 class DomainMultiVectorType, 00657 class RangeMultiVectorType> 00658 void 00659 upperTriSolveCscUnitDiag (RangeMultiVectorType X, 00660 const CrsMatrixType& A, 00661 DomainMultiVectorType Y) 00662 { 00663 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00664 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00665 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00666 00667 const local_ordinal_type numRows = A.numRows (); 00668 const local_ordinal_type numCols = A.numCols (); 00669 const local_ordinal_type numVecs = X.dimension_1 (); 00670 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00671 typename CrsMatrixType::index_type ind = A.graph.entries; 00672 typename CrsMatrixType::values_type val = A.values; 00673 00674 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00675 for (local_ordinal_type i = 0; i < numRows; ++i) { 00676 X(i, j) = Y(i, j); 00677 } 00678 } 00679 00680 // If local_ordinal_type is unsigned and numCols is 0, the loop 00681 // below will have entirely the wrong number of iterations. 00682 if (numCols == 0) { 00683 return; 00684 } 00685 00686 // Don't use c >= 0 as the test, because that fails if 00687 // local_ordinal_type is unsigned. We do c == 0 (last 00688 // iteration) below. 00689 for (local_ordinal_type c = numCols - 1; c != 0; --c) { 00690 const offset_type beg = ptr(c); 00691 const offset_type end = ptr(c+1); 00692 for (offset_type k = beg; k < end; ++k) { 00693 const matrix_scalar_type A_rc = val(k); 00694 const local_ordinal_type r = ind(k); 00695 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00696 X(r, j) -= A_rc * X(c, j); 00697 } 00698 } // for each entry A_rc in the current column c 00699 } // for each column c 00700 00701 // Last iteration: c = 0. 00702 { 00703 const local_ordinal_type c = 0; 00704 const offset_type beg = ptr(c); 00705 const offset_type end = ptr(c+1); 00706 for (offset_type k = beg; k < end; ++k) { 00707 const matrix_scalar_type A_rc = val(k); 00708 const local_ordinal_type r = ind(k); 00709 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00710 X(r, j) -= A_rc * X(c, j); 00711 } 00712 } // for each entry A_rc in the current column c 00713 } 00714 } 00715 00716 00717 template<class CrsMatrixType, 00718 class DomainMultiVectorType, 00719 class RangeMultiVectorType> 00720 void 00721 upperTriSolveCsc (RangeMultiVectorType X, 00722 const CrsMatrixType& A, 00723 DomainMultiVectorType Y) 00724 { 00725 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00726 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00727 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00728 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 00729 00730 const local_ordinal_type numRows = A.numRows (); 00731 const local_ordinal_type numCols = A.numCols (); 00732 const local_ordinal_type numVecs = X.dimension_1 (); 00733 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00734 typename CrsMatrixType::index_type ind = A.graph.entries; 00735 typename CrsMatrixType::values_type val = A.values; 00736 00737 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00738 for (local_ordinal_type i = 0; i < numRows; ++i) { 00739 X(i, j) = Y(i, j); 00740 } 00741 } 00742 00743 // If local_ordinal_type is unsigned and numCols is 0, the loop 00744 // below will have entirely the wrong number of iterations. 00745 if (numCols == 0) { 00746 return; 00747 } 00748 00749 // Don't use c >= 0 as the test, because that fails if 00750 // local_ordinal_type is unsigned. We do c == 0 (last 00751 // iteration) below. 00752 for (local_ordinal_type c = numCols - 1; c != 0; --c) { 00753 matrix_scalar_type A_cc = STS::zero (); 00754 const offset_type beg = ptr(c); 00755 const offset_type end = ptr(c+1); 00756 for (offset_type k = beg; k < end; ++k) { 00757 const local_ordinal_type r = ind(k); 00758 const matrix_scalar_type A_rc = val(k); 00759 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 00760 // has equal local row and column indices. That may not 00761 // necessarily hold, depending on the row and column Maps. See 00762 // note above. 00763 if (r == c) { 00764 A_cc += A_rc; 00765 } else { 00766 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00767 X(r, j) -= A_rc * X(c, j); 00768 } 00769 } 00770 } // for each entry A_rc in the current column c 00771 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00772 X(c, j) /= A_cc; 00773 } 00774 } // for each column c 00775 00776 // Last iteration: c = 0. 00777 { 00778 const local_ordinal_type c = 0; 00779 matrix_scalar_type A_cc = STS::zero (); 00780 const offset_type beg = ptr(c); 00781 const offset_type end = ptr(c+1); 00782 for (offset_type k = beg; k < end; ++k) { 00783 const local_ordinal_type r = ind(k); 00784 const matrix_scalar_type A_rc = val(k); 00785 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 00786 // has equal local row and column indices. That may not 00787 // necessarily hold, depending on the row and column Maps. See 00788 // note above. 00789 if (r == c) { 00790 A_cc += A_rc; 00791 } else { 00792 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00793 X(r, j) -= A_rc * X(c, j); 00794 } 00795 } 00796 } // for each entry A_rc in the current column c 00797 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00798 X(c, j) /= A_cc; 00799 } 00800 } 00801 } 00802 00803 00804 template<class CrsMatrixType, 00805 class DomainMultiVectorType, 00806 class RangeMultiVectorType> 00807 void 00808 lowerTriSolveCscUnitDiag (RangeMultiVectorType X, 00809 const CrsMatrixType& A, 00810 DomainMultiVectorType Y) 00811 { 00812 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00813 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00814 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00815 00816 const local_ordinal_type numRows = A.numRows (); 00817 const local_ordinal_type numCols = A.numCols (); 00818 const local_ordinal_type numVecs = X.dimension_1 (); 00819 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00820 typename CrsMatrixType::index_type ind = A.graph.entries; 00821 typename CrsMatrixType::values_type val = A.values; 00822 00823 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00824 for (local_ordinal_type i = 0; i < numRows; ++i) { 00825 X(i, j) = Y(i, j); 00826 } 00827 } 00828 00829 for (local_ordinal_type c = 0; c < numCols; ++c) { 00830 const offset_type beg = ptr(c); 00831 const offset_type end = ptr(c+1); 00832 for (offset_type k = beg; k < end; ++k) { 00833 const local_ordinal_type r = ind(k); 00834 const matrix_scalar_type A_rc = val(k); 00835 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00836 X(r, j) -= A_rc * X(c, j); 00837 } 00838 } // for each entry A_rc in the current column c 00839 } // for each column c 00840 } 00841 00842 00843 template<class CrsMatrixType, 00844 class DomainMultiVectorType, 00845 class RangeMultiVectorType> 00846 void 00847 upperTriSolveCscUnitDiagConj (RangeMultiVectorType X, 00848 const CrsMatrixType& A, 00849 DomainMultiVectorType Y) 00850 { 00851 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00852 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00853 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00854 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 00855 00856 const local_ordinal_type numRows = A.numRows (); 00857 const local_ordinal_type numCols = A.numCols (); 00858 const local_ordinal_type numVecs = X.dimension_1 (); 00859 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00860 typename CrsMatrixType::index_type ind = A.graph.entries; 00861 typename CrsMatrixType::values_type val = A.values; 00862 00863 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00864 for (local_ordinal_type i = 0; i < numRows; ++i) { 00865 X(i, j) = Y(i, j); 00866 } 00867 } 00868 00869 // If local_ordinal_type is unsigned and numCols is 0, the loop 00870 // below will have entirely the wrong number of iterations. 00871 if (numCols == 0) { 00872 return; 00873 } 00874 00875 // Don't use c >= 0 as the test, because that fails if 00876 // local_ordinal_type is unsigned. We do c == 0 (last 00877 // iteration) below. 00878 for (local_ordinal_type c = numCols - 1; c != 0; --c) { 00879 const offset_type beg = ptr(c); 00880 const offset_type end = ptr(c+1); 00881 for (offset_type k = beg; k < end; ++k) { 00882 const local_ordinal_type r = ind(k); 00883 const matrix_scalar_type A_rc = STS::conj (val(k)); 00884 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00885 X(r, j) -= A_rc * X(c, j); 00886 } 00887 } // for each entry A_rc in the current column c 00888 } // for each column c 00889 00890 // Last iteration: c = 0. 00891 { 00892 const local_ordinal_type c = 0; 00893 const offset_type beg = ptr(c); 00894 const offset_type end = ptr(c+1); 00895 for (offset_type k = beg; k < end; ++k) { 00896 const local_ordinal_type r = ind(k); 00897 const matrix_scalar_type A_rc = STS::conj (val(k)); 00898 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00899 X(r, j) -= A_rc * X(c, j); 00900 } 00901 } // for each entry A_rc in the current column c 00902 } 00903 } 00904 00905 00906 template<class CrsMatrixType, 00907 class DomainMultiVectorType, 00908 class RangeMultiVectorType> 00909 void 00910 upperTriSolveCscConj (RangeMultiVectorType X, 00911 const CrsMatrixType& A, 00912 DomainMultiVectorType Y) 00913 { 00914 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 00915 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 00916 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 00917 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 00918 00919 const local_ordinal_type numRows = A.numRows (); 00920 const local_ordinal_type numCols = A.numCols (); 00921 const local_ordinal_type numVecs = X.dimension_1 (); 00922 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 00923 typename CrsMatrixType::index_type ind = A.graph.entries; 00924 typename CrsMatrixType::values_type val = A.values; 00925 00926 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00927 for (local_ordinal_type i = 0; i < numRows; ++i) { 00928 X(i, j) = Y(i, j); 00929 } 00930 } 00931 00932 // If local_ordinal_type is unsigned and numCols is 0, the loop 00933 // below will have entirely the wrong number of iterations. 00934 if (numCols == 0) { 00935 return; 00936 } 00937 00938 // Don't use c >= 0 as the test, because that fails if 00939 // local_ordinal_type is unsigned. We do c == 0 (last 00940 // iteration) below. 00941 for (local_ordinal_type c = numCols - 1; c != 0; --c) { 00942 matrix_scalar_type A_cc = STS::zero (); 00943 const offset_type beg = ptr(c); 00944 const offset_type end = ptr(c+1); 00945 for (offset_type k = beg; k < end; ++k) { 00946 const local_ordinal_type r = ind(k); 00947 const matrix_scalar_type A_rc = STS::conj (val(k)); 00948 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 00949 // has equal local row and column indices. That may not 00950 // necessarily hold, depending on the row and column Maps. See 00951 // note above. 00952 if (r == c) { 00953 A_cc += A_rc; 00954 } else { 00955 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00956 X(r, j) -= A_rc * X(c, j); 00957 } 00958 } 00959 } // for each entry A_rc in the current column c 00960 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00961 X(c, j) /= A_cc; 00962 } 00963 } // for each column c 00964 00965 // Last iteration: c = 0. 00966 { 00967 const local_ordinal_type c = 0; 00968 matrix_scalar_type A_cc = STS::zero (); 00969 const offset_type beg = ptr(c); 00970 const offset_type end = ptr(c+1); 00971 for (offset_type k = beg; k < end; ++k) { 00972 const local_ordinal_type r = ind(k); 00973 const matrix_scalar_type A_rc = STS::conj (val(k)); 00974 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 00975 // has equal local row and column indices. That may not 00976 // necessarily hold, depending on the row and column Maps. See 00977 // note above. 00978 if (r == c) { 00979 A_cc += A_rc; 00980 } else { 00981 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00982 X(r, j) -= A_rc * X(c, j); 00983 } 00984 } 00985 } // for each entry A_rc in the current column c 00986 for (local_ordinal_type j = 0; j < numVecs; ++j) { 00987 X(c, j) /= A_cc; 00988 } 00989 } 00990 } 00991 00992 00993 template<class CrsMatrixType, 00994 class DomainMultiVectorType, 00995 class RangeMultiVectorType> 00996 void 00997 lowerTriSolveCsc (RangeMultiVectorType X, 00998 const CrsMatrixType& A, 00999 DomainMultiVectorType Y) 01000 { 01001 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 01002 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 01003 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 01004 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 01005 01006 const local_ordinal_type numRows = A.numRows (); 01007 const local_ordinal_type numCols = A.numCols (); 01008 const local_ordinal_type numVecs = X.dimension_1 (); 01009 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 01010 typename CrsMatrixType::index_type ind = A.graph.entries; 01011 typename CrsMatrixType::values_type val = A.values; 01012 01013 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01014 for (local_ordinal_type i = 0; i < numRows; ++i) { 01015 X(i, j) = Y(i, j); 01016 } 01017 } 01018 01019 for (local_ordinal_type c = 0; c < numCols; ++c) { 01020 matrix_scalar_type A_cc = STS::zero (); 01021 const offset_type beg = ptr(c); 01022 const offset_type end = ptr(c+1); 01023 for (offset_type k = beg; k < end; ++k) { 01024 const local_ordinal_type r = ind(k); 01025 const matrix_scalar_type A_rc = val(k); 01026 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 01027 // has equal local row and column indices. That may not 01028 // necessarily hold, depending on the row and column Maps. See 01029 // note above. 01030 if (r == c) { 01031 A_cc += A_rc; 01032 } else { 01033 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01034 X(r, j) -= A_rc * X(c, j); 01035 } 01036 } 01037 } // for each entry A_rc in the current column c 01038 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01039 X(c, j) /= A_cc; 01040 } 01041 } // for each column c 01042 } 01043 01044 01045 template<class CrsMatrixType, 01046 class DomainMultiVectorType, 01047 class RangeMultiVectorType> 01048 void 01049 lowerTriSolveCscUnitDiagConj (RangeMultiVectorType X, 01050 const CrsMatrixType& A, 01051 DomainMultiVectorType Y) 01052 { 01053 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 01054 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 01055 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 01056 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 01057 01058 const local_ordinal_type numRows = A.numRows (); 01059 const local_ordinal_type numCols = A.numCols (); 01060 const local_ordinal_type numVecs = X.dimension_1 (); 01061 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 01062 typename CrsMatrixType::index_type ind = A.graph.entries; 01063 typename CrsMatrixType::values_type val = A.values; 01064 01065 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01066 for (local_ordinal_type i = 0; i < numRows; ++i) { 01067 X(i, j) = Y(i, j); 01068 } 01069 } 01070 01071 for (local_ordinal_type c = 0; c < numCols; ++c) { 01072 const offset_type beg = ptr(c); 01073 const offset_type end = ptr(c+1); 01074 for (offset_type k = beg; k < end; ++k) { 01075 const local_ordinal_type r = ind(k); 01076 const matrix_scalar_type A_rc = STS::conj (val(k)); 01077 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01078 X(r, j) -= A_rc * X(c, j); 01079 } 01080 } // for each entry A_rc in the current column c 01081 } // for each column c 01082 } 01083 01084 01085 template<class CrsMatrixType, 01086 class DomainMultiVectorType, 01087 class RangeMultiVectorType> 01088 void 01089 lowerTriSolveCscConj (RangeMultiVectorType X, 01090 const CrsMatrixType& A, 01091 DomainMultiVectorType Y) 01092 { 01093 typedef typename CrsMatrixType::row_map_type::non_const_value_type offset_type; 01094 typedef typename CrsMatrixType::index_type::non_const_value_type local_ordinal_type; 01095 typedef typename CrsMatrixType::values_type::non_const_value_type matrix_scalar_type; 01096 typedef Kokkos::Details::ArithTraits<matrix_scalar_type> STS; 01097 01098 const local_ordinal_type numRows = A.numRows (); 01099 const local_ordinal_type numCols = A.numCols (); 01100 const local_ordinal_type numVecs = X.dimension_1 (); 01101 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 01102 typename CrsMatrixType::index_type ind = A.graph.entries; 01103 typename CrsMatrixType::values_type val = A.values; 01104 01105 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01106 for (local_ordinal_type i = 0; i < numRows; ++i) { 01107 X(i, j) = Y(i, j); 01108 } 01109 } 01110 01111 for (local_ordinal_type c = 0; c < numCols; ++c) { 01112 matrix_scalar_type A_cc = STS::zero (); 01113 const offset_type beg = ptr(c); 01114 const offset_type end = ptr(c+1); 01115 for (offset_type k = beg; k < end; ++k) { 01116 const local_ordinal_type r = ind(k); 01117 const matrix_scalar_type A_rc = STS::conj (val(k)); 01118 // FIXME (mfh 28 Aug 2014) This assumes that the diagonal entry 01119 // has equal local row and column indices. That may not 01120 // necessarily hold, depending on the row and column Maps. See 01121 // note above. 01122 if (r == c) { 01123 A_cc += A_rc; 01124 } else { 01125 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01126 X(r, j) -= A_rc * X(c, j); 01127 } 01128 } 01129 } // for each entry A_rc in the current column c 01130 for (local_ordinal_type j = 0; j < numVecs; ++j) { 01131 X(c, j) /= A_cc; 01132 } 01133 } // for each column c 01134 } 01135 01136 01137 template<class CrsMatrixType, 01138 class DomainMultiVectorType, 01139 class RangeMultiVectorType> 01140 void 01141 triSolveKokkos (RangeMultiVectorType X, 01142 const CrsMatrixType& A, 01143 DomainMultiVectorType Y, 01144 const Teuchos::EUplo triUplo, 01145 const Teuchos::EDiag unitDiag, 01146 const Teuchos::ETransp trans) 01147 { 01148 typedef typename CrsMatrixType::index_type::non_const_value_type LO; 01149 const char prefix[] = "Kokkos::Sequential::triSolveKokkos: "; 01150 const LO numRows = A.numRows (); 01151 const LO numCols = A.numCols (); 01152 const LO numVecs = X.dimension_1 (); 01153 typename CrsMatrixType::row_map_type ptr = A.graph.row_map; 01154 01155 TEUCHOS_TEST_FOR_EXCEPTION( 01156 triUplo != Teuchos::LOWER_TRI && triUplo != Teuchos::UPPER_TRI && 01157 triUplo != Teuchos::UNDEF_TRI, 01158 std::invalid_argument, prefix << "triUplo has an invalid value " << triUplo 01159 << ". Valid values are Teuchos::LOWER_TRI=" << Teuchos::LOWER_TRI << 01160 ", Teuchos::UPPER_TRI=" << Teuchos::UPPER_TRI << ", and Teuchos::UNDEF_TRI=" 01161 << Teuchos::UNDEF_TRI << "."); 01162 TEUCHOS_TEST_FOR_EXCEPTION( 01163 triUplo == Teuchos::UNDEF_TRI, std::invalid_argument, prefix << 01164 "The matrix is neither lower nor upper triangular (triUplo=" 01165 "Teuchos::UNDEF_TRI), so you may not call this method."); 01166 TEUCHOS_TEST_FOR_EXCEPTION( 01167 unitDiag != Teuchos::UNIT_DIAG && unitDiag != Teuchos::NON_UNIT_DIAG, 01168 std::invalid_argument, prefix << "unitDiag has an invalid value " 01169 << unitDiag << ". Valid values are Teuchos::UNIT_DIAG=" 01170 << Teuchos::UNIT_DIAG << " and Teuchos::NON_UNIT_DIAG=" 01171 << Teuchos::NON_UNIT_DIAG << "."); 01172 TEUCHOS_TEST_FOR_EXCEPTION( 01173 unitDiag != Teuchos::UNIT_DIAG && numRows > 0 && ptr(numRows) == 0, 01174 std::invalid_argument, prefix << "Triangular solve with an empty matrix " 01175 "is only valid if the matrix has an implicit unit diagonal. This matrix " 01176 "does not."); 01177 TEUCHOS_TEST_FOR_EXCEPTION( 01178 trans != Teuchos::NO_TRANS && trans != Teuchos::TRANS && 01179 trans != Teuchos::CONJ_TRANS, 01180 std::invalid_argument, prefix << "trans has an invalid value " << trans 01181 << ". Valid values are Teuchos::NO_TRANS=" << Teuchos::NO_TRANS << ", " 01182 << "Teuchos::TRANS=" << Teuchos::TRANS << ", and Teuchos::CONJ_TRANS=" 01183 << Teuchos::CONJ_TRANS << "."); 01184 01185 TEUCHOS_TEST_FOR_EXCEPTION( 01186 numRows != static_cast<LO> (X.dimension_0 ()), std::invalid_argument, 01187 prefix << "numRows = " << numRows << " != X.dimension_0() = " << 01188 X.dimension_0 () << "."); 01189 TEUCHOS_TEST_FOR_EXCEPTION( 01190 numCols != static_cast<LO> (Y.dimension_0 ()), std::invalid_argument, 01191 prefix << "numCols = " << numCols << " != Y.dimension_0() = " << 01192 Y.dimension_0 () << "."); 01193 TEUCHOS_TEST_FOR_EXCEPTION( 01194 numVecs != static_cast<LO> (Y.dimension_1 ()), std::invalid_argument, 01195 prefix << "X.dimension_1 () = " << numVecs << " != Y.dimension_1 () = " 01196 << Y.dimension_1 () << "."); 01197 01198 if (trans == Teuchos::NO_TRANS) { // no transpose 01199 if (triUplo == Teuchos::LOWER_TRI) { // lower triangular 01200 if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal 01201 lowerTriSolveCsrUnitDiag (X, A, Y); 01202 } else { // non unit diagonal 01203 lowerTriSolveCsr (X, A, Y); 01204 } 01205 } else { // upper triangular 01206 if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal 01207 upperTriSolveCsrUnitDiag (X, A, Y); 01208 } else { // non unit diagonal 01209 upperTriSolveCsr (X, A, Y); 01210 } 01211 } 01212 } 01213 else if (trans == Teuchos::TRANS) { // transpose 01214 if (triUplo == Teuchos::LOWER_TRI) { // lower triangular 01215 // Transposed lower tri CSR => upper tri CSC. 01216 if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal 01217 upperTriSolveCscUnitDiag (X, A, Y); 01218 } else { // non unit diagonal 01219 upperTriSolveCsc (X, A, Y); 01220 } 01221 } 01222 else { // upper triangular 01223 // Transposed upper tri CSR => lower tri CSC. 01224 if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal 01225 lowerTriSolveCscUnitDiag (X, A, Y); 01226 } else { // non unit diagonal 01227 lowerTriSolveCsc (X, A, Y); 01228 } 01229 } 01230 } 01231 else if (trans == Teuchos::CONJ_TRANS) { // conj transpose 01232 if (triUplo == Teuchos::LOWER_TRI) { // lower triangular 01233 // Transposed lower tri CSR => upper tri CSC. 01234 if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal 01235 upperTriSolveCscUnitDiagConj (X, A, Y); 01236 } else { // non unit diagonal 01237 upperTriSolveCscConj (X, A, Y); 01238 } 01239 } 01240 else { // upper triangular 01241 // Transposed upper tri CSR => lower tri CSC. 01242 if (unitDiag == Teuchos::UNIT_DIAG) { // unit diagonal 01243 lowerTriSolveCscUnitDiagConj (X, A, Y); 01244 } else { // non unit diagonal 01245 lowerTriSolveCscConj (X, A, Y); 01246 } 01247 } 01248 } 01249 } 01250 01251 01252 } // namespace Sequential 01253 } // namespace Kokkos 01254 01255 #endif // KOKKOS_SEQUENTIAL_SPARSEKERNELS_HPP
1.7.6.1