|
Kokkos Core Kernels Package
Version of the Day
|
00001 //@HEADER 00002 // ************************************************************************ 00003 // 00004 // Kokkos: Node API and Parallel Node Kernels 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 //@HEADER 00041 00042 #ifndef KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP 00043 #define KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP 00044 00045 #include <Kokkos_ConfigDefs.hpp> 00046 #include <functional> // std::plus 00047 00048 namespace Kokkos { 00049 namespace Sequential { 00050 00070 template<class OffsetType, class OrdinalType> 00071 OrdinalType 00072 countMergedRowEntries (const OffsetType ptr1[], 00073 const OrdinalType ind1[], 00074 const OffsetType ptr2[], 00075 const OrdinalType ind2[], 00076 const OrdinalType i); 00077 00104 template<class OffsetType, 00105 class OrdinalType, 00106 class ScalarType, 00107 class BinaryFunctionType> 00108 OrdinalType 00109 mergeRows (const OffsetType ptrOut[], 00110 OrdinalType indOut[], 00111 ScalarType valOut[], 00112 const OffsetType ptr1[], 00113 const OrdinalType ind1[], 00114 const ScalarType val1[], 00115 const OffsetType ptr2[], 00116 const OrdinalType ind2[], 00117 const ScalarType val2[], 00118 const OrdinalType i, 00119 BinaryFunctionType f); 00120 00126 template<class ScalarType> 00127 class AddMatrixEntries { 00128 public: 00129 AddMatrixEntries (const ScalarType& alpha, const ScalarType& beta) : 00130 alpha_ (alpha), beta_ (beta) {} 00131 00132 inline ScalarType 00133 operator () (const ScalarType& A_ij, const ScalarType& B_ij) const { 00134 return alpha_ * A_ij + beta_ * B_ij; 00135 } 00136 private: 00137 const ScalarType alpha_; 00138 const ScalarType beta_; 00139 }; 00140 00160 template<class OffsetType, class OrdinalType, class ScalarType> 00161 void 00162 addSparseMatrices (OffsetType*& ptrResult, 00163 OrdinalType*& indResult, 00164 ScalarType*& valResult, 00165 const ScalarType alpha, 00166 const OffsetType ptr1[], 00167 const OrdinalType ind1[], 00168 const ScalarType val1[], 00169 const ScalarType beta, 00170 const OffsetType ptr2[], 00171 const OrdinalType ind2[], 00172 const ScalarType val2[], 00173 const OrdinalType numRows); 00174 00175 template<class OffsetType, class OrdinalType> 00176 OrdinalType 00177 countMergedRowEntries (const OffsetType ptr1[], 00178 const OrdinalType ind1[], 00179 const OffsetType ptr2[], 00180 const OrdinalType ind2[], 00181 const OrdinalType i) 00182 { 00183 const OffsetType start1 = ptr1[i]; 00184 const OffsetType end1 = ptr1[i+1]; 00185 const OffsetType start2 = ptr2[i]; 00186 const OffsetType end2 = ptr2[i+1]; 00187 00188 OffsetType mark1 = start1, mark2 = start2; 00189 OrdinalType count = 0; 00190 while (mark1 < end1 && mark2 < end2) { 00191 if (ind1[mark1] == ind2[mark2]) { 00192 ++mark1; 00193 ++mark2; 00194 } else if (ind1[mark1] < ind2[mark2]) { 00195 ++mark1; 00196 } else { // ind1[mark1] > ind2[mark2] 00197 ++mark2; 00198 } 00199 ++count; 00200 } 00201 // Include any remaining entries. 00202 count += end1 - mark1; 00203 count += end2 - mark2; 00204 return count; 00205 } 00206 00207 template<class OffsetType, 00208 class OrdinalType, 00209 class ScalarType, 00210 class BinaryFunctionType> 00211 OrdinalType 00212 mergeRows (const OffsetType ptrOut[], 00213 OrdinalType indOut[], 00214 ScalarType valOut[], 00215 const OffsetType ptr1[], 00216 const OrdinalType ind1[], 00217 const ScalarType val1[], 00218 const OffsetType ptr2[], 00219 const OrdinalType ind2[], 00220 const ScalarType val2[], 00221 const OrdinalType i, 00222 BinaryFunctionType f) 00223 { 00224 const OffsetType startOut = ptrOut[i]; 00225 const OffsetType endOut = ptrOut[i+1]; 00226 const OffsetType start1 = ptr1[i]; 00227 const OffsetType end1 = ptr1[i+1]; 00228 const OffsetType start2 = ptr2[i]; 00229 const OffsetType end2 = ptr2[i+1]; 00230 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero (); 00231 00232 OffsetType mark1 = start1, mark2 = start2, markOut = startOut; 00233 while (mark1 < end1 && mark2 < end2 && markOut < endOut) { 00234 if (ind1[mark1] == ind2[mark2]) { 00235 indOut[markOut] = ind1[mark1]; 00236 valOut[markOut] = f (val1[mark1], val2[mark2]); 00237 ++mark1; 00238 ++mark2; 00239 } else if (ind1[mark1] < ind2[mark2]) { 00240 indOut[markOut] = ind1[mark1]; 00241 // This makes sense if f(x,y) is alpha*x + beta*y. 00242 valOut[markOut] = f (val1[mark1], zero); 00243 ++mark1; 00244 } else { // ind1[mark1] > ind2[mark2] 00245 indOut[markOut] = ind2[mark2]; 00246 // This makes sense if f(x,y) is alpha*x + beta*y. 00247 valOut[markOut] = f (zero, val2[mark2]); 00248 ++mark2; 00249 } 00250 ++markOut; 00251 } 00252 // Include any remaining entries. 00253 while (mark1 < end1 && markOut < endOut) { 00254 indOut[markOut] = ind1[mark1]; 00255 // This makes sense if f(x,y) is alpha*x + beta*y. 00256 valOut[markOut] = f (val1[mark1], zero); 00257 ++mark1; 00258 ++markOut; 00259 } 00260 while (mark2 < end2 && markOut < endOut) { 00261 indOut[markOut] = ind2[mark2]; 00262 // This makes sense if f(x,y) is alpha*x + beta*y. 00263 valOut[markOut] = f (zero, val2[mark2]); 00264 ++mark2; 00265 ++markOut; 00266 } 00267 // This is a logic error, because it means either that 00268 // countMergedRowEntries didn't work, or that it was called 00269 // incorrectly for this row. 00270 TEUCHOS_TEST_FOR_EXCEPTION( 00271 markOut >= endOut && (mark1 < end1 || mark2 < end2), 00272 std::logic_error, 00273 "Kokkos::Sequential::mergeRows: Row " << i << " of the output array has " 00274 << (end1 - mark1) << " + " << (end2 - mark2) << " too few entries."); 00275 return markOut; 00276 } 00277 00278 template<class OffsetType, class OrdinalType, class ScalarType> 00279 void 00280 addSparseMatrices (OffsetType*& ptrResult, 00281 OrdinalType*& indResult, 00282 ScalarType*& valResult, 00283 const ScalarType alpha, 00284 const OffsetType ptr1[], 00285 const OrdinalType ind1[], 00286 const ScalarType val1[], 00287 const ScalarType beta, 00288 const OffsetType ptr2[], 00289 const OrdinalType ind2[], 00290 const ScalarType val2[], 00291 const OrdinalType numRows) 00292 { 00293 typedef Teuchos::ScalarTraits<ScalarType> STS; 00294 00295 // We don't allocate using ArrayRCP's constructor (that takes the 00296 // array size), because it initializes the arrays itself. We want 00297 // to initialize ourselves, to ensure first-touch allocation. 00298 00299 OffsetType* ptrOut = NULL; 00300 OrdinalType* indOut = NULL; 00301 ScalarType* valOut = NULL; 00302 00303 // Allocate and fill ptrOut. 00304 try { 00305 // We should be careful not to initialize the array here, so that 00306 // the parallelizable loop that assigns to it below will first-touch 00307 // initialize it. 00308 ptrOut = new OffsetType [numRows + 1]; 00309 00310 // Expect that the column indices are sorted and merged. Compute 00311 // the number of entries in each row. We'll do this in place in the 00312 // row offsets array. This may be done safely in parallel. Doing 00313 // so would first-touch initialize ptrOut. 00314 00315 if (alpha == STS::zero ()) { 00316 if (beta == STS::zero ()) { 00317 // Special case: alpha == 0, beta == 0. 00318 // The resulting sum has zero entries. 00319 std::fill (ptrOut, ptrOut + numRows + 1, Teuchos::as<OffsetType> (0)); 00320 ptrResult = ptrOut; 00321 indResult = NULL; 00322 valResult = NULL; 00323 return; 00324 } else { // alpha == 0, beta != 0 00325 // Just copy the second matrix, suitably scaled, into the output. 00326 memcpy (ptrOut, ptr2, (numRows+1) * sizeof (OffsetType)); 00327 const OffsetType numEntries = ptr2[numRows]; 00328 indOut = new OrdinalType [numEntries]; 00329 memcpy (indOut, ind2, numEntries * sizeof (OrdinalType)); 00330 valOut = new ScalarType [numEntries]; 00331 if (beta == STS::one ()) { 00332 memcpy (valOut, val2, numEntries * sizeof (ScalarType)); 00333 } else { // beta != 1 00334 for (OrdinalType k = 0; k < numEntries; ++k) { 00335 valOut[k] = beta * val2[k]; 00336 } 00337 } 00338 ptrResult = ptrOut; 00339 indResult = indOut; 00340 valResult = valOut; 00341 return; 00342 } 00343 } 00344 else if (beta == STS::zero ()) { // alpha != 0, beta == 0 00345 // Just copy the first matrix into the output. 00346 memcpy (ptrOut, ptr1, (numRows+1) * sizeof (OffsetType)); 00347 const OffsetType numEntries = ptr1[numRows]; 00348 indOut = new OrdinalType [numEntries]; 00349 memcpy (indOut, ind1, numEntries * sizeof (OrdinalType)); 00350 valOut = new ScalarType [numEntries]; 00351 if (alpha == STS::one ()) { 00352 memcpy (valOut, val1, numEntries * sizeof (ScalarType)); 00353 } else { // alpha != 1 00354 for (OrdinalType k = 0; k < numEntries; ++k) { 00355 valOut[k] = alpha * val1[k]; 00356 } 00357 } 00358 ptrResult = ptrOut; 00359 indResult = indOut; 00360 valResult = valOut; 00361 return; 00362 } 00363 else { // alpha != 0 and beta != 0 00364 ptrOut[0] = 0; 00365 for (OrdinalType i = 0; i < numRows; ++i) { 00366 ptrOut[i+1] = countMergedRowEntries (ptr1, ind1, ptr2, ind2, i); 00367 } 00368 // Sum-scan to compute row offsets. 00369 // This may be parallelized via e.g., TBB's parallel_scan(). 00370 for (OrdinalType i = 1; i < numRows; ++i) { 00371 ptrOut[i+1] += ptrOut[i]; 00372 } 00373 } 00374 } catch (...) { 00375 if (ptrOut != NULL) { 00376 delete [] ptrOut; 00377 } 00378 if (indOut != NULL) { 00379 delete [] indOut; 00380 } 00381 if (valOut != NULL) { 00382 delete [] valOut; 00383 } 00384 throw; 00385 } 00386 // 00387 // Allocate storage for indices and values. 00388 // 00389 const OffsetType numEntries = ptrOut[numRows]; 00390 00391 // Allocate and fill indOut and valOut. 00392 try { 00393 indOut = new OrdinalType [numEntries]; 00394 valOut = new ScalarType [numEntries]; 00395 00396 // Merge and add the matrices. This may be done safely in 00397 // parallel, since all the arrays have correct sizes and writes to 00398 // different rows are independent. We've also already tested for 00399 // the special cases alpha == 0 and/or beta == 0. 00400 if (alpha == STS::one () && beta == STS::one ()) { 00401 for (OrdinalType i = 0; i < numRows; ++i) { 00402 (void) mergeRows (ptrOut, indOut, valOut, 00403 ptr1, ind1, val1, 00404 ptr2, ind2, val2, i, 00405 std::plus<ScalarType> ()); 00406 } 00407 } else { 00408 AddMatrixEntries<ScalarType> f (alpha, beta); 00409 for (OrdinalType i = 0; i < numRows; ++i) { 00410 (void) mergeRows (ptrOut, indOut, valOut, 00411 ptr1, ind1, val1, 00412 ptr2, ind2, val2, 00413 i, f); 00414 } 00415 } 00416 } catch (...) { 00417 if (indOut != NULL) { 00418 delete [] indOut; 00419 } 00420 if (valOut != NULL) { 00421 delete [] valOut; 00422 } 00423 if (ptrOut != NULL) { // we know it's not, but doesn't hurt to test 00424 delete [] ptrOut; 00425 } 00426 throw; 00427 } 00428 00429 // "Commit" the output arguments. 00430 ptrResult = ptrOut; 00431 indResult = indOut; 00432 valResult = valOut; 00433 } 00434 00435 } // namespace Sequential 00436 } // namespace Kokkos 00437 00438 #endif // KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP 00439
1.7.6.1