Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends
Kokkos_Sequential_AddSparseMatrices.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP
00043 #define KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP
00044 
00045 #include <Kokkos_ConfigDefs.hpp>
00046 #include <functional> // std::plus
00047 
00048 namespace Kokkos {
00049 namespace Sequential {
00050 
00070 template<class OffsetType, class OrdinalType>
00071 OrdinalType
00072 countMergedRowEntries (const OffsetType ptr1[],
00073                        const OrdinalType ind1[],
00074                        const OffsetType ptr2[],
00075                        const OrdinalType ind2[],
00076                        const OrdinalType i);
00077 
00104 template<class OffsetType,
00105          class OrdinalType,
00106          class ScalarType,
00107          class BinaryFunctionType>
00108 OrdinalType
00109 mergeRows (const OffsetType ptrOut[],
00110            OrdinalType indOut[],
00111            ScalarType valOut[],
00112            const OffsetType ptr1[],
00113            const OrdinalType ind1[],
00114            const ScalarType val1[],
00115            const OffsetType ptr2[],
00116            const OrdinalType ind2[],
00117            const ScalarType val2[],
00118            const OrdinalType i,
00119            BinaryFunctionType f);
00120 
00126 template<class ScalarType>
00127 class AddMatrixEntries {
00128 public:
00129   AddMatrixEntries (const ScalarType& alpha, const ScalarType& beta) :
00130     alpha_ (alpha), beta_ (beta) {}
00131 
00132   inline ScalarType
00133   operator () (const ScalarType& A_ij, const ScalarType& B_ij) const {
00134     return alpha_ * A_ij + beta_ * B_ij;
00135   }
00136 private:
00137   const ScalarType alpha_;
00138   const ScalarType beta_;
00139 };
00140 
00160 template<class OffsetType, class OrdinalType, class ScalarType>
00161 void
00162 addSparseMatrices (OffsetType*& ptrResult,
00163                    OrdinalType*& indResult,
00164                    ScalarType*& valResult,
00165                    const ScalarType alpha,
00166                    const OffsetType ptr1[],
00167                    const OrdinalType ind1[],
00168                    const ScalarType val1[],
00169                    const ScalarType beta,
00170                    const OffsetType ptr2[],
00171                    const OrdinalType ind2[],
00172                    const ScalarType val2[],
00173                    const OrdinalType numRows);
00174 
00175 template<class OffsetType, class OrdinalType>
00176 OrdinalType
00177 countMergedRowEntries (const OffsetType ptr1[],
00178                        const OrdinalType ind1[],
00179                        const OffsetType ptr2[],
00180                        const OrdinalType ind2[],
00181                        const OrdinalType i)
00182 {
00183   const OffsetType start1 = ptr1[i];
00184   const OffsetType end1 = ptr1[i+1];
00185   const OffsetType start2 = ptr2[i];
00186   const OffsetType end2 = ptr2[i+1];
00187 
00188   OffsetType mark1 = start1, mark2 = start2;
00189   OrdinalType count = 0;
00190   while (mark1 < end1 && mark2 < end2) {
00191     if (ind1[mark1] == ind2[mark2]) {
00192       ++mark1;
00193       ++mark2;
00194     } else if (ind1[mark1] < ind2[mark2]) {
00195       ++mark1;
00196     } else { // ind1[mark1] > ind2[mark2]
00197       ++mark2;
00198     }
00199     ++count;
00200   }
00201   // Include any remaining entries.
00202   count += end1 - mark1;
00203   count += end2 - mark2;
00204   return count;
00205 }
00206 
00207 template<class OffsetType,
00208          class OrdinalType,
00209          class ScalarType,
00210          class BinaryFunctionType>
00211 OrdinalType
00212 mergeRows (const OffsetType ptrOut[],
00213            OrdinalType indOut[],
00214            ScalarType valOut[],
00215            const OffsetType ptr1[],
00216            const OrdinalType ind1[],
00217            const ScalarType val1[],
00218            const OffsetType ptr2[],
00219            const OrdinalType ind2[],
00220            const ScalarType val2[],
00221            const OrdinalType i,
00222            BinaryFunctionType f)
00223 {
00224   const OffsetType startOut = ptrOut[i];
00225   const OffsetType endOut = ptrOut[i+1];
00226   const OffsetType start1 = ptr1[i];
00227   const OffsetType end1 = ptr1[i+1];
00228   const OffsetType start2 = ptr2[i];
00229   const OffsetType end2 = ptr2[i+1];
00230   const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero ();
00231 
00232   OffsetType mark1 = start1, mark2 = start2, markOut = startOut;
00233   while (mark1 < end1 && mark2 < end2 && markOut < endOut) {
00234     if (ind1[mark1] == ind2[mark2]) {
00235       indOut[markOut] = ind1[mark1];
00236       valOut[markOut] = f (val1[mark1], val2[mark2]);
00237       ++mark1;
00238       ++mark2;
00239     } else if (ind1[mark1] < ind2[mark2]) {
00240       indOut[markOut] = ind1[mark1];
00241       // This makes sense if f(x,y) is alpha*x + beta*y.
00242       valOut[markOut] = f (val1[mark1], zero);
00243       ++mark1;
00244     } else { // ind1[mark1] > ind2[mark2]
00245       indOut[markOut] = ind2[mark2];
00246       // This makes sense if f(x,y) is alpha*x + beta*y.
00247       valOut[markOut] = f (zero, val2[mark2]);
00248       ++mark2;
00249     }
00250     ++markOut;
00251   }
00252   // Include any remaining entries.
00253   while (mark1 < end1 && markOut < endOut) {
00254     indOut[markOut] = ind1[mark1];
00255     // This makes sense if f(x,y) is alpha*x + beta*y.
00256     valOut[markOut] = f (val1[mark1], zero);
00257     ++mark1;
00258     ++markOut;
00259   }
00260   while (mark2 < end2 && markOut < endOut) {
00261     indOut[markOut] = ind2[mark2];
00262     // This makes sense if f(x,y) is alpha*x + beta*y.
00263     valOut[markOut] = f (zero, val2[mark2]);
00264     ++mark2;
00265     ++markOut;
00266   }
00267   // This is a logic error, because it means either that
00268   // countMergedRowEntries didn't work, or that it was called
00269   // incorrectly for this row.
00270   TEUCHOS_TEST_FOR_EXCEPTION(
00271     markOut >= endOut && (mark1 < end1 || mark2 < end2),
00272     std::logic_error,
00273     "Kokkos::Sequential::mergeRows: Row " << i << " of the output array has "
00274     << (end1 - mark1) << " + " << (end2 - mark2) << " too few entries.");
00275   return markOut;
00276 }
00277 
00278 template<class OffsetType, class OrdinalType, class ScalarType>
00279 void
00280 addSparseMatrices (OffsetType*& ptrResult,
00281                    OrdinalType*& indResult,
00282                    ScalarType*& valResult,
00283                    const ScalarType alpha,
00284                    const OffsetType ptr1[],
00285                    const OrdinalType ind1[],
00286                    const ScalarType val1[],
00287                    const ScalarType beta,
00288                    const OffsetType ptr2[],
00289                    const OrdinalType ind2[],
00290                    const ScalarType val2[],
00291                    const OrdinalType numRows)
00292 {
00293   typedef Teuchos::ScalarTraits<ScalarType> STS;
00294 
00295   // We don't allocate using ArrayRCP's constructor (that takes the
00296   // array size), because it initializes the arrays itself.  We want
00297   // to initialize ourselves, to ensure first-touch allocation.
00298 
00299   OffsetType* ptrOut = NULL;
00300   OrdinalType* indOut = NULL;
00301   ScalarType* valOut = NULL;
00302 
00303   // Allocate and fill ptrOut.
00304   try {
00305     // We should be careful not to initialize the array here, so that
00306     // the parallelizable loop that assigns to it below will first-touch
00307     // initialize it.
00308     ptrOut = new OffsetType [numRows + 1];
00309 
00310     // Expect that the column indices are sorted and merged.  Compute
00311     // the number of entries in each row.  We'll do this in place in the
00312     // row offsets array.  This may be done safely in parallel.  Doing
00313     // so would first-touch initialize ptrOut.
00314 
00315     if (alpha == STS::zero ()) {
00316       if (beta == STS::zero ()) {
00317         // Special case: alpha == 0, beta == 0.
00318         // The resulting sum has zero entries.
00319         std::fill (ptrOut, ptrOut + numRows + 1, Teuchos::as<OffsetType> (0));
00320         ptrResult = ptrOut;
00321         indResult = NULL;
00322         valResult = NULL;
00323         return;
00324       } else { // alpha == 0, beta != 0
00325         // Just copy the second matrix, suitably scaled, into the output.
00326         memcpy (ptrOut, ptr2, (numRows+1) * sizeof (OffsetType));
00327         const OffsetType numEntries = ptr2[numRows];
00328         indOut = new OrdinalType [numEntries];
00329         memcpy (indOut, ind2, numEntries * sizeof (OrdinalType));
00330         valOut = new ScalarType [numEntries];
00331         if (beta == STS::one ()) {
00332           memcpy (valOut, val2, numEntries * sizeof (ScalarType));
00333         } else { // beta != 1
00334           for (OrdinalType k = 0; k < numEntries; ++k) {
00335             valOut[k] = beta * val2[k];
00336           }
00337         }
00338         ptrResult = ptrOut;
00339         indResult = indOut;
00340         valResult = valOut;
00341         return;
00342       }
00343     }
00344     else if (beta == STS::zero ()) { // alpha != 0, beta == 0
00345       // Just copy the first matrix into the output.
00346       memcpy (ptrOut, ptr1, (numRows+1) * sizeof (OffsetType));
00347       const OffsetType numEntries = ptr1[numRows];
00348       indOut = new OrdinalType [numEntries];
00349       memcpy (indOut, ind1, numEntries * sizeof (OrdinalType));
00350       valOut = new ScalarType [numEntries];
00351       if (alpha == STS::one ()) {
00352         memcpy (valOut, val1, numEntries * sizeof (ScalarType));
00353       } else { // alpha != 1
00354         for (OrdinalType k = 0; k < numEntries; ++k) {
00355           valOut[k] = alpha * val1[k];
00356         }
00357       }
00358       ptrResult = ptrOut;
00359       indResult = indOut;
00360       valResult = valOut;
00361       return;
00362     }
00363     else { // alpha != 0 and beta != 0
00364       ptrOut[0] = 0;
00365       for (OrdinalType i = 0; i < numRows; ++i) {
00366         ptrOut[i+1] = countMergedRowEntries (ptr1, ind1, ptr2, ind2, i);
00367       }
00368       // Sum-scan to compute row offsets.
00369       // This may be parallelized via e.g., TBB's parallel_scan().
00370       for (OrdinalType i = 1; i < numRows; ++i) {
00371         ptrOut[i+1] += ptrOut[i];
00372       }
00373     }
00374   } catch (...) {
00375     if (ptrOut != NULL) {
00376       delete [] ptrOut;
00377     }
00378     if (indOut != NULL) {
00379       delete [] indOut;
00380     }
00381     if (valOut != NULL) {
00382       delete [] valOut;
00383     }
00384     throw;
00385   }
00386   //
00387   // Allocate storage for indices and values.
00388   //
00389   const OffsetType numEntries = ptrOut[numRows];
00390 
00391   // Allocate and fill indOut and valOut.
00392   try {
00393     indOut = new OrdinalType [numEntries];
00394     valOut = new ScalarType [numEntries];
00395 
00396     // Merge and add the matrices.  This may be done safely in
00397     // parallel, since all the arrays have correct sizes and writes to
00398     // different rows are independent.  We've also already tested for
00399     // the special cases alpha == 0 and/or beta == 0.
00400     if (alpha == STS::one () && beta == STS::one ()) {
00401       for (OrdinalType i = 0; i < numRows; ++i) {
00402         (void) mergeRows (ptrOut, indOut, valOut,
00403                           ptr1, ind1, val1,
00404                           ptr2, ind2, val2, i,
00405                           std::plus<ScalarType> ());
00406       }
00407     } else {
00408       AddMatrixEntries<ScalarType> f (alpha, beta);
00409       for (OrdinalType i = 0; i < numRows; ++i) {
00410         (void) mergeRows (ptrOut, indOut, valOut,
00411                           ptr1, ind1, val1,
00412                           ptr2, ind2, val2,
00413                           i, f);
00414       }
00415     }
00416   } catch (...) {
00417     if (indOut != NULL) {
00418       delete [] indOut;
00419     }
00420     if (valOut != NULL) {
00421       delete [] valOut;
00422     }
00423     if (ptrOut != NULL) { // we know it's not, but doesn't hurt to test
00424       delete [] ptrOut;
00425     }
00426     throw;
00427   }
00428 
00429   // "Commit" the output arguments.
00430   ptrResult = ptrOut;
00431   indResult = indOut;
00432   valResult = valOut;
00433 }
00434 
00435 } // namespace Sequential
00436 } // namespace Kokkos
00437 
00438 #endif // KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP
00439 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends