|
Kokkos Core Kernels Package
Version of the Day
|
00001 /* 00002 //@HEADER 00003 // ************************************************************************ 00004 // 00005 // Kokkos: Manycore Performance-Portable Multidimensional Arrays 00006 // Copyright (2012) Sandia Corporation 00007 // 00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00009 // the U.S. Government retains certain rights in this software. 00010 // 00011 // Redistribution and use in source and binary forms, with or without 00012 // modification, are permitted provided that the following conditions are 00013 // met: 00014 // 00015 // 1. Redistributions of source code must retain the above copyright 00016 // notice, this list of conditions and the following disclaimer. 00017 // 00018 // 2. Redistributions in binary form must reproduce the above copyright 00019 // notice, this list of conditions and the following disclaimer in the 00020 // documentation and/or other materials provided with the distribution. 00021 // 00022 // 3. Neither the name of the Corporation nor the names of the 00023 // contributors may be used to endorse or promote products derived from 00024 // this software without specific prior written permission. 00025 // 00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00037 // 00038 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) 00039 // 00040 // ************************************************************************ 00041 //@HEADER 00042 */ 00043 00044 #ifndef KOKKOS_SEGMENTED_VIEW_HPP_ 00045 #define KOKKOS_SEGMENTED_VIEW_HPP_ 00046 00047 #include <Kokkos_Core.hpp> 00048 #include <impl/Kokkos_Error.hpp> 00049 #include <cstdio> 00050 00051 namespace Kokkos { 00052 00053 namespace Impl { 00054 00055 template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type> 00056 struct delete_segmented_view; 00057 00058 template<class MemorySpace> 00059 inline 00060 void DeviceSetAllocatableMemorySize(size_t size) { 00061 } 00062 00063 #ifdef KOKKOS_HAVE_CUDA 00064 template<> 00065 inline 00066 void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) { 00067 #ifdef __CUDACC__ 00068 size_t size_limit; 00069 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); 00070 if(size_limit<size) 00071 cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size); 00072 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); 00073 #endif 00074 } 00075 #endif 00076 00077 #ifdef KOKKOS_HAVE_CUDA_UVM 00078 template<> 00079 inline 00080 void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) { 00081 #ifdef __CUDACC__ 00082 size_t size_limit; 00083 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); 00084 if(size_limit<size) 00085 cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size); 00086 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize); 00087 #endif 00088 }; 00089 #endif 00090 00091 00092 } 00093 00094 template< class DataType , 00095 class Arg1Type = void , 00096 class Arg2Type = void , 00097 class Arg3Type = void> 00098 class SegmentedView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > 00099 { 00100 public: 00102 00103 typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; 00104 00106 typedef View< typename traits::array_type , 00107 typename traits::array_layout , 00108 typename traits::memory_space , 00109 Kokkos::MemoryUnmanaged > t_dev ; 00110 00111 00112 private: 00113 Kokkos::View<t_dev*,typename traits::memory_space> segments_; 00114 00115 Kokkos::View<int,typename traits::memory_space> realloc_lock; 00116 Kokkos::View<int,typename traits::memory_space> nsegments_; 00117 00118 size_t segment_length_; 00119 size_t segment_length_m1_; 00120 int max_segments_; 00121 00122 int segment_length_log2; 00123 00124 // Dimensions, cardinality, capacity, and offset computation for 00125 // multidimensional array view of contiguous memory. 00126 // Inherits from Impl::Shape 00127 typedef Impl::ViewOffset< typename traits::shape_type 00128 , typename traits::array_layout 00129 > offset_map_type ; 00130 00131 offset_map_type m_offset_map ; 00132 00133 typedef View< typename traits::array_type , 00134 typename traits::array_layout , 00135 typename traits::memory_space , 00136 typename traits::memory_traits > array_type ; 00137 00138 typedef View< typename traits::const_data_type , 00139 typename traits::array_layout , 00140 typename traits::memory_space , 00141 typename traits::memory_traits > const_type ; 00142 00143 typedef View< typename traits::non_const_data_type , 00144 typename traits::array_layout , 00145 typename traits::memory_space , 00146 typename traits::memory_traits > non_const_type ; 00147 00148 typedef View< typename traits::non_const_data_type , 00149 typename traits::array_layout , 00150 HostSpace , 00151 void > HostMirror ; 00152 00153 template< bool Accessible > 00154 KOKKOS_INLINE_FUNCTION 00155 typename Impl::enable_if< Accessible , typename traits::size_type >::type 00156 dimension_0_intern() const { return nsegments_() * segment_length_ ; } 00157 00158 template< bool Accessible > 00159 KOKKOS_INLINE_FUNCTION 00160 typename Impl::enable_if< ! Accessible , typename traits::size_type >::type 00161 dimension_0_intern() const 00162 { 00163 // In Host space 00164 int n = 0 ; 00165 #if ! defined( __CUDA_ARCH__ ) 00166 Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) ); 00167 #endif 00168 00169 return n * segment_length_ ; 00170 } 00171 00172 public: 00173 00174 enum { Rank = traits::rank }; 00175 00176 KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; } 00177 00178 /* \brief return (current) size of dimension 0 */ 00179 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { 00180 enum { Accessible = Impl::VerifyExecutionCanAccessMemorySpace< 00181 Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value }; 00182 int n = SegmentedView::dimension_0_intern< Accessible >(); 00183 return n ; 00184 } 00185 00186 /* \brief return size of dimension 1 */ 00187 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; } 00188 /* \brief return size of dimension 2 */ 00189 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; } 00190 /* \brief return size of dimension 3 */ 00191 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; } 00192 /* \brief return size of dimension 4 */ 00193 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; } 00194 /* \brief return size of dimension 5 */ 00195 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; } 00196 /* \brief return size of dimension 6 */ 00197 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; } 00198 /* \brief return size of dimension 7 */ 00199 KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; } 00200 00201 /* \brief return size of dimension 2 */ 00202 KOKKOS_INLINE_FUNCTION typename traits::size_type size() const { 00203 return dimension_0() * 00204 m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 * 00205 m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ; 00206 } 00207 00208 template< typename iType > 00209 KOKKOS_INLINE_FUNCTION 00210 typename traits::size_type dimension( const iType & i ) const { 00211 if(i==0) 00212 return dimension_0(); 00213 else 00214 return Impl::dimension( m_offset_map , i ); 00215 } 00216 00217 KOKKOS_INLINE_FUNCTION 00218 typename traits::size_type capacity() { 00219 return segments_.dimension_0() * 00220 m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 * 00221 m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7; 00222 } 00223 00224 KOKKOS_INLINE_FUNCTION 00225 typename traits::size_type get_num_segments() { 00226 enum { Accessible = Impl::VerifyExecutionCanAccessMemorySpace< 00227 Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value }; 00228 int n = SegmentedView::dimension_0_intern< Accessible >(); 00229 return n/segment_length_ ; 00230 } 00231 00232 KOKKOS_INLINE_FUNCTION 00233 typename traits::size_type get_max_segments() { 00234 return max_segments_; 00235 } 00236 00253 template< class LabelType > 00254 SegmentedView(const LabelType & label , 00255 const size_t view_length , 00256 const size_t n0 , 00257 const size_t n1 = 0 , 00258 const size_t n2 = 0 , 00259 const size_t n3 = 0 , 00260 const size_t n4 = 0 , 00261 const size_t n5 = 0 , 00262 const size_t n6 = 0 , 00263 const size_t n7 = 0 00264 ): segment_length_(view_length),segment_length_m1_(view_length-1) 00265 { 00266 segment_length_log2 = -1; 00267 size_t l = segment_length_; 00268 while(l>0) { 00269 l>>=1; 00270 segment_length_log2++; 00271 } 00272 l = 1<<segment_length_log2; 00273 if(l!=segment_length_) 00274 Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length"); 00275 00276 max_segments_ = (n0+segment_length_m1_)/segment_length_; 00277 00278 Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type)); 00279 00280 segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_); 00281 realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock"); 00282 nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews"); 00283 m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 ); 00284 00285 } 00286 00287 KOKKOS_INLINE_FUNCTION 00288 SegmentedView(const SegmentedView& src): 00289 segments_(src.segments_), 00290 realloc_lock (src.realloc_lock), 00291 nsegments_ (src.nsegments_), 00292 segment_length_(src.segment_length_), 00293 segment_length_m1_(src.segment_length_m1_), 00294 max_segments_ (src.max_segments_), 00295 segment_length_log2(src.segment_length_log2), 00296 m_offset_map (src.m_offset_map) 00297 {} 00298 00299 KOKKOS_INLINE_FUNCTION 00300 SegmentedView& operator= (const SegmentedView& src) { 00301 segments_ = src.segments_; 00302 realloc_lock = src.realloc_lock; 00303 nsegments_ = src.nsegments_; 00304 segment_length_= src.segment_length_; 00305 segment_length_m1_= src.segment_length_m1_; 00306 max_segments_ = src.max_segments_; 00307 segment_length_log2= src.segment_length_log2; 00308 m_offset_map = src.m_offset_map; 00309 return *this; 00310 } 00311 00312 ~SegmentedView() { 00313 if (traits::execution_space::in_parallel()) return; 00314 int count = traits::memory_space::count(segments_.ptr_on_device()); 00315 if(count == 1) { 00316 Kokkos::fence(); 00317 typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews"); 00318 Kokkos::deep_copy(h_nviews,nsegments_); 00319 Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this)); 00320 } 00321 } 00322 00323 KOKKOS_INLINE_FUNCTION 00324 t_dev get_segment(const int& i) const { 00325 return segments_[i]; 00326 } 00327 00328 template< class MemberType> 00329 KOKKOS_INLINE_FUNCTION 00330 void grow (MemberType& team_member, const size_t& growSize) const { 00331 if (growSize>max_segments_*segment_length_) { 00332 printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_); 00333 return; 00334 } 00335 if(team_member.team_rank()==0) { 00336 bool too_small = growSize > segment_length_ * nsegments_(); 00337 while(too_small && Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) ) { 00338 too_small = growSize > segment_length_ * nsegments_(); 00339 } 00340 if(too_small) { 00341 while(too_small) { 00342 const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3* 00343 m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7; 00344 typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size]; 00345 00346 segments_(nsegments_()) = 00347 t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7); 00348 nsegments_()++; 00349 too_small = growSize > segment_length_ * nsegments_(); 00350 } 00351 realloc_lock() = 0; 00352 } 00353 } 00354 team_member.team_barrier(); 00355 } 00356 00357 KOKKOS_INLINE_FUNCTION 00358 void grow_non_thread_safe (const size_t& growSize) const { 00359 if (growSize>max_segments_*segment_length_) { 00360 printf ("Exceeding maxSize: %i %i\n", growSize, max_segments_*segment_length_); 00361 return; 00362 } 00363 bool too_small = growSize > segment_length_ * nsegments_(); 00364 if(too_small) { 00365 while(too_small) { 00366 const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3* 00367 m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7; 00368 typename traits::non_const_value_type* const ptr = 00369 new typename traits::non_const_value_type[alloc_size]; 00370 00371 segments_(nsegments_()) = 00372 t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2, 00373 m_offset_map.N3, m_offset_map.N4, m_offset_map.N5, 00374 m_offset_map.N6, m_offset_map.N7); 00375 nsegments_()++; 00376 too_small = growSize > segment_length_ * nsegments_(); 00377 } 00378 } 00379 } 00380 00381 template< typename iType0 > 00382 KOKKOS_FORCEINLINE_FUNCTION 00383 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 1, iType0 >::type 00384 operator() ( const iType0 & i0 ) const 00385 { 00386 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_)); 00387 } 00388 00389 template< typename iType0 , typename iType1 > 00390 KOKKOS_FORCEINLINE_FUNCTION 00391 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 2, 00392 iType0 , iType1>::type 00393 operator() ( const iType0 & i0 , const iType1 & i1 ) const 00394 { 00395 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1); 00396 } 00397 00398 template< typename iType0 , typename iType1 , typename iType2 > 00399 KOKKOS_FORCEINLINE_FUNCTION 00400 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 3, 00401 iType0 , iType1 , iType2 >::type 00402 operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const 00403 { 00404 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2); 00405 } 00406 00407 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > 00408 KOKKOS_FORCEINLINE_FUNCTION 00409 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 4, 00410 iType0 , iType1 , iType2 , iType3 >::type 00411 operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const 00412 { 00413 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3); 00414 } 00415 00416 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , 00417 typename iType4 > 00418 KOKKOS_FORCEINLINE_FUNCTION 00419 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 5, 00420 iType0 , iType1 , iType2 , iType3 , iType4 >::type 00421 operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , 00422 const iType4 & i4 ) const 00423 { 00424 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4); 00425 } 00426 00427 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , 00428 typename iType4 , typename iType5 > 00429 KOKKOS_FORCEINLINE_FUNCTION 00430 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 6, 00431 iType0 , iType1 , iType2 , iType3 , iType4 , iType5>::type 00432 operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , 00433 const iType4 & i4 , const iType5 & i5 ) const 00434 { 00435 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5); 00436 } 00437 00438 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , 00439 typename iType4 , typename iType5 , typename iType6 > 00440 KOKKOS_FORCEINLINE_FUNCTION 00441 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 7, 00442 iType0 , iType1 , iType2 , iType3 , iType4 , iType5 , iType6>::type 00443 operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , 00444 const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const 00445 { 00446 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6); 00447 } 00448 00449 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 , 00450 typename iType4 , typename iType5 , typename iType6 , typename iType7 > 00451 KOKKOS_FORCEINLINE_FUNCTION 00452 typename Impl::ViewEnableArrayOper< typename traits::value_type & , traits, typename traits::array_layout, 8, 00453 iType0 , iType1 , iType2 , iType3 , iType4 , iType5 , iType6 , iType7>::type 00454 operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , 00455 const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const 00456 { 00457 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7); 00458 } 00459 }; 00460 00461 namespace Impl { 00462 template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type> 00463 struct delete_segmented_view { 00464 typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type; 00465 typedef typename view_type::execution_space device_type; 00466 00467 view_type view_; 00468 delete_segmented_view(view_type view):view_(view) { 00469 } 00470 00471 KOKKOS_INLINE_FUNCTION 00472 void operator() (int i) const { 00473 delete [] view_.get_segment(i).ptr_on_device(); 00474 } 00475 }; 00476 00477 } 00478 } 00479 00480 #endif
1.7.6.1