|
Sierra Toolkit
Version of the Day
|
00001 /*------------------------------------------------------------------------*/ 00002 /* Copyright 2010 Sandia Corporation. */ 00003 /* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ 00004 /* license for use of this work by or on behalf of the U.S. Government. */ 00005 /* Export of this program may require a license from the */ 00006 /* United States Government. */ 00007 /*------------------------------------------------------------------------*/ 00008 00009 #ifndef stk_algsup_CudaMemoryMgr_hpp 00010 #define stk_algsup_CudaMemoryMgr_hpp 00011 00012 #include <stdio.h> 00013 #include <stdexcept> 00014 #include <map> 00015 00016 #include <stk_algsup/CudaCall.hpp> 00017 00018 namespace stk_classic { 00019 00031 class CudaMemoryMgr { 00032 public: 00034 CudaMemoryMgr() 00035 : host_to_device_map(), 00036 device_to_host_map() 00037 {} 00038 00043 virtual ~CudaMemoryMgr(); 00044 00045 #ifdef STK_HAVE_CUDA 00046 00055 template<class T> 00056 T* get_buffer(const T* host_ptr, size_t buf_size); 00057 00062 template<class T> 00063 T* get_buffer(size_t buf_size); 00064 00069 template<class T> 00070 void destroy_buffer(T*& device_ptr); 00071 00076 template<class T> 00077 void copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr); 00078 00083 template<class T> 00084 void copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr); 00085 00086 static CudaMemoryMgr& get_singleton(); 00087 00088 #endif 00089 00090 private: 00091 std::map<const void*,void*> host_to_device_map; 00092 std::map<const void*,const void*> device_to_host_map; 00093 };//class CudaMemoryMgr 00094 00095 #ifdef STK_HAVE_CUDA 00096 00097 //------------------------------------------------------------------------------ 00098 template<class T> 00099 inline 00100 T* CudaMemoryMgr::get_buffer(const T* host_ptr, size_t buf_size) 00101 { 00102 T* device_ptr = NULL; 00103 00104 std::map<const void*,void*>::iterator iter = host_to_device_map.find(host_ptr); 00105 00106 if (iter == host_to_device_map.end()) { 00107 void* void_device_ptr = NULL; 00108 CUDA_CALL( cudaMalloc( &void_device_ptr, sizeof(T)*buf_size) ); 00109 device_ptr = reinterpret_cast<T*>(void_device_ptr); 00110 00111 host_to_device_map.insert( std::make_pair(host_ptr, device_ptr) ); 00112 device_to_host_map.insert( std::make_pair(device_ptr, host_ptr) ); 00113 } 00114 else { 00115 device_ptr = reinterpret_cast<T*>(iter->second); 00116 } 00117 00118 return device_ptr; 00119 } 00120 00121 //------------------------------------------------------------------------------ 00122 template<class T> 00123 inline 00124 T* CudaMemoryMgr::get_buffer(size_t buf_size) 00125 { 00126 T* device_ptr = NULL; 00127 00128 CUDA_CALL( cudaMalloc( (void**)&device_ptr, sizeof(T)*buf_size) ); 00129 00130 device_to_host_map.insert( std::make_pair(device_ptr, NULL) ); 00131 00132 return device_ptr; 00133 } 00134 00135 //------------------------------------------------------------------------------ 00136 template<class T> 00137 inline 00138 void CudaMemoryMgr::destroy_buffer(T*& device_ptr) 00139 { 00140 std::map<const void*,const void*>::iterator iter = device_to_host_map.find(device_ptr); 00141 if (iter != device_to_host_map.end()) { 00142 const void* host_ptr = iter->second; 00143 if (host_ptr != NULL) { 00144 std::map<const void*,void*>::iterator iter2 = host_to_device_map.find(host_ptr); 00145 if (iter2 != host_to_device_map.end()) { 00146 host_to_device_map.erase(iter2); 00147 } 00148 } 00149 CUDA_CALL( cudaFree(device_ptr) ); 00150 device_ptr = NULL; 00151 device_to_host_map.erase(iter); 00152 } 00153 } 00154 00155 //------------------------------------------------------------------------------ 00156 template<class T> 00157 inline 00158 void CudaMemoryMgr::copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr) 00159 { 00160 std::map<const void*,const void*>::iterator iter = device_to_host_map.find(device_ptr); 00161 if (iter == device_to_host_map.end()) { 00162 //failed to find device_ptr in device_to_host_map 00163 throw std::runtime_error("CudaMemoryMgr::copy_to_buffer ERROR, device_ptr not known."); 00164 } 00165 00166 CUDA_CALL( cudaMemcpy( device_ptr, host_ptr, sizeof(T)*buf_size, cudaMemcpyHostToDevice) ); 00167 } 00168 00169 //------------------------------------------------------------------------------ 00170 template<class T> 00171 inline 00172 void CudaMemoryMgr::copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr) 00173 { 00174 std::map<const void*,const void*>::iterator iter = device_to_host_map.find(device_ptr); 00175 if (iter == device_to_host_map.end()) { 00176 //failed to find device_ptr in device_to_host_map 00177 throw std::runtime_error("CudaMemoryMgr::copy_from_buffer ERROR, device_ptr not known."); 00178 } 00179 00180 CUDA_CALL( cudaMemcpy( host_ptr, device_ptr, sizeof(T)*buf_size, cudaMemcpyDeviceToHost) ); 00181 } 00182 00183 #endif 00184 00185 }//namespace stk_classic 00186 00187 #endif 00188