apache · liyuchenmike · Sep 8, 2016 · Sep 14, 2016 · Sep 6, 2016 · Sep 7, 2016
diff --git a/include/singa/core/common.h b/include/singa/core/common.h
@@ -36,7 +36,10 @@
 
 
 #ifdef USE_OPENCL
-#include "singa/utils/opencl_utils.h"
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#include <CL/cl2.hpp>
+#include <unordered_map>
 #endif  // USE_OPENCL
 
 using std::atomic;
@@ -62,6 +65,9 @@ class Block {
   // Disabled as it is not used currently.
   // Block(void* ptr, size_t size, size_t offset, std::shared_ptr<atomic<int>>
   //  ref) : data_(ptr), size_(size), offset_(offset), ref_count_(ref) {}
+
+	// TODO(wangwei) check if the set is correct and add lock if shared structure is allowed
+	void set_data(void* ptr) { data_ = ptr; }  // overwrites data_; the old pointer is not freed here
   void* mutable_data() {
     initialized_ = true;
     return static_cast<char*>(data_) + offset_;
@@ -107,8 +113,9 @@ typedef struct _Context {
 #endif // USE_CUDA
 
 #ifdef USE_OPENCL
-  // This stores the context ID of the OpenCL context controlled by ViennaCL.
-  long vcl_ctx_id;
+  std::shared_ptr<std::unordered_map<std::string, cl::Kernel>> kernels;
+  cl::CommandQueue ocl_cmdq;
+  cl::Context ocl_ctx;
 #endif
 
 } Context;

diff --git a/include/singa/core/memory.h b/include/singa/core/memory.h
@@ -23,6 +23,7 @@
 #include <atomic>
 #include "singa/proto/core.pb.h"
 #include "singa/singa_config.h"
+#include "singa/core/common.h"
 
 #ifdef USE_CUDA
 #include "cnmem.h"
@@ -50,6 +51,57 @@ class DeviceMemPool {
 //  size_t init_size_ = 0, max_size_ = 0;
 };
 
+// Host memory pool made of fixed-size units (spelled "uint" in the identifiers).
+// A request that is too large, or made when no unit is free, goes to malloc().
+class CppMemPool {
+ public:
+  // init_size_mb: initial pool size (MB);
+  // uint_size_kb: size of each memory unit in the pool (KB), meta data included.
+  CppMemPool(size_t init_size_mb = 256, size_t uint_size_kb = 1);
+
+  // The pool owns pMemPool and frees it in the destructor, so an implicit copy
+  // would free the same memory twice; copying is therefore disabled.
+  CppMemPool(const CppMemPool&) = delete;
+  CppMemPool& operator=(const CppMemPool&) = delete;
+
+  // Re-initializes the pool with a new size (MB) and unit size (KB). If units
+  // are still allocated, their blocks are copied into the new pool; once this
+  // returns, the old pool memory is invalid.
+  void RsetMemPool(size_t init_size_mb = 256, size_t uint_size_kb = 1);
+
+  // Allocates `size` bytes for a block. If is_ptr_null is true, *ptr is set to
+  // a newly created Block; otherwise *ptr is used directly and only its data
+  // pointer is replaced, so the original data pointer is lost.
+  void Malloc(Block** ptr, const size_t size, bool is_ptr_null = true);
+  // Returns the block's memory to the pool (or to the system) and deletes ptr.
+  void Free(Block* ptr);
+
+  // Returns the (free, total) number of bytes in the pool.
+  std::pair<size_t, size_t> GetMemUsage();
+  size_t GetNumFreeUints() const { return numUints - numAllocatedUintsInPool; }
+
+  // All blocks allocated from the pool must be freed before the destructor
+  // runs; it then releases the pool memory.
+  ~CppMemPool();
+
+ protected:
+  // Header of one memory unit; the units form a doubly linked list.
+  struct _Uint {
+    struct _Uint *pPrev, *pNext;
+    Block* pBlk;  // block bound to this unit, NULL while the unit is free
+  };
+
+  void* pMemPool;  // pointer to the memory pool
+  struct _Uint* pAllocatedMemUint;  // head of the allocated-unit list
+  struct _Uint* pFreeMemUint;       // head of the free-unit list
+
+  // the size of each memory unit with/without the meta data of the unit
+  size_t memUintSize, memUintSizeNoMeta;
+  size_t numUints;  // the number of memory units in the pool
+  // numbers of allocated units inside the pool / inside and outside the pool
+  size_t numAllocatedUintsInPool, numAllocatedUints;
+};
+
 #ifdef USE_CUDA
 class CnMemPool : public DeviceMemPool {
  public:

diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc
@@ -21,8 +21,157 @@
 #include "singa/proto/core.pb.h"
 #include <iostream>
 
-#ifdef USE_CUDA
 namespace singa {
+
+// Reports the pool usage in bytes as the pair (free, total).
+std::pair<size_t, size_t> CppMemPool::GetMemUsage() {
+  const size_t totalBytes = memUintSize * numUints;
+  const size_t usedBytes = memUintSize * numAllocatedUintsInPool;
+  return std::make_pair(totalBytes - usedBytes, totalBytes);
+}
+
+// Starts from an empty state, then lets RsetMemPool() build the actual pool.
+CppMemPool::CppMemPool(size_t init_size_mb, size_t uint_size_kb)
+    : pMemPool(NULL), pAllocatedMemUint(NULL), pFreeMemUint(NULL),
+      memUintSize(0), memUintSizeNoMeta(0),
+      numUints(0), numAllocatedUintsInPool(0), numAllocatedUints(0) {
+  RsetMemPool(init_size_mb, uint_size_kb);
+}
+
+
+// Re-initializes the pool with init_size_mb MB cut into units of uint_size_kb KB.
+// With no unit in use the pool is rebuilt from scratch; otherwise the in-pool
+// blocks are copied into a new pool, which then replaces the current one.
+void CppMemPool::RsetMemPool(size_t init_size_mb, size_t uint_size_kb) {
+  if (numAllocatedUintsInPool == 0) {  // in the case the pool is empty
+    // drop the previous pool, if any, so it neither leaks nor stays on the free list
+    free(pMemPool);
+    pFreeMemUint = NULL;
+    // setting up the parameters in the memory pool
+    const size_t kNBytesPerKB = (1u << 10);
+    const size_t kNBytesPerMB = (1u << 20);
+    memUintSize = uint_size_kb * kNBytesPerKB;
+    CHECK(memUintSize > sizeof(struct _Uint));  // room for the meta data; rejects 0 KB
+    memUintSizeNoMeta = memUintSize - sizeof(struct _Uint);
+    // round the requested pool size up to a whole number of units
+    numUints = (init_size_mb * kNBytesPerMB + memUintSize - 1) / memUintSize;
+    CHECK_GE(numUints,1);
+    // initialize the memory pool: push every unit onto the free list
+    pMemPool = malloc(memUintSize * numUints);
+    CHECK(pMemPool != NULL);
+    for (size_t idx = 0; idx < numUints; idx++) {
+      struct _Uint* pCurUint = (struct _Uint*)((char*)pMemPool + idx * memUintSize);
+      pCurUint->pPrev = NULL;
+      pCurUint->pNext = pFreeMemUint;
+      if (pFreeMemUint != NULL) pFreeMemUint->pPrev = pCurUint;
+      pFreeMemUint = pCurUint;
+      pCurUint->pBlk = NULL;
+    }
+  } else {  // the pool is not empty, create a new one and copy the old to the new one
+    CppMemPool newPool(init_size_mb, uint_size_kb);
+    struct _Uint* pCurUint = pAllocatedMemUint;
+    for (size_t idx = 0; idx < numAllocatedUintsInPool; idx++) {
+      Block* pOldBlk = pCurUint->pBlk;
+      void* pData = pOldBlk->mutable_data();
+      newPool.Malloc(&pOldBlk, pOldBlk->size(), false);
+      size_t copySize = pOldBlk->size() - pOldBlk->offset();
+      memcpy(pOldBlk->mutable_data(), pData, copySize);
+      pCurUint = pCurUint->pNext;
+    }
+    // swap the new pool with the current
+    std::swap(newPool.pMemPool, pMemPool);
+    std::swap(newPool.pAllocatedMemUint, pAllocatedMemUint);
+    std::swap(newPool.pFreeMemUint, pFreeMemUint);
+    std::swap(newPool.memUintSize, memUintSize);
+    std::swap(newPool.memUintSizeNoMeta, memUintSizeNoMeta);
+    std::swap(newPool.numUints, numUints);
+    std::swap(newPool.numAllocatedUintsInPool, numAllocatedUintsInPool);
+    newPool.numAllocatedUints = 0;  // required by ~CppMemPool(), which frees the old pool
+  }
+}
+
+// Provides `size` bytes for a block: from a free pool unit when the request
+// fits into one, otherwise straight from malloc(). With is_ptr_null == true a
+// new Block is created and stored in *ptr; otherwise the existing *ptr, whose
+// size() must equal `size`, gets its data pointer replaced (not freed).
+void CppMemPool::Malloc(Block** ptr, const size_t size, bool is_ptr_null) {
+  // counts every allocation, whether it is served by the pool or by malloc()
+  numAllocatedUints++;
+  struct _Uint* pCurUint = NULL;  // stays NULL for memory outside the pool
+  void* pData = NULL;
+  if (size > memUintSizeNoMeta || pFreeMemUint == NULL) {
+    // too large for a unit, or no unit left: fall back to the system allocator
+    pData = malloc(size);
+    // fail here, like the pool allocation does, rather than hand out NULL
+    CHECK(pData != NULL || size == 0);
+  } else {
+    // detach the head of the free list ...
+    numAllocatedUintsInPool++;
+    pCurUint = pFreeMemUint;
+    pFreeMemUint = pCurUint->pNext;
+    if (pFreeMemUint != NULL) pFreeMemUint->pPrev = NULL;
+    // ... and make it the new head of the allocated list
+    pCurUint->pNext = pAllocatedMemUint;
+    if (pAllocatedMemUint != NULL) pAllocatedMemUint->pPrev = pCurUint;
+    pAllocatedMemUint = pCurUint;
+    // the usable memory starts right behind the unit header
+    pData = (void*)((char*)pCurUint + sizeof(struct _Uint));
+  }
+  if (is_ptr_null) {
+    *ptr = new Block(pData, size);
+  } else {
+    CHECK_EQ((*ptr)->size(), size);
+    (*ptr)->set_data(pData);
+  }
+  if (pCurUint != NULL) {  // remember which block owns the unit, for Free()
+    CHECK(pCurUint->pBlk == NULL);
+    pCurUint->pBlk = *ptr;
+  }
+}
+
+// Releases a block obtained from Malloc() and deletes the Block object.
+// Memory inside the pool goes back to the free list, anything else to free().
+// A NULL ptr is ignored.
+void CppMemPool::Free(Block* ptr) {
+  if (ptr == NULL) return;
+  // start of the allocation: mutable_data() includes the block offset, which
+  // must be removed before locating the unit header or calling free()
+  char* pData = (char*)ptr->mutable_data() - ptr->offset();
+  // is the memory inside the pool?
+  char* pPoolBegin = (char*)pMemPool;
+  if (pPoolBegin < pData && pData < pPoolBegin + numUints * memUintSize) {
+    struct _Uint* pCurUint = (struct _Uint*)(pData - sizeof(struct _Uint));
+    CHECK(ptr == pCurUint->pBlk);
+    // unlink the unit from the allocated list
+    if (pCurUint == pAllocatedMemUint) {
+      pAllocatedMemUint = pCurUint->pNext;
+    } else {
+      pCurUint->pPrev->pNext = pCurUint->pNext;
+    }
+    if (pCurUint->pNext != NULL) {
+      pCurUint->pNext->pPrev = pCurUint->pPrev;
+    }
+    pCurUint->pPrev = NULL;
+    // push it onto the free list
+    pCurUint->pNext = pFreeMemUint;
+    if (pFreeMemUint != NULL) pFreeMemUint->pPrev = pCurUint;
+    pFreeMemUint = pCurUint;
+    pCurUint->pBlk = NULL;
+    numAllocatedUintsInPool--;
+  } else {
+    free(pData);  // the memory came from malloc()
+  }
+  numAllocatedUints--;
+  delete ptr;
+}
+
+CppMemPool::~CppMemPool() {
+	CHECK_EQ(numAllocatedUints,0);  // every Malloc() must have been matched by a Free()
+	free(pMemPool);  // releases the memory of all units at once
+}
+
+
+#ifdef USE_CUDA
 std::atomic<int> CnMemPool::pool_count(0);
 std::pair<size_t, size_t> CnMemPool::GetMemUsage() {
   size_t free, total;
@@ -107,5 +256,5 @@ void CudaMemPool::Free(void *ptr) {
   cudaError_t status = cudaFree(ptr);
   CHECK_EQ(status, cudaError_t::cudaSuccess);
 }
-}
 #endif
+}