hjk41的日志

Avatar

simple logging mechanism for CUDA device code

I have been trying to debug a piece of CUDA code, and found it extremely hard. Sometimes, the bug won't reproduce in -deviceemu, nor -G. And sometimes, when it does come up in -deviceemu, I am not able to set a breakpoint in the right function, because it has been inlined. So I composed this simple logging "library". I would like to share it with you guys, hope it would be helpful for someone.

/***********************************************************
 *  DLog.h
 *  Simple logging mechanism for CUDA device code
 *
 *  Usage: 
 *	   1. include it in your .cu file
 *	   2. call DLog_Init(size) to reserve size bytes as output buffer 
 *		   on device
 *	   3. call DLog<<data in your CUDA device code
 *	   4. call DLog_Dump() after kernel invocation to dump the output
 *
 *  This implementation is very simple, and inefficient. It supports only 
 *	 char, int and const char *.
 *
 *  Author: Chuntao HONG (chuntao.hong@gmail.com)
 *  Last modified: Oct. 13th, 2009
************************************************************/
#ifndef DLOG_H
#define DLOG_H

#include <cuda.h>
#include <iostream>
using namespace std;

const int MIN_ALIGN=4;

// align n to b bytes
template<class T>
__device__ T Align(T n, uint32_t b){
		return ((uint32_t)n&(b-1))==NULL ? n : n+b-((uint32_t)n&(b-1));
}

template<class T>
__device__ T MinAlign(T n){
		return ((uint32_t)n&(MIN_ALIGN-1))==NULL ? n : n+MIN_ALIGN-((uint32_t)n&(MIN_ALIGN-1));
}

// set some empty bytes, so that the logging data structures won't be polluted by out-of-bound writes
const int SAFTY_BYTES=1024*1024;

// data types supported, now only three
enum DataType{
		CHAR,
		INT,
		STRING
};

struct LogPack{
		int size;
		DataType type;
		union{
				char c;
				int i;
				char str[];
		}data;
};

__device__ int requiredLogSpace(const char*);
__device__ int requiredLogSpace(const int);
__device__ int requiredLogSpace(const unsigned int);
__device__ int requiredLogSpace(const char);

__device__ void assignLog(LogPack *, const char c);
__device__ void assignLog(LogPack *, const char * c);
__device__ void assignLog(LogPack *, const int c);
__device__ void assignLog(LogPack *, const unsigned int c);


struct MyLog{
public:
		char empty_bytes[SAFTY_BYTES];
		char * buf;
		unsigned int curPos;
		char empty_bytes2[SAFTY_BYTES];
public:
		template <class T>
		__device__ MyLog & operator<<(const T & t){
				int packSize=MinAlign(requiredLogSpace(t));
				int start=atomicAdd(&curPos, packSize);
				LogPack * pack=(LogPack *)(buf+start);
				assignLog(pack,t);
				pack->size=packSize;
				return *this;
		}
};

__device__ MyLog DLog;

__device__ int requiredLogSpace(const int i){
		return sizeof(i)+sizeof(DataType)+sizeof(int);
}
__device__ int requiredLogSpace(const char * str){
		int len=0;
		while(*str++){
				len++;
		}
		return len+1+sizeof(DataType)+sizeof(int);
}
__device__ static int requiredLogSpace(const unsigned int i){
		return sizeof(i)+sizeof(DataType)+sizeof(int);
}
__device__ static int requiredLogSpace(const char c){
		return sizeof(c)+sizeof(DataType)+sizeof(int);
}

__device__ void assignLog(LogPack * pack, const char c){
		pack->data.c=c;
		pack->type=CHAR;
}
__device__ void assignLog(LogPack * pack, const int i){
		pack->data.i=i;
		pack->type=INT;
}
__device__ void assignLog(LogPack * pack, const unsigned int i){
		pack->data.i=i;
		pack->type=INT;
}
__device__ void assignLog(LogPack * pack, const char * str){
		int i=0;
		while(*str){
				pack->data.str[i++]=*str;
				str++;
		}
		pack->type=STRING;
}


__host__ void DLog_Init(unsigned int size){
		MyLog localLog;
		localLog.curPos=SAFTY_BYTES;
		cudaMalloc((void**)&localLog.buf, size+SAFTY_BYTES);
		cudaMemset(localLog.buf, 0, size);
		cudaMemcpyToSymbol(DLog, &localLog, sizeof(MyLog), 0, cudaMemcpyHostToDevice);
}

__host__ void DLog_Dump(){
		MyLog localLog;
		cudaMemcpyFromSymbol(&localLog, DLog, sizeof(MyLog), 0, cudaMemcpyDeviceToHost);
		char * h_buf=new char[localLog.curPos+1];
		cudaMemcpy(h_buf, localLog.buf, localLog.curPos, cudaMemcpyDeviceToHost);
		cudaFree(localLog.buf);

		char * ptr=h_buf+SAFTY_BYTES;
		while(ptr<h_buf+localLog.curPos){
				LogPack * pack=(LogPack *)ptr;
				if(pack->type==CHAR){
						cout<<pack->data.c<<endl;
				}
				else if(pack->type==INT){
						cout<<"0x"<<hex<<pack->data.i<<endl;
				}
				else{
						cout<<pack->data.str<<endl;
				}
				ptr+=pack->size;
		}

		delete[] h_buf;
}

#endif

评论已关闭