// Abort-on-error wrapper for CUDA runtime calls.
// Usage: CHECK(cudaMalloc(&p, bytes));
// Wrapped in do { } while (0) so the macro expands to a single statement;
// the original bare-brace form breaks when used in an un-braced if/else
// ("if (x) CHECK(y); else ..." would mis-associate the else).
#define CHECK(call) do { \
cudaError_t err; \
if ( (err = (call)) != cudaSuccess) { \
fprintf(stderr, "Got error %s at %s:%d\n", cudaGetErrorString(err), \
__FILE__, __LINE__); \
exit(1); \
} \
} while (0)
// Implementation behind the checkCudaErrors() macro below: when result is a
// non-success code, prints the failing expression (func) with its location,
// resets the device, and terminates the process.
// _cudaGetErrorEnum and DEVICE_RESET are provided elsewhere (CUDA samples
// helper headers) -- not visible in this file.
template <typename T>
void check(T result, char const *const func, const char *const file, int const line)
{
    if (!result)
        return;  // success: nothing to report
    fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
            file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
    DEVICE_RESET
    exit(EXIT_FAILURE);
}
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
// Fetches (and clears) the last pending CUDA runtime error; if one exists,
// prints errorMessage with the call site and terminates the process.
// Intended to be placed right after a kernel launch via getLastCudaError("...").
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
file, line, errorMessage, (int)err, cudaGetErrorString(err));
DEVICE_RESET  // macro defined elsewhere -- presumably resets the device before exit; confirm
exit(EXIT_FAILURE);
}
}
#endif
// Macro to catch CUDA errors in kernel launches.
// Place immediately after a kernel launch; note the synchronize makes it
// blocking, so use at debug/coarse boundaries, not in hot loops.
#define CHECK_LAUNCH_ERROR() \
do { \
/* Check synchronous errors, i.e. pre-launch (bad config, etc.) */ \
cudaError_t err = cudaGetLastError(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
/* Check asynchronous errors, i.e. kernel failed (ULF) */ \
/* cudaThreadSynchronize() is deprecated and removed from modern CUDA; */ \
/* cudaDeviceSynchronize() is the supported equivalent. */ \
err = cudaDeviceSynchronize(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
// Define this to turn on error checking
#define CUDA_ERROR_CHECK
// Wrap individual CUDA API calls: CudaSafeCall( cudaMalloc(&p, n) );
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
// Place after a kernel launch to surface launch and execution errors.
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
// Implementation behind CudaSafeCall(): aborts the process with the failing
// call's file/line when err is not cudaSuccess. Compiles to a no-op unless
// CUDA_ERROR_CHECK is defined.
inline void __cudaSafeCall( cudaError err, const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
    if ( err != cudaSuccess )
    {
        fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n",
                 file, line, cudaGetErrorString( err ) );
        exit( -1 );
    }
#endif
    return;
}
// Implementation behind CudaCheckError(): first checks for a pending launch
// error, then synchronizes the device to surface asynchronous in-kernel
// failures. Compiles to a no-op unless CUDA_ERROR_CHECK is defined.
inline void __cudaCheckError( const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
    cudaError err = cudaGetLastError();
    if ( err != cudaSuccess )
    {
        fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n",
                 file, line, cudaGetErrorString( err ) );
        exit( -1 );
    }
    // More careful checking. However, this will affect performance.
    // Comment away if needed.
    err = cudaDeviceSynchronize();
    if ( err != cudaSuccess )
    {
        fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
                 file, line, cudaGetErrorString( err ) );
        exit( -1 );
    }
#endif
    return;
}
// Launch saxpy with 256-thread blocks (ceil-div grid so all N elements are
// covered), then check both launch-time and execution-time errors.
saxpy<<<(N+255)/256, 256>>>(N, 2.0, d_x, d_y);
cudaError_t errSync = cudaGetLastError();       // launch/config errors
cudaError_t errAsync = cudaDeviceSynchronize(); // in-kernel execution errors
if (errSync != cudaSuccess)
printf("Sync kernel error: %s\n", cudaGetErrorString(errSync));   // fixed: missing ')'
if (errAsync != cudaSuccess)
printf("Async kernel error: %s\n", cudaGetErrorString(errAsync)); // fixed: missing ')'
#ifndef __GPU_TIMER_H__
#define __GPU_TIMER_H__
// RAII helper that times a span of GPU work with CUDA events.
// Usage: t.Start(); <launch kernels>; t.Stop(); float ms = t.Elapsed();
// All records go to the default stream; Elapsed() blocks until the stop
// event has completed on the device.
struct GpuTimer
{
cudaEvent_t start;
cudaEvent_t stop;
GpuTimer()
{
cudaEventCreate(&start);
cudaEventCreate(&stop);
}
~GpuTimer()
{
cudaEventDestroy(start);
cudaEventDestroy(stop);
}
// Non-copyable: a copied timer would destroy the same events twice
// (implicit copy of the raw cudaEvent_t handles).
GpuTimer(const GpuTimer&) = delete;
GpuTimer& operator=(const GpuTimer&) = delete;
void Start()
{
cudaEventRecord(start, 0);  // 0 = default stream
}
void Stop()
{
cudaEventRecord(stop, 0);
}
// Returns elapsed milliseconds between Start() and Stop().
float Elapsed()
{
float elapsed;
cudaEventSynchronize(stop);  // wait for the stop event before reading the time
cudaEventElapsedTime(&elapsed, start, stop);
return elapsed;
}
};
#endif /* __GPU_TIMER_H__ */
#include <cuda.h>
#include <cuda_runtime.h>
// Abort with file/line context when a CUDA runtime call fails.
// The do { } while (0) wrapper makes the macro a single statement, so it is
// safe inside un-braced if/else.
#define CUDA_CHECK(call) \
    do { \
        cudaError_t status_ = (call); \
        if (status_ != cudaSuccess) {\
            fprintf(stderr, "CUDA error %s at %s:%d\n", cudaGetErrorString(status_), __FILE__, __LINE__); \
            exit(1); \
        } \
    } while (0)
// Convenience alias over std::unique_ptr with an explicit deleter type.
// NOTE(review): not used by the active aliases below -- only referenced by
// the commented-out decltype-based variants; candidate for removal.
template <typename T, typename Deleter>
using unique_ptr_temp = std::unique_ptr<T, Deleter>;
// Earlier free-function deleter experiments, superseded by the functor
// structs below (a functor deleter needs no pointer stored per unique_ptr).
// template <typename T>
// void device_ptr_deleter(T* ptr)
// {
// CUDA_CHECK(cudaFree(ptr));
// }
//
// template <typename T>
// void host_ptr_deleter(T* ptr)
// {
// CUDA_CHECK(cudaFreeHost(ptr));
// }
// unique_ptr deleter for device memory allocated with cudaMalloc.
// NOTE(review): CUDA_CHECK exits on failure, so a cudaFree error during
// program/static teardown (e.g. cudaErrorCudartUnloading) would terminate
// the process from a destructor -- confirm this is acceptable.
template <typename T>
struct device_ptr_deleter {
void operator()(T* ptr) {
CUDA_CHECK(cudaFree(ptr));
}
};
// unique_ptr deleter for pinned host memory allocated with
// cudaMallocHost/cudaHostAlloc (plain malloc'd memory must NOT use this).
// NOTE(review): exits on failure from a destructor path via CUDA_CHECK --
// same teardown caveat as device_ptr_deleter.
template <typename T>
struct host_ptr_deleter {
void operator()(T* ptr) {
CUDA_CHECK(cudaFreeHost(ptr));
}
};
// Owning handle for a device array; released with cudaFree via
// device_ptr_deleter. T[] form: array-of-T semantics (operator[]).
template <typename T>
// using unique_ptr_device = unique_ptr_temp<T, decltype(device_ptr_deleter<T>)>;
// using unique_ptr_device = std::unique_ptr<T[], decltype(device_ptr_deleter<T>)>;
using unique_ptr_device = std::unique_ptr<T[], device_ptr_deleter<T>>;
// Owning handle for a pinned host array; released with cudaFreeHost via
// host_ptr_deleter.
template <typename T>
// using unique_ptr_host = unique_ptr_temp<T, decltype(host_ptr_deleter<T>)>;
// using unique_ptr_host = std::unique_ptr<T[], decltype(host_ptr_deleter<T>)>;
using unique_ptr_host = std::unique_ptr<T[], host_ptr_deleter<T>>;