profiler Namespace Reference

profiler Namespace Reference

Composable Kernel: ck::profiler Namespace Reference
ck::profiler Namespace Reference

Classes

struct  GpuVerifyResult
 
struct  GpuVerifyDeviceResult
 

Functions

template<typename ComputeDataType , typename OutDataType , typename AccDataType = ComputeDataType>
float compute_relative_tolerance (const int number_of_accumulations=1)
 
template<typename T >
__global__ void gpu_verify_kernel (const T *__restrict__ device_result, const T *__restrict__ reference_result, float rtol, float atol, long long size, GpuVerifyDeviceResult *result)
 
template<typename T >
GpuVerifyResult gpu_verify (const void *device_result, const void *reference_result, float rtol, float atol, std::size_t size, hipStream_t stream=nullptr)
 
template<typename T >
float gpu_reduce_max (const void *device_buffer, std::size_t size, hipStream_t stream=nullptr)
 
template<typename OutDataType , typename ComputeDataType = OutDataType, typename AccDataType = ComputeDataType>
GpuVerifyResult gpu_verify (const void *device_result, const void *reference_result, int number_of_accumulations, std::size_t size, hipStream_t stream=nullptr)
 
template<typename T >
__global__ void gpu_reduce_max_kernel (const T *__restrict__ data, long long size, float *__restrict__ max_val)
 

Function Documentation

◆ compute_relative_tolerance()

template<typename ComputeDataType , typename OutDataType , typename AccDataType = ComputeDataType>
float ck::profiler::compute_relative_tolerance ( const int  number_of_accumulations = 1)
inline

◆ gpu_reduce_max()

template<typename T >
float ck::profiler::gpu_reduce_max ( const void *  device_buffer,
std::size_t  size,
hipStream_t  stream = nullptr 
)

◆ gpu_reduce_max_kernel()

template<typename T >
__global__ void ck::profiler::gpu_reduce_max_kernel ( const T *__restrict__  data,
long long  size,
float *__restrict__  max_val 
)

◆ gpu_verify() [1/2]

template<typename T >
GpuVerifyResult ck::profiler::gpu_verify ( const void *  device_result,
const void *  reference_result,
float  rtol,
float  atol,
std::size_t  size,
hipStream_t  stream = nullptr 
)

◆ gpu_verify() [2/2]

template<typename OutDataType , typename ComputeDataType = OutDataType, typename AccDataType = ComputeDataType>
GpuVerifyResult ck::profiler::gpu_verify ( const void *  device_result,
const void *  reference_result,
int  number_of_accumulations,
std::size_t  size,
hipStream_t  stream = nullptr 
)

◆ gpu_verify_kernel()

template<typename T >
__global__ void ck::profiler::gpu_verify_kernel ( const T *__restrict__  device_result,
const T *__restrict__  reference_result,
float  rtol,
float  atol,
long long  size,
GpuVerifyDeviceResult *  result 
)