ck::profiler Namespace Reference
Classes | |
| struct | GpuVerifyResult |
| struct | GpuVerifyDeviceResult |
Functions | |
| template<typename ComputeDataType , typename OutDataType , typename AccDataType = ComputeDataType> | |
| float | compute_relative_tolerance (const int number_of_accumulations=1) |
| template<typename T > | |
| __global__ void | gpu_verify_kernel (const T *__restrict__ device_result, const T *__restrict__ reference_result, float rtol, float atol, long long size, GpuVerifyDeviceResult *result) |
| template<typename T > | |
| GpuVerifyResult | gpu_verify (const void *device_result, const void *reference_result, float rtol, float atol, std::size_t size, hipStream_t stream=nullptr) |
| template<typename T > | |
| float | gpu_reduce_max (const void *device_buffer, std::size_t size, hipStream_t stream=nullptr) |
| template<typename OutDataType , typename ComputeDataType = OutDataType, typename AccDataType = ComputeDataType> | |
| GpuVerifyResult | gpu_verify (const void *device_result, const void *reference_result, int number_of_accumulations, std::size_t size, hipStream_t stream=nullptr) |
| template<typename T > | |
| __global__ void | gpu_reduce_max_kernel (const T *__restrict__ data, long long size, float *__restrict__ max_val) |
Function Documentation
◆ compute_relative_tolerance()
template<typename ComputeDataType , typename OutDataType , typename AccDataType = ComputeDataType>
float ck::profiler::compute_relative_tolerance(
    const int number_of_accumulations = 1)  [inline]
◆ gpu_reduce_max()
template<typename T >
float ck::profiler::gpu_reduce_max(
    const void *device_buffer,
    std::size_t size,
    hipStream_t stream = nullptr)
◆ gpu_reduce_max_kernel()
template<typename T >
__global__ void ck::profiler::gpu_reduce_max_kernel(
    const T *__restrict__ data,
    long long size,
    float *__restrict__ max_val)
◆ gpu_verify() [1/2]
template<typename T >
GpuVerifyResult ck::profiler::gpu_verify(
    const void *device_result,
    const void *reference_result,
    float rtol,
    float atol,
    std::size_t size,
    hipStream_t stream = nullptr)
◆ gpu_verify() [2/2]
template<typename OutDataType , typename ComputeDataType = OutDataType, typename AccDataType = ComputeDataType>
GpuVerifyResult ck::profiler::gpu_verify(
    const void *device_result,
    const void *reference_result,
    int number_of_accumulations,
    std::size_t size,
    hipStream_t stream = nullptr)
◆ gpu_verify_kernel()
template<typename T >
__global__ void ck::profiler::gpu_verify_kernel(
    const T *__restrict__ device_result,
    const T *__restrict__ reference_result,
    float rtol,
    float atol,
    long long size,
    GpuVerifyDeviceResult *result)