/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/3643/include/ck/tensor_operation/gpu/device/device_grouped_gemm_tile_loop.hpp Source File
device_grouped_gemm_tile_loop.hpp
Go to the documentation of this file.
__host__ constexpr __device__ auto integer_divide_floor(X x, Y y)
Definition: math.hpp:66
Definition: ck.hpp:270
Definition: stream_config.hpp:9
Definition: device_grouped_gemm.hpp:100
Grouped GEMM kernel using output Tile Looping algorithm.
Definition: device_grouped_gemm_tile_loop.hpp:48
Definition: device_grouped_gemm_tile_loop.hpp:53
static constexpr int BLOCK_SUBSCRIPTION_FACTOR
Definition: device_grouped_gemm_tile_loop.hpp:56
static constexpr int CU_SIMDS
Definition: device_grouped_gemm_tile_loop.hpp:58
static int CalculateMaxOccupancyGridSize(const KernelFunction &kernel, const StreamConfig &stream_config)
Definition: device_grouped_gemm_tile_loop.hpp:68
static int GetComputeUnitCount()
Definition: device_grouped_gemm_tile_loop.hpp:94
static int GetCuBlocks()
Definition: device_grouped_gemm_tile_loop.hpp:61
static int GetKernelOccupancy(const KernelFunction &kernel)
Definition: device_grouped_gemm_tile_loop.hpp:86