/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/3643/include/ck_tile/ops/gemm/warp/warp_gemm_dispatcher.hpp Source File#
warp_gemm_dispatcher.hpp
Go to the documentation of this file.
34 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
35 template<> struct Dispatcher<float, float, float, 16, 16, 4, false> { using Type = WarpGemmMfmaF32F32F32M16N16K4; };
36 template<> struct Dispatcher<float, float, float, 16, 16, 16, false> { using Type = WarpGemmMfmaF32F32F32M16N16K16<>; };
37 template<> struct Dispatcher<float, float, float, 16, 16, 8, false> { using Type = WarpGemmMfmaF32F32F32M16N16K8<>; };
38 template<> struct Dispatcher<float, float, float, 32, 32, 8, false> { using Type = WarpGemmMfmaF32F32F32M32N32K8<>; };
39 template<> struct Dispatcher<float, float, float, 16, 16, 16, true> { using Type = WarpGemmMfmaF32F32F32M16N16K16TransposedCDistribution<>; };
41 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
42 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, false> { using Type = WarpGemmMfmaF16F16F32M32N32K8; };
43 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution; };
44 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false> { using Type = WarpGemmMfmaF16F16F32M32N32K16<>; };
45 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution<>; };
46 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M32N32K16<EDouble>; };
47 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, true, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution<EDouble>; };
48 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, false> { using Type = WarpGemmMfmaF16F16F32M16N16K32<>; };
49 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, true> { using Type = WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution<>; };
50 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, false, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M16N16K32<EDouble>; };
51 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, true, false, false, EDouble> { using Type = WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution<EDouble>; };
52 template<> struct Dispatcher<half_t, half_t, float, 4, 64, 16, false> { using Type = WarpGemmMfmaF16F16F32M4N64K16; };
53 template<> struct Dispatcher<half_t, half_t, float, 64, 4, 16, false> { using Type = WarpGemmMfmaF16F16F32M64N4K16; };
56 template<bool TransposeC> struct Dispatcher<half_t, half_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f16_f16<TransposeC>;};
58 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 16, false> { using Type = WarpGemmMfmaF16F16F32M16N16K16; };
59 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 16, true> { using Type = WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution; };
62 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, false, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8SwizzleA; };
63 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16SwizzleA; };
64 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 8, true, true> { using Type = WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution; };
65 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, true, true> { using Type = WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution; };
68 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
69 template<> struct Dispatcher<half_t, half_t, float, 32, 32, 16, false, false, true> { using Type = WarpGemmSmfmacF16F16F32M32N32K16; };
70 template<> struct Dispatcher<half_t, half_t, float, 16, 16, 32, false, false, true> { using Type = WarpGemmSmfmacF16F16F32M16N16K32; };
73 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
74 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, false> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8; };
75 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution; };
76 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16<>; };
77 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution<>; };
78 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, false, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16<EDouble>; };
79 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, true, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution<EDouble>; };
80 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, false> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32<>; };
81 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, true> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution<>; };
82 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, false, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32<EDouble>; };
83 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 32, true, false, false, EDouble> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution<EDouble>; };
84 template<> struct Dispatcher<bf16_t, bf16_t, float, 4, 64, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M4N64K16; };
85 template<> struct Dispatcher<bf16_t, bf16_t, float, 64, 4, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M64N4K16; };
88 template<bool TransposeC> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf16_bf16<TransposeC>; };
90 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 16, false> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K16; };
91 template<> struct Dispatcher<bf16_t, bf16_t, float, 16, 16, 16, true> { using Type = WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution; };
94 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, false, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA; };
95 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, false, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA; };
96 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 8, true, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution; };
97 template<> struct Dispatcher<bf16_t, bf16_t, float, 32, 32, 16, true, true> { using Type = WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution; };
100 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
101 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_fp8_fp8; };
102 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_fp8_fp8; };
103 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_fp8_fp8; };
104 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 64, false> { using Type = WarpGemmMfma_f32_16x16x64_fp8_fp8; };
105 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 64, true> { using Type = WarpGemmMfma_f32_16x16x64_fp8_fp8_CTransposed; };
106 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed; };
107 template<> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 32, true> { using Type = WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed; };
108 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_fp8_bf8; };
109 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed; };
110 template<> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_fp8_bf8; };
111 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_fp8_bf8; };
112 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_bf8_fp8; };
113 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed; };
114 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 16, false> { using Type = WarpGemmMfma_f32_32x32x16_bf8_bf8; };
115 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 32, false> { using Type = WarpGemmMfma_f32_32x32x32_bf8_bf8; };
116 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 32, false> { using Type = WarpGemmMfma_f32_16x16x32_bf8_bf8; };
117 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 32, true> { using Type = WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed; };
118 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 64, false> { using Type = WarpGemmMfma_f32_16x16x64_bf8_bf8; };
119 template<> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 64, true> { using Type = WarpGemmMfma_f32_16x16x64_bf8_bf8_CTransposed; };
120 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 16, true> { using Type = WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed; };
124 struct Dispatcher<A, B, float, 16, 16, 128, false, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_f8f6f4<A, B, I>; };
125 template<WGAttrNumAccessEnum I> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_fp8_fp8_CTransposed<I>; };
126 template<WGAttrNumAccessEnum I> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_fp8_bf8_CTransposed<I>; };
127 template<WGAttrNumAccessEnum I> struct Dispatcher<bf8_t, fp8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_bf8_fp8_CTransposed<I>; };
128 template<WGAttrNumAccessEnum I> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 128, true, false, false, I> { using Type = WarpGemmMfma_f32_16x16x128_bf8_bf8_CTransposed<I>; };
130 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_fp8_fp8<>; };
131 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_fp8_bf8<>; };
132 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_bf8_fp8<>; };
133 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 64, false> { using Type = WarpGemmMfma_f32_32x32x64_bf8_bf8<>; };
134 template<> struct Dispatcher<fp8_t, fp8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_fp8_fp8<EQuad>; };
135 template<> struct Dispatcher<fp8_t, bf8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_fp8_bf8<EQuad>; };
136 template<> struct Dispatcher<bf8_t, fp8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_bf8_fp8<EQuad>; };
137 template<> struct Dispatcher<bf8_t, bf8_t, float, 32, 32, 64, false, false, false, EQuad> { using Type = WarpGemmMfma_f32_32x32x64_bf8_bf8<EQuad>; };
140 template<bool TransposeC> struct Dispatcher<fp8_t, fp8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f8_f8<TransposeC>; };
141 template<bool TransposeC> struct Dispatcher<bf8_t, bf8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf8_bf8<TransposeC>; };
142 template<bool TransposeC> struct Dispatcher<fp8_t, bf8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_f8_bf8<TransposeC>; };
143 template<bool TransposeC> struct Dispatcher<bf8_t, fp8_t, float, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_f32_16x16x16_bf8_f8<TransposeC>; };
146 // ADataType, BDataType, AccDataType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity
147 template<> struct Dispatcher<int8_t, int8_t, int32_t, 32, 32, 16, false> { using Type = WarpGemmMfma_i32_32x32x16_i8_i8; };
148 template<> struct Dispatcher<int8_t, int8_t, int32_t, 32, 32, 16, true> { using Type = WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed; };
149 template<> struct Dispatcher<int8_t, int8_t, int32_t, 16, 16, 32, false> { using Type = WarpGemmMfma_i32_16x16x32_i8_i8; };
150 template<> struct Dispatcher<int8_t, int8_t, int32_t, 16, 16, 32, true> { using Type = WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed; };
152 template<bool TransposeC> struct Dispatcher<int8_t, int8_t, int32_t, 16, 16, 16, TransposeC, false> { using Type = WarpGemmWmma_i32_16x16x16_i8_i8<TransposeC>;};
Definition: cluster_descriptor.hpp:13
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_32x32x16_i8_i8
Definition: warp_gemm.hpp:404
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_bf8_bf8
Definition: warp_gemm.hpp:319
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_bf8
Definition: warp_gemm.hpp:297
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_fp8_fp8_CTransposed
Definition: warp_gemm.hpp:324
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_bf8
Definition: warp_gemm.hpp:285
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:249
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M16N16K16
Definition: warp_gemm.hpp:172
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M64N4K16
Definition: warp_gemm.hpp:158
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_fp8
Definition: warp_gemm.hpp:282
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleA
Definition: warp_gemm.hpp:85
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:139
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution
Definition: warp_gemm.hpp:215
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution
Definition: warp_gemm.hpp:211
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_fp8_fp8
Definition: warp_gemm.hpp:315
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_fp8_bf8
Definition: warp_gemm.hpp:279
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed
Definition: warp_gemm.hpp:385
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed
Definition: warp_gemm.hpp:304
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution
Definition: warp_gemm.hpp:89
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed
Definition: warp_gemm.hpp:393
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed
Definition: warp_gemm.hpp:311
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M32N32K16< WGAttrCtlEnum::Default_ > >> WarpGemmSmfmacF16F16F32M32N32K16
Definition: warp_gemm.hpp:162
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF32F32F32M16N16K4
Definition: warp_gemm.hpp:18
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M16N16K16
Definition: warp_gemm.hpp:51
typename impl::warp_gemm_dispatcher::Dispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition: warp_gemm_dispatcher.hpp:178
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_bf8_bf8
Definition: warp_gemm.hpp:293
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_16x16x32_i8_i8
Definition: warp_gemm.hpp:411
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_bf8_bf8
Definition: warp_gemm.hpp:307
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA
Definition: warp_gemm.hpp:207
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed
Definition: warp_gemm.hpp:381
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaBf16Bf16F32M32N32K8
Definition: warp_gemm.hpp:169
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed
Definition: warp_gemm.hpp:408
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_fp8
Definition: warp_gemm.hpp:273
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed
Definition: warp_gemm.hpp:415
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA
Definition: warp_gemm.hpp:202
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M64N4K16
Definition: warp_gemm.hpp:268
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_16x16x32_fp8_fp8
Definition: warp_gemm.hpp:300
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed
Definition: warp_gemm.hpp:389
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:259
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M16N16K32< WGAttrCtlEnum::Default_ > >> WarpGemmSmfmacF16F16F32M16N16K32
Definition: warp_gemm.hpp:165
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M32N32K8
Definition: warp_gemm.hpp:48
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution
Definition: warp_gemm.hpp:149
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaF16F16F32M32N32K8SwizzleA
Definition: warp_gemm.hpp:81
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M4N64K16
Definition: warp_gemm.hpp:154
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > >> WarpGemmMfma_f32_32x32x16_fp8_bf8
Definition: warp_gemm.hpp:276
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > >> WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution
Definition: warp_gemm.hpp:93
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_fp8
Definition: warp_gemm.hpp:289
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_bf8_bf8_CTransposed
Definition: warp_gemm.hpp:329
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M4N64K16
Definition: warp_gemm.hpp:264
Definition: warp_gemm_impl.hpp:11
Definition: warp_gemm_smfmac_impl.hpp:11
Definition: warp_gemm_dispatcher.hpp:30