Files
cutlass/media/images/cutlass-3.8-blackwell-gemm-peak-performance.svg
Yujia Zhai 833f6990e0 v3.8.0 update (#2082)
* 3.8 update

* fix Markus' name

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
2025-02-06 21:33:40 -05:00

2 lines
10 KiB
XML

<svg width="3886" height="1956" viewBox="0 0 3886 1956"
     xmlns="http://www.w3.org/2000/svg"
     xmlns:xlink="http://www.w3.org/1999/xlink"
     overflow="hidden" role="img" aria-labelledby="chartTitle chartDesc">
  <!-- Bar chart of GEMM throughput per data type.
       viewBox mirrors width/height so the drawing scales instead of being
       clipped when it is rendered at a size other than 3886x1956. -->
  <title id="chartTitle">CUTLASS 3.8 + CUDA 12.8 Blackwell SM100 GEMM Performance</title>
  <desc id="chartDesc">Bar chart of GEMM throughput as a percentage of theoretical peak at power for eight data types (NVFP4, MXFP4, MXFP8, S8, F8, F16, BF16, TF32), for a 16384x17920x16384 shape matmul on uniform random ints in the range [-4,4]. All eight bars fall between roughly 88% and 98% on a 0 to 100 axis.</desc>
  <defs>
    <!-- Clips the drawing to the full canvas, expressed in the translated chart coordinates. -->
    <clipPath id="clip0">
      <rect x="1242" y="-4" width="3886" height="1956"/>
    </clipPath>
  </defs>
  <!-- All geometry below is authored with its origin at (1242, -4);
       the translate shifts it onto the viewport origin. -->
  <g clip-path="url(#clip0)" transform="translate(-1242 4)">
    <!-- White chart background -->
    <rect x="1247" y="0" width="3877" height="1948" fill="#FFFFFF"/>

    <!-- Horizontal gridlines, bottom to top: 10 through 100 -->
    <path fill="none" stroke="#D9D9D9" stroke-width="3.4375" stroke-linejoin="round"
          d="M1526.5 1662.55 5073.5 1662.55
             M1526.5 1508.55 5073.5 1508.55
             M1526.5 1354.54 5073.5 1354.54
             M1526.5 1199.54 5073.5 1199.54
             M1526.5 1045.53 5073.5 1045.53
             M1526.5 890.529 5073.5 890.529
             M1526.5 736.524 5073.5 736.524
             M1526.5 581.519 5073.5 581.519
             M1526.5 427.514 5073.5 427.514
             M1526.5 273.5 5073.5 273.5"/>

    <!-- Data bars, left to right, one per category label on the x-axis -->
    <g fill="#3B7D23">
      <!-- NVFP4 -->
      <path d="M1679.06 451.015 1818.06 451.015 1818.06 1817 1679.06 1817Z"/>
      <!-- MXFP4 -->
      <path d="M2122.07 399.013 2261.07 399.013 2261.07 1817 2122.07 1817Z"/>
      <!-- MXFP8 -->
      <path d="M2566.08 299.01 2705.09 299.01 2705.09 1817 2566.08 1817Z"/>
      <!-- S8 -->
      <path d="M3009.1 377.012 3148.1 377.012 3148.1 1817 3009.1 1817Z"/>
      <!-- F8 -->
      <path d="M3452.11 409.013 3591.12 409.013 3591.12 1817 3452.11 1817Z"/>
      <!-- F16 -->
      <path d="M3896.13 322.011 4035.13 322.011 4035.13 1817 3896.13 1817Z"/>
      <!-- BF16 -->
      <path d="M4339.14 363.012 4478.15 363.012 4478.15 1817 4339.14 1817Z"/>
      <!-- TF32 -->
      <path d="M4782.16 339.011 4921.16 339.011 4921.16 1817 4782.16 1817Z"/>
    </g>

    <!-- X-axis baseline (value 0), drawn after the bars so it sits on top of them -->
    <path d="M1526.5 1817.5 5073.5 1817.5"
          fill="none" stroke="#D9D9D9" stroke-width="3.4375" stroke-linejoin="round"/>

    <!-- Text shares one font setup; attributes left at their SVG defaults are omitted.
         Positions are precomputed left edges, hence text-anchor="start". -->
    <g fill="#000000" font-family="Roboto,Roboto_MSFontService,sans-serif"
       font-weight="400" font-size="55" text-anchor="start">
      <!-- Y-axis tick labels -->
      <text x="1445.54" y="1833">0</text>
      <text x="1414.62" y="1679">10</text>
      <text x="1414.62" y="1524">20</text>
      <text x="1414.62" y="1370">30</text>
      <text x="1414.62" y="1215">40</text>
      <text x="1414.62" y="1061">50</text>
      <text x="1414.62" y="907">60</text>
      <text x="1414.62" y="752">70</text>
      <text x="1414.62" y="598">80</text>
      <text x="1414.62" y="443">90</text>
      <text x="1383.71" y="289">100</text>

      <!-- X-axis category labels -->
      <text x="1663.41" y="1904">NVFP4</text>
      <text x="2102.65" y="1904">MXFP4</text>
      <text x="2546.01" y="1904">MXFP8</text>
      <text x="3046.84" y="1904">S8</text>
      <text x="3491.33" y="1904">F8</text>
      <text x="3919.23" y="1904">F16</text>
      <text x="4345.46" y="1904">BF16</text>
      <text x="4789.54" y="1904">TF32</text>

      <!-- Y-axis title, rotated a quarter turn counter-clockwise to read bottom to top -->
      <text transform="translate(1369.71 1487) rotate(-90)">% throughput of theoretical peak @ power</text>

      <!-- Chart title -->
      <text x="1992.83" y="112" font-size="83">CUTLASS 3.8 + CUDA 12.8 Blackwell SM100 GEMM Performance</text>
      <!-- Subtitle kept as a single run so the bracketed range cannot drift apart
           from the sentence when a fallback font with different metrics is used. -->
      <text x="2162.81" y="194" font-size="64">16384x17920x16384 shape matmul on uniform random ints range [-4,4]</text>
    </g>

    <!-- Outer frame, drawn last so it sits above the background edge -->
    <rect x="1247.5" y="0.499836" width="3877" height="1948"
          fill="none" stroke="#D9D9D9" stroke-width="3.4375" stroke-linejoin="round"/>
  </g>
</svg>