729 lines
76 KiB
HTML
729 lines
76 KiB
HTML
<!doctype html>
|
|
<html class="no-js" lang="en">
|
|
<head>
  <!-- Document metadata: encoding first, then viewport and colour-scheme hints. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <meta name="color-scheme" content="light dark">
  <link rel="index" title="Index" href="../../genindex.html">
  <link rel="search" title="Search" href="../../search.html">
  <!-- NOTE(review): canonical URLs are normally absolute; this value resolves
       relative to the current page. Confirm the published docs base URL. -->
  <link rel="canonical" href="docs/_modules/cutlass/library_defaults.html">

  <!-- Generated with Sphinx 6.1.3 and Furo 2023.03.27 -->
  <title>cutlass.library_defaults - CUTLASS Python</title>

  <!-- Theme stylesheets; all paths are relative to this page's directory. -->
  <link rel="stylesheet" type="text/css" href="../../_static/pygments.css">
  <link rel="stylesheet" type="text/css" href="../../_static/styles/furo.css?digest=fad236701ea90a88636c2a8c73b44ae642ed2a53">
  <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css">
  <link rel="stylesheet" type="text/css" href="../../_static/tabs.css">
  <link rel="stylesheet" type="text/css" href="../../_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e">

  <style>
    /* Light-theme values for the code-block and brand colour variables. */
    body {
      --color-code-background: #eeffcc;
      --color-code-foreground: black;
      --color-brand-primary: #76B900;
      --color-brand-content: #76B900;
    }
    @media not print {
      /* Dark values when the dark theme is selected explicitly via data-theme. */
      body[data-theme="dark"] {
        --color-code-background: #272822;
        --color-code-foreground: #f8f8f2;
        --color-brand-primary: #76B900;
        --color-brand-content: #76B900;
      }
      @media (prefers-color-scheme: dark) {
        /* Same dark values when the OS prefers dark and "light" is not forced. */
        body:not([data-theme="light"]) {
          --color-code-background: #272822;
          --color-code-foreground: #f8f8f2;
          --color-brand-primary: #76B900;
          --color-brand-content: #76B900;
        }
      }
    }
  </style>
</head>
|
|
<body>
|
|
|
|
<script>
  // Restore the theme stored under the "theme" localStorage key; fall back to
  // "auto" when nothing has been saved. The value lands on <body data-theme>.
  document.body.dataset.theme = localStorage.getItem("theme") || "auto";
</script>
|
|
|
|
|
|
<!-- Hidden icon sprite: each <symbol> is referenced elsewhere on the page via <use href="#id">. -->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
  <symbol id="svg-toc" viewBox="0 0 24 24">
    <title>Contents</title>
    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
    </svg>
  </symbol>
  <symbol id="svg-menu" viewBox="0 0 24 24">
    <title>Menu</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
      <line x1="3" y1="12" x2="21" y2="12"></line>
      <line x1="3" y1="6" x2="21" y2="6"></line>
      <line x1="3" y1="18" x2="21" y2="18"></line>
    </svg>
  </symbol>
  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
    <title>Expand</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
      <polyline points="9 18 15 12 9 6"></polyline>
    </svg>
  </symbol>
  <symbol id="svg-sun" viewBox="0 0 24 24">
    <title>Light mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
      <circle cx="12" cy="12" r="5"></circle>
      <line x1="12" y1="1" x2="12" y2="3"></line>
      <line x1="12" y1="21" x2="12" y2="23"></line>
      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
      <line x1="1" y1="12" x2="3" y2="12"></line>
      <line x1="21" y1="12" x2="23" y2="12"></line>
      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
    </svg>
  </symbol>
  <symbol id="svg-moon" viewBox="0 0 24 24">
    <title>Dark mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
    </svg>
  </symbol>
  <symbol id="svg-sun-half" viewBox="0 0 24 24">
    <title>Auto light/dark mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
      <circle cx="12" cy="12" r="9" />
      <path d="M13 12h5" />
      <path d="M13 15h4" />
      <path d="M13 18h1" />
      <path d="M13 9h4" />
      <path d="M13 6h1" />
    </svg>
  </symbol>
</svg>
|
|
|
|
<!-- Checkbox state for the navigation and table-of-contents drawers; the
     labels below (and the header icons) toggle them through their `for` ids. -->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
  <div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
  <div class="visually-hidden">Hide table of contents sidebar</div>
</label>
|
|
|
|
|
|
|
|
<div class="page">
|
|
<!-- Mobile header: navigation toggle, brand link, theme switcher and contents toggle. -->
<header class="mobile-header">
  <div class="header-left">
    <label class="nav-overlay-icon" for="__navigation">
      <div class="visually-hidden">Toggle site navigation sidebar</div>
      <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
    </label>
  </div>
  <div class="header-center">
    <a href="../../index.html"><div class="brand">CUTLASS Python</div></a>
  </div>
  <div class="header-right">
    <div class="theme-toggle-container theme-toggle-header">
      <!-- Explicit type: a bare <button> defaults to "submit". -->
      <button class="theme-toggle" type="button">
        <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
        <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
        <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
        <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
      </button>
    </div>
    <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
      <div class="visually-hidden">Toggle table of contents sidebar</div>
      <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
    </label>
  </div>
</header>
|
|
<!-- Navigation sidebar: brand link, search form and the site table-of-contents tree. -->
<aside class="sidebar-drawer">
  <div class="sidebar-container">
    <div class="sidebar-sticky">
      <a class="sidebar-brand" href="../../index.html">
        <div class="sidebar-logo-container">
          <img class="sidebar-logo only-light" src="../../_static/cutlass-logo-small.png" alt="Light Logo"/>
          <img class="sidebar-logo only-dark" src="../../_static/cutlass-logo-small.png" alt="Dark Logo"/>
        </div>
        <span class="sidebar-brand-text">CUTLASS Python</span>
      </a>
      <!-- Search submits a GET to search.html with the query in `q`. -->
      <form class="sidebar-search-container" method="get" action="../../search.html" role="search">
        <input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
        <input type="hidden" name="check_keywords" value="yes">
        <input type="hidden" name="area" value="default">
      </form>
      <div id="searchbox"></div>
      <div class="sidebar-scroll">
        <div class="sidebar-tree">
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../index.html">Home</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Getting Started:</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../install.html">Installation</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../externals/00_basic_gemm.html">Getting Started</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../contribute.html">Contributing</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Python Documentation:</span></p>
          <ul>
            <!-- Expandable entries pair a checkbox with a label; the nested list follows the label. -->
            <li class="toctree-l1 has-children"><a class="reference internal" href="../../modules.html">CUTLASS Python API</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
                <li class="toctree-l2 has-children"><a class="reference internal" href="../../cutlass.html">CUTLASS</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
                    <li class="toctree-l3"><a class="reference internal" href="../../cutlass.emit.html">Emitters</a></li>
                    <li class="toctree-l3"><a class="reference internal" href="../../cutlass.op.html">Operations</a></li>
                    <li class="toctree-l3"><a class="reference internal" href="../../cutlass.utils.html">Utilities</a></li>
                  </ul>
                </li>
              </ul>
            </li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Examples and Tutorials:</span></p>
          <ul>
            <li class="toctree-l1 has-children"><a class="reference internal" href="../../examples.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
                <li class="toctree-l2"><a class="reference internal" href="../../externals/00_basic_gemm.html">Basic GEMM</a></li>
                <li class="toctree-l2"><a class="reference internal" href="../../externals/01_epilogue.html">Epilogue</a></li>
                <li class="toctree-l2"><a class="reference internal" href="../../externals/02_pytorch_extension_grouped_gemm.html">PyTorch Extension</a></li>
              </ul>
            </li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Reference:</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference external" href="https://github.com/NVIDIA/cutlass">Github</a></li>
          </ul>
        </div>
      </div>
    </div>
  </div>
</aside>
|
|
<div class="main">
|
|
<div class="content">
|
|
<div class="article-container">
|
|
<!-- "Back to top" link: the empty fragment navigates to the top of the document. -->
<a href="#" class="back-to-top muted-link">
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
    <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
  </svg>
  <span>Back to top</span>
</a>
|
|
<!-- In-content controls: theme switcher and table-of-contents toggle. -->
<div class="content-icon-container">
  <div class="theme-toggle-container theme-toggle-content">
    <!-- Explicit type: a bare <button> defaults to "submit". -->
    <button class="theme-toggle" type="button">
      <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
      <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
      <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
      <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
    </button>
  </div>
  <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
    <div class="visually-hidden">Toggle table of contents sidebar</div>
    <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
  </label>
</div>
|
|
<article role="main">
|
|
<h1>Source code for cutlass.library_defaults</h1><div class="highlight"><pre>
|
|
<span></span><span class="c1">#################################################################################################</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># Copyright (c) 2023 - 2023 NVIDIA CORPORATION &amp; AFFILIATES. All rights reserved.</span>
|
|
<span class="c1"># SPDX-License-Identifier: BSD-3-Clause</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># Redistribution and use in source and binary forms, with or without</span>
|
|
<span class="c1"># modification, are permitted provided that the following conditions are met:</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># 1. Redistributions of source code must retain the above copyright notice, this</span>
|
|
<span class="c1"># list of conditions and the following disclaimer.</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># 2. Redistributions in binary form must reproduce the above copyright notice,</span>
|
|
<span class="c1"># this list of conditions and the following disclaimer in the documentation</span>
|
|
<span class="c1"># and/or other materials provided with the distribution.</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># 3. Neither the name of the copyright holder nor the names of its</span>
|
|
<span class="c1"># contributors may be used to endorse or promote products derived from</span>
|
|
<span class="c1"># this software without specific prior written permission.</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"</span>
|
|
<span class="c1"># AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE</span>
|
|
<span class="c1"># IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE</span>
|
|
<span class="c1"># DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE</span>
|
|
<span class="c1"># FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL</span>
|
|
<span class="c1"># DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR</span>
|
|
<span class="c1"># SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER</span>
|
|
<span class="c1"># CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,</span>
|
|
<span class="c1"># OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE</span>
|
|
<span class="c1"># OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1">#################################################################################################</span>
|
|
|
|
<span class="sd">"""</span>
|
|
<span class="sd">Classes containing valid operations for a given compute capability and data types.</span>
|
|
<span class="sd">"""</span>
|
|
|
|
<span class="kn">import</span> <span class="nn">logging</span>
|
|
<span class="kn">from</span> <span class="nn">cuda</span> <span class="kn">import</span> <span class="n">__version__</span>
|
|
|
|
<span class="c1"># Strip any additional information from the CUDA version</span>
|
|
<span class="n">_cuda_version</span> <span class="o">=</span> <span class="n">__version__</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"rc"</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
|
|
|
<span class="c1"># Imports from CUTLASS profiler generator and manifest scripts</span>
|
|
<span class="kn">import</span> <span class="nn">generator</span> <span class="k">as</span> <span class="nn">prof_generator</span>
|
|
<span class="kn">import</span> <span class="nn">manifest</span> <span class="k">as</span> <span class="nn">prof_manifest</span>
|
|
|
|
<span class="kn">import</span> <span class="nn">cutlass</span>
|
|
<span class="kn">from</span> <span class="nn">cutlass.utils.check</span> <span class="kn">import</span> <span class="n">valid_stage_count</span>
|
|
<span class="kn">from</span> <span class="nn">cutlass.utils.datatypes</span> <span class="kn">import</span> <span class="n">td_from_profiler_td</span><span class="p">,</span> <span class="n">td_from_profiler_op</span><span class="p">,</span> <span class="n">has_binding_type</span>
|
|
|
|
|
|
<span class="n">_generator_ccs</span> <span class="o">=</span> <span class="p">[</span><span class="mi">50</span><span class="p">,</span> <span class="mi">60</span><span class="p">,</span> <span class="mi">61</span><span class="p">,</span> <span class="mi">70</span><span class="p">,</span> <span class="mi">75</span><span class="p">,</span> <span class="mi">80</span><span class="p">,</span> <span class="mi">90</span><span class="p">]</span>
|
|
|
|
|
|
<!-- Highlighted listing of KernelsForDataType, which groups kernels by operand alignment for one data type/layout combination. Whitespace is significant inside the enclosing <pre>. find_alignment reads shape[0] for ColumnMajor and shape[1] for RowMajor. --><div class="viewcode-block" id="KernelsForDataType"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.KernelsForDataType">[docs]</a><span class="k">class</span> <span class="nc">KernelsForDataType</span><span class="p">:</span>
<span class="w">    </span><span class="sd">"""</span>
<span class="sd">    Container class for keeping track of kernels that correspond to a particular combination</span>
<span class="sd">    of data types for operands A, B, and accumulator</span>
<span class="sd">    """</span>

    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">datatype_comb</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">):</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">datatype_comb</span> <span class="o">=</span> <span class="n">datatype_comb</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">layout_comb</span> <span class="o">=</span> <span class="n">layout_comb</span>

        <span class="c1"># Dictionary mapping from alignment (int) to a list of kernels that fit the alignment</span>
        <span class="c1"># constraint for the data type combination</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span> <span class="o">=</span> <span class="p">{}</span>

<div class="viewcode-block" id="KernelsForDataType.add"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.KernelsForDataType.add">[docs]</a>    <span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">operation</span><span class="p">):</span>
<span class="w">        </span><span class="sd">"""</span>
<span class="sd">        Add an operation to the list of supported kernels</span>
<span class="sd">        """</span>
        <span class="n">alignment</span> <span class="o">=</span> <span class="n">operation</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">alignment</span>
        <span class="k">if</span> <span class="n">alignment</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="p">[</span><span class="n">alignment</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="p">[</span><span class="n">alignment</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">operation</span><span class="p">)</span></div>

    <span class="nd">@property</span>
    <span class="k">def</span> <span class="nf">alignments</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w">        </span><span class="sd">"""</span>
<span class="sd">        Returns an unsorted list of alignments supported by this data type combination</span>

<span class="sd">        :return: unsorted list of alignments supported by this data type combination</span>
<span class="sd">        :rtype: list</span>
<span class="sd">        """</span>
        <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>

    <span class="nd">@property</span>
    <span class="k">def</span> <span class="nf">all_operations</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w">        </span><span class="sd">"""</span>
<span class="sd">        Returns a list of all operations supported by this data type combination</span>

<span class="sd">        :return: list of all operations supported by this data type combination</span>
<span class="sd">        :rtype: list</span>
<span class="sd">        """</span>
        <span class="n">ops</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="k">for</span> <span class="n">_</span><span class="p">,</span> <span class="n">alignment_ops</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
            <span class="n">ops</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">alignment_ops</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">ops</span>

<div class="viewcode-block" id="KernelsForDataType.operations"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.KernelsForDataType.operations">[docs]</a>    <span class="k">def</span> <span class="nf">operations</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">alignment</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="w">        </span><span class="sd">"""</span>
<span class="sd">        Returns operations satisfying the alignment constraint indicated by `alignment`</span>

<span class="sd">        :param alignment: alignment constraint of operations to return</span>
<span class="sd">        :type alignment: int</span>

<span class="sd">        :return: list of operations</span>
<span class="sd">        :rtype: list</span>
<span class="sd">        """</span>
        <span class="k">if</span> <span class="n">alignment</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="p">:</span>
            <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
                <span class="sa">f</span><span class="s2">"No operations of alignment </span><span class="si">{</span><span class="n">alignment</span><span class="si">}</span><span class="s2"> found for data type and layout "</span>
                <span class="sa">f</span><span class="s2">"combination </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">datatype_comb</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">layout_comb</span><span class="si">}</span><span class="s2">"</span>
            <span class="p">)</span>
        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="p">[</span><span class="n">alignment</span><span class="p">]</span></div>

<div class="viewcode-block" id="KernelsForDataType.find_alignment"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.KernelsForDataType.find_alignment">[docs]</a>    <span class="k">def</span> <span class="nf">find_alignment</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">shape</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">,</span> <span class="n">layout</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w">        </span><span class="sd">"""</span>
<span class="sd">        Returns the most preferable alignment for a given shape and layout</span>

<span class="sd">        :param shape: extent of each dimension of the tensor</span>
<span class="sd">        :type shape: tuple</span>
<span class="sd">        :param layout: layout of the tensor</span>
<span class="sd">        :type layout: cutlass.LayoutType</span>

<span class="sd">        :return: maximum alignment supported by the data type combination and tensor size</span>
<span class="sd">        :rtype: int</span>
<span class="sd">        """</span>
        <span class="c1"># Determine the leading dimension of the shape</span>
        <span class="k">if</span> <span class="n">layout</span> <span class="o">==</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">ColumnMajor</span><span class="p">:</span>
            <span class="n">ld</span> <span class="o">=</span> <span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
        <span class="k">elif</span> <span class="n">layout</span> <span class="o">==</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">:</span>
            <span class="n">ld</span> <span class="o">=</span> <span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unexpected or unsupported layout </span><span class="si">{</span><span class="n">layout</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>

        <span class="k">for</span> <span class="n">alignment</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="o">.</span><span class="n">keys</span><span class="p">()),</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
            <span class="k">if</span> <span class="n">ld</span> <span class="o">%</span> <span class="n">alignment</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
                <span class="k">return</span> <span class="n">alignment</span>

        <span class="c1"># Default to alignment of 1 if no others match</span>
        <span class="k">return</span> <span class="mi">1</span></div>

<div class="viewcode-block" id="KernelsForDataType.sort"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.KernelsForDataType.sort">[docs]</a>    <span class="k">def</span> <span class="nf">sort</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w">        </span><span class="sd">"""</span>
<span class="sd">        Sorts each list of kernels in `kernels_by_alignment` in descending order of threadblock shape</span>
<span class="sd">        """</span>
        <span class="n">key</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">op</span><span class="p">:</span> <span class="p">(</span>
            <span class="n">op</span><span class="o">.</span><span class="n">tile_description</span><span class="o">.</span><span class="n">threadblock_shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
            <span class="o">*</span> <span class="n">op</span><span class="o">.</span><span class="n">tile_description</span><span class="o">.</span><span class="n">threadblock_shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
            <span class="o">*</span> <span class="n">op</span><span class="o">.</span><span class="n">tile_description</span><span class="o">.</span><span class="n">threadblock_shape</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
        <span class="p">)</span>
        <span class="k">for</span> <span class="n">alignment</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">kernels_by_alignment</span><span class="p">[</span><span class="n">alignment</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div></div>
|
|
|
|
|
|
<div class="viewcode-block" id="ArchOptions"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.ArchOptions">[docs]</a><span class="k">class</span> <span class="nc">ArchOptions</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Structure for keeping track of kernels available on a given compute capability</span>
|
|
|
|
<span class="sd"> :param target_cc: compute capability of the device on which kernels will be run</span>
|
|
<span class="sd"> :type target_cc: int</span>
|
|
<span class="sd"> :param kernel_cc: compute capability of the kernels to generate</span>
|
|
<span class="sd"> :type kernel_cc: int</span>
|
|
<span class="sd"> :param operation_kind: type of operation to register</span>
|
|
<span class="sd"> :type operation_kind: cutlass.OperationKind</span>
|
|
<span class="sd"> :param gemm_kinds: types of GEMM operations that can be included</span>
|
|
<span class="sd"> :type gemm_kinds: list</span>
|
|
<span class="sd"> :param allowed_math_operations: types of primitive math operations allowed</span>
|
|
<span class="sd"> :type allowed_math_operations: list</span>
|
|
<span class="sd"> """</span>
|
|
|
|
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
|
|
<span class="bp">self</span><span class="p">,</span>
|
|
<span class="n">target_cc</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
|
|
<span class="n">kernel_cc</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
|
|
<span class="n">operation_kind</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OperationKind</span><span class="p">,</span>
|
|
<span class="n">gemm_kinds</span><span class="p">:</span> <span class="nb">list</span><span class="p">,</span>
|
|
<span class="n">allowed_math_operations</span><span class="p">:</span> <span class="nb">list</span> <span class="o">=</span> <span class="p">[</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">MathOperation</span><span class="o">.</span><span class="n">multiply_add</span><span class="p">,</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">MathOperation</span><span class="o">.</span><span class="n">multiply_add_saturate</span><span class="p">,</span>
|
|
<span class="p">]</span>
|
|
<span class="p">):</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">cc</span> <span class="o">=</span> <span class="n">kernel_cc</span>
|
|
|
|
<span class="c1"># Dictionary with following structure:</span>
|
|
<span class="c1"># Key: OpcodeClass</span>
|
|
<span class="c1"># Value: Dictionary with the following structure:</span>
|
|
<span class="c1"># Key: tuple of ((DataType, DataType, DataType), (LayoutType, LayoutType)),</span>
|
|
<span class="c1"># representing ((element_a, element_b, element_accumulator), (layout_a, layout_b))</span>
|
|
<span class="c1"># Value: KernelsForDataType</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span> <span class="o">=</span> <span class="p">{}</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">op_class</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">allowed_math_operations</span> <span class="o">=</span> <span class="n">allowed_math_operations</span>
|
|
|
|
<span class="c1"># Identify the method within CUTLASS generator script that generates kernel</span>
|
|
<span class="c1"># descriptions for the target CC</span>
|
|
<span class="n">generate_function_name</span> <span class="o">=</span> <span class="s2">"GenerateSM"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">kernel_cc</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">prof_generator</span><span class="p">,</span> <span class="n">generate_function_name</span><span class="p">):</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"No generator found for architecture </span><span class="si">{</span><span class="n">kernel_cc</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">return</span>
|
|
<span class="n">generate_function</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">prof_generator</span><span class="p">,</span> <span class="n">generate_function_name</span><span class="p">)</span>
|
|
|
|
<span class="c1"># Initialize a default manifest and populate it with valid kernel descriptions</span>
|
|
<span class="c1"># for the target CC</span>
|
|
<span class="n">args</span> <span class="o">=</span> <span class="p">[</span>
|
|
<span class="s2">"--kernels=all"</span><span class="p">,</span>
|
|
<span class="sa">f</span><span class="s2">"--log-level=</span><span class="si">{</span><span class="n">logging</span><span class="o">.</span><span class="n">getLevelName</span><span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">level</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
|
|
<span class="p">]</span>
|
|
<span class="n">manifest_args</span> <span class="o">=</span> <span class="n">prof_generator</span><span class="o">.</span><span class="n">define_parser</span><span class="p">()</span><span class="o">.</span><span class="n">parse_args</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
|
<span class="n">manifest</span> <span class="o">=</span> <span class="n">prof_manifest</span><span class="o">.</span><span class="n">Manifest</span><span class="p">(</span><span class="n">manifest_args</span><span class="p">)</span>
|
|
<span class="n">generate_function</span><span class="p">(</span><span class="n">manifest</span><span class="p">,</span> <span class="n">_cuda_version</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="n">operation_kind</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">manifest</span><span class="o">.</span><span class="n">operations</span><span class="p">:</span>
|
|
<span class="c1"># No kernels generated for this architecture, this could be because the CUDA</span>
|
|
<span class="c1"># toolkit is insufficient to support operations in this CC</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"No operations of type </span><span class="si">{</span><span class="n">operation_kind</span><span class="si">}</span><span class="s2"> found for CC </span><span class="si">{</span><span class="n">kernel_cc</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">return</span>
|
|
|
|
<span class="c1"># Iterate through the available operations for this operation kind and</span>
|
|
<span class="c1"># find available opclasses and data types</span>
|
|
<span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">op_list</span> <span class="ow">in</span> <span class="n">manifest</span><span class="o">.</span><span class="n">operations</span><span class="p">[</span><span class="n">operation_kind</span><span class="p">]</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
|
<span class="k">for</span> <span class="n">op</span> <span class="ow">in</span> <span class="n">op_list</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="n">op</span><span class="o">.</span><span class="n">gemm_kind</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">gemm_kinds</span><span class="p">:</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="n">mi</span> <span class="o">=</span> <span class="n">op</span><span class="o">.</span><span class="n">tile_description</span><span class="o">.</span><span class="n">math_instruction</span>
|
|
<span class="k">if</span> <span class="n">mi</span><span class="o">.</span><span class="n">math_operation</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">allowed_math_operations</span><span class="p">:</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="n">datatype_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">mi</span><span class="o">.</span><span class="n">element_a</span><span class="p">,</span> <span class="n">mi</span><span class="o">.</span><span class="n">element_b</span><span class="p">,</span> <span class="n">mi</span><span class="o">.</span><span class="n">element_accumulator</span><span class="p">)</span>
|
|
|
|
<span class="c1"># Skip any data types that do not currently have conversions via cutlass_bindings</span>
|
|
<span class="k">if</span> <span class="kc">False</span> <span class="ow">in</span> <span class="p">[</span><span class="n">has_binding_type</span><span class="p">(</span><span class="n">elt</span><span class="p">)</span> <span class="k">for</span> <span class="n">elt</span> <span class="ow">in</span> <span class="n">datatype_comb</span><span class="p">]:</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="c1"># Prune operations that don't fit in shared memory</span>
|
|
<span class="n">td</span> <span class="o">=</span> <span class="n">td_from_profiler_op</span><span class="p">(</span><span class="n">op</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">valid_stage_count</span><span class="p">(</span><span class="n">target_cc</span><span class="p">,</span> <span class="n">td</span><span class="p">)[</span><span class="mi">0</span><span class="p">]:</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="k">if</span> <span class="n">mi</span><span class="o">.</span><span class="n">opcode_class</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">:</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">mi</span><span class="o">.</span><span class="n">opcode_class</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
|
|
|
<span class="n">datatype_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">mi</span><span class="o">.</span><span class="n">element_a</span><span class="p">,</span> <span class="n">mi</span><span class="o">.</span><span class="n">element_b</span><span class="p">,</span> <span class="n">mi</span><span class="o">.</span><span class="n">element_accumulator</span><span class="p">)</span>
|
|
<span class="n">layout_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">op</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">layout</span><span class="p">,</span> <span class="n">op</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">layout</span><span class="p">)</span>
|
|
|
|
<span class="c1"># Register TF32 kernels as F32 to enable F32 -> TF32 conversion + TF32 Tensor Core operations</span>
|
|
<span class="k">if</span> <span class="n">datatype_comb</span> <span class="o">==</span> <span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">tf32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">tf32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">):</span>
|
|
<span class="c1"># TF32 kernels only supported on SM80 and beyond</span>
|
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cc</span> <span class="o">&lt;</span> <span class="mi">80</span><span class="p">:</span>
|
|
<span class="k">continue</span>
|
|
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">cc</span> <span class="o">==</span> <span class="mi">90</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="p">(</span><span class="n">op</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">element</span> <span class="o">!=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span>
|
|
<span class="ow">or</span> <span class="n">op</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">element</span> <span class="o">!=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span>
|
|
<span class="ow">or</span> <span class="n">op</span><span class="o">.</span><span class="n">C</span><span class="o">.</span><span class="n">element</span> <span class="o">!=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">):</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="n">datatype_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">)</span>
|
|
|
|
<span class="n">opclass_dict</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">mi</span><span class="o">.</span><span class="n">opcode_class</span><span class="p">]</span>
|
|
<span class="n">key</span> <span class="o">=</span> <span class="p">(</span><span class="n">datatype_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">opclass_dict</span><span class="p">:</span>
|
|
<span class="n">opclass_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">KernelsForDataType</span><span class="p">(</span><span class="n">datatype_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">)</span>
|
|
<span class="n">opclass_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">op</span><span class="p">)</span>
|
|
|
|
<span class="c1"># Set the default opclass to TensorOp, if available. Otherwise default to SIMT</span>
|
|
<span class="k">if</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">TensorOp</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">:</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">op_class</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">TensorOp</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">op_class</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">Simt</span>
|
|
|
|
<span class="c1"># The profiler's generator may generate only a limited set of combinations of operands for SIMT kernels.</span>
|
|
<span class="c1"># Here, we generate additional versions via a generic TileDescription.</span>
|
|
<span class="k">if</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">Simt</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">:</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">Simt</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
|
|
|
<span class="n">types</span> <span class="o">=</span> <span class="p">[</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">s8</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">s8</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">s8</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">s8</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">s8</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">s32</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f16</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f16</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f16</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f16</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f16</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f32</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f64</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f64</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="o">.</span><span class="n">f64</span><span class="p">),</span>
|
|
<span class="p">]</span>
|
|
|
|
<span class="n">layouts</span> <span class="o">=</span> <span class="p">[</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">ColumnMajor</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">ColumnMajor</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">RowMajor</span><span class="p">),</span>
|
|
<span class="p">(</span><span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">ColumnMajor</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">ColumnMajor</span><span class="p">),</span>
|
|
<span class="p">]</span>
|
|
<span class="n">alignment</span> <span class="o">=</span> <span class="mi">1</span>
|
|
<span class="n">epilogue_functor</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">EpilogueFunctor</span><span class="o">.</span><span class="n">LinearCombination</span>
|
|
<span class="n">swizzling_functor</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">SwizzlingFunctor</span><span class="o">.</span><span class="n">Identity8</span>
|
|
<span class="k">for</span> <span class="n">type_comb</span> <span class="ow">in</span> <span class="n">types</span><span class="p">:</span>
|
|
<span class="k">for</span> <span class="n">layout_comb</span> <span class="ow">in</span> <span class="n">layouts</span><span class="p">:</span>
|
|
<span class="n">comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">type_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">comb</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">Simt</span><span class="p">]:</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="n">A</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">TensorDescription</span><span class="p">(</span><span class="n">type_comb</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">layout_comb</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">alignment</span><span class="p">)</span>
|
|
<span class="n">B</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">TensorDescription</span><span class="p">(</span><span class="n">type_comb</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">layout_comb</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">alignment</span><span class="p">)</span>
|
|
<span class="n">C</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">TensorDescription</span><span class="p">(</span><span class="n">type_comb</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="o">.</span><span class="n">ColumnMajor</span><span class="p">,</span> <span class="n">alignment</span><span class="p">)</span>
|
|
<span class="n">math_inst</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">MathInstruction</span><span class="p">(</span>
|
|
<span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span>
|
|
<span class="n">type_comb</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
|
|
<span class="n">type_comb</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
|
|
<span class="n">type_comb</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">Simt</span><span class="p">,</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">MathOperation</span><span class="o">.</span><span class="n">multiply_add</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="n">td</span> <span class="o">=</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">TileDescription</span><span class="p">(</span>
|
|
<span class="p">[</span><span class="mi">128</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="mi">2</span><span class="p">,</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">math_inst</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="mi">1024</span><span class="p">)</span>
|
|
|
|
<span class="c1"># Prune operations that don't fit in shared memory</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">valid_stage_count</span><span class="p">(</span><span class="n">target_cc</span><span class="p">,</span> <span class="n">td_from_profiler_td</span><span class="p">(</span><span class="n">td</span><span class="p">))[</span><span class="mi">0</span><span class="p">]:</span>
|
|
<span class="k">continue</span>
|
|
|
|
<span class="n">new_operation</span> <span class="o">=</span> <span class="n">prof_manifest</span><span class="o">.</span><span class="n">GemmOperation</span><span class="p">(</span>
|
|
<span class="n">cutlass</span><span class="o">.</span><span class="n">GemmKind</span><span class="o">.</span><span class="n">Universal</span><span class="p">,</span> <span class="n">td</span><span class="o">.</span><span class="n">minimum_compute_capability</span><span class="p">,</span>
|
|
<span class="n">td</span><span class="p">,</span> <span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">,</span> <span class="n">C</span><span class="p">,</span> <span class="n">type_comb</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="n">epilogue_functor</span><span class="p">,</span> <span class="n">swizzling_functor</span><span class="p">)</span>
|
|
|
|
<span class="n">new_kernels</span> <span class="o">=</span> <span class="n">KernelsForDataType</span><span class="p">(</span><span class="n">type_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">)</span>
|
|
<span class="n">new_kernels</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">new_operation</span><span class="p">)</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="o">.</span><span class="n">Simt</span><span class="p">][</span><span class="n">comb</span><span class="p">]</span> <span class="o">=</span> <span class="n">new_kernels</span>
|
|
|
|
<span class="c1"># Sort all operations</span>
|
|
<span class="k">for</span> <span class="n">oc</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
|
|
<span class="k">for</span> <span class="n">comb</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">oc</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">oc</span><span class="p">][</span><span class="n">comb</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
|
|
|
<div class="viewcode-block" id="ArchOptions.opclass_supports_combination"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.ArchOptions.opclass_supports_combination">[docs]</a> <span class="k">def</span> <span class="nf">opclass_supports_combination</span><span class="p">(</span>
|
|
<span class="bp">self</span><span class="p">,</span> <span class="n">op_class</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="p">,</span> <span class="n">datatype_comb</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">:</span> <span class="nb">tuple</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Returns whether the provided operation class supports the provided data type and layout combination</span>
|
|
|
|
<span class="sd"> :param op_class: operation class to consider</span>
|
|
<span class="sd"> :type op_class: cutlass.OpcodeClass</span>
|
|
<span class="sd"> :param datatype_comb: tuple of data types for (element_A, element_B, element_accumulator)</span>
|
|
<span class="sd"> :type datatype_comb: tuple[cutlass.DataType]</span>
|
|
<span class="sd"> :param layout_comb: tuple of layouts for (layout_A, layout_B)</span>
|
|
<span class="sd"> :type layout_comb: tuple[cutlass.LayoutType]</span>
|
|
|
|
<span class="sd"> :return: whether the provided operation class supports the provided data type and layout combination</span>
|
|
<span class="sd"> :rtype: bool</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">if</span> <span class="n">op_class</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">:</span>
|
|
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unexpected or unsupported operation class </span><span class="si">{</span><span class="n">op_class</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
|
|
<span class="k">return</span> <span class="p">(</span><span class="n">datatype_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">op_class</span><span class="p">]</span></div>
|
|
|
|
<div class="viewcode-block" id="ArchOptions.supporting_opclasses"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.ArchOptions.supporting_opclasses">[docs]</a> <span class="k">def</span> <span class="nf">supporting_opclasses</span><span class="p">(</span>
|
|
<span class="bp">self</span><span class="p">,</span>
|
|
<span class="n">element_a</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
|
|
<span class="n">element_b</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
|
|
<span class="n">element_accumulator</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
|
|
<span class="n">layout_a</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="p">,</span>
|
|
<span class="n">layout_b</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="nb">set</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Returns a set of operation classes that support the provided data type and layout combination</span>
|
|
|
|
<span class="sd"> :param element_a: data type of operand A</span>
|
|
<span class="sd"> :type element_a: cutlass.DataType</span>
|
|
<span class="sd"> :param element_b: data type of operand B</span>
|
|
<span class="sd"> :type element_b: cutlass.DataType</span>
|
|
<span class="sd"> :param element_accumulator: data type of accumulator</span>
|
|
<span class="sd"> :type element_accumulator: cutlass.DataType</span>
|
|
<span class="sd"> :param layout_a: layout of operand A</span>
|
|
<span class="sd"> :type layout_a: cutlass.LayoutType</span>
|
|
<span class="sd"> :param layout_b: layout of operand B</span>
|
|
<span class="sd"> :type layout_b: cutlass.LayoutType</span>
|
|
|
|
<span class="sd"> :return: set of operation classes that support the provided data type and layout combination</span>
|
|
<span class="sd"> :rtype: set</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">supporting_op_classes</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
|
|
<span class="n">datatype_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">element_a</span><span class="p">,</span> <span class="n">element_b</span><span class="p">,</span> <span class="n">element_accumulator</span><span class="p">)</span>
|
|
<span class="n">layout_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">layout_a</span><span class="p">,</span> <span class="n">layout_b</span><span class="p">)</span>
|
|
|
|
<span class="k">for</span> <span class="n">op_class</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
|
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">opclass_supports_combination</span><span class="p">(</span><span class="n">op_class</span><span class="p">,</span> <span class="n">datatype_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">):</span>
|
|
<span class="n">supporting_op_classes</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">op_class</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="n">supporting_op_classes</span></div>
|
|
|
|
<div class="viewcode-block" id="ArchOptions.operations"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.ArchOptions.operations">[docs]</a> <span class="k">def</span> <span class="nf">operations</span><span class="p">(</span>
|
|
<span class="bp">self</span><span class="p">,</span>
|
|
<span class="n">op_class</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OpcodeClass</span><span class="p">,</span>
|
|
<span class="n">element_a</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
|
|
<span class="n">element_b</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
|
|
<span class="n">element_accumulator</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span>
|
|
<span class="n">layout_a</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="p">,</span>
|
|
<span class="n">layout_b</span><span class="p">:</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">LayoutType</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">KernelsForDataType</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Returns the container of kernels that the provided operation class offers for the provided data type and layout combination</span>
|
|
|
|
<span class="sd"> :param op_class: operation class to consider</span>
|
|
<span class="sd"> :type op_class: cutlass.OpcodeClass</span>
|
|
<span class="sd"> :param element_a: data type of operand A</span>
|
|
<span class="sd"> :type element_a: cutlass.DataType</span>
|
|
<span class="sd"> :param element_b: data type of operand B</span>
|
|
<span class="sd"> :type element_b: cutlass.DataType</span>
|
|
<span class="sd"> :param element_accumulator: data type of accumulator</span>
|
|
<span class="sd"> :type element_accumulator: cutlass.DataType</span>
|
|
<span class="sd"> :param layout_a: layout of operand A</span>
|
|
<span class="sd"> :type layout_a: cutlass.LayoutType</span>
|
|
<span class="sd"> :param layout_b: layout of operand B</span>
|
|
<span class="sd"> :type layout_b: cutlass.LayoutType</span>
|
|
|
|
<span class="sd"> :return: container of kernels by alignment supported by the provided combination of parameters</span>
|
|
<span class="sd"> :rtype: KernelsForDataType</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">datatype_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">element_a</span><span class="p">,</span> <span class="n">element_b</span><span class="p">,</span> <span class="n">element_accumulator</span><span class="p">)</span>
|
|
<span class="n">layout_comb</span> <span class="o">=</span> <span class="p">(</span><span class="n">layout_a</span><span class="p">,</span> <span class="n">layout_b</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">opclass_supports_combination</span><span class="p">(</span><span class="n">op_class</span><span class="p">,</span> <span class="n">datatype_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">):</span>
|
|
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
|
|
<span class="sa">f</span><span class="s2">"Data type layout combination </span><span class="si">{</span><span class="n">datatype_comb</span><span class="si">}</span><span class="s2">, </span><span class="si">{</span><span class="n">layout_comb</span><span class="si">}</span><span class="s2"> "</span>
|
|
<span class="sa">f</span><span class="s2">"is not supported by opcode class </span><span class="si">{</span><span class="n">op_class</span><span class="si">}</span><span class="s2"> on CC </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">cc</span><span class="si">}</span><span class="s2">."</span>
|
|
<span class="p">)</span>
|
|
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">operations_by_opclass</span><span class="p">[</span><span class="n">op_class</span><span class="p">][(</span><span class="n">datatype_comb</span><span class="p">,</span> <span class="n">layout_comb</span><span class="p">)]</span></div></div>
|
|
|
|
|
|
<div class="viewcode-block" id="OptionRegistry"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.OptionRegistry">[docs]</a><span class="k">class</span> <span class="nc">OptionRegistry</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Container of all architecture-specific options</span>
|
|
|
|
<span class="sd"> :param target_cc: compute capability of the device on which operations will be run</span>
|
|
<span class="sd"> :type target_cc: int</span>
|
|
<span class="sd"> """</span>
|
|
|
|
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">target_cc</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">registry</span> <span class="o">=</span> <span class="p">{}</span>
|
|
|
|
<span class="n">gemm_kinds</span> <span class="o">=</span> <span class="p">[</span><span class="n">cutlass</span><span class="o">.</span><span class="n">GemmKind</span><span class="o">.</span><span class="n">Universal</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">GemmKind</span><span class="o">.</span><span class="n">Universal3x</span><span class="p">]</span>
|
|
<span class="c1"># Construct options for each CC</span>
|
|
<span class="k">for</span> <span class="n">kernel_cc</span> <span class="ow">in</span> <span class="n">_generator_ccs</span><span class="p">:</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">registry</span><span class="p">[</span><span class="n">kernel_cc</span><span class="p">]</span> <span class="o">=</span> <span class="n">ArchOptions</span><span class="p">(</span><span class="n">target_cc</span><span class="p">,</span> <span class="n">kernel_cc</span><span class="p">,</span> <span class="n">cutlass</span><span class="o">.</span><span class="n">OperationKind</span><span class="o">.</span><span class="n">Gemm</span><span class="p">,</span> <span class="n">gemm_kinds</span><span class="p">)</span>
|
|
|
|
<div class="viewcode-block" id="OptionRegistry.options_for_cc"><a class="viewcode-back" href="../../cutlass.html#cutlass.library_defaults.OptionRegistry.options_for_cc">[docs]</a> <span class="k">def</span> <span class="nf">options_for_cc</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cc</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">ArchOptions</span><span class="p">:</span>
|
|
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">registry</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cc</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></div></div>
|
|
</pre></div>
|
|
</article>
|
|
</div>
|
|
<footer>
|
|
|
|
<div class="related-pages">
|
|
|
|
|
|
</div>
|
|
<div class="bottom-of-page">
|
|
<div class="left-details">
|
|
<div class="copyright">
|
|
Copyright © 2023, NVIDIA
|
|
</div>
|
|
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
|
|
|
|
<a href="https://github.com/pradyunsg/furo">Furo</a>
|
|
|
|
</div>
|
|
<div class="right-details">
|
|
<div class="icons">
|
|
<a class="muted-link " href="https://github.com/NVIDIA/cutlass" aria-label="GitHub">
|
|
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
|
|
<path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path>
|
|
</svg>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</footer>
|
|
</div>
|
|
<aside class="toc-drawer no-toc">
|
|
|
|
|
|
|
|
</aside>
|
|
</div>
|
|
</div><script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
<script src="../../_static/doctools.js"></script>
|
|
<script src="../../_static/sphinx_highlight.js"></script>
|
|
<script src="../../_static/scripts/furo.js"></script>
|
|
<script src="../../_static/clipboard.min.js"></script>
|
|
<script src="../../_static/copybutton.js"></script>
|
|
<script src="../../_static/tabs.js"></script>
|
|
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
|
|
</body>
|
|
</html> |