CUTLASS 2.0 (#62)

CUTLASS 2.0

Substantially refactored for

- Better performance, particularly for native Turing Tensor Cores
- Robust and durable templates spanning the design space
- Encapsulated functionality embodying modern C++11 programming techniques
- Optimized containers and data types for efficient, generic, portable device code

Updates to:
- Quick start guide
- Documentation
- Utilities
- CUTLASS Profiler

Native Turing Tensor Cores
- Efficient GEMM kernels targeting Turing Tensor Cores
- Mixed-precision floating point, 8-bit integer, 4-bit integer, and binarized operands

Coverage of existing CUTLASS functionality:
- GEMM kernels targeting CUDA and Tensor Cores in NVIDIA GPUs
- Volta Tensor Cores through native mma.sync and through WMMA API
- Optimizations such as parallel reductions, threadblock rasterization, and intra-threadblock reductions
- Batched GEMM operations
- Complex-valued GEMMs

Note: this commit and all that follow require a host compiler supporting C++11 or greater.
This commit is contained in:
Andrew Kerr
2019-11-19 16:55:34 -08:00
committed by GitHub
parent b5cab177a9
commit fb335f6a5f
5434 changed files with 599799 additions and 250176 deletions

View File

@ -1,26 +1,3 @@
/*
@licstart The following is the entire license notice for the
JavaScript code in this file.
Copyright (C) 1997-2017 by Dimitri van Heesch
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
@licend The above is the entire license notice
for the JavaScript code in this file
*/
function toggleVisibility(linkObj)
{
var base = $(linkObj).attr('id');
@ -38,7 +15,7 @@ function toggleVisibility(linkObj)
summary.hide();
$(linkObj).removeClass('closed').addClass('opened');
$(trigger).attr('src',src.substring(0,src.length-10)+'open.png');
}
}
return false;
}
@ -60,7 +37,7 @@ function toggleLevel(level)
$(this).show();
} else if (l==level+1) {
i.removeClass('iconfclosed iconfopen').addClass('iconfclosed');
a.html('▶');
a.html('►');
$(this).show();
} else {
$(this).hide();
@ -87,7 +64,7 @@ function toggleFolder(id)
// replace down arrow by right arrow for current row
var currentRowSpans = currentRow.find("span");
currentRowSpans.filter(".iconfopen").removeClass("iconfopen").addClass("iconfclosed");
currentRowSpans.filter(".arrow").html('▶');
currentRowSpans.filter(".arrow").html('►');
rows.filter("[id^=row_"+id+"]").hide(); // hide all children
} else { // we are SHOWING
// replace right arrow by down arrow for current row
@ -97,7 +74,7 @@ function toggleFolder(id)
// replace down arrows by right arrows for child rows
var childRowsSpans = childRows.find("span");
childRowsSpans.filter(".iconfopen").removeClass("iconfopen").addClass("iconfclosed");
childRowsSpans.filter(".arrow").html('▶');
childRowsSpans.filter(".arrow").html('►');
childRows.show(); //show all children
}
updateStripes();
@ -117,4 +94,4 @@ function toggleInherit(id)
$(img).attr('src',src.substring(0,src.length-10)+'open.png');
}
}
/* @license-end */