CUTLASS 2.1 (#83)

CUTLASS 2.1 contributes:
- BLAS-style host-side API added to CUTLASS Library
- Planar Complex GEMM kernels targeting Volta and Turing Tensor Cores
- Minor enhancements and bug fixes
This commit is contained in:
Andrew Kerr
2020-04-07 13:51:25 -07:00
committed by GitHub
parent 7c0cd26d13
commit 96dab34ad9
196 changed files with 20653 additions and 1995 deletions

View File

@@ -64,37 +64,22 @@ void FilterArchitecture() {
/// Maximum compute capability for which the kernels are enabled
int max_compute_capability;
/// If true, architecture is assumed to be silicon
bool silicon;
}
test_filters[] = {
{ "SM50*", 50, kMaxDevice, true},
{ "SM60*", 60, kMaxDevice, true},
{ "SM61*", 61, kMaxDevice, true},
{ "SM70*", 70, 75, true},
{ "SM75*", 75, kMaxDevice, true},
{ "SM50*", 50, kMaxDevice},
{ "SM60*", 60, kMaxDevice},
{ "SM61*", 61, kMaxDevice},
{ "SM70*", 70, 75},
{ "SM75*", 75, kMaxDevice},
{ 0, 0, false }
};
bool running_on_silicon = false;
for (int i = 0; test_filters[i].filter; ++i) {
if (deviceMajorMinor == test_filters[i].min_compute_capability) {
running_on_silicon = test_filters[i].silicon;
break;
}
}
// Set negative test filters
std::stringstream ss;
ss << "-";
for (int i = 0, j = 0; test_filters[i].filter; ++i) {
if (!running_on_silicon && deviceMajorMinor != test_filters[i].min_compute_capability) {
ss << (j++ ? ":" : "") << test_filters[i].filter;
}
else if (deviceMajorMinor < test_filters[i].min_compute_capability ||
if (deviceMajorMinor < test_filters[i].min_compute_capability ||
deviceMajorMinor > test_filters[i].max_compute_capability) {
ss << (j++ ? ":" : "") << test_filters[i].filter;