CUTLASS 2.6.1 - functional and performance enhancements to strided DGRAD, fixes, and tuning

* cutlass 2.6 update

* remove debug prints

* cutlass 2.6.1 (minor update)

* Updated CHANGELOG.

* Minor edit to readme to indicate patch version.

* Minor edit to readme.

Co-authored-by: Haicheng Wu <haichengw@nvidia.com>, Andrew Kerr <akerr@nvidia.com>
This commit is contained in:
Manish Gupta
2021-09-03 10:26:15 -07:00
committed by GitHub
parent a01feb93d9
commit 6c2f8f2fb8
55 changed files with 317 additions and 315 deletions

View File

@@ -791,7 +791,7 @@ bool GemmOperationProfiler::verify_with_reference_(
 handle.set_provider(provider);
 Status status = handle.gemm_universal(
-library::GemmUniversalMode::kGemm,
+problem_.mode,
 gemm_workspace_.configuration.problem_size.m(),
 gemm_workspace_.configuration.problem_size.n(),
 gemm_workspace_.configuration.problem_size.k(),

View File

@@ -29,6 +29,7 @@
#pragma once
#include <cuda_runtime.h>
#include "cutlass/cutlass.h"
namespace cutlass {
namespace profiler {

View File

@@ -425,7 +425,9 @@ void Options::Profiling::print_usage(std::ostream &out) const {
 << " Number of ms to sleep between profiling periods (ms).\n\n"
 << " --profiling-enabled=<bool> "
-<< " If true, profiling is actually conducted.\n\n";
+<< " If true, profiling is actually conducted.\n\n"
+;
 }
 void Options::Profiling::print_options(std::ostream &out, int indent) const {