CUTLASS 2.6.1 - functional and performance enhancements to strided DGRAD, fixes, and tuning
* cutlass 2.6 update
* remove debug prints
* cutlass 2.6.1 (minor update)
* Updated CHANGELOG.
* Minor edit to readme to indicate patch version.
* Minor edit to readme.

Co-authored-by: Haicheng Wu <haichengw@nvidia.com>, Andrew Kerr <akerr@nvidia.com>
This commit is contained in:
@ -791,7 +791,7 @@ bool GemmOperationProfiler::verify_with_reference_(
|
||||
handle.set_provider(provider);
|
||||
|
||||
Status status = handle.gemm_universal(
|
||||
library::GemmUniversalMode::kGemm,
|
||||
problem_.mode,
|
||||
gemm_workspace_.configuration.problem_size.m(),
|
||||
gemm_workspace_.configuration.problem_size.n(),
|
||||
gemm_workspace_.configuration.problem_size.k(),
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -425,7 +425,9 @@ void Options::Profiling::print_usage(std::ostream &out) const {
|
||||
<< " Number of ms to sleep between profiling periods (ms).\n\n"
|
||||
|
||||
<< " --profiling-enabled=<bool> "
|
||||
<< " If true, profiling is actually conducted.\n\n";
|
||||
<< " If true, profiling is actually conducted.\n\n"
|
||||
|
||||
;
|
||||
}
|
||||
|
||||
void Options::Profiling::print_options(std::ostream &out, int indent) const {
|
||||
|
||||
Reference in New Issue
Block a user