CUTLASS 3.2.1 (#1113)
* Updates for 3.2.1 release. * Minor fix in gemm op profiler for raster order. * Add scheduler mapping for raster order in the kernels.
This commit is contained in:
@ -73,7 +73,7 @@ set_target_properties(cutlass_profiler PROPERTIES EXPORT_NAME profiler)
|
||||
target_include_directories(
|
||||
cutlass_profiler
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_LIST_DIR}/src
|
||||
${CMAKE_CURRENT_LIST_DIR}/include
|
||||
)
|
||||
|
||||
#
|
||||
@ -97,14 +97,14 @@ install(
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
)
|
||||
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_GEMM --operation=Gemm --providers=cutlass --verification-providers=cublas,device --junit-output=test_cutlass_profiler_gemm)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_CONV2D --operation=Conv2d --providers=cutlass --verification-providers=cudnn,device --junit-output=test_cutlass_profiler_conv2d)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_CONV3D --operation=Conv3d --providers=cutlass --verification-providers=cudnn,device,host --junit-output=test_cutlass_profiler_conv3d)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_SPGEMM --operation=SparseGemm --providers=cutlass --verification-providers=cublas,device,host --junit-output=test_cutlass_profiler_spgemm)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_RANK_K --operation=RankK --providers=cutlass --verification-providers=cublas --junit-output=test_cutlass_profiler_rank_k)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_RANK_2K --operation=Rank2K --providers=cutlass --verification-providers=cublas --junit-output=test_cutlass_profiler_rank_2k)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_TRMM --operation=Trmm --providers=cutlass --verification-providers=device,host --junit-output=test_cutlass_profiler_trmm)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_SYMM --operation=Symm --providers=cutlass --verification-providers=cublas,host --junit-output=test_cutlass_profiler_symm)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_GEMM --operation=Gemm --providers=cutlass --verification-providers=cublas,device --junit-output=test_cutlass_profiler_gemm --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_CONV2D --operation=Conv2d --providers=cutlass --verification-providers=cudnn,device --junit-output=test_cutlass_profiler_conv2d --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_CONV3D --operation=Conv3d --providers=cutlass --verification-providers=cudnn,device,host --junit-output=test_cutlass_profiler_conv3d --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_SPGEMM --operation=SparseGemm --providers=cutlass --verification-providers=cublas,device,host --junit-output=test_cutlass_profiler_spgemm --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_RANK_K --operation=RankK --providers=cutlass --verification-providers=cublas --junit-output=test_cutlass_profiler_rank_k --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_RANK_2K --operation=Rank2K --providers=cutlass --verification-providers=cublas --junit-output=test_cutlass_profiler_rank_2k --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_TRMM --operation=Trmm --providers=cutlass --verification-providers=device,host --junit-output=test_cutlass_profiler_trmm --print-kernel-before-running=true)
|
||||
set(CUTLASS_PROFILER_TEST_COMMAND_OPTIONS_SYMM --operation=Symm --providers=cutlass --verification-providers=cublas,host --junit-output=test_cutlass_profiler_symm --print-kernel-before-running=true)
|
||||
|
||||
cutlass_add_executable_tests(
|
||||
test_profiler cutlass_profiler
|
||||
|
||||
@ -383,6 +383,8 @@ public:
|
||||
/// Destructor
|
||||
virtual ~Conv2dOperationProfiler();
|
||||
|
||||
Conv2dProblem const& problem() const { return problem_; }
|
||||
|
||||
/// Prints usage statement for the math function
|
||||
virtual void print_usage(std::ostream &out) const;
|
||||
|
||||
@ -332,6 +332,8 @@ public:
|
||||
/// Destructor
|
||||
virtual ~Conv3dOperationProfiler();
|
||||
|
||||
Conv3dProblem const& problem() const { return problem_; }
|
||||
|
||||
/// Prints usage statement for the math function
|
||||
virtual void print_usage(std::ostream &out) const;
|
||||
|
||||
@ -82,6 +82,7 @@ public:
|
||||
int split_k_slices;
|
||||
int batch_count;
|
||||
|
||||
cutlass::library::RasterOrder raster_order;
|
||||
// gemm with parallel interleaved reduction
|
||||
// gemm epilogue (alpha, beta) = (1.0, 0.0)
|
||||
// reduction epilogue (alpha, beta) = (GemmProblem::alpha, GemmProblem::beta)
|
||||
@ -94,7 +95,8 @@ public:
|
||||
|
||||
GemmProblem():
|
||||
mode(library::GemmUniversalMode::kGemm),
|
||||
m(16), n(16), k(16), lda(0), ldb(0), ldc(0), split_k_slices(1), batch_count(1) { }
|
||||
m(16), n(16), k(16), lda(0), ldb(0), ldc(0), split_k_slices(1), batch_count(1),
|
||||
raster_order(cutlass::library::RasterOrder::kHeuristic){ }
|
||||
|
||||
/// Parses the problem
|
||||
Status parse(
|
||||
@ -178,6 +180,8 @@ public:
|
||||
/// Destructor
|
||||
virtual ~GemmOperationProfiler();
|
||||
|
||||
GemmProblem const& problem() const { return problem_; }
|
||||
|
||||
/// Prints usage statement for the math function
|
||||
virtual void print_usage(std::ostream &out) const;
|
||||
|
||||
@ -247,6 +247,10 @@ public:
|
||||
/// Sort results (currently by flops-per-byte)
|
||||
bool sort_results;
|
||||
|
||||
/// Prints the name of the kernel being profiled before running the kernel.
|
||||
/// This is useful for determining which kernel is causing a run of the profiler to hang
|
||||
bool print_kernel_before_running;
|
||||
|
||||
//
|
||||
// Methods
|
||||
//
|
||||
@ -935,6 +935,15 @@ bool arg_as_IteratorAlgorithmID(
|
||||
ProblemSpace const &problem_space,
|
||||
ProblemSpace::Problem const &problem);
|
||||
|
||||
/// Casts an enumerated argument to a library::RasterOrder if it is defined. Returns true if not null.
|
||||
bool arg_as_RasterOrder(library::RasterOrder &raster_order, KernelArgument::Value const *value_ptr);
|
||||
|
||||
/// Looks up the named argument and casts it to a library::RasterOrder if it is defined. Returns true if not null.
|
||||
bool arg_as_RasterOrder(
|
||||
library::RasterOrder &raster_order,
|
||||
char const *name,
|
||||
ProblemSpace const &problem_space,
|
||||
ProblemSpace::Problem const &problem);
|
||||
|
||||
/// Lexically casts an argument to a library::Provider if it is defined. Returns true if not null.
|
||||
bool arg_as_ProviderID(library::Provider &provider, KernelArgument::Value const *value_ptr);
|
||||
@ -39,9 +39,8 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "conv2d_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
|
||||
#include "cutlass/profiler/conv2d_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
using namespace cutlass::library;
|
||||
|
||||
|
||||
@ -40,9 +40,8 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "conv3d_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
|
||||
#include "cutlass/profiler/conv3d_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
using namespace cutlass::library;
|
||||
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
#include <stdexcept>
|
||||
|
||||
#if CUTLASS_ENABLE_CUBLAS
|
||||
#include "cublas_helpers.h"
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "cudnn_helpers.h"
|
||||
#include "cutlass/profiler/cudnn_helpers.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -36,15 +36,15 @@
|
||||
#include <stdexcept>
|
||||
|
||||
// Profiler includes
|
||||
#include "cutlass_profiler.h"
|
||||
#include "gemm_operation_profiler.h"
|
||||
#include "rank_k_operation_profiler.h"
|
||||
#include "rank_2k_operation_profiler.h"
|
||||
#include "trmm_operation_profiler.h"
|
||||
#include "symm_operation_profiler.h"
|
||||
#include "conv2d_operation_profiler.h"
|
||||
#include "conv3d_operation_profiler.h"
|
||||
#include "sparse_gemm_operation_profiler.h"
|
||||
#include "cutlass/profiler/cutlass_profiler.h"
|
||||
#include "cutlass/profiler/gemm_operation_profiler.h"
|
||||
#include "cutlass/profiler/rank_k_operation_profiler.h"
|
||||
#include "cutlass/profiler/rank_2k_operation_profiler.h"
|
||||
#include "cutlass/profiler/trmm_operation_profiler.h"
|
||||
#include "cutlass/profiler/symm_operation_profiler.h"
|
||||
#include "cutlass/profiler/conv2d_operation_profiler.h"
|
||||
#include "cutlass/profiler/conv3d_operation_profiler.h"
|
||||
#include "cutlass/profiler/sparse_gemm_operation_profiler.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include "cutlass/library/util.h"
|
||||
|
||||
#include "device_allocation.h"
|
||||
#include "cutlass/profiler/device_allocation.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
\brief
|
||||
*/
|
||||
|
||||
#include "device_context.h"
|
||||
#include "cutlass/profiler/device_context.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
\brief Provides several functions for filling tensors with data.
|
||||
*/
|
||||
|
||||
#include "enumerated_types.h"
|
||||
#include "cutlass/profiler/enumerated_types.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -39,10 +39,9 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "cublas_helpers.h"
|
||||
#include "gemm_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
#include "cutlass/profiler/gemm_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
#include "cutlass/library/singleton.h"
|
||||
#include "cutlass/library/library.h"
|
||||
#include "cutlass/library/handle.h"
|
||||
@ -74,6 +73,7 @@ GemmOperationProfiler::GemmOperationProfiler(Options const &options):
|
||||
{ArgumentTypeID::kEnumerated, {"split_k_mode", "split-k-mode"}, "Variant of split K mode(serial, parallel)"},
|
||||
{ArgumentTypeID::kInteger, {"split_k_slices", "split-k-slices"}, "Number of partitions of K dimension"},
|
||||
{ArgumentTypeID::kInteger, {"batch_count", "batch-count"}, "Number of GEMMs computed in one batch"},
|
||||
{ArgumentTypeID::kEnumerated, {"raster_order", "raster-order"}, "Raster order (heuristic, along_n, along_m)"},
|
||||
},
|
||||
{ library::Provider::kCUBLAS}
|
||||
) {
|
||||
@ -174,7 +174,7 @@ Status GemmOperationProfiler::GemmProblem::parse(
|
||||
}
|
||||
|
||||
this->mode = library::GemmUniversalMode::kGemm;
|
||||
if(this->split_k_mode == library::SplitKMode::kParallel) {
|
||||
if (this->split_k_mode == library::SplitKMode::kParallel) {
|
||||
this->mode = library::GemmUniversalMode::kGemmSplitKParallel;
|
||||
}
|
||||
|
||||
@ -190,6 +190,11 @@ Status GemmOperationProfiler::GemmProblem::parse(
|
||||
this->mode = library::GemmUniversalMode::kBatched;
|
||||
}
|
||||
|
||||
if (!arg_as_RasterOrder(this->raster_order, "raster_order", problem_space, problem)) {
|
||||
// default value
|
||||
this->raster_order = library::RasterOrder::kHeuristic;
|
||||
}
|
||||
|
||||
if (this->split_k_slices > 1 && this->batch_count > 1) {
|
||||
// At least one of these must be one
|
||||
return Status::kErrorInvalidProblem;
|
||||
@ -322,6 +327,7 @@ void GemmOperationProfiler::GemmProblem::initialize_result(
|
||||
set_argument(result, "split_k_mode", problem_space, library::to_string(split_k_mode));
|
||||
set_argument(result, "split_k_slices", problem_space, split_k_slices);
|
||||
set_argument(result, "batch_count", problem_space, batch_count);
|
||||
set_argument(result, "raster_order", problem_space, library::to_string(raster_order));
|
||||
set_argument(result, "alpha", problem_space,
|
||||
library::lexical_cast(alpha, operation_desc.element_epilogue));
|
||||
|
||||
@ -376,6 +382,8 @@ Status GemmOperationProfiler::initialize_configuration(
|
||||
gemm_workspace_.arguments.alpha = problem_.alpha.data();
|
||||
gemm_workspace_.arguments.beta = problem_.beta.data();
|
||||
gemm_workspace_.arguments.pointer_mode = library::ScalarPointerMode::kHost;
|
||||
gemm_workspace_.arguments.raster_order = problem_.raster_order;
|
||||
|
||||
// initialize reduction operation for parallel splitKMode
|
||||
if (problem_.split_k_mode == library::SplitKMode::kParallel) {
|
||||
if (!initialize_reduction_configuration_(operation, problem)) {
|
||||
@ -610,7 +618,7 @@ Status GemmOperationProfiler::initialize_workspace(
|
||||
results_.back().op_kind = library::OperationKind::kGemm;
|
||||
results_.back().disposition = Disposition::kNotRun;
|
||||
|
||||
for(auto provider : verification_providers_) {
|
||||
for (auto provider : verification_providers_) {
|
||||
results_.back().verification_map[provider] = Disposition::kNotRun;
|
||||
}
|
||||
}
|
||||
@ -1102,7 +1110,6 @@ Status GemmOperationProfiler::profile_cutlass_(
|
||||
void *device_workspace) {
|
||||
|
||||
GpuTimer timer;
|
||||
|
||||
// initialize gemm underlying operation to handle parallel reduction
|
||||
library::Operation const * underlying_operation = operation;
|
||||
|
||||
@ -1223,7 +1230,6 @@ Status GemmOperationProfiler::profile_cutlass_(
|
||||
//
|
||||
|
||||
timer.stop_and_wait();
|
||||
|
||||
//
|
||||
// Update performance result
|
||||
//
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -34,9 +34,9 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "options.h"
|
||||
#include "cutlass/profiler/options.h"
|
||||
|
||||
#include "cutlass_profiler.h"
|
||||
#include "cutlass/profiler/cutlass_profiler.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -47,9 +47,9 @@
|
||||
// sleep not supported
|
||||
#endif
|
||||
|
||||
#include "options.h"
|
||||
#include "operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/options.h"
|
||||
#include "cutlass/profiler/operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -100,12 +100,11 @@ OperationProfiler::OperationProfiler(
|
||||
verification_providers_.push_back(provider);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
OperationProfiler::~OperationProfiler() {
|
||||
|
||||
}
|
||||
OperationProfiler::~OperationProfiler() {}
|
||||
|
||||
/// Gets the schema description
|
||||
std::string const & OperationProfiler::description() const {
|
||||
@ -349,6 +348,11 @@ int OperationProfiler::profile_all(
|
||||
|
||||
if (continue_profiling) {
|
||||
|
||||
if (options.report.print_kernel_before_running) {
|
||||
std::cout << "Profiling kernel for JUnit test " << options.report.junit_output_path << ": "
|
||||
<< operation_name << std::endl;
|
||||
}
|
||||
|
||||
status = this->initialize_workspace(
|
||||
options,
|
||||
report,
|
||||
@ -679,7 +683,7 @@ bool OperationProfiler::find_string_matches_(
|
||||
|
||||
// Search filter_tokens in operation_name in order
|
||||
size_t start = 0, idx = 0;
|
||||
for(auto & token : filter_tokens) {
|
||||
for (auto & token : filter_tokens) {
|
||||
// Check if characters left to be parsed in operation_name
|
||||
if (start < operation_name.length()) {
|
||||
// Find token in operation_name[start:]
|
||||
|
||||
@ -39,7 +39,7 @@
|
||||
|
||||
#include "cutlass/library/util.h"
|
||||
|
||||
#include "options.h"
|
||||
#include "cutlass/profiler/options.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -145,7 +145,7 @@ void Options::Device::print_device_info(std::ostream &out) const {
|
||||
|
||||
out << "Device Name,SM,CUDA Device ID,Phy Device ID" << std::endl;
|
||||
|
||||
for(int device = 0; device < num_devices; device++) {
|
||||
for (int device = 0; device < num_devices; device++) {
|
||||
result = cudaSetDevice(device);
|
||||
if (result != cudaSuccess) {
|
||||
throw std::runtime_error("cudaSetDevice() failed for device");
|
||||
@ -587,7 +587,7 @@ Options::Report::Report(cutlass::CommandLine const &cmdline) {
|
||||
cmdline.get_cmd_line_argument("append", append, false);
|
||||
cmdline.get_cmd_line_argument("output", output_path);
|
||||
cmdline.get_cmd_line_argument("junit-output", junit_output_path);
|
||||
|
||||
|
||||
if (cmdline.check_cmd_line_flag("tags")) {
|
||||
cmdline.get_cmd_line_argument_pairs("tags", pivot_tags);
|
||||
}
|
||||
@ -597,6 +597,8 @@ Options::Report::Report(cutlass::CommandLine const &cmdline) {
|
||||
cmdline.get_cmd_line_argument("verbose", verbose, true);
|
||||
|
||||
cmdline.get_cmd_line_argument("sort-results", sort_results, false);
|
||||
|
||||
cmdline.get_cmd_line_argument("print-kernel-before-running", print_kernel_before_running, false);
|
||||
}
|
||||
|
||||
void Options::Report::print_usage(std::ostream &out) const {
|
||||
@ -613,6 +615,10 @@ void Options::Report::print_usage(std::ostream &out) const {
|
||||
<< " --junit-output=<path> "
|
||||
<< " Path to junit output file for result reporting. Operation kind and '.junit.xml' is appended.\n\n"
|
||||
|
||||
<< " --print-kernel-before-running=<bool> "
|
||||
<< " Prints the name of the kernel being profiled before running the kernel." << end_of_line
|
||||
<< " This is useful for determining which kernel is causing a run of the profiler to hang\n\n"
|
||||
|
||||
<< " --report-not-run=<bool> "
|
||||
<< " If true, reports the status of all kernels including those that" << end_of_line
|
||||
<< " do not satisfy the given arguments.\n\n"
|
||||
@ -634,7 +640,8 @@ void Options::Report::print_options(std::ostream &out, int indent) const {
|
||||
<< indent_str(indent) << "append: " << append << "\n"
|
||||
<< indent_str(indent) << "output: " << output_path << "\n"
|
||||
<< indent_str(indent) << "junit-output: " << junit_output_path << "\n"
|
||||
<< indent_str(indent) << "report_not_run: " << report_not_run << "\n"
|
||||
<< indent_str(indent) << "print-kernel-before-running: " << print_kernel_before_running << "\n"
|
||||
<< indent_str(indent) << "report-not-run: " << report_not_run << "\n"
|
||||
<< indent_str(indent) << "tags:\n";
|
||||
|
||||
for (auto const & tag : pivot_tags) {
|
||||
|
||||
@ -42,8 +42,8 @@
|
||||
|
||||
#include "cutlass/library/util.h"
|
||||
|
||||
#include "performance_report.h"
|
||||
#include "debug.h"
|
||||
#include "cutlass/profiler/performance_report.h"
|
||||
#include "cutlass/profiler/debug.h"
|
||||
namespace cutlass {
|
||||
namespace profiler {
|
||||
|
||||
@ -382,6 +382,7 @@ std::ostream & PerformanceReport::print_result_csv_(
|
||||
<< "," << result.gbytes_per_sec()
|
||||
<< "," << result.gflops_per_sec()
|
||||
;
|
||||
|
||||
}
|
||||
else {
|
||||
out << std::string(2
|
||||
|
||||
@ -39,8 +39,8 @@
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
// CUTLASS Profiler includes
|
||||
#include "enumerated_types.h"
|
||||
#include "performance_result.h"
|
||||
#include "cutlass/profiler/enumerated_types.h"
|
||||
#include "cutlass/profiler/performance_result.h"
|
||||
|
||||
// CUTLASS Library includes
|
||||
#include "cutlass/library/library.h"
|
||||
|
||||
@ -38,7 +38,7 @@
|
||||
|
||||
#include "cutlass/library/util.h"
|
||||
|
||||
#include "problem_space.h"
|
||||
#include "cutlass/profiler/problem_space.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -845,6 +845,46 @@ bool arg_as_NumericTypeID(
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Lexically casts an argument to an int64 if it is defined. Returns true if not null.
|
||||
bool arg_as_RasterOrder(
|
||||
library::RasterOrder &raster_order,
|
||||
KernelArgument::Value const *value_ptr) {
|
||||
|
||||
if (value_ptr->not_null) {
|
||||
if (value_ptr->argument->description->type == ArgumentTypeID::kEnumerated) {
|
||||
|
||||
raster_order = library::from_string<library::RasterOrder>(
|
||||
static_cast<EnumeratedTypeArgument::EnumeratedTypeValue const *>(value_ptr)->element);
|
||||
|
||||
if (raster_order == library::RasterOrder::kInvalid) {
|
||||
throw std::runtime_error(
|
||||
"arg_as_RasterOrder() - illegal cast.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error(
|
||||
"arg_as_RasterOrder() - illegal cast.");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Lexically casts an argument to an int64 if it is defined. Returns true if not null.
|
||||
bool arg_as_RasterOrder(
|
||||
library::RasterOrder &raster_order,
|
||||
char const *name,
|
||||
ProblemSpace const &problem_space,
|
||||
ProblemSpace::Problem const &problem) {
|
||||
|
||||
size_t idx = problem_space.argument_index(name);
|
||||
KernelArgument::Value const *value_ptr = problem.at(idx).get();
|
||||
|
||||
return arg_as_RasterOrder(raster_order, value_ptr);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Lexically casts an argument to a library::LayoutTypeID if it is defined. Returns true if not null.
|
||||
bool arg_as_LayoutTypeID(
|
||||
library::LayoutTypeID &layout_type,
|
||||
|
||||
@ -41,9 +41,9 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "cublas_helpers.h"
|
||||
#include "rank_2k_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
#include "cutlass/profiler/rank_2k_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -41,9 +41,9 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "cublas_helpers.h"
|
||||
#include "rank_k_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
#include "cutlass/profiler/rank_k_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -38,9 +38,9 @@
|
||||
#include <iomanip>
|
||||
#include <ios>
|
||||
|
||||
#include "cublas_helpers.h"
|
||||
#include "sparse_gemm_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
#include "cutlass/profiler/sparse_gemm_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -41,9 +41,9 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "cublas_helpers.h"
|
||||
#include "symm_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
#include "cutlass/profiler/symm_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -41,9 +41,9 @@
|
||||
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
#include "cublas_helpers.h"
|
||||
#include "trmm_operation_profiler.h"
|
||||
#include "gpu_timer.h"
|
||||
#include "cutlass/profiler/cublas_helpers.h"
|
||||
#include "cutlass/profiler/trmm_operation_profiler.h"
|
||||
#include "cutlass/profiler/gpu_timer.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
Reference in New Issue
Block a user